#!/bin/bash
#
#   Copyright information
#
#       Copyright (C) 2004-2008 Thomas Sattler <tsattler@users.sourceforge.net>
#       Copyright (C) 2008-2011 Jari Aalto <jari.aalto@cante.net>
#
#   License
#
#       This program is free software; you can redistribute it and/or
#       modify it under the terms of the GNU General Public License as
#       published by the Free Software Foundation; either version 2 of
#       the License, or (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful, but
#       WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#       General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program. If not, see <http://www.gnu.org/licenses>.
#
#   Description
#
#       Call program with -h. See http://gt5.sf.net/browsers.html
#
#   Notes
#
#	The bash features used are as follows:
#
#	    RANDOM
#	    local VAR

# Program version; printed by --version and embedded in the generated page.
version=1.4.0.3
# Directory that receives the gt5.debug*.log files written by Debug_pipe.
GT5_DEBUG_DIR=${GT5_DEBUG_DIR:-$HOME}

# HTML-colors: ---------------------------------------------------------------
# These names are pasted into the awk program that generates the HTML page.

BG=black       #background
DIR=blue       #dir-links
FILE=cyan      #file-links
HR=olive       #separators
LAST=green     #last check was on ...
LAST2=magenta  # ... date_string ...
LAST3=magenta  # ... time since
LESS=lime      #smaller files
MORE=red       #bigger files
NEW=yellow     #new files
PC=darkred     #percent
SIZE=purple    #sizes
SUM=white      #dir-size
TEXT=grey      #plaintext

# External programs that Initialize verifies (through Requirebin) when
# /vmlinuz does not exist.
REQUIRED_PROGRAMS="\
 cat \
 cp \
 date \
 du \
 grep \
 gzip \
 head \
 hostname \
 ls \
 mkdir \
 mv \
 rm \
 sed \
 sort \
 tac \
 tee \
 touch \
"

# Browsers that Initialize tries, in this order, after $GT5_BROWSER.
SUPPORTED_BROWSERS="\
 links \
 links2 \
 elinks \
 lynx \
 w3m \
"

# Default awk command; Initialize replaces it with the first program found
# among $GT5_AWK, gawk and awk.
AWK=awk

# Online help: ----------------------------------------------------------------

Help () {
    # Print the usage text on stdout, then terminate the script with
    # status 0. The defaults shown in the text are read from the globals
    # CUT_AT, DDIR, MAX_DEPTH and ML as they are at the time of the call.

    cat <<EOF
SYNOPSIS
    $0 [ dir | file | dir file | file file2 ] [options]

DESCRIPTION
    Display disk usage statistics using text based web browser.

ARGUMENTS
    dir             The directory you want to check for space used
    file/file2      Existing du-logs (du -akx \$DIR #gz/bz2/plain)

OPTIONS
    --cut-at float  Don't show files below 'float'% of its parent,
                    Default is $CUT_AT, use values between 0.01 and 30
    --debug	    Turn on debug mode. Do not run browser.
    --diff-dir DIR  Use DIR to store diffs between runs
                    [$(Replace $DDIR $HOME/ ~/)]
    --discard       Do not save this one for diffs, discard it
    --exclude s     Hand s over to du to exclude its size
    --link-files    Insert links to files for direct access
    --max-depth i   Ignore anything deeper than i directories [$MAX_DEPTH]
    --max-lines i   Only consider the i biggest files [$ML]
    --megabytes     Expect du-logs to be run as 'du -amx'
    --no-diffs      Use this if you really do not want diffs
    --save-state    Overwrite a previous '--discard'
    --verbose	    Display verbose messages.
    --with-mounts   Also look below mountpoints
EOF

    exit 0
}

# Support functions: ------------------------------------------------------------

Warn () {
    # Write all arguments as one line to stderr. The arguments are joined
    # with the first character of IFS (a space by default).
    #
    # printf is used instead of echo so that a message consisting of
    # "-n" or "-e" is printed literally instead of being taken as an
    # echo option.
    printf '%s\n' "$*" >&2
}

Die () {
    # Report the message (all arguments joined into one string) through
    # Warn, then terminate the script with exit status 1.
    local message="$*"

    Warn "$message"
    exit 1
}

Debug_pipe () {
    # Pipeline filter: copy stdin to stdout unchanged.
    #
    # args: tag
    #
    # When DEBUG is non-empty the stream is additionally saved to
    # $GT5_DEBUG_DIR/gt5.debug<tag>.log. The path is quoted so that a
    # debug directory containing whitespace still works.

    if [ "$DEBUG" ]; then
        tee "$GT5_DEBUG_DIR/gt5.debug$1.log"
    else
        cat
    fi
}

Verbose () {
    # Forward the message to Warn, but only when VERBOSE is non-empty.
    # Without VERBOSE the call is a successful no-op.
    [ "$VERBOSE" ] || return 0

    Warn "$*"
}

Exist_or_die () {
    # Return 0 when PATH is a non-empty file or a directory; otherwise
    # abort through Die with the remaining arguments as the message.
    #
    # args: path message...
    #
    # "return" is used here: "continue" is only meaningful inside a
    # loop, so with it the function always fell through to Die.

    if [ -s "$1" ] || [ -d "$1" ]; then
        return 0
    fi

    shift
    Die "$@"
}

Which () {(
    # Print the location of an executable program and return 0, or
    # return 1 when there is none.
    #
    # args: name-or-path
    #
    # Runs in a subshell so that changing IFS and the positional
    # parameters does not affect the caller.
    #
    # Only regular files qualify: a plain -x test is also true for
    # searchable directories, so "Which lib" or an empty name could
    # report a directory as a program.

    bin="$1"

    [ "$bin" ] || return 1

    case "$bin" in
        /* | ./*)
            # An explicit path is checked as given and never looked up
            # in PATH.
            if [ -f "$bin" ] && [ -x "$bin" ]; then
                echo "$bin"
                return 0
            fi
            return 1
            ;;
    esac

    IFS=:
    set -- $PATH

    for path in "$@"
    do
        path="$path/$bin"
        if [ -f "$path" ] && [ -x "$path" ]; then
            echo "$path"
            return 0
        fi
    done

    return 1
)}

WhichList () {
    # Try each argument with Which, in order. The first one that is
    # found has its location printed (by Which) and ends the search
    # with status 0. Returns 1 when none of the arguments is found.
    local candidate

    for candidate
    do
        Which "$candidate" && return 0
    done

    return 1
}

Requirebin () {
    # Check that every program named in the arguments can be located by
    # Which; call Die with an error message for the first one that
    # cannot.
    local prog

    for prog
    do
        Which "$prog" > /dev/null ||
            Die "ERROR: Not any found in PATH: $prog"
    done
}

Grep () {
    # Return only $? status code: matched or not.
    #
    # args: regexp string...
    #
    # The extended regular expression is matched against the remaining
    # arguments (joined into one line). The pattern itself is shifted
    # out first: when it is part of the searched text, every literal
    # pattern trivially matches itself.
    local pattern="$1"

    [ $# -gt 0 ] && shift

    printf '%s\n' "$*" | grep -E -e "$pattern" > /dev/null 2>&1
}

Initialize () {
    # Verify the runtime environment and set the globals BROWSER and AWK.
    #
    #   - When /vmlinuz does not exist, every program listed in
    #     REQUIRED_PROGRAMS must be found (Requirebin dies otherwise).
    #   - BROWSER becomes the first program found among $GT5_BROWSER and
    #     SUPPORTED_BROWSERS.
    #   - AWK becomes the first program found among $GT5_AWK, gawk, awk.
    #   - sort must handle "-k1 -k2" correctly.
    #
    # The search results are captured with command substitution, so no
    # temporary file with a predictable name is created in $TMPDIR and
    # no helper variable leaks into the global scope.

    if [ ! -e /vmlinuz ]; then
        # Under Linux the program tests are skipped for a faster startup.
        # REQUIRED_PROGRAMS is a whitespace separated list: unquoted on
        # purpose.
        Requirebin $REQUIRED_PROGRAMS
    fi

    # Word splitting of the lists is intended here as well.
    BROWSER=$(WhichList $GT5_BROWSER $SUPPORTED_BROWSERS)

    if [ ! "$BROWSER" ]; then
        Die "ERROR: Can't set browser from \$GT5_BROWSER or list: $SUPPORTED_BROWSERS"
    fi

    AWK=$(WhichList $GT5_AWK gawk awk)

    if [ ! "$AWK" ]; then
        Die "ERROR: Can't set awk from \$GT5_AWK or list gawk awk"
    fi

    # Does sort work? (broken in busybox 1.4.[01])
    # No 'echo -e' in all /bin/sh versions like dash

    local test_in
    test_in="$(echo 1 b; echo 2 a)"

    local test_out
    test_out="$(echo "$test_in" | sort -k1 -k2)"

    if [ "$test_in" != "$test_out" ] ; then
        Die "ERROR: sort options -k1 -k2 do not work correctly."
    fi
}

Initialize_tempdir () {
    # Create a private temporary directory and store its name in the
    # global variable TMP. TMPDIR is set to /tmp when it is empty.
    #
    # The directory name is built from the script name and a timestamp;
    # characters outside [a-zA-Z0-9/.-] are replaced by "_". Dies when
    # TMPDIR is not writable or the directory cannot be created.

    if [ ! "$TMPDIR" ]; then
        TMPDIR=/tmp
    fi

    if [ ! -w "$TMPDIR" ]; then
        Die "ERROR: can't write to \$TMPDIR: $TMPDIR"
    fi

    local dir

    dir="$TMPDIR/$(Substr_after_all "$0" /).$(date '+%Y%m%d-%H%M%S')"
    dir="$(Replace_all "$dir" '[^a-zA-Z0-9/.-]' _)"

    # there may be systems without mktemp

    unset TMP

    if Which mktemp > /dev/null ; then
        TMP=$(mktemp -d -q "$dir.XXXXXXXX")
    else
        dir="$dir.$$.$RANDOM"
        mkdir "$dir" || exit $?
        TMP=$dir
    fi

    # mktemp -q stays silent on failure, so verify the result.
    if [ ! "$TMP" ] || [ ! -d "$TMP" ]; then
        Die "ERROR: error creating a temporary directory"
    fi
}

At_exit () {
    # Cleanup handler installed with trap: remove the temporary files
    # and directories created during this run.

    #  From Initialize()
    #
    #  Only the file Initialize() creates ($$.tmp) is removed. An
    #  unquoted "$$*" glob would also delete files of other programs
    #  whose names merely begin with the same digits as this process
    #  id, and would break on a TMPDIR containing whitespace.

    rm -f -- "${TMPDIR:-/tmp}/$$.tmp"

    # From Initialize_tempdir()

    if [ "$TMP" ]; then
        if [ "$DEBUG" ]; then
            # Keep the work directory for inspection in debug mode.
            echo "DEBUG: not removed $TMP"
        else
            rm -rf "$TMP"
        fi
    fi

    # From end of file, generated files

    if [ "$ESCAPED_GS" ]; then
        rm -rf "$ESCAPED_GS"
    fi
}

# Functions: -------------------------------------------------------------

Replace() {
    # Portable stand-in for ${s/from/to}; args: string from to
    #
    # Prints STRING with the first match of FROM on each line replaced
    # by TO. FROM is handed to awk's sub() and is therefore treated as a
    # regular expression.
    local text="$1" from="$2" to="$3"

    echo "$text" | "$AWK" -v a="$from" -v b="$to" '
        {
            sub(a, b)
            print
        }'
}

Replace_all() {
    # Portable stand-in for ${s//from/to}; args: string from to
    #
    # Prints STRING with every match of FROM replaced by TO. FROM is
    # handed to awk's gsub() and is therefore treated as a regular
    # expression.
    local text="$1" from="$2" to="$3"

    echo "$text" | "$AWK" -v a="$from" -v b="$to" '
        {
            gsub(a, b)
            print
        }'
}

Substr() {
    # Portable stand-in for ${s:0:1}; args: string start [length]
    #
    # START is 1-based. START and the optional LENGTH are pasted into
    # the awk program text as the arguments of substr(). The field
    # separator is a newline so that $1 is the complete input line.
    local range="$2${3:+,$3}"

    echo "$1" |
        "$AWK" -F"\n" "{ print( substr(\$1,$range)) }"
}

Substr_after_all() {
    # Portable stand-in for ${s##*/}; args: string return_s_after
    #
    # Removes the longest leading part of STRING that ends with the
    # second argument and prints what is left. The match is done by
    # awk's sub() with the regular expression ".*" followed by the
    # second argument.
    local greedy_prefix=".*$2"

    echo "$1" | "$AWK" -v s="$greedy_prefix" '
        {
            sub(s, "")
            print
        }'
}

Substr_before() {
    # Portable stand-in for ${s%/*}; args: string return_s_before
    #
    # Prints the part of STRING in front of the last occurrence of the
    # second argument. The line is scanned from its end towards its
    # start; the first position whose tail contains the separator marks
    # the cut. awk exits after the first line that contains the
    # separator; a line without it is printed unchanged.

    echo "$1" | "$AWK" -v s="$2" '
    {
        pos = length($0)

        while ( pos > 0 )
        {
            if ( index(substr($0, pos), s) != 0 )
            {
                print( substr($0, 1, pos - 1) )
                exit
            }

            pos--
        }

        print
    }'
}

# Parse Options: --------------------------------------------------------------

Options () {
    # Parse the command line and store the result in global variables.
    #
    # args: the script arguments ("$@")
    #
    # Globals written, depending on the arguments: CUT_AT, DEBUG,
    # SAVE_DU, DDIR, EXCLUDE, LINK_FILES, MAX_DEPTH, ML, UNIT, DIFFS,
    # SAVE_DU_AS, VERBOSE, X, CD (directory to scan), F1 and F2 (du-logs
    # to read), plus the scratch variables H and VALID.
    #
    # Invalid option values abort through Die, so the message goes to
    # stderr and the exit status is 1. Arguments that are neither a
    # known option nor an existing directory or file end in Help.
    #
    # The loop runs until all arguments are consumed. An empty argument
    # is skipped; testing "$1" as the loop condition would silently drop
    # every argument that follows an empty one.

    while [ $# -gt 0 ]
    do
        if [ -z "$1" ]; then
            shift
            continue
        fi

        case "$1" in

            --cut-at)
                case "$2" in
                    [0-9]|[0-9].[0-9]|[0-9].[0-9][1-9])
                        ;;
                    [1-3][0-9]|[1-3][0-9].[0-9]|[1-3][0-9].[0-9][1-9])
                        ;;
                    *)
                        Die "bad cut-at-arg \"$2\", use values between 0.01 and 30"
                        ;;
                esac

                CUT_AT=$2
                shift
                ;;

            --debug)
                DEBUG=debug
                SAVE_DU=save_du
                ;;

            --diff-dir)
                DDIR="$2"
                shift
                ;;

            --discard)
                unset SAVE_DU
                ;;

            --exclude)
                [ "$2" ] || Help
                # EXCLUDE is expanded unquoted on the du command line.
                EXCLUDE="$EXCLUDE --exclude $2"
                shift
                ;;

            --link-files)
                LINK_FILES=true
                ;;

            --max-depth)
                case "$2" in
                    [0-9]|[1-9][0-9])
                        ;;
                    *)
                        Die "bad max-depth-arg \"$2\", valid is 0-99"
                        ;;
                esac

                MAX_DEPTH=$2
                shift
                ;;

            --max-lines)
                case "$2" in
                    [1-9][0-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9][0-9])
                        ;;
                    *)
                        Die "bad max-lines-arg \"$2\", valid is 10000-999999"
                        ;;
                esac

                ML=$2
                shift
                ;;

            --megabytes)
                UNIT=M
                ;;

            --no-diffs)
                DIFFS=0
                unset SAVE_DU
                ;;

            --save-du-as)
                SAVE_DU_AS="$2"
                shift
                ;;

            --save-state)
                SAVE_DU=1
                ;;

            --verbose)
                VERBOSE=verbose
                ;;

            --version)
                echo "$version"
                exit 0
                ;;

            --with-mounts)
                # X holds the "x" flag of "du -akx"; without it du also
                # descends into other file systems.
                unset X
                ;;

            *) # Directories and files to show
                unset VALID

                if [ -d "$1" ] ; then
                    # Only one directory, and not together with two files.
                    [ "$CD$F2" ] && Help

                    # Change there and back again: OLDPWD then holds the
                    # absolute name of the directory.
                    cd "$1" || exit $?
                    cd "$OLDPWD"
                    CD="$OLDPWD"
                    VALID=1
                fi

                if [ -f "$1" ] ; then
                    H="$(Substr "$1" 1 1)"

                    # At most two files, or one file next to a directory.
                    [ "$F2" ] && Help
                    [ "$F1" ] && [ "$CD" ] && Help

                    if [ ! -r "$1" ] ; then
                        Die "ERROR: cannot read \"$1\""
                    fi

                    # Make relative file names absolute.
                    if [ "$H" != "~" ] && [ "$H" != "/" ]; then
                        H="$PWD/"
                    else
                        unset H
                    fi

                    if [ "$F1" ]; then
                        F2="$H$1"
                    else
                        F1="$H$1"
                    fi

                    VALID=1
                fi

                [ "$VALID" ] || Help
                ;;

        esac

        shift
    done
}

# Main: --------------------------------------------------------------

# Files created by this run are private to the user.
umask 077
unset BIN CD CONFIGURE_OPTS CUT DATE F1 F2 LINK_FILES SAVE_DU_AS SINCE
unset VERBOSE DEBUG

# Clean up on every exit. A caught signal must end the script, which
# then runs the exit trap; if the cleanup handler were installed on the
# signals directly, the script would keep running after the handler had
# removed its temporary files.
trap At_exit 0
trap 'exit 1' 1 3 15

# Defaults. They have to be in place BEFORE the command line is parsed:
# assigning them after Options would overwrite everything the user
# selected (--cut-at, --max-depth, --diff-dir, --no-diffs, ...).
SPACE="_escaped_${RANDOM}_space_"       # placeholder for blanks in names
TAB="_escaped_${RANDOM}_tab_"           # placeholder for tabs in names
DDIR=$HOME/.gt5-diffs
CUT_AT="0.1"
MAX_DEPTH=5
SAVE_DU=1
ML=10000
DIFFS=1
UNIT=K
X=x                                     # "x" flag of "du -akx"

Initialize
Initialize_tempdir

# TMP is set by Initialize_tempdir.
DATA=$TMP/gt5.html

Options "$@"

# Treat options: ------------------------------------------------------

# Options() stored the absolute name of a directory argument in CD.
# Using it directly does not depend on OLDPWD being left untouched.
if [ "$CD" ]; then
    cd "$CD" || exit $?
fi

# --save-du-as: only write a compressed du-log and quit.
if [ "$SAVE_DU_AS" ] ; then
    du -ak$X 2> /dev/null |
        grep -v ^0 |
        gzip > "$SAVE_DU_AS-$(date +%y%m%d-%H%M).gz"
    exit
fi

# Name of the saved state for the current directory: "_" is doubled and
# "/" becomes "_," so that the whole path fits into one file name.
GENERIC_STORE=$(Replace_all "$PWD" "_" "__").gz
GENERIC_STORE="$DDIR/$(Replace_all "$GENERIC_STORE" "/" "_,")"

[ -d "$DDIR" ] || mkdir "$DDIR" || exit $?

if [ "$F1" ]; then
    # A du-log was given on the command line: compare against it and do
    # not save the state of this run.
    unset SAVE_DU
else
    [ "$CD" ] || CD=$PWD

    unset H

    F1="$(Replace_all "$CD" "_" "__")"
    F1="$DDIR/$(Replace_all "$F1" "/" "_,")"

    # Look for a saved state of this directory or of the nearest parent
    # that has one. H collects the path components stripped on the way.
    while [ ! -f "$F1.gz" ] && [ "$F1" != "$DDIR/" ]
    do
        H="/$(Substr_after_all "$F1" "_,")$H"
        F1=$(Substr_before "$F1" "_,")
    done

    #FIX: no '==' in dash
    [ "$F1" = "$DDIR/" ] && F1="$DDIR/_,"

    if [ -f "$F1.gz" ]; then
        CUT="$H"
        F1="$F1.gz"
    else
        unset F1
    fi
fi

# Modification time of the old log, for the "last check was on" line.
if [ "$F1" ]; then
    DATE=$(date -r "$F1")
    SINCE=$(date -r "$F1" +%s)
fi

# Title shown in the page: host and directory for a live scan, the log
# file name otherwise. (With the fallback nested inside the "$CD" branch
# it could never be reached, leaving the title empty for du-logs.)
if [ "$CD" ] ; then
    P="${HOSTNAME:-$(hostname)}:$PWD"
else
    P="file: $F1"
fi

# Nothing to compare against: switch the diff columns off.
if [ -z "$CD$F2" ] || [ -z "$F1" ]; then
    DIFFS=0
fi

[ "$CD" ] || unset LINK_FILES SAVE_DU

# Can du handle depths?

if du --help 2>&1 | grep max-depth > /dev/null 2>&1
then
    DEPTH="--max-depth=$((MAX_DEPTH + 1))"
fi

# Generate the HTML page in $DATA.
#
# The brace group emits one record per file: "ID size<TAB>name". ID 0 is
# the newest data set (the live du run, or the first log when no
# directory is scanned); the following log gets ID 1. The later stages
# join old and new sizes, attach every entry to its parent directory and
# render the HTML. The page is produced bottom-up and reversed by tac at
# the very end.
{
    ID=0

    if [ "$CD" ]; then
        Verbose "processing \"$(Replace "$PWD" "$HOME/" "~/")\", please be patient ... "

        # $X, $DEPTH and $EXCLUDE are option lists: unquoted on purpose.
        # "$TMP/this" keeps the raw log for the saved state.
        du -ak$X $DEPTH $EXCLUDE 2> /dev/null |
            grep -v ^0 |
            tee "$TMP/this" |
            sed "s,^,$ID ,"

        ID=$((ID + 1))

        Verbose "done"
    fi

    for f in "$F1" "$F2"
    do

        [ -f "$f" ] || continue

        case "$f" in
            *gz)
                CAT="gzip -cd" ;;
            *bz2)
                CAT="bzip2 -cd" ;;
            *)
                CAT=cat ;;
        esac

        if [ "$f" ] && [ "$ID" -le "$DIFFS" ] ; then
            Verbose "processing \"$(Replace "$f" "$HOME/" "~/")\" ... "

            # The biggest record comes first after sorting; its name plus
            # $CUT is the prefix that is rewritten to "." in all records.
            $CAT "$f" |
                sort -k1nr -k2 |
                "$AWK" '
                    NR == 1 {
                        sub(/\/$/,"")
                        P = "	"$2"'"$CUT"'"
                    }
                    $1 && /'"$(Replace_all "$CUT" "/" "\\\/")"'/ {
                        sub(/^/,"'$ID' ")
                        sub(P,"	.")
                        sub(/\/$/,"")
                        print
                    }'

            ID=$((ID+1))
            Verbose "done"
        fi
    done

    Verbose "Generating HTML ... "

} |
Debug_pipe 1a |

# Escape blanks and tabs inside file names. Only the first blank (after
# the ID) and the first tab (after the size) stay field separators.

sed 's, ,'"$SPACE"',g; s,'"$SPACE"', ,; s,	,'"$TAB"',g; s,'"$TAB"', ,' |
Debug_pipe 1b |

# gt5's magic starts here:
# Sort by name, higher ID first, so that an old record directly precedes
# the new record of the same name.

sort -k3 -k1,1nr  |
Debug_pipe 1c  |
"$AWK" '
{
    # Output for every ID 0 record: "size oldsize name"; oldsize is -1
    # when the preceding record does not carry the same name.
    if ( ! $1 )
    {
        printf("%s %s %s\n", $2, (on == $3 ? os : -1), $3)
    }
    else
    {
        os = $2
        on = $3
    }
    on = $3

}' |
Debug_pipe 2a |
sort -k1,1nr -k3 |
Debug_pipe 2b |
"$AWK" -v ML="$ML" '
    # Input is sorted by size, biggest first: only the ML biggest
    # entries are considered.
    NR > ML {
        exit
    }

    {
        # The first record is the root: it is printed as its own parent.
        if ( NR == 1 )
        {
            printf("%s %s\n", $0, $0)
        }

        # The name has to be taken from the current record BEFORE it is
        # used as the array index. Otherwise the sizes are filed under
        # the name of the previous record and every parent lookup below
        # yields a wrong size.
        me        = $3
        size[me]  = $1
        osize[me] = $2
        e         = 0

        # e: position of the last "/" in the name
        while ( i = index( substr(me, e + 1), "/") )
            e += i;

        # Output: "parentsize parentoldsize parent size oldsize name"
        if (e)
        {
            p = substr(me, 1, e - 1)
            printf("%s %s %s %s\n", size[p], osize[p], p, $0)
        }
    }
' |
Debug_pipe 3a |
sort -k1,1n -k3,3r -k4,4n |
Debug_pipe 3b |
"$AWK" -v unit="$UNIT" -v DATE="$(date +%s)" -v SINCE="${SINCE:-0}" '

    # The page is written in reverse order (tac follows), therefore the
    # HTML footer is printed in BEGIN and the header in END.
    BEGIN {
        H = DATE - SINCE
        print "</pre></body></html>"

        s  = H%60
        H /= 60
        m  = H%60
        H /= 60
        h  = H%24
        d  = H/24

        timespan = sprintf("%id, %02i:%02i:%02i", d, h, m, s)
    }

    # Convert a size given in the du unit into a short readable form.
    function i2h(i)
    {
        E = unit
        i += 0 # Force to number

        if ( unit == "K"  &&  i > 999)
        {
            i /= 1024
            E = "M"
        }

        if ( i > 999 )
        {
            i /= 1024
            E = "G"
        }

        if ( i > 999 )
        {
            i /= 1024
            E = "T"
        }

        if ( i < 9.95 )
            return sprintf("%.1f%cB", i, E)
        else
            return sprintf("%.0f%cB", i, E)
    }

    # Print the heading of directory p (size ps, old size pos). A true
    # pf adds blank lines as padding between two directory sections.
    function directory (p, ps, pos, pf)
    {
        if ( !p )
            return; # No parent on first call

        printf "\n%s/:   [<font color='$SUM'>%s",p,i2h(ps)
        printf "</font> in "children[p]" files or directories]  "

        if ( '$DIFFS' )
        {
            if ( pos == -1 )
            {
                printf "<font color='$NEW'>new</font>";
            }
            else
            {
                if ( ps < pos )
                    printf "<font color='$LESS'>-%s</font>", i2h(pos - ps)

                if ( ps > pos )
                    printf "<font color='$MORE'>+%s</font>", i2h(ps - pos)
            }
        }

        print "\n<font color='$HR'><hr></font></a>"

        if ( '$DIFFS'  &&  "'"$F2"'"=="" )
        {
            printf "<font color='$LAST'> last check was on <font"
            printf " color='$LAST2'>'"$DATE"'</font> (i.e. <font color="
            printf "'$LAST3'>"timespan"</font> ago)</font>\n"
        }

        printf "<a name=\""(nr[p]=x++)"\">gt5 '"$version</a> ($P)"':   [cut:"
        print '$CUT_AT'"% depth:"'$MAX_DEPTH'" lines:"'$ML'"]"

        if ( pf )
        {
            for ( i = 0; i < 16; ++i )
                print "\n\n\n\n\n\n\n\n"
        }
    }

    # Fields: parentsize parentoldsize parent size oldsize name
    $1 > 0 {
        # Division by zero ...

        if ( op != $3 )
            directory(op, ops, opos, 1)

        ops  = $1
        opos = $2
        op   = $3
        pz   = 100 * $4 / ops
        e    = 0

        while ( j = index( substr($6, e + 1), "/") )
            e += j;

        s = substr($6, e + 1)
        children[$3]++

        if ( children[$6] )
        {
            s = "<a href=\"#"nr[$6]"\">"s"</a>/"
        }
        else if ( "'"$LINK_FILES"'" )
        {
            h = "<a href=\"'"$PWD"'" substr($6, 2)
            s = h"\"><font color='$FILE'>"s"</font></a>"
        }

        if ( pz >= '"$CUT_AT"'  &&  $6 != "." )
        {
            printf "<font color='$SIZE'>%6s</font> ",i2h($4)
            printf "[<font color='$PC'>%5.2f%%</font>] ./%s  ", pz, s

            if ( '$DIFFS' )
            {
                if ( $5 == -1 )
                    printf "<font color='$NEW'>new</font>"
                else
                {
                    if ( $4 < $5)
                        printf "<font color='$LESS'>-%s</font>", i2h( $5 - $4)

                    if ( $4 > $5)
                        printf "<font color='$MORE'>+%s</font>", i2h( $4 - $5)
                }
            }

            printf "\n"
        }
    }

    END {
        directory(op, ops, opos)

        if ( !x )
            print "     directory seems to be empty\n\n\ngt5 v'"$version ($P)"'"

        printf "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;"
        printf " charset='${GT5_CHARSET:-$LANG}'\"></head><body bgcolor='$BG'"
        print " text='$TEXT' link='$DIR'><pre>"
    }
' |
Debug_pipe 4a |

# Turn the placeholders back into blanks.

sed 's,'"$SPACE"', ,g;s,'"$TAB"',   ,g' |
Debug_pipe 4b |
head -c 16m |
Debug_pipe 4c |
tac |
Debug_pipe 4d > "$DATA" # max 16MB HTML

Verbose "Generating HTML ... done (size: $(Substr_before "$(ls -sh "$DATA")" " "))"

# Save the du-log of this run as the reference for the next one.
if [ "$SAVE_DU" ] ; then

    touch "$GENERIC_STORE.$$.tmp" || exit $?

    # Register the temporary file for the exit handler before writing
    # to it, so that an interrupted or failed run leaves nothing behind.
    # The name is stored as it is: with escaped quotes it would not
    # match the real file.
    ESCAPED_GS="$GENERIC_STORE.$$.tmp"

    # The store is replaced only after the compression has succeeded.
    gzip < "$TMP/this" > "$GENERIC_STORE.$$.tmp" &&
        mv "$GENERIC_STORE.$$.tmp" "$GENERIC_STORE"
fi

Verbose "Starting browser ($(Replace "$BROWSER" "$HOME/" "~/")) ... "

# Keep a copy of the page in the home directory; fall back to a real
# copy when a hard link is not possible.
cp -l "$DATA" "$HOME/.gt5.html" 2> /dev/null ||
    cp "$DATA" "$HOME/.gt5.html" ||
    exit $?

# With --debug the browser command is only printed, not run.
${DEBUG+echo} "$BROWSER" "$DATA"

# End of file

# End of file
