#!/bin/bash

################################################################################
#                                                                              #
# ViaThinkSoft - intelligent software for everyone                             #
#                                                                              #
# File:         /usr/bin/vdown                                                 #
# Description:  ViaThinkSoft easy downloader script                            #
# License:      LGPL                                                           #
# Author:       Daniel Marschall                                               #
# Version:      1.3                                                            #
# Last change:  2014-01-24                                                     #
#                                                                              #
# Downloads a file and honors If-Modified-Since and ETag. Useful e.g. for      #
# monitoring large online files via cronjob to save traffic.                   #
#                                                                              #
# Changelog:                                                                   #
#                                                                              #
# 2012-01-23  -  1.0   -  daniel  -  Release                                   #
# 2012-01-24  -  1.1   -  daniel  -  Added ETag Support                        #
# 2012-01-24  -  1.2   -  daniel  -  Added last_header()                       #
# 2012-01-29  -  1.2.1 -  daniel  -  Small fix for temp files                  #
# 2012-11-30  -  1.2.2 -  daniel  -  Returns error code of cURL now            #
# 2014-01-24  -  1.3   -  daniel  -  tmp files now in the dest dir, because of #
#                                    size limitation of /tmp vs big downloads  #
#                                                                              #
################################################################################

# Prints the last header block of a curl header dump read from stdin.
#
# When curl follows redirects, the header dump holds one header block per
# response, separated by blank lines. This filter keeps only the final block.
#
# Input:   header dump on stdin (CRLF or LF line endings)
# Output:  the lines of the last non-empty header block on stdout, one per
#          line, carriage returns removed, leading/trailing blanks trimmed
function last_header() {
	local -a block=()
	local line
	local pending_break=0

	# -r keeps backslashes literal (they may occur inside ETag values).
	# The "|| [ -n ... ]" part also picks up a final line without a newline.
	# IFS stays at its default on purpose, so surrounding blanks are trimmed.
	while read -r line || [ -n "$line" ]
	do
		line=${line//$'\r'/}

		if [ -z "$line" ]
		then
			# Only remember the separator. The block is reset when the next
			# header line shows up, so trailing blank lines do not wipe it.
			pending_break=1
			continue
		fi

		if [ "$pending_break" -eq 1 ]
		then
			block=()
			pending_break=0
		fi
		block+=("$line")
	done

	# printf does not misinterpret lines such as "-n"; the guard avoids
	# printing an empty line when no header line was read at all.
	if [ "${#block[@]}" -gt 0 ]
	then
		printf '%s\n' "${block[@]}"
	fi
}

# Main script.
#
# Arguments:  $1   URL to download
#             $2   output file
#             $3.. additional parameters, handed to curl unchanged
# Files:      "<output_file>_header" stores the response headers of the last
#             download; its Last-Modified / ETag values are used to make the
#             next request conditional.
# Exit code:  2 on wrong usage, curl's exit code if curl fails, 1 if a local
#             file cannot be created or written, otherwise 0.

if [ $# -lt 2 ]
then
	echo "Syntax: $0 <url> <output_file> [<additional curl params>]" >&2
	exit 2
fi

URL="$1"
shift

OUTPUT="$1"
shift

ADDITIONAL_PARAMS=("$@")

HEADERDUMP="${OUTPUT}_header"

# Temporary files are created in the destination directory, because /tmp can
# be too small for big downloads. The variable is deliberately not called
# TMPDIR, so an exported TMPDIR of the caller is not overwritten.
WORK_DIR=$( dirname "$OUTPUT" )
if [ ! -d "$WORK_DIR" ]
then
	WORK_DIR="/tmp"
fi

TMP_HEADER_DUMP=""
TMP_DOWNLOADED=""

# Removes the temporary files; registered for EXIT so that the individual
# exit paths below do not have to clean up themselves.
function vdown_cleanup() {
	[ -n "$TMP_HEADER_DUMP" ] && rm -f -- "$TMP_HEADER_DUMP"
	[ -n "$TMP_DOWNLOADED" ] && rm -f -- "$TMP_DOWNLOADED"
}
trap vdown_cleanup EXIT

# Prints the name of a newly created temporary file. Tries WORK_DIR first and
# falls back to /tmp; never uses a predictable file name.
function vdown_mktemp() {
	mktemp --tmpdir="$WORK_DIR" --suffix="_vdown" \
		|| mktemp --tmpdir="/tmp" --suffix="_vdown"
}

if ! TMP_HEADER_DUMP=$( vdown_mktemp )
then
	echo "$0: cannot create temporary file" >&2
	exit 1
fi

if ! TMP_DOWNLOADED=$( vdown_mktemp )
then
	echo "$0: cannot create temporary file" >&2
	exit 1
fi

# Without the output file the stored headers are worthless: a conditional
# request could be answered with 304 although there is nothing on disk.
if [ ! -f "$OUTPUT" ] && [ -f "$HEADERDUMP" ]
then
	rm -- "$HEADERDUMP"
fi

if [ -f "$HEADERDUMP" ]
then
	# Use the last header block (the dump contains several blocks when
	# redirects are followed by -L). Field names are matched case-insensitively
	# because curl dumps them in lowercase for HTTP/2 responses.
	LASTMOD=$( last_header < "$HEADERDUMP" | grep -i '^Last-Modified:' | head -n 1 | cut -d ' ' -f 2-7 | tr -d '\r\n' )
	if [ -n "$LASTMOD" ]
	then
		# No extra quote characters: array elements are passed to curl as-is.
		ADDITIONAL_PARAMS+=("--time-cond" "$LASTMOD")
	fi

	# Find the ETag (weak ones, marked with W/, are skipped).
	# cut -s drops a line without any double quote instead of echoing it whole.
	ETAG=$( last_header < "$HEADERDUMP" | grep -i '^ETag:' | grep -v '^[^:]*:[[:space:]]*W/' | head -n 1 | cut -s -d '"' -f 2 | tr -d '\r\n' )
	if [ -n "$ETAG" ]
	then
		ADDITIONAL_PARAMS+=("-H" "If-None-Match: \"$ETAG\"")
	fi
fi

# Download the file.
# Bug in CURL?! -L cannot be used together with -o (output goes to stdout)
# TODO: report bug
curl -fs -L "${ADDITIONAL_PARAMS[@]}" -D "$TMP_HEADER_DUMP" "$URL" > "$TMP_DOWNLOADED"

CURL_RET=$?
if [ $CURL_RET -ne 0 ]
then
	exit $CURL_RET
fi

# Status lines look like "HTTP/1.1 304 ..." as well as "HTTP/2 304".
if grep -qiE '^HTTP/[0-9.]+ 304' "$TMP_HEADER_DUMP"
then
	# 304 - Not Modified: keep the existing file and only refresh its timestamp.
	# TODO: touching "$HEADERDUMP" instead would be better
	touch "$OUTPUT"
else
	# The file was downloaded.
	# cat instead of mv, so owner and permissions of existing files are kept.
	# The payload is written first; if that fails, the stored headers are
	# removed so that the next run does a full download again.
	if ! cat "$TMP_DOWNLOADED" > "$OUTPUT"
	then
		echo "$0: cannot write $OUTPUT" >&2
		rm -f -- "$HEADERDUMP"
		exit 1
	fi

	if ! cat "$TMP_HEADER_DUMP" > "$HEADERDUMP"
	then
		echo "$0: cannot write $HEADERDUMP" >&2
		rm -f -- "$HEADERDUMP"
		exit 1
	fi
fi

exit 0
