#! /bin/sh
#
# webwatcher.sh - Command line utility for tracking changes on webpages
# Copyright (C) 2002-2003 Tommi Saviranta
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
#
# Version: webwatcher.sh v0.2.2 22-Jan-2003 tsaviran@cs.helsinki.fi
#
# Usage: run without arguments. The script changes into the directory it is
# stored in and reads the file `sites` found there. Each line of `sites` is a
# keyword followed by its value:
#
#   url: <address>      page to watch
#   mode: <mode>        report body: ascii | html | diff | brief
#   subject: <text>     subject of the report mail
#   grep: <pattern>     only compare lines matching this pattern
#   options: <opts>     extra options handed to grep
#   browser: <command>  command used to fetch pages (stays in effect)
#   mailto: <address>   recipient of reports (stays in effect)
#   ok                  check the page described by the entries above
#
# Fetched pages are kept in `cache/cached.<id>`; `cache/index` maps each URL
# to its id and `cache/count` holds the next unused id.

BROWSER="links"
BOPTIONS="-dump"
MAILTO="user@host"
# Any non-empty value appends a "-- differences --" section to every report.
DIFFDEBUG=1

# BROWSER, BOPTIONS, OPTIONS and MAILTO are expanded unquoted on purpose so
# that they may hold several words. Disable pathname expansion so those words
# can never be mistaken for glob patterns.
set -f

#######################################
# Print the directory part of a path.
# Arguments: $1 - path (normally "$0")
# Outputs:   the directory on stdout; "." when the path has no slash and
#            "/" when the only slash is the leading one
#######################################
ww_script_dir() {
    case $1 in
        */*)
            _ww_dir=${1%/*}
            printf '%s\n' "${_ww_dir:-/}"
            ;;
        *)
            printf '%s\n' .
            ;;
    esac
}

#######################################
# Look up the cache id recorded for a URL.
# The URL is compared as an exact string, never as a pattern, so characters
# such as "." or "?" in it cannot match unrelated entries.
# Arguments: $1 - URL, $2 - index file with "<url> <id>" lines
# Outputs:   the id of the first matching entry on stdout
# Returns:   0 when found, 1 otherwise
#######################################
ww_cache_lookup() {
    [ -r "$2" ] || return 1
    while IFS= read -r _ww_entry; do
        # Entries without a separator carry no id; ignore them.
        case $_ww_entry in
            *' '*) ;;
            *) continue ;;
        esac
        # The id is the last space-separated field, the URL is the rest.
        if [ "${_ww_entry% *}" = "$1" ]; then
            printf '%s\n' "${_ww_entry##* }"
            return 0
        fi
    done <"$2"
    return 1
}

#######################################
# Fetch the page described by the current settings, compare it with the
# cached copy, mail a report when it changed and refresh the cache.
# Globals:   URL MODE SUBJECT GREP OPTIONS BROWSER BOPTIONS MAILTO DIFFDEBUG
#            TEMPFILE1..TEMPFILE4 (read); files below cache/ (written)
# Returns:   1 when the entry cannot be processed, otherwise the status of
#            the final cache update
#######################################
ww_check_page() {
    if [ -z "$URL" ]; then
        echo "webwatcher: 'ok' without a preceding 'url:' entry, skipped." >&2
        return 1
    fi

    # Start every check with an empty report file.
    : >"$TEMPFILE4"

    # See if page has been watched before
    _ww_report=1
    _ww_id=$(ww_cache_lookup "$URL" cache/index)
    if [ -z "$_ww_id" ]; then
        # We're watching this page for the first time
        _ww_id=$(cat cache/count)
        case $_ww_id in
            '' | *[!0-9]*)
                echo "webwatcher: cache/count does not hold a number." >&2
                return 1
                ;;
        esac
        touch "cache/cached.$_ww_id"
        printf '%s %s\n' "$URL" "$_ww_id" >>cache/index
        echo $((_ww_id + 1)) >cache/count
        # Nothing to compare against yet, so don't report this run.
        _ww_report=0
    fi
    _ww_cached="cache/cached.$_ww_id"

    # Fetch the page. If fetching fails, the simplest way to ignore the
    # failure is to use the cached file as the "current" version.
    # shellcheck disable=SC2086 # BROWSER may carry extra arguments
    if ! $BROWSER -source "$URL" >"$TEMPFILE1" 2>/dev/null; then
        cp "$_ww_cached" "$TEMPFILE1"
    fi

    # TEMPFILE2 = new content to compare, TEMPFILE3 = old content to compare.
    if [ -n "$GREP" ]; then
        # shellcheck disable=SC2086 # OPTIONS may carry several grep options
        grep $OPTIONS -- "$GREP" "$TEMPFILE1" >"$TEMPFILE2"
        # shellcheck disable=SC2086
        grep $OPTIONS -- "$GREP" "$_ww_cached" >"$TEMPFILE3"
    else
        cp "$TEMPFILE1" "$TEMPFILE2"
        cp "$_ww_cached" "$TEMPFILE3"
    fi

    # cmp exits with 1 only when the files differ (0: equal, >1: trouble).
    _ww_status=0
    cmp -s "$TEMPFILE3" "$TEMPFILE2" || _ww_status=$?
    if [ "$_ww_status" -eq 1 ]; then
        # They do differ. Build the report body; an unknown or empty mode
        # leaves the body empty.
        case $MODE in
            ascii)
                # TEMPFILE1 is an absolute path, so this yields file:///...
                # shellcheck disable=SC2086
                $BROWSER $BOPTIONS "file://$TEMPFILE1" >"$TEMPFILE4"
                ;;
            html)
                cp -f "$TEMPFILE1" "$TEMPFILE4"
                ;;
            diff)
                # Note the argument order: new content first, old second.
                diff "$TEMPFILE2" "$TEMPFILE3" >"$TEMPFILE4"
                ;;
            brief)
                echo "Page changed." >"$TEMPFILE4"
                ;;
        esac

        if [ -n "$DIFFDEBUG" ]; then
            echo "-- differences --" >>"$TEMPFILE4"
            diff "$TEMPFILE2" "$TEMPFILE3" >>"$TEMPFILE4"
        fi

        if [ "$_ww_report" -eq 1 ]; then
            # shellcheck disable=SC2086 # MAILTO may list several recipients
            mail -s "$SUBJECT" $MAILTO <"$TEMPFILE4"
        fi
    fi

    # Copy new file to replace old one
    cp "$TEMPFILE1" "$_ww_cached"
}

# Go to the directory the script lives in. CDPATH is blanked so a relative
# directory is resolved against the current directory only.
CDPATH='' cd -- "$(ww_script_dir "$0")" || exit 1

URL=""
MODE=""
SUBJECT=""
GREP=""
OPTIONS=""

# Private scratch directory instead of fixed names in /tmp; removed on exit.
WW_TMPDIR=$(mktemp -d "${TMPDIR:-/tmp}/webwatcher.XXXXXX") || exit 1
trap 'rm -rf -- "${WW_TMPDIR:?}"' EXIT
trap 'exit 1' HUP INT TERM
TEMPFILE1="$WW_TMPDIR/1" # page as fetched
TEMPFILE2="$WW_TMPDIR/2" # new content to compare
TEMPFILE3="$WW_TMPDIR/3" # old content to compare
TEMPFILE4="$WW_TMPDIR/4" # report body

if [ ! -d cache ]; then
    if [ -e cache ]; then
        echo "File 'cache' exists, but it's not a directory." >&2
        exit 1
    fi
    mkdir cache || exit 1
fi
[ -f cache/index ] || touch cache/index
[ -f cache/count ] || echo "0" >cache/count

if [ -r sites ]; then
    # Read the configuration through descriptor 3 so commands run inside the
    # loop cannot consume it via stdin. The "|| [ -n ... ]" part keeps a
    # final line that lacks a trailing newline.
    while read -r CMD DATA <&3 || [ -n "$CMD" ]; do
        case $CMD in
            url:) URL=$DATA ;;
            mode:) MODE=$DATA ;;
            subject:) SUBJECT=$DATA ;;
            grep:) GREP=$DATA ;;
            options:) OPTIONS=$DATA ;;
            browser:) BROWSER=$DATA ;;
            mailto:) MAILTO=$DATA ;;
            ok)
                # We got all we wanted.
                ww_check_page
                # Per-page settings do not carry over to the next entry;
                # BROWSER and MAILTO deliberately do.
                URL=""
                MODE=""
                SUBJECT=""
                GREP=""
                OPTIONS=""
                ;;
        esac
    done 3<sites
else
    echo "webwatcher: cannot read 'sites' in $(pwd); nothing to do." >&2
fi