#!/bin/sh
# ~jhs/bin/.sh/web_cp_berklix
#
# Mirror a fixed list of web sites with httrack into /usr/local/www/backup,
# one sub-directory per site, then record the completion date in the file
# Date_Of_Backup in that directory.
#
#	/site/usr/local/www/Data/virtual/berklix.net/backup ->
#		/usr/local/www/backup
#
# Called from /var/jhs/crontab on both
#	www.berklix.org  &  user.js.berklix.net
# Which sites are mirrored, the byte limit, and whether a proxy is used
# all depend on the domain part of `hostname` (see the case statements).
#
# See also:
#	http://www.freebsd.org/cgi/cvsweb.cgi/ports/www/httrack/
#	http://www.httrack.com/
#
# Warning: This used 120M of swap, & gate=park ran out, it also makes gate
# slow.  So as it also runs on internal host, using gate as a proxy,
# just run it on internal host=user,
# which has the other advantage: do not need to keep both alternate
# gates up to date.
#
# Exit status: 1 if the backup directory cannot be entered (a mail is
# sent to jhs in that case), otherwise 0.  Failures of individual
# httrack runs are not checked.

domain1=$(hostname -s)			# short host name, e.g. "user"
domain2=$(hostname)			# fully qualified host name
# Domain = full name with the literal "<short>." prefix removed.
# (Was: hostname | sed -e s/${domain1}.//  - unquoted, unanchored regex.)
domain=${domain2#"${domain1}."}

# running:
#	cd /usr/local/www/backup ; mv a* f* gp* m* p* s* /usrb/backup/www
# later:
#	cd /usr/local/www/backup ; mv * /usr/backup/www/
#	rmdir /usr/local/www/backup
#	ln -s /usr/backup/www /usr/local/www/backup
if ! cd /usr/local/www/backup ; then
	# Test cd itself rather than a later $?, so nothing inserted in
	# between can mask the failure.  All -O targets below are relative,
	# so carrying on from the wrong directory would scatter mirrors.
	echo "web_cp_berklix cd failed on ${domain1} $(date)" | \
		mail -s "Cron: ${domain1}" jhs
	exit 1
fi
# echo "web_cp_berklix cd succeeded on ${domain1} $(date)" | \
#	mail -s "Cron: ${domain1}" jhs

ht="nice /usr/local/bin/httrack"
# ht="nice /usr/local/bin/httrack --verbose --debug-log"
sl="sleep 6"
	# sleep is so if I hit with ^C I don't have to manually do a load
	# of key strokes about 6 times to finally escape the shell.

# mirror ARG...
#	Pause ($sl), then run httrack ($ht) with exactly the arguments given.
#	Callers pass $params unquoted on purpose: it is a space separated
#	option list that must be word-split.  The exit status of httrack
#	is returned but no caller examines it.
mirror() {
	$sl
	$ht "$@"
}

params=""			# httrack --help
params="$params -A10000"	# max 10 Kbyte/sec
params="$params -a"		# = --stay-on-same-address
params="$params -d"		# = --stay-on-same-domain
# params="$params --quiet"
params="$params -v"		# for debug
params="$params -w"		# ? Insert "Mirrored from..."
params="$params --update"	# To reduce traffic
params="$params -X"		# purge old files
# params="$params -s0"		# ignore robots.txt

# Per-domain limits and proxy.  -M is the max bytes per job; individual
# jobs below may append their own -M after $params.
case "$domain" in
berklix.org)
	params="$params -M10000000"
		# Less, to be gentle on server sites.
	;;
js.berklix.net)
	# Allow more data as I can load my flat rate DSL.
	# www.uk.freebsd.org
	# More than 100000000 bytes have been transfered.. giving up) - OK
	#           100,000,000
	params="$params -M1000000000"
	case "$domain1" in
	mart|park)
		echo "Proxy not needed on gateway, but you may need lots of swap"
		;;
	*)
		params="$params -P gate:80"
		echo "Proxy is set to gate:80"
		;;
	esac
	;;
esac

# shellcheck disable=SC2086	# $params is deliberately word-split below.
case "$domain" in
js.berklix.net)		#{ Local @ Holz.
	mirror $params -O scanjet http://www.madole.net/scanjet/	# 4M
	mirror $params -s0 -O gea-muc.de http://gea-muc.de
	# mirror http://www.berklix.com -O berklix.com
	#	Recursive on txt/ sym link
	# NOTE(review): the next job runs without $params (so no rate
	# limit, byte limit or proxy) - kept as found; confirm intended.
	mirror http://misc.allbsd.de/Flyer/FreeBSD/PDF/en/ -O misc.allbsd.de \
		http://www.allbsd.de/src/Flyer/FreeBSD/PDF/
		# A mix of German French etc.
	mirror $params -O freebsd.org http://www.de.freebsd.org
		# 870 Meg 2.5 hours minimum
	;;		#}
berklix.org)		# { Remote servers.
	# -----------
	mirror $params -O scanjet http://www.madole.net/scanjet/	# 4M
	# -----------
	# Phillip
	mirror $params -M14000000 -O a1med.co.uk \
		http://www.a1med.co.uk
	mirror $params -M14000000 -O a1med.net \
		http://www.a1med.net
	mirror $params -M14000000 -O cyberknifeuk.net \
		http://www.cyberknifeuk.net
	mirror $params -M14000000 -O mediluxhealth.net \
		http://www.mediluxhealth.net			# Main
	mirror $params -M14000000 -O mediluxprofessional.net \
		http://mediluxprofessional.net			# Main
	mirror $params -M14000000 -O mediluxretail.co.uk \
		http://www.mediluxretail.co.uk			# Main
	mirror $params -M14000000 -O ppmconsult.co.uk \
		http://www.ppmconsult.co.uk
	# -----------
	mirror $params -M60000000 -O surfacevision.com \
		http://user.surfacevision.com		# Graham Backup
	# -----------
	mirror $params -M14000000 -O nostradamus-dimde.de \
		http://www.nostradamus-dimde.de	# Ernst's friend - Backup
	# -----------
	;;		# }
esac

# Stamp the backup tree with the completion time and the script's URL.
# ${0##*/} is the script's base name; unlike unquoted `basename $0` it
# survives spaces in the invocation path.
{
	date
	echo "Built by: http://berklix.com/~jhs/bin/.sh/${0##*/}"
} > Date_Of_Backup

# echo "web_cp_berklix finished on ${domain1} $(date)" | \
#	mail -s "Cron: ${domain1}" jhs
exit 0

# Other HTML site copying tools apart from httrack:
#	pavuk	< ernst
#	spider	< ernst
#	webcopy
#	wget	< gary