#!/bin/sh
# ~jhs/bin/.sh/web_cp_remote
#
# Purpose: mirror a fixed list of remote web sites into /usr/backup/www
# using httrack, then record the date of the run in ./Backup_date.
# Mail is sent to jhs if the backup directory cannot be entered.
#
# See also wed_cp_local
# Link setup: /site/domain/berklix/usr/local/www/backup/Makefile
#   /site/usr/local/www/Data/virtual/berklix.net/backup ->
#     /usr/local/www/backup
# Called from /var/jhs/crontab on both
#   www.berklix.org & user.js.berklix.net
# See also:
#   http://www.freebsd.org/cgi/cvsweb.cgi/ports/www/httrack/
#   http://www.httrack.com/
# Warning: This used 120M of swap, & gate=park ran out, it also makes gate
# slow. So as it also runs on internal host, using gate as a proxy,
# just run it on internal host=user,
# which has the other advantage: do not need to keep both alternate
# gates up to date.
# 45 mins on a repeat run when already up to date.

# --> /usr/local/www/backup
# remote: ln -s ../local/www/backup /usr/backup/www
# Only on local: ln -s /usr/backup/www /usr/local/www/backup
#
# NOTE(review): the mail text says "wed_cp_remote" although this script is
# web_cp_remote; the string is left unchanged in case mail filters match it.
if ! cd /usr/backup/www; then
  echo "wed_cp_remote cd failed on $(hostname -s) $(date)" |
    mail -s "Cron: $(hostname -s)" jhs
  exit 1
fi
# Disabled success notification:
# echo "wed_cp_remote cd succeeded on `hostname -s` `date`" | \
#   mail -s "Cron: `hostname -s`" jhs

domain1=$(hostname -s)  # short host name, e.g. "user"
domain2=$(hostname)     # full host name, e.g. "user.js.berklix.net"
# Everything after the first dot of the full host name. Pure parameter
# expansion, so no regex metacharacters or unquoted sed script are involved.
# A host name without a dot is left unchanged.
domain=${domain2#*.}

ht="nice /usr/local/bin/httrack"
# ht="nice /usr/local/bin/httrack --verbose --debug-log"

sl="sleep 6"
# sleep is so if I hit with ^C I don't have to manually do a load
# of key strokes about 6 times to finally escape the shell.

# See ideas for parameters in ~/bin/.sh/web_cp_0_inc
# but as this is remote, set them all here, in case something
# goes astray & I might otherwise not notice.
params=""

# httrack --help
#   -AN maximum transfer rate in bytes/seconds (1000=1KB/s max)
#   (--max-rate[=N])
# See performance measurements in ~/bin/.sh/web_cp_0_inc
# Keep low to avoid loading BSN.
# Remember bit rate = 8 *, + protocol overhead.
# Preserve server response.
params="$params -A3000"

params="$params --stay-on-same-address"
params="$params --stay-on-same-domain"
# params="$params --quiet"
# params="$params --verbose"  # for debug
params="$params --mirror"     # ? Insert "Mirrored from..."
params="$params --update"     # To reduce traffic
params="$params -X"           # purge old files
# params="$params --robots=0" # ignore robots.txt
# Keep low, use less swap; preserve server response.
params="$params -c3"

# Per-domain limits. -M is the max bytes per job.
case "$domain" in
  berklix.org)
    # Less, to be gentle on server sites.
    params="$params -M10000000"
    ;;
  js.berklix.net)
    # Allow more data as I can load my flat rate DSL.
    # www.uk.freebsd.org
    #   More than 100000000 bytes have been transfered.. giving up) - OK
    #   100,000,000
    # (This branch used to exist twice and appended the same -M twice.)
    params="$params -M1000000000"
    # Hosts mart and park are the gateways and fetch directly; every
    # other host in this domain goes through the proxy on gate.
    case "$domain1" in
      mart | park)
        echo "Proxy not needed on gateway, but you may need lots of swap"
        ;;
      *)
        params="$params -P gate:80"
        echo "Proxy is set to gate:80"
        ;;
    esac
    ;;
esac

# mirror_site HTTRACK_ARGS...
# Pause ($sl), then run httrack ($ht) with the common $params followed by
# the arguments given here. The sleep always runs first and its status is
# ignored; the function's status is that of httrack and callers below do
# not check it (a failed site does not stop the remaining ones).
mirror_site() {
  $sl
  # $ht and $params are deliberately unquoted: each holds several
  # space-separated words that must be split into separate arguments.
  $ht $params "$@"
}

if [ "$domain" = berklix.org ]; then # { Remote servers.
  # Most lines add their own -M after the one already in $params.
  # NOTE(review): assumes httrack honours the last -M given; confirm.
  # Trailing comments are presumably the mirrored size per site.
  # du -s -k @ 2009.05: 794602 BAFUG
  mirror_site -M9000000 -O geoffharries.com http://www.geoffharries.com # 8M
  mirror_site -M9000000 -O the-phoney-photon.com http://www.the-phoney-photon.com # 4M
  # ----------
  # For Phillips domain lists see also:
  #   /site//site/usr/local/www/Data/virtual/berklix.net/index.lmth
  #   /site/domain/berklix/usr/local/www/backup/index.lmth
  #   ~jhs/bin/.sh/web_cp_remote
  mirror_site -M2000000 -O a1med.co.uk http://www.a1med.co.uk # 2M
  mirror_site -M2000000 -O a1med.net http://www.a1med.net # 2M
  mirror_site -M120000000 -O cyberknifeservice.com http://www.cyberknifeservice.com # 34M
  mirror_site -M70000000 -O mediluxhealth.net http://www.mediluxhealth.net # 2M
  mirror_site -O mediluxhealthcare.in http://mediluxhealthcare.in # 20M
  # 2010.01 http://mediluxhealth.net/mhl_web_sites.html lists sites that
  # could be backed up/mirrored.
  mirror_site -M70000000 -O mediluxprofessional.net http://mediluxprofessional.net # 56M
  mirror_site -M120000000 -O mhlclinics.com http://www.mhlclinics.com # 123M
  mirror_site -M120000000 -O mhldialysis.com http://www.mhldialysis.com # 39M
  mirror_site -M90000000 -O ppmconsult.co.uk http://www.ppmconsult.co.uk # 23M
  mirror_site -M90000000 -O ppmconsult.com http://www.ppmconsult.com # 23M
  mirror_site -M30000000 -O surfacevision.com http://user.surfacevision.com # 14M
  # -----------
fi # }

# Record when, and by which script, this backup tree was last built.
# Backup_date is overwritten on every run.
{
  date
  echo "Built by: http://www.berklix.com/~jhs/bin/.sh/$(basename "$0")"
} > Backup_date

# Disabled completion notification:
# echo "wed_cp_remote finished on `hostname -s` `date`" | \
#   mail -s "Cron: `hostname -s`" jhs
exit 0

# Other HTML site copying tools apart from httrack:
#   pavuk   < ernst
#   spider  < ernst
#   webcopy
#   wget    < gary