######################################################################
#
# ScanGraphics.pl - the much-requested follow-up to ScanGraphics, the
# program for ripping off people's pictures from their web sites.
#
# You will need perl to run this program. Assuming you have windows,
# get perl from www.activestate.com. If you have a better o/s, you
# probably already have perl. The web site is www.perl.com
#
# Usage:
#   c:\perl\perl.exe  ScanGraphics.pl  http://www.netlife.lu/
#   (perl program)    (this script)    (URL of web site)
#
# Program to scan a web site, find all the graphics from it, and put
# links to them into an HTML page in the current directory named gfx.htm
#
# This program does not download the graphics, just lists them. Open the
# gfx.htm file in any web browser to view the graphics.
#
# This is not a polite program. It ignores any robots.txt files or other
# robot directives, and it loads pages as fast as your network connection
# will allow. This will seriously annoy many server administrators, so
# use this program infrequently and with caution. For large-scale projects,
# I suggest you put a one-minute pause (e.g. sleep(60)) in, and run a
# version of this program in a different process for each web site you're
# interested in, then leave it running overnight.
#
# Technical:
#   * Follows all 'downward' links to find more web pages
#     (i.e. not out-bound links, not ../ links, and not / links)
#
#   * Similar rules for finding photos - it won't index any pics from another
#     web site, or pics which are not in a lower-level directory than the page
#     being indexed. This is designed to stop it indexing (a) adverts
#     (b) bullets (c) any other crap on a site's /gfx directory.
#
#   * Flushes the output HTML file after every 10 pages indexed.
#
# Oliver White, August 2000, www.blibbleblobble.co.uk/Programming/Perl
#
##################################################################################

# Start-up section: set the options, read the web site URL from the command
# line, then create the output page and write its header text.
#
# All names are deliberately left as package globals (no "my"), and the
# output handle stays the bareword GFX, so that any code elsewhere in the
# file that refers to them keeps working.

# Readable boolean constants.
$true  = 1;
$false = 0;

# Options
$OutputFilename = "gfx.htm";    # created (or truncated) in the current directory
$FlushOutput    = $true;        # NOTE(review): not read in this section -
                                # presumably consulted by the scanning code; confirm

# Internet library
use LWP::Simple;

# Get command line argument: the URL of the web site to scan.
# Checked before the output file is opened, so that running the script
# without a URL does not wipe an existing gfx.htm.
$BaseFile = shift @ARGV;
defined($BaseFile) && length($BaseFile)
    or die "Usage: perl $0 <URL of web site>\n";

# Turn into array (a one-element list holding the start URL).
# NOTE(review): presumably the list of pages still to visit - confirm.
@Files = ($BaseFile);

# Three-argument open: the mode is separate from the file name, and a
# failure reports the reason instead of a bare "Died".
open(GFX, '>', $OutputFilename)
    or die "Cannot open '$OutputFilename' for writing: $!\n";

$Title = "Graphics from $BaseFile";

# HTML header
# NOTE(review): the text below contains no HTML tags and never uses $Title;
# the markup looks as if it was stripped at some point. It is reproduced
# unchanged here - confirm against the original script.
print GFX "\n
\nCreated by ScanGraphics.pl, from BlibbleBlobble.co.uk
\nIf you have problems persuading IE to load images, try navigator
\n\n\n\n"
    or die "Cannot write to '$OutputFilename': $!\n";

# Buffered write errors only surface at close, so check it.
close(GFX)
    or die "Cannot close '$OutputFilename': $!\n";