Generated: Tue Feb 2 17:54:31 2010 from fav-04.pl 2006/07/15 17 KB.
#!/Perl # AIM: To read the Internet Favorites, and produce # a HTML document, with links and description # 2006.07.11 - switch link column, and add (B) broken, from c:\HOMEPAGE\Broken02.htm # update 2006.06.28 - weed out local references # Added a MAXIMUM width, so the table approximately 'fits' a 1024 wide screen # change to using '<base target="_blank">' # 2005.11.12 - works ok - geoff mclane # use File::stat; my $DT = '2006.07.15'; $VERSION = '0.4'; $PACKAGE = 'fav-04'; my $hvers = "<!-- P26.$DT - minor update -->\n"; $hvers .= "<!-- P26.2006.07.11 - update -->\n"; $hvers .= '<!-- p26.2005.11.11 - List of favorites in PRO-1 geoffmclane.com/favorites.htm -->'; print "$0 ... Hello, World ...\n"; if( !defined( $ENV{'USERPROFILE'} ) ) { print "Can NOT locate USERPROFILE in ENVironment!\n"; exit(1); } my $ff = $ENV{'USERPROFILE'} . '\\Favorites'; if( !( -d $ff ) ) { print "Folder $ff is NOT a directory!\n"; exit(2); } # set a sample maximum title, wrap start at -10 from this - original set at 60 # 12345678901234567890123456789012345678901234567890123456789012345678901234567890 # 1 2 3 4 5 6 7 my $maxtit = 'Domain Name Registration, Domain Transfe'; # rs. Your domain name search starts here.'; my $logfil = "temp.$0.txt"; my $htmfil = 'favorites.htm'; my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my $addfold = 1; my $sch = '™'; # avoid this character ... my @fav_exclude = ( 'https://geoffmclane.com:2083/frontend/x/index.html' ); my @fav_broken = ( 'http://a.ninemsn.com.au/b.aspx', 'http://blogs.msdn.com/nikolad/archive/2005/09/02/460368.aspx', 'http://code.jenseng.com/jenChat/', 'http://datacompression.info/JPEG.shtml', 'http://document.ihg.uni-duisburg.de/cgi-bin/mapserv40', 'http://drivers.soft32.com/index-2-12-110-0-4.html', 'http://flightgear.org/Downloads/scenery-0.9.5.html', 'http://free.compuserve.com/trycsfree/index2.adp', 'http://grass.ibiblio.org/grass57/index.html', 'http://home.exetel.com.au/atmint.exetel.com.au/2004SBTS.html', 'http://ourworld.compuserve.com/homepages/GEOFF_MCLANE', 'http://pubs.logicalexpressions.com/Pub0009/LPMArticle.asp', 'http://serenitysydney.com.au/', 'http://usa.asus.com/products/mb/socket478/p4c800-d/overview.htm', 'http://www.adg.dk/airport.asp', 'http://www.candleart.com.au/', 'http://www.cartexpress.com/', 'http://www.commerce-cgi.com/download.htm', 'http://www.compalseast.org.au/', 'http://www.dmartias.fr/mondial/', 'http://www.e-directory.org/download/list/modules.html', 'http://www.elanit.com.au/immediacy/main.asp', 'http://www.fgdc.gov/clearinghouse/clearinghouse.html', 'http://www.flightgear.org/~curt/Models/Special/Rascal110_2/', 'http://www.flightgear.org/~curt/Photos/KMHV/', 'http://www.flightgear.org/Downloads/scenery-0.9.7.html', 'http://www.flymig.com/iata/r/Country.Papua_New_Guinea.htm', 'http://www.frenchlinguistics.com/dictionary/', 'http://www.friendofflowers.com/images/famphots/famphot.php', 'http://www.interweb.com.au/', 'http://www.iridiumsoftsol.com/content.aspx', 'http://www.jobsearch.gov.au/', 'http://www.libsdl.org/index.php', 'http://www.linuxguruz.com/', 'http://www.megxon.com/products/S302/S302.htm', 'http://www.microsoft.com/downloads/details.aspx', 'http://www.microsoft.com/isapi/redir.dll', 'http://www.navigate.com.au/navigate/index.jsp', 'http://www.netopia.com/buy/download_promo.jsp', 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html', 'http://www.nottingham.ac.uk/~eazdluf/taxidraw/airportdata.html', 'http://www.ntsb.gov/', 'http://www.open-bits.org/browse.php', 'http://www.ossim.org/tiki-read_article.php', 'http://www.perldoc.com/perl5.8.0/lib.html', 'http://www.stockill.org/fgfsdb/models.php', 'http://www.wajb.freeserve.co.uk/codes.htm', 'http://www.web-developer-india.com/web/jscript/refp_10.html', 'http://www.worldofmaya.com/t_poly.html', 'http://www.worldzone.net/games/azrael_dark/PROJECT_ZERO/GMAX.html', 'http://x-plane.org/home/robinp/AptNavFAQ.htm', 'https://164.214.2.62/products/digitalaero/index.cfm', 'https://geoffmclane.com:2083/frontend/x/index.html', 'https://www.clickstart.com.au/capabiliti/menuscript.asp'); my @oth_broken = ( 'file:///cgi-sys/Count.cgi', 'file:///cgi-sys/guestbook.cgi', 'http://<!--', 'http://docs.rinet.ru:8083/WebPub/ch56.htm', 'http://emporium.turnpike.net/~viredit/emploi/cv/query.htm', 'http://ev.free2code.net/plugins/articles/read.php', 'http://geoffmclane.com/fgfs-003.htm', 'http://home.netscape.com/assist/net_sites/new_html3_prop.html', 'http://homepages.wmich.edu/~l0lazaro/perld/fileio.html', 'http://jobs.iconrec.com.au', 'http://lib.risk.ee/javanotes/c7/s6.html', 'http://perl.hamtech.net/prog/ch03_109.htm', 'http://tidy.sf.net/issue/1365706', 'http://vadivel.thinkingms.com/PermaLink.aspx', 'http://www.accuweather.com/adcbin/public/intlocal_index.asp', 'http://www.ao.net/~juang/IntroJava2/JavaIO/JavaIO.html', 'http://www.bradchoate.com/weblog/2002/08/12/mtmacro', 'http://www.cclabs.missouri.edu/things/instruction/perl/perlcourse.html', 'http://www.cruising.org/cvpc/cruiselines/DisplayShip.cfm', 'http://www.digistuff.com/story_photos.asp', 'http://www.flightgear.org/Downloads/scenery-0.9.7.html', 'http://www.hollandamerica.com/fivestarfleet/rotterdam.htm', 'http://www.jobsearch.gov.au/', 'http://www.libsdl.org/cvs.php', 'http://www.microsoft.com/downloads/details.aspx', 'http://www.microsoft.com/downloads/details.aspx ', 'http://www.neosoft.com/neosoft/man/perl.1.html', 'http://www.netacc.net/~poulsen/moonphase.html', 'http://www.netscape.com/navigator/', 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html', 'http://www.novell.com/products/netware4/quicklook.html', 'http://www.opengl.org/resources/libraries/glut.html', 'http://www.opengl.org/resources/libraries/glut/glut_downloads.html', 'http://www.reunir.com/fiche.asp', 'http://www.shfa.nsw.gov.au/content/home.cfm', 'http://www.stratus.com/products/vos', 'http://www.tek-tips.com/viewthread.cfm', 'http://www.x-plane.org/users/robinp', 'http://www.x-plane.org/users/robinp/', 'https://ccvs.cvshome.org/', 'https://www.cvshome.org/'); my $basedir = $ff; my $blen = length($basedir); my ($fn,$ffn,$LF,$HF); my @dirs = ($ff); my @fils = (); my @tblist = (); my @warnings = (); my $wmsg = ''; my $f_title = ''; my $f_link = ''; my $f_tlink = ''; my $f_data = ''; my ($f_fold, $f_tit); open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n"; open $HF, ">$htmfil" or die "Can NOT open HTML file $htmfil!\n"; my $fcnt = scalar @fils; my $dcnt = scalar @dirs; my $maxwid = length($maxtit); #print "Found $fcnt files, and $dcnt directories ...\n"; while (scalar @dirs) { local @dir2 = @dirs; @dirs = (); while ($fn = shift @dir2) { do_dir($fn); } } $fcnt = scalar @fils; $dcnt = scalar @dirs; prt( "Total: $fcnt URL files ...\n" ); prt( "Maximum line length used = $maxwid ...\n" ); # choosing a DOCTYPE ##my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'; my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">'; ##my $doctyp3 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">'; out_htm_head(); oh( '<table border="1" width="100%" summary="List of favorites - First column is the title, and the 2nd is link">' ); oh( ' <tr>' ); if ($addfold) { oh( ' <td><b>Folder</b></td>' ); } oh( ' <td><b>Title</b></td>' ); oh( ' <td><b>Link</b></td>' ); oh( ' <td><b>Date</b></td>' ); oh( ' </tr>' ); get_table_arr(); my $tcnt = scalar @tblist; for (my $i = 0; $i < $tcnt; $i++) { $f_title = $tblist[$i][0]; $f_link = $tblist[$i][1]; $f_tlink = $tblist[$i][2]; $f_date = $tblist[$i][3]; $f_fold = $tblist[$i][4]; $f_tit = $tblist[$i][5]; oh( ' <tr>' ); if ($addfold) { oh( " <td>$f_fold</td>" ); oh( " <td>$f_tit</td>" ); } else { oh( " <td>$f_title</td>" ); } oh( " <td><a href=\"$f_link\">$f_tlink</a></td>" ); oh( " <td>$f_date</td>" ); oh( ' </tr>' ); ###prt( "$i [".$tblist[$i][0].", ".$tblist[$i][1].", ".$tblist[$i][2].", ".$tblist[$i][3]."]\n" ); ###prt( "$i [$f_title, $f_link, $f_tlink, $f_date, $f_fold, $f_tit]\n" ); } oh( '</table>' ); out_htm_tail(); if (@warnings) { prt( "Repeating WARNINGS issues ...\n" ); foreach $wmsg (@warnings) { prt($wmsg); } } prt("Loading $htmfil ... may have to be closed to continue...\n"); close( $HF ); close( $LF ); system( $htmfil ); ##system( $logfil ); exit(0); ####################################################################### ### just subs below sub get_table_arr { prt( "Getting array of ".scalar @fils." files ...\n" ); foreach $fn (@fils) { # process each file local $FH; my $sb = stat($fn); my $tms = get_YYYYMMDD(scalar localtime $sb->mtime); if ( open( $FH, $fn ) ) { local @lns = <$FH>; # slurp in the lines local $sn = remdir($fn); # file name is the TITLE of the favorite ... ###prt( "Processing " . remdir($fn) . " of " . scalar @lns . " lines ...\n"); close( $FH ); local $line; my $fnd = 1; my $bkn = 0; # assume NOT broken link, per FP # get the FOLDER my $ind = rindex($sn, "\\"); my $fold = '.'; my $tit = $sn; if ($ind != -1) { $fold = substr($sn, 0, $ind); $tit = substr($sn, ($ind + 1)); } foreach $line (@lns) { chomp $line; if( $line =~ /^URL=/ ) { local $u = substr($line,4); ## ~ s/^URL=//; if (in_exclude($u)) { $fnd = 0; # avoid a WARNING ... last; } if (in_fav_broken($u) || in_oth_broken($u)) { $bkn = 1; } ##prt( "\"$sn\",$u\n" ); local $mu = max_sub2($u,$maxwid); $mu =~ s/&/&/g; $sn = max_sub($sn, $maxwid); # wrap text to max width $sn =~ s/&/&/mg; # possible MULTIPLE lines if (($fold eq 'Links') && (substr($tit,0,4) eq 'FIFA')) { prt( "Exception - changed [$sn] and [$tit] \n" ); $tit =~ s/$sch/™/g; $sn =~ s/$sch/™/mg; prt( "Exception - to [$sn] and [$tit] \n" ); } $u =~ s/&/&/g; $tit =~ s/&/&/g; if ($bkn) { ### $tms .= '<b>(B)</b>'; $sn = '<b>(B)</b> '.$sn; $tit = '<b>(B)</b> '.$tit; } push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit]); ###prt( "push(\@tblist, [$sn, $u, $mu, $tms, $fold])\n" ); $fnd = 0; last; } } if ($fnd) { $wmsg = "WARNING: Did NOT find a URL line in [$fn] ...\n"; prt($wmsg); push(@warnings,$wmsg); } } else { $wmsg = "WARNING: Unable to open file [$fn] ...\n"; prt($wmsg); push(@warnings,$wmsg); } } prt( "Got array of ".scalar @tblist." items ...\n" ); } ## month to number sub mth_to_num { my ($mth) = shift; my $cnt = 0; ###prt( "Chk [$mth] " ); foreach my $m (@mths) { $cnt++; if ($m eq $mth) { ###prt( "Is $m - return $cnt\n" ); return $cnt; } } mydie( "ERROR: Returning 0!!! for [$mth]\n" ); return '??'; } sub get_YYYYMMDD { my ($tm) = shift; my @arr = split( / /, $tm ); # time of form 'Sat Mar 12 03:11:55 2005' my $ac = scalar @arr; my $doff = 2; my $yoff = 4; if ($ac == 5) { $doff = 2; $yoff = 4; } elsif ($ac == 6) { $doff = 3; $yoff = 5; } else { mydie( "ERROR: Time ($tm) did NOT split correctly!\n" ); } my $mn = mth_to_num( $arr[1] ); if ($mn < 10) { $mn = '0'.$mn; } my $dn = $arr[$doff]; if ($dn < 10) { $dn = '0'.$dn; } my $dtt = $arr[$yoff].'/'.$mn.'/'.$dn; # translated to 2005/03/12 return $dtt; } sub get_lists { ###foreach $fn (@files) { while ($fn = shift @_) { next if ($fn eq '.'); next if ($fn eq '..'); $ffn = $ff . '\\' . $fn; if( -d $ffn ) { push(@dirs, $ffn); } else { if ($fn =~ /\.url$/i) { push(@fils, $ffn); } else { prt( "Discarding file $ffn ...\n" ); } } } $fcnt = scalar @fils; $dcnt = scalar @dirs; prt( "Found $fcnt files, and $dcnt directories ...\n" ); } sub do_dir { local ($dn) = @_; print "Processing $dn ...\n"; opendir(DIRH, $dn); local @f = readdir(DIRH); closedir(DIRH); print "Found " . scalar @f . " entries ...\n"; $ff = $dn; get_lists(@f); } sub prt { my $msg = shift; print $msg; print $LF $msg; } sub max_sub2 { my ($ln, $max) = @_; if (length($ln) > ($max+5)) { $ln = substr($ln,0,$max) . '...'; } return $ln; } sub max_sub { my ($ln, $max) = @_; my $nln = $ln; if (length($ln) > $max) { my @arr = split(/ /,$ln); $nln = ''; my $bit = ''; my $bl = 0; my $sl = 0; my $sc = 0; foreach my $s (@arr) { $sl = length($s); $bl = length($bit); while ($sl > $max) { if ($bl) { $bit .= ' '; } $bit .= substr($s, 0, $max - $bl); $s = substr($s, $max - $bl); if (length($nln)) { $nln .= "<br>\n"; } $nln .= $bit; $bit = ''; $sl = length($s); $bl = length($bit); $sc = 0; } if ($bl) { if (( $bl + $sc + length($s) ) > $max ) { if (length($nln)) { $nln .= "<br>\n"; } $nln .= $bit; $bit = $s; $sc = 0; } else { $bit .= ' '; $sc++; $bit .= $s; } } else { $bit = $s; $sc = 0; } } if (length($bit)) { if (length($nln)) { $nln .= "<br>\n"; } $nln .= $bit; } } return $nln; } sub remdir { local ($f) = @_; local $b2 = quotemeta($basedir); ###$f =~ s/^$basedir//; ###$f = substr( $f, (length($basedir) + 1) ); ##$f = substr( $f, ($blen + 1), (length($f) - $blen - 5) ); $f =~ s/^$b2\\//; # remove beginning ... $f =~ s/\.url$//; # and remove tail return $f; ###return (max_sub($f, $maxwid)); } sub ohl { print $HF "\n"; } sub oh { local ($txt) = @_; print $HF $txt; ohl(); } sub out_htm_head { oh( $doctyp4 ); oh( '<html>' ); oh( '<head>' ); oh( "<title>List of Geoff Favorites</title>" ); oh( '<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">' ); oh( '<meta name="Author" content="Geoff Mclane">' ); oh( '<style type="text/css">' ); oh( '<!-- /* Style Definitions */' ); oh( 'body {' ); oh( ' background-image:url("clds3.jpg");' ); oh( ' margin: 0cm 1cm 0cm 1cm;' ); oh( '}' ); oh( 'h1{' ); oh( ' background:#efefef;' ); oh( ' border-style: solid solid solid solid;' ); oh( ' border-color:#d9e2e2;' ); oh( ' border-width:1px;' ); oh( ' padding:2px 2px 2px 2px;' ); oh( ' font-size:200%;' ); oh( ' text-align:center;' ); oh( '}' ); oh( '.ctr { text-align:center; }' ); oh( '.bld { font-weight:bold; }' ); oh( '-->' ); oh( '</style>' ); oh( '<base target="_blank">' ); # set so ALL open in 'New Window' oh( '</head>' ); oh( '<body>' ); oh( '<h1><a name="top"></a>List of Geoff Favorites</h1>' ); oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' ); oh( '<a target="_self" href="#bottom">bottom</a></p>' ); oh( '<p>This is a simple table, as at $DT, of my ever changing, personal <span class="bld">Favorites</span>. '); oh( 'It is autogenerated periodically, using a Perl script, in an attempt to keep it up to date ;=)) ' ); oh( 'It does contain some broken links, sites that have disappeared, but most are valid and current. ' ); oh( 'The base target has been set to _blank, so when a link is clicked, it should open in a NEW '); oh( 'browser page. While the link text is sometimes truncated, the underlying anchor reference '); oh( 'contains the full link ... Enjoy ...</p>' ); } sub out_htm_tail { oh( '<p><a name="bottom"></a>' ); oh( "This table is auto-generated from a Perl script, reading and analysing my 'Favorites' folder, " ); oh( "from the USERPROFILE given in the environment. Those marked with a <b>(B)</b> were <b>BROKEN</b> links "); oh( "at the last full verification done by FrontPage ... sometimes it is due to the fact that they are " ); oh( "secure sites (https), and sometimes due to the fact that the site, or at least that page, has since been pulled down, " ); oh( "but I have yet to delete this link from my personal 'Favorites' ... and just sometimes FrontPage " ); oh( "makes a mistake in its verification process, and/or the site has a redirection active!</p>" ); oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' ); oh( '<a target="_self" href="#top">top</a></p>' ); print $HF <<"EOF"; <p><a href="http://validator.w3.org/check?uri=referer"> <img src="images/valid-html401.gif" alt="Valid HTML 4.01 Transitional" height="31" width="88"></a></p> </body> $hverss </html> EOF } sub in_fav_broken { my ($h) = shift; foreach $l (@fav_broken) { if ($l eq $h) { return 1; } } return 0; } sub in_oth_broken { my ($h) = shift; foreach $l (@oth_broken) { if ($l eq $h) { return 1; } } return 0; } # my @fav_exclude = ( sub in_exclude { my ($h) = shift; foreach $l (@fav_exclude) { if ($l eq $h) { return 1; } } return 0; } #eof