Generated: Tue Feb 2 17:54:58 2010 from tidycmp02.pl 2007/06/08 10.6 KB.
#!/Perl # tidycmp02.pl # ############################################################################ # AIM: To DOWNLOAD the accessibility table from the web site # http://www.aprompt.ca/Tidy/accessibilitychecks.html # parse the html, extracting the HTML test file link # Download the link, advise if FAILED, # else write the file to an OUTPUT folder, # converting the line endings to DOS line endings ... # and compare its contents to Tidy's accesscases.txt # NOTE: With $dbg1 == 0, there can be quite LONG delays before NEXT output ... # Likewise if $dbg2 == 0, and/or $dbg3 == 0 - it looks like NOTHING is happening!!! # ############################################################################# use strict; use warnings; use LWP::Simple; require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs require "htmltools.pl" or die "Missing htmltools.pl ...\n"; # log file stuff my ($LF); my $outfile = 'temp'.$0.'.txt'; my $outfil1 = 'temp1'.$0.'.htm'; # program variables my $download = 1; # do the ACTUAL downloads, my $dotidytest = 0; # compare with Tidy file ... # or use locally saved file after first download # online source my $site = 'http://www.aprompt.ca/Tidy/'; my $URL = $site . 'accessibilitychecks.html'; # local HDD source ###my $src_folder = "F:\\Gtools\\tidyproj\\tidycvs6-2\\test\\"; my $src_folder = "F:\\FGCVS\\tidy\\test\\"; my $in_file = $src_folder.'accesscases.txt'; my $in_folder = $src_folder."accessTest\\"; my $out_folder = 'tmp6'; # and output FOLDER, for download my $new_out = 'tempaccess.txt'; my @tests = (); my $text = ''; my $tcnt = 0; my @arr = (); my $dtext = ''; my $line = ''; my @lines = (); my $tln = ''; my $tlcnt = 0; my @mdarr = (); my @hrefs = (); my $thrftxt = ''; my $lhrftxt = 0; my $thrffil = ''; my @newtest = (); my @desc = (); my $lcnt = 0; my $cnt = 0; my $we = ''; my $test = ''; my $lev = 0; my $href = ''; my $href2 = ''; my $flip = 0; my $fnd = 0; my $dsc = ''; my $tstcnt = 0; my $ntcnt = 0; my $dtcnt = 0; my $wrtncnt = 0; my $dsccnt = 0; my $msg = ''; # information collected my @zeroonline = (); my @missingcvs = (); my @difflevels = (); # debug my $dbg1 = 0; # additional diagnostic output my $dbg2 = 0; # output the test cases read in ... my $dbg3 = 0; # output information when found ... ####################################################### ### main program open_log($outfile); prt( "$0 ... Hello, World...\n" ); if ($download) { prt("Fetching text from $URL ...\n"); $text = get("$URL"); # this assumes CR line endings ###@arr = split("\r", $text); ##$dtext = join( "\n", @arr ); ## so without assumption if (defined $text) { $dtext = force_unix_le($text); $tcnt = length($text); $dtcnt = length($dtext); } else { $text = ''; $dtext = ''; $tcnt = 0; $dtcnt = 0; } prt( "Got $tcnt ($dtcnt) characters from URL $URL ...\n"); write2file($dtext,$outfil1); @arr = split( "\n", $dtext ); } else { open INF, "<$outfil1" or mydie( "ERROR: Unable to open [$outfil1] ... $1\n" ); @arr = <INF>; # slurp it all close INF; } if ($dotidytest) { prt("Openning the compare file [$in_file] ...\n"); open INF, "<$in_file" or mydie( "ERROR: Unable to open [$in_file] ... $1\n" ); my @tmp = <INF>; # slurp it all close INF; prt( "Got ".scalar @tmp." from $in_file ... putting into a multi-dimensional array ...\n" ); foreach $line (@tmp) { $tln = trimall($line); if (length($tln)) { push(@lines, $tln); my @ts = split(" ",$tln); if (scalar @ts == 3) { push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]); } else { prt( "WARNING: [$tln] did not split correctly ...\n" ); } } } $tlcnt = scalar @mdarr; prt( "Got $tlcnt (".(scalar @lines).") from $in_file ...\n" ); for (my $i3 = 0; $i3 < $tlcnt; $i3++) { $msg = $mdarr[$i3][0] . ' ' . $mdarr[$i3][1] . ' ' . $mdarr[$i3][2]; prt( "$msg\n" ) if ($dbg2); } } ###my $etext = htmlexpand($text); ###my $ctext = htmlcleanall($etext); ##open WOF, ">$outfil1" or mydie("ERROR: Unable to open $outfil1 - $!\n"); $lcnt = scalar @arr; prt( "Processing $lcnt lines ...\n" ); # expect something like .............. # Error number [13.2.1.3] - Priority 2 # or # Warning number [7.4.1.1] - Priority 2 # Warning number [1.1.1.2] - Priority 1 # All images require text equivalents but "alt" text must also meet ... # # Testfile 1.1.1.f2: suspicious "alt" text (filename) # View testfile source = link # Testfile = link # .................................... foreach $line (@arr) { ## print WOF $line."\n"; $tln = trimall($line); $tln = removetag($tln, 'b'); $tln = removetag($tln, 'br'); ##if ($line =~ /(Error|Warning)\s+\[(\d+\.\d+\.\d+\.\d+)\]\s+-\s+Priority\s+(\d{1})/) { ##if ($tln =~ /(Error|Warning)\s+number\s+/i) { ##if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\]/i) { if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\].+Priority\s+(\d+)/i) { $cnt++; $we = $1; $test = $2; $lev = $3; ##prt( "[$2] $tln\n" ); ##prt( "$cnt [$we] [$test] [$lev]\n" ); push(@tests, [$test, $lev, $we]); $flip = 0; } elsif ($tln =~ /href=["'](\S+)["']./i ) { if ($cnt) { my $hrf = $1; if ($flip) { if ($flip == 1) { $href = $site . $hrf; if ($download) { prt( "Moment ... loading [$href] ...\n" ); $thrftxt = get($href); if (defined $thrftxt) { $lhrftxt = length($thrftxt); } else { $thrftxt = ''; $lhrftxt = 0; } if ($lhrftxt) { ###$thrftxt =~ s/\r/\r\n/gm; ###$thrftxt =~ s/\r/\n/gm; $thrftxt = force_unix_le($thrftxt); $thrffil = $test; $thrffil =~ s/\./-/g; $thrffil = $out_folder . '/' . $thrffil . '.html'; write2file( $thrftxt, $thrffil ); $wrtncnt++; # count another WRITTEN prt( "[$test] Test HREF=\"$href\" length $lhrftxt ... written [$thrffil]\n" ) if ($dbg1); } else { $msg = "[$test] Test HREF=\"$href\" length is ZERO - CHECK ME! ..."; prt( "$msg\n" ) if ($dbg3); push(@zeroonline, $msg); } } else { # no download done ... } } else { prt( "[$test] CHECK ME HREF=\"$hrf\" \n" ); } } else { $href2 = $hrf; prt( "[$test] View HREF=\"$href2\"\n" ) if ($dbg1); } $flip++; } } elsif ($tln =~ /Testfile\s+\d+.+:\s+(.*)/) { # like - Testfile 1.1.1.f1: <img> missing "alt" text my $ds = $1; $ds =~ s/</</g; $ds =~ s/>/>/g; $ds =~ s/"/'/g; push(@desc, [$test, $ds]); prt( "[$test] Description=[$ds]\n" ) if ($dbg1); } } prt( "DONE processing $lcnt lines ...\n" ); $tstcnt = scalar @tests; $dsccnt = scalar @desc; prt( "Written $wrtncnt new files ... Got $tstcnt test sets ... $dsccnt desciptions ...\n" ); ##close WOF; for (my $i = 0; $i < $tstcnt; $i++) { $we = $tests[$i][2]; $test = $tests[$i][0]; $lev = $tests[$i][1]; $fnd = test_in_lines($test); $dsc = find_desc($test); my ($tf, $tff); $tf = $test; $tf =~ s/\./-/g; $tff = $in_folder . $tf . ".html"; if ($fnd) { $tln = $lines[$fnd-1]; ###my $tf = $mdarr[$fnd-1][0]; my $tc = $mdarr[$fnd-1][1]; ###my $tff = $in_folder . "\\" . $tf . ".html"; if (-f $tff) { my @tmparr = split(" ", $tln); if (scalar @tmparr == 3) { my $lev2 = $tmparr[2]; if ($lev2 == $lev) { prt( "[$test] [$lev] [$tln] [$tc] ok\n" ) if ($dbg1); } else { $msg = "[$test] [$lev] [$tln] [$tc] ok BUT different level [$lev2] ..."; push(@difflevels, $msg); prt( "$msg\n" ) if ($dbg3); } push(@newtest, [$tf, $test, $lev, $we, $dsc]); } else { mydie( "[$test] [$lev] [$tln] [$tc] ok BUT NO LEVEL COMPARE\n" ); } } else { prt( "[$test] [$lev] [$tln] [$tc] missing [$tff]?\n" ); push(@newtest, [$tf, $test, $lev, $we, $dsc]); } } else { if (-f $tff) { $msg = "NOT FOUND [$test] [$lev] BUT found [$tff]"; } else { $msg = "NOT FOUND [$test] [$lev]"; } push(@missingcvs, $msg); prt( "$msg\n" ) if ($dbg3); } } # output warning information, if NOT output during processing if (!$dbg3) { # no output during processing $cnt = scalar @zeroonline; if ($cnt) { prt( "Count $cnt file(s) appear MISSING from on-line site ...\n" ); foreach $msg (@zeroonline) { prt( "$msg\n" ); } } $cnt = scalar @missingcvs; if ($cnt) { prt( "Count $cnt file(s) appear MISSING from CVS download ...\n" ); foreach $msg (@missingcvs) { prt( "$msg\n" ); } } $cnt = scalar @difflevels; if ($cnt) { prt( "Count $cnt item(s) appear to have DIFFERENT priority ...\n" ); foreach $msg (@difflevels) { prt( "$msg\n" ); } } } $ntcnt = scalar @newtest; prt( "\nOutputting $ntcnt tests to [$new_out] ...\n" ); open OUTF, ">$new_out" or mydie( "ERROR: Unable to open $new_out ...$! \n" ); for (my $i = 0; $i < $ntcnt; $i++) { print OUTF $newtest[$i][0] . ' ' . $newtest[$i][1] . ' ' . $newtest[$i][2]; print OUTF ' ' . $newtest[$i][3] . ' ' . $newtest[$i][4] . "\n"; } close OUTF; ##system($outfil1); close_log($outfile,1); exit(0); ### push(@desc, [$test, $dsc]); sub find_desc { my ($tst) = shift; my $d = 'NOT FOUND'; my $ct = scalar @desc; for (my $i2 = 0; $i2 < $ct; $i2++) { if ($desc[$i2][0] eq $tst) { $d = $desc[$i2][1]; last; } } return $d; } ### file test level ### push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]); sub test_in_lines { my ($tst) = shift; my $f = 0; my $ct = 0; my $ln = ''; ###prt( "Finding [$tst] ...\n" ); for (my $i2 = 0; $i2 < $tlcnt; $i2++) { $ct++; ##my $ts = $mdarr[$i2][1]; ##prt( "Compare with [$ts] ...\n" ); if ($mdarr[$i2][1] eq $tst) { $f = $ct; last; } } return $f; } sub trimall { my ($ln) = shift; chomp $ln; $ln =~ s/\r$//; $ln =~ s/\t/ /g; while ($ln =~ /\s\s/) { $ln =~ s/\s\s/ /g; } while ($ln =~ /^\s/) { $ln = substr($ln,1); } while ($ln =~ /\s$/) { $ln = substr($ln,0, length($ln) - 1); } return $ln; } sub force_unix_le { my ($dtx) = shift; my $ntx = ''; my $len = length($dtx); for (my $i = 0; $i < $len; $i++) { my $ch = substr($dtx,$i,1); if ($ch eq "\r") { # if CR, check for CR/LF $i++; # move to next char if ($i < $len) { # if length $ch = substr($dtx,$i,1); if ($ch ne "\n") { # is is LF $ntx .= "\n"; # no, force LF to replace CR if ($ch eq "\r") { # but if it IS another CR $i--; # back up to collect this next; # and loop } } # else let this caracter be added } else { # last char $ch = "\n"; # add final LF } } $ntx .= $ch; } return $ntx; } # eof - tidycmp02.pl