# Generated: Tue Feb 2 17:54:57 2010 from table2str.pl 2009/10/01 5 KB.
#!/perl -w
# NAME: table2str.pl
# AIM: VERY SPECIFIC - read a HTML file, extract the 'table', and write it as a structure...
# 01/10/2009 geoff mclane http://geoffair.net/mperl
#
# Usage: table2str.pl [input.htm]
#   With no argument the historical hard-coded input path below is used.
#
# All output goes through helpers loaded at run time from logfile.pl
# (open_log, prt, trim_all, close_log); their exact behavior is defined
# in that file, not here.
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";

# log file stuff
# Keep only the last path component of $0, whichever separator was used,
# so the log file name never contains directory parts.
(my $pgmname = $0) =~ s{^.*[\\/]}{};
my $outfile = "temp.$pgmname.txt";
open_log($outfile);

# Input file: first command-line argument, else the original default.
my $default_in_file = 'C:\Documents and Settings\Geoff McLane\My Documents\MS\FOURCC_tidy.htm';
my $in_file = @ARGV ? $ARGV[0] : $default_in_file;

# process_file($fil)
#
# Reads the HTML file $fil, joins its trimmed lines into one string, and
# scans it character by character, collecting the text found in the first
# four <td> columns of each table row.
#
# Returns a reference to an array of records, each of the form
#     [ $code, $row_number, $col2, $col3, $col4 ]
# If the file cannot be opened an error is logged via prt() and a
# reference to an empty array is returned.
sub process_file($) {
    my ($fil) = @_;
    my @codes = ();

    # Three-argument open with a lexical handle: the file name can never
    # be interpreted as part of the open mode.
    my $inf;
    if (!open($inf, '<', $fil)) {
        prt("ERROR: Can not open file $fil! ($!)\n");
        return \@codes;
    }
    my @lines = <$inf>;
    close $inf;

    my $lncnt = scalar @lines;
    prt( "Doing $lncnt lines, from [$fil]...\n" );

    # Normalize every line, then treat the whole file as one long string.
    for my $ln (@lines) {
        chomp $ln;
        $ln = trim_all($ln);
    }
    my $line = trim_all(join(" ", @lines));
    my $max  = length($line);
    prt( "Processing $max characters...\n" );

    my $in_table = 0;       # true between <table...> and </table...>
    my $in_td    = 0;       # column counter within the current row
    my $tr_lns   = 0;       # row counter
    my $tag      = '';      # text of the most recently read tag
    my $code     = '';      # text collected for column 1
    my ($col2, $col3, $col4) = ('', '', '');
    my ($i, $cc);

    for ($i = 0; $i < $max; $i++) {
        $cc = substr($line, $i, 1);
        if ($cc eq '<') {
            # A new tag begins. While in column 4, and unless the previous
            # tag was exactly 'br', flush the collected column-1 text.
            if ( !($tag =~ /^br$/i) && ($in_td == 4)) {
                my $last = @codes ? $codes[-1][1] : -1;
                if (length(trim_all($code))) {
                    if ($last == $tr_lns) {
                        # Same row as the last record: extend its code.
                        $codes[-1][0] .= $code;
                    } else {
                        $code =~ s/^\s+//;  # drop leading white space
                        push(@codes, [$code, $tr_lns, $col2, $col3, $col4]);
                        prt( "$code " );
                        $col2 = '';
                        $col3 = '';
                        $col4 = '';
                    }
                }
                $code = '';
            }

            # Read the tag text up to, but not including, the closing '>'.
            # A double-quoted attribute value is copied whole, so a '>'
            # inside quotes does not end the tag.
            $tag = '';
            $i++;
            for (; $i < $max; $i++) {
                $cc = substr($line, $i, 1);
                last if ($cc eq '>');
                $tag .= $cc;
                if ($cc eq '"') {
                    $i++;
                    for (; $i < $max; $i++) {
                        $cc = substr($line, $i, 1);
                        $tag .= $cc;
                        last if ($cc eq '"');
                    }
                }
            }
            if ($tag =~ /^table/i) {
                $in_table = 1;
                prt("$i: Entered table...\n");
            } elsif ($tag =~ /^\/table/i) {
                $in_table = 0;
                prt("$i: Exit table...\n");
            }
            next;
        }

        # Not a tag character. This runs once per text character, with
        # $tag still holding the most recent tag, so the row and column
        # counters below advance for every character that follows a
        # <tr...>/<td...> tag, not once per tag.
        # NOTE(review): presumably this relies on the tidied input having
        # exactly one separator character after those tags - confirm
        # against the input file before changing this logic.
        if ($in_table) {
            if ($tag =~ /^tr/i) {
                $in_td = 0; # start column counter
                $tr_lns++;
            } elsif ($tag =~ /\/tr/i) {
                # text following a closing row tag is ignored
            } elsif ($tag =~ /^td/i) {
                $in_td++;
            } elsif ($in_td) {
                if ($in_td == 1) {
                    $code .= $cc;
                } elsif ($in_td == 2) {
                    $col2 .= $cc;
                } elsif ($in_td == 3) {
                    $col3 .= $cc;
                } elsif ($in_td == 4) {
                    $col4 .= $cc;
                }
            }
        }
    }
    return \@codes;
}

# show_code_ref_simple($rca)
#
# Logs "row: code" for every record in the array referenced by $rca,
# ending the output line after every sixth record and once more at the
# end if the final line is incomplete.
sub show_code_ref_simple($) {
    my ($rca) = @_;
    my $wrap = 0;
    for my $rec (@{$rca}) {
        my ($cd, $ln) = @{$rec}[0, 1];
        prt( "$ln: $cd" );
        $wrap++;
        if ($wrap == 6) {
            $wrap = 0;
            prt("\n");
        }
    }
    if ($wrap) {
        prt("\n");
    }
}

# _quote_escape($text)
#
# Returns $text with every backslash and double quote preceded by a
# backslash, so it can be placed between double quotes in the output.
sub _quote_escape {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return $text;
}

# show_code_ref($rca)
#
# Logs one line per record in the array referenced by $rca, in the form
#     { "code", "col2", "col3", "col4" },
# Each field is trimmed and then escaped, so embedded quotes or
# backslashes cannot break the quoted strings.
sub show_code_ref($) {
    my ($rca) = @_;
    for my $rec (@{$rca}) {
        my ($cd, $c2, $c3, $c4) =
            map { _quote_escape(trim_all($_)) } @{$rec}[0, 2, 3, 4];
        prt( " { \"$cd\", \"$c2\", \"$c3\", \"$c4\" },\n" );
    }
}

my $code_ref = process_file($in_file);
show_code_ref($code_ref);
close_log($outfile,1);
exit(0);
# eof