test7.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:58 2010 from test7.pl 2006/09/18 13.5 KB.

#!perl -w
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
#######################################################
# Load of HTM tags and PHP reserved words and built-in
my $html_stx = 'C:/Program Files/EditPlus 2/html.stx';
my $php_stx = 'C:/Program Files/EditPlus 2/php.stx';
# if in HTML (default)
#if ($kw == 1) {
my @stxHTM = ();
#} elsif ($kw == 2) {
my @stxATT = ();
#} elsif ($kw == 3) {
my @stxSPL = ();
#else in PHP
#if ($kw == 1) {
my @stxRW = ();
#} elsif ($kw == 2) {
my @stxBI = (); # like @BuiltIns;
#} elsif ($kw == 3) {
my @stxVA = ();
my %HFuncsFnd = ();   # set of FOUND builtin functions
my %HResWdFnd = (); # reserved words used
my @AFileNames = (); # for each output file, with hash of functions
my @AFileHashs = (); # for each output file, with hash of functions
my %HOldbifs = ();   # old BIF, from previous index, if any
#########################################################
#########################################################
######## keep the OLD index
### this is needed IF files have been DELETED ...
### VARIABLES
my $oi_tblcnt = 0;
my $tbl_num = 1;
my $tbl_num3 = 3;
my @tbl_arr = ();
my @tbl_arr3 = ();
my @tbl_set = ();
my @tbl_set3 = ();
my $no_index = 0;
my $dbg20 = 0; # get_table_array()
my $dbg21 = 0;
my $dbg22 = 0;
my $dbg23 = 0;
my $dbg24 = 0;
my $oi_tacnt = 0;
my $oi_tacnt3 = 0;
my @oi_larr = ();
my @oi_larr2 = ();
my @oi_hrefs = ();
### FUNCTIONS
# search the @stxBI array for an entry
sub is_built_in($) {
   my ($t) = shift;
   foreach my $rw (@stxBI) {
      if ($t eq $rw) {
         return 1;
      }
   }
   return 0;
}
sub transfer_old_table3() {
   $oi_tacnt3 = scalar @tbl_set3;
   if ($oi_tacnt3 > 0) {
      prt( "Collected $oi_tacnt3 in \@tbl_set3 ... need to intialise built-in hash ...\n" );
      ## load into my %HOldbifs = ();
      my $elimcnt = 0;
      my $elimcnt2 = 0;
      for (my $i = 0; $i < $oi_tacnt3; $i++) {
         my $bif = $tbl_set3[$i][0];
         my $fss = $tbl_set3[$i][1];
         if (is_built_in($bif)) {
            # each new htm file written is kept in -
            # push(@AFileNames, $ind_file   );
            # and for each of these a new hash of built ins has been kept
            # push(@AFileHashs, \%th); # store the functions used ...
            # so these files can be (safely) eliminated, since they will be added later
            foreach my $nhf (@AFileNames) {
               if ($fss =~ /$nhf/i) {
                  $fss =~ s/$nhf//;
                  $elimcnt++;
               }
            }
            $fss = trim_line($fss);
            if (length($fss)) {
               if (exists $HOldbifs{$bif}) {
                  prt("\nWARNING: [$bif] appears DUPLICATED ...\n had=[".$HOldbifs{$bif}."\nadding [$fss]\n\n");
                  $HOldbifs{$bif} .= $fss;
               } else {
                  $HOldbifs{$bif} = $fss;
               }
            } else {
               $elimcnt2++;
            }
         } else {
            prt("WARNING: DISCARDING [$bif] - NOT BUILT-IN!\n");
         }
      }
      my $nwcnt = scalar keys %HOldbifs;
      if ($elimcnt > 0) {
         prt( "Elimated old files $elimcnt times, avoiding $elimcnt2 bifs being added...\n" );
      }
      prt( "Done $oi_tacnt3 in \@tbl_set3 ... now $nwcnt in \%HOldbifs ...\n" );
   }
}
sub mark_old_index($) {
   my ($f) = shift;
   my $tsc = scalar @tbl_set;
   for (my $i = 0; $i < $tsc; $i++ ) {
      if ($tbl_set[$i][0] eq $f) {
         $tbl_set[$i][7] = 1;
         last;
      }
   }
}
sub get_table_array {
   my $fnd = 0;
   my $lncnt = scalar @oi_larr2;
   for (my $i = 0; $i < $lncnt ; $i++) {
      my $ln = $oi_larr2[$i]; # extract a line
      chomp $ln; # remove LF (\n)
      $ln =~ s/\r$//; # and remove CR, if present
      if ($ln =~ /<table.*>/i) {
         prt( "FOUND TABLE: [$ln] ...\n" ) if ($dbg20);
         $oi_tblcnt++; # bump table counter
         if ($oi_tblcnt == $tbl_num) {
            prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20);
            push(@tbl_arr,$ln);
            if ( !($ln =~ /<\/table>/i) ) {
               $i++; # move to next line
               for ( ; $i < $lncnt; $i++) {
                  $ln = $oi_larr2[$i]; # extract a line
                  chomp $ln; # remove LF (\n)
                  $ln =~ s/\r$//; # and remove CR, if present
                  if ( $ln =~ /<\/table>/i ) {
                     prt( "END TABLE $tbl_num: [$ln] ...\n" ) if ($dbg20);
                     push(@tbl_arr,$ln);
                     $fnd++;
                     last;
                  }
                  push(@tbl_arr,$ln);
               }
            }
         } elsif ($oi_tblcnt == $tbl_num3) {
            prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20);
            push(@tbl_arr3,$ln);
            if ( !($ln =~ /<\/table>/i) ) {
               $i++; # move to next line
               for ( ; $i < $lncnt; $i++) {
                  $ln = $oi_larr2[$i]; # extract a line
                  chomp $ln; # remove LF (\n)
                  $ln =~ s/\r$//; # and remove CR, if present
                  if ( $ln =~ /<\/table>/i ) {
                     prt( "END TABLE $tbl_num3: [$ln] ...\n" ) if ($dbg20);
                     push(@tbl_arr3,$ln);
                     $fnd++;
                     last;
                  }
                  push(@tbl_arr3,$ln);
               }
            }
         }
      }
   }
   return $fnd;
}
sub process_tbl_arr() {
   my $cc = 0;
   for (my $i = 0; $i < $oi_tacnt ; $i++) {
      my $ln = $tbl_arr[$i]; # extract a line
      if ($ln =~ /<td.*>/i) {
         while ( !($ln =~ /<\/td>/i) ) {
            $i++;
            if ($i < $oi_tacnt) {
               $ln .= ' '.$tbl_arr[$i]; # extract a line
            } else {
               last;
            }
         }
         # got begin and end of <td>...</td> block
         if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
            my $tds = $1;
            my $inb = $2;
            my $tde = $3;
            # like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] = 
            # [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
            prt( "Line [$ln] = \nBlocks [$tds][$inb][$tde] ...\n" ) if ($dbg21);
            ###if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>/) {
            ##if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>/i) {
            #if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)/i) {
            if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i) {
               my $hrf = $1;
               my $fil = $2;
               my $dt = $3;
               my $sz = $4;
               my ($yr, $mt, $dy) = split(/\//,$dt);
               ###$sz =~ s/,//g;
               #               0     1     2    3    4    5    6    7
               push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
               prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
            } else {
               prt("HREF not found - CHECK!\n") if ($dbg22);
            }
         }
      }
   }
}
sub process_tbl_arr3() {
   my $cc = 0;
   my $ff = 0; # since just two columns - flip flop
   my $bif = '';
   my $fil = '';
   for (my $i = 0; $i < $oi_tacnt3 ; $i++) {
      my $ln = $tbl_arr3[$i]; # extract a line
      if ($ln =~ /<td.*>/i) {
         $cc = length($ln);
         prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
         while ( !($ln =~ /<\/td>/i) ) {
            $i++;
            if ($i < $oi_tacnt3) {
               $ln .= ' '.$tbl_arr3[$i]; # extract a line
            } else {
               last;
            }
         }
         if ($cc != length($ln)) {
            $cc = length($ln);
            prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
         }
         # got begin and end of <td>...</td> block
         # 2006.09.11 '?' added to STOP greedy parsing
         if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
            my $tds = $1;
            my $inb = $2;
            my $tde = $3;
            prt( "$i - td[$tds] in[$inb] te[$tde]...\n" ) if ($dbg24);
            if ($ff > 0) {
               $fil = collectoi_hrefs($inb, 1); # remove HREF
               $fil = trim_line($fil);
               if (is_built_in($bif)) {
                  push(@tbl_set3, [$bif, $fil, 0]);
                  prt( " push(\@tbl_set3, [$bif, $fil, 0]); ...\n" ) if ($dbg23);
               } else {
                  if (($bif =~ /unused/i)||($bif =~ /missed/i)) {
                     prt( " Advice: Skipping [$bif] ...\n" ) if ($dbg23);
                  } else {
                     prt( " Advice: Skipping [$bif] - NOT BUILT IN FUNCTION!\n" );
                  }
               }
               $ff = 0;
            } else {
               $bif = $inb;
               $bif =~ s/\[//;
               $bif =~ s/\]//;
               $bif = trim_line($bif);
               if ($bif =~ /<.*?>(.*?)<\/.*?>/) {
                  $bif = trim_line($1);
               }
               $ff = 1;
            }
         } else {
            prt( "CHECK ME: Missed <td> ... </td> \n");
         }
      }
   }
}
sub get_old_index($) {
   my ($ind) = shift;
   $oi_tacnt = 0;
   $oi_tacnt3 = 0;
   my $ln = '';
   my $lncnt = 0;
   if (open IF, "<$ind") {
      @oi_larr = <IF>; # slurp it all in ...
      close(IF);
      $lncnt = scalar @oi_larr;
      prt( "Got $lncnt lines to process ... from [$ind]\n" );
      ###write2file( join('',@oi_larr), 'tempout.txt');
      $ln = tag2newline( join('',@oi_larr), 'td' );
      ###$ln = tag2newline( $ln, 'br' );
      @oi_larr2 = split(/\n/, $ln);
      ###write2file( join("\n",@oi_larr2), 'tempout3.txt');
      if (get_table_array()) {
         $oi_tacnt = scalar @tbl_arr;
         $oi_tacnt3 = scalar @tbl_arr3;
         prt( "Got $oi_tacnt and $oi_tacnt3 lines to process ... from [$ind]...\n" );
      } else {
         prt( "Failed to find table $tbl_num or $tbl_num3 ... in [$ind]...\n" );
      }
   } else {
      prt( "Warning: Failed to open $ind ...\n" );
      $no_index = 1;
   }
   if ($oi_tacnt > 0) {
      process_tbl_arr();
   } else {
      prt( "Warning: Failed to load table $tbl_num ...\n" );
   }
   if ($oi_tacnt3 > 0) {
      process_tbl_arr3();
   } else {
      prt( "Warning: Failed to load table $tbl_num3 ...\n" );
   }
   transfer_old_table3();
}
###################################################################
# COPIED OUT OF htmltools.pl, since I do NOT want to include it, just now ...
sub tag2newline { # ($txt2,'td');
   my ($txt, $tag) = @_;
   my $len = length($txt);
   my $ntxt = '';
   my $i;
   my $ch = '';
   my $ft = '';
   my $lcnt = 0;
   for ($i = 0; $i < $len; $i++ ) {
      $ch = substr($txt,$i,1);
      if ($lcnt && ($ch eq '<')) {
         $ft = $ch;
         $i++;
         for ( ; $i < $len; $i++ ) {
            $ch = substr($txt,$i,1);
            $ft .= $ch;
            if ($ch eq '>') {
               if ($ft =~ /^<$tag/i) {
                  $ft = "\n".$ft;
               }
               last;
            }
         }
         $ntxt .= $ft;
      } else {
         $ntxt .= $ch;
         if ($ch eq "\n") {
            $lcnt = 0;
         } else {
            $lcnt++;
         }
      }
   }
   return $ntxt;
}
sub collectoi_hrefs {
   my ($txt,$del) = @_;
   my $ntxt = '';
   my $len = length($txt);
   my $ch = '';
   my $hrf = '';
   my $i;
   for ($i = 0; $i < $len; $i++) {
      $ch = substr($txt,$i,1);
      if ($ch eq '<') {
         $hrf = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($txt,$i,1);
            $hrf .= $ch;
            if ($ch eq '>') {
               last;
            }
         }
         if ($hrf =~ /^<a\s/i) {
            if ($del == 0) {
               $ntxt .= $hrf;
            }
            ### prt("Got [$hrf] ...\n");
            if ($hrf =~ /href=["'](\S+)["']./i) {
               $hrf = $1;
               push(@oi_hrefs,$hrf);
               ### prt("Got [$hrf] ...\n");
            }
         } elsif ($hrf =~ /^<\/a>$/i) {
            if ($del == 0) {
               $ntxt .= $hrf;
            }
         } else {
            $ntxt .= $hrf;
         }
      } else {
         $ntxt .= $ch;
      }
   }
   return $ntxt;
}
###################################################################
####################################
# Reducing a line to bare bones
# Used when loading
# the EditPlus 2 stx files.
####################################
sub trim_line($) {
   my ($l) = shift;
   chomp $l; # remove LF
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g; # tabs to a space
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single
   $l = substr($l,1) while ($l =~ /^\s/); # each off leading space
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space
   return $l;
}
#Loading HTML stx [C:/Program Files/EditPlus 2/html.stx] ...
#Got KEYWORD [HTML Tags] ...
#Got KEYWORD [HTML Attributes] ...
#Got KEYWORD [Special characters] ...
sub load_html_stx($) {
   my ($fil) = shift;
   my $kw = 0;
   my $nl = '';
   prt("Loading HTML stx [$fil] ...\n");
   open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
   my @la = <IF>;
   close IF;
   foreach my $ln (@la) {
      chomp $ln;
      $ln =~ s/\r$//;
      if ($ln =~ /^#/) {
         if ($ln =~ /^#KEYWORD=(.*)/) {
            prt( "Got KEYWORD [$1] ...\n" );
            if ($1 eq 'HTML Tags') {
               $kw = 1;
               next;
            } elsif ($1 eq 'HTML Attributes') {
               $kw = 2;
               next;
            } elsif ($1 eq 'Special characters') {
               $kw = 3;
               next;
            }
         }
         $kw = 0;
         next;
      }
      if ($kw == 1) {
         $nl = trim_line($ln);
         push(@stxHTM, $nl) if (length($ln));
      } elsif ($kw == 2) {
         $nl = trim_line($ln);
         push(@stxATT, $nl) if (length($ln));
      } elsif ($kw == 3) {
         $nl = trim_line($ln);
         push(@stxSPL, $nl) if (length($ln));
      }
   }
}
#Loading PHP stx [C:/Program Files/EditPlus 2/php.stx] ...
#Got KEYWORD [Reserved words] ...
#Got KEYWORD [Built-in functions] ...
#Got KEYWORD [Variables] ...
sub load_php_stx($) {
   my ($fil) = shift;
   my $kw = 0;
   my $nl = '';
   prt("Loading PHP stx [$fil] ...\n");
   open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
   my @la = <IF>;
   close IF;
   foreach my $ln (@la) {
      chomp $ln;
      $ln =~ s/\r$//;
      if ($ln =~ /^#/) {
         if ($ln =~ /^#KEYWORD=(.*)/) {
            prt( "Got KEYWORD [$1] ...\n" );
            if ($1 eq 'Reserved words') {
               $kw = 1;
               next;
            } elsif ($1 eq 'Built-in functions') {
               $kw = 2;
               next;
            } elsif ($1 eq 'Variables') {
               $kw = 3;
               next;
            }
         }
         $kw = 0;
         next;
      } elsif ($ln =~ /^;/) { # skip these 'comments'
         next;
      }
      if ($kw == 1) {
         $nl = trim_line($ln);
         push(@stxRW, $nl) if (length($ln));
      } elsif ($kw == 2) {
         $nl = trim_line($ln);
         push(@stxBI, $nl) if (length($ln));
      } elsif ($kw == 3) {
         $nl = trim_line($ln);
         push(@stxVA, $nl) if (length($ln));
      }
   }
}
sub do_stx_load() {
   load_html_stx( $html_stx );
   prt( "Loaded ".scalar @stxHTM." HTM, ".scalar @stxATT." ATT, and ".scalar @stxSPL." spls\n" );
   load_php_stx( $php_stx );
   prt( "Loaded ".scalar @stxRW." RW, ".scalar @stxBI." BI, and ".scalar @stxVA." vars\n" );
}
my $old_ind = 'temp2/index.htm';
do_stx_load();
get_old_index($old_ind);
# eof - test7.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional