p2h.pl to HTML.

Generated: Tue Feb 2 17:54:47 2010 from p2h.pl 2007/06/02 42.3 KB.
#!/perl -w
# NAME: p2h.pl
# AIM: Convert a single perl file to HTML, using perl.css style
# USE: p2h <input> [<output>]
use strict;
use warnings;
use File::stat;
use File::Basename;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp.'.$0.'.txt';
if ($0 =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$0);
   $outfile = 'temp.'.($tmpsp[-1]).'.txt';
}
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );
my $in_file = $0;   # 'unlink.pl';
my $output = 'tempp2h.htm';
my $in_date = '';
my $in_size = '';
if (@ARGV) {
   parse_arguments(@ARGV);
}
prt( "Reading input [$in_file], with HTML output to [$output] ...\n" );
my $sb = stat($in_file);
if ( !defined $sb) {
   mydie( "ERROR: Unable to stat $in_file ... check name, location ...\n" );
}
$in_size = $sb->size;
$in_date = $sb->mtime; # keep DATE unchanged, so a SORT can be done
prt( "File $in_file has $in_size bytes to process ...\n" );
# user options
# this option REALLY adds weight to certain files
my $add_uvars = 1; # colour code user variables
my $add_pre = 1;   # use <pre ...>...</pre>
my $brown_qw = 1; # to process a qw(...);
my $add_chart = 0;   # add a chart of classes, and counts, etc diagnostic only
my $add_table = 0;   # add table, instead of above pre
# other USER variables
my $indexhtm = 'index.htm';
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
my $emreg = '(geoff\\w+\\@{1})(hotmail\\.com)';
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">';
# extract from perl.css
#.rw { color: #0000cd } /* reserved words - blue */
#.bif { color: #ff0000 } /* built-in functions - red */
#.sca { color: #9400d3 } /* scalar variables */
#.arr { color: #008b8b } /* array variables */
#.has { color: #a52a2a } /* hash variables */
#.com { color: #008000 } /* comments after # - dark green */
#.qot { color: #009900 } /* quoted items */
# set the CLASS and COLOUR strings
my $a_class = 'bif';   # or 'a'; # built-in function (red)
my $b_class = 'com';   # or 'b'; # comments (#006666)
my $c_class = 'rw';      # or 'c'; # reserved words (blue)
my $d_class = 'd'; # inside qw(...)
my $e_class = 'sca';   # or 'e'; # $scalar (#9400d3)
my $f_class = 'f'; # in <<EOF...EOF block (#666666)
my $o_class = 'arr';   # or 'o'; # @array  (#008b8b - was #FFA500)
my $v_class = 'has';   # or 'v'; # %hash (#a52a2a - was #808000)
my $t_class = 'qot';   # or 't'; # quoted - single and double (#006600)
my $a_color = '#ff0000';   # was 'red';
my $b_color = '#008000';   # was '#006666';
my $c_color = '#0000cd';   # was 'blue';
my $d_color = '#a52a2a';   # was 'brown'; # does not exist!
my $e_color = '#9400d3';   # was '#9400d3' or '#00008B'
my $f_color = '#666666';
my $o_color = '#008b8b';   # was '#008b8b' or '#FFA500'
my $v_color = '#a52a2a';   # was '#808000'
my $t_color = '#009900';   # was '#006600';
# debug options
my $debug_on = 0;
my $dbgem = 0;   # debug mangling email
my $verb5 = 0;   # debug add_2_used sub
my $verb52 = 0;   # more debug add_2_used sub
my $verb6 = 0;
my $dbg4 = 0;   # show adding metas
my $dbg25 = 0;   # show full metas added
# these are really just DEBUG counters
my $a_cnt = 0;
my $b_cnt = 0;
my $c_cnt = 0;
my $d_cnt = 0;
my $e_cnt = 0;
my $f_cnt = 0;
my $o_cnt = 0;
my $v_cnt = 0;
my $q_cnt = 0;
my $efix_cnt = 0;   # keep count of email names changed,
my $out_total  = 0;
my $ind_file = '';   # file name for INDEX list
my $doc_total = 0;
my $last_builtin = '';
my $last_resword = '';
# reserved words, and build-ins - use this local list
my @ResWords = qw/ continue do else elsif for foreach goto if last local lock map my next package redo 
require return sub unless until use while STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG TRUE FALSE __FILE__ 
__LINE__ __PACKAGE__ __END__ __DATA__ lt gt le ge eq ne cmp x not and or xor q qq qx qw $ @ % /;
my @BuiltIns = qw(abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown chr 
chroot close closedir connect cos crypt dbmclose dbmopen defined delete die dump each eof eval exec exists 
exit exp fcntl fileno flock fork format formline getc getlogin getpeername getpgrp getppid getpriority 
getpwnam getgrnam gethostbyname getnetbyname getprotobyname getpwuid getgrgid getservbyname gethostbyaddr 
getnetbyaddr getprotobynumber getservbyport getpwent getgrent gethostent getnetent getprotoent 
getservent setpwent setgrent sethostent setnetent setprotoent setservent endpwent endgrent endhostent 
endnetent endprotoent endservent getsockname getsockopt glob gmtime grep hex import index int ioctl 
join keys kill lc lcfirst length link listen localtime log lstat mkdir msgctl msgget msgsnd msgrcv no oct 
open opendir ord pack pipe pop pos print printf prototype push quotemeta rand read readdir readlink recv 
ref rename reset reverse rewinddir rindex rmdir scalar seek seekdir select semctl semget semop send setpgrp 
setpriority setsockopt shift shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort 
splice split sprintf sqrt srand stat study substr symlink syscall sysopen sysread sysseek system syswrite 
tell telldir tie tied time times truncate uc ucfirst umask undef unlink unpack untie unshift utime values 
vec wait waitpid wantarray warn write );
prt( "Got ".scalar @ResWords." Reserved Words, and ".scalar @BuiltIns." Built-in functions ...\n" );
my @efix_files = ();
my %HUsedpack = ();
my @lines = ();      # html output lines ...
my %HFuncsFnd = ();
my %HResWdFnd = (); # reserved words used
my @AFileNames = (); # for each output file, with hash of functions
my @AFileHashs = (); # for each output file, with hash of functions
# the main program
# =================================================================
$ind_file = basename( $in_file, get_suffix($in_file) ) . 'htm';
prt( "Sent index name to [$ind_file] ...\n" );
process_file( $in_file, $in_file );
prt( "Got ".scalar @lines." lines of output ...\n" );
write_out_file($output);
prt( "END: Closing LOG $outfile, and passing $output to the system ...\n" );
close_log($outfile,0);
system($output);   # see what we created
exit(0);
# =================================================================
#######################################################################
### only subs below
sub get_suffix {
   my ($f) = shift;
   my @arr = split(/\./,$f);
   return $arr[-1];
}
#########################################################
# A small set of 9 services which add in the CSS class,
# using <span class="???">.thetext.</span>
#
# Each one does a different class, and the class
# is extracted to variables set above. This means
# they can easily be adjusted to new, different
# values ...
#
# They also accumulate statistic information on how
# many time each is used ...
#########################################################
# built-in functions
sub add_red {
   my ($t) = shift;
   $a_cnt++;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}
# perl comments
sub add_class_b {
   my ($t) = shift;
   $b_cnt++;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}
# perl reserved words
sub add_blue {
   my ($t) = shift;
   $c_cnt++;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}
# perl qw set
sub add_class_d {
   my ($t) = shift;
   $d_cnt++;
   return ('<span class="'.$d_class.'">'.$t.'</span>');
}
sub add_class_e {
   my ($t) = shift;
   $e_cnt++;
   return ('<span class="'.$e_class.'">'.$t.'</span>');
}
sub add_class_f {
   my ($t) = shift;
   $f_cnt++;
   return ('<span class="'.$f_class.'">'.$t.'</span>');
}
sub add_class_o {
   my ($t) = shift;
   $o_cnt++;
   return ('<span class="'.$o_class.'">'.$t.'</span>');
}
sub add_class_v {
   my ($t) = shift;
   $v_cnt++;
   return ('<span class="'.$v_class.'">'.$t.'</span>');
}
sub add_quote {
   my ($t) = shift;
   $q_cnt++;
   return ('<span class="'.$t_class.'">'.$t.'</span>');
}
#########################################################
# search the @ResWord array for an entry
sub in_res_words {
   my ($t) = shift;
   foreach my $rw (@ResWords) {
      if ($t eq $rw) {
         $last_resword = $rw;
       if (exists $HResWdFnd{$rw}) {
         $HResWdFnd{$rw}++; # another count
       } else {
         $HResWdFnd{$rw} = 1; # start count
       }
         return 1;
      }
   }
   return 0;
}
# search the @BuiltIns array for an entry
sub is_built_in {
   my ($t) = shift;
   foreach my $rw (@BuiltIns) {
      if ($t eq $rw) {
         return 1;
      }
   }
   return 0;
}
sub in_built_in {
   my ($t) = shift;
   if (is_built_in($t)) {
      $last_builtin = $t;
     if (exists $HFuncsFnd{$t}) {
      ### prt ( "Bumped Funcs [$t] ...\n" );
      $HFuncsFnd{$t}++; # another count
    } else {
      ### prt ( "Created Funcs [$t] ...\n" );
      $HFuncsFnd{$t} = 1; # start count
    }
     return 1;
   }
   return 0;
}
sub is2lt {
   my $t = shift;
   $t =~ s/&lt;/</g;
   if ( (length($t) >= 2 ) && ( $t =~ /<<$/ ) ) {
      return 1;
   }
   return 0;
}
sub sans_quotes {
   my $t = shift;
   $t =~ s/"//g;
   $t =~ s/'//g;
   return $t;
}
######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $debug_on;
   return $nt;
}
###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if ($t =~ /\s\s/) { # if any two consecutive white space
      return conv_spaces($t);
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if $debug_on;
   return $t;
}
#############################################################
sub in_efix_list {
   my ($fn) = shift;
   my $cc = scalar @efix_files;
   ###foreach my $n (@efix_files) {
   for (my $i = 0; $i < $cc; $i++) {
      my $n = $efix_files[$i][0];
      if ($n eq $fn) {
         return 1;
      }
   }
   return 0;
}
sub mangled_email {
   my ($em) = shift;
   $em =~ s/geoffmclane/geoffair/i;
   $em =~ s/\./ _dot_ /;
   $em =~ s/\@/ _at_ /;
   return $em;
}
################################################
# sadly, this is to mangle my email, so
# it does not 'appear' to web scrapers
################################################
sub fix_email {
   my ($eml, $bfn) = @_;   # get line, and base name
   my $nem = $eml;
   if ($eml =~ /$emreg/i) {
      my $nm = $1.$2;
      my $sm = mangled_email($nm);
      my $ind = index($eml, $nm);
      if (!($ind == 1)) {
         $nem = substr($eml,0,$ind);
         $nem .= $sm;
         $nem .= substr($eml, $ind+length($nm));
         $efix_cnt++;
         push(@efix_files, [$bfn, $nm, $sm]) if (!in_efix_list($bfn));
      }
      print "got [$nm] ... now [$sm] ... ind $ind ...\n" if $dbgem;
   } else {
      print "failed\n" if $dbgem;
   }
   return $nem;
}
sub add_2_used($) {
   my ($lne) = $_[0];
   my $ll = length($lne);
   my $i2 = index($lne, ';');
   my $dn = 0;
   my $pkg = '';
   my $v = '';
   if (($i2 > 4)&&(length($ind_file))) {
      $lne = substr($lne,0,$i2);
      prt( "processing line [$lne]\n" ) if ($verb52);
      if ($lne =~ /^use\s+(.+)/) {
         $pkg = trimall($1);
         prt( "got package [$pkg]\n" ) if ($verb52);
         if ( defined $HUsedpack{$pkg} ) {
            $v = $HUsedpack{$pkg};
            if ( $v =~ /$ind_file/ ) {
               $dn = 3;
            } else {
               $v .= ' ' . $ind_file;
               $HUsedpack{$pkg} = $v;
               $dn = 2;
            }
         } else {
            $HUsedpack{$pkg} = $ind_file;
            $dn = 1;
         }
      }
   }
   if ($dn) {
      if ($dn == 1) {
         prt( "New USE [$pkg] in [$ind_file] ... [$_[0]]\n" ) if ($verb5);
      } elsif ($dn == 3) {
         prt( "Repeat USE [$pkg] in [$ind_file] ...[$_[0]]\n" ) if ($verb5);
      } else {
         prt( "Added USE [$pkg] in [$ind_file] ...[$_[0]]\n" ) if ($verb5);
      }
   } else {
      prt( "WARNING: failed USE with $_[0] ...[$ind_file]\n" );
   }
}
##########################################################
# The following two functions 'convert' scalar variables
# to colour codes spans, in the print <<EOF = get_uform,
# and withing double quoted text "this $cnt ..." ...
# THESE ADD LOTS OF WEIGHT TO THE FILE
##########################################################
sub get_uform {
   my $ln = shift;
   my $tok = ''; # colour up the USER scalar variables within
   my $len = length($ln);
   my $nline = '';
   for (my $i = 0; $i < $len; $i++) {
      my $ch = substr($ln, $i, 1);
      if (($ch eq '$') && (($i + 1) < $len) && (substr($ln,$i+1,1) =~ /\w/) ) {
         $nline .= add_class_f(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nline .= add_class_e(html_line($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
    }
   $nline .= add_class_f(html_line($tok)) if (length($tok));
   return $nline;
}
sub add_2_lines {
   my $t = shift;
   if ( ! $add_pre ) {
      $t .= "<br>";
   }
    prt( "nline[$t]\n" ) if $debug_on;
   $t .= "\n";
   push(@lines, $t);
}
sub get_balance {
   my ($t) = shift;
   if ($t =~ /#/) {
      my $off = index($t, '#');
      if ($off != -1) {
         $t = substr($t,0,$off);
      }
   }
   return $t;
}
sub get_comment {
   my ($t) = shift;
   my $off = index($t, '#');
   if ($off != -1) {
      $t = substr($t,$off);
   } else {
      $t = '';
   }
   return $t;
}
sub add_quote2 {
   my ($ln) = shift;
   my $len = length($ln);
   my $ch = '';
   my $ch2 = '';
   my $pc = '';
   my $pc2 = '';
   my $nl = ''; # put the NEW line in here
   my $tok = ''; # colour up the USER scalar variables within DOUBLE quotes
   for (my $i = 0; $i < $len; $i++ ) {
      $ch = substr($ln, $i, 1);
      $ch2 = (($i + 1) < $len) ? substr($ln,$i+1,1) : '';
      # if a scalar variable, and not 'escaped', or the escape escaped and next is 'an_'
      if (($ch eq '$') && (($pc ne '\\')||(($pc eq '\\') && ($pc2 eq '\\'))) &&
         (($i + 1) < $len) && ($ch2 =~ /\w/) ) {
         $nl .= add_quote(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nl .= add_class_e(html_line($tok));
               $tok = '';
               last; # exit
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
      $pc2 = $pc;
      $pc = $ch;
   }
   $nl .= add_quote(html_line($tok)) if (length($tok));
   return $nl;   
}
#################################################################
# The MAIN file processing
# The input file is openned, and all the lines read
# into an array @lns, then each line is processed,
# cheracter by character ...
# It does it mainly via a state, $st
# $st == 0 - processing white space
# $st == 1 - processing alphanumeric, plus _
# $st == 2 - processing nither space nor alphanumeric, here
#            referred to as 'an_' ...
# $st == 3 - Locked in one of << thingies, until the end
#            token located, or until end of file ...
# $st == 4 - Processing a qw(...) function, of qw/.../ if 
# enabled.
#
# Generally the 'tokens' are stored in $tok, as the line
# is processed, added to the $nline at various change
# points, and finally the $nline is stored in the array
# @lines, for later output ...
#
# Setting $debug_on will give a BIG TRACE of where the
# code is handling something ...
#
# Setting $add_uvars to on will add colour code user variables
# but this adds a lot of extra weight to the file.
#
# At this time, the global variable $ind_file contains the
# HTML output file name ...
#################################################################
sub process_file {
   my ($inf, $bn) = @_;   # input and base name
   my ($IF);
   my ($ch1,$ch2,$ch3,$ch4);
   open $IF, "<$inf" or die "ERROR: Unable to open $inf ... $! ... aborting ...\n";
   my @lns = <$IF>; # slurp into line array
   close($IF);
   prt( "\nGot ".scalar @lns." to process from $inf ...\n" ) if ($verb6 || $debug_on);
   my $st = 0; # current status
   my $nst = 0;
   my $pc = '';
   my $pc2 = '';
   my $ch = '';
   my $tok = '';
   my $ltok = ''; # last token
   my $ltok1 = '';
   my $ltok2 = '';
   my $qtok = ''; # print <<"EOF" or ANY <<'until_end', token
   my $end_qw = '/';
   my $i = 0;
   foreach my $ln (@lns) {
      $doc_total += length($ln);
      chomp $ln;
      $ln =~ s/\r$//; # and remove CR, if present
     $ln = fix_email($ln, $bn); # keep list where email is present
      my $len = length($ln);
      my $nline = '';
      prt( "\nline=[$ln] ...\n" ) if $debug_on;
      $pc = '';
     $pc2 = '';
      $tok = '';
      $ltok = ''; # last token
      $ltok1 = ''; # token stack
      $ltok2 = '';
      $i = 0;
      $nst = 0; # if fall through, next status is IN space
     add_2_used($ln) if ($ln =~ /^use\s+/); # if line BEGINS with 'use '
      if ($st == 3) {
         # locked in a 'print' string to end token
       if ($add_uvars) {
          $nline = get_uform( $ln );
       } else {
          $nline = add_class_f(html_line($ln));
       }
       add_2_lines($nline);
         if ($ln =~ /^$qtok/) {
            $st = 0;
         }
         next; # next LINE of file
      } elsif ($st == 4) {
         # processing a 'qw' block - only if $brown_qw is ON
         $tok = '';
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ($ch eq $end_qw) { # either '/' or ')' depending on start
               $nline .= add_class_d(html_line($tok)) if (length($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
         if ($i < $len) {
            $nst = 2; # fall through to continue line
         } else {
            $nline = add_class_d(html_line($ln));
         add_2_lines($nline);
            next;
         }
      }
      $st = $nst;
      for ( ; $i < $len; $i++) {
         $ch = substr($ln, $i, 1);
         # make a BIG exception of '&lt;' ...
         if (($ch eq '&') && (($i + 3) < $len)) {
            $ch1 = substr($ln, $i, 4);
         if ($ch1 eq '&lt;') {
            $tok .= $ch1;
            $i += 3;
            $st = 2;
            $pc = ';';
             next;
         }
       }
         if ($st == 0) {
            # IN white space territory
            if ($ch =~ /\S/) {
               prt( "IN ws, changed to NOT with [$ch] ".
                  "\$tok=[$tok] \$ltok[$ltok] \$ltok1[$ltok1] \$ltok2[$ltok2] html\n" ) if $debug_on; 
               $nline .= html_line($tok); # add any white space to new line
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            # if NOT escape, or escaped escape character
               if ( ($pc ne '\\') || (($pc eq '\\') && ($pc2 eq '\\')) ){
                  if ($ch eq '#') {
                     # start of a COMMENT
                     prt( "start of a COMMENT [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] html\n" ) if $debug_on; 
                     $tok = substr($ln, $i); 
                     $nline .= add_class_b(html_line($tok));
                     $tok = '';
                     $st = 0;
                     last;
                  } elsif (($ch eq '"')||($ch eq "'")) {
                     my $bch = $ch;
                     prt( "start of a QUOTE [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]\n" ) if $debug_on; 
                     $tok = $ch;
                     $i++;
                $pc2 = '';
                     for ( ; $i < $len; $i++ ) {
                        $ch = substr($ln, $i, 1);
                  # if the PREVIOUS is NOT an ESCAPE, OR the previous and previous ARE
                  # that is a ESCAPED ESCAPE character, which is NOT an escape at all ;=))
                        if ( ($pc ne '\\') || (($pc eq '\\')&&($pc2 eq '\\')) ) {
                           if ($ch eq $bch) {
                              $tok .= $ch;
                              prt( "End of a QUOTE [$ch] ".
                                 "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] html\n" ) if $debug_on; 
                       if ($add_uvars && ($bch eq '"')) {
                          $nline .= add_quote2($tok);
                       } else {
                          $nline .= add_quote(html_line($tok));
                       }
                              $tok = '';
                       $pc2 = $pc;
                              $pc = $ch;
                              last;
                           }
                        }
                        $tok .= $ch;
                  $pc2 = $pc;
                        $pc = $ch;
                     }
                     $pc = $ch;
                     next;
                  }
               }
               $tok = $ch;
               if ($ch =~ /\w/) {
               prt( "Start tok with $ch ... sw st [$st] to 1\n" ) if $debug_on;
                  $st = 1;
               } else {
               prt( "Start tok with $ch ... sw st [$st] to 2\n" ) if $debug_on;
                  $st = 2;
               }
            $pc2 = $pc;
               $pc = $ch;
               next;
            } else {
               # staying in white space
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
         } elsif ($st == 1) {
            # dealing with alphanumberic + _
            if ($ch =~ /\w/) {
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next; # continue alphanumeric + _
            }
            prt( "IN an_, no longer an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
            if (length($tok)) {
               if (in_res_words($tok) ) {
                  $nline .= add_blue(html_line($tok));
                  if ($brown_qw && (($ch eq '(')||($ch eq '/')) && ($last_resword eq 'qw')) {
                # entering a qw list
                $end_qw = '/';
                $end_qw = ')' if ($ch eq '(');
                     prt( "Excepting a qw list ... Begin $ch, End $end_qw ...\n" ) if $debug_on;
                     $i++;
                     $nline .= $ch;
                     $tok = ''; # no token
                     for ( ; $i < $len ; $i++) {
                        $ch = substr($ln,$i,1);
                        if ($ch eq $end_qw) { # end on '/' or ')' depending on start
                           $nline .= add_class_d(html_line($tok)) if (length($tok));
                           $nline .= $ch;
                           $tok = '';
                           last;
                        }
                        $tok .= $ch;
                     }
                     if ($i < $len) {
                        next; # get next character
                     } # else, we have ended the line, still in a 'qw' ...
                     $nline .= add_class_d(html_line($tok)) if (length($tok));
                     $tok = '';
                     $st = 4;
                     last; # end of THIS line
                  }
               } elsif (in_built_in($tok)) {
                  $nline .= add_red(html_line($tok));
               } else {
               if ($add_uvars) { # colour code user variables
                  $ch1 = substr($tok,0,1);
                 if ($ch1 eq '$') {
                    $nline .= add_class_e(html_line($tok));
                 } elsif ($ch1 eq '@') {
                    $nline .= add_class_o(html_line($tok));
                 } elsif ($ch1 eq '%') {
                    $nline .= add_class_v(html_line($tok));
                 } else {
                        $nline .= html_line($tok);
                 }
               } else {
                  $nline .= html_line($tok);
               }
               }
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            }
            $tok = $ch;
            if ($ch =~ /\s/) {
               $st = 0; # goto SPACE mode
            } elsif ($ch =~ /\w/) {
               $st = 1; # goto AN_ mode
            } else {
               $st = 2; # goto NOT SPACE or AN_ mode
            }
         $pc2 = $pc;
            $pc = $ch;
            next;
         } elsif ($st == 2) {
            # not IN space or IN an_
            if ($ch =~ /\s/) {
               prt( "IN 2 - change back to space with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               $nline .= html_line($tok);
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = $ch;
               $st = 0;
            $pc2 = $pc;
               $pc = $ch;
               next;
            } elsif ($ch =~ /\w/) { # alphanumeric, including _
               prt( "IN 2 - change back to an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               if (($tok eq '$')||($tok eq '@')||($tok eq '%')) {
                  $tok .= $ch;
               } else {
               prt( "Not \$, \@, or \% - html\n" ) if $debug_on;
                  $nline .= html_line($tok);
                  $ltok2 = $ltok1;
                  $ltok1 = $ltok;
                  $ltok = $tok;
                  $tok = $ch;
               }
               $st = 1;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
         ## NOT space or alphanumeric, including _ ...
            ###if (($pc ne '\\') && (($ch eq '#') || ($ch eq '"') || ($ch eq "'"))) {
            if ((($pc ne '\\')||(($pc eq '\\')&&($pc2 eq '\\'))) &&
            ((($ch eq '#')&&($pc ne '$')) || ($ch eq '"') || ($ch eq "'"))) {
            prt( "add in current tok[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] ...\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
               if ($ch eq '#') {
                  prt("# start of a COMMENT ...\n") if $debug_on;
                  $tok = substr($ln, $i); 
                  $nline .= add_class_b(html_line($tok));
                  $tok = '';
                  $st = 0;
                  last;
               } elsif (($ch eq '"')||($ch eq "'")) {
                  my $bch = $ch;
                  $tok = $ch;
                  $i++;
              $pc2 = '';
                  for ( ; $i < $len; $i++ ) {
                     $ch = substr($ln, $i, 1);
                     if ( ($pc ne '\\') || ( ($pc eq '\\') && ($pc2 eq '\\') ) ) {
                        if ($ch eq $bch) {
                           $tok .= $ch;
                           $qtok = sans_quotes($tok);
                     if ($add_uvars && ($bch eq '"')) {
                       $nline .= add_quote2($tok);
                     } else {
                       $nline .= add_quote(html_line($tok));
                     }
                           $tok = '';
                     $pc2 = $pc;
                           $pc = $ch;
                           last;
                        }
                     }
                     $tok .= $ch;
                $pc2 = $pc;
                     $pc = $ch;
                  }
                  # check for 'print ... <<"EOF";'
                  if (($i < $len) && 
                 ($last_builtin eq 'print') &&
                 (length($ltok) >= 2) && 
                 is2lt($ltok) && 
                 length($qtok) ) {
                $qtok = sans_quotes($qtok);   # strip any DOUBLE/SINGLE quotes
                     prt( "Got print [$last_builtin] ltok[$ltok] qtok[$qtok] ...\n" ) if $debug_on;
                     $i++;
                     $nline .= html_line(substr($ln,$i));
                     $tok = '';
                     $st = 3;
                     last; # done this line
                  }
              $pc2 = $pc;
                  $pc = $ch;
                  next;
               }
            }
         if ($add_uvars && (($ch eq '$')||($ch eq '@')||($ch eq '%'))) {
            prt( "In add_uvars and got \$\@\% [$ch] add tok 2 line ... reset tok\n" ) if $debug_on;
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
         } else {
            prt( "NOT space or alphanumeric, including _, or special, or \$\@\% [$ch] add2tok ...\n" ) if $debug_on;
         }
            $tok .= $ch;
         }
       $pc2 = $pc;
         $pc = $ch;
      }
      $nline .= html_line($tok);
     add_2_lines($nline);   # push(@lines, $nline); after appending EOL
   }
}
####################################
# Reducing a line to bare bones
# used when loading
# the EditPlus 2 perl.stx file.
# and getting used packages
####################################
sub trim_line($) {
   my ($l) = shift;
   chomp $l; # remove LF
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g; # tabs to a space
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single
   $l = substr($l,1) while ($l =~ /^\s/); # each off leading space
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space
   return $l;
}
sub trimall($) {
   return( trim_line($_[0]) );
}
sub add_metas($$) {
   my ($oh, $ad) = @_;
   my $m = '';
   my $m2 = '';
   prt( "Add metas to handle ...\n" ) if ($dbg4);
   $m = '<meta name="author" content="geoff mclane">'."\n";
   $m .= '<meta name="keywords" content="geoff, mclane, geoffmclane, computer, consultant, programmer,'."\n";
   $m2 = 'perl, scripts, samples, examples';
   if ($ad) {
      foreach my $k (keys %HFuncsFnd) {
         if (length($m2) > 76) {
            $m2 .= ",\n";
            $m .= $m2;
            $m2 = $k;
         } else {
            $m2 .= ', '.$k;
         }
      }
   } else {
      my $bcnt = scalar @AFileHashs; # collection of HASHES from each file
      my $nkys = ' ';
      my $ky = '';
      my @kys = ();
      for (my $ih = 0; $ih < $bcnt; $ih++) { # for each HASH
         @kys = keys %{$AFileHashs[$ih]}; # get built-ins used for this file
         foreach $ky (@kys) {   # go through the keys
            if ( !($nkys =~ / $ky /) ) { # if NOT already in the list
               $nkys .= $ky.' '; # add it
            }
         }
      }
      @kys = split(/ /, $nkys); # split the list into an array
      foreach $ky (@kys) {   # and add each from the array
         if (length($ky)) {
            if (length($m2) > 76) {
               $m2 .= ",\n";
               $m .= $m2;
               $m2 = $ky;
            } else {
               $m2 .= ', '.$ky;
            }
         }
      }
   }
   $m .= $m2;
   $m .= ', free">'."\n";
   $m .= '<meta name="description" content="page of a computer programmer, with sample perl scripts">'."\n";
   print $oh $m;
   prt("$m") if ($dbg25);
}
##########################################################################
# The main file OUTPUT - that is the HTML file.
# It establishes the HTML header, which includes the CSS style
# information. then outputs each of the 'converted' lines ...
# this is what it is all about - to generate a HTML document
##########################################################################
sub write_out_file {
   my ($outf) = shift;
   my ($OF, $line);
   open $OF, ">$outf" or mydie( "ERROR: Unable to create $outf ... aborting ...\n" );
   print $OF "$m_doctype\n";
   print $OF <<"EOF";
<html>
<head>
<title>$in_file to HTML</title>
<meta http-equiv="Content-Language" content="en-gb">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
EOF
   add_metas($OF, 1);
   print $OF <<"EOF";
<link rel=stylesheet href="perl.css" type="text/css">
</head>
<body>
EOF
   print $OF "<h1>$in_file to HTML.</h1>\n";
   print $OF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";
   print $OF '<p>Generated: ' . localtime(time()) . " from $in_file ";
   print $OF YYYYMMDD($in_date).' '.b2KMG($in_size).".</p>\n\n";
   if ($add_table) {
      print $OF '<table width="100%" border="1" summary="Simple HTML of $in_file"><tr><td>'."\n";
   } elsif ($add_pre) {
      print $OF '<pre class="cd">'."\n";
   }
   # actual output of generated lines
   foreach $line (@lines) {
      $out_total += length($line);
      print $OF $line;
   }
   if ($add_table) {
      print $OF '</td></tr></table>'."\n";
   } elsif ($add_pre) {
      print $OF '</pre>'."\n\n";
   }
   if ($add_chart) {
      # mainly only for DEBUG
     print $OF <<"EOF";
Chart of Colours Used<br>
<table border="1" summary="Table of colours, and count of times used">
<tr>
   <th>Class</th><th>Colour</th><th>Use</th><th>Count</th>
</tr>
<tr>
<td><span class="$a_class">class='$a_class'</span></td>
<td><span class="$a_class">$a_color RED</span></td>
<td><span class="$a_class">Built-in Functions</span></td>
<td><span class="$a_class">$a_cnt</span></td>
</tr>
<tr>
<td><span class="$b_class">class='$b_class'</span></td>
<td><span class="$b_class">$b_color BLUEGREEN</span></td>
<td><span class="$b_class">Comments (following #)</span></td>
<td><span class="$b_class">$b_cnt</span></td>
</tr>
<tr>
<td><span class="$c_class">class='$c_class'</span></td>
<td><span class="$c_class">$c_color BLUE</span></td>
<td><span class="$c_class">Reserved Words</span></td>
<td><span class="$c_class">$c_cnt</span></td>
</tr>
<tr>
<td><span class="$d_class">class='$d_class'</span></td>
<td><span class="$d_class">$d_color BROWN</span></td>
<td><span class="$d_class">Inside qw(...)</span></td>
<td><span class="$d_class">$d_cnt</span></td>
</tr>
<tr>
<td><span class="$e_class">class='$e_class'</span></td>
<td><span class="$e_class">$e_color DARKBLUE</span></td>
<td><span class="$e_class">Scalar Variables</span></td>
<td><span class="$e_class">$e_cnt</span></td>
</tr>
<tr>
<td><span class="$f_class">class='$f_class'</span></td>
<td><span class="$f_class">$f_color GREY</span></td>
<td><span class="$f_class">Inside << EOF thingy</span></td>
<td><span class="$f_class">$f_cnt</span></td>
</tr>
<tr>
<td><span class="$o_class">class='$o_class'</span></td>
<td><span class="$o_class">$o_color ORANGE</span></td>
<td><span class="$o_class">Array Variables</span></td>
<td><span class="$o_class">$o_cnt</span></td>
</tr>
<tr>
<td><span class="$v_class">class='$v_class'</span></td>
<td><span class="$v_class">$v_color OLIVE</span></td>
<td><span class="$v_class">Hash Variables</span></td>
<td><span class="$v_class">$v_cnt</span></td>
</tr>
<tr>
<td><span class="$t_class">class='$t_class'</span></td>
<td><span class="$t_class">$t_color GREEN</span></td>
<td><span class="$t_class">Single and Double Quotes</span></td>
<td><span class="$t_class">$q_cnt</span></td>
</tr>
</table>
<br>End of chart<br>
EOF
      my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
      my $diff = $out_total - $doc_total;
      print $OF "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
   }
   print $OF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";
   # add 4.01 validation ...
   print $OF <<"EOF";
<p>
<a href="http://validator.w3.org/check?uri=referer">
<img src="valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31">
</a>
</p>
EOF
   print $OF "</body>\n";
   print $OF "</html>\n";
   close($OF);
}
################################################
# My particular time 'translation'
sub YYYYMMDD {
   #  0    1    2     3     4    5     6     7     8
   my ($tm) = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tm);
   $year += 1900;
   $mon += 1;
   my $ymd = "$year/";
   if ($mon < 10) {
      $ymd .= '0'.$mon.'/';
   } else {
      $ymd .= "$mon/";
   }
   if ($mday < 10) {
      $ymd .= '0'.$mday;
   } else {
      $ymd .= "$mday";
   }
   return $ymd;
}
##################################################
# My particular bytes to K, M, G
sub b2KMG($) {
   my ($d) = shift;
   if ($d < 1000) {
      return $d;
   }
   my $oss;
   my $kss;
   my $lg = 0;
   my $ks = ($d / 1024); #// get Ks
   my $div = 1;
    if( $ks < 1000 ) {
      $div = 1;
      $oss = "KB";
    } elsif ( $ks < 1000000 ) {
     $div = 1000;
      $oss = "MB";
    } elsif ( $ks < 1000000000 ) {
      $div = 1000000;
      $oss = "GB";
    } else {
      $div = 1000000000;
      $oss = "TB";
    }
    $kss = $ks / $div;
    $kss += 0.05;
    $kss *= 10;
    $lg = int($kss);
    return( ($lg / 10) . " " . $oss );
   ###return( ($lg / 10) . $oss );
}
sub parse_arguments {
   my (@av) = @_; # take it off the passed stack
   my $cnt = 0;
   while (@av) {
      $cnt++;
      my $a = shift @av; # get and move to next
      if ($cnt == 1) {
         $in_file = $a;
      } elsif ($cnt == 2) {
         $output = $a;
      } else {
         prt( "ERROR: Only maximum of 2 arguments allowed!\n" );
         mydie( "First is input file name, second, optional is output file name ...\n" );
      }
   } # while arguments
}
# eof - p2h.pl
index -|- top