#!/usr/bin/perl -w
# NAME: geniframe.pl
# AIM: Based on genindex04.pl - Given an input folder, generate a tempimap.htm of
# a complete directory scan
# 12/09/2014 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;
use Cwd;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
$perl_dir = 'C:\GTools\perl';
$temp_dir = $perl_dir;
$PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
require 'lib_html.pl' or die "Unable to load 'lib_html.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
my @tmpsp = split(/(\\|\/)/,$pgmname);
$pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);
# user variables
my $VERS = "0.0.1 2014-09-12";
my $load_log = 0;
my $in_dir = '';
my $verbosity = 0;
my $out_xml = 'tempimap.htm';
my $xclude_repo_dirs = 1;
my @repo_dirs = qw( CVS .svn .git .hg );
my $recursive = 0;
my $html_only = 1;
my $blank = 0;
my $colcount = 2;
my $desccol = 0; # from file, this could be the title of the html is there is one
my %descriptions = ();
my $add_sorted = 0;
my @excluded = ();
my @descriptions = ();
my $add_href = 0; # this does not work out as expected!!!
# debug
my $debug_on = 0;
my $def_file = 'C:\GTools\perl';
### program variables
my @warnings = ();
my $cwd = cwd();
# forward
sub scan_directory($$);
sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }
sub is_repo_directory($) {
my $dir = shift;
my ($test);
foreach $test (@repo_dirs) {
return 1 if ($dir eq $test);
}
return 0;
}
sub show_warnings($) {
my ($val) = @_;
if (@warnings) {
prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
foreach my $itm (@warnings) {
prt("$itm\n");
}
prt("\n");
} else {
prt( "\nNo warnings issued.\n\n" ) if (VERB9());
}
}
sub pgm_exit($$) {
my ($val,$msg) = @_;
if (length($msg)) {
$msg .= "\n" if (!($msg =~ /\n$/));
prt($msg);
}
show_warnings($val);
close_log($outfile,$load_log);
exit($val);
}
sub prtw($) {
my ($tx) = shift;
$tx =~ s/\n$//;
prt("$tx\n");
push(@warnings,$tx);
}
sub process_in_file($) {
my ($inf) = @_;
if (! open INF, "<$inf") {
pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
}
my @lines = ;
close INF;
my $lncnt = scalar @lines;
prt("Processing $lncnt lines, from [$inf]...\n");
my ($line,$inc,$lnn);
$lnn = 0;
foreach $line (@lines) {
chomp $line;
$lnn++;
if ($line =~ /\s*#\s*include\s+(.+)$/) {
$inc = $1;
prt("$lnn: $inc\n");
}
}
}
sub is_html_like($) {
my $fil = shift;
return 1 if ($fil =~ /\.html$/i);
return 1 if ($fil =~ /\.htm$/i);
return 1 if ($fil =~ /\.php$/i);
return 1 if ($fil =~ /\.shtml$/i);
return 1 if ($fil =~ /\.phtml$/i);
return 0;
}
my $dbg_mww = 0;
sub match_with_wild($$) {
my ($fil1,$fil2) = @_;
my $len1 = length($fil1);
my $len2 = length($fil2);
prt("match_with_wild: [$fil1] [$fil2] ") if ($dbg_mww);
my ($i,$j,$c1,$c2);
$i = 0;
$j = 0;
if (($len1 > 0) && ($len2 > 0)) {
while (($i < $len1)&&($j < $len2)) {
$c1 = substr($fil1,$i,1);
$c2 = substr($fil2,$j,1);
if (($c1 eq $c2)||($c1 eq '?')||($c2 eq '?')) {
$i++;
$j++;
prt("$c1= ") if ($dbg_mww);
} elsif ($c2 eq '*') {
$i++; # any $c1 matches asterick
if (($j + 1) < $len2) {
# but if more, maybe time to step past '*'
$c2 = substr($fil2,($j+1),1);
if ($c1 eq $c2) {
$j += 2;
}
}
prt("$c1* ") if ($dbg_mww);
} elsif ($c1 eq '*') {
$j++; # any $c2 matches asterick
if (($i + 1) < $len1) {
# but if more, maybe time to step past '*'
$c1 = substr($fil1,($i+1),1);
if ($c1 eq $c2) {
$i += 2;
}
}
prt("$c2* ") if ($dbg_mww);
} else {
prt(" = 0 - [$c1] ne [$c2]\n") if ($dbg_mww);
return 0;
}
}
if (($i == $len1)&&($j == $len2)) {
prt(" = 1 - both ran out of chars\n") if ($dbg_mww);
return 1; # both ran out of chars
} elsif (($i == $len1)&&($c2 eq '*')&&(($j + 1) == $len2)){
prt(" = 1 - first ran out and last is second $c2\n") if ($dbg_mww);
return 1; # first ran out, and second is last '*'
} elsif (($j == $len2)&&($c1 eq '*')&&(($i + 1) == $len1)){
prt(" = 1 - second ran out and last of first is $c1\n") if ($dbg_mww);
return 1; # second ran out, and second is last '*'
}
prt(" = 0 - failed - no case\n") if ($dbg_mww);
} elsif ($len1 > 0) {
# 2nd is nul
if ($fil1 eq '*') {
prt(" = 1 - asterix matches nul\n") if ($dbg_mww);
return 1; # nul matches asterix
}
prt(" = 0 - len1 > 0, but [$fil1]\n") if ($dbg_mww);
} elsif ($len2 > 0) {
# 1st is nul
if ($fil2 eq '*') {
prt(" = 1 - nul match asterix\n") if ($dbg_mww);
return 1; # nul matches asterix
}
prt(" = 0 - len2 > 0, but [$fil1]\n") if ($dbg_mww);
} else {
prt(" = 0 - no case\n") if ($dbg_mww);
}
return 0;
}
# 20140911 - fix for wild like 'temp*' = 'temp*.*';
sub matches_wild($$) { # 20140911 - fix for wild like 'temp*' = 'temp*.*';
my ($fil,$wild) = @_;
my ($n1,$d1,$e1) = fileparse( $fil, qr/\.[^.]*/ );
my ($n2,$d2,$e2) = fileparse( $wild, qr/\.[^.]*/ );
my $lcn1 = lc($n1);
my $lcn2 = lc($n2);
# strip . from extension
$e1 =~ s/^\.//;
$e2 =~ s/^\.//;
my $lce1 = lc($e1);
my $lce2 = lc($e2);
# add * if no extent
$lce1 = '*' if (length($lce1) == 0);
$lce2 = '*' if (length($lce2) == 0);
prt("matches_wild: [$n1] [$n2] and [$e1] [$e2]\n") if (VERB9());
return 1 if (($lcn1 eq $lcn2)&&($lce1 eq $lce2));
return 2 if (($lcn1 eq $lcn2)&&($lce2 eq '*'));
return 3 if (($lcn2 eq '*')&&($lce1 eq $lce2));
return 4 if (match_with_wild($lcn1,$lcn2) && match_with_wild($lce1,$lce2));
return 0;
}
sub has_wild($) {
my $txt = shift;
my $len = length($txt);
my ($i,$c);
for ($i = 0; $i < $len; $i++) {
$c = substr($txt,$i,1);
return 1 if (($c eq '?')||($c eq '*'));
}
return 0;
}
sub is_in_excluded($) {
my $file = shift;
my ($xcl);
foreach $xcl (@excluded) {
return 1 if ($xcl eq $file);
return 1 if (lc($xcl) eq lc($file));
if (has_wild($xcl)) {
return 1 if (matches_wild($file,$xcl));
}
}
return 0;
}
sub get_html_title($) {
my $ff = shift;
my $title = '';
if (open FIL,"<$ff") {
my @lines = ;
close FIL;
my ($line);
$line = join(" ",@lines);
my $ra = get_html_refarray($line); # my $content = shift;
my $ta = get_whole_tag_array($ra,'title',0);
# show_html_refarray($ta);
my $tta = get_title_text($ta,0);
$title = join(" ",@{$tta});
prt("File $ff has title: $title\n") if (VERB9());
}
return $title;
}
sub scan_directory($$) {
my ($dir,$rlist) = @_;
if (!opendir(DIR,$dir)) {
prtw("WARNING: Failed to open directory [$dir]\n");
}
my @files = readdir(DIR);
closedir(DIR);
my ($item,$ff,$ishtml,$sb,$ft,$desc,$title);
$dir .= $PATH_SEP if (!($dir =~ /(\\|\/)$/));
my @dirs = ();
$desc = 'nbsp;';
foreach $item (@files) {
next if (($item eq '.')||($item eq '..'));
$ff = $dir.$item;
if (-f $ff) {
# next if ($item eq $out_xml); # skip self
next if ($item =~ /\.bak$/i); # skip .bak
$ishtml = is_html_like($item);
next if ($html_only && !$ishtml);
if (is_in_excluded($item)) {
prt("User excluded [$ff]\n") if (VERB1());
next;
}
my ($n,$d,$e) = fileparse($item, qr/\.[^.]*/);
$sb = stat($ff);
$ft = $sb->mtime;
$desc = ' ';
if ($desccol) {
if (defined $descriptions{$item}) {
$desc = $descriptions{$item};
} else {
$title = get_html_title($ff);
prtw("WARNING: No desc for $item,$title\n");
if (length($title)) {
push(@descriptions,"$item,$title");
$desc = $title;
} else {
$desc = ' ';
}
}
}
# 0 1 2 3
push(@{$rlist},[$ff,$e,$ft,$desc]); # got a FILE
} elsif (-d $ff) {
push(@dirs,$ff) if (!is_repo_directory($item)); # got a directory
} else {
prtw("WARNING: item [$ff] skipped!\n");
}
}
foreach $dir (@dirs) {
scan_directory($dir,$rlist);
}
}
sub mycmp_nc_n1 {
my $nm1 = lc(${$a}[1]);
my $nm2 = lc(${$b}[2]);
return 1 if ($nm1 gt $nm2);
return -1 if ($nm1 lt $nm2);
return 0;
}
sub write_html($$) {
my ($dir,$rlist) = @_;
my $len = length($dir);
my ($file,$cnt,$i,$html,$wrap,$cols,$rows,$mrow,$flen,$maxlen,$ind,$href,$tmp,$desc);
my ($line,$name,$ncnt);
my $added_cnt = 0;
$cnt = scalar @{$rlist};
if ($cnt == 0) {
prt("No files to write, thus no html generated!\n");
return;
}
my $wh = " width=\"400\" height=\"400\"";
$ncnt = 0;
$html = "\n";
$html .= "\n";
$html .= " \n";
$html .= " IFrame Map\n";
$html .= " \n";
$html .= " \n";
$html .= " IFrame Map - $cnt Files
\n";
$html .= " index\n";
$html .= " end
\n";
if ($add_sorted) {
$html .= "Alpha sorted jump list
\n";
}
$line = '';
$wrap = $colcount;
$mrow = 24;
$maxlen = 50;
$rows = 0;
$cols = 0;
my %dupes = ();
my @backups = ();
my @links = ();
for ($i = 0; $i < $cnt; $i++) {
## 0 1 2 3
#push(@{$rlist},[$ff,$e,$ft,$desc]); # got a FILE
$file = path_d2u(substr(${$rlist}[$i][0],$len)); # strip base directory
next if (defined $dupes{$file});
$dupes{$file} = 1;
$desc = ${$rlist}[$i][3];
next if ($desc eq 'EXCLUDE');
$href = $file;
my ($nm,$dr) = fileparse($file);
$flen = length($file);
$ind = index($file,'/');
if (($len > $maxlen)&&($ind > 0)) {
$dr = substr($file,0,$ind);
$file = "$dr...$nm";
}
push(@links,[$href,$nm,$file]);
if ($desc eq 'Previous backup copy') {
push(@backups,[$file,$href,$desc]);
next;
}
$added_cnt++;
$line = '';
$line .= "
\n";
}
$html .= $line;
$cols++;
}
# maybe now another table by the extension, or whatevr
my $date = lu_get_YYYYMMDD_hhmmss_UTC(time());
$html .= " Done Site Map of $added_cnt of $cnt files, on ".$date." UTC, by $pgmname
\n";
$html .= " ";
$html .= " \n";
$html .= "\n";
write2file($html,$out_xml);
prt("Site list written to $out_xml\n");
if ($load_log && VERB5()) {
prt("=== HTML start ==========================================\n");
prt($html);
prt("=== HTML end ==========================================\n");
}
if ($os =~ /win/i) {
system($out_xml);
} else {
system("firefox $out_xml");
}
}
sub mycmp_decend_n2 {
return 1 if (${$a}[2] < ${$b}[2]);
return -1 if (${$a}[2] > ${$b}[2]);
return 0;
}
sub process_in_directory($) {
my $dir = shift;
opendir(DIR,$dir) || pgm_exit(1,"ERROR: Unable to open directory [$dir]!\n");
my @files = readdir(DIR);
closedir(DIR);
my $itemcnt = scalar @files;
prt("Got $itemcnt items, from base directory [$dir]...\n");
my ($item,$ff,$ishtml,$sb,$ft,$desc);
ut_fix_directory(\$dir);
#$dir .= $PATH_SEP if (!($dir =~ /(\\|\/)$/));
my @file_list = ();
my @dirs = ();
$desc = ' ';
foreach $item (@files) {
next if (($item eq '.')||($item eq '..'));
$ff = $dir.$item;
if (-f $ff) {
next if ($item eq $out_xml); # skip self
next if ($item =~ /\.bak$/i); # skip .bak
next if ($item =~ /^temp/i);
$ishtml = is_html_like($item);
next if ($html_only && !$ishtml);
if (is_in_excluded($item)) {
prt("User excluded [$ff]\n") if (VERB1());
next;
}
my ($n,$d,$e) = fileparse($item, qr/\.[^.]*/);
$sb = stat($ff);
$ft = $sb->mtime;
$desc = ' ';
if ($desccol) {
if (defined $descriptions{$item}) {
$desc = $descriptions{$item};
} else {
$desc = get_html_title($ff); # get 'title'
prtw("WARNING: No decription for $item,$desc\n");
if (length($desc) == 0) {
$desc = ' ';
} else {
push(@descriptions,"$item,$desc");
}
}
}
# 0 1 2 3
push(@file_list,[$ff,$e,$ft,$desc]); # got a FILE
} elsif (-d $ff) {
push(@dirs,$ff) if (!is_repo_directory($item)); # got a directory - skip repos
} else {
prtw("WARNING: item [$ff] skipped!\n");
}
}
if ($recursive) {
foreach $dir (@dirs) {
scan_directory($dir,\@file_list);
}
}
$itemcnt = scalar @file_list;
my @arr = sort mycmp_decend_n2 @file_list;
prt("Got TOTAL $itemcnt files, from directory $dir...\n");
write_html($dir,\@arr);
##write_html($dir,\@file_list);
}
sub show_descriptions()
{
my $cnt = scalar @descriptions;
if (!$cnt) {
return;
}
prt("Using document title, have following $cnt descriptions for consideration...\n");
prt(join("\n",@descriptions)."\n");
}
#########################################
### MAIN ###
parse_args(@ARGV);
set_show_warnings(0) if (!VERB2());
process_in_directory($in_dir);
## show_descriptions();
pgm_exit(0,"");
########################################
sub need_arg {
my ($arg,@av) = @_;
pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}
sub load_descriptions($) {
my $fil = shift;
if (! open(FIL,"<$fil")) {
pgm_exit(1,"ERROR: Unable to open description file [$fil]!\n");
}
my @lines = ;
close FIL;
my $lncnt = scalar @lines;
my ($i,$line,@arr,$cnt,$len,$tline,$i2,$file,$desc);
my $dcnt = 0;
for ($i = 1; $i < $lncnt; $i++) {
$i2 = $i+1;
$line = $lines[$i];
chomp $line;
$tline = trim_all($line);
$len = length($tline);
next if ($len == 0);
@arr = split(",",$line);
$cnt = scalar @arr;
if ($cnt == 2) {
$file = $arr[0];
$desc = $arr[1];
$descriptions{$file} = $desc;
$dcnt++;
} else {
prtw("$i2: Did NOT split into 2 [$line]! Got $cnt\n");
}
}
prt("Loaded $dcnt descriptions from $fil\n");
}
sub parse_args {
my (@av) = @_;
my ($arg,$sarg,$tmp,@arr,$len);
while (@av) {
$arg = $av[0];
if ($arg =~ /^-/) {
$sarg = substr($arg,1);
$sarg = substr($sarg,1) while ($sarg =~ /^-/);
if (($sarg =~ /^h/i)||($sarg eq '?')) {
give_help();
pgm_exit(0,"Help exit(0)");
} elsif ($sarg =~ /^v/) {
if ($sarg =~ /^v.*(\d+)$/) {
$verbosity = $1;
} else {
while ($sarg =~ /^v/) {
$verbosity++;
$sarg = substr($sarg,1);
}
}
prt("Verbosity = $verbosity\n") if (VERB1());
} elsif ($sarg =~ /^l/) {
$load_log = 1;
prt("Set to load log at end.\n") if (VERB1());
} elsif ($sarg =~ /^r/) {
$recursive = 1;
prt("Set recrusive to $recursive.\n") if (VERB1());
} elsif ($sarg =~ /^a/) {
$html_only = 0;
prt("Set load ALL files.\n") if (VERB1());
# } elsif ($sarg =~ /^d/) {
# $desccol = 1;
# need_arg(@av);
# shift @av;
# $sarg = $av[0];
# load_descriptions($sarg);
# prt("Set to add a description column.\n") if (VERB1());
} elsif ($sarg =~ /^b/) {
$blank = 1;
prt("Set to add _blank to href.\n") if (VERB1());
} elsif ($sarg =~ /^o/) {
need_arg(@av);
shift @av;
$sarg = $av[0];
$out_xml = $sarg;
prt("Set out file to [$out_xml].\n") if (VERB1());
} elsif ($sarg =~ /^c/) {
need_arg(@av);
shift @av;
$sarg = $av[0];
if (($sarg =~ /^\d+$/) && ($sarg ne '0')) {
$colcount = $sarg;
prt("Set column count to [$colcount].\n") if (VERB1());
} else {
pgm_exit(1,"ERROR: Column count must be 1 to nn! Not [$arg]!\n");
}
} elsif ($sarg =~ /^s/) {
$add_sorted = 1;
prt("Add alpha sorted links.\n") if (VERB1());
} elsif ($sarg =~ /^x/) {
need_arg(@av);
shift @av;
$sarg = $av[0];
@arr = split(':',$sarg);
foreach $tmp (@arr) {
push(@excluded,$tmp);
prt("Exclude file/files matching $tmp\n") if (VERB1());
}
} elsif ($sarg =~ /^X/) {
need_arg(@av);
shift @av;
$sarg = $av[0];
if (-f $sarg) {
if (open FIL, "<$sarg") {
@arr = ;
close FIL;
foreach $tmp (@arr) {
chomp $tmp;
$tmp = trim_all($tmp);
$len = length($tmp);
next if ($len == 0);
next if ($tmp =~ /^\#/);
push(@excluded,$tmp);
prt("Exclude file/files matching $tmp\n") if (VERB1());
}
} else {
pgm_exit(1,"ERROR: Unable to 'open' file [$sarg]! Check name, location, spelling.\n");
}
} else {
pgm_exit(1,"ERROR: Unable to 'stat' file [$sarg]! Check name, location, spelling.\n");
}
} else {
pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
}
} else {
$in_dir = $arg;
prt("Set input to [$in_dir]\n") if (VERB1());
if (! -d $in_dir) {
pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
}
}
shift @av;
}
if ((length($in_dir) == 0) && $debug_on) {
$in_dir = $def_file;
prt("Set DEFAULT input to [$in_dir]\n");
}
if (length($in_dir) == 0) {
pgm_exit(1,"ERROR: No input directory found in command!\n");
}
if (! -d $in_dir) {
pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
}
}
sub give_help {
prt("$pgmname: version $VERS\n");
prt("Usage: $pgmname [options] in-directory\n");
prt("Options:\n");
prt(" --help (-h or -?) = This help, and exit 0.\n");
prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n");
prt(" --load (-l) = Load LOG at end. ($outfile)\n");
prt(" --out (-o) = Write output to this file.\n");
prt(" --recursive (-r) = Recurse into subdirectories. (def=$recursive)\n");
prt(" --all (-a) = Include ALL files. Default is just html like files. (def=$html_only)\n");
prt(" --blank (-b) = Add target=\"_blank\" to href. (def=$blank)\n");
prt(" --cols n (-c) = Set column count. (def=$colcount)\n");
#prt(" --desc file.csv (-d) = Add description column, from csv file. (def=$desccol)\n");
prt(" --sort (-s) = Add alpha sorted jump list. (def=$add_sorted)\n");
prt(" --xclude nm1[:nm2] (-x) = Exclude matching files. Can be ':' sep. list and wild, *,?.\n");
prt(" --Xclude file (-X) = Exclude file list, line separated. Lines begin # ignored\n");
prt(" Will scan the input directory, and build a $out_xml html file.\n");
}
# eof - genindex04.pl