#!/usr/bin/perl -w
# NAME: findinfile.pl
# AIM: Find a string in a file, using perl regex
# 07/07/2013 - Improve UI
# 31/05/2011 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;
use Cwd;
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
# NOTE: prt(), open_log(), close_log(), trim_all(), space_split() and
# write2file() are not defined in this file; they are expected to come
# from lib_utils.pl.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl ...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # keep only the last path component (either slash style)
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

my $VERS = "0.0.2 2013-07-07";
###my $VERS = "0.0.1 2011-05-31";

# user variables
my $load_log = 0;       # passed to close_log() at exit (set by -l, -ll, -lll)
my $in_file = '';       # first/only input file, used for validation
my $find_this = '';     # the user's regex text (-f)
my @in_files = ();      # every file that will be scanned
my $whole_finds = 0;    # -w: regex must match a whole word
my $starts_with = 0;    # -s: regex must match at the start of a word
my $ends_with = 0;      # -e: regex must match at the end of a word
my $out_file = '';      # -o: optional results file
my $files_scanned = 0;
my $total_lines = 0;
my $total_finds = 0;

my $def_dbg_on = 0;
my $def_file = 'C:\DTEMP\explist.txt';
# my $def_find = "\\b(\\d{7})\\.(\\d+)\\b";
my $def_find = '\b(\d{7})\.(\d+)\b';

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;
my $verbosity = 0;

sub VERB1() { return ($verbosity >= 1); }
sub VERB2() { return ($verbosity >= 2); }
sub VERB5() { return ($verbosity >= 5); }
sub VERB9() { return ($verbosity >= 9); }

### debug
my $dbg_01 = 0;

# Print all collected warnings; if there are none, say so only when
# $val is true.
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
            prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if ($val);
    }
}

# Print an optional final message, show warnings, close the log and
# exit the program with status $val.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}

# Print a warning now and remember it for the summary at exit.
sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# Each entry is [ line text, line number, file name ]
my @found_lines = ();

# Split text into alternating runs of word (\w) and non-word characters,
# e.g. 'foo.bar' gives ('foo', '.', 'bar'). Returns the list of runs.
sub split_words($) {
    my $txt = shift;
    my @arr = ($txt =~ /(\w+|\W+)/g);
    return @arr;    # return WORD SPLIT array
}

# Split a line into items with space_split(), then break any item that
# mixes word and non-word characters into its separate runs.
# Returns a reference to the resulting array.
sub word_split($) {
    my $line = shift;
    my @arr = ();
    my @a = space_split($line);
    # FIX: iterate the space-split items (@a); the original iterated the
    # still-empty result array and so always returned an empty list.
    foreach my $itm (@a) {
        if ((length($itm) == 1) || ($itm =~ /^\w+$/)) {
            push(@arr,$itm);
        } else {
            push(@arr,split_words($itm));
        }
    }
    return \@arr;
}

# Scan one file line by line, recording every matching line in
# @found_lines and updating the global counters. Exits the program if
# the file cannot be opened.
sub process_in_file($) {
    my ($inf) = @_;
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    prt("Processing [$inf]...\n") if (VERB9());
    $files_scanned++;

    # In word mode the regex is tested against each word of the trimmed
    # line; otherwise it is tested against the whole raw line.
    my $word_mode = ($whole_finds || $starts_with || $ends_with) ? 1 : 0;
    my $re;
    if ($whole_finds) {
        $re = qr/^$find_this$/;
    } elsif ($starts_with) {
        $re = qr/^$find_this/;
    } elsif ($ends_with) {
        $re = qr/$find_this$/;
    } else {
        $re = qr/$find_this/;
    }

    my $lnn = 0;
    my $finds = 0;
    while (defined(my $line = <$fh>)) {
        chomp $line;
        $lnn++;
        if (($lnn % 100000)==0) {
            prt("$lnn\n");  # progress indicator for very large files
        }
        my $tline = trim_all($line);
        next if (length($tline) == 0);
        my $fnd = 0;    # reset for every line so no stale result carries over
        if ($word_mode) {
            my $ra = word_split($tline);
            foreach my $word (@{$ra}) {
                if ($word =~ $re) {
                    $fnd = 1;
                    last;   # got a match
                }
            }
        } else {
            $fnd = 1 if ($line =~ $re);
        }
        if ($fnd) {
            prt("$lnn: $line\n") if (VERB9());
            $finds++;
            push(@found_lines, [$line, $lnn, $inf]);
            $total_finds++;
        }
    }
    close $fh;
    prt("Done $lnn lines with $finds finds...\n") if (VERB5());
    $total_lines += $lnn;
}

# Scan every file collected in @in_files.
sub process_in_files() {
    foreach my $file (@in_files) {
        process_in_file($file);
    }
}

# Print the finds grouped by file (files sorted by name), followed by a
# summary line; also write the report to $out_file when one was given.
sub show_finds() {
    #                     0      1     2
    #push(@found_lines, [$line, $lnn, $inf]);
    my %h = ();
    foreach my $rf (@found_lines) {
        my ($line,$lnn,$inf) = @{$rf};
        push(@{ $h{$inf} }, [$line,$lnn]);
    }
    my @arr = sort keys(%h);    # get file list
    my $msg = '';
    $msg .= "Scanned $files_scanned files, $total_lines lines, for $total_finds finds.\n";
    foreach my $inf (@arr) {
        my $ra = $h{$inf};
        my $cnt = scalar @{$ra};
        prt("File: $inf with $cnt finds...\n");
        $msg .= "File: $inf with $cnt finds...\n";
        foreach my $rl (@{$ra}) {
            my ($line,$lnn) = @{$rl};
            my $clnn = sprintf("%4d:",$lnn);
            prt("$clnn $line\n");
            $msg .= "$clnn $line\n";
        }
    }
    prt("Scanned $files_scanned files, $total_lines lines, for $total_finds finds.\n");
    if (length($out_file) && length($msg)) {
        write2file($msg,$out_file);
        prt("Written results to [$out_file]\n");
    }
}

#########################################
### MAIN ###
parse_args(@ARGV);
###prt( "$pgmname: in [$cwd]: Hello, World...\n" );
process_in_files();
show_finds();
pgm_exit(0,"");
########################################

# Print the usage text.
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --find regex (-f) = Regex to use to FIND in the in-file\n");
    prt(" --inp file (-i) = Treat file as a line separated list of files\n");
    prt(" --load_log (-l) = Load log file at end.\n");
    prt(" --out file (-o) = Output the finds to this file.\n");
    prt(" --whole (-w) = Regex must match a whole word.\n");
    prt(" --starts (-s) = Regex must match at the start of a word.\n");
    prt(" --ends (-e) = Regex must match at the end of a word.\n");
    prt(" Lines beginning with other than alphanumeric, and 'NOT ' will be skipped\n");
}

# Exit with an error unless at least one argument follows $arg.
# Called as need_arg(@av), where $av[0] is the option being processed.
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

# Read a list file (one file name per line) and add each existing file
# to @in_files. Blank lines, lines starting with 'NOT ' and lines not
# starting with a word character are skipped; missing files produce a
# warning. Returns the total number of entries now in @in_files.
sub get_in_file_list($) {
    my $fil = shift;
    my $fh;
    if (! open($fh, '<', $fil)) {
        pgm_exit(1,"ERROR: Unable to open $fil!\n");
    }
    my @lines = <$fh>;
    close $fh;
    foreach my $line (@lines) {
        chomp $line;
        next if (length($line) == 0);       # skip blank lines
        next if ($line =~ /^NOT\s+/);       # skip begin with 'NOT '
        next if ( !($line =~ /^\w+/) );     # skip does NOT start with alphanum...
        if (-f $line) {
            push(@in_files,$line);
        } else {
            prtw("WARNING: Unable to locate file [$line]\n");
        }
    }
    my $cnt = scalar @in_files;
    return $cnt;
}

# Parse the command line into the user variables above and validate the
# result. Any error terminates the program through pgm_exit(1,...).
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg,$cnt);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # strip all leading dashes, so -x and --x are equivalent
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                $load_log = 2 if ($sarg =~ /^ll/);
                $load_log = 3 if ($sarg =~ /^lll/);
                prt("Set to load log $load_log at end\n") if (VERB1());
            } elsif ($sarg =~ /^f/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $find_this = $sarg;
                prt("Set regex to [$find_this]\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set output file to [$out_file]\n") if (VERB1());
            } elsif ($sarg =~ /^i/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                if (-f $sarg) {
                    $cnt = get_in_file_list($sarg);
                    if ($cnt) {
                        prt("Set $cnt file list from [$sarg]\n") if (VERB1());
                        $in_file = $in_files[0];
                    } else {
                        prtw("WARNING: No input files found in [$sarg]\n");
                    }
                } else {
                    pgm_exit(1,"ERROR: Can NOT find file $sarg\n");
                }
            } elsif ($sarg =~ /^v/i) {
                # FIX: non-greedy .*? so that all trailing digits are
                # captured (-v10 sets 10; the greedy form captured only '0').
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # no number given: each leading 'v' bumps the level
                    while ($sarg =~ /^v/i) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Set Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^w/i) {
                $whole_finds = 1;
                prt("Set whole find only\n") if (VERB1());
            } elsif ($sarg =~ /^s/i) {
                $starts_with = 1;
                prt("Set starts with\n") if (VERB1());
            } elsif ($sarg =~ /^e/i) {
                $ends_with = 1;
                prt("Set ends with\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            if (-f $in_file) {
                prt("Set input to [$in_file]\n") if (VERB1());
                push(@in_files,$in_file);
            } else {
                pgm_exit(1,"ERROR: Can NOT find file $arg\n");
            }
        }
        shift @av;
    }

    if ((length($in_file) == 0) && $def_dbg_on) {
        $in_file = $def_file;
        $find_this = $def_find;
    }
    if (length($in_file) == 0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (length($find_this) == 0) {
        pgm_exit(1,"ERROR: No 'find' item found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
    # Validate the regex once here, so a bad pattern is reported cleanly
    # instead of aborting in the middle of a scan.
    if (!defined(eval { qr/$find_this/ })) {
        my $err = $@;
        pgm_exit(1,"ERROR: Invalid regex [$find_this]! $err");
    }
    # FIX: the debug default file was never queued for scanning.
    push(@in_files,$in_file) if (!@in_files);
    if ($starts_with && $ends_with) {
        $whole_finds = 1;
    }
}

# eof - findinfile.pl