#!/usr/bin/perl -w
# NAME: samefiles.pl
# AIM: Given a directory, search, recursively for the same files and report...
#
# "Same" here means: same file name AND same size, found in two different
# directories of the scanned tree. File contents are NOT compared.
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;      # object-style stat(); used below for the file size
use Cwd;

my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Anchor on 'MSWin': a bare /win/i would also match 'darwin' (macOS) and
# 'cygwin', wrongly selecting the Windows paths and separator there.
if ($os =~ /^MSWin/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
# lib_utils.pl supplies prt(), open_log(), close_log() and ut_fix_directory().
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";

# log file stuff
our ($LF);  # NOTE(review): presumably the log handle shared with lib_utils.pl - confirm
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # keep only the last path component of the script name
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.5 2015-01-09";
my $load_log = 0;
my $in_dir = '';
my $verbosity = 0;
my $out_file = '';  # NOTE(review): set by -o but never read anywhere in this file
my @repofolders = qw( CVS .svn .git .hg );

# ### DEBUG ###
# NOTE(review): with $debug_on set, a missing input directory silently falls
# back to $def_file and a "DEBUG is ON" warning is always issued.
my $debug_on = 1;
my $def_file = 'F:\Projects\tidy-tests\cases';

### program variables
my @warnings = ();
my $cwd = cwd();
my $total_files = 0;
my $total_dirs = 0;
my $total_bytes = 0;
my $dupe_bytes = 0;
my $same_same = 0;
my %dir_hash = ();  # directory path => ref to list of [file name, size] pairs

# Verbosity predicates. The explicit 'return' is kept on purpose.
sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

# Print every collected warning, or (only at verbosity 9) a note that there
# were none. The argument is accepted for call compatibility but not used.
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
            prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

# Print an optional message, show the warnings, close the log and exit.
#   $val - process exit code
#   $msg - message to print first ('' for none); a newline is added if missing
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}

# Print a warning now and remember it (without its trailing newline) so that
# show_warnings() can repeat it at exit.
sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# Return 1 if the given directory name is one of @repofolders, else 0.
sub is_repo_folder {
    my ($fdr) = shift;
    foreach my $tst (@repofolders) {
        return 1 if ($tst eq $fdr);
    }
    return 0;
}

# Read a file and print each '#include ...' style line with its line number.
# Exits the program with code 1 if the file cannot be opened.
# Not called from anywhere in this file.
sub process_in_file($) {
    my ($inf) = @_;
    my $fh;
    # three-argument open: the file name can never be taken as a mode
    if (! open $fh, '<', $inf) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    my @lines = <$fh>;  # the readline operator was missing here (syntax error)
    close $fh;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my $lnn = 0;
    foreach my $line (@lines) {
        chomp $line;
        $lnn++;
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            my $inc = $1;
            prt("$lnn: $inc\n");
        }
    }
}

# Recursively scan one directory.
# Every plain file is recorded in %dir_hash under its directory as a
# [name, size] pair and added to the running totals; sub-directories that are
# not repository folders are scanned after the current directory's entries.
# Returns 1 if the directory could be opened, 0 (after a warning) if not.
sub process_a_dir($);
sub process_a_dir($) {
    my $dir = shift;
    my $dh;
    if ( !opendir( $dh, $dir ) ) {
        prtw("WARNING: Unable to open dir $dir!\n");
        return 0;
    }
    my @files = readdir($dh);
    closedir $dh;
    my @dirs = ();
    # NOTE(review): presumably makes $dir end in a path separator, since it is
    # concatenated directly with file names below - confirm in lib_utils.pl
    ut_fix_directory(\$dir);
    $total_dirs++;
    foreach my $file (@files) {
        next if ($file eq '.');
        next if ($file eq '..');
        my $ff = $dir.$file;
        if (-f $ff) {
            my $sz = 0;  # size stays 0 when stat() fails
            my $sb = stat($ff);
            $sz = $sb->size if ($sb);
            $total_bytes += $sz;
            push(@{ $dir_hash{$dir} }, [$file,$sz]);
            $total_files++;
        } elsif (-d $ff) {
            push(@dirs,$ff) if (!is_repo_folder($file));
        } else {
            prtw("WARNING: Skipping '$file'! path $dir\n");
        }
    }
    foreach my $sub (@dirs) {
        process_a_dir($sub);
    }
    return 1;
}

# Scan the top-level input directory and print the totals.
# No summary is printed when the directory itself cannot be opened.
sub process_in_dir($) {
    my $dir = shift;
    return if (!process_a_dir($dir));
    prt("Processed $total_dirs, for $total_files files, $total_bytes total bytes...\n");
}

# Compare the file lists of two directories.
#   $da1, $da2 - refs to lists of [name, size] pairs
#   $di1, $di2 - the matching directory paths (used in messages only)
# A name present in both with equal size is reported and counted in
# $same_same / $dupe_bytes; equal name but different size is reported only at
# verbosity 9. Reports follow the order of the first list.
sub comp_arrays($$$$) {
    my ($da1,$di1,$da2,$di2) = @_;
    # Index the second list by name; names are unique within one directory,
    # so this finds the same single match a full scan would.
    my %size2 = map { $_->[0] => $_->[1] } @{$da2};
    foreach my $ra1 (@{$da1}) {
        my ($fil1,$sz1) = @{$ra1}[0,1];
        next if (!exists $size2{$fil1});
        my $sz2 = $size2{$fil1};
        if ($sz1 == $sz2) {
            prt("File $fil1 SAME in $di1 and $di2 - $sz1 bytes\n"); # if (VERB5());
            $dupe_bytes += $sz1;
            $same_same++;
        } else {
            prt("File $fil1 in $di1 and $di2 - diff sizes $sz1 and $sz2\n") if (VERB9());
        }
    }
}

# Compare every unordered pair of scanned directories once (in sorted key
# order), then print the duplicate totals.
sub show_dir_hash() {
    my @dirs = sort keys %dir_hash;
    my $cnt = scalar @dirs;
    prt("Processing $cnt dir keys...\n");
    foreach my $i (0 .. $#dirs) {
        my $dir1 = $dirs[$i];
        foreach my $j ($i + 1 .. $#dirs) {
            my $dir2 = $dirs[$j];
            comp_arrays($dir_hash{$dir1},$dir1,$dir_hash{$dir2},$dir2);
        }
    }
    prt("Same-same $same_same of $total_files. Duplicated $dupe_bytes bytes...\n");
    $load_log = 1;  # passed to close_log() by pgm_exit(), regardless of -l
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_dir($in_dir);
show_dir_hash();
pgm_exit(0,"");
########################################

# Exit with an error unless the option $arg is followed by another argument.
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

# Parse the command line into the user variables above.
# Exits with code 1 on an invalid option, a missing option value, or a
# missing / non-existent input directory; exits with 0 after showing help.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # strip all leading dashes so '-v' and '--verb' parse alike
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # \D* rather than .* so the whole trailing number is captured
                # ('-v10' used to yield 0 because .* swallowed the '1')
                if ($sarg =~ /^v\D*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # no number: each leading 'v' bumps verbosity by one
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_dir = $arg;
            prt("Set input to [$in_dir]\n") if ($verb);
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_dir) == 0) {
            $in_dir = $def_file;
            prt("Set DEFAULT input to [$in_dir]\n");
        }
    }
    if (length($in_dir) == 0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -d $in_dir) {
        pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
    }
}

# Print the usage text.
sub give_help {
    prt("$pgmname: version $VERS\n");
    # the input must be a directory (parse_args rejects anything else)
    prt("Usage: $pgmname [options] in-directory\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out (-o) = Write output to this file.\n");
}

# eof - samefiles.pl