samefiles.pl to HTML.

index -|- end

Generated: Mon Aug 29 19:34:57 2016 from samefiles.pl 2016/02/19 8.9 KB. text copy

#!/usr/bin/perl -w
# NAME: samefiles.pl
# AIM: Given a directory, search it recursively for files with the same name and size, and report them.
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat; # to get the file date
use Cwd;
# Platform-dependent locations: the directory added to @INC so that
# lib_utils.pl can be found, the path separator, and the log directory.
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Windows-style platforms are recognised by the "win" in $^O. macOS must be
# excluded explicitly: its $^O is "darwin", which also contains "win".
if (($os =~ /win/i) && ($os !~ /^darwin/i)) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
# lib_utils.pl supplies helpers used below (prt, open_log, close_log,
# ut_fix_directory are not defined in this file).
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
# Reduce $0 to its last path component, accepting either '/' or '\'.
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# Log file name: <temp dir>/temp.<script name>.txt
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables - set from the command line by parse_args()
# Version string shown by give_help().
my $VERS = "0.0.5 2015-01-09";
# Passed to close_log() on exit; set by -l/-ll and forced to 1 by show_dir_hash().
my $load_log = 0;
# Directory to scan (the bare, non-option command line argument).
my $in_dir = '';
# Verbosity level tested by the VERBn() predicates.
my $verbosity = 0;
# Set by -o; not read anywhere else in this file.
my $out_file = '';

# Directory names that are never descended into (see is_repo_folder()).
my @repofolders = qw( CVS .svn .git .hg );

# ### DEBUG ###
# NOTE(review): with $debug_on set, every run records a "DEBUG is ON" warning
# and a run without an input directory falls back to $def_file, a hard-coded
# developer path - confirm this is meant to stay enabled.
my $debug_on = 1;
my $def_file = 'F:\Projects\tidy-tests\cases';

### program variables
# Messages collected by prtw() and listed by show_warnings().
my @warnings = ();
# Working directory at start-up; not read anywhere else in this file.
my $cwd = cwd();
# Running totals maintained while scanning directories.
my $total_files = 0;
my $total_dirs = 0;
my $total_bytes = 0;
# Totals maintained by comp_arrays(): bytes and count of same-name, same-size files.
my $dupe_bytes = 0;
my $same_same = 0;
# Directory path => reference to an array of [file name, size] pairs.
my %dir_hash = ();

# Verbosity predicates: each is true once $verbosity has reached its level.
sub VERB1() {
    return $verbosity >= 1;
}

sub VERB2() {
    return $verbosity >= 2;
}

sub VERB5() {
    return $verbosity >= 5;
}

sub VERB9() {
    return $verbosity >= 9;
}

# List every message collected in @warnings. When there are none, say so,
# but only at verbosity 9 or above. The argument is accepted but not used.
sub show_warnings($) {
    my ($val) = @_;
    if (!@warnings) {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    } else {
        my $count = scalar @warnings;
        prt( "\nGot ".$count." WARNINGS...\n" );
        for my $warning (@warnings) {
            prt($warning."\n");
        }
        prt("\n");
    }
}

# Common exit path: print an optional final message (newline-terminated if
# it is not already), list the collected warnings, hand the log file name
# and $load_log to close_log(), then exit with the given status.
#   $val - process exit status
#   $msg - final message; an empty string prints nothing
sub pgm_exit($$) {
    my $val = shift;
    my $msg = shift;
    if (length($msg)) {
        $msg .= "\n" unless ($msg =~ /\n$/);
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning immediately and remember it (without its trailing
# newline) in @warnings so show_warnings() can repeat it at exit.
sub prtw($) {
    my ($tx) = @_;
    $tx =~ s/\n$//;
    prt($tx."\n");
    push @warnings, $tx;
}

# Return 1 when the given directory name exactly matches one of the names
# in @repofolders, otherwise 0.
sub is_repo_folder {
    my ($fdr) = @_;
    my $hits = grep { $_ eq $fdr } @repofolders;
    return $hits ? 1 : 0;
}


# Read a text file and print, with its line number, the remainder of every
# line that contains a '#include' directive.
#   $inf - path of the file to read; the program exits with status 1 via
#          pgm_exit() when it cannot be opened.
sub process_in_file($) {
    my ($inf) = @_;
    # Three-argument open with a lexical handle: the name is taken literally,
    # so leading mode characters or surrounding whitespace in $inf cannot
    # change how the file is opened.
    my $in_fh;
    if (! open($in_fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    my @lines = <$in_fh>;
    close $in_fh;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my $lnn = 0;
    foreach my $line (@lines) {
        chomp $line;
        $lnn++;
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            my $inc = $1;
            prt("$lnn: $inc\n");
        }
    }
}

sub process_a_dir($);

# Recursively scan one directory. Every plain file is recorded in %dir_hash
# under its directory as a [name, size] pair and added to the running totals;
# every sub-directory whose name is not a repository folder is scanned in
# turn. Entries that are neither a file nor a directory produce a warning.
#   $dir - directory to scan; a warning is recorded and nothing else happens
#          when it cannot be opened.
sub process_a_dir($) {
    my $dir = shift;
    # Lexical dirhandle: this routine is recursive, so it must not share a
    # global bareword handle between invocations.
    my $dh;
    if ( !opendir( $dh, $dir ) ) {
        prtw("WARNING: Unable to open dir $dir!\n");
        return;
    }
    my @files = readdir($dh);
    closedir $dh;
    # NOTE(review): presumably ensures $dir ends with a path separator, since
    # names are appended to it directly below - confirm in lib_utils.pl.
    ut_fix_directory(\$dir);
    $total_dirs++;
    my @dirs = ();
    foreach my $file (@files) {
        next if ($file eq '.');
        next if ($file eq '..');
        my $ff = $dir.$file;
        if (-f $ff) {
            # A failed stat leaves the size at 0 rather than aborting.
            my $sb = stat($ff);
            my $sz = $sb ? $sb->size : 0;
            $total_bytes += $sz;
            # The per-directory list is autovivified on first push.
            push(@{ $dir_hash{$dir} }, [$file,$sz]);
            $total_files++;
        } elsif (-d $ff) {
            push(@dirs,$ff) if (!is_repo_folder($file));
        } else {
            prtw("WARNING: Skipping '$file'! path $dir\n");
        }
    }
    # Recurse only after this directory's handle has been closed.
    foreach my $subdir (@dirs) {
        process_a_dir($subdir);
    }
}


# Scan the top-level input directory and everything below it, then print a
# one-line summary of the directories, files and bytes seen.
#   $dir - directory to scan; when it cannot be opened only the warning from
#          process_a_dir() is issued and no summary is printed.
sub process_in_dir($) {
    my $dir = shift;
    # The scan itself is exactly what process_a_dir() does, so delegate to it
    # instead of repeating its body here.
    my $dirs_before = $total_dirs;
    process_a_dir($dir);
    # process_a_dir() counts a directory only after opening it successfully;
    # an unchanged counter therefore means the open failed.
    return if ($total_dirs == $dirs_before);
    prt("Processed $total_dirs, for $total_files files, $total_bytes total bytes...\n");
}

# Compare the file lists of two directories. Files are matched by name only;
# a match with equal size is reported as SAME and added to $dupe_bytes and
# $same_same, a match with different sizes is reported only at verbosity 9.
# File contents are never compared.
#   $da1, $da2 - references to arrays of [file name, size] pairs
#   $di1, $di2 - the directory paths those lists belong to
sub comp_arrays($$$$) {
    my ($da1,$di1,$da2,$di2) = @_;
    # Full paths from the first list that have already been handled.
    my %done = ();
    for my $ra1 (@{$da1}) {
        my ($fil1,$sz1) = @{$ra1}[0,1];
        for my $ra2 (@{$da2}) {
            my ($fil2,$sz2) = @{$ra2}[0,1];
            my $ff2 = $di2.$fil2;
            next if (defined $done{$ff2});
            next if ($fil1 ne $fil2);
            if ($sz1 != $sz2) {
                prt("File $fil1 in $di1 and $di2 - diff sizes $sz1 and $sz2\n") if (VERB9());
                next;
            }
            prt("File $fil1 SAME in $di1 and $di2 - $sz1 bytes\n");
            $dupe_bytes += $sz1;
            $same_same++;
        }
        $done{$di1.$fil1} = 1;
    }
}

# Compare every unordered pair of scanned directories once, in sorted order,
# print the overall totals, and set $load_log to 1.
sub show_dir_hash() {
    my @dirs = sort keys %dir_hash;
    my $cnt = scalar @dirs;
    prt("Processing $cnt dir keys...\n");
    # Pairing each directory only with those after it in the sorted list
    # visits each pair exactly once and never pairs a directory with itself.
    for my $i (0 .. $#dirs) {
        my $dir1 = $dirs[$i];
        my $ra1 = $dir_hash{$dir1};
        for my $j (($i + 1) .. $#dirs) {
            my $dir2 = $dirs[$j];
            comp_arrays($ra1,$dir1,$dir_hash{$dir2},$dir2);
        }
    }
    prt("Same-same $same_same of $total_files. Duplicated $dupe_bytes bytes...\n");
    $load_log = 1;
}

#########################################
### MAIN ###
# Read the options and input directory; exits on bad or missing input.
parse_args(@ARGV);
# Collect [name, size] pairs for every file under the input directory.
process_in_dir($in_dir);
# Compare the collected directories pairwise and print the totals.
show_dir_hash();
# Normal exit: list warnings, close the log, exit status 0.
pgm_exit(0,"");
########################################

# Exit with an error unless the option is followed by at least one more
# argument. Call it with the remaining argument list, option first.
sub need_arg {
    my ($arg,@rest) = @_;
    unless (@rest) {
        pgm_exit(1,"ERROR: [$arg] must have a following argument!\n");
    }
}

# Parse the command line into the file-level settings.
#   -h, -?          print help and exit 0
#   -v[n], -vv...   set verbosity to n, or add one per leading 'v'
#   -l, -ll         set $load_log to 1 or 2
#   -o <file>       store the output file name in $out_file
#   anything else   taken as the input directory
# Any number of leading dashes is accepted. Exits with status 1 on an
# unknown option, a missing option value, or a missing or non-existent
# input directory.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # Strip every leading dash so -x and --x are treated alike.
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # Non-greedy '.*?' so the whole trailing number is captured;
                # a greedy '.*' leaves only the last digit (-v10 became 0).
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                # @av still starts with the option itself, which is what
                # need_arg() expects as its first argument.
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_dir = $arg;
            prt("Set input to [$in_dir]\n") if ($verb);
        }
        shift @av;
    }

    # In debug mode a missing input falls back to the built-in default.
    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_dir) ==  0) {
            $in_dir = $def_file;
            prt("Set DEFAULT input to [$in_dir]\n");
        }
    }
    if (length($in_dir) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -d $in_dir) {
        pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
    }
}

# Print the program version, the usage line and the option summary.
sub give_help {
    prt("$pgmname: version $VERS\n");
    # The positional argument must be a directory (parse_args() rejects
    # anything that fails -d), so the usage line says in-dir.
    prt("Usage: $pgmname [options] in-dir\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out <file>  (-o) = Write output to this file.\n");
}

# eof - samefiles.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional