lineendings.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:43 2010 from lineendings.pl 2006/09/27 4.6 KB.

#!/Perl
# test8.pl
# download and write a file ...
# carefully checking the LINE ENDINGS contained in an online (www) file
#
# Flow: fetch one page, then write it out several times (raw, \r split/joined
# with \n, regex \r to \n, forced DOS, forced UNIX), reporting the detected
# line-ending type and the text length after each conversion.
use strict;
use warnings;
use File::Basename qw(basename);
use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
# NOTE(review): open_log, close_log, prt, mydie, write2file and writebinfile
# are not defined in this file - presumably they come from logfile.pl.
# log file stuff
my ($LF);
# Use only the base name of the script: $0 may carry a directory part
# (e.g. './lineendings.pl'), which would make the log name an invalid path.
my $outfile = 'temp'.basename($0).'.txt';
# line ending types
my $UnixType = 1;
my $DosType = 2;
my $MacType = 3;
my $MixedType = 4;
my $site = 'http://www.aprompt.ca/Tidy/';
my $URL = $site . '1-1-1-f1.html';
my $out_file = 'temp1.htm';
my $out_file2 = 'temp2.htm';
my $out_file3 = 'temp3.htm';
my $out_file4 = 'temp4.htm';
my $out_file5 = 'temp5.htm';
my $out_file6 = 'temp6.htm';
my $out_file7 = 'temp7.htm';
open_log($outfile) or mydie( "ERROR: Can not create LOG file ...\n" );
prt( "Moment ... downloading [$URL] ...\n" );
my $text = get($URL);
# LWP::Simple::get() returns undef when the fetch fails; nothing below makes
# sense without the document, so stop here instead of processing undef.
# NOTE(review): assumes mydie() terminates the script, as its use after
# open_log() above suggests - confirm in logfile.pl.
if (!defined $text) {
   mydie( "ERROR: Failed to download [$URL] ...\n" );
}
# 1: the document exactly as received
prt( "Writing [$out_file] ... raw from site ...\n" );
write2file( $text, $out_file );
show_line_ending(check_line_ending($text),length($text));
# 2: split on \r and re-join with \n
my @arr = split("\r", $text);
my $tx2 = join("\n", @arr); # note - write to file does \n translation to \r\n!!!
prt( "Writing [$out_file2] ... after 'conversion' 0x0D to 0x0A (\\r to \\n) ,,,\n" );
write2file( $tx2, $out_file2 );
show_line_ending(check_line_ending($tx2),length($tx2));
# 3: same idea using a regex substitution
my $tx3 = $text;
$tx3 =~ s/\r/\n/gm;   # if file KNOWN to just have \r, then replaces with \n
prt( "Writing [$out_file3] ... after regex replacement \\r to \\n ...\n" );
write2file( $tx3, $out_file3 );
show_line_ending(check_line_ending($tx3),length($tx3));
# 4 and 5: forced DOS endings, written in text mode and then in binmode
my $tx4 = force_dos_le($text);
prt( "Writing [$out_file4] ... with DOS paired ending ...\n" );
write2file( $tx4, $out_file4 );   # EEK! This causes \r\r\n to be written to file (0x0d, 0x0d, 0x0a)
prt( "Writing [$out_file5] in binmode ... with DOS paired endings ...\n" );
writebinfile( $tx4, $out_file5 ); # but it is ok, if binmode set first ;=))
show_line_ending(check_line_ending($tx4),length($tx4));
# 6: forced UNIX endings straight from the download
my $tx6 = force_unix_le($text);
prt( "Writing [$out_file6] ... First conversion download to UNIX (\\n only) ...\n" );
write2file( $tx6, $out_file6 ); # \n written as \r\n
show_line_ending(check_line_ending($tx6),length($tx6));
# 7: forced UNIX endings from the DOS-converted text
my $tx7 = force_unix_le($tx4);
prt( "Writing [$out_file7] ... Second conversion DOS endings to UNIX ...\n" );
write2file( $tx7, $out_file7 ); # \n written as \r\n
show_line_ending(check_line_ending($tx7),length($tx7));
close_log($outfile,1);
exit(0);
# show_line_ending( $type, $length )
# Log a one-line description of a line-ending type code (as returned by
# check_line_ending) together with the text length, via prt().
#   $tp - numeric type code: 0, $UnixType, $DosType, $MacType or $MixedType
#   $ln - length value appended after the bracketed code
sub show_line_ending {
   my ($tp, $ln) = @_;
   # Ordered table of code => message prefix; first numeric match wins.
   my @known = (
      [ 0,          "Unable to obtain line endings" ],
      [ $UnixType,  "Unix line endings" ],
      [ $DosType,   "Dos  line endings" ],
      [ $MacType,   "Mac  line endings" ],
      [ $MixedType, "MIXED line endings" ],
   );
   my $label = "Unknown line endings"; # used when no code matches
   foreach my $entry (@known) {
      next unless $tp == $entry->[0];
      $label = $entry->[1];
      last;
   }
   prt( "$label ...[$tp]$ln\n" );
}
# check_line_ending( $text )
# Scan $text and classify the line endings it contains.
# Returns:
#   0          - no line ending found (also for undefined input)
#   $UnixType  - only "\n"
#   $DosType   - only "\r\n" pairs
#   $MacType   - only lone "\r"
#   $MixedType - more than one of the above
# A lone "\r" that is the very last character of the text counts as a
# Mac ending (previously it was silently ignored).
sub check_line_ending {
   my ($tx) = shift;
   my $le = 0;
   return $le unless defined $tx;
   # "\r\n" is listed first so a pair is always taken as one DOS ending
   # rather than a Mac ending followed by a Unix ending.
   while ($tx =~ /(\r\n|\r|\n)/g) {
      my $found = ($1 eq "\r\n") ? $DosType
                : ($1 eq "\r")   ? $MacType
                :                  $UnixType;
      if (!$le) {
         $le = $found;       # first ending seen decides the type
      } elsif ($le != $found) {
         return $MixedType;  # a second kind - nothing can change this result
      }
   }
   return $le;
}
# force_dos_le( $text )
# Return a copy of $text in which every line ending - "\r\n", lone "\r" or
# lone "\n" - is written as the DOS pair "\r\n". Existing "\r\n" pairs are
# left as they are. Undefined input yields an empty string.
# Consecutive "\r" characters are each treated as their own line ending
# (the old char-by-char loop copied the second one through unconverted).
sub force_dos_le {
   my ($tx) = shift;
   return '' unless defined $tx;
   my $ntx = $tx;
   # "\r\n" must come first in the alternation so a pair is replaced as a
   # unit and not as two separate endings.
   $ntx =~ s/\r\n|\r|\n/\r\n/g;
   return $ntx;
}
# force_unix_le( $text )
# Return a copy of $text in which every "\r\n" pair and every lone "\r" is
# replaced by a single "\n". Existing lone "\n" are left as they are.
# Undefined input yields an empty string.
# Consecutive "\r" characters are each treated as their own line ending
# (the old char-by-char loop copied the second one through unconverted).
sub force_unix_le {
   my ($tx) = shift;
   return '' unless defined $tx;
   my $ntx = $tx;
   # "\r" with an optional following "\n" is one line ending.
   $ntx =~ s/\r\n?/\n/g;
   return $ntx;
}
# eof - lineendings.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional