# Generated: Tue Feb 2 17:54:43 2010 from lineendings.pl 2006/09/27 4.6 KB.
#!/Perl
# test8.pl
# download and write a file ...
# carefully checking the LINE ENDINGS contained in an online (www) file
#
# The script downloads one page, writes it out unchanged, then writes several
# converted copies (temp1.htm .. temp7.htm), logging the detected line ending
# type and the text length after each step.
use strict;
use warnings;
use File::Basename;
use LWP::Simple;

# my simple log file and some other utility subs
# (open_log, close_log, prt, mydie, write2file and writebinfile are not
# defined in this file - presumably they all come from logfile.pl.)
# NOTE: require dies by itself when the file cannot be loaded, so the
# 'or die' below is only a fallback.
require "logfile.pl" or die "Missing logfile.pl ...\n";

# log file stuff
# Only the file name part of $0 is used, so running the script through a
# path (perl some/dir/test8.pl) does not put directory separators into
# the log file name.
my $outfile = 'temp' . basename($0) . '.txt';

# line ending types
my $UnixType  = 1;    # \n only
my $DosType   = 2;    # \r\n pairs
my $MacType   = 3;    # \r only
my $MixedType = 4;    # more than one of the above

my $site = 'http://www.aprompt.ca/Tidy/';
my $URL  = $site . '1-1-1-f1.html';

my $out_file  = 'temp1.htm';
my $out_file2 = 'temp2.htm';
my $out_file3 = 'temp3.htm';
my $out_file4 = 'temp4.htm';
my $out_file5 = 'temp5.htm';
my $out_file6 = 'temp6.htm';
my $out_file7 = 'temp7.htm';

open_log($outfile) or mydie( "ERROR: Can not create LOG file ...\n" );

prt( "Moment ... downloading [$URL] ...\n" );
my $text = get($URL);

# LWP::Simple::get() returns undef when the fetch fails; without this check
# every later step would run on an undefined value.
defined $text or mydie( "ERROR: Failed to download [$URL] ...\n" );

prt( "Writing [$out_file] ... raw from site ...\n" );
write2file( $text, $out_file );
show_line_ending( check_line_ending($text), length($text) );

# Conversion 1: split on \r and re-join with \n.
# (split drops trailing empty fields, so trailing \r characters are lost.)
my @arr = split( "\r", $text );
my $tx2 = join( "\n", @arr );
# note - write to file does \n translation to \r\n!!!
prt( "Writing [$out_file2] ... after 'conversion' 0x0D to 0x0A (\\r to \\n) ,,,\n" );
write2file( $tx2, $out_file2 );
show_line_ending( check_line_ending($tx2), length($tx2) );

# Conversion 2: plain substitution.
my $tx3 = $text;
$tx3 =~ s/\r/\n/g;    # if file KNOWN to just have \r, then replaces with \n
prt( "Writing [$out_file3] ... after regex replacement \\r to \\n ...\n" );
write2file( $tx3, $out_file3 );
show_line_ending( check_line_ending($tx3), length($tx3) );

# Conversion 3: force DOS (\r\n) endings, written in text and binary mode.
my $tx4 = force_dos_le($text);
prt( "Writing [$out_file4] ... with DOS paired ending ...\n" );
write2file( $tx4, $out_file4 );    # EEK! This causes \r\r\n to be written to file (0x0d, 0x0d, 0x0a)
prt( "Writing [$out_file5] in binmode ... with DOS paired endings ...\n" );
writebinfile( $tx4, $out_file5 );    # but it is ok, if binmode set first ;=))
show_line_ending( check_line_ending($tx4), length($tx4) );

# Conversion 4: force UNIX (\n) endings straight from the download.
my $tx6 = force_unix_le($text);
prt( "Writing [$out_file6] ... First conversion download to UNIX (\\n only) ...\n" );
write2file( $tx6, $out_file6 );    # \n written as \r\n
show_line_ending( check_line_ending($tx6), length($tx6) );

# Conversion 5: force UNIX endings on the already DOS-converted text.
my $tx7 = force_unix_le($tx4);
prt( "Writing [$out_file7] ... Second conversion DOS endings to UNIX ...\n" );
write2file( $tx7, $out_file7 );    # \n written as \r\n
show_line_ending( check_line_ending($tx7), length($tx7) );

close_log( $outfile, 1 );
exit(0);

# show_line_ending( $type, $length )
# Log a one line description of a line ending type code.
#   $type   - value returned by check_line_ending()
#             (0 = none found, otherwise one of the $...Type values)
#   $length - length of the text examined; appended to the message
sub show_line_ending {
    my ( $tp, $ln ) = @_;
    if ( $tp == 0 ) {
        prt( "Unable to obtain line endings ...[$tp]$ln\n" );
    }
    elsif ( $tp == $UnixType ) {
        prt( "Unix line endings ...[$tp]$ln\n" );
    }
    elsif ( $tp == $DosType ) {
        prt( "Dos line endings ...[$tp]$ln\n" );
    }
    elsif ( $tp == $MacType ) {
        prt( "Mac line endings ...[$tp]$ln\n" );
    }
    elsif ( $tp == $MixedType ) {
        prt( "MIXED line endings ...[$tp]$ln\n" );
    }
    else {
        prt( "Unknown line endings ...[$tp]$ln\n" );
    }
    return;
}

# check_line_ending( $text )
# Classify the line endings used in $text.
# Returns 0 when the text contains no line ending at all, $UnixType,
# $DosType or $MacType when only that one kind occurs, and $MixedType as
# soon as two different kinds have been seen.
sub check_line_ending {
    my ($tx) = @_;
    return 0 unless defined $tx && length $tx;

    my $le = 0;

    # "\r\n" is tried first so a DOS pair is never counted as Mac + Unix.
    # Every remaining \r - including one that is the very last character -
    # is a Mac ending, and every remaining \n is a Unix ending.
    while ( $tx =~ /(\r\n|\r|\n)/g ) {
        my $found = $1 eq "\r\n" ? $DosType
                  : $1 eq "\r"   ? $MacType
                  :                $UnixType;
        if ( !$le ) {
            $le = $found;    # first line ending seen
        }
        elsif ( $le != $found ) {
            return $MixedType;    # second kind seen - no need to scan on
        }
    }
    return $le;
}

# force_dos_le( $text )
# Return a copy of $text in which every line ending (\r\n, lone \r or
# lone \n) is written as the DOS pair \r\n.
# Consecutive \r characters (blank lines in a Mac file) are each converted.
# An undefined $text yields an empty string.
sub force_dos_le {
    my ($tx) = @_;
    return '' unless defined $tx;

    # The alternation order matters: an existing \r\n pair must be matched
    # as a whole, otherwise it would be expanded to \r\n\r\n.
    ( my $ntx = $tx ) =~ s/\r\n|\r|\n/\r\n/g;
    return $ntx;
}

# force_unix_le( $text )
# Return a copy of $text in which every \r\n pair and every lone \r is
# replaced by a single \n; existing lone \n characters are kept.
# Consecutive \r characters (blank lines in a Mac file) are each converted.
# An undefined $text yields an empty string.
sub force_unix_le {
    my ($tx) = @_;
    return '' unless defined $tx;

    # \r\n? swallows the \n of a DOS pair together with its \r.
    ( my $ntx = $tx ) =~ s/\r\n?/\n/g;
    return $ntx;
}

# eof - lineendings.pl