autoword02.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:22 2010 from autoword02.pl 2007/09/22 7.5 KB.

#!/Perl
# Series: Automation of Microsoft Word
# original from : http://www.stouk.com/documents/perl/gui/guiref/page04.htm
# Purpose: Load an existing document, and write it out to a TEXT ONLY file.
# I had some initial trouble getting this to function, hence the addition of
# som eLOG file code, so I could review what was happening ... and there
# is a lot of DEAD CODE left commented out, to remind me of the tries ...
# author: geoff mclane - mailto: geoffair _at_ hotmail _dot_ com - 2006-06-12
# 22/09/2007 - added command line argument parse -in infile -out outfile
# use strict; 
use Win32::OLE; 
use Win32::OLE::Const 'Microsoft Word'; 
my ($LOG);
my $write_log = 0;
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.".$pgmname.".txt";
if ( open( $LOG, ">$outfile" ) ) {
    $write_log = 1;
} else {
    $write_log = 0;
    prt( "WARNING: Unable to open $outfile LOG ...\n" );
}
##################################################################################
# *** ALTER THIS TO POINT TO YOUR OWN TEST DOCUMENT, AND TEST TEXT FILE RESULT ***
#my $infile = 'c:\tmp\test.doc';
#my $file_out = 'c:\tmp\test.txt';
my $infile = 'c:\tmp\test3.doc';
my $file_out = 'c:\tmp\test3.txt';
my $do_clean = 1;
# OR ACCEPT COMMAND LINE INPUT
parse_args(@ARGV);
##################################################################################
my $dbg1 = 0;
### can be all in one line
### my $Word = Win32::OLE->GetActiveObject('Word.Application') || Win32::OLE->new('Word.Application', 'Quit'); 
### or tested, like ...
my $Word = Win32::OLE->GetActiveObject('Word.Application');
if ($Word) {
   prt( "Using running application found ... \n" );
} else {
   prt( "No running application found ... starting new ...\n" );
   $Word = Win32::OLE->new('Word.Application', 'Quit');
   if ($Word) {
      prt("Loaded Word application ...\n");
   } else {
      mydie("YEEK! Unable to load word ... aborting \n" );
   }
}
###prt( "Setting word as visible ...\n" );
###$Word->{'Visible'} = 1; # if you want to see what's going on, but it is all quite quick ;=))
prt( "Loading Word application with $infile...\n" );
#my $doc = $Word->Documents->Open({ 
# FileName => $infile, 
# ConfirmConversions => 0, 
# ReadOnly => 1, 
# AddToRecentFiles => 0, 
# Format => wdOpenFormatAuto}); 
# OR SIMPLY ...
my $doc = $Word->Documents->Open($infile) || mydie("Unable to open document " . Win32::OLE->LastError()); 
# appears these are not needed ...
#print "Setting the file name to $file_out ...\n";
#$Word->Documents->FileName($file_out); 
#$Word->Documents->FileFormat(1); 
$Word->Documents->AddToRecentFiles(0);
if ( -f $file_out ) {
   print "Deleting previous output $file_out ...\n";
   unlink $file_out; # remove any previous
}
# not sure this is needed, or does anything ...
###$Word->DisplayAlerts = wdAlertsNone; - no, should be
$Word->{'DisplayAlerts'} = wdAlertsNone;
# lots of tries that FAILED
###$Word->Documents->SaveAs( { 'FileName->'."$file_out",}); 
###my $cmd = "FileName -> $file_out";
###$Word->Documents->SaveAs( { $cmd } ); 
###$Word->Documents->SaveAs( "$file_out" ); 
# $Word->Documents->SaveAs( FileName => "$file_out" ); 
# $Word->Documents->SaveAs({
# $Word->SaveAs({
# $Word->ActiveDocument->SaveAs({
# and then it WORKED with ...
my $aDoc = $Word->ActiveDocument;
if ($aDoc) {
   prt( "Doing SaveAs on ActiveDocument ...to $file_out ...\n" );
   $aDoc->SaveAs({
      FileName =>  "$file_out",
      FileFormat =>  wdFormatDOSTextLineBreaks });
   if ( -f $file_out) {
      prt("Appears new file created ...\n");
      $Word->Documents->Close; 
      $Word->Documents->Quit; 
      $Word->Quit; # close it all ...
      if ($do_clean) {
         if (open FIL, "<$file_out") {
            my @lines = <FIL>;
            my @nlines = ();
            close FIL;
            prt( "Got ".scalar @lines." to process ...\n" );
            my $had = 0;
            foreach my $line (@lines) {
               my $ln = trim_all($line);
               prt( "$ln\n" ) if ($dbg1);
               if (($ln eq '>')||(length($ln) == 0)) {
                  if ($had) {
                     prt( "Had HAD ... skipped ...\n" ) if ($dbg1);
                     next;
                  }
                  prt( "Set HAD ... skip next ...\n" ) if ($dbg1);
                  $had = 1;
               } else {
                  prt( "Clear HAD ... no skip ...\n" ) if ($dbg1);
                  $had = 0;
               }
               push(@nlines, $line);
            }
            write_a_file( $file_out, @nlines );
         } else {
            prt("Warning: unable to open file $file_out ...\n");
         }
      }
   } else {
      prt("Warning: text file not created ...\n");
      $Word->Documents->Close; 
      $Word->Documents->Quit; 
      $Word->Quit; # close it all ...
   }
} else {
   prt("ERROR: Failed to get ActiveDocument ...\n");
   $Word->Documents->Close; 
   $Word->Documents->Quit; 
   $Word->Quit; # close it all ...
}
##$Word->Documents->Close; 
##$Word->Documents->Quit; 
##$Word->Quit; # close it all ...
close $LOG;
### system($outfile);
# ENUMERATION OF OUTPUT FORMATS
# wdFormatDocument - Microsoft Word format. 
# wdFormatDOSText - Microsoft DOS text format. 
# wdFormatDOSTextLineBreaks - Microsoft DOS text with line breaks preserved. 
# wdFormatEncodedText - Encoded text format. 
# wdFormatFilteredHTML - Filtered HTML format. 
# wdFormatHTML - Standard HTML format. 
# wdFormatRTF - Rich text format (RTF). 
# wdFormatTemplate - Microsoft Word template format. 
# wdFormatText - Microsoft Windows text format. 
# wdFormatTextLineBreaks - Microsoft Windows text format with line breaks preserved. 
# wdFormatUnicodeText - Unicode text format. 
# wdFormatWebArchive - Web archive format. 
# wdFormatXML - Extensible Markup Language (XML) format. 
################################
### output and log file
sub wlog {
   my $ml = shift;
   print $LOG $ml;
}
sub prt {
   my $m = shift;
   if ($write_log) {
      wlog($m);
   }
   print $m;
}
sub mydie {
   my $msg = shift;
   if ($write_log) {
      wlog($msg);
   }
   die $msg;
}
##############################
# argument parsing
# Ensure argument exists, or die.
sub require_arg {
    my ($arg, @arglist) = @_;
    mydie( "ERROR: no argument given for option '$arg' ...\n" ) if ! @arglist;
}
sub give_help {
   prt( "$pgmname [-in filepath] [-out filename] [-clean]\n" );
   prt( "Default in = $infile, out = $file_out ...\n" );
   mydie( "And of course you MUST have MS Word installed ...\n" );
}
sub parse_args {
   my (@av) = @_;
   while (@av) {
      my $arg = $av[0];
      if (($arg eq '?')||
         ($arg eq '-?')||
         ($arg eq '-h')||
         ($arg eq -'H')) {
         give_help();
      } elsif ($arg eq '-clean') {
         $do_clean = 1;
         prt( "Do clean set...\n" );
      } elsif ($arg eq '-in') {
         require_arg(@av);
         shift @av;
         $arg = $av[0];
         $infile = $arg;
         prt( "In file set to [$infile]\n" );
      } elsif ($arg eq '-out') {
         require_arg(@av);
         shift @av;
         $arg = $av[0];
         $file_out = $arg;
         prt( "Out file set to [$file_out]\n" );
      } else {
         prt( "ERROR: Unknown command [$arg] ...\n" );
         give_help();
      }
      shift @av;
   }
}
#####################
### file action
sub write_a_file {
   my ($fil, @txt) = @_;
   open WOF, ">$fil" or mydie("ERROR: Unable to open $fil! $!\n");
   print WOF @txt;
   close WOF;
}
sub trim_all {
   my ($ln) = shift;
   chomp $ln;         # remove CR (\n)
   $ln =~ s/\r$//;      # remove LF (\r)
   $ln =~ s/\t/ /g;   # TAB(s) to a SPACE
   while ($ln =~ /\s\s/) {
      $ln =~ s/\s\s/ /g;   # all double space to SINGLE
   }
   while ($ln =~ /^\s/) {
      $ln = substr($ln,1); # remove all LEADING space
   }
   while ($ln =~ /\s$/) {
      $ln = substr($ln,0, length($ln) - 1); # remove all TRAILING space
   }
   return $ln;
}
# eof - autoword02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional