#!/usr/bin/perl -w # NAME: xml2json02.pl # AIM: Convert an XML file to a JSON file # 01/03/2014 geoff mclane http://geoffair.net/mperl # This version is bases on the oneliner from : http://blogs.perl.org/users/jhannah_mutation_grid/2010/09/one-liner-xmlperljson.html use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use JSON::Any; use XML::Simple; use Cwd; my $os = $^O; my $perl_dir = '/home/geoff/bin'; my $PATH_SEP = '/'; my $temp_dir = '/tmp'; if ($os =~ /win/i) { $perl_dir = 'C:\GTools\perl'; $temp_dir = $perl_dir; $PATH_SEP = "\\"; } unshift(@INC, $perl_dir); require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n"; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt"; open_log($outfile); # user variables my $VERS = "0.0.2 2014-01-13"; my $load_log = 0; my $in_file = ''; my $verbosity = 0; my $out_file = ''; my $do_pretty = 0; my $indent = ' '; my $fix_numbers = 0; my $rem_trailing = 0; # TODO: FIX THIS!!! # ### DEBUG ### my $debug_on = 0; my $def_file = 'def_file'; ### program variables my @warnings = (); my $cwd = cwd(); sub VERB1() { return $verbosity >= 1; } sub VERB2() { return $verbosity >= 2; } sub VERB5() { return $verbosity >= 5; } sub VERB9() { return $verbosity >= 9; } sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ) if (VERB9()); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub pretty_json_simple($) { my $json = shift; $json =~ s/,/,\n /g; $json =~ s/\[/\[\n /g; $json =~ s/\{/\n {/g; $json =~ s/\n\n/\n/g; $json =~ s/\n \n/\n/g; $json = trim_leading($json); return $json; } sub pretty_json($) { my $json = shift; my $len = length($json); my $njson = ''; my ($i,$ch,$olen,$alen,$ind); my @arrays = (); my @objs = (); my $inq = 0; my $hf = 0; $olen = 0; $ind = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($json,$i,1); if ($inq) { $njson .= $ch; $inq = 0 if ($ch eq '"'); } else { next if ($ch =~ /\s/); if ($ch eq '"') { $inq = 1; $njson .= $ch; next; } if ($ch eq '{') { push(@objs,$ch); $olen = scalar @objs; $olen-- if ($olen); $ind = $indent x $olen; if ($hf) { $njson .= "\n".$ind.$ch."\n"; $ind .= $indent; $njson .= $ind; $ch = ''; } else { $hf = 1; } } elsif ($ch eq '}') { $ind = $indent x $olen; $njson .= "\n".$ind; if (@objs) { pop @objs; $olen = scalar @objs; $olen-- if ($olen); $ind = $indent x $olen; } } elsif ($ch eq ',') { $njson .= $ch."\n".$ind; $ch = ''; } $njson .= $ch; } } $njson =~ s/\n\s*\n/\n/g; $njson .= "\n" if ( !($njson =~ /\n$/g) ); return $njson; } sub is_a_double($) { my $val = shift; return 1 if ($val =~ /^-?\d+\.?\d*$/); # { print "is a real number\n" } return 1 if ($val =~ /^\d+$/); # { print "is a whole number\n" } return 1 if ($val =~ /^-?\d+$/); # { print "is an integer\n" } return 1 if ($val =~ /^[+-]?\d+$/); # { print "is a +/- integer\n" } return 1 if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/); # { print "is a decimal number\n" } return 1 if ($val =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/); # { print "a C float\n" } return 0 if ($val =~ /\D/); # { print "has nondigits\n" } return 0; } sub fix_json_decimal($) { my $val = shift; ##$val = trim_all($val); $val = substr($val,1) if ($val =~ /^\+/); # remove any redundant leading '+' my $len = length($val); return $val if ($len == 1); ###prt("fix json decimal $val\n"); my $nval = ''; my ($i,$pp,$cp,$np,$i2,$hd,$hm,$hv); $cp = ''; $hd = 0; $hm = 0; $hv = 0; for ($i = 0; $i < $len; $i++) { $i2 = $i + 1; $pp = $cp; $cp = substr($val,$i,1); $np = ($i2 < $len) ? substr($val,$i2,1) : ''; if ($cp eq '.') { $hd = 1; } elsif ($cp eq '-') { $hm = 1; } elsif ($cp eq '0') { if (!$hd && !$hv) { if ($np =~ /\d/) { $cp = ''; # kill this } } } elsif ($cp =~ /\d/) { $hv = 1; } $nval .= $cp; } if ($rem_trailing && ($nval =~ /\./)) { prt("Got a decimal $nval "); my $oval = $nval; my @arr = split(/\./,$nval); $len = scalar @arr; if ($len == 2) { my $tmp = $arr[1]; prt(" test $tmp "); if ($tmp =~ /^\d+$/) { my $chg = 0; $len = length($tmp); prt(" len $len "); for ($i = ($len - 1); $i > 0; $i--) { $cp = substr($tmp,$i,1); if ($cp eq '0') { ###$tmp = substr($tmp,0,length($tmp)-2); $chg++; prt(" chg $chg [$cp] "); } else { prt(" $i NZ [$cp] $chg "); last; } } if ($chg > 0) { prt(" chg $chg len $len "); $tmp = substr($tmp,0,($len - $chg - 1)); $nval = $arr[0].'.'.$tmp; } else { prt(" chg=$chg "); } } else { prt(" not DIG "); } } else { prt("Split NOT 2 $len"); } if ($nval == $oval) { prt(" NO CHANGE\n"); } else { prt(" change to $nval\n"); } } return $nval; } # take pure number out of quotes, and also lose an leading zeros sub number_fix($) { my $json = shift; my $len = length($json); my ($i,$ch,$inq,$num); my $njson = ''; $inq = 0; for ($i = 0; $i < $len; $i++) { $ch = substr($json,$i,1); if ($inq) { if ($ch eq '"') { if (is_a_double($num)) { $njson .= fix_json_decimal($num); } else { $njson .= "\"$num\""; } $inq = 0; } else { $num .= $ch; } } elsif ($ch eq '"') { $num = ''; $inq = 1; } else { $njson .= $ch; } } return $njson; } sub process_in_file($) { my ($inf) = @_; if (! -f $inf ) { pgm_exit(1,"ERROR: Unable to find file [$inf]\n"); } prt("Loading in file $in_file...\n"); my $xml = XMLin($in_file); prt("Converting to JSON using JSON::Any...\n"); my $json = JSON::Any->new()->objToJson($xml); if ($fix_numbers) { prt("Doing json number fix...\n"); $json = number_fix($json); } if ($do_pretty) { prt("Doing pretty of json...\n"); $json = pretty_json($json); } if (length($out_file)) { write2file($json,$out_file); prt("JSON written to [$out_file]\n"); } else { prt("$json\n\n"); prt("No -o out_file in command, so written to stdout\n"); } } ######################################### ### MAIN ### parse_args(@ARGV); process_in_file($in_file); pgm_exit(0,""); ######################################## sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); my $verb = VERB2(); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } elsif ($sarg =~ /^v/) { if ($sarg =~ /^v.*(\d+)$/) { $verbosity = $1; } else { while ($sarg =~ /^v/) { $verbosity++; $sarg = substr($sarg,1); } } $verb = VERB2(); prt("Verbosity = $verbosity\n") if ($verb); } elsif ($sarg =~ /^l/) { if ($sarg =~ /^ll/) { $load_log = 2; } else { $load_log = 1; } prt("Set to load log at end. ($load_log)\n") if ($verb); } elsif ($sarg =~ /^o/) { need_arg(@av); shift @av; $sarg = $av[0]; $out_file = $sarg; prt("Set out file to [$out_file].\n") if ($verb); } elsif ($sarg =~ /^p/) { $do_pretty = 1; prt("Set to pretty the json output.\n") if ($verb); } elsif ($sarg =~ /^n/) { $fix_numbers = 1; prt("Set to fix json numbers.\n") if ($verb); } elsif ($sarg =~ /^i/) { need_arg(@av); shift @av; $sarg = $av[0]; if ($sarg =~ /^\d+$/) { $indent = ' ' x $sarg; prt("Set indent to [$indent].\n") if (VERB1()); } else { pgm_exit(1,"Expected integer following $arg, NOT [$sarg]\n"); } } else { pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n"); } } else { $in_file = $arg; prt("Set input to [$in_file]\n") if ($verb); } shift @av; } if ($debug_on) { prtw("WARNING: DEBUG is ON!\n"); if (length($in_file) == 0) { $in_file = $def_file; prt("Set DEFAULT input to [$in_file]\n"); } } if (length($in_file) == 0) { pgm_exit(1,"ERROR: No input files found in command!\n"); } if (! -f $in_file) { pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n"); } } sub give_help { prt("$pgmname: version $VERS\n"); prt("Usage: $pgmname [options] in-file\n"); prt("Options:\n"); prt(" --help (-h or -?) = This help, and exit 0.\n"); prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n"); prt(" --load (-l) = Load LOG at end. ($outfile)\n"); prt(" --pretty (-p) = Pretty output. That is reline the json, inserting newlines, and indents.\n"); prt(" --indent (-i) = Number of indent spaces. (def=".length($indent).")\n"); prt(" --number (-n) = Fix json numbers. Default of JSON::Any is to quote them all.\n"); prt(" --out (-o) = Write output to this file.\n"); } # eof - xml2json02.pl