tidyentities.pl to HTML.

Generated: Sat Oct 24 16:35:30 2020 from tidyentities.pl 2017/11/15 74.4 KB. text copy
#!/usr/bin/perl -w
# NAME: tidyentities.pl
# AIM: VERY SPECIALIZED - Read a C table, and a JSON set of entities, and compare
# 2017-11-06 - purpose extended
# 08/02/2015 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
use JSON;
use Data::Dumper;
use feature 'unicode_strings';
use utf8;
use Text::Unidecode;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.6 2017-11-06";
###my $VERS = "0.0.5 2015-01-09";
my $load_log = 0;
my $in_file = '';
my $in_file1 = 'C:\Users\user\Documents\Tidy\htmlmathml.json';
my $in_file2 = 'C:\Users\user\Documents\Tidy\entities.c';
# my $in_file2 = 'F:\Projects\tidy-html5\src\entities.c';
# 2017-11-06 - add from : https://dev.w3.org/html5/html-author/charref
my $in_file3 = 'C:\Users\user\Documents\Tidy\charref.html';
# from : https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
my $in_file4 = 'C:\Users\user\Documents\Tidy\wiki-list-entities.html';
# from : https://www.w3.org/TR/REC-html40/sgml/entities.html
my $in_file4f = 'C:\Users\user\Documents\Tidy\html4-ents.html';
# from : https://www.w3.org/TR/html5/
# from : https://www.w3.org/TR/html5/syntax.html#named-character-references
my $in_file5 = 'C:\Users\user\Documents\Tidy\syntax5-ents.html';
my $in_file5j = 'C:\Users\user\Documents\Tidy\html5-ents.json';
# from : https://www.freeformatter.com/html-entities.html - Just 238 entities
# my $in_file6 = 'C:\Users\user\Documents\Tidy\html-entities.html';

my $verbosity = 0;
my $out_file = $temp_dir.$PATH_SEP."tempents1.c";
my $out_file2 = $temp_dir.$PATH_SEP."tempents12.c";
my $out_file3 = $temp_dir.$PATH_SEP."tempmultents1.html";
my $out_file4 = $temp_dir.$PATH_SEP."temptidyents1.html";
my $out_file5 = $temp_dir.$PATH_SEP."tempdiffents1.html";
my $out_file6 = $temp_dir.$PATH_SEP."tempfreeents1.html";
my $out_file7 = $temp_dir.$PATH_SEP."tempallents1.html";
my $out_file8 = $temp_dir.$PATH_SEP."tempmodents1.html";
my $nocase_sort = 0;
my $pad_struct = 0;

# 
# from: https://www.w3.org/TR/REC-xml/#sec-predefined-ent - version 1.0
# Only 5 defined - amp, lt, gt, apos, quot
# from: https://www.w3.org/MarkUp/html-spec/html-spec_toc.html
# Hypertext Markup Language - 2.0 September 22, 1995 - below
# from : https://www.w3.org/TR/REC-html32#latin1 - below

# HTML5 - from : https://www.w3.org/TR/html5/syntax.html#named-character-references
#   <div id="named-character-references-table">

# ### DEBUG ###
my $debug_on = 0;
my $def_file = 'def_file';

### program variables
my @warnings = ();
my $cwd = cwd();
my %entities = ();  # entities found in current C code [$vers, $code, 0]
my %all_ents = ();  # entities found in current charref.html
my %all_by_val = ();    # listed in DECIMAL order, accounting for REPEATS
my $max_ent = 0;
my $max_ver = 0;
my $max_dec = 0;
my %json_ents = (); # entities found in html5-ents.json
my %free_ents = (); # entities found in $in_file6 = 'C:\Users\user\Documents\Tidy\html-entities.html';
my $curr_date = lu_get_YYYYMMDD_hhmmss(time());

# Difference include -
# &zwnj; 10184 and 10185, BUT these are -
# &bsolhsub; 10184 and &suphsol; 10185
# from : https://www.w3schools.com/charsets/ref_utf_punctuation.asp
# 8204   200C   &zwnj;   ZERO WIDTH NON-JOINER
# BUT json contains &zwnj; with decimal 10184 and 10185 which are different 'glyphs'!!!
# 
my %modified_ents = ();

my %preferred_vals = (
    'phiv' => 981,
    'varphi' => 981,
    'OverBar' => 8254,
    'UnderBar' => 95,
    'epsi' => 949,
    'epsiv' => 1013,
    'varepsilon' => 1013,
    'epsilon' => 949,
    'straightepsilon' => 1013,
    'strns' => 175,
    'angst' => 197,
    'ohm' => 937
    );

my %equal_pairs = (
    'angst' => [197, 8491],
    'ohm' => [937, 8486]
    );

my %exclude_pairs = (
    'race' => [8756, 817]   # 0223D + 00331 = (8765 + 817 = 9582)
    );

sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub mycmp_nc_sort {
   return -1 if (lc($a) lt lc($b));
   return 1 if (lc($a) gt lc($b));
   return 0;
}

sub mycmp_dec {
   return -1 if ($a < $b);
   return 1  if ($a > $b);
   return 0;
}


sub get_head($) {
    my $title = shift;
    my $txt = <<EOF;
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>$title</title>
<style>
table {  margin-left: auto; margin-right: auto; }
table, td, th { border: 1px solid gray; }
.ctr { text-align: center; }
.rite { text-align: right; }
</style>
</head>
EOF
    return $txt;
}

sub write_table($$$) {
    my ($tit,$htm,$out) = @_;   # like "entities",$htm,$out
    my $line = get_head($tit);
    $line .= "<body>\n";
    $line .= "<a id='top'></a>";
    $line .= "<h1>$tit</h1>\n";
    $line .= "<table>\n";
    $line .= $htm;
    $line .= "</table>\n";
    $line .= "<p>Table columns headed with 'G' is a 'glyph' generated from the alphabetic entity, ";
    $line .= "like &amp;AElig;. ";
    $line .= "Marked with an 'N' is a 'glyph' generated from a numeric entity, ";
    $line .= "like &amp;#198;.</p>\n";
    $line .= "<p class='ctr'><a href='#top'>top</a></p>\n";
    $line .= "<p class='rite'>";
    my ($name,$dir) =  fileparse($out);
    $line .= "$name $curr_date";
    $line .= "</p>\n";
    $line .= "</body>\n</html>\n";
    write2file($line,$out);
}


my %test_hash = (
    'nlE' => "\x{2266}\x{338}",
    'harrcir' => "\x{2948}",
    'omid' => "\x{29b6}",
    'cularr' => "\x{21b6}",
    'ycy' => "\x{44b}",
    'ldca' => "\x{2936}"
    );

sub check_hash() {
    my $rh = \%test_hash;
    prt(Dumper($rh));
    my @arr = keys %{$rh};
    my ($key,$val,$num,$len,$i,$ch,$i2);
    foreach $key (@arr) {
        $val = ${$rh}{$key};
        #prt("$key ");
        $num = sprintf("%s", unidecode( $val ));
        #$num = sprintf("%X", unidecode( $val ));
        #prt("\n");
        prt("$key $num\n");
#        $val =~ s/([^[:ascii:]]+)/unidecode($1)/ge;
#        $len = length($val);
#        $num = '';
#        for ($i = 0; $i < $len; $i++) {
#            $i2 = $i + 1;
#            $ch = substr($val,$i,1);
#            prt("$i2: $ch ");
#            last if ($ch eq '{');
#        }
#        for (; $i < $len; $i++) {
#            $ch = substr($val,$i,1);
#            last if ($ch eq '}');
#            $num .= $ch;
#        }
#        #if ($val =~ /\\x\{(.+)\}/) {
#        #    $num = $1;
#        #    prt("$key $num\n");
#        #}
#        #$num = chr($val);
#        prt("$key $num\n");
    }
    pgm_exit(1,"TEMP EXIT\n");
}

sub clear_hash_counts($) {
    my $rh = shift;
    my @arr = keys %{$rh};  # like \%entities;
    my ($ent,$ra);
    foreach $ent (@arr) {
        $ra = ${$rh}{$ent};
        #$tvers = ${$ra}[0];
        #$code = ${$ra}[1];
        ${$ra}[2] = 0;
    }
}

#############################################################################
# from : http://www.w3.org/TR/xml-entity-names/#htmlmathml
# DWN: htmlmathml.json
# {
# "characters": {
#  "AElig": "\u00C6",
#  "AMP": "\u0026",
#  "NotSubset": "\u2282\u20D2", 
# from : http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f-a.dtd
# <!ENTITY NotSubset        "&#x02284;" ><!--alias ISOAMSN vnsub -->
# <!ENTITY NotPrecedesEqual "&#x02AAF;&#x00338;" ><!--alias ISOAMSN npre -->
#   "NotPrecedesEqual": "\u2AAF\u0338",
sub process_in_json($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$num,$lnn,$val);
    $line = join("",@lines);
    my $json = JSON->new->allow_nonref;
    my $rh = $json->decode( $line );
    my $tag = 'characters';
    if (!defined ${$rh}{$tag}) {
        prt("NO '$tag' defined in file $inf!\n");
        return;
    }
    my $rh2 = ${$rh}{$tag};
    # prt(Dumper($rh2));
    $load_log = 1;
    my @arr = sort mycmp_nc_sort keys(%{$rh2});
    my $cnt = scalar @arr;
    prt("Found $cnt entity keys...\n");
    foreach $tag (@arr) {
        $val = ${$rh2}{$tag};
        if ($val =~ /\\x\{([0-9a-f]+)\}/) {
            $num = hex($1);
            prt("$tag = $num\n");
        } else {
            ###$num = chr($val);
        }
    }

}

#############################################################################
# static const entity entities[] =
#{
#    /*
#    ** Markup pre-defined character entities
#    */
#    { "quot",    VERS_ALL|VERS_XML,    34 },
#    { "amp",     VERS_ALL|VERS_XML,    38 },
# ...
#    { NULL,       VERS_UNKNOWN, 0 }
# };
# /* all W3C defined document types */
# #define VERS_ALL           (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
# /* special flag */
# #define VERS_XML           65536u

sub process_in_file2($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$len,@arr,$ent,$ver,$code);
    $lnn = 0;
    my $intable = 0;
    foreach $line (@lines) {
        $lnn++;
        chomp $line;
        $line = trim_all($line);
        $len = length($line);
        next if ($len == 0);
        if ($intable) {
            if ($line =~ /^\{(.+)\}/) {
                $inc = trim_all($1);
                @arr = split(',',$inc);
                $len = scalar @arr;
                if ($len == 3) {
                    $ent = strip_double_quotes(trim_all($arr[0]));
                    $ver = trim_all($arr[1]);
                    $code = trim_all($arr[2]);
                    if ($ent eq 'NULL') {
                        prt("$lnn: $ent $ver $code - end table\n") if (VERB5());
                        $intable = 0;
                        last;
                    }
                    $entities{$ent} = [$ver, $code, 0];
                    prt("$lnn: '$ent' '$ver' '$code'\n") if (VERB5());
                } else {
                    pgm_exit(1,"$lnn DID NOT SPLIT 3, got $len [$line] *** FIX ME ***\n");
                }
            }
        } else {
            if ($line =~ /const\s+entity\s+entities/) {
                $intable = 1;
                prt("$lnn $line - start table\n") if (VERB5());
            }
        }
    }
    @arr = sort keys %entities;
    $len = scalar @arr;
    prt("Got $len entities from $inf\n");
    my ($ra);
    my $htm = '';
    my $cols = 4;
    my $wrap = 0;
    my $head = "<th>Name</th><th>Code</th><th>G</th><th>N</th>";
    $htm .= "<tr>";
    for ($wrap = 0; $wrap < $cols; $wrap++) {
        $htm .= $head;
    }
    $htm .= "</tr>\n";
    $wrap = 0;
    foreach $ent (@arr) {
        $ra = $entities{$ent};
        $ver = ${$ra}[0];
        $code = ${$ra}[1];
        $htm .= "<tr>" if ($wrap == 0);
        $htm .= "<td>&amp;$ent;</td><td>$code</td><td>&$ent;</td><td>&#$code;</td>";
        $wrap++;
        if ($wrap == $cols) {
            $wrap = 0;
            $htm .= "</tr>\n";
        }
    }
    if ($wrap) {
        while ($wrap < $cols) {
            $wrap++;
            $htm .= "<td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td>";
        }
        $htm .= "</tr>\n";
    }
    write_table("$len Tidy Entities",$htm,$out_file4);
    prt("Wrote $len entities to $out_file4\n");
}

#############################################################################
# <table>
# <tr 
# 0 title="U+00009 CHARACTER TABULATION" 
# 1 data-block="C0 Controls and Basic Latin" 
# 2 data-category="Cc" 
# 3 data-set="mmlextra">
#  <td class="character"> &#x00009;
#  <td class="named"><code>&amp;Tab;</code>
#  <td class="hex"><code>&amp;#x00009;</code>
#  <td class="dec"><code>&amp;#9;</code>
#  <td class="desc">CHARACTER TABULATION
# <tr title="U+0000A LINE FEED (LF)" data-block="C0 Controls and Basic Latin" data-category="Cc" data-set="mmlextra">
#  <td class="character"> &#x0000A;
#  <td class="named"><code>&amp;NewLine;</code>
#  <td class="hex"><code>&amp;#x0000A;</code>
#  <td class="dec"><code>&amp;#10;</code>
#  <td class="desc">LINE FEED (LF)
# <tr title="U+00021 EXCLAMATION MARK" data-block="C0 Controls and Basic Latin" data-category="Po" data-set="9573-2003-isonum">
#  <td class="character"> &#x00021;
#  <td class="named"><code>&amp;excl;</code>
#  <td class="hex"><code>&amp;#x00021;</code>
#  <td class="dec"><code>&amp;#33;</code>
#  <td class="desc">EXCLAMATION MARK
# <tr title="U+00022 QUOTATION MARK" data-block="C0 Controls and Basic Latin" data-category="Po" data-set="predefined xhtml1-special 9573-2003-isonum html5-uppercase">
#  <td class="character"> &#x00022;
#  <td class="named"><code>&amp;quot; &amp;QUOT;</code>
#  <td class="hex"><code>&amp;#x00022;</code>
#  <td class="dec"><code>&amp;#34;</code>
#  <td class="desc">QUOTATION MARK
# ...
sub process_in_file3($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$len,@arr,$ent,$ver,$code);
    my ($i,$ch,$cnt,@arr2,$class,$cnt2,$attr);
    my ($type,$value,$hexchar,$tmp,$decimal,$ra,$added);
    $lnn = 0;
    my @ents = ();
    my $intable = 0;
    my $tag = '';
    my $tail = '';
    my $intag = 0;
    my $trcnt = 0;
    my $entcnt = 0;
    my $modents = 0;
    $class = '';
    foreach $line (@lines) {
        $lnn++;
        chomp $line;
        $line = trim_all($line);
        $len = length($line);
        next if ($len == 0);
        if ($intable) {
            if ($line =~ /^<\/table>/) {
                prt("$lnn: End table...\n") if (VERB9());
                $intable = 0;
            } else {
                for ($i = 0; $i < $len; $i++) {
                    $ch = substr($line,$i,1);
                    if ($ch eq '<') {
                        $tag = $ch;
                        $i++;
                        $intag = 1;
                        for (; $i < $len; $i++) {
                            $ch = substr($line,$i,1);
                            if ($ch eq '>') {
                                if ($intag) {
                                    $tag .= $ch;
                                } else {
                                    # $tail .= $ch;
                                }
                                #prt("$tag $tail\n");
                                if (length($tag) || length($tail)) {
                                    if (length($tail)) {
                                        if (length($tag)) {
                                            @arr = space_split($tail);
                                            $cnt = scalar @arr;
                                            if ($tag =~ /^<tr/i) {
                                                # 0 title="U+00009 CHARACTER TABULATION" 
                                                # 1 data-block="C0 Controls and Basic Latin" 
                                                # 2 data-category="Cc" 
                                                # 3 data-set="mmlextra">
                                                foreach $attr (@arr) {
                                                    @arr2 = split("=",$attr);
                                                    $cnt2 = scalar @arr2;
                                                    if ($cnt2 == 2) {
                                                        $type = $arr2[0];
                                                        $value = strip_quotes($arr2[1]);
                                                        if ($type eq 'title') {
                                                            # title="U+00009 CHARACTER TABULATION" 
                                                        } elsif ($type eq 'data-block') {
                                                            # data-block="C0 Controls and Basic Latin"
                                                        } elsif ($type eq 'data-category') {
                                                            # data-category="Cc" 
                                                        } elsif ($type eq 'data-set') {
                                                            # data-set="mmlextra">
                                                        } else {
                                                            pgm_exit(1,"Failed with '$attr'. type '$type', value '$value' - FIX ME\n");
                                                        }

                                                    } else {
                                                        pgm_exit(1,"Failed with '$attr'\n");
                                                    }
                                                }
                                                prt("Tag: TR with $cnt attrs\n") if (VERB9())
                                                    ;
                                                $trcnt++;
                                            } elsif ($cnt == 1) {
                                                @arr2 = split("=",$arr[0]);
                                                $cnt2 = scalar @arr2;
                                                if (($cnt2 == 2) && ($arr2[0] =~ /class/i)) {
                                                    $class = strip_quotes($arr2[1]);
                                                    # prt("Tag: $tag with class '$class'\n");
                                                    if ($class eq 'character') {
                                                        prt("Tag: $tag with class '$class'\n") if (VERB5());
                                                    } elsif ($class eq 'named') {
                                                        prt("Tag: $tag with class '$class'\n") if (VERB5());
                                                    } elsif ($class eq 'hex') {
                                                        prt("Tag: $tag with class '$class'\n") if (VERB5());
                                                    } elsif ($class eq 'dec') {
                                                        prt("Tag: $tag with class '$class'\n") if (VERB5());
                                                    } elsif ($class eq 'desc') {
                                                        prt("Tag: $tag with class '$class'\n") if (VERB5());
                                                    } else {
                                                        pgm_exit(1,"Error: Unknown class '$class' - FIX ME\n");
                                                    }
                                                } else {
                                                    prtw("Warning: Tag: $tag with $cnt attrs, $tail - FIX ME\n");
                                                }
                                            } else {
                                                prt("Tag: $tag with $cnt attrs, $tail\n");
                                            }
                                        } else {
                                            prt("Tail: $tail\n");
                                        }
                                    } else {
                                        if ($tag =~ /<code/) {
                                            # skip this
                                        } elsif ($tag =~ /<\/code>/) {
                                            # skip this
                                        } else {
                                            prt("$tag ONLY\n");
                                        }
                                    }
                                }
                                $tag = '';
                                $tail = '';
                                $intag = 0;
                            } elsif ($ch eq '<') {
                                # start of another tag closes this...
                                $tail = trim_all($tail);
                                if (length($tag) || length($tail)) {
                                    if (length($tail)) {
                                        if (length($tag)) {
                                            @arr = space_split($tail);
                                            $cnt = scalar @arr;
                                            prt("tag: $tag with $cnt attrs CHECK ME\n");
                                        } else {
                                            #prt("tail: $tail, class $class\n");
                                            if ($class eq 'character') {
                                                #prt("tail: $tail, class $class\n");
                                                if ($tail =~ /^\&#x([0-9A-F]+);$/) {
                                                    $hexchar = $1;
                                                    prt("C: $class = 0x$hexchar\n") if (VERB2());
                                                } else {
                                                    pgm_exit(1,"Failed with tail '$tail', class '$class'\n");
                                                }
                                            } elsif ($class eq 'named') {
                                                #prt("tail: $tail, class $class\n");
                                                @arr2 = split(/\s+/,$tail);
                                                @ents = ();
                                                foreach $tmp (@arr2) {
                                                    if ($tmp =~ /^\&amp;(\w+);$/) {
                                                        $ent = $1;
                                                        # $entities{$ent} = [$ver, $code, 0];
                                                        if (defined $entities{$ent}) {
                                                            $ra = $entities{$ent};
                                                            $ver = ${$ra}[0];
                                                            $code = ${$ra}[1];  # maybe check the 'code' value
                                                            $added = ${$ra}[2]; # check if a duplicate
                                                            ${$ra}[2]++;
                                                            prtw("Warning: $ent, $ver, $code already added! CHECK ME!\n") if ($added);
                                                        } else {
                                                            $ver = 'VERS_CHECK';
                                                        }
                                                        prt("E: $class = '$ent', vers '$ver'\n") if (VERB2());
                                                        ###################################
                                                        push(@ents,[$ent,$ver]);
                                                        ###################################
                                                        # $entcnt++;
                                                    } else {
                                                        pgm_exit(1,"Failed with tail '$tail', class '$class'\n");
                                                    }
                                                }
                                            } elsif ($class eq 'hex') {
                                                prt("tail: $tail, class $class\n") if (VERB9());
                                            } elsif ($class eq 'dec') {
                                                # tail: &amp;#120170;, class dec
                                                # prt("tail: $tail, class $class\n");
                                                if ($tail =~ /^\&amp;\#(\d+);$/) {
                                                    $decimal = $1;
                                                    prt("D: $class = '$decimal'\n") if (VERB2());
                                                } else {
                                                    pgm_exit(1,"Failed with tail '$tail', class '$class'\n");
                                                }
                                            } elsif ($class eq 'desc') {
                                                prt("tail: $tail, class $class\n") if (VERB9());
                                            } else {
                                                pgm_exit(1,"Error: Unknown class '$class' - FIX ME\n");
                                            }
                                        }
                                    } else {
                                        if ($tag =~ /<code/) {
                                            # skip this
                                        } elsif ($tag =~ /<\/code>/) {
                                            # skip this
                                        } else {
                                            prt("$tag ONLY\n");
                                        }
                                    }
                                }
                                # start of NEXT tag
                                $tag = $ch;
                                $tail = '';
                                $intag = 1;
                            } elsif ($intag) {
                                if ($ch =~ /\s/) {
                                    $intag = 0;
                                    $tail = '';
                                } else {
                                    $tag .= $ch;
                                }
                            } else {
                                $tail .= $ch;
                            }
                        }
                    }
                }   # for the line
                # at end of line
                if (length($tag) || length($tail)) {
                    if (length($tail)) {
                        if (length($tag)) {
                            @arr = space_split($tail);
                            $cnt = scalar @arr;
                            prt("tag: $tag with $cnt attrs\n");
                        } else {
                            prt("tail: $tail, class $class\n") if (VERB5());
                        }
                    } else {
                        if ($tag =~ /<code/) {
                            # skip this
                        } elsif ($tag =~ /<\/code>/) {
                            # skip this
                        } else {
                            prtw("Warning: $tag ONLY - CHECK ME\n");
                        }
                    }
                }
                $tag = '';
                $tail = '';
                if (defined $ent && defined $ver && defined $decimal && @ents) {
                    #my %all_ents = ();  # entities found in current charref.html
                    #push(@ents,[$ent,$ver]);
                    my ($ra2);
                    foreach $ra (@ents) {
                        $ent = ${$ra}[0];
                        $ver = ${$ra}[1];
                        #######################################
                        if (defined $preferred_vals{$ent}) {
                            $code = $preferred_vals{$ent};
                            if ($code != $decimal) {
                                prt("Modifying $ent code $decimal to $code\n");
                                $modified_ents{$ent} = [$code, $decimal];
                                $decimal = $code;
                                $modents++;
                            }
                        }
                        #######################################
                        if (defined $exclude_pairs{$ent}) {
                            $ra = $exclude_pairs{$ent};
                            $ver = ${$ra}[0];
                            $code = ${$ra}[1];
                            prt("Excluding $ent, a 'surrogate' pair $ver:$code, dec $decimal!\n");
                        } else {
                            #######################################
                            if (defined $all_ents{$ent}) {
                                $ra2 = $all_ents{$ent};
                                my $pver = ${$ra2}[0];
                                my $pcod = ${$ra2}[1];
                                if (($pver ne $ver) || ($pcod != $decimal)) {
                                    pgm_exit(1,"Already have $ent, $pcod, $pver, NOW $decimal, $ver!\n");
                                }
                            } else {
                                $entcnt++;
                                $all_ents{$ent} = [$ver, $decimal, 0];
                            }
                            #######################################
                            ### Prepare list by VALUE
                            ### NOTE: can be repeated values
                            ### NOTE: this MISSSES the numeric equivalent ABOVE, 
                            ###       but not ALL are exact equivalents!
                            $all_by_val{$decimal} = [] if (!defined $all_by_val{$decimal});
                            $ra2 = $all_by_val{$decimal};
                            push(@{$ra2}, [$ent,$ver]);
                            #######################################
                            $len = length($ent);
                            $max_ent = $len if ($len > $max_ent);
                            $len = length($ver);
                            $max_ver = $len if ($len > $max_ver);
                            $len = length($decimal);
                            $max_dec = $len if ($len > $max_dec);
                        }
                    }
                    $ent = undef;
                    $ver = undef;
                    $decimal = undef;
                    @ents = ();
                }

            }
        } else {
            if ($line =~ /^<table>/) {
                prt("$lnn: Start table...\n") if (VERB9());
                $intable = 1;
            }
        }
    }
    #######################################################
    # $all_ents{$ent} = [$ver, $decimal, 0];
    #######################################################
    @arr = keys %all_ents;
    $cnt = scalar @arr;
    prt("Found $trcnt lines... $entcnt entities, $cnt keys...\n");
    @arr = keys %entities;
    foreach $ent (@arr) {
        $ra = $entities{$ent};
        $ver = ${$ra}[0];
        $decimal = ${$ra}[1];
        $added = ${$ra}[2];
        if ($added == 0) {
            prtw("Warning: Seem to have missed $ent, $ver, $decimal!\n");
        }
    }
    ######################################################
    ### write html table sorted by VALUE
    # push(@{$ra2}, [$ent,$ver]);
    @arr = sort mycmp_dec keys(%all_by_val);
    $cnt = scalar @arr; # just count of VALUES stored, each could be more than 1 entity
    prt("Got $cnt decimal value... Write to a html file...\n");
    my $htm = '';
    my $cols = 4;
    my $wrap = 0;
    my $entries = 0;
    my ($ra2);
    $htm .= "<tr>";
    for ($wrap = 0; $wrap < $cols; $wrap++) {
        $htm .= "<th>Name</th>";
        $htm .= "<th>Code</th>";
        $htm .= "<th>G</th>";
        $htm .= "<th>N</th>";
    }
    $htm .= "</tr>\n";
    $wrap = 0;
    foreach $code (@arr) {
        $ra = $all_by_val{$code};
        foreach $ra2 (@{$ra}) {
            $entries++;
            $ent = ${$ra2}[0];
            $ver = ${$ra2}[1];
            $htm .= "<tr>" if ($wrap == 0);
            $htm .= "<td>&amp;$ent;</td>";
            $htm .= "<td>$code</td>";
            $htm .= "<td>&$ent;</td>";
            $htm .= "<td>&#$code;</td>";
            $wrap++;
            if ($wrap == $cols) {
                $wrap = 0;
                $htm .= "</tr>\n";
            }
        }
    }
    if ($wrap) {
        while ($wrap < $cols) {
            $wrap++;
            $htm .= "<td>&nbsp;</td>";
            $htm .= "<td>&nbsp;</td>";
            $htm .= "<td>&nbsp;</td>";
            $htm .= "<td>&nbsp;</td>";
        }
        $htm .= "</tr>\n";
    }
    my ($name,$dir) = fileparse($inf);
    # my $out_file7 = $temp_dir.$PATH_SEP."tempallents1.html";
    write_table("$entries entries, $entcnt entities, $cnt values, from $name",$htm,$out_file7);
    prt("$entries entries, $cnt values, $entcnt entities, written to $out_file7...\n");
    ######################################################
    ### TODO: Write out modified ents
    # my $out_file8 = $temp_dir.$PATH_SEP."tempmodents1.html";
    # $modified_ents{$ent} = [$code, $decimal];
    @arr = sort keys( %modified_ents );
    $cnt = scalar @arr;
    if ($cnt != $modents) {
        pgm_exit(1,"Error: Need to store mods in an array of arrays!\n");
    }
    prt("Write out $cnt ($modents) modified entities...\n");
    $htm = "";
    $wrap = 0;
    $htm .= "<tr>";
    $htm .= "<th>Name</th>";
    $htm .= "<th>G</th>";
    $htm .= "<th>Code1</th>";
    $htm .= "<th>N</th>";
    $htm .= "<th>Code2</th>";
    $htm .= "<th>N</th>";
    $htm .= "</tr>\n";
    foreach $ent (@arr) {
        $ra2 = $modified_ents{$ent};
        $code = ${$ra2}[0];
        $ver  = ${$ra2}[1];
        $htm .= "<tr>";
        $htm .= "<td>&amp;$ent;</td>";
        $htm .= "<td>&$ent;</td>";
        $htm .= "<td>$code</td>";
        $htm .= "<td>&#$code;</td>";
        $htm .= "<td>$ver</td>";
        $htm .= "<td>&#$ver;</td>";
        $htm .= "</tr>\n";
    }
    write_table("$cnt modified entities, 2 values",$htm,$out_file8);
    prt("$cnt entities, written to $out_file8...\n");

    ######################################################
    # $load_log = 1;
    # pgm_exit(1,"TEMP EXIT 2\n");
}

#############################################################################
# list of some 253 entities, up to HTML 4 it seems
#<table class="wikitable sortable"> 
#<tr style="background:#efefef">
#<th>Name</th>
#<th>Character</th>
#<th>Unicode code point (decimal)</th>
#<th>Standard</th>
#<th>DTD<sup id="cite_ref-DTD_1-0" class="reference"><a href="#cite_note-DTD-1">[a]</a></sup></th>
#<th>Old ISO subset<sup id="cite_ref-ISOsubset_2-0" class="reference"><a href="#cite_note-ISOsubset-2">[b]</a></sup></th>
#<th>Description<sup id="cite_ref-Description_3-0" class="reference"><a href="#cite_note-Description-3">[c]</a></sup></th>
#</tr>
#<tr>
#<td>quot</td>
#<td>"</td>
#<td>U+0022 (34)</td>
#<td>HTML 2.0</td>
#<td>HTMLspecial</td>
#<td>ISOnum</td>
#<td><a href="/wiki/Quotation_mark" title="Quotation mark">quotation mark</a> <i>(APL quote)</i></td>
#</tr>
#<tr>
#<td>amp</td>
#<td>&amp;</td>
#<td>U+0026 (38)</td>
#<td>Original html specification(html 1.0) and HTML 2.0</td>
#<td>HTMLspecial and <a rel="nofollow" class="external free" href="http://info.cern.ch/MarkUp/html-spec/html.dtd">http://info.cern.ch/MarkUp/html-spec/html.dtd</a> (originally)</td>
#<td>ISOnum</td>
#<td><a href="/wiki/Ampersand" title="Ampersand">ampersand</a></td>
#</tr>
#<tr>
#<td>apos</td>
#<td>'</td>
#<td>U+0027 (39)</td>
#<td>XHTML 1.0</td>
#<td>HTMLspecial</td>
#<td>ISOnum</td>
#<td><a href="/wiki/Apostrophe" title="Apostrophe">apostrophe</a> <i>(apostrophe-quote)</i>; see <a href="#Entities_representing_special_characters_in_XHTML">below</a></td>
#</tr>

sub process_in_file4($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$ent,$dec,$ver,$ra);
    my $lnn = 0;
    my $intable = 0;
    my $trcnt = 0;
    my $trcnt2 = 0;
    my $intr = 0;
    my $tdcnt = 0;
    my $tvers = 'N/A';
    my $code = "Unk";
    my $verx = '';
    foreach $line (@lines) {
        chomp $line;
        $lnn++;
        if (!$intable) {
            if ($line =~ /<table\s+class=\"wikitable\s+sortable\">/) {
                $intable = 1;
                prt("$lnn: Start table $line\n");
            }
            next;
        }
        if ($line =~ /<\/table>/) {
            $intable = 0;
            prt("$lnn: End table $line\n");
        }
        if ($line =~ /^<tr>/) {
            $trcnt++;
            $intr = 1;
            $tdcnt = 0;
            next;
        }
        if ($line =~ /^<\/tr>/) {
            $intr = 0;
            if ($trcnt2) {
                if (defined $ent && defined $dec && defined $ver) {
                    prt("$lnn: '$ent', dec $dec, ver$verx $ver $tvers $code\n");
                    $ent = undef;
                    $dec = undef;
                    $ver = undef;
                } else {
                    pgm_exit(1,"$lnn: Failed to get threesome...\n");
                }
            }
            $trcnt2++;
            next;
        }
        # 422 '<td style="background:#ddd">&#160;</td>
        if ($line =~ /^<td(>|\s+)/) {
            $tdcnt++;
            if ($tdcnt == 1) {
                if ($line =~ /<td>(\w+)<\/td>/) {
                    $ent = $1;
                    $tvers = 'N/A';
                    $code = 'Unk';
                    # $entities{$ent} = [$ver, $code, 0];
                    if (defined $entities{$ent}) {
                        $ra = $entities{$ent};
                        $tvers = ${$ra}[0];
                        $code = ${$ra}[1];  # maybe check the 'code' value
                    }
                    prt("$lnn: $ent\n") if (VERB9());
                } else {
                    pgm_exit(1,"$lnn: First TD failed '$line'\n");
                }
            } elsif ($tdcnt == 3) {
                if ($line =~ /<td>U.+\((\d+)\)<\/td>/) {
                    $dec = $1;
                } else {
                    pgm_exit(1,"$lnn: Third TD failed '$line'\n");
                }
            } elsif ($tdcnt == 4) {
                $verx = '';
                if ($line =~ /^<td>HTML\s+(.+)<\/td>/) {
                    $ver = $1;
                } elsif ($line =~ /Original\s+html\h+specification\(html 1.0\)/) {
                    $ver = '1.0';
                } elsif ($line =~ /^<td>XHTML\s+(.+)<\/td>/) {
                    $ver = $1;
                    $verx = 'X';
                } else {
                    pgm_exit(1,"$lnn: Forth TD failed '$line'\n");
                }
            }
        }
    }
    prt("$lnn: EOF - tr count $trcnt\n");
    $load_log = 1;
    pgm_exit(1,"TEMP EXIT 2\n");

}

# ========================================================================
# HTML 4
# https://www.w3.org/TR/REC-html40/sgml/entities.html
# <!ENTITY nbsp   CDATA "&#160;" -- no-break space = non-breaking space,
#                                  U+00A0 ISOnum -->
# <!ENTITY lang     CDATA "&#9001;" -- left-pointing angle bracket = bra,
#                                     U+2329 ISOtech -->
#<!-- lang is NOT the same character as U+003C 'less than' 
#     or U+2039 'single left-pointing angle quotation mark' -->
#<!ENTITY rang     CDATA "&#9002;" -- right-pointing angle bracket = ket,
#                                     U+232A ISOtech -->
#<!-- rang is NOT the same character as U+003E 'greater than' 
#     or U+203A 'single right-pointing angle quotation mark' -->
# to
# <!ENTITY euro   CDATA "&#8364;"  -- euro sign, U+20AC NEW -->
# In comparison with Tidy
# Got 3 WARNINGS...
# Warning: 391: ent lang, code 9001 vs 10216, vers VERS_FROM40
# Warning: 395: ent rang, code 9002 vs 10217, vers VERS_FROM40
# Warning: Seem to have missed apos, VERS_FROM40|VERS_XML, 39!
# https://www.w3.org/TR/html5/syntax.html#named-character-references
# lang = U+027E8 = 10216
# rang = U+027E9 = 10217
# and
# apos = U+00027 = 39
# ========================================================================
sub process_in_file4f($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$ent,$val,$code,$tvers,$tcode,$ra);
    my $lnn = 0;
    my $entcnt = 0;
    foreach $line (@lines) {
        chomp $line;
        $lnn++;
        if ($line =~ /<!ENTITY\s+/) {
            $entcnt++;
            if ($line =~ /<!ENTITY\s+(\w+)\s+CDATA\s+\"(.+)\"\s+/) {
                $ent = $1;
                $val = $2;
                if ($val =~ /^&\#(\d+);/) {
                    $code = $1;
                } else {
                    pgm_exit(1,"$lnn: Failed value '$val', line '$line'\n");
                }
                if (defined $entities{$ent}) {
                    $ra = $entities{$ent};
                    $tvers = ${$ra}[0];
                    $tcode = ${$ra}[1];  # maybe check the 'code' value
                    ${$ra}[2]++;
                } else {
                    pgm_exit(1,"$lnn: Failed ent '$ent', '$line'\n");
                }
                if ($code == $tcode) {
                    prt("$lnn: ent $ent, code $code, vers $tvers\n");
                } else {
                    prtw("Warning: $lnn: ent $ent, code $code vs $tcode, vers $tvers\n");
                }
            } else {
                pgm_exit(1,"$lnn: Failed decode '$line'\n");
            }
        }
    }
    prt("Found $entcnt ENTITY...\n");
    my @arr = keys %entities;
    my ($added);
    foreach $ent (@arr) {
        $ra = $entities{$ent};
        $tvers = ${$ra}[0];
        $code = ${$ra}[1];
        $added = ${$ra}[2];
        if ($added == 0) {
            prtw("Warning: Seem to have missed $ent, $tvers, $code!\n");
        }
    }

    pgm_exit(1,"TEMP EXIT 3\n");

}


# 2031 entities written to C:\GTools\perl\tempents.c...
# 2031 entities written to C:\GTools\perl\tempents.c.html...
# 2125 entities written to C:\GTools\perl\tempents2.c...
# 2125 entities written to C:\GTools\perl\tempents2.c.html...
# $all_ents{$ent} = [$ver, $decimal, 0];
sub do_output($$$) {
    my ($rh,$out,$inf) = @_; # like \%all_ents
    my $struc = 'static const entity entities[] =';
    my ($key,$ra,$ver,$dec,$line,$ent,@arr);
    my $td = '';
    my $htm = '';
    my $cols = 4;
    my $wrap = 0;
    my %decimals = ();
    if ($nocase_sort) {
        @arr = sort mycmp_nc_sort keys(%{$rh});
    } else {
        @arr = sort keys(%{$rh});
    }
    my @lines = ();
    push(@lines,$struc);
    push(@lines,"{");
    my $entcnt = 0;
    # $all_ents{$ent} = [$ver, $decimal, 0];
    $max_ent += 3;
    $max_ver += 1;
    # add thead
    my $head = "<th>Name</th><th>Code</th><th>G</th><th>N</th>";
    $htm .= "<tr>";
    for ($wrap = 0; $wrap < $cols; $wrap++) {
        $htm .= $head;
    }
    $htm .= "</tr>\n";
    $wrap = 0;

    foreach $key (@arr) {
        $ra = ${$rh}{$key};
        $ver = ${$ra}[0];
        $dec = ${$ra}[1];
        if (defined $decimals{$dec}) {
            $decimals{$dec}++;
            # is case sensitive, so is NORMAL to have repeated values
            # prtw("Warning: REPEATED decimal $ent $ver $dec\n");
        } else {
            $decimals{$dec} = 1;
        }
        # got ent=$key,$dec,$ver
        $td = "<td>&amp;$key;</td><td>$dec</td><td>&$key;</td><td>&#$dec;</td>";
        $htm .= "<tr>" if ($wrap == 0);
        $htm .= $td;
        $wrap++;
        if ($wrap == $cols) {
            $htm .= "</tr>\n";
            $wrap = 0;
        }
        $ent = "\"$key\",";
        $ver .= ',';
        if ($pad_struct) {
            $ent .= ' ' while (length($ent) < $max_ent);
            $ver .= ' ' while (length($ver) < $max_ver);
            $dec = ' '.$dec while (length($dec) < $max_dec);
            $line = "    { $ent$ver$dec },";
        } else {
            $line = "    { $ent $ver $dec },";
        }
        push(@lines,$line);
        $entcnt++;
    }
    #     { NULL,       VERS_UNKNOWN, 0 }
    $ent = 'NULL,';
    $ver = 'VERS_UNKNOWN';
    $ver .= ',';
    $dec = 0;
    if ($pad_struct) {
        $ent .= ' ' while (length($ent) < $max_ent);
        $ver .= ' ' while (length($ver) < $max_ver);
        $dec = ' '.$dec while (length($dec) < $max_dec);
        $line = "    { $ent$ver$dec }";
    } else {
        $line = "    { $ent $ver $dec },";
    }
    push(@lines,$line);
    $line = "};";
    push(@lines,$line);
    $line = join("\n",@lines)."\n";
    write2file($line,$out);
    prt("$entcnt entities written to $out...\n");
    # my ($n,$d,$e) = fileparse($out, qr/\.[^.]*/);
    if ($wrap) {
        #$td = "<td>&amp;$key;</td><td>$dec</td><td>&$key;</td><td>&#$dec;</td>";
        $td = "<td>&nbsp;</td><td>&ndsp;</td><td>&nbsp;</td><td>&nbsp;</td>";
        while ($wrap < $cols) {
            $htm .= $td;
            $wrap++;
        }
        $htm .= "</tr>\n";
    }
    my ($name,$dir) = fileparse($inf);
    $out .= ".html";
    write_table("$entcnt entities from $name",$htm,$out);
    prt("$entcnt entities written to $out...\n");
}


#          '&vee;' => {
#                       'codepoints' => [
#                                         8744
#                                       ],
#                       'characters' => "\x{2228}"
#                     },
# A problem with 'zwnj'?
# "&zwnj;": { "codepoints": [8204], "characters": "\u200C" }
# 
sub process_in_json2($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$num,$lnn,$val,$rh2,$ent,$key);
    my ($ra,$cnt,$tvers,$tcode,$ra2,$i,$msg,$ver);
    $line = join("",@lines);
    my $json = JSON->new->allow_nonref;
    my $rh = $json->decode( $line );
    #prt(Dumper($rh));
    #$load_log = 1;
    my @arr = sort keys %{$rh};
    $num = scalar @arr;
    prt("Got $num keys, from json input...\n");
    clear_hash_counts(\%entities);
    my %plus1 = ();
    my $keycnt = 0;
    my $rpts = 0;
    foreach $key (@arr) {
        $rh2 = ${$rh}{$key};
        $ent = $key;
        $ent =~ s/^&//;
        $ent =~ s/;$//;
        #if (defined ${$rh2}{codepoints} && defined ${$rh2}{characters}) {
        if (defined ${$rh2}{codepoints}) {
            $ra2 = ${$rh2}{codepoints};
            $cnt = scalar @{$ra2};
            # first codepoint
            $val = ${$ra2}[0];
            if (defined $entities{$ent}) {
                $ra = $entities{$ent};
                $tvers = ${$ra}[0];
                $tcode = ${$ra}[1];
                ${$ra}[2]++;
            } else {
                $tvers = 'VERS_CHECK';
                $tcode = $val;
            }
            if ($cnt == 2) {
                # MULTIPLE codepoints
                #               0      1      2 3    4
                $plus1{$ent} = [$tvers,$tcode,0,$cnt,$ra2];
            } elsif ($cnt == 1) {
                if (defined $json_ents{$ent}) {
                    $ra = $json_ents{$ent};
                    $ver = ${$ra}[0];
                    $val = ${$ra}[1];
                    if (($ver ne $tvers) || ($val != $tcode)) {
                        pgm_exit(1,"json_ents $ent ALREADY defined as [$ver, $val], now [$tvers, $tcode]\n");
                    }
                    $rpts++;
                } else {
                    $keycnt++;
                    $json_ents{$ent} = [$tvers, $tcode, 0];
                }
            } else {
                prt(Dumper($ra2));
                pgm_exit(1,"Reference array $cnt - not 1 or 2!\n");
            }
        } else {
            prt(Dumper($rh2));
            pgm_exit(1,"Reference hash does not contain 'codepoints'!\n");
        }
    }
    ##########################################################
    ### deal with those with two...
    my $htm = '';
    my $tmp = '';
    @arr = sort keys %plus1;
    $cnt = scalar @arr;
    $tmp = $keycnt + $cnt + $rpts;
    prt("Added $keycnt entities, with code, to json_ents hash... hv $rpts rpts, $cnt with 2, tot $tmp\n");
    my $head = "<th>#</th><th>Name</th><th>G</th><th>Code</th><th>N</th><th>Code</th><th>N</th><th>NN</th>";
    $tmp = '';
    if ($cnt) {
        $verbosity = 1;
        prt("Note: $cnt entities, have more than one codepoint!\n");
        ###prt(join(", ", @arr)."\n");
        $lnn = 0;
        $htm .= "<tr>";
        $htm .= $head;
        $htm .= "</tr>\n";
        foreach $ent (@arr) {
            $lnn++;
            $ra = $plus1{$ent};
            $tvers = ${$ra}[0];
            $tcode = ${$ra}[1];
            $cnt = $plus1{$ent}[3];
            $ra2 = $plus1{$ent}[4];
            $msg = "$lnn: $ent, $tvers, $tcode - ";
            $htm .= "<tr><td>$lnn</td><td>&amp;$ent;</td><td>&$ent;</td>";
            $tmp = '';
            for ($i = 0; $i < $cnt; $i++) {
                $val = ${$ra2}[$i];
                $msg .= "$val ";
                $htm .= "<td>$val</td><td>&#$val;</td>";
                $tmp .= "&#$val;";
            }
            $htm .= "<td>$tmp</td>";
            prt("$msg\n") if (VERB5());
            $htm .= "</tr>\n";
        }
        $cnt = scalar @arr;
        prt("Display of $cnt entities, with more than one codepoint!\n");
        write_table("$cnt Multi-codepoints entities",$htm,$out_file3);
        prt("HTML table written to $out_file3...\n");
    }
}

sub compare_ent_hashes($$) {
    my ($rh1,$rh2) = @_;
    my @arr1 = sort keys(%{$rh1});
    my @arr2 = sort keys(%{$rh2});
    my $cnt1 = scalar @arr1;
    my $cnt2 = scalar @arr2;
    if (!$cnt1) {
        prt("Warning: First hash has NO keys...\n");
        return;
    }
    if (!$cnt2) {
        prt("Warning: Second hash has NO keys...\n");
        return;
    }
    prt("Comparing 2 hashes: $cnt1 and $cnt2 keys respectively..\n");
    my ($key,$ra1,$vers1,$code1,$ra2,$vers2,$code2,$ent);
    my $samecnt = 0;
    my $diffcnt = 0;
    my $misscnt1 = 0;
    my $misscnt2 = 0;
    my $htm = "<tr>";
    $htm .= "<th>#</th><th>Name</th><th>G</th>";
    $htm .= "<th>Code</th><th>N</th>";
    $htm .= "<th>Code</th><th>N</th>";
    $htm .= "</tr>\n";
    foreach $key (@arr1) {
        $ent = $key;
        $ra1 = ${$rh1}{$key};
        $vers1 = ${$ra1}[0];
        $code1 = ${$ra1}[1];
        if (defined ${$rh2}{$key}) {
            $ra2 = ${$rh2}{$key};
            $vers2 = ${$ra2}[0];
            $code2 = ${$ra2}[1];
            if (($vers1 eq $vers2) && ($code1 == $code2)) {
                # the SAME - no interest...
                $samecnt++;
            } else {
                prt("Diff: ent $key - rh1 $vers1, $code1 vs rh2 $vers2, $code2\n") if (VERB2());
                $diffcnt++;
                $htm .= "<tr>";
                $htm .= "<td>D: $diffcnt</td>";
                $htm .= "<td>&amp;$ent;</td><td>&$ent;</td>";
                $htm .= "<td>$code1</td><td>&#$code1;</td>";
                $htm .= "<td>$code2</td><td>&#$code2;</td>";
                $htm .= "</tr>\n"
            }
            # mark BOTH as compared
            ${$ra1}[2] = 1;
            ${$ra2}[2] = 1;
        } else {
            prt("Missed: ent $key $vers1, $code1 ONLY in rh1\n") if (VERB2());
            $misscnt1++;
            $htm .= "<tr>";
            $htm .= "<td>M1 $misscnt1</td>";
            $htm .= "<td>&amp;$ent;</td><td>&$ent;</td>";
            $htm .= "<td>$code1</td><td>&#$code1;</td>";
            $htm .= "<td>&nbsp;</td><td>&nbsp;</td>";
            $htm .= "</tr>\n"
        }
    }
    foreach $key (@arr2) {
        $ent = $key;
        $ra2 = ${$rh2}{$key};
        $vers2 = ${$ra2}[0];
        $code2 = ${$ra2}[1];
        next if (${$ra2}[2]);
        prt("Missed: ent $key $vers2, $code2 ONLY in rh2\n") if (VERB2());
        $misscnt2++;
        $htm .= "<tr>";
        $htm .= "<td>M2 $misscnt2</td>";
        $htm .= "<td>&amp;$ent;</td><td>&$ent;</td>";
        $htm .= "<td>&nbsp;</td><td>&nbsp;</td>";
        $htm .= "<td>$code2</td><td>&#$code2;</td>";
        $htm .= "</tr>\n"
    }
    prt("Done 2 hashes: $cnt1 and $cnt2 keys - same $samecnt, diff $diffcnt, miss1 $misscnt1, miss2 $misscnt2...\n");
    write_table("Hash Differences - same $samecnt, diff $diffcnt, miss1 $misscnt1, miss2 $misscnt2",$htm,$out_file5);
    prt("Has differences written to $out_file5\n");
}

sub show_array_ref($) {
    my $ra = shift;
    my ($tmp,$cnt);
    $cnt = scalar @{$ra};
    prt("Show of ra of $cnt values...\n");
    $cnt = 0;
    foreach $tmp (@{$ra}) {
        $cnt++;
        prt("$cnt: '$tmp'\n");
    }
}

# Character               Entity Name          Entity Number     Description
#         1             2                  3                 4  
# <tr><td>&nbsp;</td><td></td>         <td>&amp;#32;</td><td>Space</td>          </tr>
# <tr><td>&#33;</td> <td></td>         <td>&amp;#33;</td><td>Exclamation mark</td></tr>
# <tr><td>&#38;</td> <td>&amp;amp;</td><td>&amp;#38;</td><td>Ampersand</td>       </tr>
# tmp '&#9830;</td>'
# tmp '&amp;diams;</td>'
# tmp '&amp;#9830;</td>'
# tmp 'Diamond</td>'
sub get_cols($$$$$) {
    my ($line,$rnum,$rent,$rdec,$rdesc) = @_;
    $line =~ s/^<tr>//;
    $line =~ s/<\/tr>$//;
    my $len = length($line);
    my @arr = split(/<td>/,$line);
    my $cnt = scalar @arr;
    #prt("cnt = $cnt, line '$line'\n");
    my ($tmp);
    my ($i,$num,$ent,$dec,$desc);
    my @arr2 = ();
    $i = 0;
    foreach $tmp (@arr) {
        $i++;
        # prt("$i: tmp '$tmp'\n");
        next if ($tmp =~ /^\s*$/);
        next if (length($tmp) == 0);
        push(@arr2,$tmp);
        #prt("$i: tmp '$tmp'\n");
    }
    $cnt = scalar @arr2;
    if ($cnt == 4) {
        # extract each column value
        $i = 0;
        # for ($i = 0; $i < $cnt; $i++) {
        foreach $tmp (@arr2) {
            # $tmp = $arr2[$i];
            if ($tmp =~ /<\/td>$/) {
                $tmp =~ s/<\/td>$//;
            }
            if ($i == 0) {
                if ($tmp =~ /^&\#(\d+);$/) {
                    $num = $1;
                } elsif ($tmp eq '&nbsp;') {
                    $num = 160;
                } else {
                    prt("$i: tmp '$tmp'\n");
                    show_array_ref(\@arr2);
                    return 0;
                }
            } elsif ($i == 1) {
                if ($tmp =~ /^&amp;(\w+);$/) {
                    $ent = $1;
                } elsif ($tmp =~ /^\s*$/) {
                    $ent = '';
                } else {
                    prt("$i: tmp '$tmp'\n");
                    return 0;
                }
            } elsif ($i == 2) {
                if ($tmp =~ /^&amp;\#(\d+);$/) {
                    $dec = $1;
                } else {
                    prt("$i: tmp '$tmp'\n");
                    return 0;
                }
            } elsif ($i == 3) {
                $desc = $tmp;
            }
            $i++;
        }
        ${$rnum} = $num;
        ${$rent} = $ent;
        ${$rdec} = $dec;
        ${$rdesc} = $desc;
        return 1;
    }
    return 0;
}

# Just 238 entities
# my $in_file6 = 'C:\Users\user\Documents\Tidy\html-entities.html';
# 'C:\Users\user\Documents\Tidy\html-entities.html';
# from : https://www.freeformatter.com/html-entities.html
# <table class="bordered-table zebra-striped" style="font-size:11px;">
# HTML entities.</p><table class="bordered-table zebra-striped" style="font-size:11px;">
sub process_in_file6($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$len,$num,$ent,$dec,$desc,$ra,$tvers,$tcode);
    my $lnn = 0;
    my $intable = 0;
    my $intbody = 0;
    my $tblcnt = 0;
    my $entcnt = 0;
    my $htm = '';
    clear_hash_counts(\%entities);
    foreach $line (@lines) {
        chomp $line;
        $line = trim_all($line);
        $lnn++;
        $len = length($line);
        next if ($len == 0);
        if ($line =~ /^<table/) {
            pgm_exit(1,"$lnn: Already IN table! '$line'\n") if ($intable);
            prt("$lnn: Start table '$line'\n") if (VERB9());
            $intable = 1;
            $tblcnt++;
            next;
        } elsif ($line =~ /^<\/table/) {
            pgm_exit(1,"$lnn: NOT IN table! '$line'\n") if (!$intable);
            prt("$lnn: End   table '$line'\n") if (VERB9());
            $intable = 0;
            next;
        }
        if ($intable) {
            if ($line =~ /<tbody>/) {
                pgm_exit(1,"$lnn: Already IN tbody! '$line'\n") if ($intbody);
                prt("$lnn: Start tbody '$line'\n") if (VERB9());
                $intbody = 1;
                next;
            } elsif ($line =~ /<\/tbody>/) {
                pgm_exit(1,"$lnn: NOT IN tbody! '$line'\n") if (!$intbody);
                prt("$lnn: End   tbody '$line'\n") if (VERB9());
                $intbody = 0;
                next;
            }
            if ($intbody) {
                if ($line =~ /^<tr>/) {
                    $ent = '';
                    if (get_cols($line,\$num,\$ent,\$dec,\$desc)) {
                        $len = length($ent);
                        next if ($len == 0);
                        $entcnt++;
                        if (defined $entities{$ent}) {
                            $ra = $entities{$ent};
                            $tvers = ${$ra}[0];
                            $tcode = ${$ra}[1];
                            ${$ra}[2]++;
                            prtw("Warning: free_ents $ent $tvers, tidy $tcode vs $dec\n") if ($tcode != $dec);
                        } else {
                            $tvers = 'VERS_CHECK';
                            $tcode = $dec;
                        }
                        $free_ents{$ent} = [$tvers, $tcode, 0];
                        $htm .= "<tr><td>$entcnt</td><td>&amp;$ent;</td><td>$dec</td><td>&$ent;</td>";
                        $htm .= "<td>&#$dec;</td>";
                        $htm .= "<td>$tvers</td>";
                        $htm .= "<td>$desc</td>";
                        $htm .= "</tr>\n";
                    } else {
                        pgm_exit(1,"$lnn: TR NOT HANDLED! '$line'\n");
                    }
                } else {
                    pgm_exit(1,"$lnn: NOT HANDLED! '$line'\n");
                }
            }
        }
    }

    my @arr = keys %entities;
    my ($added);
    my $missed = 0;
    foreach $ent (@arr) {
        $ra = $entities{$ent};
        $tvers = ${$ra}[0];
        $tcode = ${$ra}[1];
        $added = ${$ra}[2];
        $dec = $tcode;
        if ($added == 0) {
            prtw("Warning: Seem to have missed $ent, $tvers, $tcode!\n") if (VERB9());
            $htm .= "<tr><td colspan='7'>*** NOT FOUND IN FreeFormatter.com TABLES ***</td></tr>\n" if ($missed == 0);
            $missed++;
            $htm .= "<tr><td>$missed</td><td>&amp;$ent;</td><td>$dec</td><td>&$ent;</td>";
            $htm .= "<td>&#$dec;</td>";
            $htm .= "<td>$tvers</td>";
            $htm .= "<td>NOT FOUND in tables</td>";
            $htm .= "</tr>\n";
        }
    }

    write_table("$entcnt FreeFormatter.com, missing $missed",$htm,$out_file6);
    prt("$tblcnt tables, found $entcnt entities... written to $out_file6\n");

    clear_hash_counts(\%entities);
    # pgm_exit(1,"TEMP EXIT 6\n");
}

#########################################
### MAIN ###
###parse_args(@ARGV);
### check_hash();
process_in_file2($in_file2);    # 253 - 'C:\Users\user\Documents\Tidy\entities.c';
#process_in_file6($in_file6);    # 238 'C:\Users\user\Documents\Tidy\html-entities.html';
#process_in_file4f($in_file4f);  # 'C:\Users\user\Documents\Tidy\html4-ents.html';
#process_in_file4($in_file4);    # 253 - 'C:\Users\user\Documents\Tidy\wiki-list-entities.html';
process_in_file3($in_file3);    # 'C:\Users\user\Documents\Tidy\charref.html'; => %all_ents
do_output(\%all_ents,$out_file,$in_file3);
# process_in_json($in_file1);   # 'C:\Users\user\Documents\Tidy\htmlmathml.json';
process_in_json2($in_file5j);   # = 'C:\Users\user\Documents\Tidy\html5-ents.json';
do_output(\%json_ents,$out_file2,$in_file5j);
compare_ent_hashes(\%all_ents,\%json_ents);
pgm_exit(0,"");
########################################

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v.*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if ($verb);
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_file) ==  0) {
            $in_file = $def_file;
            prt("Set DEFAULT input to [$in_file]\n");
        }
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out <file>  (-o) = Write output to this file.\n");
}

# Hypertext Markup Language - 2.0 September 22, 1995
# https://www.w3.org/MarkUp/html-spec/html-spec_9.html#SEC9.7
# <!--    Modified for use in HTML        
# $Id: ISOlat1.sgml,v 1.2 1994/11/30 23:45:12 connolly Exp $ -->
sub get_html2_ents() {
    my $txt = <<EOF;
<!ENTITY AElig  CDATA "&#198;" -- capital AE diphthong (ligature) -->
<!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
<!ENTITY Acirc  CDATA "&#194;" -- capital A, circumflex accent -->
<!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
<!ENTITY Aring  CDATA "&#197;" -- capital A, ring -->
<!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
<!ENTITY Auml   CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
<!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
<!ENTITY ETH    CDATA "&#208;" -- capital Eth, Icelandic -->
<!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
<!ENTITY Ecirc  CDATA "&#202;" -- capital E, circumflex accent -->
<!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
<!ENTITY Euml   CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
<!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
<!ENTITY Icirc  CDATA "&#206;" -- capital I, circumflex accent -->
<!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
<!ENTITY Iuml   CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
<!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
<!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
<!ENTITY Ocirc  CDATA "&#212;" -- capital O, circumflex accent -->
<!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
<!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
<!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
<!ENTITY Ouml   CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
<!ENTITY THORN  CDATA "&#222;" -- capital THORN, Icelandic -->
<!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
<!ENTITY Ucirc  CDATA "&#219;" -- capital U, circumflex accent -->
<!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
<!ENTITY Uuml   CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
<!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
<!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
<!ENTITY acirc  CDATA "&#226;" -- small a, circumflex accent -->
<!ENTITY aelig  CDATA "&#230;" -- small ae diphthong (ligature) -->
<!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
<!ENTITY aring  CDATA "&#229;" -- small a, ring -->
<!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
<!ENTITY auml   CDATA "&#228;" -- small a, dieresis or umlaut mark -->
<!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
<!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
<!ENTITY ecirc  CDATA "&#234;" -- small e, circumflex accent -->
<!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
<!ENTITY eth    CDATA "&#240;" -- small eth, Icelandic -->
<!ENTITY euml   CDATA "&#235;" -- small e, dieresis or umlaut mark -->
<!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
<!ENTITY icirc  CDATA "&#238;" -- small i, circumflex accent -->
<!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
<!ENTITY iuml   CDATA "&#239;" -- small i, dieresis or umlaut mark -->
<!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
<!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
<!ENTITY ocirc  CDATA "&#244;" -- small o, circumflex accent -->
<!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
<!ENTITY oslash CDATA "&#248;" -- small o, slash -->
<!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
<!ENTITY ouml   CDATA "&#246;" -- small o, dieresis or umlaut mark -->
<!ENTITY szlig  CDATA "&#223;" -- small sharp s, German (sz ligature) -->
<!ENTITY thorn  CDATA "&#254;" -- small thorn, Icelandic -->
<!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
<!ENTITY ucirc  CDATA "&#251;" -- small u, circumflex accent -->
<!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
<!ENTITY uuml   CDATA "&#252;" -- small u, dieresis or umlaut mark -->
<!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
<!ENTITY yuml   CDATA "&#255;" -- small y, dieresis or umlaut mark -->
EOF
    return $txt;
}

# from : https://www.w3.org/TR/REC-html32#latin1
sub get_html3_ents() {
    my $txt = <<EOF;
    <!ENTITY nbsp   CDATA "&#160;" -- no-break space -->
    <!ENTITY iexcl  CDATA "&#161;" -- inverted exclamation mark -->
    <!ENTITY cent   CDATA "&#162;" -- cent sign -->
    <!ENTITY pound  CDATA "&#163;" -- pound sterling sign -->
    <!ENTITY curren CDATA "&#164;" -- general currency sign -->
    <!ENTITY yen    CDATA "&#165;" -- yen sign -->
    <!ENTITY brvbar CDATA "&#166;" -- broken (vertical) bar -->
    <!ENTITY sect   CDATA "&#167;" -- section sign -->
    <!ENTITY uml    CDATA "&#168;" -- umlaut (dieresis) -->
    <!ENTITY copy   CDATA "&#169;" -- copyright sign -->
    <!ENTITY ordf   CDATA "&#170;" -- ordinal indicator, feminine -->
    <!ENTITY laquo  CDATA "&#171;" -- angle quotation mark, left -->
    <!ENTITY not    CDATA "&#172;" -- not sign -->
    <!ENTITY shy    CDATA "&#173;" -- soft hyphen -->
    <!ENTITY reg    CDATA "&#174;" -- registered sign -->
    <!ENTITY macr   CDATA "&#175;" -- macron -->
    <!ENTITY deg    CDATA "&#176;" -- degree sign -->
    <!ENTITY plusmn CDATA "&#177;" -- plus-or-minus sign -->
    <!ENTITY sup2   CDATA "&#178;" -- superscript two -->
    <!ENTITY sup3   CDATA "&#179;" -- superscript three -->
    <!ENTITY acute  CDATA "&#180;" -- acute accent -->
    <!ENTITY micro  CDATA "&#181;" -- micro sign -->
    <!ENTITY para   CDATA "&#182;" -- pilcrow (paragraph sign) -->
    <!ENTITY middot CDATA "&#183;" -- middle dot -->
    <!ENTITY cedil  CDATA "&#184;" -- cedilla -->
    <!ENTITY sup1   CDATA "&#185;" -- superscript one -->
    <!ENTITY ordm   CDATA "&#186;" -- ordinal indicator, masculine -->
    <!ENTITY raquo  CDATA "&#187;" -- angle quotation mark, right -->
    <!ENTITY frac14 CDATA "&#188;" -- fraction one-quarter -->
    <!ENTITY frac12 CDATA "&#189;" -- fraction one-half -->
    <!ENTITY frac34 CDATA "&#190;" -- fraction three-quarters -->
    <!ENTITY iquest CDATA "&#191;" -- inverted question mark -->
    <!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
    <!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
    <!ENTITY Acirc  CDATA "&#194;" -- capital A, circumflex accent -->
    <!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
    <!ENTITY Auml   CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
    <!ENTITY Aring  CDATA "&#197;" -- capital A, ring -->
    <!ENTITY AElig  CDATA "&#198;" -- capital AE diphthong (ligature) -->
    <!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
    <!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
    <!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
    <!ENTITY Ecirc  CDATA "&#202;" -- capital E, circumflex accent -->
    <!ENTITY Euml   CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
    <!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
    <!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
    <!ENTITY Icirc  CDATA "&#206;" -- capital I, circumflex accent -->
    <!ENTITY Iuml   CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
    <!ENTITY ETH    CDATA "&#208;" -- capital Eth, Icelandic -->
    <!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
    <!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
    <!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
    <!ENTITY Ocirc  CDATA "&#212;" -- capital O, circumflex accent -->
    <!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
    <!ENTITY Ouml   CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
    <!ENTITY times  CDATA "&#215;" -- multiply sign -->
    <!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
    <!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
    <!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
    <!ENTITY Ucirc  CDATA "&#219;" -- capital U, circumflex accent -->
    <!ENTITY Uuml   CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
    <!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
    <!ENTITY THORN  CDATA "&#222;" -- capital THORN, Icelandic -->
    <!ENTITY szlig  CDATA "&#223;" -- small sharp s, German (sz ligature) -->
    <!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
    <!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
    <!ENTITY acirc  CDATA "&#226;" -- small a, circumflex accent -->
    <!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
    <!ENTITY auml   CDATA "&#228;" -- small a, dieresis or umlaut mark -->
    <!ENTITY aring  CDATA "&#229;" -- small a, ring -->
    <!ENTITY aelig  CDATA "&#230;" -- small ae diphthong (ligature) -->
    <!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
    <!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
    <!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
    <!ENTITY ecirc  CDATA "&#234;" -- small e, circumflex accent -->
    <!ENTITY euml   CDATA "&#235;" -- small e, dieresis or umlaut mark -->
    <!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
    <!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
    <!ENTITY icirc  CDATA "&#238;" -- small i, circumflex accent -->
    <!ENTITY iuml   CDATA "&#239;" -- small i, dieresis or umlaut mark -->
    <!ENTITY eth    CDATA "&#240;" -- small eth, Icelandic -->
    <!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
    <!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
    <!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
    <!ENTITY ocirc  CDATA "&#244;" -- small o, circumflex accent -->
    <!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
    <!ENTITY ouml   CDATA "&#246;" -- small o, dieresis or umlaut mark -->
    <!ENTITY divide CDATA "&#247;" -- divide sign -->
    <!ENTITY oslash CDATA "&#248;" -- small o, slash -->
    <!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
    <!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
    <!ENTITY ucirc  CDATA "&#251;" -- small u, circumflex accent -->
    <!ENTITY uuml   CDATA "&#252;" -- small u, dieresis or umlaut mark -->
    <!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
    <!ENTITY thorn  CDATA "&#254;" -- small thorn, Icelandic -->
    <!ENTITY yuml   CDATA "&#255;" -- small y, dieresis or umlaut mark -->
EOF
    return $txt;
}


# eof - tidyentities.pl
index -|- top