Generated: Tue Feb 2 17:54:39 2010 from genentities.pl 2008/12/01 19.2 KB.
#!/perl -w
# NAME: genentities.pl
# AIM: To prepare a HTML page, with _ALL_ entities up to 32767
# 11/30/2008 - geoff mclane - http://geoffair.net/mperl/
#
# Flow: build the entity table into $out_file, pipe that file through the
# external 'tidydev' command, store the tidied text in $out_file2 and hand
# $out_file2 to system().
use strict;
use warnings;

# prt(), open_log(), close_log() and write2file() are not defined in this
# file - presumably they are supplied by logfile.pl (verify there).
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";

# log file stuff
# Keep only the file-name part of $0, whichever path separator was used, so
# the log file name below never contains directory components.
(my $pgmname = $0) =~ s{\A.*[\\/]}{}s;
my $outfile = "temp.$pgmname.txt";
open_log($outfile);

my $out_file  = "tempent.htm";     # raw generated page
my $out_file2 = "tempent2.htm";    # the same page after tidy

# OPTION
my $usehex    = 0;      # true: row range labels in hex instead of decimal
my $usecn     = 1;      # true: wrap the table in <div class="cn">
my $nav_every = 800;    # emit a top/end navigation row about every N entities

# Code points that are shown as an empty cell instead of a character reference.
my %invalid = (
    0 => 1, 129 => 1, 141 => 1, 143 => 1, 144 => 1, 157 => 1
);

# Code point => named entity; these are emitted as &name; instead of &#NNN;
my %tidy_ents = (
    34 => "quot", 38 => "amp",
    # 39 => "apos", # something wrong here?
    60 => "lt", 62 => "gt",
    160 => "nbsp", 161 => "iexcl", 162 => "cent", 163 => "pound",
    164 => "curren", 165 => "yen", 166 => "brvbar", 167 => "sect",
    168 => "uml", 169 => "copy", 170 => "ordf", 171 => "laquo",
    172 => "not", 173 => "shy", 174 => "reg", 175 => "macr",
    176 => "deg", 177 => "plusmn", 178 => "sup2", 179 => "sup3",
    180 => "acute", 181 => "micro", 182 => "para", 183 => "middot",
    184 => "cedil", 185 => "sup1", 186 => "ordm", 187 => "raquo",
    188 => "frac14", 189 => "frac12", 190 => "frac34", 191 => "iquest",
    192 => "Agrave", 193 => "Aacute", 194 => "Acirc", 195 => "Atilde",
    196 => "Auml", 197 => "Aring", 198 => "AElig", 199 => "Ccedil",
    200 => "Egrave", 201 => "Eacute", 202 => "Ecirc", 203 => "Euml",
    204 => "Igrave", 205 => "Iacute", 206 => "Icirc", 207 => "Iuml",
    208 => "ETH", 209 => "Ntilde", 210 => "Ograve", 211 => "Oacute",
    212 => "Ocirc", 213 => "Otilde", 214 => "Ouml", 215 => "times",
    216 => "Oslash", 217 => "Ugrave", 218 => "Uacute", 219 => "Ucirc",
    220 => "Uuml", 221 => "Yacute", 222 => "THORN", 223 => "szlig",
    224 => "agrave", 225 => "aacute", 226 => "acirc", 227 => "atilde",
    228 => "auml", 229 => "aring", 230 => "aelig", 231 => "ccedil",
    232 => "egrave", 233 => "eacute", 234 => "ecirc", 235 => "euml",
    236 => "igrave", 237 => "iacute", 238 => "icirc", 239 => "iuml",
    240 => "eth", 241 => "ntilde", 242 => "ograve", 243 => "oacute",
    244 => "ocirc", 245 => "otilde", 246 => "ouml", 247 => "divide",
    248 => "oslash", 249 => "ugrave", 250 => "uacute", 251 => "ucirc",
    252 => "uuml", 253 => "yacute", 254 => "thorn", 255 => "yuml",
    402 => "fnof",
    913 => "Alpha", 914 => "Beta", 915 => "Gamma", 916 => "Delta",
    917 => "Epsilon", 918 => "Zeta", 919 => "Eta", 920 => "Theta",
    921 => "Iota", 922 => "Kappa", 923 => "Lambda", 924 => "Mu",
    925 => "Nu", 926 => "Xi", 927 => "Omicron", 928 => "Pi",
    929 => "Rho", 931 => "Sigma", 932 => "Tau", 933 => "Upsilon",
    934 => "Phi", 935 => "Chi", 936 => "Psi", 937 => "Omega",
    945 => "alpha", 946 => "beta", 947 => "gamma", 948 => "delta",
    949 => "epsilon", 950 => "zeta", 951 => "eta", 952 => "theta",
    953 => "iota", 954 => "kappa", 955 => "lambda", 956 => "mu",
    957 => "nu", 958 => "xi", 959 => "omicron", 960 => "pi",
    961 => "rho", 962 => "sigmaf", 963 => "sigma", 964 => "tau",
    965 => "upsilon", 966 => "phi", 967 => "chi", 968 => "psi",
    969 => "omega", 977 => "thetasym", 978 => "upsih", 982 => "piv",
    8226 => "bull", 8230 => "hellip", 8242 => "prime", 8243 => "Prime",
    8254 => "oline", 8260 => "frasl", 8472 => "weierp", 8465 => "image",
    8476 => "real", 8482 => "trade", 8501 => "alefsym",
    8592 => "larr", 8593 => "uarr", 8594 => "rarr", 8595 => "darr",
    8596 => "harr", 8629 => "crarr",
    8656 => "lArr", 8657 => "uArr", 8658 => "rArr", 8659 => "dArr",
    8660 => "hArr",
    8704 => "forall", 8706 => "part", 8707 => "exist", 8709 => "empty",
    8711 => "nabla", 8712 => "isin", 8713 => "notin", 8715 => "ni",
    8719 => "prod", 8721 => "sum", 8722 => "minus", 8727 => "lowast",
    8730 => "radic", 8733 => "prop", 8734 => "infin", 8736 => "ang",
    8743 => "and", 8744 => "or", 8745 => "cap", 8746 => "cup",
    8747 => "int", 8756 => "there4", 8764 => "sim", 8773 => "cong",
    8776 => "asymp", 8800 => "ne", 8801 => "equiv", 8804 => "le",
    8805 => "ge", 8834 => "sub", 8835 => "sup", 8836 => "nsub",
    8838 => "sube", 8839 => "supe", 8853 => "oplus", 8855 => "otimes",
    8869 => "perp", 8901 => "sdot",
    8968 => "lceil", 8969 => "rceil", 8970 => "lfloor", 8971 => "rfloor",
    9001 => "lang", 9002 => "rang", 9674 => "loz",
    9824 => "spades", 9827 => "clubs", 9829 => "hearts", 9830 => "diams",
    338 => "OElig", 339 => "oelig", 352 => "Scaron", 353 => "scaron",
    376 => "Yuml", 710 => "circ", 732 => "tilde",
    8194 => "ensp", 8195 => "emsp", 8201 => "thinsp", 8204 => "zwnj",
    8205 => "zwj", 8206 => "lrm", 8207 => "rlm",
    8211 => "ndash", 8212 => "mdash", 8216 => "lsquo", 8217 => "rsquo",
    8218 => "sbquo", 8220 => "ldquo", 8221 => "rdquo", 8222 => "bdquo",
    8224 => "dagger", 8225 => "Dagger", 8240 => "permil",
    8249 => "lsaquo", 8250 => "rsaquo", 8364 => "euro"
);

# Code points 128-159 => the code point emitted in their place.  The 0x0000
# entries are never emitted because the same keys are listed in %invalid,
# which is consulted first.
my %replaced = (
    128 => 0x20AC, 129 => 0x0000, 130 => 0x201A, 131 => 0x0192,
    132 => 0x201E, 133 => 0x2026, 134 => 0x2020, 135 => 0x2021,
    136 => 0x02C6, 137 => 0x2030, 138 => 0x0160, 139 => 0x2039,
    140 => 0x0152, 141 => 0x0000, 142 => 0x017D, 143 => 0x0000,
    144 => 0x0000, 145 => 0x2018, 146 => 0x2019, 147 => 0x201C,
    148 => 0x201D, 149 => 0x2022, 150 => 0x2013, 151 => 0x2014,
    152 => 0x02DC, 153 => 0x2122, 154 => 0x0161, 155 => 0x203A,
    156 => 0x0153, 157 => 0x0000, 158 => 0x017E, 159 => 0x0178
);

# Command line options handed to the external tidy command.
my $params = '-f temptidy.txt --tidy-mark no --wrap 99 --indent yes '.
    '--break-before-br yes --indent-attributes yes --vertical-space yes '.
    '--indent-spaces 1 --indent-cdata no --wrap-asp no --wrap-attributes no '.
    '--wrap-jste no --wrap-php no --wrap-script-literals no --wrap-sections no';

prt( "$0 ... generating entities, output to $out_file ...\n" );
gen_entities($out_file, 0, 32768, 32, 0);
my $tt = get_tidy_txt($out_file, $params);
if (length $tt) {
    write2file($tt, $out_file2);
    system($out_file2);
} else {
    # Do not write and launch an empty page when tidy produced nothing.
    prt("ERROR: No tidy output for $out_file, $out_file2 not written ...\n");
}
close_log($outfile, 0);
exit(0);

#################################################################
######### SUB ONLY ########

# main purpose
# gen_entities($out, $min, $max, $wrap, $load)
#   Writes an HTML page to $out holding a table of the character references
#   $min .. $max-1, $wrap per row.  A last, partial row is filled up with the
#   code points that follow $max-1.  When $load is true the written file is
#   passed to system().  Failures are reported through prt().
sub gen_entities {
    my ($out, $min, $max, $wrap, $load) = @_;
    return unless wrap_ok($wrap);
    my $html = build_entities_html($min, $max, $wrap);
    if (write_html_file($out, $html)) {
        system($out) if ($load);
    }
    return;
}

# Earlier variant of gen_entities() that printed to the file piece by piece.
# It produced the same page, so it now delegates; the name is kept so any
# existing caller continues to work.
sub gen_entities_vok {
    my ($out, $min, $max, $wrap, $load) = @_;
    return gen_entities($out, $min, $max, $wrap, $load);
}

# Returns true when $wrap is a usable column count; otherwise logs an error
# and returns false (a zero $wrap would die in the modulus/division below).
sub wrap_ok {
    my ($wrap) = @_;
    return 1 if defined $wrap && $wrap =~ /\A\d+\z/ && $wrap > 0;
    prt("ERROR: wrap must be a positive whole number of columns ...\n");
    return 0;
}

# Writes $html to $path.  Returns 1 on success; on failure logs through
# prt() and returns 0.
sub write_html_file {
    my ($path, $html) = @_;
    my $fh;
    if (!open($fh, '>', $path)) {
        prt("ERROR: Failed to create $path file ... $! ...\n");
        return 0;
    }
    my $ok = print {$fh} $html;
    # Buffered write errors may only surface at close, so check both.
    $ok = close($fh) && $ok;
    if (!$ok) {
        prt("ERROR: Failed to write $path file ... $! ...\n");
        return 0;
    }
    return 1;
}

# Returns the highest code point that ends up in the table: rows start at
# $min and the last row is always filled to $wrap cells.  For an empty range
# $max is returned unchanged.
sub last_entity_shown {
    my ($min, $max, $wrap) = @_;
    my $count = $max - $min;
    return $max if $count <= 0;
    my $rows = int(($count + $wrap - 1) / $wrap);
    return $min + ($rows * $wrap) - 1;
}

# Returns the <caption> line shared by all table generators.
sub caption_line {
    my ($min, $max, $wrap) = @_;
    my $last = last_entity_shown($min, $max, $wrap);
    return "<caption>Entities in the range $min to $last ...</caption>\n";
}

# Returns the text placed inside the <td> for one code point: a blank for
# %invalid entries, &name; for %tidy_ents entries, the substitute numeric
# reference for %replaced entries, else the plain numeric reference.
sub entity_cell_text {
    my ($code) = @_;
    if (defined $invalid{$code}) {
        return " ";
    } elsif (defined $tidy_ents{$code}) {
        return "&".$tidy_ents{$code}.";";
    } elsif (defined $replaced{$code}) {
        return "&#".$replaced{$code}.";";
    }
    return "&#$code;";
}

# Returns the "first-last" label for the row starting at $first: 4+ digit
# hex when $usehex is set, else zero-padded 5+ digit decimal.
sub range_label {
    my ($first, $wrap) = @_;
    my $last = $first + $wrap - 1;
    if ($usehex) {
        return dec2hex($first).'-'.dec2hex($last);
    }
    return sprintf('%05d-%05d', $first, $last);
}

# Returns the heading row: "Range" followed by the column offsets 0..$wrap-1.
sub column_header_row {
    my ($wrap) = @_;
    my $row = "<tr>\n<th>Range</th>\n";
    $row .= "<th>$_</th>\n" for (0 .. $wrap - 1);
    $row .= "</tr>\n";
    return $row;
}

# Builds and returns the complete page for gen_entities().
sub build_entities_html {
    my ($min, $max, $wrap) = @_;

    my $html = get_html_head();
    $html .= "<div class=\"cn\">\n" if ($usecn);
    $html .= "<table align=\"center\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\" summary=\"table of entities\">\n";
    $html .= caption_line($min, $max, $wrap);
    $html .= column_header_row($wrap);

    # The table has $wrap entity columns plus the leading Range column.
    my $span = $wrap + 1;
    my $nav_row = "<tr>\n"
        . "<td colspan=\"$span\" align=\"center\">\n"
        . "<a href=\"#top\">top</a>\n"
        . " | \n"
        . "<a href=\"#end\">end</a>\n"
        . "</td>\n"
        . "</tr>\n";

    my $col       = 0;       # cells already written in the current row
    my $since_nav = 0;       # entities written since the last navigation row
    my $code      = $min;
    while ($code < $max) {
        if ($col == 0) {
            # Navigation rows go only between rows, never inside an open one.
            if ($since_nav >= $nav_every) {
                $html .= $nav_row;
                $since_nav = 0;
            }
            $html .= "<tr>\n";
            $html .= "<td nowrap>".range_label($code, $wrap)." </td>\n";
        }
        $html .= "<td>".entity_cell_text($code)."</td>\n";
        $code++;
        $col++;
        $since_nav++;
        if ($col == $wrap) {
            $col = 0;
            $html .= "</tr>\n";
        }
    }
    if ($col) {
        # Fill the last row with the code points that follow the range.
        while ($col < $wrap) {
            $html .= "<td>".entity_cell_text($code)."</td>\n";
            $col++;
            $code++;
        }
        $html .= "</tr>\n";
    }

    $html .= column_header_row($wrap);
    $html .= "</table>\n";
    $html .= "</div>\n" if ($usecn);
    $html .= get_end_links();
    $html .= get_html_valid();
    $html .= "<!-- generated by $pgmname on ". localtime(time()) . " for geoffair.net -->\n";
    $html .= "</body>\n";
    $html .= "</html>\n";
    return $html;
}

########################
####### NOT USED #######
# gen_entities_two_columns($out, $min, $max, $wrap)
#   Writes a two column table to $out: the range label and one cell with all
#   $wrap references of that row.  The written file is passed to system().
sub gen_entities_two_columns {
    my ($out, $min, $max, $wrap) = @_;
    return unless wrap_ok($wrap);

    my $html = get_html_head();
    $html .= "<table align=\"center\" border=\"0\" cellpadding=\"1\" cellspacing=\"1\" summary=\"table of entities\">\n";
    $html .= caption_line($min, $max, $wrap);

    my $col  = 0;
    my $code = $min;
    while ($code < $max) {
        if ($col == 0) {
            $html .= "<tr>\n";
            $html .= "<td>$code - ".($code + $wrap - 1)."</td>\n";
            $html .= "<td>";
        }
        $html .= " &#$code;";
        $code++;
        $col++;
        if ($col == $wrap) {
            $col = 0;
            $html .= "</td>\n";
            $html .= "</tr>\n";
        }
    }
    if ($col) {
        # Fill the last row with the code points that follow the range.
        while ($col < $wrap) {
            $html .= " &#$code;";
            $col++;
            $code++;
        }
        $html .= "</td>\n";
        $html .= "</tr>\n";
    }
    $html .= "</table>\n";
    $html .= "<!-- generated by $pgmname on ". localtime(time()) . " for geoffair.net -->\n";
    $html .= "</body>\n";
    $html .= "</html>\n";

    system($out) if write_html_file($out, $html);
    return;
}

########################
####### NOT USED #######
# gen_entities_simple($out, $min, $max, $wrap)
#   Writes a bordered table to $out with one cell per code point, each
#   showing the reference and, below it, the number.  The written file is
#   passed to system().
sub gen_entities_simple {
    my ($out, $min, $max, $wrap) = @_;
    return unless wrap_ok($wrap);

    my $html = get_html_head();
    $html .= "<table align=\"center\" border=\"1\" cellpadding=\"1\" cellspacing=\"1\" summary=\"table of entities\">\n";
    $html .= caption_line($min, $max, $wrap);

    my $col  = 0;
    my $code = $min;
    while ($code < $max) {
        $html .= "<tr>\n" if ($col == 0);
        # Code point 0 is shown as a blank instead of a reference.
        my $shown = ($code == 0) ? " " : "&#$code;";
        $html .= "<td>$shown<br>$code</td>\n";
        $code++;
        $col++;
        if ($col == $wrap) {
            $col = 0;
            $html .= "</tr>\n";
        }
    }
    if ($col) {
        # Fill the last row with the code points that follow the range.
        while ($col < $wrap) {
            $html .= "<td>&#$code;<br>$code</td>\n";
            $col++;
            $code++;
        }
        $html .= "</tr>\n";
    }
    $html .= "</table>\n";
    $html .= get_end_links();
    $html .= get_html_valid();
    $html .= "</body>\n";
    $html .= "</html>\n";

    system($out) if write_html_file($out, $html);
    return;
}

# Returns the navigation paragraph placed after the table.  It carries the
# page's only "end" anchor, the target of the href="#end" links.
sub get_end_links {
    my $end_links = <<'EOF';
<p class="ctr">
<a name="end" id="end"></a>
 |- <a target="_self" href="index.htm">index</a>
 -|- <a target="_self" href="http://geoffair.net/home2.htm">home</a>
 -|- <a target="_self" href="#top">top</a>
 -|
</p>
EOF
    return $end_links;
}

# Returns the page start: doctype, <head>, the opening <body> with the "top"
# anchor, the page title, navigation links and the introduction paragraph.
sub get_html_head {
    my $html_head = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>
display entities
</title>
<meta http-equiv="Content-Language" content="en-us">
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<link rel="stylesheet" href="cxx.css" type="text/css">
</head>
<body>
<a name="top" id="top"></a>
<h1>
Display Entities
</h1>
<p class="ctr">
 |- <a target="_self" href="index.htm">index</a>
 -|- <a target="_self" href="http://geoffair.net/home2.htm">home</a>
 -|- <a target="_self" href="#end">end</a>
 -|
</p>
<p>
A simple Perl generated list of 'entity' values, 0-32767, just to see what happens! And what is displayed.
</p>
EOF
    return $html_head;
}

# Returns the paragraph with the "checked by tidy" and "Valid HTML 4.01"
# badges.  It holds no "end" anchor: get_end_links() provides that one, and
# anchor names must be unique within a page.
sub get_html_valid {
    my $html_valid = <<'EOF';
<p>
<a target="_blank" href="http://tidy.sourceforge.net/"><img border="0" src="images/checked_by_tidy.gif" alt="checked by tidy" width="32" height="32"></a>
<a href="http://validator.w3.org/check?uri=referer" target="_blank"><img src="images/valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31"></a>
</p>
EOF
    return $html_valid;
}

# dec2hex($decnum)
#   Returns $decnum (a non-negative whole number) as upper case hexadecimal,
#   zero padded on the left to at least 4 digits, e.g. 0 => '0000',
#   255 => '00FF', 65536 => '10000'.
sub dec2hex {
    my $decnum = $_[0];
    return sprintf('%04X', $decnum);
} # end sub

# get_tidy_txt($inf, $pars)
#   Runs "tidydev $pars $inf" and returns what it prints, with empty lines
#   dropped and the remaining lines joined by "\n" (no trailing newline).
#   Returns '' when the command could not be started or printed nothing.
sub get_tidy_txt {
    my ($inf, $pars) = @_;
    my $ntx = '';
    # $pars is one string holding many options, so the command is passed as
    # a single string rather than as an argument list.
    if (open(my $tdy, '-|', "tidydev $pars $inf")) {
        my @kept;
        while (my $ln = <$tdy>) {
            chomp $ln;
            push(@kept, $ln) if length($ln);
        }
        # The command's exit status is not inspected, as before.
        close $tdy;
        $ntx = join("\n", @kept);
    } else {
        prt( "ERROR: Failed to run tidydev ...\n" );
    }
    return $ntx;
}

# eof - genentities.pl