| Filename | /usr/share/perl/5.10/charnames.pm |
| Statements | Executed 40 statements in 1.93ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 37µs | 2.31ms | charnames::import |
| 1 | 1 | 1 | 22µs | 28µs | charnames::BEGIN@2 |
| 1 | 1 | 1 | 18µs | 23µs | charnames::BEGIN@174 |
| 1 | 1 | 1 | 17µs | 22µs | charnames::BEGIN@4 |
| 1 | 1 | 1 | 13µs | 33µs | charnames::BEGIN@183 |
| 1 | 1 | 1 | 11µs | 27µs | charnames::BEGIN@3 |
| 1 | 1 | 1 | 5µs | 5µs | charnames::BEGIN@7 |
| 0 | 0 | 0 | 0s | 0s | charnames::alias |
| 0 | 0 | 0 | 0s | 0s | charnames::alias_file |
| 0 | 0 | 0 | 0s | 0s | charnames::carp |
| 0 | 0 | 0 | 0s | 0s | charnames::charnames |
| 0 | 0 | 0 | 0s | 0s | charnames::croak |
| 0 | 0 | 0 | 0s | 0s | charnames::viacode |
| 0 | 0 | 0 | 0s | 0s | charnames::vianame |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package charnames; | ||||
| 2 | 3 | 29µs | 2 | 33µs | # spent 28µs (22+5) within charnames::BEGIN@2 which was called:
# once (22µs+5µs) by MARC::Charset::BEGIN@12 at line 2 # spent 28µs making 1 call to charnames::BEGIN@2
# spent 5µs making 1 call to strict::import |
| 3 | 3 | 28µs | 2 | 43µs | # spent 27µs (11+16) within charnames::BEGIN@3 which was called:
# once (11µs+16µs) by MARC::Charset::BEGIN@12 at line 3 # spent 27µs making 1 call to charnames::BEGIN@3
# spent 16µs making 1 call to warnings::import |
| 4 | 3 | 44µs | 2 | 26µs | # spent 22µs (17+4) within charnames::BEGIN@4 which was called:
# once (17µs+4µs) by MARC::Charset::BEGIN@12 at line 4 # spent 22µs making 1 call to charnames::BEGIN@4
# spent 4µs making 1 call to UNIVERSAL::import |
| 5 | 1 | 700ns | our $VERSION = '1.07'; | ||
| 6 | |||||
| 7 | 3 | 863µs | 1 | 5µs | # spent 5µs within charnames::BEGIN@7 which was called:
# once (5µs+0s) by MARC::Charset::BEGIN@12 at line 7 # spent 5µs making 1 call to charnames::BEGIN@7 |
| 8 | |||||
| 9 | 1 | 10µs | my %alias1 = ( | ||
| 10 | # Icky 3.2 names with parentheses. | ||||
| 11 | 'LINE FEED' => 'LINE FEED (LF)', | ||||
| 12 | 'FORM FEED' => 'FORM FEED (FF)', | ||||
| 13 | 'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)', | ||||
| 14 | 'NEXT LINE' => 'NEXT LINE (NEL)', | ||||
| 15 | # Convenience. | ||||
| 16 | 'LF' => 'LINE FEED (LF)', | ||||
| 17 | 'FF' => 'FORM FEED (FF)', | ||||
| 18 | 'CR' => 'CARRIAGE RETURN (CR)', | ||||
| 19 | 'NEL' => 'NEXT LINE (NEL)', | ||||
| 20 | # More convenience. For futher convencience, | ||||
| 21 | # it is suggested some way using using the NamesList | ||||
| 22 | # aliases is implemented. | ||||
| 23 | 'ZWNJ' => 'ZERO WIDTH NON-JOINER', | ||||
| 24 | 'ZWJ' => 'ZERO WIDTH JOINER', | ||||
| 25 | 'BOM' => 'BYTE ORDER MARK', | ||||
| 26 | ); | ||||
| 27 | |||||
| 28 | 1 | 6µs | my %alias2 = ( | ||
| 29 | # Pre-3.2 compatibility (only for the first 256 characters). | ||||
| 30 | 'HORIZONTAL TABULATION' => 'CHARACTER TABULATION', | ||||
| 31 | 'VERTICAL TABULATION' => 'LINE TABULATION', | ||||
| 32 | 'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR', | ||||
| 33 | 'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE', | ||||
| 34 | 'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO', | ||||
| 35 | 'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE', | ||||
| 36 | 'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD', | ||||
| 37 | 'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD', | ||||
| 38 | ); | ||||
| 39 | |||||
| 40 | 1 | 300ns | my %alias3 = ( | ||
| 41 | # User defined aliasses. Even more convenient :) | ||||
| 42 | ); | ||||
| 43 | 1 | 300ns | my $txt; | ||
| 44 | |||||
| 45 | sub croak | ||||
| 46 | { | ||||
| 47 | require Carp; goto &Carp::croak; | ||||
| 48 | } # croak | ||||
| 49 | |||||
| 50 | sub carp | ||||
| 51 | { | ||||
| 52 | require Carp; goto &Carp::carp; | ||||
| 53 | } # carp | ||||
| 54 | |||||
| 55 | sub alias (@) | ||||
| 56 | { | ||||
| 57 | @_ or return %alias3; | ||||
| 58 | my $alias = ref $_[0] ? $_[0] : { @_ }; | ||||
| 59 | @alias3{keys %$alias} = values %$alias; | ||||
| 60 | } # alias | ||||
| 61 | |||||
| 62 | sub alias_file ($) | ||||
| 63 | { | ||||
| 64 | my ($arg, $file) = @_; | ||||
| 65 | if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { | ||||
| 66 | $file = $arg; | ||||
| 67 | } | ||||
| 68 | elsif ($arg =~ m/^\w+$/) { | ||||
| 69 | $file = "unicore/${arg}_alias.pl"; | ||||
| 70 | } | ||||
| 71 | else { | ||||
| 72 | croak "Charnames alias files can only have identifier characters"; | ||||
| 73 | } | ||||
| 74 | if (my @alias = do $file) { | ||||
| 75 | @alias == 1 && !defined $alias[0] and | ||||
| 76 | croak "$file cannot be used as alias file for charnames"; | ||||
| 77 | @alias % 2 and | ||||
| 78 | croak "$file did not return a (valid) list of alias pairs"; | ||||
| 79 | alias (@alias); | ||||
| 80 | return (1); | ||||
| 81 | } | ||||
| 82 | 0; | ||||
| 83 | } # alias_file | ||||
| 84 | |||||
| 85 | # This is not optimized in any way yet | ||||
| 86 | sub charnames | ||||
| 87 | { | ||||
| 88 | my $name = shift; | ||||
| 89 | |||||
| 90 | if (exists $alias1{$name}) { | ||||
| 91 | $name = $alias1{$name}; | ||||
| 92 | } | ||||
| 93 | elsif (exists $alias2{$name}) { | ||||
| 94 | require warnings; | ||||
| 95 | warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead}); | ||||
| 96 | $name = $alias2{$name}; | ||||
| 97 | } | ||||
| 98 | elsif (exists $alias3{$name}) { | ||||
| 99 | $name = $alias3{$name}; | ||||
| 100 | } | ||||
| 101 | |||||
| 102 | my $ord; | ||||
| 103 | my @off; | ||||
| 104 | my $fname; | ||||
| 105 | |||||
| 106 | if ($name eq "BYTE ORDER MARK") { | ||||
| 107 | $fname = $name; | ||||
| 108 | $ord = 0xFEFF; | ||||
| 109 | } else { | ||||
| 110 | ## Suck in the code/name list as a big string. | ||||
| 111 | ## Lines look like: | ||||
| 112 | ## "0052\t\tLATIN CAPITAL LETTER R\n" | ||||
| 113 | $txt = do "unicore/Name.pl" unless $txt; | ||||
| 114 | |||||
| 115 | ## @off will hold the index into the code/name string of the start and | ||||
| 116 | ## end of the name as we find it. | ||||
| 117 | |||||
| 118 | ## If :full, look for the name exactly | ||||
| 119 | if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) { | ||||
| 120 | @off = ($-[0], $+[0]); | ||||
| 121 | } | ||||
| 122 | |||||
| 123 | ## If we didn't get above, and :short allowed, look for the short name. | ||||
| 124 | ## The short name is like "greek:Sigma" | ||||
| 125 | unless (@off) { | ||||
| 126 | if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) { | ||||
| 127 | my ($script, $cname) = ($1, $2); | ||||
| 128 | my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | ||||
| 129 | if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) { | ||||
| 130 | @off = ($-[0], $+[0]); | ||||
| 131 | } | ||||
| 132 | } | ||||
| 133 | } | ||||
| 134 | |||||
| 135 | ## If we still don't have it, check for the name among the loaded | ||||
| 136 | ## scripts. | ||||
| 137 | if (not @off) { | ||||
| 138 | my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | ||||
| 139 | for my $script (@{$^H{charnames_scripts}}) { | ||||
| 140 | if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) { | ||||
| 141 | @off = ($-[0], $+[0]); | ||||
| 142 | last; | ||||
| 143 | } | ||||
| 144 | } | ||||
| 145 | } | ||||
| 146 | |||||
| 147 | ## If we don't have it by now, give up. | ||||
| 148 | unless (@off) { | ||||
| 149 | carp "Unknown charname '$name'"; | ||||
| 150 | return "\x{FFFD}"; | ||||
| 151 | } | ||||
| 152 | |||||
| 153 | ## | ||||
| 154 | ## Now know where in the string the name starts. | ||||
| 155 | ## The code, in hex, is before that. | ||||
| 156 | ## | ||||
| 157 | ## The code can be 4-6 characters long, so we've got to sort of | ||||
| 158 | ## go look for it, just after the newline that comes before $off[0]. | ||||
| 159 | ## | ||||
| 160 | ## This would be much easier if unicore/Name.pl had info in | ||||
| 161 | ## a name/code order, instead of code/name order. | ||||
| 162 | ## | ||||
| 163 | ## The +1 after the rindex() is to skip past the newline we're finding, | ||||
| 164 | ## or, if the rindex() fails, to put us to an offset of zero. | ||||
| 165 | ## | ||||
| 166 | my $hexstart = rindex($txt, "\n", $off[0]) + 1; | ||||
| 167 | |||||
| 168 | ## we know where it starts, so turn into number - | ||||
| 169 | ## the ordinal for the char. | ||||
| 170 | $ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart); | ||||
| 171 | } | ||||
| 172 | |||||
| 173 | if ($^H & $bytes::hint_bits) { # "use bytes" in effect? | ||||
| 174 | 3 | 96µs | 2 | 28µs | # spent 23µs (18+5) within charnames::BEGIN@174 which was called:
# once (18µs+5µs) by MARC::Charset::BEGIN@12 at line 174 # spent 23µs making 1 call to charnames::BEGIN@174
# spent 5µs making 1 call to bytes::import |
| 175 | return chr $ord if $ord <= 255; | ||||
| 176 | my $hex = sprintf "%04x", $ord; | ||||
| 177 | if (not defined $fname) { | ||||
| 178 | $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; | ||||
| 179 | } | ||||
| 180 | croak "Character 0x$hex with name '$fname' is above 0xFF"; | ||||
| 181 | } | ||||
| 182 | |||||
| 183 | 3 | 803µs | 2 | 53µs | # spent 33µs (13+20) within charnames::BEGIN@183 which was called:
# once (13µs+20µs) by MARC::Charset::BEGIN@12 at line 183 # spent 33µs making 1 call to charnames::BEGIN@183
# spent 20µs making 1 call to warnings::unimport |
| 184 | return pack "U", $ord; | ||||
| 185 | } # charnames | ||||
| 186 | |||||
| 187 | sub import | ||||
| 188 | # spent 2.31ms (37µs+2.27) within charnames::import which was called:
# once (37µs+2.27ms) by MARC::Charset::BEGIN@12 at line 12 of MARC/Charset.pm | ||||
| 189 | 1 | 700ns | shift; ## ignore class name | ||
| 190 | |||||
| 191 | 1 | 700ns | if (not @_) { | ||
| 192 | carp("`use charnames' needs explicit imports list"); | ||||
| 193 | } | ||||
| 194 | 1 | 5µs | $^H{charnames} = \&charnames ; | ||
| 195 | |||||
| 196 | ## | ||||
| 197 | ## fill %h keys with our @_ args. | ||||
| 198 | ## | ||||
| 199 | 1 | 1µs | my ($promote, %h, @args) = (0); | ||
| 200 | 1 | 2µs | while (my $arg = shift) { | ||
| 201 | 1 | 600ns | if ($arg eq ":alias") { | ||
| 202 | @_ or | ||||
| 203 | croak ":alias needs an argument in charnames"; | ||||
| 204 | my $alias = shift; | ||||
| 205 | if (ref $alias) { | ||||
| 206 | ref $alias eq "HASH" or | ||||
| 207 | croak "Only HASH reference supported as argument to :alias"; | ||||
| 208 | alias ($alias); | ||||
| 209 | next; | ||||
| 210 | } | ||||
| 211 | if ($alias =~ m{:(\w+)$}) { | ||||
| 212 | $1 eq "full" || $1 eq "short" and | ||||
| 213 | croak ":alias cannot use existing pragma :$1 (reversed order?)"; | ||||
| 214 | alias_file ($1) and $promote = 1; | ||||
| 215 | next; | ||||
| 216 | } | ||||
| 217 | alias_file ($alias); | ||||
| 218 | next; | ||||
| 219 | } | ||||
| 220 | 1 | 2µs | if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) { | ||
| 221 | warn "unsupported special '$arg' in charnames"; | ||||
| 222 | next; | ||||
| 223 | } | ||||
| 224 | 1 | 1µs | push @args, $arg; | ||
| 225 | } | ||||
| 226 | 1 | 1µs | @args == 0 && $promote and @args = (":full"); | ||
| 227 | 1 | 3µs | @h{@args} = (1) x @args; | ||
| 228 | |||||
| 229 | 1 | 3µs | $^H{charnames_full} = delete $h{':full'}; | ||
| 230 | 1 | 2µs | $^H{charnames_short} = delete $h{':short'}; | ||
| 231 | 1 | 4µs | $^H{charnames_scripts} = [map uc, keys %h]; | ||
| 232 | |||||
| 233 | ## | ||||
| 234 | ## If utf8? warnings are enabled, and some scripts were given, | ||||
| 235 | ## see if at least we can find one letter of each script. | ||||
| 236 | ## | ||||
| 237 | 1 | 10µs | 1 | 2.27ms | if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) { # spent 2.27ms making 1 call to warnings::enabled |
| 238 | $txt = do "unicore/Name.pl" unless $txt; | ||||
| 239 | |||||
| 240 | for my $script (@{$^H{charnames_scripts}}) { | ||||
| 241 | if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) { | ||||
| 242 | warnings::warn('utf8', "No such script: '$script'"); | ||||
| 243 | } | ||||
| 244 | } | ||||
| 245 | } | ||||
| 246 | } # import | ||||
| 247 | |||||
| 248 | 1 | 200ns | my %viacode; | ||
| 249 | |||||
| 250 | sub viacode | ||||
| 251 | { | ||||
| 252 | if (@_ != 1) { | ||||
| 253 | carp "charnames::viacode() expects one argument"; | ||||
| 254 | return; | ||||
| 255 | } | ||||
| 256 | |||||
| 257 | my $arg = shift; | ||||
| 258 | |||||
| 259 | # this comes actually from Unicode::UCD, where it is the named | ||||
| 260 | # function _getcode (), but it avoids the overhead of loading it | ||||
| 261 | my $hex; | ||||
| 262 | if ($arg =~ /^[1-9]\d*$/) { | ||||
| 263 | $hex = sprintf "%04X", $arg; | ||||
| 264 | } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) { | ||||
| 265 | $hex = $1; | ||||
| 266 | } else { | ||||
| 267 | carp("unexpected arg \"$arg\" to charnames::viacode()"); | ||||
| 268 | return; | ||||
| 269 | } | ||||
| 270 | |||||
| 271 | # checking the length first is slightly faster | ||||
| 272 | if (length($hex) > 5 && hex($hex) > 0x10FFFF) { | ||||
| 273 | carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; | ||||
| 274 | return; | ||||
| 275 | } | ||||
| 276 | |||||
| 277 | return $viacode{$hex} if exists $viacode{$hex}; | ||||
| 278 | |||||
| 279 | $txt = do "unicore/Name.pl" unless $txt; | ||||
| 280 | |||||
| 281 | return unless $txt =~ m/^$hex\t\t(.+)/m; | ||||
| 282 | |||||
| 283 | $viacode{$hex} = $1; | ||||
| 284 | } # viacode | ||||
| 285 | |||||
| 286 | 1 | 200ns | my %vianame; | ||
| 287 | |||||
| 288 | sub vianame | ||||
| 289 | { | ||||
| 290 | if (@_ != 1) { | ||||
| 291 | carp "charnames::vianame() expects one name argument"; | ||||
| 292 | return () | ||||
| 293 | } | ||||
| 294 | |||||
| 295 | my $arg = shift; | ||||
| 296 | |||||
| 297 | return chr CORE::hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/; | ||||
| 298 | |||||
| 299 | return $vianame{$arg} if exists $vianame{$arg}; | ||||
| 300 | |||||
| 301 | $txt = do "unicore/Name.pl" unless $txt; | ||||
| 302 | |||||
| 303 | my $pos = index $txt, "\t\t$arg\n"; | ||||
| 304 | if ($[ <= $pos) { | ||||
| 305 | my $posLF = rindex $txt, "\n", $pos; | ||||
| 306 | (my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d; | ||||
| 307 | return $vianame{$arg} = CORE::hex $code; | ||||
| 308 | |||||
| 309 | # If $pos is at the 1st line, $posLF must be $[ - 1 (not found); | ||||
| 310 | # then $posLF + 1 equals to $[ (at the beginning of $txt). | ||||
| 311 | # Otherwise $posLF is the position of "\n"; | ||||
| 312 | # then $posLF + 1 must be the position of the next to "\n" | ||||
| 313 | # (the beginning of the line). | ||||
| 314 | # substr($txt, $posLF + 1, 6) may be "0000\t\t", "00A1\t\t", | ||||
| 315 | # "10300\t", "100000", etc. So we can get the code via removing TAB. | ||||
| 316 | } else { | ||||
| 317 | return; | ||||
| 318 | } | ||||
| 319 | } # vianame | ||||
| 320 | |||||
| 321 | |||||
| 322 | 1 | 9µs | 1; | ||
| 323 | __END__ |