Filename | /usr/share/perl/5.10/charnames.pm |
Statements | Executed 40 statements in 1.74ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 56µs | 1.94ms | import | charnames::
1 | 1 | 1 | 19µs | 24µs | BEGIN@2 | charnames::
1 | 1 | 1 | 17µs | 21µs | BEGIN@174 | charnames::
1 | 1 | 1 | 16µs | 36µs | BEGIN@183 | charnames::
1 | 1 | 1 | 14µs | 14µs | BEGIN@4 | charnames::
1 | 1 | 1 | 10µs | 25µs | BEGIN@3 | charnames::
1 | 1 | 1 | 5µs | 5µs | BEGIN@7 | charnames::
0 | 0 | 0 | 0s | 0s | alias | charnames::
0 | 0 | 0 | 0s | 0s | alias_file | charnames::
0 | 0 | 0 | 0s | 0s | carp | charnames::
0 | 0 | 0 | 0s | 0s | charnames | charnames::
0 | 0 | 0 | 0s | 0s | croak | charnames::
0 | 0 | 0 | 0s | 0s | viacode | charnames::
0 | 0 | 0 | 0s | 0s | vianame | charnames::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package charnames; | ||||
2 | 3 | 29µs | 2 | 29µs | # spent 24µs (19+5) within charnames::BEGIN@2 which was called:
# once (19µs+5µs) by MARC::Charset::BEGIN@12 at line 2 # spent 24µs making 1 call to charnames::BEGIN@2
# spent 5µs making 1 call to strict::import |
3 | 3 | 51µs | 2 | 40µs | # spent 25µs (10+15) within charnames::BEGIN@3 which was called:
# once (10µs+15µs) by MARC::Charset::BEGIN@12 at line 3 # spent 25µs making 1 call to charnames::BEGIN@3
# spent 15µs making 1 call to warnings::import |
4 | 3 | 49µs | 1 | 14µs | # spent 14µs within charnames::BEGIN@4 which was called:
# once (14µs+0s) by MARC::Charset::BEGIN@12 at line 4 # spent 14µs making 1 call to charnames::BEGIN@4 |
5 | 1 | 700ns | our $VERSION = '1.07'; | ||
6 | |||||
7 | 3 | 728µs | 1 | 5µs | # spent 5µs within charnames::BEGIN@7 which was called:
# once (5µs+0s) by MARC::Charset::BEGIN@12 at line 7 # spent 5µs making 1 call to charnames::BEGIN@7 |
8 | |||||
9 | 1 | 9µs | my %alias1 = ( | ||
10 | # Icky 3.2 names with parentheses. | ||||
11 | 'LINE FEED' => 'LINE FEED (LF)', | ||||
12 | 'FORM FEED' => 'FORM FEED (FF)', | ||||
13 | 'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)', | ||||
14 | 'NEXT LINE' => 'NEXT LINE (NEL)', | ||||
15 | # Convenience. | ||||
16 | 'LF' => 'LINE FEED (LF)', | ||||
17 | 'FF' => 'FORM FEED (FF)', | ||||
18 | 'CR' => 'CARRIAGE RETURN (CR)', | ||||
19 | 'NEL' => 'NEXT LINE (NEL)', | ||||
20 | # More convenience. For futher convencience, | ||||
21 | # it is suggested some way using using the NamesList | ||||
22 | # aliases is implemented. | ||||
23 | 'ZWNJ' => 'ZERO WIDTH NON-JOINER', | ||||
24 | 'ZWJ' => 'ZERO WIDTH JOINER', | ||||
25 | 'BOM' => 'BYTE ORDER MARK', | ||||
26 | ); | ||||
27 | |||||
28 | 1 | 5µs | my %alias2 = ( | ||
29 | # Pre-3.2 compatibility (only for the first 256 characters). | ||||
30 | 'HORIZONTAL TABULATION' => 'CHARACTER TABULATION', | ||||
31 | 'VERTICAL TABULATION' => 'LINE TABULATION', | ||||
32 | 'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR', | ||||
33 | 'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE', | ||||
34 | 'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO', | ||||
35 | 'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE', | ||||
36 | 'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD', | ||||
37 | 'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD', | ||||
38 | ); | ||||
39 | |||||
40 | 1 | 300ns | my %alias3 = ( | ||
41 | # User defined aliasses. Even more convenient :) | ||||
42 | ); | ||||
43 | 1 | 200ns | my $txt; | ||
44 | |||||
45 | sub croak | ||||
46 | { | ||||
47 | require Carp; goto &Carp::croak; | ||||
48 | } # croak | ||||
49 | |||||
50 | sub carp | ||||
51 | { | ||||
52 | require Carp; goto &Carp::carp; | ||||
53 | } # carp | ||||
54 | |||||
55 | sub alias (@) | ||||
56 | { | ||||
57 | @_ or return %alias3; | ||||
58 | my $alias = ref $_[0] ? $_[0] : { @_ }; | ||||
59 | @alias3{keys %$alias} = values %$alias; | ||||
60 | } # alias | ||||
61 | |||||
62 | sub alias_file ($) | ||||
63 | { | ||||
64 | my ($arg, $file) = @_; | ||||
65 | if (-f $arg && File::Spec->file_name_is_absolute ($arg)) { | ||||
66 | $file = $arg; | ||||
67 | } | ||||
68 | elsif ($arg =~ m/^\w+$/) { | ||||
69 | $file = "unicore/${arg}_alias.pl"; | ||||
70 | } | ||||
71 | else { | ||||
72 | croak "Charnames alias files can only have identifier characters"; | ||||
73 | } | ||||
74 | if (my @alias = do $file) { | ||||
75 | @alias == 1 && !defined $alias[0] and | ||||
76 | croak "$file cannot be used as alias file for charnames"; | ||||
77 | @alias % 2 and | ||||
78 | croak "$file did not return a (valid) list of alias pairs"; | ||||
79 | alias (@alias); | ||||
80 | return (1); | ||||
81 | } | ||||
82 | 0; | ||||
83 | } # alias_file | ||||
84 | |||||
85 | # This is not optimized in any way yet | ||||
86 | sub charnames | ||||
87 | { | ||||
88 | my $name = shift; | ||||
89 | |||||
90 | if (exists $alias1{$name}) { | ||||
91 | $name = $alias1{$name}; | ||||
92 | } | ||||
93 | elsif (exists $alias2{$name}) { | ||||
94 | require warnings; | ||||
95 | warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead}); | ||||
96 | $name = $alias2{$name}; | ||||
97 | } | ||||
98 | elsif (exists $alias3{$name}) { | ||||
99 | $name = $alias3{$name}; | ||||
100 | } | ||||
101 | |||||
102 | my $ord; | ||||
103 | my @off; | ||||
104 | my $fname; | ||||
105 | |||||
106 | if ($name eq "BYTE ORDER MARK") { | ||||
107 | $fname = $name; | ||||
108 | $ord = 0xFEFF; | ||||
109 | } else { | ||||
110 | ## Suck in the code/name list as a big string. | ||||
111 | ## Lines look like: | ||||
112 | ## "0052\t\tLATIN CAPITAL LETTER R\n" | ||||
113 | $txt = do "unicore/Name.pl" unless $txt; | ||||
114 | |||||
115 | ## @off will hold the index into the code/name string of the start and | ||||
116 | ## end of the name as we find it. | ||||
117 | |||||
118 | ## If :full, look for the name exactly | ||||
119 | if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) { | ||||
120 | @off = ($-[0], $+[0]); | ||||
121 | } | ||||
122 | |||||
123 | ## If we didn't get above, and :short allowed, look for the short name. | ||||
124 | ## The short name is like "greek:Sigma" | ||||
125 | unless (@off) { | ||||
126 | if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) { | ||||
127 | my ($script, $cname) = ($1, $2); | ||||
128 | my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | ||||
129 | if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) { | ||||
130 | @off = ($-[0], $+[0]); | ||||
131 | } | ||||
132 | } | ||||
133 | } | ||||
134 | |||||
135 | ## If we still don't have it, check for the name among the loaded | ||||
136 | ## scripts. | ||||
137 | if (not @off) { | ||||
138 | my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL"; | ||||
139 | for my $script (@{$^H{charnames_scripts}}) { | ||||
140 | if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) { | ||||
141 | @off = ($-[0], $+[0]); | ||||
142 | last; | ||||
143 | } | ||||
144 | } | ||||
145 | } | ||||
146 | |||||
147 | ## If we don't have it by now, give up. | ||||
148 | unless (@off) { | ||||
149 | carp "Unknown charname '$name'"; | ||||
150 | return "\x{FFFD}"; | ||||
151 | } | ||||
152 | |||||
153 | ## | ||||
154 | ## Now know where in the string the name starts. | ||||
155 | ## The code, in hex, is before that. | ||||
156 | ## | ||||
157 | ## The code can be 4-6 characters long, so we've got to sort of | ||||
158 | ## go look for it, just after the newline that comes before $off[0]. | ||||
159 | ## | ||||
160 | ## This would be much easier if unicore/Name.pl had info in | ||||
161 | ## a name/code order, instead of code/name order. | ||||
162 | ## | ||||
163 | ## The +1 after the rindex() is to skip past the newline we're finding, | ||||
164 | ## or, if the rindex() fails, to put us to an offset of zero. | ||||
165 | ## | ||||
166 | my $hexstart = rindex($txt, "\n", $off[0]) + 1; | ||||
167 | |||||
168 | ## we know where it starts, so turn into number - | ||||
169 | ## the ordinal for the char. | ||||
170 | $ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart); | ||||
171 | } | ||||
172 | |||||
173 | if ($^H & $bytes::hint_bits) { # "use bytes" in effect? | ||||
174 | 3 | 92µs | 2 | 26µs | # spent 21µs (17+4) within charnames::BEGIN@174 which was called:
# once (17µs+4µs) by MARC::Charset::BEGIN@12 at line 174 # spent 21µs making 1 call to charnames::BEGIN@174
# spent 4µs making 1 call to bytes::import |
175 | return chr $ord if $ord <= 255; | ||||
176 | my $hex = sprintf "%04x", $ord; | ||||
177 | if (not defined $fname) { | ||||
178 | $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; | ||||
179 | } | ||||
180 | croak "Character 0x$hex with name '$fname' is above 0xFF"; | ||||
181 | } | ||||
182 | |||||
183 | 3 | 706µs | 2 | 56µs | # spent 36µs (16+20) within charnames::BEGIN@183 which was called:
# once (16µs+20µs) by MARC::Charset::BEGIN@12 at line 183 # spent 36µs making 1 call to charnames::BEGIN@183
# spent 20µs making 1 call to warnings::unimport |
184 | return pack "U", $ord; | ||||
185 | } # charnames | ||||
186 | |||||
187 | sub import | ||||
188 | # spent 1.94ms (56µs+1.88) within charnames::import which was called:
# once (56µs+1.88ms) by MARC::Charset::BEGIN@12 at line 12 of MARC/Charset.pm | ||||
189 | 14 | 56µs | shift; ## ignore class name | ||
190 | |||||
191 | if (not @_) { | ||||
192 | carp("`use charnames' needs explicit imports list"); | ||||
193 | } | ||||
194 | $^H{charnames} = \&charnames ; | ||||
195 | |||||
196 | ## | ||||
197 | ## fill %h keys with our @_ args. | ||||
198 | ## | ||||
199 | my ($promote, %h, @args) = (0); | ||||
200 | while (my $arg = shift) { | ||||
201 | if ($arg eq ":alias") { | ||||
202 | @_ or | ||||
203 | croak ":alias needs an argument in charnames"; | ||||
204 | my $alias = shift; | ||||
205 | if (ref $alias) { | ||||
206 | ref $alias eq "HASH" or | ||||
207 | croak "Only HASH reference supported as argument to :alias"; | ||||
208 | alias ($alias); | ||||
209 | next; | ||||
210 | } | ||||
211 | if ($alias =~ m{:(\w+)$}) { | ||||
212 | $1 eq "full" || $1 eq "short" and | ||||
213 | croak ":alias cannot use existing pragma :$1 (reversed order?)"; | ||||
214 | alias_file ($1) and $promote = 1; | ||||
215 | next; | ||||
216 | } | ||||
217 | alias_file ($alias); | ||||
218 | next; | ||||
219 | } | ||||
220 | if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) { | ||||
221 | warn "unsupported special '$arg' in charnames"; | ||||
222 | next; | ||||
223 | } | ||||
224 | push @args, $arg; | ||||
225 | } | ||||
226 | @args == 0 && $promote and @args = (":full"); | ||||
227 | @h{@args} = (1) x @args; | ||||
228 | |||||
229 | $^H{charnames_full} = delete $h{':full'}; | ||||
230 | $^H{charnames_short} = delete $h{':short'}; | ||||
231 | $^H{charnames_scripts} = [map uc, keys %h]; | ||||
232 | |||||
233 | ## | ||||
234 | ## If utf8? warnings are enabled, and some scripts were given, | ||||
235 | ## see if at least we can find one letter of each script. | ||||
236 | ## | ||||
237 | 1 | 1.88ms | if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) { # spent 1.88ms making 1 call to warnings::enabled | ||
238 | $txt = do "unicore/Name.pl" unless $txt; | ||||
239 | |||||
240 | for my $script (@{$^H{charnames_scripts}}) { | ||||
241 | if (not $txt =~ m/\t\t$script (?:CAPITAL |SMALL )?LETTER /) { | ||||
242 | warnings::warn('utf8', "No such script: '$script'"); | ||||
243 | } | ||||
244 | } | ||||
245 | } | ||||
246 | } # import | ||||
247 | |||||
248 | 1 | 200ns | my %viacode; | ||
249 | |||||
250 | sub viacode | ||||
251 | { | ||||
252 | if (@_ != 1) { | ||||
253 | carp "charnames::viacode() expects one argument"; | ||||
254 | return; | ||||
255 | } | ||||
256 | |||||
257 | my $arg = shift; | ||||
258 | |||||
259 | # this comes actually from Unicode::UCD, where it is the named | ||||
260 | # function _getcode (), but it avoids the overhead of loading it | ||||
261 | my $hex; | ||||
262 | if ($arg =~ /^[1-9]\d*$/) { | ||||
263 | $hex = sprintf "%04X", $arg; | ||||
264 | } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) { | ||||
265 | $hex = $1; | ||||
266 | } else { | ||||
267 | carp("unexpected arg \"$arg\" to charnames::viacode()"); | ||||
268 | return; | ||||
269 | } | ||||
270 | |||||
271 | # checking the length first is slightly faster | ||||
272 | if (length($hex) > 5 && hex($hex) > 0x10FFFF) { | ||||
273 | carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; | ||||
274 | return; | ||||
275 | } | ||||
276 | |||||
277 | return $viacode{$hex} if exists $viacode{$hex}; | ||||
278 | |||||
279 | $txt = do "unicore/Name.pl" unless $txt; | ||||
280 | |||||
281 | return unless $txt =~ m/^$hex\t\t(.+)/m; | ||||
282 | |||||
283 | $viacode{$hex} = $1; | ||||
284 | } # viacode | ||||
285 | |||||
286 | 1 | 200ns | my %vianame; | ||
287 | |||||
288 | sub vianame | ||||
289 | { | ||||
290 | if (@_ != 1) { | ||||
291 | carp "charnames::vianame() expects one name argument"; | ||||
292 | return () | ||||
293 | } | ||||
294 | |||||
295 | my $arg = shift; | ||||
296 | |||||
297 | return chr CORE::hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/; | ||||
298 | |||||
299 | return $vianame{$arg} if exists $vianame{$arg}; | ||||
300 | |||||
301 | $txt = do "unicore/Name.pl" unless $txt; | ||||
302 | |||||
303 | my $pos = index $txt, "\t\t$arg\n"; | ||||
304 | if ($[ <= $pos) { | ||||
305 | my $posLF = rindex $txt, "\n", $pos; | ||||
306 | (my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d; | ||||
307 | return $vianame{$arg} = CORE::hex $code; | ||||
308 | |||||
309 | # If $pos is at the 1st line, $posLF must be $[ - 1 (not found); | ||||
310 | # then $posLF + 1 equals to $[ (at the beginning of $txt). | ||||
311 | # Otherwise $posLF is the position of "\n"; | ||||
312 | # then $posLF + 1 must be the position of the next to "\n" | ||||
313 | # (the beginning of the line). | ||||
314 | # substr($txt, $posLF + 1, 6) may be "0000\t\t", "00A1\t\t", | ||||
315 | # "10300\t", "100000", etc. So we can get the code via removing TAB. | ||||
316 | } else { | ||||
317 | return; | ||||
318 | } | ||||
319 | } # vianame | ||||
320 | |||||
321 | |||||
322 | 1 | 10µs | 1; | ||
323 | __END__ |