Filename | /usr/share/perl/5.20/unicore/Name.pm |
Statements | Executed 25 statements in 254µs |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 17µs | 19µs | charnames::BEGIN@395 |
0 | 0 | 0 | 0s | 0s | charnames::code_point_to_name_special |
0 | 0 | 0 | 0s | 0s | charnames::name_to_code_point_special |
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | ||||
2 | # This file is machine-generated by lib/unicore/mktables from the Unicode | ||||
3 | # database, Version 6.3.0. Any changes made here will be lost! | ||||
4 | |||||
5 | |||||
6 | # !!!!!!! INTERNAL PERL USE ONLY !!!!!!! | ||||
7 | # This file is for internal use by core Perl only. The format and even the | ||||
8 | # name or existence of this file are subject to change without notice. Don't | ||||
9 | # use it directly. Use Unicode::UCD to access the Unicode character data | ||||
10 | # base. | ||||
11 | |||||
12 | |||||
13 | package charnames; | ||||
14 | |||||
15 | # This module contains machine-generated tables and code for the | ||||
16 | # algorithmically-determinable Unicode character names. The following | ||||
17 | # routines can be used to translate between name and code point and vice versa | ||||
18 | |||||
19 | { # Closure | ||||
20 | |||||
21 | # Match a legal code point: 4-6 hex digits. If there are 6, the first | ||||
22 | # two must be 10; if there are 5, the first must not be a 0. Written this | ||||
23 | # way to decrease backtracking. The first regex allows the code point to | ||||
24 | # be run together with the end of a preceding word (it is the one used for loose matching), but to work properly, that word shouldn't end | ||||
25 | # with a valid hex character. The second one requires a word boundary before the code point, so it won't match a code point at | ||||
26 | # the end of a word, and doesn't have the run-on issue. | ||||
27 | 2 | 10µs | 1 | 2µs | my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b)/; # spent 2µs making 1 call to charnames::CORE:qr |
28 | 1 | 2µs | 1 | 800ns | my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b))/; # spent 800ns making 1 call to charnames::CORE:qr |
29 | |||||
30 | # In the following hash, the keys are the bases of names which include | ||||
31 | # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value | ||||
32 | # of each key is another hash whose 'low' and 'high' arrays are parallel: element i of each gives the inclusive low and high ends | ||||
33 | # of the i-th range of code points that applies to the name. Ranges are listed in ascending order; name_to_code_point_special() relies on that to stop searching early. | ||||
34 | 1 | 4µs | my %names_ending_in_code_point = ( | ||
35 | 'CJK COMPATIBILITY IDEOGRAPH' => | ||||
36 | { | ||||
37 | 'high' => | ||||
38 | [ | ||||
39 | 64109, | ||||
40 | 64217, | ||||
41 | 195101, | ||||
42 | ], | ||||
43 | 'low' => | ||||
44 | [ | ||||
45 | 63744, | ||||
46 | 64112, | ||||
47 | 194560, | ||||
48 | ], | ||||
49 | }, | ||||
50 | 'CJK UNIFIED IDEOGRAPH' => | ||||
51 | { | ||||
52 | 'high' => | ||||
53 | [ | ||||
54 | 19893, | ||||
55 | 40908, | ||||
56 | 173782, | ||||
57 | 177972, | ||||
58 | 178205, | ||||
59 | ], | ||||
60 | 'low' => | ||||
61 | [ | ||||
62 | 13312, | ||||
63 | 19968, | ||||
64 | 131072, | ||||
65 | 173824, | ||||
66 | 177984, | ||||
67 | ], | ||||
68 | }, | ||||
69 | |||||
70 | ); | ||||
71 | |||||
72 | # The following hash is a copy of the previous one, except that it is for loose | ||||
73 | # matching, so each name (hash key) has its blanks and dashes squeezed out. | ||||
74 | 1 | 2µs | my %loose_names_ending_in_code_point = ( | ||
75 | 'CJKCOMPATIBILITYIDEOGRAPH' => | ||||
76 | { | ||||
77 | 'high' => | ||||
78 | [ | ||||
79 | 64109, | ||||
80 | 64217, | ||||
81 | 195101, | ||||
82 | ], | ||||
83 | 'low' => | ||||
84 | [ | ||||
85 | 63744, | ||||
86 | 64112, | ||||
87 | 194560, | ||||
88 | ], | ||||
89 | }, | ||||
90 | 'CJKUNIFIEDIDEOGRAPH' => | ||||
91 | { | ||||
92 | 'high' => | ||||
93 | [ | ||||
94 | 19893, | ||||
95 | 40908, | ||||
96 | 173782, | ||||
97 | 177972, | ||||
98 | 178205, | ||||
99 | ], | ||||
100 | 'low' => | ||||
101 | [ | ||||
102 | 13312, | ||||
103 | 19968, | ||||
104 | 131072, | ||||
105 | 173824, | ||||
106 | 177984, | ||||
107 | ], | ||||
108 | }, | ||||
109 | |||||
110 | ); | ||||
111 | |||||
112 | # And the following array gives the inverse mapping from code points to | ||||
113 | # names: one hash per range, holding its inclusive 'low' and 'high' ends and the base 'name'. Lowest code points are first; code_point_to_name_special() relies on that ordering to stop searching early. | ||||
114 | 1 | 7µs | my @code_points_ending_in_code_point = ( | ||
115 | |||||
116 | { | ||||
117 | 'high' => 19893, | ||||
118 | 'low' => 13312, | ||||
119 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
120 | }, | ||||
121 | { | ||||
122 | 'high' => 40908, | ||||
123 | 'low' => 19968, | ||||
124 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
125 | }, | ||||
126 | { | ||||
127 | 'high' => 64109, | ||||
128 | 'low' => 63744, | ||||
129 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
130 | }, | ||||
131 | { | ||||
132 | 'high' => 64217, | ||||
133 | 'low' => 64112, | ||||
134 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
135 | }, | ||||
136 | { | ||||
137 | 'high' => 173782, | ||||
138 | 'low' => 131072, | ||||
139 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
140 | }, | ||||
141 | { | ||||
142 | 'high' => 177972, | ||||
143 | 'low' => 173824, | ||||
144 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
145 | }, | ||||
146 | { | ||||
147 | 'high' => 178205, | ||||
148 | 'low' => 177984, | ||||
149 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
150 | }, | ||||
151 | { | ||||
152 | 'high' => 195101, | ||||
153 | 'low' => 194560, | ||||
154 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
155 | }, | ||||
156 | , | ||||
157 | |||||
158 | ); | ||||
159 | |||||
160 | # Convert from code point to Jamo short name for use in composing Hangul | ||||
161 | # syllable names. Keys are decimal code points: 4352-4370 are the leading consonants ($LBase + L index), 4449-4469 are the vowels ($VBase + V index), and 4520-4546 are the trailing consonants ($TBase + T index, with T >= 1). | ||||
162 | 1 | 30µs | my %Jamo = ( | ||
163 | 4352 => 'G', | ||||
164 | 4353 => 'GG', | ||||
165 | 4354 => 'N', | ||||
166 | 4355 => 'D', | ||||
167 | 4356 => 'DD', | ||||
168 | 4357 => 'R', | ||||
169 | 4358 => 'M', | ||||
170 | 4359 => 'B', | ||||
171 | 4360 => 'BB', | ||||
172 | 4361 => 'S', | ||||
173 | 4362 => 'SS', | ||||
174 | 4363 => '', | ||||
175 | 4364 => 'J', | ||||
176 | 4365 => 'JJ', | ||||
177 | 4366 => 'C', | ||||
178 | 4367 => 'K', | ||||
179 | 4368 => 'T', | ||||
180 | 4369 => 'P', | ||||
181 | 4370 => 'H', | ||||
182 | 4449 => 'A', | ||||
183 | 4450 => 'AE', | ||||
184 | 4451 => 'YA', | ||||
185 | 4452 => 'YAE', | ||||
186 | 4453 => 'EO', | ||||
187 | 4454 => 'E', | ||||
188 | 4455 => 'YEO', | ||||
189 | 4456 => 'YE', | ||||
190 | 4457 => 'O', | ||||
191 | 4458 => 'WA', | ||||
192 | 4459 => 'WAE', | ||||
193 | 4460 => 'OE', | ||||
194 | 4461 => 'YO', | ||||
195 | 4462 => 'U', | ||||
196 | 4463 => 'WEO', | ||||
197 | 4464 => 'WE', | ||||
198 | 4465 => 'WI', | ||||
199 | 4466 => 'YU', | ||||
200 | 4467 => 'EU', | ||||
201 | 4468 => 'YI', | ||||
202 | 4469 => 'I', | ||||
203 | 4520 => 'G', | ||||
204 | 4521 => 'GG', | ||||
205 | 4522 => 'GS', | ||||
206 | 4523 => 'N', | ||||
207 | 4524 => 'NJ', | ||||
208 | 4525 => 'NH', | ||||
209 | 4526 => 'D', | ||||
210 | 4527 => 'L', | ||||
211 | 4528 => 'LG', | ||||
212 | 4529 => 'LM', | ||||
213 | 4530 => 'LB', | ||||
214 | 4531 => 'LS', | ||||
215 | 4532 => 'LT', | ||||
216 | 4533 => 'LP', | ||||
217 | 4534 => 'LH', | ||||
218 | 4535 => 'M', | ||||
219 | 4536 => 'B', | ||||
220 | 4537 => 'BS', | ||||
221 | 4538 => 'S', | ||||
222 | 4539 => 'SS', | ||||
223 | 4540 => 'NG', | ||||
224 | 4541 => 'J', | ||||
225 | 4542 => 'C', | ||||
226 | 4543 => 'K', | ||||
227 | 4544 => 'T', | ||||
228 | 4545 => 'P', | ||||
229 | 4546 => 'H', | ||||
230 | |||||
231 | ); | ||||
232 | |||||
233 | # Leading consonant (can be null): maps the Jamo short name to its L index, 0 .. $LCount - 1 | ||||
234 | 1 | 6µs | my %Jamo_L = ( | ||
235 | '' => 11, | ||||
236 | 'B' => 7, | ||||
237 | 'BB' => 8, | ||||
238 | 'C' => 14, | ||||
239 | 'D' => 3, | ||||
240 | 'DD' => 4, | ||||
241 | 'G' => 0, | ||||
242 | 'GG' => 1, | ||||
243 | 'H' => 18, | ||||
244 | 'J' => 12, | ||||
245 | 'JJ' => 13, | ||||
246 | 'K' => 15, | ||||
247 | 'M' => 6, | ||||
248 | 'N' => 2, | ||||
249 | 'P' => 17, | ||||
250 | 'R' => 5, | ||||
251 | 'S' => 9, | ||||
252 | 'SS' => 10, | ||||
253 | 'T' => 16, | ||||
254 | |||||
255 | ); | ||||
256 | |||||
257 | # Vowel: maps the Jamo short name to its V index, 0 .. $VCount - 1 | ||||
258 | 1 | 6µs | my %Jamo_V = ( | ||
259 | 'A' => 0, | ||||
260 | 'AE' => 1, | ||||
261 | 'E' => 5, | ||||
262 | 'EO' => 4, | ||||
263 | 'EU' => 18, | ||||
264 | 'I' => 20, | ||||
265 | 'O' => 8, | ||||
266 | 'OE' => 11, | ||||
267 | 'U' => 13, | ||||
268 | 'WA' => 9, | ||||
269 | 'WAE' => 10, | ||||
270 | 'WE' => 15, | ||||
271 | 'WEO' => 14, | ||||
272 | 'WI' => 16, | ||||
273 | 'YA' => 2, | ||||
274 | 'YAE' => 3, | ||||
275 | 'YE' => 7, | ||||
276 | 'YEO' => 6, | ||||
277 | 'YI' => 19, | ||||
278 | 'YO' => 12, | ||||
279 | 'YU' => 17, | ||||
280 | |||||
281 | ); | ||||
282 | |||||
283 | # Optional trailing consonant: maps the Jamo short name to its T index, 1 .. $TCount - 1 (T index 0 is used for "no trailing consonant" and so has no entry here) | ||||
284 | 1 | 7µs | my %Jamo_T = ( | ||
285 | 'B' => 17, | ||||
286 | 'BS' => 18, | ||||
287 | 'C' => 23, | ||||
288 | 'D' => 7, | ||||
289 | 'G' => 1, | ||||
290 | 'GG' => 2, | ||||
291 | 'GS' => 3, | ||||
292 | 'H' => 27, | ||||
293 | 'J' => 22, | ||||
294 | 'K' => 24, | ||||
295 | 'L' => 8, | ||||
296 | 'LB' => 11, | ||||
297 | 'LG' => 9, | ||||
298 | 'LH' => 15, | ||||
299 | 'LM' => 10, | ||||
300 | 'LP' => 14, | ||||
301 | 'LS' => 12, | ||||
302 | 'LT' => 13, | ||||
303 | 'M' => 16, | ||||
304 | 'N' => 4, | ||||
305 | 'NG' => 21, | ||||
306 | 'NH' => 6, | ||||
307 | 'NJ' => 5, | ||||
308 | 'P' => 26, | ||||
309 | 'S' => 19, | ||||
310 | 'SS' => 20, | ||||
311 | 'T' => 25, | ||||
312 | |||||
313 | ); | ||||
314 | |||||
315 | # Computed re that splits the Jamo part of a Hangul syllable name into its leading consonant (capture 1, may be empty), vowel (capture 2) and optional trailing consonant (capture 3) short names, i.e. LVT or LV syllables | ||||
316 | 1 | 3µs | 1 | 1µs | my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/; # spent 1µs making 1 call to charnames::CORE:qr |
317 | |||||
318 | 1 | 200ns | my $HANGUL_SYLLABLE = "HANGUL SYLLABLE "; | ||
319 | 1 | 100ns | my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; | ||
320 | |||||
321 | # These constant names and values were taken from the Unicode standard, | ||||
322 | # version 5.1, section 3.12. They are used in conjunction with Hangul | ||||
323 | # syllables. $NCount is derived as $VCount * $TCount. | ||||
324 | 1 | 100ns | my $SBase = 0xAC00; | ||
325 | 1 | 100ns | my $LBase = 0x1100; | ||
326 | 1 | 100ns | my $VBase = 0x1161; | ||
327 | 1 | 100ns | my $TBase = 0x11A7; | ||
328 | 1 | 100ns | my $SCount = 11172; | ||
329 | 1 | 100ns | my $LCount = 19; | ||
330 | 1 | 100ns | my $VCount = 21; | ||
331 | 1 | 100ns | my $TCount = 28; | ||
332 | 1 | 1µs | my $NCount = $VCount * $TCount; | ||
333 | |||||
334 | sub name_to_code_point_special { | ||||
335 | my ($name, $loose) = @_; | ||||
336 | |||||
337 | # Returns the code point equivalent to the input name if it is one of the specially handled names (a Hangul syllable name, or a name that ends in its own code point); otherwise | ||||
338 | # returns undef (an empty list in list context). | ||||
339 | # $loose is true to use loose matching; $name in that case | ||||
340 | # must be input as upper case with all blanks and dashes squeezed out. | ||||
341 | # The Hangul prefix is only stripped from the very start of the name, so that text preceding it cannot run together with the Jamo short names and produce a false match. | ||||
342 | if ((! $loose && $name =~ s/^$HANGUL_SYLLABLE//) | ||||
343 | || ($loose && $name =~ s/^$loose_HANGUL_SYLLABLE//)) | ||||
344 | { | ||||
345 | return if $name !~ qr/^$syllable_re$/; | ||||
346 | my $L = $Jamo_L{$1}; | ||||
347 | my $V = $Jamo_V{$2}; | ||||
348 | my $T = (defined $3) ? $Jamo_T{$3} : 0; # T index 0 means no trailing consonant | ||||
349 | return ($L * $VCount + $V) * $TCount + $T + $SBase; | ||||
350 | } | ||||
351 | |||||
352 | # Name must end in 'code_point' for this to handle. | ||||
353 | return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x) | ||||
354 | || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x)); | ||||
355 | |||||
356 | my $base = $1; | ||||
357 | my $code_point = CORE::hex $2; | ||||
358 | my $names_ref; | ||||
359 | |||||
360 | if ($loose) { | ||||
361 | $names_ref = \%loose_names_ending_in_code_point; | ||||
362 | } | ||||
363 | else { | ||||
364 | return if $base !~ s/-$//; # strict names separate the base from the code point with a dash | ||||
365 | $names_ref = \%names_ending_in_code_point; | ||||
366 | } | ||||
367 | |||||
368 | # Name must be one of the ones which has the code point in it. | ||||
369 | return if ! $names_ref->{$base}; | ||||
370 | |||||
371 | # Look through the list of ranges that apply to this name to see if | ||||
372 | # the code point is in one of them. The ranges are in ascending order, so stop as soon as one starts above the code point. | ||||
373 | for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) { | ||||
374 | return if $names_ref->{$base}{'low'}->[$i] > $code_point; | ||||
375 | next if $names_ref->{$base}{'high'}->[$i] < $code_point; | ||||
376 | |||||
377 | # Here, the code point is in the range. | ||||
378 | return $code_point; | ||||
379 | } | ||||
380 | |||||
381 | # Here, looked like the name had a code point number in it, but | ||||
382 | # did not match one of the valid ones. | ||||
383 | return; | ||||
384 | } | ||||
385 | |||||
386 | sub code_point_to_name_special { | ||||
387 | my $code_point = shift; | ||||
388 | |||||
389 | # Returns the name of a code point if algorithmically determinable (a Hangul syllable, or a name that ends in its own code point); | ||||
390 | # undef (an empty list in list context) if not | ||||
391 | |||||
392 | # If in the Hangul range, calculate the name based on Unicode's | ||||
393 | # algorithm. The index arithmetic below needs integer division, hence the 'use integer'. | ||||
394 | if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) { | ||||
395 | 2 | 154µs | 2 | 22µs | use integer; # spent 19µs (17+3) within charnames::BEGIN@395 which was called:
# once (17µs+3µs) by charnames::BEGIN@5 at line 395 # spent 19µs making 1 call to charnames::BEGIN@395
# spent 2µs making 1 call to integer::import |
396 | my $SIndex = $code_point - $SBase; | ||||
397 | my $L = $LBase + $SIndex / $NCount; | ||||
398 | my $V = $VBase + ($SIndex % $NCount) / $TCount; | ||||
399 | my $T = $TBase + $SIndex % $TCount; | ||||
400 | my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; # lexical, so the package variable $charnames::name is not clobbered | ||||
401 | $name .= $Jamo{$T} if $T != $TBase; # $T == $TBase means there is no trailing consonant | ||||
402 | return $name; | ||||
403 | } | ||||
404 | |||||
405 | # Look through list of these code points for one in range. The list is in ascending order, so stop as soon as a range starts above the code point. | ||||
406 | foreach my $hash (@code_points_ending_in_code_point) { | ||||
407 | return if $code_point < $hash->{'low'}; | ||||
408 | if ($code_point <= $hash->{'high'}) { | ||||
409 | return sprintf("%s-%04X", $hash->{'name'}, $code_point); | ||||
410 | } | ||||
411 | } | ||||
412 | return; # None found | ||||
413 | } | ||||
414 | } # End closure | ||||
415 | |||||
416 | 1 | 19µs | 1; |