← Index
NYTProf Performance Profile   « line view »
For svc/members/upsert
  Run on Tue Jan 13 11:50:22 2015
Reported on Tue Jan 13 12:09:49 2015

Filename: /usr/share/perl/5.20/unicore/Name.pm
Statements: Executed 25 statements in 254µs
Subroutines
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine
1 | 1 | 1 | 17µs | 19µs | charnames::BEGIN@395
0 | 0 | 0 | 0s | 0s | charnames::code_point_to_name_special
0 | 0 | 0 | 0s | 0s | charnames::name_to_code_point_special
Call graph for these subroutines as a Graphviz dot language file.
Line | Statements | Time on line | Calls | Time in subs | Code
1# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
2# This file is machine-generated by lib/unicore/mktables from the Unicode
3# database, Version 6.3.0. Any changes made here will be lost!
4
5
6# !!!!!!! INTERNAL PERL USE ONLY !!!!!!!
7# This file is for internal use by core Perl only. The format and even the
8# name or existence of this file are subject to change without notice. Don't
9# use it directly. Use Unicode::UCD to access the Unicode character data
10# base.
11
12
13package charnames;
14
15# This module contains machine-generated tables and code for the
16# algorithmically-determinable Unicode character names. The following
17# routines can be used to translate between name and code point and vice versa
18
19{ # Closure
20
21 # Matches legal code point. 4-6 hex digits. If there are 6, the first
22 # two must be 10; if there are 5, the first must not be a 0. Written this
23 # way to decrease backtracking. The first regex allows the code point to
24 # be at the end of a word, but to work properly, the word shouldn't end
25 # with a valid hex character. The second one won't match a code point at
26 # the end of a word, and doesn't have the run-on issue
# name_to_code_point_special uses the first regex when loose matching is
# requested and the second one otherwise.
27210µs12µs my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b)/;
# spent 2µs making 1 call to charnames::CORE:qr
2812µs1800ns my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b))/;
# spent 800ns making 1 call to charnames::CORE:qr
29
30 # In the following hash, the keys are the bases of names which include
31 # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value
32 # of each key is another hash which is used to get the low and high ends
33 # for each range of code points that apply to the name.
# 'low' and 'high' are parallel arrays: range i runs from low[i] to high[i]
# inclusive, with the values written in decimal. The ranges are listed in
# ascending order; name_to_code_point_special relies on that, because it
# stops scanning at the first 'low' that is above the code point it checks.
# In hex the ranges are:
#   CJK COMPATIBILITY IDEOGRAPH: F900-FA6D, FA70-FAD9, 2F800-2FA1D
#   CJK UNIFIED IDEOGRAPH: 3400-4DB5, 4E00-9FCC, 20000-2A6D6, 2A700-2B734,
#                          2B740-2B81D
3414µs my %names_ending_in_code_point = (
35'CJK COMPATIBILITY IDEOGRAPH' =>
36{
37'high' =>
38[
3964109,
4064217,
41195101,
42],
43'low' =>
44[
4563744,
4664112,
47194560,
48],
49},
50'CJK UNIFIED IDEOGRAPH' =>
51{
52'high' =>
53[
5419893,
5540908,
56173782,
57177972,
58178205,
59],
60'low' =>
61[
6213312,
6319968,
64131072,
65173824,
66177984,
67],
68},
69
70 );
71
72 # The following hash is a copy of the previous one, except is for loose
73 # matching, so each name has blanks and dashes squeezed out
# name_to_code_point_special consults this table (instead of the strict one)
# when its $loose argument is true. The 'low'/'high' arrays are parallel,
# inclusive, decimal, and in ascending order, which the lookup loop relies on.
7412µs my %loose_names_ending_in_code_point = (
75'CJKCOMPATIBILITYIDEOGRAPH' =>
76{
77'high' =>
78[
7964109,
8064217,
81195101,
82],
83'low' =>
84[
8563744,
8664112,
87194560,
88],
89},
90'CJKUNIFIEDIDEOGRAPH' =>
91{
92'high' =>
93[
9419893,
9540908,
96173782,
97177972,
98178205,
99],
100'low' =>
101[
10213312,
10319968,
104131072,
105173824,
106177984,
107],
108},
109
110 );
111
112 # And the following array gives the inverse mapping from code points to
113 # names. Lowest code points are first
# Each entry is one inclusive range ('low' to 'high', in decimal) plus the
# base 'name' used for every code point in it. The ascending order matters:
# code_point_to_name_special walks the list front to back and gives up at the
# first entry whose 'low' is above the code point it is looking for.
11417µs my @code_points_ending_in_code_point = (
115
116{
117'high' => 19893,
118'low' => 13312,
119'name' => 'CJK UNIFIED IDEOGRAPH',
120},
121{
122'high' => 40908,
123'low' => 19968,
124'name' => 'CJK UNIFIED IDEOGRAPH',
125},
126{
127'high' => 64109,
128'low' => 63744,
129'name' => 'CJK COMPATIBILITY IDEOGRAPH',
130},
131{
132'high' => 64217,
133'low' => 64112,
134'name' => 'CJK COMPATIBILITY IDEOGRAPH',
135},
136{
137'high' => 173782,
138'low' => 131072,
139'name' => 'CJK UNIFIED IDEOGRAPH',
140},
141{
142'high' => 177972,
143'low' => 173824,
144'name' => 'CJK UNIFIED IDEOGRAPH',
145},
146{
147'high' => 178205,
148'low' => 177984,
149'name' => 'CJK UNIFIED IDEOGRAPH',
150},
151{
152'high' => 195101,
153'low' => 194560,
154'name' => 'CJK COMPATIBILITY IDEOGRAPH',
155},
156,
157
158 );
159
160 # Convert from code point to Jamo short name for use in composing Hangul
161 # syllable names
# Keys are decimal code points, in three groups that line up with the base
# constants declared further down in this closure:
#   4352-4370  leading consonants   ($LBase + 0 .. $LBase + 18)
#   4449-4469  vowels               ($VBase + 0 .. $VBase + 20)
#   4520-4546  trailing consonants  ($TBase + 1 .. $TBase + 27)
# 4363 maps to the empty string: that leading consonant contributes nothing
# to the syllable name.
162130µs my %Jamo = (
1634352 => 'G',
1644353 => 'GG',
1654354 => 'N',
1664355 => 'D',
1674356 => 'DD',
1684357 => 'R',
1694358 => 'M',
1704359 => 'B',
1714360 => 'BB',
1724361 => 'S',
1734362 => 'SS',
1744363 => '',
1754364 => 'J',
1764365 => 'JJ',
1774366 => 'C',
1784367 => 'K',
1794368 => 'T',
1804369 => 'P',
1814370 => 'H',
1824449 => 'A',
1834450 => 'AE',
1844451 => 'YA',
1854452 => 'YAE',
1864453 => 'EO',
1874454 => 'E',
1884455 => 'YEO',
1894456 => 'YE',
1904457 => 'O',
1914458 => 'WA',
1924459 => 'WAE',
1934460 => 'OE',
1944461 => 'YO',
1954462 => 'U',
1964463 => 'WEO',
1974464 => 'WE',
1984465 => 'WI',
1994466 => 'YU',
2004467 => 'EU',
2014468 => 'YI',
2024469 => 'I',
2034520 => 'G',
2044521 => 'GG',
2054522 => 'GS',
2064523 => 'N',
2074524 => 'NJ',
2084525 => 'NH',
2094526 => 'D',
2104527 => 'L',
2114528 => 'LG',
2124529 => 'LM',
2134530 => 'LB',
2144531 => 'LS',
2154532 => 'LT',
2164533 => 'LP',
2174534 => 'LH',
2184535 => 'M',
2194536 => 'B',
2204537 => 'BS',
2214538 => 'S',
2224539 => 'SS',
2234540 => 'NG',
2244541 => 'J',
2254542 => 'C',
2264543 => 'K',
2274544 => 'T',
2284545 => 'P',
2294546 => 'H',
230
231 );
232
233 # Leading consonant (can be null)
# Maps a Jamo short name to its leading-consonant index (0-18).
# name_to_code_point_special uses the index as the L term when it composes
# the syllable code point. The '' key is the leading consonant whose short
# name is empty.
23416µs my %Jamo_L = (
235'' => 11,
236'B' => 7,
237'BB' => 8,
238'C' => 14,
239'D' => 3,
240'DD' => 4,
241'G' => 0,
242'GG' => 1,
243'H' => 18,
244'J' => 12,
245'JJ' => 13,
246'K' => 15,
247'M' => 6,
248'N' => 2,
249'P' => 17,
250'R' => 5,
251'S' => 9,
252'SS' => 10,
253'T' => 16,
254
255 );
256
257 # Vowel
# Maps a Jamo short name to its vowel index (0-20), which
# name_to_code_point_special uses as the V term when it composes the
# syllable code point.
25816µs my %Jamo_V = (
259'A' => 0,
260'AE' => 1,
261'E' => 5,
262'EO' => 4,
263'EU' => 18,
264'I' => 20,
265'O' => 8,
266'OE' => 11,
267'U' => 13,
268'WA' => 9,
269'WAE' => 10,
270'WE' => 15,
271'WEO' => 14,
272'WI' => 16,
273'YA' => 2,
274'YAE' => 3,
275'YE' => 7,
276'YEO' => 6,
277'YI' => 19,
278'YO' => 12,
279'YU' => 17,
280
281 );
282
283 # Optional trailing consonant
# Maps a Jamo short name to its trailing-consonant index. The indices start
# at 1; name_to_code_point_special uses 0 for a syllable that has no trailing
# consonant.
28417µs my %Jamo_T = (
285'B' => 17,
286'BS' => 18,
287'C' => 23,
288'D' => 7,
289'G' => 1,
290'GG' => 2,
291'GS' => 3,
292'H' => 27,
293'J' => 22,
294'K' => 24,
295'L' => 8,
296'LB' => 11,
297'LG' => 9,
298'LH' => 15,
299'LM' => 10,
300'LP' => 14,
301'LS' => 12,
302'LT' => 13,
303'M' => 16,
304'N' => 4,
305'NG' => 21,
306'NH' => 6,
307'NJ' => 5,
308'P' => 26,
309'S' => 19,
310'SS' => 20,
311'T' => 25,
312
313 );
314
315 # Computed re that splits up a Hangul name into LVT or LV syllables
# Capture 1 is the leading consonant (may be empty), capture 2 the vowel and
# capture 3 the optional trailing consonant; the alternatives are the keys of
# %Jamo_L, %Jamo_V and %Jamo_T respectively. The pattern itself is not
# anchored, so callers must anchor it.
31613µs11µs my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/;
# spent 1µs making 1 call to charnames::CORE:qr
317
# Name prefixes for strict matching (note the trailing blank) and for loose
# matching (blanks squeezed out).
3181200ns my $HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
3191100ns my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
320
321 # These constant names and values were taken from the Unicode standard,
322 # version 5.1, section 3.12. They are used in conjunction with Hangul
323 # syllables
3241100ns my $SBase = 0xAC00; # first code point of the Hangul syllable range
3251100ns my $LBase = 0x1100; # code point of leading consonant index 0
3261100ns my $VBase = 0x1161; # code point of vowel index 0
3271100ns my $TBase = 0x11A7; # one below the first trailing consonant (index 0 = none)
3281100ns my $SCount = 11172; # number of syllables (= $LCount * $VCount * $TCount)
3291100ns my $LCount = 19; # number of leading consonants
3301100ns my $VCount = 21; # number of vowels
3311100ns my $TCount = 28; # trailing consonants, counting "none" as one
33211µs my $NCount = $VCount * $TCount; # syllables per leading consonant
333
# Translate an algorithmically determinable character name into its code
# point.
#
#   $name   the character name to look up
#   $loose  true to use loose matching, in which case $name must be passed
#           in upper case with all blanks and dashes squeezed out
#
# Returns the code point, or an empty return (undef in scalar context) if
# $name is not one of the specially handled names.
sub name_to_code_point_special {
    my ($name, $loose) = @_;

    # Hangul syllables.  The prefix is stripped only from the very start of
    # the name: an unanchored strip would let junk in front of the prefix
    # fuse with the syllable (e.g. "GHANGUL SYLLABLE A" would collapse to
    # the valid syllable "GA" and be accepted).
    my $is_hangul = $loose
                  ? $name =~ s/\A$loose_HANGUL_SYLLABLE//
                  : $name =~ s/\A$HANGUL_SYLLABLE//;
    if ($is_hangul) {

        # What is left must be exactly one syllable: leading consonant
        # (possibly empty), vowel, optional trailing consonant.  \z, unlike
        # $, also refuses a trailing newline.
        return if $name !~ /\A$syllable_re\z/;

        my $lead_index  = $Jamo_L{$1};    # $1 may be '' (null consonant)
        my $vowel_index = $Jamo_V{$2};
        my $trail_index = defined $3 ? $Jamo_T{$3} : 0;    # 0: no trailing
        return ($lead_index * $VCount + $vowel_index) * $TCount
             + $trail_index + $SBase;
    }

    # Otherwise the name must end in a code point for this routine to handle
    # it.  In loose mode the hex digits may run straight on from the base
    # name; in strict mode they must start at a word boundary.
    my $ends_in_code_point = $loose
        ? $name =~ /\A (.*?) ($run_on_code_point_re) \z/x
        : $name =~ /\A (.*)  ($code_point_re)        \z/x;
    return if ! $ends_in_code_point;

    my $base       = $1;
    my $code_point = CORE::hex $2;

    my $names_ref;
    if ($loose) {
        $names_ref = \%loose_names_ending_in_code_point;
    }
    else {
        # A strict name separates the base from the code point with a dash.
        return unless $base =~ s/-\z//;
        $names_ref = \%names_ending_in_code_point;
    }

    # Name must be one of the ones which has the code point in it.
    my $ranges = $names_ref->{$base} or return;

    # Look through the ranges that apply to this name to see if the code
    # point is in one of them.  The ranges are in ascending order, so once a
    # range starts above the code point no later range can contain it.
    my ($lows, $highs) = @{$ranges}{qw(low high)};
    for my $i (0 .. $#{$lows}) {
        return if $lows->[$i]  > $code_point;
        next   if $highs->[$i] < $code_point;

        # Here, the code point is in the range.
        return $code_point;
    }

    # Here, the name looked like it had a code point number in it, but the
    # number is not in any of the valid ranges.
    return;
}
385
# Return the name of a code point if it is algorithmically determinable.
#
#   $code_point  the code point to name, as a number
#
# Returns the name as a string: "HANGUL SYLLABLE <jamo short names>" for a
# code point in the Hangul syllable range, or "<base name>-<HEX>" for one
# inside a range listed in @code_points_ending_in_code_point.  Returns an
# empty return (undef in scalar context) for any other code point.
sub code_point_to_name_special {
    my $code_point = shift;

    # If in the Hangul range, calculate the name based on Unicode's
    # algorithm
    if ($code_point >= $SBase && $code_point <= $SBase + $SCount - 1) {

        # Integer arithmetic, so that the divisions below truncate.
        # NYTProf, line 395: 2 statements, 154µs on line; 2 calls, 22µs in subs
        # spent 19µs (17+3) within charnames::BEGIN@395 which was called: # once (17µs+3µs) by charnames::BEGIN@5 at line 395
        use integer;
        # spent 19µs making 1 call to charnames::BEGIN@395 # spent 2µs making 1 call to integer::import

        my $SIndex = $code_point - $SBase;
        my $L = $LBase + $SIndex / $NCount;
        my $V = $VBase + ($SIndex % $NCount) / $TCount;
        my $T = $TBase + $SIndex % $TCount;

        # $name is lexical: assigning it undeclared would overwrite the
        # package variable $charnames::name on every call.
        my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";

        # $T equal to $TBase means the syllable has no trailing consonant.
        $name .= $Jamo{$T} if $T != $TBase;
        return $name;
    }

    # Look through the list of ranges for one containing the code point.
    # The list is in ascending order, so stop as soon as a range starts
    # above the code point.
    foreach my $range (@code_points_ending_in_code_point) {
        return if $code_point < $range->{'low'};
        if ($code_point <= $range->{'high'}) {
            return sprintf("%s-%04X", $range->{'name'}, $code_point);
        }
    }
    return;    # None found
}
414} # End closure
415
416119µs1;