Profile of unicore/Name.pm

Filename	/usr/share/perl/5.20/unicore/Name.pm
Statements	Executed 25 statements in 254µs

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	17µs	19µs	charnames::::BEGIN@395charnames::BEGIN@395
0	0	0	0s	0s	charnames::::code_point_to_name_specialcharnames::code_point_to_name_special
0	0	0	0s	0s	charnames::::name_to_code_point_specialcharnames::name_to_code_point_special

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
2					# This file is machine-generated by lib/unicore/mktables from the Unicode
3					# database, Version 6.3.0. Any changes made here will be lost!
4
5
6					# !!!!!!! INTERNAL PERL USE ONLY !!!!!!!
7					# This file is for internal use by core Perl only. The format and even the
8					# name or existence of this file are subject to change without notice. Don't
9					# use it directly. Use Unicode::UCD to access the Unicode character data
10					# base.
11
12
13					package charnames;
14
15					# This module contains machine-generated tables and code for the
16					# algorithmically-determinable Unicode character names. The following
17					# routines can be used to translate between name and code point and vice versa
18
19					{ # Closure
20
21					# Matches a legal code point: 4-6 hex digits. If there are 6, the first
22					# two must be 10; if there are 5, the first must not be a 0. Written this
23					# way to decrease backtracking. The first regex allows the code point to
24					# be at the end of a word, but to work properly, the word shouldn't end
25					# with a valid hex character. The second one won't match a code point at
26					# the end of a word, and doesn't have the run-on issue. Loose matching uses the first, strict matching the second.
27	2	10µs	1	2µs	my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} \| [1-9A-F][0-9A-F]{4} \| [0-9A-F]{4} ) \b)/; # spent 2µs making 1 call to charnames::CORE:qr
28	1	2µs	1	800ns	my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} \| [1-9A-F][0-9A-F]{4} \| [0-9A-F]{4} ) \b))/; # spent 800ns making 1 call to charnames::CORE:qr
29
30					# In the following hash, the keys are the bases of names which include
31					# the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value
32					# of each key is another hash holding two parallel arrays, 'low' and 'high':
33					# entry i of each is the inclusive low/high end of the i-th range of code points that apply to the name. Ranges are listed in ascending order.
34	1	4µs			my %names_ending_in_code_point = (
35					'CJK COMPATIBILITY IDEOGRAPH' =>
36					{
37					'high' =>
38					[
39					64109,
40					64217,
41					195101,
42					],
43					'low' =>
44					[
45					63744,
46					64112,
47					194560,
48					],
49					},
50					'CJK UNIFIED IDEOGRAPH' =>
51					{
52					'high' =>
53					[
54					19893,
55					40908,
56					173782,
57					177972,
58					178205,
59					],
60					'low' =>
61					[
62					13312,
63					19968,
64					131072,
65					173824,
66					177984,
67					],
68					},
69
70					);
71
72					# The following hash is a copy of the previous one, except that it is for loose
73					# matching, so each name (hash key) has blanks and dashes squeezed out. The 'low'/'high' ranges are the same.
74	1	2µs			my %loose_names_ending_in_code_point = (
75					'CJKCOMPATIBILITYIDEOGRAPH' =>
76					{
77					'high' =>
78					[
79					64109,
80					64217,
81					195101,
82					],
83					'low' =>
84					[
85					63744,
86					64112,
87					194560,
88					],
89					},
90					'CJKUNIFIEDIDEOGRAPH' =>
91					{
92					'high' =>
93					[
94					19893,
95					40908,
96					173782,
97					177972,
98					178205,
99					],
100					'low' =>
101					[
102					13312,
103					19968,
104					131072,
105					173824,
106					177984,
107					],
108					},
109
110					);
111
112					# And the following array gives the inverse mapping from code points to
113					# names. Lowest code points are first; each entry covers the inclusive range 'low'..'high'.
114	1	7µs			my @code_points_ending_in_code_point = (
115
116					{
117					'high' => 19893, # 0x4DB5
118					'low' => 13312, # 0x3400
119					'name' => 'CJK UNIFIED IDEOGRAPH',
120					},
121					{
122					'high' => 40908, # 0x9FCC
123					'low' => 19968, # 0x4E00
124					'name' => 'CJK UNIFIED IDEOGRAPH',
125					},
126					{
127					'high' => 64109, # 0xFA6D
128					'low' => 63744, # 0xF900
129					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
130					},
131					{
132					'high' => 64217, # 0xFAD9
133					'low' => 64112, # 0xFA70
134					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
135					},
136					{
137					'high' => 173782, # 0x2A6D6
138					'low' => 131072, # 0x20000
139					'name' => 'CJK UNIFIED IDEOGRAPH',
140					},
141					{
142					'high' => 177972, # 0x2B734
143					'low' => 173824, # 0x2A700
144					'name' => 'CJK UNIFIED IDEOGRAPH',
145					},
146					{
147					'high' => 178205, # 0x2B81D
148					'low' => 177984, # 0x2B740
149					'name' => 'CJK UNIFIED IDEOGRAPH',
150					},
151					{
152					'high' => 195101, # 0x2FA1D
153					'low' => 194560, # 0x2F800
154					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
155					},
156					, # redundant extra comma; harmless in a Perl list
157
158					);
159
160					# Convert from code point to Jamo short name for use in composing Hangul
161					# syllable names. Keys are decimal code points in three runs: leading consonants, vowels, then trailing consonants.
162	1	30µs			my %Jamo = (
163					4352 => 'G', # 0x1100: leading consonants start here
164					4353 => 'GG',
165					4354 => 'N',
166					4355 => 'D',
167					4356 => 'DD',
168					4357 => 'R',
169					4358 => 'M',
170					4359 => 'B',
171					4360 => 'BB',
172					4361 => 'S',
173					4362 => 'SS',
174					4363 => '', # this leading consonant has an empty short name
175					4364 => 'J',
176					4365 => 'JJ',
177					4366 => 'C',
178					4367 => 'K',
179					4368 => 'T',
180					4369 => 'P',
181					4370 => 'H',
182					4449 => 'A', # 0x1161: vowels start here
183					4450 => 'AE',
184					4451 => 'YA',
185					4452 => 'YAE',
186					4453 => 'EO',
187					4454 => 'E',
188					4455 => 'YEO',
189					4456 => 'YE',
190					4457 => 'O',
191					4458 => 'WA',
192					4459 => 'WAE',
193					4460 => 'OE',
194					4461 => 'YO',
195					4462 => 'U',
196					4463 => 'WEO',
197					4464 => 'WE',
198					4465 => 'WI',
199					4466 => 'YU',
200					4467 => 'EU',
201					4468 => 'YI',
202					4469 => 'I',
203					4520 => 'G', # 0x11A8: trailing consonants start here
204					4521 => 'GG',
205					4522 => 'GS',
206					4523 => 'N',
207					4524 => 'NJ',
208					4525 => 'NH',
209					4526 => 'D',
210					4527 => 'L',
211					4528 => 'LG',
212					4529 => 'LM',
213					4530 => 'LB',
214					4531 => 'LS',
215					4532 => 'LT',
216					4533 => 'LP',
217					4534 => 'LH',
218					4535 => 'M',
219					4536 => 'B',
220					4537 => 'BS',
221					4538 => 'S',
222					4539 => 'SS',
223					4540 => 'NG',
224					4541 => 'J',
225					4542 => 'C',
226					4543 => 'K',
227					4544 => 'T',
228					4545 => 'P',
229					4546 => 'H',
230
231					);
232
233					# Leading consonant (can be null). Maps the Jamo short name to the leading-consonant index used when composing a syllable's code point.
234	1	6µs			my %Jamo_L = (
235					'' => 11, # the null (empty-named) leading consonant
236					'B' => 7,
237					'BB' => 8,
238					'C' => 14,
239					'D' => 3,
240					'DD' => 4,
241					'G' => 0,
242					'GG' => 1,
243					'H' => 18,
244					'J' => 12,
245					'JJ' => 13,
246					'K' => 15,
247					'M' => 6,
248					'N' => 2,
249					'P' => 17,
250					'R' => 5,
251					'S' => 9,
252					'SS' => 10,
253					'T' => 16,
254
255					);
256
257					# Vowel. Maps the Jamo short name to the vowel index used when composing a syllable's code point.
258	1	6µs			my %Jamo_V = (
259					'A' => 0,
260					'AE' => 1,
261					'E' => 5,
262					'EO' => 4,
263					'EU' => 18,
264					'I' => 20,
265					'O' => 8,
266					'OE' => 11,
267					'U' => 13,
268					'WA' => 9,
269					'WAE' => 10,
270					'WE' => 15,
271					'WEO' => 14,
272					'WI' => 16,
273					'YA' => 2,
274					'YAE' => 3,
275					'YE' => 7,
276					'YEO' => 6,
277					'YI' => 19,
278					'YO' => 12,
279					'YU' => 17,
280
281					);
282
283					# Optional trailing consonant. Maps the Jamo short name to the trailing-consonant index; indices start at 1 because 0 is used for "no trailing consonant".
284	1	7µs			my %Jamo_T = (
285					'B' => 17,
286					'BS' => 18,
287					'C' => 23,
288					'D' => 7,
289					'G' => 1,
290					'GG' => 2,
291					'GS' => 3,
292					'H' => 27,
293					'J' => 22,
294					'K' => 24,
295					'L' => 8,
296					'LB' => 11,
297					'LG' => 9,
298					'LH' => 15,
299					'LM' => 10,
300					'LP' => 14,
301					'LS' => 12,
302					'LT' => 13,
303					'M' => 16,
304					'N' => 4,
305					'NG' => 21,
306					'NH' => 6,
307					'NJ' => 5,
308					'P' => 26,
309					'S' => 19,
310					'SS' => 20,
311					'T' => 25,
312
313					);
314
315					# Computed re that splits up a Hangul name into LVT or LV syllables: capture 1 is the leading consonant (possibly empty), capture 2 the vowel, and the optional capture 3 the trailing consonant. It is unanchored; the caller adds the anchors.
316	1	3µs	1	1µs	my $syllable_re = qr/(\|B\|BB\|C\|D\|DD\|G\|GG\|H\|J\|JJ\|K\|M\|N\|P\|R\|S\|SS\|T)(A\|AE\|E\|EO\|EU\|I\|O\|OE\|U\|WA\|WAE\|WE\|WEO\|WI\|YA\|YAE\|YE\|YEO\|YI\|YO\|YU)(B\|BS\|C\|D\|G\|GG\|GS\|H\|J\|K\|L\|LB\|LG\|LH\|LM\|LP\|LS\|LT\|M\|N\|NG\|NH\|NJ\|P\|S\|SS\|T)?/; # spent 1µs making 1 call to charnames::CORE:qr
317
318	1	200ns			my $HANGUL_SYLLABLE = "HANGUL SYLLABLE "; # name prefix for strict matching (note the trailing blank)
319	1	100ns			my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; # same prefix with blanks squeezed out, for loose matching
320
321					# These constant names and values were taken from the Unicode standard,
322					# version 5.1, section 3.12. They are used in conjunction with Hangul
323					# syllables.
324	1	100ns			my $SBase = 0xAC00; # first code point of the Hangul syllable range
325	1	100ns			my $LBase = 0x1100; # code point of the leading consonant with index 0
326	1	100ns			my $VBase = 0x1161; # code point of the vowel with index 0
327	1	100ns			my $TBase = 0x11A7; # one below the first trailing consonant; index 0 means "none"
328	1	100ns			my $SCount = 11172; # number of syllables: $LCount * $VCount * $TCount
329	1	100ns			my $LCount = 19; # number of leading consonants
330	1	100ns			my $VCount = 21; # number of vowels
331	1	100ns			my $TCount = 28; # number of trailing-consonant slots, including "none"
332	1	1µs			my $NCount = $VCount * $TCount; # 588: syllables per leading consonant
333
334					sub name_to_code_point_special {
335					my ($name, $loose) = @_;
336
337					# Returns nothing (undef in scalar context) if $name is not one of the
338					# specially handled names; otherwise returns the code point equivalent to it.
339					# $loose is true if loose matching is to be used; $name in that case
340					# must be input as upper case with all blanks and dashes squeezed out.
341
342					if ((! $loose && $name =~ s/^$HANGUL_SYLLABLE//) # prefix must begin the name; unanchored, "GHANGUL SYLLABLE A" would collapse to "GA" and be accepted
343					\|\| ($loose && $name =~ s/^$loose_HANGUL_SYLLABLE//))
344					{
345					return if $name !~ qr/^$syllable_re$/; # the remainder must be exactly one L V [T] sequence
346					my $L = $Jamo_L{$1};
347					my $V = $Jamo_V{$2};
348					my $T = (defined $3) ? $Jamo_T{$3} : 0; # index 0 means no trailing consonant
349					return ($L * $VCount + $V) * $TCount + $T + $SBase; # compose the syllable's code point from the three indices
350					}
351
352					# Name must end in 'code_point' for this to handle.
353					return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
354					\|\| (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
355
356					my $base = $1; # everything before the trailing hex digits
357					my $code_point = CORE::hex $2;
358					my $names_ref;
359
360					if ($loose) {
361					$names_ref = \%loose_names_ending_in_code_point;
362					}
363					else {
364					return if $base !~ s/-$//; # strict names need a '-' before the code point; strip it from the base
365					$names_ref = \%names_ending_in_code_point;
366					}
367
368					# Name must be one of the ones which has the code point in it.
369					return if ! $names_ref->{$base};
370
371					# Look through the list of ranges that apply to this name to see if
372					# the code point is in one of them.
373					for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
374					return if $names_ref->{$base}{'low'}->[$i] > $code_point; # ranges ascend, so no later range can match
375					next if $names_ref->{$base}{'high'}->[$i] < $code_point; # above this range; try the next one
376
377					# Here, the code point is in the range.
378					return $code_point;
379					}
380
381					# Here, looked like the name had a code point number in it, but
382					# did not match one of the valid ones.
383					return;
384					}
385
386					sub code_point_to_name_special {
387					my $code_point = shift;
388
389					# Returns the name of a code point if algorithmically determinable;
390					# nothing (undef in scalar context) if not.
391
392					# If in the Hangul range, calculate the name based on Unicode's
393					# algorithm
394					if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
395	2	154µs	2	22µs	# spent 19µs (17+3) within charnames::BEGIN@395 which was called: # once (17µs+3µs) by charnames::BEGIN@5 at line 395 use integer; # spent 19µs making 1 call to charnames::BEGIN@395 # spent 2µs making 1 call to integer::import
396					my $SIndex = $code_point - $SBase; # zero-based position within the syllable range
397					my $L = $LBase + $SIndex / $NCount; # integer division: 'use integer' (line 395) is in effect
398					my $V = $VBase + ($SIndex % $NCount) / $TCount;
399					my $T = $TBase + $SIndex % $TCount;
400					my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; # lexical, so the package global $charnames::name is not written
401					$name .= $Jamo{$T} if $T != $TBase; # $T == $TBase means there is no trailing consonant
402					return $name;
403					}
404
405					# Look through list of these code points for one in range.
406					foreach my $hash (@code_points_ending_in_code_point) {
407					return if $code_point < $hash->{'low'}; # list is sorted ascending, so no later range can match
408					if ($code_point <= $hash->{'high'}) {
409					return sprintf("%s-%04X", $hash->{'name'}, $code_point); # e.g. "<name>-4E01": upper-case hex, at least 4 digits
410					}
411					}
412					return; # None found
413					}
414					} # End closure
415
416	1	19µs			1;