Profile of charnames.pm

Filename	/usr/share/perl/5.10/charnames.pm
Statements	Executed 40 statements in 1.93ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	37µs	2.31ms	charnames::::importcharnames::import
1	1	1	22µs	28µs	charnames::::BEGIN@2charnames::BEGIN@2
1	1	1	18µs	23µs	charnames::::BEGIN@174charnames::BEGIN@174
1	1	1	17µs	22µs	charnames::::BEGIN@4charnames::BEGIN@4
1	1	1	13µs	33µs	charnames::::BEGIN@183charnames::BEGIN@183
1	1	1	11µs	27µs	charnames::::BEGIN@3charnames::BEGIN@3
1	1	1	5µs	5µs	charnames::::BEGIN@7charnames::BEGIN@7
0	0	0	0s	0s	charnames::::aliascharnames::alias
0	0	0	0s	0s	charnames::::alias_filecharnames::alias_file
0	0	0	0s	0s	charnames::::carpcharnames::carp
0	0	0	0s	0s	charnames::::charnamescharnames::charnames
0	0	0	0s	0s	charnames::::croakcharnames::croak
0	0	0	0s	0s	charnames::::viacodecharnames::viacode
0	0	0	0s	0s	charnames::::vianamecharnames::vianame

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					package charnames;
2	3	29µs	2	33µs	# spent 28µs (22+5) within charnames::BEGIN@2 which was called: # once (22µs+5µs) by MARC::Charset::BEGIN@12 at line 2 use strict; # spent 28µs making 1 call to charnames::BEGIN@2 # spent 5µs making 1 call to strict::import
3	3	28µs	2	43µs	# spent 27µs (11+16) within charnames::BEGIN@3 which was called: # once (11µs+16µs) by MARC::Charset::BEGIN@12 at line 3 use warnings; # spent 27µs making 1 call to charnames::BEGIN@3 # spent 16µs making 1 call to warnings::import
4	3	44µs	2	26µs	# spent 22µs (17+4) within charnames::BEGIN@4 which was called: # once (17µs+4µs) by MARC::Charset::BEGIN@12 at line 4 use File::Spec; # spent 22µs making 1 call to charnames::BEGIN@4 # spent 4µs making 1 call to UNIVERSAL::import
5	1	700ns			our $VERSION = '1.07';
6
7	3	863µs	1	5µs	# spent 5µs within charnames::BEGIN@7 which was called: # once (5µs+0s) by MARC::Charset::BEGIN@12 at line 7 use bytes (); # for $bytes::hint_bits # spent 5µs making 1 call to charnames::BEGIN@7
8
9	1	10µs			my %alias1 = (
10					# Icky 3.2 names with parentheses.
11					'LINE FEED' => 'LINE FEED (LF)',
12					'FORM FEED' => 'FORM FEED (FF)',
13					'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)',
14					'NEXT LINE' => 'NEXT LINE (NEL)',
15					# Convenience.
16					'LF' => 'LINE FEED (LF)',
17					'FF' => 'FORM FEED (FF)',
18					'CR' => 'CARRIAGE RETURN (CR)',
19					'NEL' => 'NEXT LINE (NEL)',
20					# More convenience. For futher convencience,
21					# it is suggested some way using using the NamesList
22					# aliases is implemented.
23					'ZWNJ' => 'ZERO WIDTH NON-JOINER',
24					'ZWJ' => 'ZERO WIDTH JOINER',
25					'BOM' => 'BYTE ORDER MARK',
26					);
27
28	1	6µs			my %alias2 = (
29					# Pre-3.2 compatibility (only for the first 256 characters).
30					'HORIZONTAL TABULATION' => 'CHARACTER TABULATION',
31					'VERTICAL TABULATION' => 'LINE TABULATION',
32					'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR',
33					'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE',
34					'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO',
35					'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE',
36					'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD',
37					'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD',
38					);
39
40	1	300ns			my %alias3 = (
41					# User defined aliasses. Even more convenient :)
42					);
43	1	300ns			my $txt;
44
45					sub croak
46					{
47					require Carp; goto &Carp::croak;
48					} # croak
49
50					sub carp
51					{
52					require Carp; goto &Carp::carp;
53					} # carp
54
55					sub alias (@)
56					{
57					@_ or return %alias3;
58					my $alias = ref $_[0] ? $_[0] : { @_ };
59					@alias3{keys %$alias} = values %$alias;
60					} # alias
61
62					sub alias_file ($)
63					{
64					my ($arg, $file) = @_;
65					if (-f $arg && File::Spec->file_name_is_absolute ($arg)) {
66					$file = $arg;
67					}
68					elsif ($arg =~ m/^\w+$/) {
69					$file = "unicore/${arg}_alias.pl";
70					}
71					else {
72					croak "Charnames alias files can only have identifier characters";
73					}
74					if (my @alias = do $file) {
75					@alias == 1 && !defined $alias[0] and
76					croak "$file cannot be used as alias file for charnames";
77					@alias % 2 and
78					croak "$file did not return a (valid) list of alias pairs";
79					alias (@alias);
80					return (1);
81					}
82					0;
83					} # alias_file
84
85					# This is not optimized in any way yet
86					sub charnames
87					{
88					my $name = shift;
89
90					if (exists $alias1{$name}) {
91					$name = $alias1{$name};
92					}
93					elsif (exists $alias2{$name}) {
94					require warnings;
95					warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead});
96					$name = $alias2{$name};
97					}
98					elsif (exists $alias3{$name}) {
99					$name = $alias3{$name};
100					}
101
102					my $ord;
103					my @off;
104					my $fname;
105
106					if ($name eq "BYTE ORDER MARK") {
107					$fname = $name;
108					$ord = 0xFEFF;
109					} else {
110					## Suck in the code/name list as a big string.
111					## Lines look like:
112					## "0052\t\tLATIN CAPITAL LETTER R\n"
113					$txt = do "unicore/Name.pl" unless $txt;
114
115					## @off will hold the index into the code/name string of the start and
116					## end of the name as we find it.
117
118					## If :full, look for the name exactly
119					if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
120					@off = ($-[0], $+[0]);
121					}
122
123					## If we didn't get above, and :short allowed, look for the short name.
124					## The short name is like "greek:Sigma"
125					unless (@off) {
126					if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
127					my ($script, $cname) = ($1, $2);
128					my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
129					if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) {
130					@off = ($-[0], $+[0]);
131					}
132					}
133					}
134
135					## If we still don't have it, check for the name among the loaded
136					## scripts.
137					if (not @off) {
138					my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
139					for my $script (@{$^H{charnames_scripts}}) {
140					if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) {
141					@off = ($-[0], $+[0]);
142					last;
143					}
144					}
145					}
146
147					## If we don't have it by now, give up.
148					unless (@off) {
149					carp "Unknown charname '$name'";
150					return "\x{FFFD}";
151					}
152
153					##
154					## Now know where in the string the name starts.
155					## The code, in hex, is before that.
156					##
157					## The code can be 4-6 characters long, so we've got to sort of
158					## go look for it, just after the newline that comes before $off[0].
159					##
160					## This would be much easier if unicore/Name.pl had info in
161					## a name/code order, instead of code/name order.
162					##
163					## The +1 after the rindex() is to skip past the newline we're finding,
164					## or, if the rindex() fails, to put us to an offset of zero.
165					##
166					my $hexstart = rindex($txt, "\n", $off[0]) + 1;
167
168					## we know where it starts, so turn into number -
169					## the ordinal for the char.
170					$ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart);
171					}
172
173					if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
174	3	96µs	2	28µs	# spent 23µs (18+5) within charnames::BEGIN@174 which was called: # once (18µs+5µs) by MARC::Charset::BEGIN@12 at line 174 use bytes; # spent 23µs making 1 call to charnames::BEGIN@174 # spent 5µs making 1 call to bytes::import
175					return chr $ord if $ord <= 255;
176					my $hex = sprintf "%04x", $ord;
177					if (not defined $fname) {
178					$fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
179					}
180					croak "Character 0x$hex with name '$fname' is above 0xFF";
181					}
182
183	3	803µs	2	53µs	# spent 33µs (13+20) within charnames::BEGIN@183 which was called: # once (13µs+20µs) by MARC::Charset::BEGIN@12 at line 183 no warnings 'utf8'; # allow even illegal characters # spent 33µs making 1 call to charnames::BEGIN@183 # spent 20µs making 1 call to warnings::unimport
184					return pack "U", $ord;
185					} # charnames
186
187					sub import
188					# spent 2.31ms (37µs+2.27) within charnames::import which was called: # once (37µs+2.27ms) by MARC::Charset::BEGIN@12 at line 12 of MARC/Charset.pm {
189	14	36µs			shift; ## ignore class name
190
191					if (not @_) {
192					carp("`use charnames' needs explicit imports list");
193					}
194					$^H{charnames} = \&charnames ;
195
196					##
197					## fill %h keys with our @_ args.
198					##
199					my ($promote, %h, @args) = (0);
200					while (my $arg = shift) {
201					if ($arg eq ":alias") {
202					@_ or
203					croak ":alias needs an argument in charnames";
204					my $alias = shift;
205					if (ref $alias) {
206					ref $alias eq "HASH" or
207					croak "Only HASH reference supported as argument to :alias";
208					alias ($alias);
209					next;
210					}
211					if ($alias =~ m{:(\w+)$}) {
212					$1 eq "full" \|\| $1 eq "short" and
213					croak ":alias cannot use existing pragma :$1 (reversed order?)";
214					alias_file ($1) and $promote = 1;
215					next;
216					}
217					alias_file ($alias);
218					next;
219					}
220					if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" \|\| $arg eq ":short")) {
221					warn "unsupported special '$arg' in charnames";
222					next;
223					}
224					push @args, $arg;
225					}
226					@args == 0 && $promote and @args = (":full");
227					@h{@args} = (1) x @args;
228
229					$^H{charnames_full} = delete $h{':full'};
230					$^H{charnames_short} = delete $h{':short'};
231					$^H{charnames_scripts} = [map uc, keys %h];
232
233					##
234					## If utf8? warnings are enabled, and some scripts were given,
235					## see if at least we can find one letter of each script.
236					##
237			1	2.27ms	if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) { # spent 2.27ms making 1 call to warnings::enabled
238					$txt = do "unicore/Name.pl" unless $txt;
239
240					for my $script (@{$^H{charnames_scripts}}) {
241					if (not $txt =~ m/\t\t$script (?:CAPITAL \|SMALL )?LETTER /) {
242					warnings::warn('utf8', "No such script: '$script'");
243					}
244					}
245					}
246					} # import
247
248	1	200ns			my %viacode;
249
250					sub viacode
251					{
252					if (@_ != 1) {
253					carp "charnames::viacode() expects one argument";
254					return;
255					}
256
257					my $arg = shift;
258
259					# this comes actually from Unicode::UCD, where it is the named
260					# function _getcode (), but it avoids the overhead of loading it
261					my $hex;
262					if ($arg =~ /^[1-9]\d*$/) {
263					$hex = sprintf "%04X", $arg;
264					} elsif ($arg =~ /^(?:[Uu]\+\|0[xX])?([[:xdigit:]]+)$/) {
265					$hex = $1;
266					} else {
267					carp("unexpected arg \"$arg\" to charnames::viacode()");
268					return;
269					}
270
271					# checking the length first is slightly faster
272					if (length($hex) > 5 && hex($hex) > 0x10FFFF) {
273					carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)";
274					return;
275					}
276
277					return $viacode{$hex} if exists $viacode{$hex};
278
279					$txt = do "unicore/Name.pl" unless $txt;
280
281					return unless $txt =~ m/^$hex\t\t(.+)/m;
282
283					$viacode{$hex} = $1;
284					} # viacode
285
286	1	200ns			my %vianame;
287
288					sub vianame
289					{
290					if (@_ != 1) {
291					carp "charnames::vianame() expects one name argument";
292					return ()
293					}
294
295					my $arg = shift;
296
297					return chr CORE::hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/;
298
299					return $vianame{$arg} if exists $vianame{$arg};
300
301					$txt = do "unicore/Name.pl" unless $txt;
302
303					my $pos = index $txt, "\t\t$arg\n";
304					if ($[ <= $pos) {
305					my $posLF = rindex $txt, "\n", $pos;
306					(my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d;
307					return $vianame{$arg} = CORE::hex $code;
308
309					# If $pos is at the 1st line, $posLF must be $[ - 1 (not found);
310					# then $posLF + 1 equals to $[ (at the beginning of $txt).
311					# Otherwise $posLF is the position of "\n";
312					# then $posLF + 1 must be the position of the next to "\n"
313					# (the beginning of the line).
314					# substr($txt, $posLF + 1, 6) may be "0000\t\t", "00A1\t\t",
315					# "10300\t", "100000", etc. So we can get the code via removing TAB.
316					} else {
317					return;
318					}
319					} # vianame
320
321
322	1	9µs			1;
323					__END__