Profile of utf8

Filename	/usr/share/perl/5.10/utf8_heavy.pl
Statements	Executed 13111 statements in 48.6ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
16	12	10	40.2ms	48.4ms	utf8::::SWASHNEWutf8::SWASHNEW (recurses: max depth 1, inclusive time 10.7ms)
8506	11	1	7.54ms	7.54ms	utf8::::CORE:matchutf8::CORE:match (opcode)
3790	6	4	3.35ms	3.35ms	utf8::::is_utf8utf8::is_utf8 (xsub)
1319	1	1	1.62ms	1.62ms	utf8::::decodeutf8::decode (xsub)
15	1	1	537µs	537µs	utf8::::CORE:sortutf8::CORE:sort (opcode)
57	2	2	81µs	81µs	utf8::::encodeutf8::encode (xsub)
32	5	1	63µs	63µs	utf8::::CORE:substutf8::CORE:subst (opcode)
1	1	1	20µs	45µs	utf8::::BEGIN@3utf8::BEGIN@3
1	1	1	20µs	52µs	utf8::::BEGIN@211utf8::BEGIN@211
1	1	1	17µs	24µs	utf8::::BEGIN@2utf8::BEGIN@2
1	1	1	13µs	45µs	utf8::::BEGIN@155utf8::BEGIN@155
1	1	1	13µs	36µs	utf8::::BEGIN@76utf8::BEGIN@76
0	0	0	0s	0s	utf8::::DESTROYutf8::DESTROY
0	0	0	0s	0s	utf8::::croakutf8::croak

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					package utf8;
2	3	52µs	2	32µs	# spent 24µs (17+7) within utf8::BEGIN@2 which was called: # once (17µs+7µs) by utf8::AUTOLOAD at line 2 use strict; # spent 24µs making 1 call to utf8::BEGIN@2 # spent 7µs making 1 call to strict::import
3	3	348µs	2	71µs	# spent 45µs (20+25) within utf8::BEGIN@3 which was called: # once (20µs+25µs) by utf8::AUTOLOAD at line 3 use warnings; # spent 45µs making 1 call to utf8::BEGIN@3 # spent 25µs making 1 call to warnings::import
4
5					sub DEBUG () { 0 }
6
7					sub DESTROY {}
8
9	1	400ns			my %Cache;
10
11	1	900ns			our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map);
12
13					sub croak { require Carp; Carp::croak(@_) }
14
15					##
16					## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape.
17					## It's a data structure that encodes a set of Unicode characters.
18					##
19
20					# spent 48.4ms (40.2+8.14) within utf8::SWASHNEW which was called 16 times, avg 3.02ms/call: # 3 times (8.48ms+-8.48ms) by utf8::SWASHNEW at line 250, avg 0s/call # 2 times (23.9ms+5.62ms) by DBI::CORE:subst at line 600 of DBI.pm, avg 14.8ms/call # 2 times (102µs+16µs) by DBD::mysql::CORE:match at line 49 of DBD/mysql.pm, avg 59µs/call # once (6.50ms+75µs) by XML::Simple::CORE:match at line 18 of utf8.pm # once (60µs+5.43ms) by C4::ClassSortRoutine::Dewey::CORE:subst at line 84 of /usr/share/koha/lib/C4/ClassSortRoutine/Dewey.pm # once (92µs+5.22ms) by C4::ClassSortRoutine::Dewey::CORE:subst at line 67 of /usr/share/koha/lib/C4/ClassSortRoutine/Dewey.pm # once (556µs+102µs) by MARC::Field::CORE:match at line 198 of MARC/Field.pm # once (172µs+48µs) by C4::Charset::CORE:subst at line 385 of /usr/share/koha/lib/C4/Charset.pm # once (165µs+36µs) by C4::Search::CORE:subst at line 578 of /usr/share/koha/lib/C4/Search.pm # once (60µs+56µs) by C4::ClassSortRoutine::Generic::CORE:subst at line 62 of /usr/share/koha/lib/C4/ClassSortRoutine/Generic.pm # once (93µs+16µs) by C4::Biblio::CORE:subst at line 1436 of /usr/share/koha/lib/C4/Biblio.pm # once (33µs+5µs) by DBD::mysql::CORE:match at line 56 of DBD/mysql.pm sub SWASHNEW {
21	16	32µs			my ($class, $type, $list, $minbits, $none) = @_;
22	16	21µs			local $^D = 0 if $^D;
23
24	16	4µs			print STDERR "SWASHNEW @_\n" if DEBUG;
25
26					##
27					## Get the list of codepoints for the type.
28					## Called from swash_init (see utf8.c) or SWASHNEW itself.
29					##
30					## Callers of swash_init:
31					## op.c:pmtrans -- for tr/// and y///
32					## regexec.c:regclass_swash -- for /[]/, \p, and \P
33					## utf8.c:is_utf8_common -- for common Unicode properties
34					## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i
35					##
36					## Given a $type, our goal is to fill $list with the set of codepoint
37					## ranges. If $type is false, $list passed is used.
38					##
39					## $minbits:
40					## For binary properties, $minbits must be 1.
41					## For character mappings (case and transliteration), $minbits must
42					## be a number except 1.
43					##
44					## $list (or that filled according to $type):
45					## Refer to perlunicode.pod, "User-Defined Character Properties."
46					##
47					## For binary properties, only characters with the property value
48					## of True should be listed. The 3rd column, if any, will be ignored.
49					##
50					## To make the parsing of $type clear, this code takes the a rather
51					## unorthodox approach of last'ing out of the block once we have the
52					## info we need. Were this to be a subroutine, the 'last' would just
53					## be a 'return'.
54					##
55	16	7µs			my $file; ## file to load data from, and also part of the %Cache key.
56	16	10µs			my $ListSorted = 0;
57
58	16	8µs			if ($type)
59					{
60	7	44µs	7	16µs	$type =~ s/^\s+//; # spent 16µs making 7 calls to utf8::CORE:subst, avg 2µs/call
61	7	28µs	7	12µs	$type =~ s/\s+$//; # spent 12µs making 7 calls to utf8::CORE:subst, avg 2µs/call
62
63	7	2µs			print STDERR "type = $type\n" if DEBUG;
64
65	7	33µs	7	7µs	GETFILE: # spent 7µs making 7 calls to utf8::CORE:subst, avg 943ns/call
66					{
67					##
68					## It could be a user-defined property.
69					##
70
71	7	4µs			my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1);
72
73	7	43µs	7	18µs	if (defined $caller1 && $type =~ /^(?:\w+)$/) { # spent 18µs making 7 calls to utf8::CORE:match, avg 3µs/call
74	7	11µs			my $prop = "${caller1}::$type";
75	7	20µs			if (exists &{$prop}) {
76	3	410µs	2	58µs	# spent 36µs (13+23) within utf8::BEGIN@76 which was called: # once (13µs+23µs) by utf8::AUTOLOAD at line 76 no strict 'refs'; # spent 36µs making 1 call to utf8::BEGIN@76 # spent 23µs making 1 call to strict::unimport
77
78					$list = &{$prop};
79					last GETFILE;
80					}
81					}
82
83	7	3µs			my $wasIs;
84
85	7	49µs	10	26µs	($wasIs = $type =~ s/^Is(?:\s+\|[-_])?//i) # spent 26µs making 10 calls to utf8::CORE:subst, avg 3µs/call
86					or
87					$type =~ s/^(?:(?:General(?:\s+\|_)?)?Category\|gc)\s[:=]\s//i
88					or
89					$type =~ s/^(?:Script\|sc)\s[:=]\s//i
90					or
91					$type =~ s/^Block\s[:=]\s/In/i;
92
93
94					##
95					## See if it's in some enumeration.
96					##
97	7	4.63ms			require "unicore/PVA.pl";
98	7	48µs	7	16µs	if ($type =~ /^([\w\s]+)[:=]\s(.)/) { # spent 16µs making 7 calls to utf8::CORE:match, avg 2µs/call
99					my ($enum, $val) = (lc $1, lc $2);
100					$enum =~ tr/ _-//d;
101					$val =~ tr/ _-//d;
102
103					my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum};
104					my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val};
105
106					if ($pa and $f) {
107					$pa = "gc_sc" if $pa eq "gc" or $pa eq "sc";
108					$file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl";
109					last GETFILE;
110					}
111					}
112					else {
113	7	11µs			my $t = lc $type;
114	7	25µs			$t =~ tr/ _-//d;
115
116	7	32µs			if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) {
117					$file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl";
118					last GETFILE;
119					}
120					}
121
122					##
123					## See if it's in the direct mapping table.
124					##
125	7	362µs			require "unicore/Exact.pl";
126	7	14µs			if (my $base = $utf8::Exact{$type}) {
127	6	7µs			$file = "unicore/lib/gc_sc/$base.pl";
128	6	12µs			last GETFILE;
129					}
130
131					##
132					## If not there exactly, try the canonical form. The canonical
133					## form is lowercased, with any separators (\s+\|[-_]) removed.
134					##
135	1	1µs			my $canonical = lc $type;
136	1	14µs	1	3µs	$canonical =~ s/(?<=[a-z\d])(?:\s+\|[-_])(?=[a-z\d])//g; # spent 3µs making 1 call to utf8::CORE:subst
137	1	200ns			print STDERR "canonical = $canonical\n" if DEBUG;
138
139	1	1.55ms			require "unicore/Canonical.pl";
140	1	4µs			if (my $base = ($utf8::Canonical{$canonical} \|\| $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) {
141					$file = "unicore/lib/gc_sc/$base.pl";
142					last GETFILE;
143					}
144
145					##
146					## See if it's a user-level "To".
147					##
148
149	1	2µs			my $caller0 = caller(0);
150
151	1	20µs	1	8µs	if (defined $caller0 && $type =~ /^To(?:\w+)$/) { # spent 8µs making 1 call to utf8::CORE:match
152	1	2µs			my $map = $caller0 . "::" . $type;
153
154	1	3µs			if (exists &{$map}) {
155	3	350µs	2	76µs	# spent 45µs (13+31) within utf8::BEGIN@155 which was called: # once (13µs+31µs) by utf8::AUTOLOAD at line 155 no strict 'refs'; # spent 45µs making 1 call to utf8::BEGIN@155 # spent 31µs making 1 call to strict::unimport
156
157					$list = &{$map};
158					last GETFILE;
159					}
160					}
161
162					##
163					## Last attempt -- see if it's a standard "To" name
164					## (e.g. "ToLower") ToTitle is used by ucfirst().
165					## The user-level way to access ToDigit() and ToFold()
166					## is to use Unicode::UCD.
167					##
168	1	8µs	1	4µs	if ($type =~ /^To(Digit\|Fold\|Lower\|Title\|Upper)$/) { # spent 4µs making 1 call to utf8::CORE:match
169	1	5µs			$file = "unicore/To/$1.pl";
170					## would like to test to see if $file actually exists....
171	1	4µs			last GETFILE;
172					}
173
174					##
175					## If we reach this line, it's because we couldn't figure
176					## out what to do with $type. Ouch.
177					##
178
179					return $type;
180					}
181
182	7	6µs			if (defined $file) {
183	7	2µs			print STDERR "found it (file='$file')\n" if DEBUG;
184
185					##
186					## If we reach here, it was due to a 'last GETFILE' above
187					## (exception: user-defined properties and mappings), so we
188					## have a filename, so now we load it if we haven't already.
189					## If we have, return the cached results. The cache key is the
190					## class and file to load.
191					##
192	7	14µs			my $found = $Cache{$class, $file};
193	7	5µs			if ($found and ref($found) eq $class) {
194	1	200ns			print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG;
195	1	3µs			return $found;
196					}
197
198	12	2.85ms			$list = do $file; die $@ if $@;
199					}
200
201	6	6µs			$ListSorted = 1; ## we know that these lists are sorted
202					}
203
204	15	6µs			my $extras;
205	15	9µs			my $bits = $minbits;
206
207	15	16µs			my $ORIG = $list;
208	15	505µs			if ($list) {
209	15	869µs			my @tmp = split(/^/m, $list);
210	15	5µs			my %seen;
211	3	692µs	2	84µs	# spent 52µs (20+32) within utf8::BEGIN@211 which was called: # once (20µs+32µs) by utf8::AUTOLOAD at line 211 no warnings; # spent 52µs making 1 call to utf8::BEGIN@211 # spent 32µs making 1 call to warnings::unimport
212	15	3.83ms	2485	1.08ms	$extras = join '', grep /^[^0-9a-fA-F]/, @tmp; # spent 1.08ms making 2485 calls to utf8::CORE:match, avg 435ns/call
213					$list = join '',
214					map { $_->[1] }
215	2473	6.18ms	2473	2.43ms	sort { $a->[0] <=> $b->[0] } # spent 2.43ms making 2473 calls to utf8::CORE:match, avg 981ns/call
216	4958	14.1ms	2485	2.35ms	map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] } # spent 2.35ms making 2485 calls to utf8::CORE:match, avg 947ns/call
217	15	4.15ms	15	537µs	grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right # spent 537µs making 15 calls to utf8::CORE:sort, avg 36µs/call
218					}
219
220	15	9µs			if ($none) {
221					my $hextra = sprintf "%04x", $none + 1;
222					$list =~ s/\tXXXX$/\t$hextra/mg;
223					}
224
225	15	23µs			if ($minbits != 1 && $minbits < 32) { # not binary property
226	1	2µs			my $top = 0;
227	1	20µs	1	9µs	while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { # spent 9µs making 1 call to utf8::CORE:match
228	1007	804µs			my $min = CORE::hex $1;
229	1007	482µs			my $max = defined $2 ? CORE::hex $2 : $min;
230	1007	654µs			my $val = defined $3 ? CORE::hex $3 : 0;
231	1007	635µs			$val += $max - $min if defined $3;
232	1007	3.55ms	1007	1.57ms	$top = $val if $val > $top; # spent 1.57ms making 1007 calls to utf8::CORE:match, avg 2µs/call
233					}
234	1	1µs			my $topbits =
235					$top > 0xffff ? 32 :
236					$top > 0xff ? 16 : 8;
237	1	1µs			$bits = $topbits if $bits < $topbits;
238					}
239
240	15	10µs			my @extras;
241	15	27µs			for my $x ($extras) {
242	15	51µs			pos $x = 0;
243	15	97µs	15	25µs	while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { # spent 25µs making 15 calls to utf8::CORE:match, avg 2µs/call
244	12	20µs			my $char = $1;
245	12	13µs			my $name = $2;
246	12	2µs			print STDERR "$1 => $2\n" if DEBUG;
247	12	73µs	24	23µs	if ($char =~ /[-+!&]/) { # spent 23µs making 24 calls to utf8::CORE:match, avg 975ns/call
248	3	7µs			my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really
249	3	800ns			my $subobj;
250	3	31µs	3	0s	if ($c eq 'utf8') { # spent 10.7ms making 3 calls to utf8::SWASHNEW, avg 3.56ms/call, recursion: max depth 1, sum of overlapping time 10.7ms
251					$subobj = utf8->SWASHNEW($t, "", $minbits, 0);
252					}
253					elsif (exists &$name) {
254					$subobj = utf8->SWASHNEW($name, "", $minbits, 0);
255					}
256					elsif ($c =~ /^([0-9a-fA-F]+)/) {
257					$subobj = utf8->SWASHNEW("", $c, $minbits, 0);
258					}
259	3	2µs			return $subobj unless ref $subobj;
260	3	4µs			push @extras, $name => $subobj;
261	3	5µs			$bits = $subobj->{BITS} if $bits < $subobj->{BITS};
262					}
263					}
264					}
265
266	15	3µs			print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG;
267
268	15	453µs			my $SWASH = bless {
269					TYPE => $type,
270					BITS => $bits,
271					EXTRAS => $extras,
272					LIST => $list,
273					NONE => $none,
274					@extras,
275					} => $class;
276
277	15	23µs			if ($file) {
278					$Cache{$class, $file} = $SWASH;
279					}
280
281	15	94µs			return $SWASH;
282					}
283
284					# Now SWASHGET is recasted into a C function S_swash_get (see utf8.c).
285
286	1	5µs			1;

					# spent 7.54ms within utf8::CORE:match which was called 8506 times, avg 886ns/call: # 2485 times (2.35ms+0s) by utf8::SWASHNEW at line 216, avg 947ns/call # 2485 times (1.08ms+0s) by utf8::SWASHNEW at line 212, avg 435ns/call # 2473 times (2.43ms+0s) by utf8::SWASHNEW at line 215, avg 981ns/call # 1007 times (1.57ms+0s) by utf8::SWASHNEW at line 232, avg 2µs/call # 24 times (23µs+0s) by utf8::SWASHNEW at line 247, avg 975ns/call # 15 times (25µs+0s) by utf8::SWASHNEW at line 243, avg 2µs/call # 7 times (18µs+0s) by utf8::SWASHNEW at line 73, avg 3µs/call # 7 times (16µs+0s) by utf8::SWASHNEW at line 98, avg 2µs/call # once (9µs+0s) by utf8::SWASHNEW at line 227 # once (8µs+0s) by utf8::SWASHNEW at line 151 # once (4µs+0s) by utf8::SWASHNEW at line 168 sub utf8::CORE:match; # opcode
					# spent 537µs within utf8::CORE:sort which was called 15 times, avg 36µs/call: # 15 times (537µs+0s) by utf8::SWASHNEW at line 217, avg 36µs/call sub utf8::CORE:sort; # opcode
					# spent 63µs within utf8::CORE:subst which was called 32 times, avg 2µs/call: # 10 times (26µs+0s) by utf8::SWASHNEW at line 85, avg 3µs/call # 7 times (16µs+0s) by utf8::SWASHNEW at line 60, avg 2µs/call # 7 times (12µs+0s) by utf8::SWASHNEW at line 61, avg 2µs/call # 7 times (7µs+0s) by utf8::SWASHNEW at line 65, avg 943ns/call # once (3µs+0s) by utf8::SWASHNEW at line 136 sub utf8::CORE:subst; # opcode
					# spent 1.62ms within utf8::decode which was called 1319 times, avg 1µs/call: # 1319 times (1.62ms+0s) by C4::Charset::NormalizeString at line 175 of /usr/share/koha/lib/C4/Charset.pm, avg 1µs/call sub utf8::decode; # xsub
					# spent 81µs within utf8::encode which was called 57 times, avg 1µs/call: # 51 times (64µs+0s) by C4::Templates::utf8_hashref at line 163 of /usr/share/koha/lib/C4/Templates.pm, avg 1µs/call # 6 times (17µs+0s) by JSON::PP::string_to_json at line 509 of JSON/PP.pm, avg 3µs/call sub utf8::encode; # xsub
					# spent 3.35ms within utf8::is_utf8 which was called 3790 times, avg 884ns/call: # 2284 times (1.88ms+0s) by C4::Templates::utf8_hashref at line 163 of /usr/share/koha/lib/C4/Templates.pm, avg 825ns/call # 1319 times (1.30ms+0s) by C4::Charset::NormalizeString at line 175 of /usr/share/koha/lib/C4/Charset.pm, avg 984ns/call # 149 times (100µs+0s) by C4::Templates::output at line 127 of /usr/share/koha/lib/C4/Templates.pm, avg 672ns/call # 27 times (49µs+0s) by Template::Filters::url_filter at line 303 of Template/Filters.pm, avg 2µs/call # 10 times (18µs+0s) by CGI::Util::escape at line 251 of CGI/Util.pm, avg 2µs/call # once (3µs+0s) by Template::Filters::uri_filter at line 278 of Template/Filters.pm sub utf8::is_utf8; # xsub