Filename | /usr/share/perl/5.10/utf8_heavy.pl |
Statements | Executed 13111 statements in 48.6ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
16 | 12 | 10 | 40.2ms | 48.4ms | SWASHNEW (recurses: max depth 1, inclusive time 10.7ms) | utf8::
8506 | 11 | 1 | 7.54ms | 7.54ms | CORE:match (opcode) | utf8::
3790 | 6 | 4 | 3.35ms | 3.35ms | is_utf8 (xsub) | utf8::
1319 | 1 | 1 | 1.62ms | 1.62ms | decode (xsub) | utf8::
15 | 1 | 1 | 537µs | 537µs | CORE:sort (opcode) | utf8::
57 | 2 | 2 | 81µs | 81µs | encode (xsub) | utf8::
32 | 5 | 1 | 63µs | 63µs | CORE:subst (opcode) | utf8::
1 | 1 | 1 | 20µs | 45µs | BEGIN@3 | utf8::
1 | 1 | 1 | 20µs | 52µs | BEGIN@211 | utf8::
1 | 1 | 1 | 17µs | 24µs | BEGIN@2 | utf8::
1 | 1 | 1 | 13µs | 45µs | BEGIN@155 | utf8::
1 | 1 | 1 | 13µs | 36µs | BEGIN@76 | utf8::
0 | 0 | 0 | 0s | 0s | DESTROY | utf8::
0 | 0 | 0 | 0s | 0s | croak | utf8::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package utf8; | ||||
2 | 3 | 52µs | 2 | 32µs | # spent 24µs (17+7) within utf8::BEGIN@2 which was called:
# once (17µs+7µs) by utf8::AUTOLOAD at line 2 # spent 24µs making 1 call to utf8::BEGIN@2
# spent 7µs making 1 call to strict::import |
3 | 3 | 348µs | 2 | 71µs | # spent 45µs (20+25) within utf8::BEGIN@3 which was called:
# once (20µs+25µs) by utf8::AUTOLOAD at line 3 # spent 45µs making 1 call to utf8::BEGIN@3
# spent 25µs making 1 call to warnings::import |
4 | |||||
5 | sub DEBUG () { 0 } | ||||
6 | |||||
7 | sub DESTROY {} | ||||
8 | |||||
9 | 1 | 400ns | my %Cache; | ||
10 | |||||
11 | 1 | 900ns | our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map); | ||
12 | |||||
13 | sub croak { require Carp; Carp::croak(@_) } | ||||
14 | |||||
15 | ## | ||||
16 | ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape. | ||||
17 | ## It's a data structure that encodes a set of Unicode characters. | ||||
18 | ## | ||||
19 | |||||
20 | # spent 48.4ms (40.2+8.14) within utf8::SWASHNEW which was called 16 times, avg 3.02ms/call:
# 3 times (8.48ms+-8.48ms) by utf8::SWASHNEW at line 250, avg 0s/call
# 2 times (23.9ms+5.62ms) by DBI::CORE:subst at line 600 of DBI.pm, avg 14.8ms/call
# 2 times (102µs+16µs) by DBD::mysql::CORE:match at line 49 of DBD/mysql.pm, avg 59µs/call
# once (6.50ms+75µs) by XML::Simple::CORE:match at line 18 of utf8.pm
# once (60µs+5.43ms) by C4::ClassSortRoutine::Dewey::CORE:subst at line 84 of /usr/share/koha/lib/C4/ClassSortRoutine/Dewey.pm
# once (92µs+5.22ms) by C4::ClassSortRoutine::Dewey::CORE:subst at line 67 of /usr/share/koha/lib/C4/ClassSortRoutine/Dewey.pm
# once (556µs+102µs) by MARC::Field::CORE:match at line 198 of MARC/Field.pm
# once (172µs+48µs) by C4::Charset::CORE:subst at line 385 of /usr/share/koha/lib/C4/Charset.pm
# once (165µs+36µs) by C4::Search::CORE:subst at line 578 of /usr/share/koha/lib/C4/Search.pm
# once (60µs+56µs) by C4::ClassSortRoutine::Generic::CORE:subst at line 62 of /usr/share/koha/lib/C4/ClassSortRoutine/Generic.pm
# once (93µs+16µs) by C4::Biblio::CORE:subst at line 1436 of /usr/share/koha/lib/C4/Biblio.pm
# once (33µs+5µs) by DBD::mysql::CORE:match at line 56 of DBD/mysql.pm | ||||
21 | 276 | 1.26ms | my ($class, $type, $list, $minbits, $none) = @_; | ||
22 | local $^D = 0 if $^D; | ||||
23 | |||||
24 | print STDERR "SWASHNEW @_\n" if DEBUG; | ||||
25 | |||||
26 | ## | ||||
27 | ## Get the list of codepoints for the type. | ||||
28 | ## Called from swash_init (see utf8.c) or SWASHNEW itself. | ||||
29 | ## | ||||
30 | ## Callers of swash_init: | ||||
31 | ## op.c:pmtrans -- for tr/// and y/// | ||||
32 | ## regexec.c:regclass_swash -- for /[]/, \p, and \P | ||||
33 | ## utf8.c:is_utf8_common -- for common Unicode properties | ||||
34 | ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i | ||||
35 | ## | ||||
36 | ## Given a $type, our goal is to fill $list with the set of codepoint | ||||
37 | ## ranges. If $type is false, $list passed is used. | ||||
38 | ## | ||||
39 | ## $minbits: | ||||
40 | ## For binary properties, $minbits must be 1. | ||||
41 | ## For character mappings (case and transliteration), $minbits must | ||||
42 | ## be a number except 1. | ||||
43 | ## | ||||
44 | ## $list (or that filled according to $type): | ||||
45 | ## Refer to perlunicode.pod, "User-Defined Character Properties." | ||||
46 | ## | ||||
47 | ## For binary properties, only characters with the property value | ||||
48 | ## of True should be listed. The 3rd column, if any, will be ignored. | ||||
49 | ## | ||||
50 | ## To make the parsing of $type clear, this code takes the a rather | ||||
51 | ## unorthodox approach of last'ing out of the block once we have the | ||||
52 | ## info we need. Were this to be a subroutine, the 'last' would just | ||||
53 | ## be a 'return'. | ||||
54 | ## | ||||
55 | my $file; ## file to load data from, and also part of the %Cache key. | ||||
56 | my $ListSorted = 0; | ||||
57 | |||||
58 | 41 | 90µs | if ($type) | ||
59 | { | ||||
60 | 7 | 16µs | $type =~ s/^\s+//; # spent 16µs making 7 calls to utf8::CORE:subst, avg 2µs/call | ||
61 | 7 | 12µs | $type =~ s/\s+$//; # spent 12µs making 7 calls to utf8::CORE:subst, avg 2µs/call | ||
62 | |||||
63 | print STDERR "type = $type\n" if DEBUG; | ||||
64 | |||||
65 | 64 | 6.79ms | 7 | 7µs | GETFILE: # spent 7µs making 7 calls to utf8::CORE:subst, avg 943ns/call |
66 | { | ||||
67 | ## | ||||
68 | ## It could be a user-defined property. | ||||
69 | ## | ||||
70 | |||||
71 | my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1); | ||||
72 | |||||
73 | 14 | 31µs | 7 | 18µs | if (defined $caller1 && $type =~ /^(?:\w+)$/) { # spent 18µs making 7 calls to utf8::CORE:match, avg 3µs/call |
74 | my $prop = "${caller1}::$type"; | ||||
75 | if (exists &{$prop}) { | ||||
76 | 3 | 410µs | 2 | 58µs | # spent 36µs (13+23) within utf8::BEGIN@76 which was called:
# once (13µs+23µs) by utf8::AUTOLOAD at line 76 # spent 36µs making 1 call to utf8::BEGIN@76
# spent 23µs making 1 call to strict::unimport |
77 | |||||
78 | $list = &{$prop}; | ||||
79 | last GETFILE; | ||||
80 | } | ||||
81 | } | ||||
82 | |||||
83 | my $wasIs; | ||||
84 | |||||
85 | 10 | 26µs | ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) # spent 26µs making 10 calls to utf8::CORE:subst, avg 3µs/call | ||
86 | or | ||||
87 | $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i | ||||
88 | or | ||||
89 | $type =~ s/^(?:Script|sc)\s*[:=]\s*//i | ||||
90 | or | ||||
91 | $type =~ s/^Block\s*[:=]\s*/In/i; | ||||
92 | |||||
93 | |||||
94 | ## | ||||
95 | ## See if it's in some enumeration. | ||||
96 | ## | ||||
97 | require "unicore/PVA.pl"; | ||||
98 | 21 | 68µs | 7 | 16µs | if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { # spent 16µs making 7 calls to utf8::CORE:match, avg 2µs/call |
99 | my ($enum, $val) = (lc $1, lc $2); | ||||
100 | $enum =~ tr/ _-//d; | ||||
101 | $val =~ tr/ _-//d; | ||||
102 | |||||
103 | my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum}; | ||||
104 | my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val}; | ||||
105 | |||||
106 | if ($pa and $f) { | ||||
107 | $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc"; | ||||
108 | $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl"; | ||||
109 | last GETFILE; | ||||
110 | } | ||||
111 | } | ||||
112 | else { | ||||
113 | my $t = lc $type; | ||||
114 | $t =~ tr/ _-//d; | ||||
115 | |||||
116 | if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) { | ||||
117 | $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl"; | ||||
118 | last GETFILE; | ||||
119 | } | ||||
120 | } | ||||
121 | |||||
122 | ## | ||||
123 | ## See if it's in the direct mapping table. | ||||
124 | ## | ||||
125 | require "unicore/Exact.pl"; | ||||
126 | 12 | 19µs | if (my $base = $utf8::Exact{$type}) { | ||
127 | $file = "unicore/lib/gc_sc/$base.pl"; | ||||
128 | last GETFILE; | ||||
129 | } | ||||
130 | |||||
131 | ## | ||||
132 | ## If not there exactly, try the canonical form. The canonical | ||||
133 | ## form is lowercased, with any separators (\s+|[-_]) removed. | ||||
134 | ## | ||||
135 | my $canonical = lc $type; | ||||
136 | 1 | 3µs | $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g; # spent 3µs making 1 call to utf8::CORE:subst | ||
137 | print STDERR "canonical = $canonical\n" if DEBUG; | ||||
138 | |||||
139 | require "unicore/Canonical.pl"; | ||||
140 | if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) { | ||||
141 | $file = "unicore/lib/gc_sc/$base.pl"; | ||||
142 | last GETFILE; | ||||
143 | } | ||||
144 | |||||
145 | ## | ||||
146 | ## See if it's a user-level "To". | ||||
147 | ## | ||||
148 | |||||
149 | my $caller0 = caller(0); | ||||
150 | |||||
151 | 2 | 5µs | 1 | 8µs | if (defined $caller0 && $type =~ /^To(?:\w+)$/) { # spent 8µs making 1 call to utf8::CORE:match |
152 | my $map = $caller0 . "::" . $type; | ||||
153 | |||||
154 | if (exists &{$map}) { | ||||
155 | 3 | 350µs | 2 | 76µs | # spent 45µs (13+31) within utf8::BEGIN@155 which was called:
# once (13µs+31µs) by utf8::AUTOLOAD at line 155 # spent 45µs making 1 call to utf8::BEGIN@155
# spent 31µs making 1 call to strict::unimport |
156 | |||||
157 | $list = &{$map}; | ||||
158 | last GETFILE; | ||||
159 | } | ||||
160 | } | ||||
161 | |||||
162 | ## | ||||
163 | ## Last attempt -- see if it's a standard "To" name | ||||
164 | ## (e.g. "ToLower") ToTitle is used by ucfirst(). | ||||
165 | ## The user-level way to access ToDigit() and ToFold() | ||||
166 | ## is to use Unicode::UCD. | ||||
167 | ## | ||||
168 | 2 | 9µs | 1 | 4µs | if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { # spent 4µs making 1 call to utf8::CORE:match |
169 | $file = "unicore/To/$1.pl"; | ||||
170 | ## would like to test to see if $file actually exists.... | ||||
171 | last GETFILE; | ||||
172 | } | ||||
173 | |||||
174 | ## | ||||
175 | ## If we reach this line, it's because we couldn't figure | ||||
176 | ## out what to do with $type. Ouch. | ||||
177 | ## | ||||
178 | |||||
179 | return $type; | ||||
180 | } | ||||
181 | |||||
182 | 33 | 2.87ms | if (defined $file) { | ||
183 | print STDERR "found it (file='$file')\n" if DEBUG; | ||||
184 | |||||
185 | ## | ||||
186 | ## If we reach here, it was due to a 'last GETFILE' above | ||||
187 | ## (exception: user-defined properties and mappings), so we | ||||
188 | ## have a filename, so now we load it if we haven't already. | ||||
189 | ## If we have, return the cached results. The cache key is the | ||||
190 | ## class and file to load. | ||||
191 | ## | ||||
192 | my $found = $Cache{$class, $file}; | ||||
193 | 2 | 3µs | if ($found and ref($found) eq $class) { | ||
194 | print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG; | ||||
195 | return $found; | ||||
196 | } | ||||
197 | |||||
198 | $list = do $file; die $@ if $@; | ||||
199 | } | ||||
200 | |||||
201 | $ListSorted = 1; ## we know that these lists are sorted | ||||
202 | } | ||||
203 | |||||
204 | my $extras; | ||||
205 | my $bits = $minbits; | ||||
206 | |||||
207 | my $ORIG = $list; | ||||
208 | 60 | 8.86ms | if ($list) { | ||
209 | my @tmp = split(/^/m, $list); | ||||
210 | my %seen; | ||||
211 | 3 | 692µs | 2 | 84µs | # spent 52µs (20+32) within utf8::BEGIN@211 which was called:
# once (20µs+32µs) by utf8::AUTOLOAD at line 211 # spent 52µs making 1 call to utf8::BEGIN@211
# spent 32µs making 1 call to warnings::unimport |
212 | 2485 | 1.08ms | $extras = join '', grep /^[^0-9a-fA-F]/, @tmp; # spent 1.08ms making 2485 calls to utf8::CORE:match, avg 435ns/call | ||
213 | $list = join '', | ||||
214 | map { $_->[1] } | ||||
215 | 2473 | 2.43ms | sort { $a->[0] <=> $b->[0] } # spent 2.43ms making 2473 calls to utf8::CORE:match, avg 981ns/call | ||
216 | 2485 | 2.35ms | map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] } # spent 2.35ms making 2485 calls to utf8::CORE:match, avg 947ns/call | ||
217 | 7431 | 20.2ms | 15 | 537µs | grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right # spent 537µs making 15 calls to utf8::CORE:sort, avg 36µs/call |
218 | } | ||||
219 | |||||
220 | if ($none) { | ||||
221 | my $hextra = sprintf "%04x", $none + 1; | ||||
222 | $list =~ s/\tXXXX$/\t$hextra/mg; | ||||
223 | } | ||||
224 | |||||
225 | 4 | 24µs | if ($minbits != 1 && $minbits < 32) { # not binary property | ||
226 | my $top = 0; | ||||
227 | 1 | 9µs | while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { # spent 9µs making 1 call to utf8::CORE:match | ||
228 | 5035 | 6.12ms | my $min = CORE::hex $1; | ||
229 | my $max = defined $2 ? CORE::hex $2 : $min; | ||||
230 | my $val = defined $3 ? CORE::hex $3 : 0; | ||||
231 | $val += $max - $min if defined $3; | ||||
232 | 1007 | 1.57ms | $top = $val if $val > $top; # spent 1.57ms making 1007 calls to utf8::CORE:match, avg 2µs/call | ||
233 | } | ||||
234 | my $topbits = | ||||
235 | $top > 0xffff ? 32 : | ||||
236 | $top > 0xff ? 16 : 8; | ||||
237 | $bits = $topbits if $bits < $topbits; | ||||
238 | } | ||||
239 | |||||
240 | my @extras; | ||||
241 | for my $x ($extras) { | ||||
242 | 30 | 148µs | pos $x = 0; | ||
243 | 15 | 25µs | while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { # spent 25µs making 15 calls to utf8::CORE:match, avg 2µs/call | ||
244 | 48 | 108µs | my $char = $1; | ||
245 | my $name = $2; | ||||
246 | print STDERR "$1 => $2\n" if DEBUG; | ||||
247 | 18 | 50µs | 24 | 23µs | if ($char =~ /[-+!&]/) { # spent 23µs making 24 calls to utf8::CORE:match, avg 975ns/call |
248 | my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really | ||||
249 | my $subobj; | ||||
250 | 3 | 0s | if ($c eq 'utf8') { # spent 10.7ms making 3 calls to utf8::SWASHNEW, avg 3.56ms/call, recursion: max depth 1, sum of overlapping time 10.7ms | ||
251 | $subobj = utf8->SWASHNEW($t, "", $minbits, 0); | ||||
252 | } | ||||
253 | elsif (exists &$name) { | ||||
254 | $subobj = utf8->SWASHNEW($name, "", $minbits, 0); | ||||
255 | } | ||||
256 | elsif ($c =~ /^([0-9a-fA-F]+)/) { | ||||
257 | $subobj = utf8->SWASHNEW("", $c, $minbits, 0); | ||||
258 | } | ||||
259 | return $subobj unless ref $subobj; | ||||
260 | push @extras, $name => $subobj; | ||||
261 | $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; | ||||
262 | } | ||||
263 | } | ||||
264 | } | ||||
265 | |||||
266 | print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; | ||||
267 | |||||
268 | my $SWASH = bless { | ||||
269 | TYPE => $type, | ||||
270 | BITS => $bits, | ||||
271 | EXTRAS => $extras, | ||||
272 | LIST => $list, | ||||
273 | NONE => $none, | ||||
274 | @extras, | ||||
275 | } => $class; | ||||
276 | |||||
277 | if ($file) { | ||||
278 | $Cache{$class, $file} = $SWASH; | ||||
279 | } | ||||
280 | |||||
281 | return $SWASH; | ||||
282 | } | ||||
283 | |||||
284 | # Now SWASHGET is recasted into a C function S_swash_get (see utf8.c). | ||||
285 | |||||
286 | 1 | 5µs | 1; | ||
# spent 7.54ms within utf8::CORE:match which was called 8506 times, avg 886ns/call:
# 2485 times (2.35ms+0s) by utf8::SWASHNEW at line 216, avg 947ns/call
# 2485 times (1.08ms+0s) by utf8::SWASHNEW at line 212, avg 435ns/call
# 2473 times (2.43ms+0s) by utf8::SWASHNEW at line 215, avg 981ns/call
# 1007 times (1.57ms+0s) by utf8::SWASHNEW at line 232, avg 2µs/call
# 24 times (23µs+0s) by utf8::SWASHNEW at line 247, avg 975ns/call
# 15 times (25µs+0s) by utf8::SWASHNEW at line 243, avg 2µs/call
# 7 times (18µs+0s) by utf8::SWASHNEW at line 73, avg 3µs/call
# 7 times (16µs+0s) by utf8::SWASHNEW at line 98, avg 2µs/call
# once (9µs+0s) by utf8::SWASHNEW at line 227
# once (8µs+0s) by utf8::SWASHNEW at line 151
# once (4µs+0s) by utf8::SWASHNEW at line 168 | |||||
# spent 537µs within utf8::CORE:sort which was called 15 times, avg 36µs/call:
# 15 times (537µs+0s) by utf8::SWASHNEW at line 217, avg 36µs/call | |||||
# spent 63µs within utf8::CORE:subst which was called 32 times, avg 2µs/call:
# 10 times (26µs+0s) by utf8::SWASHNEW at line 85, avg 3µs/call
# 7 times (16µs+0s) by utf8::SWASHNEW at line 60, avg 2µs/call
# 7 times (12µs+0s) by utf8::SWASHNEW at line 61, avg 2µs/call
# 7 times (7µs+0s) by utf8::SWASHNEW at line 65, avg 943ns/call
# once (3µs+0s) by utf8::SWASHNEW at line 136 | |||||
# spent 1.62ms within utf8::decode which was called 1319 times, avg 1µs/call:
# 1319 times (1.62ms+0s) by C4::Charset::NormalizeString at line 175 of /usr/share/koha/lib/C4/Charset.pm, avg 1µs/call | |||||
# spent 81µs within utf8::encode which was called 57 times, avg 1µs/call:
# 51 times (64µs+0s) by C4::Templates::utf8_hashref at line 163 of /usr/share/koha/lib/C4/Templates.pm, avg 1µs/call
# 6 times (17µs+0s) by JSON::PP::string_to_json at line 509 of JSON/PP.pm, avg 3µs/call | |||||
# spent 3.35ms within utf8::is_utf8 which was called 3790 times, avg 884ns/call:
# 2284 times (1.88ms+0s) by C4::Templates::utf8_hashref at line 163 of /usr/share/koha/lib/C4/Templates.pm, avg 825ns/call
# 1319 times (1.30ms+0s) by C4::Charset::NormalizeString at line 175 of /usr/share/koha/lib/C4/Charset.pm, avg 984ns/call
# 149 times (100µs+0s) by C4::Templates::output at line 127 of /usr/share/koha/lib/C4/Templates.pm, avg 672ns/call
# 27 times (49µs+0s) by Template::Filters::url_filter at line 303 of Template/Filters.pm, avg 2µs/call
# 10 times (18µs+0s) by CGI::Util::escape at line 251 of CGI/Util.pm, avg 2µs/call
# once (3µs+0s) by Template::Filters::uri_filter at line 278 of Template/Filters.pm |