Filename | /usr/share/perl5/MARC/Charset/Code.pm |
Statements | Executed 14 statements in 1.78ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 2.23ms | 3.10ms | BEGIN@8 | MARC::Charset::Code::
1 | 1 | 1 | 11µs | 23µs | BEGIN@3 | MARC::Charset::Code::
1 | 1 | 1 | 8µs | 1.84ms | BEGIN@5 | MARC::Charset::Code::
1 | 1 | 1 | 8µs | 31µs | BEGIN@6 | MARC::Charset::Code::
1 | 1 | 1 | 7µs | 12µs | BEGIN@4 | MARC::Charset::Code::
1 | 1 | 1 | 7µs | 24µs | BEGIN@7 | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | char_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | charset_name | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | charset_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | default_charset_group | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | g0_marc_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | get_escape | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | marc8_hash_code | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | marc_value | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | to_string | MARC::Charset::Code::
0 | 0 | 0 | 0s | 0s | utf8_hash_code | MARC::Charset::Code::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package MARC::Charset::Code; | ||||
2 | |||||
3 | 2 | 23µs | 2 | 35µs | # spent 23µs (11+12) within MARC::Charset::Code::BEGIN@3 which was called:
# once (11µs+12µs) by MARC::Charset::Table::BEGIN@43 at line 3 # spent 23µs making 1 call to MARC::Charset::Code::BEGIN@3
# spent 12µs making 1 call to strict::import |
4 | 2 | 22µs | 2 | 18µs | # spent 12µs (7+5) within MARC::Charset::Code::BEGIN@4 which was called:
# once (7µs+5µs) by MARC::Charset::Table::BEGIN@43 at line 4 # spent 12µs making 1 call to MARC::Charset::Code::BEGIN@4
# spent 5µs making 1 call to warnings::import |
5 | 2 | 35µs | 2 | 3.67ms | # spent 1.84ms (8µs+1.83) within MARC::Charset::Code::BEGIN@5 which was called:
# once (8µs+1.83ms) by MARC::Charset::Table::BEGIN@43 at line 5 # spent 1.84ms making 1 call to MARC::Charset::Code::BEGIN@5
# spent 1.83ms making 1 call to base::import |
6 | 2 | 24µs | 2 | 54µs | # spent 31µs (8+23) within MARC::Charset::Code::BEGIN@6 which was called:
# once (8µs+23µs) by MARC::Charset::Table::BEGIN@43 at line 6 # spent 31µs making 1 call to MARC::Charset::Code::BEGIN@6
# spent 23µs making 1 call to Exporter::import |
7 | 2 | 23µs | 2 | 42µs | # spent 24µs (7+18) within MARC::Charset::Code::BEGIN@7 which was called:
# once (7µs+18µs) by MARC::Charset::Table::BEGIN@43 at line 7 # spent 24µs making 1 call to MARC::Charset::Code::BEGIN@7
# spent 18µs making 1 call to Exporter::import |
8 | 2 | 1.64ms | 2 | 3.28ms | # spent 3.10ms (2.23+869µs) within MARC::Charset::Code::BEGIN@8 which was called:
# once (2.23ms+869µs) by MARC::Charset::Table::BEGIN@43 at line 8 # spent 3.10ms making 1 call to MARC::Charset::Code::BEGIN@8
# spent 177µs making 1 call to Exporter::import |
9 | |||||
10 | 1 | 5µs | 1 | 178µs | MARC::Charset::Code # spent 178µs making 1 call to Class::Accessor::mk_accessors |
11 | ->mk_accessors(qw(marc ucs name charset is_combining alt | ||||
12 | marc_right_half marc_left_half)); | ||||
13 | |||||
14 | =head1 NAME | ||||
15 | |||||
16 | MARC::Charset::Code - represents a MARC-8/UTF-8 mapping | ||||
17 | |||||
18 | =head1 SYNOPSIS | ||||
19 | |||||
20 | =head1 DESCRIPTION | ||||
21 | |||||
22 | Each mapping from a MARC-8 value to a UTF-8 value is represented by | ||||
23 | a MARC::Charset::Code object in a MARC::Charset::Table. | ||||
24 | |||||
25 | =head1 METHODS | ||||
26 | |||||
27 | =head2 new() | ||||
28 | |||||
29 | The constructor. | ||||
30 | |||||
31 | =head2 name() | ||||
32 | |||||
33 | A descriptive name for the code point. | ||||
34 | |||||
35 | =head2 marc() | ||||
36 | |||||
37 | A string representing the MARC-8 bytes codes. | ||||
38 | |||||
39 | =head2 ucs() | ||||
40 | |||||
41 | A string representing the UCS code point in hex. | ||||
42 | |||||
43 | =head2 charset_code() | ||||
44 | |||||
45 | The MARC-8 character set code. | ||||
46 | |||||
47 | =head2 is_combining() | ||||
48 | |||||
49 | Returns true/false to tell if the character is a combining character. | ||||
50 | |||||
51 | =head2 marc_left_half() | ||||
52 | |||||
53 | If the character is the right half of a "double diacritic", returns | ||||
54 | a hex string representing the MARC-8 value of the left half. | ||||
55 | |||||
56 | =head2 marc_right_half() | ||||
57 | |||||
58 | If the character is the left half of a "double diacritic", returns | ||||
59 | a hex string representing the MARC-8 value of the right half. | ||||
60 | |||||
61 | =head2 to_string() | ||||
62 | |||||
63 | A stringified version of the object suitable for pretty printing. | ||||
64 | |||||
65 | =head2 char_value() | ||||
66 | |||||
67 | Returns the unicode character. Essentially just a helper around | ||||
68 | ucs(). | ||||
69 | |||||
70 | =cut | ||||
71 | |||||
72 | sub char_value | ||||
73 | { | ||||
74 | return chr(hex(shift->ucs())); | ||||
75 | } | ||||
76 | |||||
77 | =head2 g0_marc_value() | ||||
78 | |||||
79 | The string representing the MARC-8 encoding | ||||
80 | for lookup. | ||||
81 | |||||
82 | =cut | ||||
83 | |||||
84 | sub g0_marc_value | ||||
85 | { | ||||
86 | my $code = shift; | ||||
87 | my $marc = $code->marc(); | ||||
88 | if ($code->charset_name eq 'CJK') { | ||||
89 | return | ||||
90 | chr(hex(substr($marc,0,2))) . | ||||
91 | chr(hex(substr($marc,2,2))) . | ||||
92 | chr(hex(substr($marc,4,2))); | ||||
93 | } else { | ||||
94 | return chr(hex($marc)); | ||||
95 | } | ||||
96 | } | ||||
97 | |||||
98 | =head2 marc_value() | ||||
99 | |||||
100 | The string representing the MARC-8 encodingA | ||||
101 | for output. | ||||
102 | |||||
103 | =cut | ||||
104 | |||||
105 | sub marc_value | ||||
106 | { | ||||
107 | my $code = shift; | ||||
108 | my $marc = $code->marc(); | ||||
109 | if ($code->charset_name eq 'CJK') { | ||||
110 | return | ||||
111 | chr(hex(substr($marc,0,2))) . | ||||
112 | chr(hex(substr($marc,2,2))) . | ||||
113 | chr(hex(substr($marc,4,2))); | ||||
114 | } else { | ||||
115 | if ($code->default_charset_group() eq 'G0') { | ||||
116 | return chr(hex($marc)); | ||||
117 | } else { | ||||
118 | return chr(hex($marc) + 128); | ||||
119 | } | ||||
120 | } | ||||
121 | } | ||||
122 | |||||
123 | |||||
124 | =head2 charset_name() | ||||
125 | |||||
126 | Returns the name of the character set, instead of the code. | ||||
127 | |||||
128 | =cut | ||||
129 | |||||
130 | sub charset_name | ||||
131 | { | ||||
132 | return MARC::Charset::Constants::charset_name(shift->charset_value()); | ||||
133 | } | ||||
134 | |||||
135 | =head2 to_string() | ||||
136 | |||||
137 | Returns a stringified version of the object. | ||||
138 | |||||
139 | =cut | ||||
140 | |||||
141 | sub to_string | ||||
142 | { | ||||
143 | my $self = shift; | ||||
144 | my $str = | ||||
145 | $self->name() . ': ' . | ||||
146 | 'charset_code=' . $self->charset() . ' ' . | ||||
147 | 'marc=' . $self->marc() . ' ' . | ||||
148 | 'ucs=' . $self->ucs() . ' '; | ||||
149 | |||||
150 | $str .= ' combining' if $self->is_combining(); | ||||
151 | return $str; | ||||
152 | } | ||||
153 | |||||
154 | |||||
155 | =head2 marc8_hash_code() | ||||
156 | |||||
157 | Returns a hash code for this Code object for looking up the object using | ||||
158 | MARC8. First portion is the character set code and the second is the | ||||
159 | MARC-8 value. | ||||
160 | |||||
161 | =cut | ||||
162 | |||||
163 | sub marc8_hash_code | ||||
164 | { | ||||
165 | my $self = shift; | ||||
166 | return sprintf('%s:%s', $self->charset_value(), $self->g0_marc_value()); | ||||
167 | } | ||||
168 | |||||
169 | |||||
170 | =head2 utf8_hash_code() | ||||
171 | |||||
172 | Returns a hash code for uniquely identifying a Code by it's UCS value. | ||||
173 | |||||
174 | =cut | ||||
175 | |||||
176 | sub utf8_hash_code | ||||
177 | { | ||||
178 | return int(hex(shift->ucs())); | ||||
179 | } | ||||
180 | |||||
181 | |||||
182 | =head2 default_charset_group | ||||
183 | |||||
184 | Returns 'G0' or 'G1' indicating where the character is typicalling used | ||||
185 | in the MARC-8 environment. | ||||
186 | |||||
187 | =cut | ||||
188 | |||||
189 | sub default_charset_group | ||||
190 | { | ||||
191 | my $charset = shift->charset_value(); | ||||
192 | |||||
193 | return 'G0' | ||||
194 | if $charset eq ASCII_DEFAULT | ||||
195 | or $charset eq GREEK_SYMBOLS | ||||
196 | or $charset eq SUBSCRIPTS | ||||
197 | or $charset eq SUPERSCRIPTS | ||||
198 | or $charset eq BASIC_LATIN | ||||
199 | or $charset eq BASIC_ARABIC | ||||
200 | or $charset eq BASIC_CYRILLIC | ||||
201 | or $charset eq BASIC_GREEK | ||||
202 | or $charset eq BASIC_HEBREW | ||||
203 | or $charset eq CJK; | ||||
204 | |||||
205 | return 'G1'; | ||||
206 | } | ||||
207 | |||||
208 | |||||
209 | =head2 get_marc8_escape | ||||
210 | |||||
211 | Returns an escape sequence to move to the Code from another marc-8 character | ||||
212 | set. | ||||
213 | |||||
214 | =cut | ||||
215 | |||||
216 | sub get_escape | ||||
217 | { | ||||
218 | my $charset = shift->charset_value(); | ||||
219 | |||||
220 | return ESCAPE . $charset | ||||
221 | if $charset eq ASCII_DEFAULT | ||||
222 | or $charset eq GREEK_SYMBOLS | ||||
223 | or $charset eq SUBSCRIPTS | ||||
224 | or $charset eq SUPERSCRIPTS; | ||||
225 | |||||
226 | return ESCAPE . SINGLE_G0_A . $charset | ||||
227 | if $charset eq ASCII_DEFAULT | ||||
228 | or $charset eq BASIC_LATIN | ||||
229 | or $charset eq BASIC_ARABIC | ||||
230 | or $charset eq BASIC_CYRILLIC | ||||
231 | or $charset eq BASIC_GREEK | ||||
232 | or $charset eq BASIC_HEBREW; | ||||
233 | |||||
234 | return ESCAPE . SINGLE_G1_A . $charset | ||||
235 | if $charset eq EXTENDED_ARABIC | ||||
236 | or $charset eq EXTENDED_LATIN | ||||
237 | or $charset eq EXTENDED_CYRILLIC; | ||||
238 | |||||
239 | return ESCAPE . MULTI_G0_A . CJK | ||||
240 | if $charset eq CJK; | ||||
241 | } | ||||
242 | |||||
243 | =head2 charset_value | ||||
244 | |||||
245 | Returns the charset value, not the hex sequence. | ||||
246 | |||||
247 | =cut | ||||
248 | |||||
249 | sub charset_value | ||||
250 | { | ||||
251 | return chr(hex(shift->charset())); | ||||
252 | } | ||||
253 | |||||
- - | |||||
256 | 1 | 2µs | 1; |