| Filename | /usr/share/perl5/MARC/Charset/Code.pm |
| Statements | Executed 14 statements in 1.78ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 2.23ms | 3.10ms | MARC::Charset::Code::BEGIN@8 |
| 1 | 1 | 1 | 11µs | 23µs | MARC::Charset::Code::BEGIN@3 |
| 1 | 1 | 1 | 8µs | 1.84ms | MARC::Charset::Code::BEGIN@5 |
| 1 | 1 | 1 | 8µs | 31µs | MARC::Charset::Code::BEGIN@6 |
| 1 | 1 | 1 | 7µs | 12µs | MARC::Charset::Code::BEGIN@4 |
| 1 | 1 | 1 | 7µs | 24µs | MARC::Charset::Code::BEGIN@7 |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::char_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::charset_name |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::charset_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::default_charset_group |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::g0_marc_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::get_escape |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::marc8_hash_code |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::marc_value |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::to_string |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Code::utf8_hash_code |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package MARC::Charset::Code; | ||||
| 2 | |||||
| 3 | 2 | 23µs | 2 | 35µs | # spent 23µs (11+12) within MARC::Charset::Code::BEGIN@3 which was called:
# once (11µs+12µs) by MARC::Charset::Table::BEGIN@43 at line 3 # spent 23µs making 1 call to MARC::Charset::Code::BEGIN@3
# spent 12µs making 1 call to strict::import |
| 4 | 2 | 22µs | 2 | 18µs | # spent 12µs (7+5) within MARC::Charset::Code::BEGIN@4 which was called:
# once (7µs+5µs) by MARC::Charset::Table::BEGIN@43 at line 4 # spent 12µs making 1 call to MARC::Charset::Code::BEGIN@4
# spent 5µs making 1 call to warnings::import |
| 5 | 2 | 35µs | 2 | 3.67ms | # spent 1.84ms (8µs+1.83) within MARC::Charset::Code::BEGIN@5 which was called:
# once (8µs+1.83ms) by MARC::Charset::Table::BEGIN@43 at line 5 # spent 1.84ms making 1 call to MARC::Charset::Code::BEGIN@5
# spent 1.83ms making 1 call to base::import |
| 6 | 2 | 24µs | 2 | 54µs | # spent 31µs (8+23) within MARC::Charset::Code::BEGIN@6 which was called:
# once (8µs+23µs) by MARC::Charset::Table::BEGIN@43 at line 6 # spent 31µs making 1 call to MARC::Charset::Code::BEGIN@6
# spent 23µs making 1 call to Exporter::import |
| 7 | 2 | 23µs | 2 | 42µs | # spent 24µs (7+18) within MARC::Charset::Code::BEGIN@7 which was called:
# once (7µs+18µs) by MARC::Charset::Table::BEGIN@43 at line 7 # spent 24µs making 1 call to MARC::Charset::Code::BEGIN@7
# spent 18µs making 1 call to Exporter::import |
| 8 | 2 | 1.64ms | 2 | 3.28ms | # spent 3.10ms (2.23+869µs) within MARC::Charset::Code::BEGIN@8 which was called:
# once (2.23ms+869µs) by MARC::Charset::Table::BEGIN@43 at line 8 # spent 3.10ms making 1 call to MARC::Charset::Code::BEGIN@8
# spent 177µs making 1 call to Exporter::import |
| 9 | |||||
| 10 | 1 | 5µs | 1 | 178µs | MARC::Charset::Code # spent 178µs making 1 call to Class::Accessor::mk_accessors |
| 11 | ->mk_accessors(qw(marc ucs name charset is_combining alt | ||||
| 12 | marc_right_half marc_left_half)); | ||||
| 13 | |||||
| 14 | =head1 NAME | ||||
| 15 | |||||
| 16 | MARC::Charset::Code - represents a MARC-8/UTF-8 mapping | ||||
| 17 | |||||
| 18 | =head1 SYNOPSIS | ||||
| 19 | |||||
| 20 | =head1 DESCRIPTION | ||||
| 21 | |||||
| 22 | Each mapping from a MARC-8 value to a UTF-8 value is represented by | ||||
| 23 | a MARC::Charset::Code object in a MARC::Charset::Table. | ||||
| 24 | |||||
| 25 | =head1 METHODS | ||||
| 26 | |||||
| 27 | =head2 new() | ||||
| 28 | |||||
| 29 | The constructor. | ||||
| 30 | |||||
| 31 | =head2 name() | ||||
| 32 | |||||
| 33 | A descriptive name for the code point. | ||||
| 34 | |||||
| 35 | =head2 marc() | ||||
| 36 | |||||
| 37 | A string representing the MARC-8 bytes codes. | ||||
| 38 | |||||
| 39 | =head2 ucs() | ||||
| 40 | |||||
| 41 | A string representing the UCS code point in hex. | ||||
| 42 | |||||
| 43 | =head2 charset_code() | ||||
| 44 | |||||
| 45 | The MARC-8 character set code. | ||||
| 46 | |||||
| 47 | =head2 is_combining() | ||||
| 48 | |||||
| 49 | Returns true/false to tell if the character is a combining character. | ||||
| 50 | |||||
| 51 | =head2 marc_left_half() | ||||
| 52 | |||||
| 53 | If the character is the right half of a "double diacritic", returns | ||||
| 54 | a hex string representing the MARC-8 value of the left half. | ||||
| 55 | |||||
| 56 | =head2 marc_right_half() | ||||
| 57 | |||||
| 58 | If the character is the left half of a "double diacritic", returns | ||||
| 59 | a hex string representing the MARC-8 value of the right half. | ||||
| 60 | |||||
| 61 | =head2 to_string() | ||||
| 62 | |||||
| 63 | A stringified version of the object suitable for pretty printing. | ||||
| 64 | |||||
| 65 | =head2 char_value() | ||||
| 66 | |||||
| 67 | Returns the unicode character. Essentially just a helper around | ||||
| 68 | ucs(). | ||||
| 69 | |||||
| 70 | =cut | ||||
| 71 | |||||
| 72 | sub char_value | ||||
| 73 | { | ||||
| 74 | return chr(hex(shift->ucs())); | ||||
| 75 | } | ||||
| 76 | |||||
| 77 | =head2 g0_marc_value() | ||||
| 78 | |||||
| 79 | The string representing the MARC-8 encoding | ||||
| 80 | for lookup. | ||||
| 81 | |||||
| 82 | =cut | ||||
| 83 | |||||
| 84 | sub g0_marc_value | ||||
| 85 | { | ||||
| 86 | my $code = shift; | ||||
| 87 | my $marc = $code->marc(); | ||||
| 88 | if ($code->charset_name eq 'CJK') { | ||||
| 89 | return | ||||
| 90 | chr(hex(substr($marc,0,2))) . | ||||
| 91 | chr(hex(substr($marc,2,2))) . | ||||
| 92 | chr(hex(substr($marc,4,2))); | ||||
| 93 | } else { | ||||
| 94 | return chr(hex($marc)); | ||||
| 95 | } | ||||
| 96 | } | ||||
| 97 | |||||
| 98 | =head2 marc_value() | ||||
| 99 | |||||
| 100 | The string representing the MARC-8 encodingA | ||||
| 101 | for output. | ||||
| 102 | |||||
| 103 | =cut | ||||
| 104 | |||||
| 105 | sub marc_value | ||||
| 106 | { | ||||
| 107 | my $code = shift; | ||||
| 108 | my $marc = $code->marc(); | ||||
| 109 | if ($code->charset_name eq 'CJK') { | ||||
| 110 | return | ||||
| 111 | chr(hex(substr($marc,0,2))) . | ||||
| 112 | chr(hex(substr($marc,2,2))) . | ||||
| 113 | chr(hex(substr($marc,4,2))); | ||||
| 114 | } else { | ||||
| 115 | if ($code->default_charset_group() eq 'G0') { | ||||
| 116 | return chr(hex($marc)); | ||||
| 117 | } else { | ||||
| 118 | return chr(hex($marc) + 128); | ||||
| 119 | } | ||||
| 120 | } | ||||
| 121 | } | ||||
| 122 | |||||
| 123 | |||||
| 124 | =head2 charset_name() | ||||
| 125 | |||||
| 126 | Returns the name of the character set, instead of the code. | ||||
| 127 | |||||
| 128 | =cut | ||||
| 129 | |||||
| 130 | sub charset_name | ||||
| 131 | { | ||||
| 132 | return MARC::Charset::Constants::charset_name(shift->charset_value()); | ||||
| 133 | } | ||||
| 134 | |||||
| 135 | =head2 to_string() | ||||
| 136 | |||||
| 137 | Returns a stringified version of the object. | ||||
| 138 | |||||
| 139 | =cut | ||||
| 140 | |||||
| 141 | sub to_string | ||||
| 142 | { | ||||
| 143 | my $self = shift; | ||||
| 144 | my $str = | ||||
| 145 | $self->name() . ': ' . | ||||
| 146 | 'charset_code=' . $self->charset() . ' ' . | ||||
| 147 | 'marc=' . $self->marc() . ' ' . | ||||
| 148 | 'ucs=' . $self->ucs() . ' '; | ||||
| 149 | |||||
| 150 | $str .= ' combining' if $self->is_combining(); | ||||
| 151 | return $str; | ||||
| 152 | } | ||||
| 153 | |||||
| 154 | |||||
| 155 | =head2 marc8_hash_code() | ||||
| 156 | |||||
| 157 | Returns a hash code for this Code object for looking up the object using | ||||
| 158 | MARC8. First portion is the character set code and the second is the | ||||
| 159 | MARC-8 value. | ||||
| 160 | |||||
| 161 | =cut | ||||
| 162 | |||||
| 163 | sub marc8_hash_code | ||||
| 164 | { | ||||
| 165 | my $self = shift; | ||||
| 166 | return sprintf('%s:%s', $self->charset_value(), $self->g0_marc_value()); | ||||
| 167 | } | ||||
| 168 | |||||
| 169 | |||||
| 170 | =head2 utf8_hash_code() | ||||
| 171 | |||||
| 172 | Returns a hash code for uniquely identifying a Code by it's UCS value. | ||||
| 173 | |||||
| 174 | =cut | ||||
| 175 | |||||
| 176 | sub utf8_hash_code | ||||
| 177 | { | ||||
| 178 | return int(hex(shift->ucs())); | ||||
| 179 | } | ||||
| 180 | |||||
| 181 | |||||
| 182 | =head2 default_charset_group | ||||
| 183 | |||||
| 184 | Returns 'G0' or 'G1' indicating where the character is typicalling used | ||||
| 185 | in the MARC-8 environment. | ||||
| 186 | |||||
| 187 | =cut | ||||
| 188 | |||||
| 189 | sub default_charset_group | ||||
| 190 | { | ||||
| 191 | my $charset = shift->charset_value(); | ||||
| 192 | |||||
| 193 | return 'G0' | ||||
| 194 | if $charset eq ASCII_DEFAULT | ||||
| 195 | or $charset eq GREEK_SYMBOLS | ||||
| 196 | or $charset eq SUBSCRIPTS | ||||
| 197 | or $charset eq SUPERSCRIPTS | ||||
| 198 | or $charset eq BASIC_LATIN | ||||
| 199 | or $charset eq BASIC_ARABIC | ||||
| 200 | or $charset eq BASIC_CYRILLIC | ||||
| 201 | or $charset eq BASIC_GREEK | ||||
| 202 | or $charset eq BASIC_HEBREW | ||||
| 203 | or $charset eq CJK; | ||||
| 204 | |||||
| 205 | return 'G1'; | ||||
| 206 | } | ||||
| 207 | |||||
| 208 | |||||
| 209 | =head2 get_marc8_escape | ||||
| 210 | |||||
| 211 | Returns an escape sequence to move to the Code from another marc-8 character | ||||
| 212 | set. | ||||
| 213 | |||||
| 214 | =cut | ||||
| 215 | |||||
| 216 | sub get_escape | ||||
| 217 | { | ||||
| 218 | my $charset = shift->charset_value(); | ||||
| 219 | |||||
| 220 | return ESCAPE . $charset | ||||
| 221 | if $charset eq ASCII_DEFAULT | ||||
| 222 | or $charset eq GREEK_SYMBOLS | ||||
| 223 | or $charset eq SUBSCRIPTS | ||||
| 224 | or $charset eq SUPERSCRIPTS; | ||||
| 225 | |||||
| 226 | return ESCAPE . SINGLE_G0_A . $charset | ||||
| 227 | if $charset eq ASCII_DEFAULT | ||||
| 228 | or $charset eq BASIC_LATIN | ||||
| 229 | or $charset eq BASIC_ARABIC | ||||
| 230 | or $charset eq BASIC_CYRILLIC | ||||
| 231 | or $charset eq BASIC_GREEK | ||||
| 232 | or $charset eq BASIC_HEBREW; | ||||
| 233 | |||||
| 234 | return ESCAPE . SINGLE_G1_A . $charset | ||||
| 235 | if $charset eq EXTENDED_ARABIC | ||||
| 236 | or $charset eq EXTENDED_LATIN | ||||
| 237 | or $charset eq EXTENDED_CYRILLIC; | ||||
| 238 | |||||
| 239 | return ESCAPE . MULTI_G0_A . CJK | ||||
| 240 | if $charset eq CJK; | ||||
| 241 | } | ||||
| 242 | |||||
| 243 | =head2 charset_value | ||||
| 244 | |||||
| 245 | Returns the charset value, not the hex sequence. | ||||
| 246 | |||||
| 247 | =cut | ||||
| 248 | |||||
| 249 | sub charset_value | ||||
| 250 | { | ||||
| 251 | return chr(hex(shift->charset())); | ||||
| 252 | } | ||||
| 253 | |||||
| - - | |||||
| 256 | 1 | 2µs | 1; |