Filename | /usr/share/perl5/MARC/Charset/Table.pm |
Statements | Executed 26 statements in 2.37ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 2.21ms | 2.71ms | BEGIN@42 | MARC::Charset::Table::
1 | 1 | 1 | 1.41ms | 6.61ms | BEGIN@43 | MARC::Charset::Table::
1 | 1 | 1 | 19µs | 19µs | BEGIN@37 | MARC::Charset::Table::
1 | 1 | 1 | 16µs | 62µs | _init | MARC::Charset::Table::
1 | 1 | 1 | 12µs | 27µs | BEGIN@34 | MARC::Charset::Table::
1 | 1 | 1 | 10µs | 33µs | BEGIN@45 | MARC::Charset::Table::
1 | 1 | 1 | 9µs | 2.00ms | BEGIN@36 | MARC::Charset::Table::
1 | 1 | 1 | 8µs | 15µs | BEGIN@35 | MARC::Charset::Table::
1 | 1 | 1 | 7µs | 10µs | db_path | MARC::Charset::Table::
1 | 1 | 1 | 7µs | 69µs | new | MARC::Charset::Table::
1 | 1 | 1 | 6µs | 142µs | BEGIN@44 | MARC::Charset::Table::
1 | 1 | 1 | 3µs | 3µs | CORE:subst (opcode) | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | add_code | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | brand_new | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | db | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | get_code | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | lookup_by_marc8 | MARC::Charset::Table::
0 | 0 | 0 | 0s | 0s | lookup_by_utf8 | MARC::Charset::Table::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package MARC::Charset::Table; | ||||
2 | |||||
3 | =head1 NAME | ||||
4 | |||||
5 | MARC::Charset::Table - character mapping db | ||||
6 | |||||
7 | =head1 SYNOPSIS | ||||
8 | |||||
9 | use MARC::Charset::Table; | ||||
10 | use MARC::Charset::Constants qw(:all); | ||||
11 | |||||
12 | # create the table object | ||||
13 | my $table = MARC::Charset::Table->new(); | ||||
14 | |||||
15 | # get a code using the marc8 character set code and the character | ||||
16 | my $code = $table->lookup_by_marc8(CYRILLIC_BASIC, 'K'); | ||||
17 | |||||
18 | # get a code using the utf8 value | ||||
19 | $code = $table->lookup_by_utf8(chr(0x043A)); | ||||
20 | |||||
21 | =head1 DESCRIPTION | ||||
22 | |||||
23 | MARC::Charset::Table is a wrapper around the character mapping database, | ||||
24 | which is implemented as a tied hash on disk. This database gets generated | ||||
25 | by Makefile.PL on installation of MARC::Charset using | ||||
26 | MARC::Charset::Compiler. | ||||
27 | |||||
28 | The database is essentially a key/value mapping where a key is a | ||||
29 | MARC-8 character set code + a MARC-8 character, or an integer representing the | ||||
30 | UCS code point. These keys map to a serialized MARC::Charset::Code object. | ||||
31 | |||||
32 | =cut | ||||
33 | |||||
34 | 2 | 25µs | 2 | 42µs | # spent 27µs (12+15) within MARC::Charset::Table::BEGIN@34 which was called:
# once (12µs+15µs) by MARC::Charset::BEGIN@14 at line 34 # spent 27µs making 1 call to MARC::Charset::Table::BEGIN@34
# spent 15µs making 1 call to strict::import |
35 | 2 | 20µs | 2 | 22µs | # spent 15µs (8+7) within MARC::Charset::Table::BEGIN@35 which was called:
# once (8µs+7µs) by MARC::Charset::BEGIN@14 at line 35 # spent 15µs making 1 call to MARC::Charset::Table::BEGIN@35
# spent 7µs making 1 call to warnings::import |
36 | 2 | 56µs | 2 | 4.00ms | # spent 2.00ms (9µs+1.99) within MARC::Charset::Table::BEGIN@36 which was called:
# once (9µs+1.99ms) by MARC::Charset::BEGIN@14 at line 36 # spent 2.00ms making 1 call to MARC::Charset::Table::BEGIN@36
# spent 1.99ms making 1 call to POSIX::import |
37 | # spent 19µs within MARC::Charset::Table::BEGIN@37 which was called:
# once (19µs+0s) by MARC::Charset::BEGIN@14 at line 41 | ||||
38 | 1 | 20µs | @AnyDBM_File::ISA = qw(GDBM_File DB_File NDBM_File ODBM_File SDBM_File); | ||
39 | # SDBM_File is last on the list because it produces the largest database | ||||
40 | # on disk. | ||||
41 | 1 | 25µs | 1 | 19µs | } # spent 19µs making 1 call to MARC::Charset::Table::BEGIN@37 |
42 | 2 | 990µs | 2 | 2.73ms | # spent 2.71ms (2.21+497µs) within MARC::Charset::Table::BEGIN@42 which was called:
# once (2.21ms+497µs) by MARC::Charset::BEGIN@14 at line 42 # spent 2.71ms making 1 call to MARC::Charset::Table::BEGIN@42
# spent 20µs making 1 call to Exporter::import |
43 | 2 | 755µs | 2 | 6.61ms | # spent 6.61ms (1.41+5.21) within MARC::Charset::Table::BEGIN@43 which was called:
# once (1.41ms+5.21ms) by MARC::Charset::BEGIN@14 at line 43 # spent 6.61ms making 1 call to MARC::Charset::Table::BEGIN@43
# spent 2µs making 1 call to Class::Accessor::import |
44 | 2 | 27µs | 2 | 278µs | # spent 142µs (6+136) within MARC::Charset::Table::BEGIN@44 which was called:
# once (6µs+136µs) by MARC::Charset::BEGIN@14 at line 44 # spent 142µs making 1 call to MARC::Charset::Table::BEGIN@44
# spent 136µs making 1 call to Exporter::import |
45 | 2 | 376µs | 2 | 56µs | # spent 33µs (10+23) within MARC::Charset::Table::BEGIN@45 which was called:
# once (10µs+23µs) by MARC::Charset::BEGIN@14 at line 45 # spent 33µs making 1 call to MARC::Charset::Table::BEGIN@45
# spent 23µs making 1 call to Exporter::import |
46 | |||||
47 | =head2 new() | ||||
48 | |||||
49 | The consturctor. | ||||
50 | |||||
51 | =cut | ||||
52 | |||||
53 | sub new | ||||
54 | # spent 69µs (7+62) within MARC::Charset::Table::new which was called:
# once (7µs+62µs) by MARC::File::XML::BEGIN@11 at line 45 of MARC/Charset.pm | ||||
55 | 1 | 400ns | my $class = shift; | ||
56 | 1 | 1µs | my $self = bless {}, ref($class) || $class; | ||
57 | 1 | 1µs | 1 | 62µs | $self->_init(O_RDONLY); # spent 62µs making 1 call to MARC::Charset::Table::_init |
58 | 1 | 3µs | return $self; | ||
59 | } | ||||
60 | |||||
61 | |||||
62 | =head2 add_code() | ||||
63 | |||||
64 | Add a MARC::Charset::Code to the table. | ||||
65 | |||||
66 | =cut | ||||
67 | |||||
68 | |||||
69 | sub add_code | ||||
70 | { | ||||
71 | my ($self, $code) = @_; | ||||
72 | |||||
73 | # the Code object is serialized | ||||
74 | my $frozen = nfreeze($code); | ||||
75 | |||||
76 | # to support lookup by marc8 and utf8 values we | ||||
77 | # stash away the rule in the db using two keys | ||||
78 | my $marc8_key = $code->marc8_hash_code(); | ||||
79 | my $utf8_key = $code->utf8_hash_code(); | ||||
80 | |||||
81 | # stash away the marc8 lookup key | ||||
82 | $self->{db}->{$marc8_key} = $frozen; | ||||
83 | |||||
84 | # stash away the utf8 lookup key (only if it's not already there!) | ||||
85 | # this means that the sets that appear in the xml file will have | ||||
86 | # precedence ascii/ansel. Note that we're using 'defined' instead of | ||||
87 | # 'exists' because NDBM_File and ODBM_File don't support 'exists'. | ||||
88 | $self->{db}->{$utf8_key} = $frozen unless defined $self->{db}->{$utf8_key}; | ||||
89 | } | ||||
90 | |||||
91 | |||||
92 | =head2 get_code() | ||||
93 | |||||
94 | Retrieve a code using a hash key. | ||||
95 | |||||
96 | =cut | ||||
97 | |||||
98 | sub get_code | ||||
99 | { | ||||
100 | my ($self, $key) = @_; | ||||
101 | my $db = $self->db(); | ||||
102 | my $frozen = $db->{$key}; | ||||
103 | return thaw($frozen) if $frozen; | ||||
104 | return; | ||||
105 | } | ||||
106 | |||||
107 | |||||
108 | =head2 lookup_by_marc8() | ||||
109 | |||||
110 | Looks up MARC::Charset::Code entry using a character set code and a MARC-8 | ||||
111 | value. | ||||
112 | |||||
113 | use MARC::Charset::Constants qw(HEBREW); | ||||
114 | $code = $table->lookup_by_marc8(HEBREW, chr(0x60)); | ||||
115 | |||||
116 | =cut | ||||
117 | |||||
118 | sub lookup_by_marc8 | ||||
119 | { | ||||
120 | my ($self, $charset, $marc8) = @_; | ||||
121 | $charset = BASIC_LATIN if $charset eq ASCII_DEFAULT; | ||||
122 | return $self->get_code(sprintf('%s:%s', $charset, $marc8)); | ||||
123 | } | ||||
124 | |||||
125 | |||||
126 | =head2 lookup_by_utf8() | ||||
127 | |||||
128 | Looks up a MARC::Charset::Code object using a utf8 value. | ||||
129 | |||||
130 | =cut | ||||
131 | |||||
132 | sub lookup_by_utf8 | ||||
133 | { | ||||
134 | my ($self, $value) = @_; | ||||
135 | return $self->get_code(ord($value)); | ||||
136 | } | ||||
137 | |||||
- - | |||||
141 | =head2 db() | ||||
142 | |||||
143 | Returns a reference to a tied character database. MARC::Charset::Table | ||||
144 | wraps access to the db, but you can get at it if you want. | ||||
145 | |||||
146 | =cut | ||||
147 | |||||
148 | sub db | ||||
149 | { | ||||
150 | return shift->{db}; | ||||
151 | } | ||||
152 | |||||
153 | |||||
154 | =head2 db_path() | ||||
155 | |||||
156 | Returns the path to the character encoding database. Can be called | ||||
157 | statically too: | ||||
158 | |||||
159 | print MARC::Charset::Table->db_path(); | ||||
160 | |||||
161 | =cut | ||||
162 | |||||
163 | sub db_path | ||||
164 | # spent 10µs (7+3) within MARC::Charset::Table::db_path which was called:
# once (7µs+3µs) by MARC::Charset::Table::_init at line 194 | ||||
165 | 1 | 700ns | my $path = $INC{'MARC/Charset/Table.pm'}; | ||
166 | 1 | 8µs | 1 | 3µs | $path =~ s/\.pm$//; # spent 3µs making 1 call to MARC::Charset::Table::CORE:subst |
167 | 1 | 3µs | return $path; | ||
168 | } | ||||
169 | |||||
170 | |||||
171 | =head2 brand_new() | ||||
172 | |||||
173 | An alternate constructor which removes the existing database and starts | ||||
174 | afresh. Be careful with this one, it's really only used on MARC::Charset | ||||
175 | installation. | ||||
176 | |||||
177 | =cut | ||||
178 | |||||
179 | sub brand_new | ||||
180 | { | ||||
181 | my $class = shift; | ||||
182 | my $self = bless {}, ref($class) || $class; | ||||
183 | $self->_init(O_CREAT|O_RDWR); | ||||
184 | |||||
185 | return $self; | ||||
186 | } | ||||
187 | |||||
188 | |||||
189 | # helper function for initializing table internals | ||||
190 | |||||
191 | sub _init | ||||
192 | # spent 62µs (16+46) within MARC::Charset::Table::_init which was called:
# once (16µs+46µs) by MARC::Charset::Table::new at line 57 | ||||
193 | 1 | 500ns | my ($self, $dbm_opts) = @_; | ||
194 | 1 | 45µs | 2 | 46µs | tie my %db, 'AnyDBM_File', db_path(), $dbm_opts, 0644; ## no critic (ValuesAndExpressions::ProhibitLeadingZeros) # spent 35µs making 1 call to GDBM_File::TIEHASH
# spent 10µs making 1 call to MARC::Charset::Table::db_path |
195 | 1 | 7µs | $self->{db} = \%db; | ||
196 | } | ||||
197 | |||||
- - | |||||
202 | 1 | 2µs | 1; | ||
# spent 3µs within MARC::Charset::Table::CORE:subst which was called:
# once (3µs+0s) by MARC::Charset::Table::db_path at line 166 |