Filename | /usr/share/perl5/MARC/File/USMARC.pm |
Statements | Executed 28 statements in 2.51ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 1.11ms | 1.19ms | BEGIN@16 | MARC::File::USMARC::
1 | 1 | 1 | 792µs | 969µs | BEGIN@14 | MARC::File::USMARC::
1 | 1 | 1 | 11µs | 22µs | BEGIN@9 | MARC::File::USMARC::
1 | 1 | 1 | 9µs | 26µs | BEGIN@17 | MARC::File::USMARC::
1 | 1 | 1 | 7µs | 37µs | BEGIN@21 | MARC::File::USMARC::
1 | 1 | 1 | 7µs | 25µs | BEGIN@13 | MARC::File::USMARC::
1 | 1 | 1 | 7µs | 11µs | BEGIN@10 | MARC::File::USMARC::
1 | 1 | 1 | 7µs | 8µs | BEGIN@11 | MARC::File::USMARC::
1 | 1 | 1 | 7µs | 32µs | BEGIN@19 | MARC::File::USMARC::
1 | 1 | 1 | 6µs | 26µs | BEGIN@23 | MARC::File::USMARC::
1 | 1 | 1 | 6µs | 26µs | BEGIN@24 | MARC::File::USMARC::
1 | 1 | 1 | 6µs | 27µs | BEGIN@22 | MARC::File::USMARC::
1 | 1 | 1 | 5µs | 5µs | BEGIN@20 | MARC::File::USMARC::
0 | 0 | 0 | 0s | 0s | _build_tag_directory | MARC::File::USMARC::
0 | 0 | 0 | 0s | 0s | _next | MARC::File::USMARC::
0 | 0 | 0 | 0s | 0s | decode | MARC::File::USMARC::
0 | 0 | 0 | 0s | 0s | encode | MARC::File::USMARC::
0 | 0 | 0 | 0s | 0s | update_leader | MARC::File::USMARC::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package MARC::File::USMARC; | ||||
2 | |||||
3 | =head1 NAME | ||||
4 | |||||
5 | MARC::File::USMARC - USMARC-specific file handling | ||||
6 | |||||
7 | =cut | ||||
8 | |||||
9 | 2 | 24µs | 2 | 33µs | # spent 22µs (11+11) within MARC::File::USMARC::BEGIN@9 which was called:
# once (11µs+11µs) by C4::Biblio::BEGIN@28 at line 9 # spent 22µs making 1 call to MARC::File::USMARC::BEGIN@9
# spent 11µs making 1 call to strict::import |
10 | 2 | 20µs | 2 | 16µs | # spent 11µs (7+4) within MARC::File::USMARC::BEGIN@10 which was called:
# once (7µs+4µs) by C4::Biblio::BEGIN@28 at line 10 # spent 11µs making 1 call to MARC::File::USMARC::BEGIN@10
# spent 4µs making 1 call to warnings::import |
11 | 2 | 26µs | 2 | 10µs | # spent 8µs (7+1) within MARC::File::USMARC::BEGIN@11 which was called:
# once (7µs+1µs) by C4::Biblio::BEGIN@28 at line 11 # spent 8µs making 1 call to MARC::File::USMARC::BEGIN@11
# spent 1µs making 1 call to integer::import |
12 | |||||
13 | 2 | 23µs | 2 | 43µs | # spent 25µs (7+18) within MARC::File::USMARC::BEGIN@13 which was called:
# once (7µs+18µs) by C4::Biblio::BEGIN@28 at line 13 # spent 25µs making 1 call to MARC::File::USMARC::BEGIN@13
# spent 18µs making 1 call to vars::import |
14 | 2 | 698µs | 2 | 996µs | # spent 969µs (792+177) within MARC::File::USMARC::BEGIN@14 which was called:
# once (792µs+177µs) by C4::Biblio::BEGIN@28 at line 14 # spent 969µs making 1 call to MARC::File::USMARC::BEGIN@14
# spent 27µs making 1 call to Exporter::import |
15 | |||||
16 | 2 | 573µs | 1 | 1.19ms | # spent 1.19ms (1.11+79µs) within MARC::File::USMARC::BEGIN@16 which was called:
# once (1.11ms+79µs) by C4::Biblio::BEGIN@28 at line 16 # spent 1.19ms making 1 call to MARC::File::USMARC::BEGIN@16 |
17 | 3 | 36µs | 2 | 43µs | # spent 26µs (9+17) within MARC::File::USMARC::BEGIN@17 which was called:
# once (9µs+17µs) by C4::Biblio::BEGIN@28 at line 17 # spent 26µs making 1 call to MARC::File::USMARC::BEGIN@17
# spent 17µs making 1 call to vars::import |
18 | |||||
19 | 2 | 22µs | 2 | 57µs | # spent 32µs (7+25) within MARC::File::USMARC::BEGIN@19 which was called:
# once (7µs+25µs) by C4::Biblio::BEGIN@28 at line 19 # spent 32µs making 1 call to MARC::File::USMARC::BEGIN@19
# spent 25µs making 1 call to Exporter::import |
20 | 2 | 25µs | 1 | 5µs | # spent 5µs within MARC::File::USMARC::BEGIN@20 which was called:
# once (5µs+0s) by C4::Biblio::BEGIN@28 at line 20 # spent 5µs making 1 call to MARC::File::USMARC::BEGIN@20 |
21 | 2 | 28µs | 2 | 67µs | # spent 37µs (7+30) within MARC::File::USMARC::BEGIN@21 which was called:
# once (7µs+30µs) by C4::Biblio::BEGIN@28 at line 21 # spent 37µs making 1 call to MARC::File::USMARC::BEGIN@21
# spent 30µs making 1 call to constant::import |
22 | 2 | 24µs | 2 | 48µs | # spent 27µs (6+21) within MARC::File::USMARC::BEGIN@22 which was called:
# once (6µs+21µs) by C4::Biblio::BEGIN@28 at line 22 # spent 27µs making 1 call to MARC::File::USMARC::BEGIN@22
# spent 21µs making 1 call to constant::import |
23 | 2 | 22µs | 2 | 46µs | # spent 26µs (6+20) within MARC::File::USMARC::BEGIN@23 which was called:
# once (6µs+20µs) by C4::Biblio::BEGIN@28 at line 23 # spent 26µs making 1 call to MARC::File::USMARC::BEGIN@23
# spent 20µs making 1 call to constant::import |
24 | 2 | 986µs | 2 | 45µs | # spent 26µs (6+20) within MARC::File::USMARC::BEGIN@24 which was called:
# once (6µs+20µs) by C4::Biblio::BEGIN@28 at line 24 # spent 26µs making 1 call to MARC::File::USMARC::BEGIN@24
# spent 20µs making 1 call to constant::import |
25 | |||||
26 | =head1 SYNOPSIS | ||||
27 | |||||
28 | use MARC::File::USMARC; | ||||
29 | |||||
30 | my $file = MARC::File::USMARC->in( $filename ); | ||||
31 | |||||
32 | while ( my $marc = $file->next() ) { | ||||
33 | # Do something | ||||
34 | } | ||||
35 | $file->close(); | ||||
36 | undef $file; | ||||
37 | |||||
38 | =head1 EXPORT | ||||
39 | |||||
40 | None. | ||||
41 | |||||
42 | =head1 METHODS | ||||
43 | |||||
44 | =cut | ||||
45 | |||||
46 | sub _next { | ||||
47 | my $self = shift; | ||||
48 | my $fh = $self->{fh}; | ||||
49 | |||||
50 | my $reclen; | ||||
51 | return if eof($fh); | ||||
52 | |||||
53 | local $/ = END_OF_RECORD; | ||||
54 | my $usmarc = <$fh>; | ||||
55 | |||||
56 | # remove illegal garbage that sometimes occurs between records | ||||
57 | $usmarc =~ s/^[ \x00\x0a\x0d\x1a]+//; | ||||
58 | |||||
59 | return $usmarc; | ||||
60 | } | ||||
61 | |||||
62 | =head2 decode( $string [, \&filter_func ] ) | ||||
63 | |||||
64 | Constructor for handling data from a USMARC file. This function takes care of | ||||
65 | all the tag directory parsing & mangling. | ||||
66 | |||||
67 | Any warnings or coercions can be checked in the C<warnings()> function. | ||||
68 | |||||
69 | The C<$filter_func> is an optional reference to a user-supplied function | ||||
70 | that determines on a tag-by-tag basis if you want the tag passed to it | ||||
71 | to be put into the MARC record. The function is passed the tag number | ||||
72 | and the raw tag data, and must return a boolean. The return of a true | ||||
73 | value tells MARC::File::USMARC::decode that the tag should get put into | ||||
74 | the resulting MARC record. | ||||
75 | |||||
76 | For example, if you only want title and subject tags in your MARC record, | ||||
77 | try this: | ||||
78 | |||||
79 | sub filter { | ||||
80 | my ($tagno,$tagdata) = @_; | ||||
81 | |||||
82 | return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699); | ||||
83 | } | ||||
84 | |||||
85 | my $marc = MARC::File::USMARC->decode( $string, \&filter ); | ||||
86 | |||||
87 | Why would you want to do such a thing? The big reason is that creating | ||||
88 | fields is processor-intensive, and if your program is doing read-only | ||||
89 | data analysis and needs to be as fast as possible, you can save time by | ||||
90 | not creating fields that you'll be ignoring anyway. | ||||
91 | |||||
92 | Another possible use is if you're only interested in printing certain | ||||
93 | tags from the record, then you can filter them when you read from disc | ||||
94 | and not have to delete unwanted tags yourself. | ||||
95 | |||||
96 | =cut | ||||
97 | |||||
98 | sub decode { | ||||
99 | |||||
100 | my $text; | ||||
101 | my $location = ''; | ||||
102 | |||||
103 | ## decode can be called in a variety of ways | ||||
104 | ## $object->decode( $string ) | ||||
105 | ## MARC::File::USMARC->decode( $string ) | ||||
106 | ## MARC::File::USMARC::decode( $string ) | ||||
107 | ## this bit of code covers all three | ||||
108 | |||||
109 | my $self = shift; | ||||
110 | if ( ref($self) =~ /^MARC::File/ ) { | ||||
111 | $location = 'in record '.$self->{recnum}; | ||||
112 | $text = shift; | ||||
113 | } else { | ||||
114 | $location = 'in record 1'; | ||||
115 | $text = $self=~/MARC::File/ ? shift : $self; | ||||
116 | } | ||||
117 | my $filter_func = shift; | ||||
118 | |||||
119 | # ok this the empty shell we will fill | ||||
120 | my $marc = MARC::Record->new(); | ||||
121 | |||||
122 | # Check for an all-numeric record length | ||||
123 | ($text =~ /^(\d{5})/) | ||||
124 | or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric $location" ); | ||||
125 | |||||
126 | my $reclen = $1; | ||||
127 | my $realLength = bytes::length( $text ); | ||||
128 | $marc->_warn( "Invalid record length $location: Leader says $reclen " . | ||||
129 | "bytes but it's actually $realLength" ) unless $reclen == $realLength; | ||||
130 | |||||
131 | (substr($text, -1, 1) eq END_OF_RECORD) | ||||
132 | or $marc->_warn( "Invalid record terminator $location" ); | ||||
133 | |||||
134 | $marc->leader( substr( $text, 0, LEADER_LEN ) ); | ||||
135 | |||||
136 | # bytes 12 - 16 of leader give offset to the body of the record | ||||
137 | my $data_start = 0 + bytes::substr( $text, 12, 5 ); | ||||
138 | |||||
139 | # immediately after the leader comes the directory (no separator) | ||||
140 | my $dir = substr( $text, LEADER_LEN, $data_start - LEADER_LEN - 1 ); # -1 to allow for \x1e at end of directory | ||||
141 | |||||
142 | # character after the directory must be \x1e | ||||
143 | (substr($text, $data_start-1, 1) eq END_OF_FIELD) | ||||
144 | or $marc->_warn( "No directory found $location" ); | ||||
145 | |||||
146 | # all directory entries 12 bytes long, so length % 12 must be 0 | ||||
147 | (length($dir) % DIRECTORY_ENTRY_LEN == 0) | ||||
148 | or $marc->_warn( "Invalid directory length $location" ); | ||||
149 | |||||
150 | |||||
151 | # go through all the fields | ||||
152 | my $nfields = length($dir)/DIRECTORY_ENTRY_LEN; | ||||
153 | for ( my $n = 0; $n < $nfields; $n++ ) { | ||||
154 | my ( $tagno, $len, $offset ) = unpack( "A3 A4 A5", substr($dir, $n*DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN) ); | ||||
155 | |||||
156 | # Check directory validity | ||||
157 | ($tagno =~ /^[0-9A-Za-z]{3}$/) | ||||
158 | or $marc->_warn( "Invalid tag in directory $location: \"$tagno\"" ); | ||||
159 | |||||
160 | ($len =~ /^\d{4}$/) | ||||
161 | or $marc->_warn( "Invalid length in directory $location tag $tagno: \"$len\"" ); | ||||
162 | |||||
163 | ($offset =~ /^\d{5}$/) | ||||
164 | or $marc->_warn( "Invalid offset in directory $location tag $tagno: \"$offset\"" ); | ||||
165 | |||||
166 | ($offset + $len <= $reclen) | ||||
167 | or $marc->_warn( "Directory entry $location runs off the end of the record tag $tagno" ); | ||||
168 | |||||
169 | my $tagdata = bytes::substr( $text, $data_start+$offset, $len ); | ||||
170 | |||||
171 | # if utf8 the we encode the string as utf8 | ||||
172 | if ( $marc->encoding() eq 'UTF-8' ) { | ||||
173 | $tagdata = marc_to_utf8( $tagdata ); | ||||
174 | } | ||||
175 | |||||
176 | $marc->_warn( "Invalid length in directory for tag $tagno $location" ) | ||||
177 | unless ( $len == bytes::length($tagdata) ); | ||||
178 | |||||
179 | if ( substr($tagdata, -1, 1) eq END_OF_FIELD ) { | ||||
180 | # get rid of the end-of-tag character | ||||
181 | chop $tagdata; | ||||
182 | --$len; | ||||
183 | } else { | ||||
184 | $marc->_warn( "field does not end in end of field character in tag $tagno $location" ); | ||||
185 | } | ||||
186 | |||||
187 | warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG; | ||||
188 | |||||
189 | if ( $filter_func ) { | ||||
190 | next unless $filter_func->( $tagno, $tagdata ); | ||||
191 | } | ||||
192 | |||||
193 | if ( MARC::Field->is_controlfield_tag($tagno) ) { | ||||
194 | $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) ); | ||||
195 | } else { | ||||
196 | my @subfields = split( SUBFIELD_INDICATOR, $tagdata ); | ||||
197 | my $indicators = shift @subfields; | ||||
198 | my ($ind1, $ind2); | ||||
199 | |||||
200 | if ( length( $indicators ) > 2 or length( $indicators ) == 0 ) { | ||||
201 | $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks $location for tag $tagno\n" ); | ||||
202 | ($ind1,$ind2) = (" ", " "); | ||||
203 | } else { | ||||
204 | $ind1 = substr( $indicators,0, 1 ); | ||||
205 | $ind2 = substr( $indicators,1, 1 ); | ||||
206 | } | ||||
207 | |||||
208 | # Split the subfield data into subfield name and data pairs | ||||
209 | my @subfield_data; | ||||
210 | for ( @subfields ) { | ||||
211 | if ( length > 0 ) { | ||||
212 | push( @subfield_data, substr($_,0,1),substr($_,1) ); | ||||
213 | } else { | ||||
214 | $marc->_warn( "Entirely empty subfield found in tag $tagno" ); | ||||
215 | } | ||||
216 | } | ||||
217 | |||||
218 | if ( !@subfield_data ) { | ||||
219 | $marc->_warn( "no subfield data found $location for tag $tagno" ); | ||||
220 | next; | ||||
221 | } | ||||
222 | |||||
223 | my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data ); | ||||
224 | if ( $field->warnings() ) { | ||||
225 | $marc->_warn( $field->warnings() ); | ||||
226 | } | ||||
227 | $marc->append_fields( $field ); | ||||
228 | } | ||||
229 | } # looping through all the fields | ||||
230 | |||||
231 | |||||
232 | return $marc; | ||||
233 | } | ||||
234 | |||||
235 | =head2 update_leader() | ||||
236 | |||||
237 | If any changes get made to the MARC record, the first 5 bytes of the | ||||
238 | leader (the length) will be invalid. This function updates the | ||||
239 | leader with the correct length of the record as it would be if | ||||
240 | written out to a file. | ||||
241 | |||||
242 | =cut | ||||
243 | |||||
244 | sub update_leader { | ||||
245 | my $self = shift; | ||||
246 | |||||
247 | my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory(); | ||||
248 | |||||
249 | $self->_set_leader_lengths( $reclen, $baseaddress ); | ||||
250 | } | ||||
251 | |||||
252 | =head2 _build_tag_directory() | ||||
253 | |||||
254 | Function for internal use only: Builds the tag directory that gets | ||||
255 | put in front of the data in a MARC record. | ||||
256 | |||||
257 | Returns two array references, and two lengths: The tag directory, and the data fields themselves, | ||||
258 | the length of all data (including the Leader that we expect will be added), | ||||
259 | and the size of the Leader and tag directory. | ||||
260 | |||||
261 | =cut | ||||
262 | |||||
263 | sub _build_tag_directory { | ||||
264 | my $marc = shift; | ||||
265 | $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/; | ||||
266 | die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record"; | ||||
267 | |||||
268 | my @fields; | ||||
269 | my @directory; | ||||
270 | |||||
271 | my $dataend = 0; | ||||
272 | for my $field ( $marc->fields() ) { | ||||
273 | # Dump data into proper format | ||||
274 | my $str = $field->as_usmarc; | ||||
275 | push( @fields, $str ); | ||||
276 | |||||
277 | # Create directory entry | ||||
278 | my $len = bytes::length( $str ); | ||||
279 | |||||
280 | my $direntry = sprintf( "%03s%04d%05d", $field->tag, $len, $dataend ); | ||||
281 | push( @directory, $direntry ); | ||||
282 | $dataend += $len; | ||||
283 | } | ||||
284 | |||||
285 | my $baseaddress = | ||||
286 | LEADER_LEN + # better be 24 | ||||
287 | ( @directory * DIRECTORY_ENTRY_LEN ) + | ||||
288 | # all the directory entries | ||||
289 | 1; # end-of-field marker | ||||
290 | |||||
291 | |||||
292 | my $total = | ||||
293 | $baseaddress + # stuff before first field | ||||
294 | $dataend + # Length of the fields | ||||
295 | 1; # End-of-record marker | ||||
296 | |||||
- - | |||||
299 | return (\@fields, \@directory, $total, $baseaddress); | ||||
300 | } | ||||
301 | |||||
302 | =head2 encode() | ||||
303 | |||||
304 | Returns a string of characters suitable for writing out to a USMARC file, | ||||
305 | including the leader, directory and all the fields. | ||||
306 | |||||
307 | =cut | ||||
308 | |||||
309 | sub encode { | ||||
310 | my $marc = shift; | ||||
311 | $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/; | ||||
312 | |||||
313 | my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc); | ||||
314 | $marc->set_leader_lengths( $reclen, $baseaddress ); | ||||
315 | |||||
316 | # Glomp it all together | ||||
317 | return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD); | ||||
318 | } | ||||
319 | 1 | 2µs | 1; | ||
320 | |||||
321 | __END__ |