← Index
NYTProf Performance Profile   « line view »
For svc/members/upsert
  Run on Tue Jan 13 11:50:22 2015
Reported on Tue Jan 13 12:09:48 2015

Filename/usr/share/perl5/MARC/File/USMARC.pm
StatementsExecuted 28 statements in 2.51ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1111.11ms1.19msMARC::File::USMARC::::BEGIN@16MARC::File::USMARC::BEGIN@16
111792µs969µsMARC::File::USMARC::::BEGIN@14MARC::File::USMARC::BEGIN@14
11111µs22µsMARC::File::USMARC::::BEGIN@9MARC::File::USMARC::BEGIN@9
1119µs26µsMARC::File::USMARC::::BEGIN@17MARC::File::USMARC::BEGIN@17
1117µs37µsMARC::File::USMARC::::BEGIN@21MARC::File::USMARC::BEGIN@21
1117µs25µsMARC::File::USMARC::::BEGIN@13MARC::File::USMARC::BEGIN@13
1117µs11µsMARC::File::USMARC::::BEGIN@10MARC::File::USMARC::BEGIN@10
1117µs8µsMARC::File::USMARC::::BEGIN@11MARC::File::USMARC::BEGIN@11
1117µs32µsMARC::File::USMARC::::BEGIN@19MARC::File::USMARC::BEGIN@19
1116µs26µsMARC::File::USMARC::::BEGIN@23MARC::File::USMARC::BEGIN@23
1116µs26µsMARC::File::USMARC::::BEGIN@24MARC::File::USMARC::BEGIN@24
1116µs27µsMARC::File::USMARC::::BEGIN@22MARC::File::USMARC::BEGIN@22
1115µs5µsMARC::File::USMARC::::BEGIN@20MARC::File::USMARC::BEGIN@20
0000s0sMARC::File::USMARC::::_build_tag_directoryMARC::File::USMARC::_build_tag_directory
0000s0sMARC::File::USMARC::::_nextMARC::File::USMARC::_next
0000s0sMARC::File::USMARC::::decodeMARC::File::USMARC::decode
0000s0sMARC::File::USMARC::::encodeMARC::File::USMARC::encode
0000s0sMARC::File::USMARC::::update_leaderMARC::File::USMARC::update_leader
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package MARC::File::USMARC;
2
3=head1 NAME
4
5MARC::File::USMARC - USMARC-specific file handling
6
7=cut
8
9224µs233µs
# spent 22µs (11+11) within MARC::File::USMARC::BEGIN@9 which was called: # once (11µs+11µs) by C4::Biblio::BEGIN@28 at line 9
use strict;
# spent 22µs making 1 call to MARC::File::USMARC::BEGIN@9 # spent 11µs making 1 call to strict::import
10220µs216µs
# spent 11µs (7+4) within MARC::File::USMARC::BEGIN@10 which was called: # once (7µs+4µs) by C4::Biblio::BEGIN@28 at line 10
use warnings;
# spent 11µs making 1 call to MARC::File::USMARC::BEGIN@10 # spent 4µs making 1 call to warnings::import
11226µs210µs
# spent 8µs (7+1) within MARC::File::USMARC::BEGIN@11 which was called: # once (7µs+1µs) by C4::Biblio::BEGIN@28 at line 11
use integer;
# spent 8µs making 1 call to MARC::File::USMARC::BEGIN@11 # spent 1µs making 1 call to integer::import
12
13223µs243µs
# spent 25µs (7+18) within MARC::File::USMARC::BEGIN@13 which was called: # once (7µs+18µs) by C4::Biblio::BEGIN@28 at line 13
use vars qw( $ERROR );
# spent 25µs making 1 call to MARC::File::USMARC::BEGIN@13 # spent 18µs making 1 call to vars::import
142698µs2996µs
# spent 969µs (792+177) within MARC::File::USMARC::BEGIN@14 which was called: # once (792µs+177µs) by C4::Biblio::BEGIN@28 at line 14
use MARC::File::Encode qw( marc_to_utf8 );
# spent 969µs making 1 call to MARC::File::USMARC::BEGIN@14 # spent 27µs making 1 call to Exporter::import
15
162573µs11.19ms
# spent 1.19ms (1.11+79µs) within MARC::File::USMARC::BEGIN@16 which was called: # once (1.11ms+79µs) by C4::Biblio::BEGIN@28 at line 16
use MARC::File;
# spent 1.19ms making 1 call to MARC::File::USMARC::BEGIN@16
17336µs243µs
# spent 26µs (9+17) within MARC::File::USMARC::BEGIN@17 which was called: # once (9µs+17µs) by C4::Biblio::BEGIN@28 at line 17
use vars qw( @ISA ); @ISA = qw( MARC::File );
# spent 26µs making 1 call to MARC::File::USMARC::BEGIN@17 # spent 17µs making 1 call to vars::import
18
19222µs257µs
# spent 32µs (7+25) within MARC::File::USMARC::BEGIN@19 which was called: # once (7µs+25µs) by C4::Biblio::BEGIN@28 at line 19
use MARC::Record qw( LEADER_LEN );
# spent 32µs making 1 call to MARC::File::USMARC::BEGIN@19 # spent 25µs making 1 call to Exporter::import
20225µs15µs
# spent 5µs within MARC::File::USMARC::BEGIN@20 which was called: # once (5µs+0s) by C4::Biblio::BEGIN@28 at line 20
use MARC::Field;
# spent 5µs making 1 call to MARC::File::USMARC::BEGIN@20
21228µs267µs
# spent 37µs (7+30) within MARC::File::USMARC::BEGIN@21 which was called: # once (7µs+30µs) by C4::Biblio::BEGIN@28 at line 21
use constant SUBFIELD_INDICATOR => "\x1F";
# spent 37µs making 1 call to MARC::File::USMARC::BEGIN@21 # spent 30µs making 1 call to constant::import
22224µs248µs
# spent 27µs (6+21) within MARC::File::USMARC::BEGIN@22 which was called: # once (6µs+21µs) by C4::Biblio::BEGIN@28 at line 22
use constant END_OF_FIELD => "\x1E";
# spent 27µs making 1 call to MARC::File::USMARC::BEGIN@22 # spent 21µs making 1 call to constant::import
23222µs246µs
# spent 26µs (6+20) within MARC::File::USMARC::BEGIN@23 which was called: # once (6µs+20µs) by C4::Biblio::BEGIN@28 at line 23
use constant END_OF_RECORD => "\x1D";
# spent 26µs making 1 call to MARC::File::USMARC::BEGIN@23 # spent 20µs making 1 call to constant::import
242986µs245µs
# spent 26µs (6+20) within MARC::File::USMARC::BEGIN@24 which was called: # once (6µs+20µs) by C4::Biblio::BEGIN@28 at line 24
use constant DIRECTORY_ENTRY_LEN => 12;
# spent 26µs making 1 call to MARC::File::USMARC::BEGIN@24 # spent 20µs making 1 call to constant::import
25
26=head1 SYNOPSIS
27
28 use MARC::File::USMARC;
29
30 my $file = MARC::File::USMARC->in( $filename );
31
32 while ( my $marc = $file->next() ) {
33 # Do something
34 }
35 $file->close();
36 undef $file;
37
38=head1 EXPORT
39
40None.
41
42=head1 METHODS
43
44=cut
45
46sub _next {
47 my $self = shift;
48 my $fh = $self->{fh};
49
50 my $reclen;
51 return if eof($fh);
52
53 local $/ = END_OF_RECORD;
54 my $usmarc = <$fh>;
55
56 # remove illegal garbage that sometimes occurs between records
57 $usmarc =~ s/^[ \x00\x0a\x0d\x1a]+//;
58
59 return $usmarc;
60}
61
62=head2 decode( $string [, \&filter_func ] )
63
64Constructor for handling data from a USMARC file. This function takes care of
65all the tag directory parsing & mangling.
66
67Any warnings or coercions can be checked in the C<warnings()> function.
68
69The C<$filter_func> is an optional reference to a user-supplied function
70that determines on a tag-by-tag basis if you want the tag passed to it
71to be put into the MARC record. The function is passed the tag number
72and the raw tag data, and must return a boolean. The return of a true
73value tells MARC::File::USMARC::decode that the tag should get put into
74the resulting MARC record.
75
76For example, if you only want title and subject tags in your MARC record,
77try this:
78
79 sub filter {
80 my ($tagno,$tagdata) = @_;
81
82 return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699);
83 }
84
85 my $marc = MARC::File::USMARC->decode( $string, \&filter );
86
87Why would you want to do such a thing? The big reason is that creating
88fields is processor-intensive, and if your program is doing read-only
89data analysis and needs to be as fast as possible, you can save time by
90not creating fields that you'll be ignoring anyway.
91
92Another possible use is if you're only interested in printing certain
93tags from the record, then you can filter them when you read from disc
94and not have to delete unwanted tags yourself.
95
96=cut
97
98sub decode {
99
100 my $text;
101 my $location = '';
102
103 ## decode can be called in a variety of ways
104 ## $object->decode( $string )
105 ## MARC::File::USMARC->decode( $string )
106 ## MARC::File::USMARC::decode( $string )
107 ## this bit of code covers all three
108
109 my $self = shift;
110 if ( ref($self) =~ /^MARC::File/ ) {
111 $location = 'in record '.$self->{recnum};
112 $text = shift;
113 } else {
114 $location = 'in record 1';
115 $text = $self=~/MARC::File/ ? shift : $self;
116 }
117 my $filter_func = shift;
118
119 # ok this the empty shell we will fill
120 my $marc = MARC::Record->new();
121
122 # Check for an all-numeric record length
123 ($text =~ /^(\d{5})/)
124 or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric $location" );
125
126 my $reclen = $1;
127 my $realLength = bytes::length( $text );
128 $marc->_warn( "Invalid record length $location: Leader says $reclen " .
129 "bytes but it's actually $realLength" ) unless $reclen == $realLength;
130
131 (substr($text, -1, 1) eq END_OF_RECORD)
132 or $marc->_warn( "Invalid record terminator $location" );
133
134 $marc->leader( substr( $text, 0, LEADER_LEN ) );
135
136 # bytes 12 - 16 of leader give offset to the body of the record
137 my $data_start = 0 + bytes::substr( $text, 12, 5 );
138
139 # immediately after the leader comes the directory (no separator)
140 my $dir = substr( $text, LEADER_LEN, $data_start - LEADER_LEN - 1 ); # -1 to allow for \x1e at end of directory
141
142 # character after the directory must be \x1e
143 (substr($text, $data_start-1, 1) eq END_OF_FIELD)
144 or $marc->_warn( "No directory found $location" );
145
146 # all directory entries 12 bytes long, so length % 12 must be 0
147 (length($dir) % DIRECTORY_ENTRY_LEN == 0)
148 or $marc->_warn( "Invalid directory length $location" );
149
150
151 # go through all the fields
152 my $nfields = length($dir)/DIRECTORY_ENTRY_LEN;
153 for ( my $n = 0; $n < $nfields; $n++ ) {
154 my ( $tagno, $len, $offset ) = unpack( "A3 A4 A5", substr($dir, $n*DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN) );
155
156 # Check directory validity
157 ($tagno =~ /^[0-9A-Za-z]{3}$/)
158 or $marc->_warn( "Invalid tag in directory $location: \"$tagno\"" );
159
160 ($len =~ /^\d{4}$/)
161 or $marc->_warn( "Invalid length in directory $location tag $tagno: \"$len\"" );
162
163 ($offset =~ /^\d{5}$/)
164 or $marc->_warn( "Invalid offset in directory $location tag $tagno: \"$offset\"" );
165
166 ($offset + $len <= $reclen)
167 or $marc->_warn( "Directory entry $location runs off the end of the record tag $tagno" );
168
169 my $tagdata = bytes::substr( $text, $data_start+$offset, $len );
170
171 # if utf8 the we encode the string as utf8
172 if ( $marc->encoding() eq 'UTF-8' ) {
173 $tagdata = marc_to_utf8( $tagdata );
174 }
175
176 $marc->_warn( "Invalid length in directory for tag $tagno $location" )
177 unless ( $len == bytes::length($tagdata) );
178
179 if ( substr($tagdata, -1, 1) eq END_OF_FIELD ) {
180 # get rid of the end-of-tag character
181 chop $tagdata;
182 --$len;
183 } else {
184 $marc->_warn( "field does not end in end of field character in tag $tagno $location" );
185 }
186
187 warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;
188
189 if ( $filter_func ) {
190 next unless $filter_func->( $tagno, $tagdata );
191 }
192
193 if ( MARC::Field->is_controlfield_tag($tagno) ) {
194 $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) );
195 } else {
196 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
197 my $indicators = shift @subfields;
198 my ($ind1, $ind2);
199
200 if ( length( $indicators ) > 2 or length( $indicators ) == 0 ) {
201 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks $location for tag $tagno\n" );
202 ($ind1,$ind2) = (" ", " ");
203 } else {
204 $ind1 = substr( $indicators,0, 1 );
205 $ind2 = substr( $indicators,1, 1 );
206 }
207
208 # Split the subfield data into subfield name and data pairs
209 my @subfield_data;
210 for ( @subfields ) {
211 if ( length > 0 ) {
212 push( @subfield_data, substr($_,0,1),substr($_,1) );
213 } else {
214 $marc->_warn( "Entirely empty subfield found in tag $tagno" );
215 }
216 }
217
218 if ( !@subfield_data ) {
219 $marc->_warn( "no subfield data found $location for tag $tagno" );
220 next;
221 }
222
223 my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data );
224 if ( $field->warnings() ) {
225 $marc->_warn( $field->warnings() );
226 }
227 $marc->append_fields( $field );
228 }
229 } # looping through all the fields
230
231
232 return $marc;
233}
234
235=head2 update_leader()
236
237If any changes get made to the MARC record, the first 5 bytes of the
238leader (the length) will be invalid. This function updates the
239leader with the correct length of the record as it would be if
240written out to a file.
241
242=cut
243
244sub update_leader {
245 my $self = shift;
246
247 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
248
249 $self->_set_leader_lengths( $reclen, $baseaddress );
250}
251
252=head2 _build_tag_directory()
253
254Function for internal use only: Builds the tag directory that gets
255put in front of the data in a MARC record.
256
257Returns two array references, and two lengths: The tag directory, and the data fields themselves,
258the length of all data (including the Leader that we expect will be added),
259and the size of the Leader and tag directory.
260
261=cut
262
263sub _build_tag_directory {
264 my $marc = shift;
265 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
266 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
267
268 my @fields;
269 my @directory;
270
271 my $dataend = 0;
272 for my $field ( $marc->fields() ) {
273 # Dump data into proper format
274 my $str = $field->as_usmarc;
275 push( @fields, $str );
276
277 # Create directory entry
278 my $len = bytes::length( $str );
279
280 my $direntry = sprintf( "%03s%04d%05d", $field->tag, $len, $dataend );
281 push( @directory, $direntry );
282 $dataend += $len;
283 }
284
285 my $baseaddress =
286 LEADER_LEN + # better be 24
287 ( @directory * DIRECTORY_ENTRY_LEN ) +
288 # all the directory entries
289 1; # end-of-field marker
290
291
292 my $total =
293 $baseaddress + # stuff before first field
294 $dataend + # Length of the fields
295 1; # End-of-record marker
296
- -
299 return (\@fields, \@directory, $total, $baseaddress);
300}
301
302=head2 encode()
303
304Returns a string of characters suitable for writing out to a USMARC file,
305including the leader, directory and all the fields.
306
307=cut
308
309sub encode {
310 my $marc = shift;
311 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
312
313 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
314 $marc->set_leader_lengths( $reclen, $baseaddress );
315
316 # Glomp it all together
317 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
318}
31912µs1;
320
321__END__