Filename | /usr/share/perl5/MARC/File/XML.pm |
Statements | Executed 32 statements in 3.49ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 9.90ms | 31.5ms | BEGIN@9 | MARC::File::XML::
1 | 1 | 1 | 2.80ms | 44.5ms | BEGIN@11 | MARC::File::XML::
1 | 1 | 1 | 14µs | 19µs | BEGIN@3 | MARC::File::XML::
1 | 1 | 1 | 12µs | 12µs | CORE:regcomp (opcode) | MARC::File::XML::
1 | 1 | 1 | 12µs | 3.90ms | BEGIN@12 | MARC::File::XML::
1 | 1 | 1 | 10µs | 66µs | BEGIN@6 | MARC::File::XML::
1 | 1 | 1 | 7µs | 27µs | BEGIN@13 | MARC::File::XML::
1 | 1 | 1 | 7µs | 17µs | BEGIN@4 | MARC::File::XML::
1 | 1 | 1 | 7µs | 23µs | BEGIN@7 | MARC::File::XML::
1 | 1 | 1 | 6µs | 35µs | BEGIN@5 | MARC::File::XML::
1 | 1 | 1 | 4µs | 4µs | BEGIN@8 | MARC::File::XML::
1 | 1 | 1 | 3µs | 3µs | BEGIN@14 | MARC::File::XML::
1 | 1 | 1 | 3µs | 3µs | import | MARC::File::XML::
1 | 1 | 1 | 1µs | 1µs | CORE:qr (opcode) | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | DESTROY | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | __ANON__[:417] | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | _next | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | _parser | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | _unimarc_encoding | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | close | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | decideMARC8Binary | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | decode | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | default_record_format | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | encode | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | escape | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | footer | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | header | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | out | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | record | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | set_parser | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | write | MARC::File::XML::
0 | 0 | 0 | 0s | 0s | as_xml | MARC::Record::
0 | 0 | 0 | 0s | 0s | as_xml_record | MARC::Record::
0 | 0 | 0 | 0s | 0s | new_from_xml | MARC::Record::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package MARC::File::XML; | ||||
2 | |||||
3 | 2 | 24µs | 2 | 24µs | # spent 19µs (14+5) within MARC::File::XML::BEGIN@3 which was called:
# once (14µs+5µs) by C4::Biblio::BEGIN@29 at line 3 # spent 19µs making 1 call to MARC::File::XML::BEGIN@3
# spent 5µs making 1 call to warnings::import |
4 | 2 | 24µs | 2 | 26µs | # spent 17µs (7+10) within MARC::File::XML::BEGIN@4 which was called:
# once (7µs+10µs) by C4::Biblio::BEGIN@29 at line 4 # spent 17µs making 1 call to MARC::File::XML::BEGIN@4
# spent 10µs making 1 call to strict::import |
5 | 2 | 23µs | 2 | 65µs | # spent 35µs (6+30) within MARC::File::XML::BEGIN@5 which was called:
# once (6µs+30µs) by C4::Biblio::BEGIN@29 at line 5 # spent 35µs making 1 call to MARC::File::XML::BEGIN@5
# spent 30µs making 1 call to vars::import |
6 | 2 | 24µs | 2 | 123µs | # spent 66µs (10+57) within MARC::File::XML::BEGIN@6 which was called:
# once (10µs+57µs) by C4::Biblio::BEGIN@29 at line 6 # spent 66µs making 1 call to MARC::File::XML::BEGIN@6
# spent 57µs making 1 call to base::import |
7 | 2 | 21µs | 2 | 38µs | # spent 23µs (7+16) within MARC::File::XML::BEGIN@7 which was called:
# once (7µs+16µs) by C4::Biblio::BEGIN@29 at line 7 # spent 23µs making 1 call to MARC::File::XML::BEGIN@7
# spent 16µs making 1 call to Exporter::import |
8 | 2 | 18µs | 1 | 4µs | # spent 4µs within MARC::File::XML::BEGIN@8 which was called:
# once (4µs+0s) by C4::Biblio::BEGIN@29 at line 8 # spent 4µs making 1 call to MARC::File::XML::BEGIN@8 |
9 | 2 | 757µs | 2 | 31.6ms | # spent 31.5ms (9.90+21.6) within MARC::File::XML::BEGIN@9 which was called:
# once (9.90ms+21.6ms) by C4::Biblio::BEGIN@29 at line 9 # spent 31.5ms making 1 call to MARC::File::XML::BEGIN@9
# spent 119µs making 1 call to XML::LibXML::import |
10 | |||||
11 | 2 | 827µs | 2 | 44.6ms | # spent 44.5ms (2.80+41.7) within MARC::File::XML::BEGIN@11 which was called:
# once (2.80ms+41.7ms) by C4::Biblio::BEGIN@29 at line 11 # spent 44.5ms making 1 call to MARC::File::XML::BEGIN@11
# spent 34µs making 1 call to Exporter::import |
12 | 2 | 34µs | 2 | 7.80ms | # spent 3.90ms (12µs+3.89) within MARC::File::XML::BEGIN@12 which was called:
# once (12µs+3.89ms) by C4::Biblio::BEGIN@29 at line 12 # spent 3.90ms making 1 call to MARC::File::XML::BEGIN@12
# spent 3.89ms making 1 call to Exporter::import |
13 | 2 | 21µs | 2 | 47µs | # spent 27µs (7+20) within MARC::File::XML::BEGIN@13 which was called:
# once (7µs+20µs) by C4::Biblio::BEGIN@29 at line 13 # spent 27µs making 1 call to MARC::File::XML::BEGIN@13
# spent 20µs making 1 call to Exporter::import |
14 | 2 | 1.67ms | 1 | 3µs | # spent 3µs within MARC::File::XML::BEGIN@14 which was called:
# once (3µs+0s) by C4::Biblio::BEGIN@29 at line 14 # spent 3µs making 1 call to MARC::File::XML::BEGIN@14 |
15 | |||||
16 | 1 | 500ns | $VERSION = '1.0.3'; | ||
17 | |||||
18 | 1 | 100ns | our $parser; | ||
19 | |||||
20 | # spent 3µs within MARC::File::XML::import which was called:
# once (3µs+0s) by C4::Biblio::BEGIN@29 at line 29 of C4/Biblio.pm | ||||
21 | 1 | 300ns | my $class = shift; | ||
22 | 1 | 700ns | %_load_args = @_; | ||
23 | 1 | 600ns | $_load_args{ DefaultEncoding } ||= 'UTF-8'; | ||
24 | 1 | 3µs | $_load_args{ RecordFormat } ||= 'USMARC'; | ||
25 | } | ||||
26 | |||||
27 | =head1 NAME | ||||
28 | |||||
29 | MARC::File::XML - Work with MARC data encoded as XML | ||||
30 | |||||
31 | =head1 SYNOPSIS | ||||
32 | |||||
33 | ## Loading with USE options | ||||
34 | use MARC::File::XML ( BinaryEncoding => 'utf8', RecordFormat => 'UNIMARC' ); | ||||
35 | |||||
36 | ## Setting the record format without USE options | ||||
37 | MARC::File::XML->default_record_format('USMARC'); | ||||
38 | |||||
39 | ## reading with MARC::Batch | ||||
40 | my $batch = MARC::Batch->new( 'XML', $filename ); | ||||
41 | my $record = $batch->next(); | ||||
42 | |||||
43 | ## or reading with MARC::File::XML explicitly | ||||
44 | my $file = MARC::File::XML->in( $filename ); | ||||
45 | my $record = $file->next(); | ||||
46 | |||||
47 | ## serialize a single MARC::Record object as XML | ||||
48 | print $record->as_xml(); | ||||
49 | |||||
50 | ## write a bunch of records to a file | ||||
51 | my $file = MARC::File::XML->out( 'myfile.xml' ); | ||||
52 | $file->write( $record1 ); | ||||
53 | $file->write( $record2 ); | ||||
54 | $file->write( $record3 ); | ||||
55 | $file->close(); | ||||
56 | |||||
57 | ## instead of writing to disk, get the xml directly | ||||
58 | my $xml = join( "\n", | ||||
59 | MARC::File::XML::header(), | ||||
60 | MARC::File::XML::record( $record1 ), | ||||
61 | MARC::File::XML::record( $record2 ), | ||||
62 | MARC::File::XML::footer() | ||||
63 | ); | ||||
64 | |||||
65 | =head1 DESCRIPTION | ||||
66 | |||||
67 | The MARC-XML distribution is an extension to the MARC-Record distribution for | ||||
68 | working with MARC21 data that is encoded as XML. The XML encoding used is the | ||||
69 | MARC21slim schema supplied by the Library of Congress. More information may | ||||
70 | be obtained here: http://www.loc.gov/standards/marcxml/ | ||||
71 | |||||
72 | You must have MARC::Record installed to use MARC::File::XML. In fact | ||||
73 | once you install the MARC-XML distribution you will most likely not use it | ||||
74 | directly, but will have an additional file format available to you when you | ||||
75 | use MARC::Batch. | ||||
76 | |||||
77 | This version of MARC-XML supersedes an the versions ending with 0.25 which | ||||
78 | were used with the MARC.pm framework. MARC-XML now uses MARC::Record | ||||
79 | exclusively. | ||||
80 | |||||
81 | If you have any questions or would like to contribute to this module please | ||||
82 | sign on to the perl4lib list. More information about perl4lib is available | ||||
83 | at L<http://perl4lib.perl.org>. | ||||
84 | |||||
85 | =head1 METHODS | ||||
86 | |||||
87 | When you use MARC::File::XML your MARC::Record objects will have two new | ||||
88 | additional methods available to them: | ||||
89 | |||||
90 | =head2 MARC::File::XML->default_record_format([$format]) | ||||
91 | |||||
92 | Sets or returns the default record format used by MARC::File::XML. Valid | ||||
93 | formats are B<MARC21>, B<USMARC>, B<UNIMARC> and B<UNIMARCAUTH>. | ||||
94 | |||||
95 | MARC::File::XML->default_record_format('UNIMARC'); | ||||
96 | |||||
97 | =cut | ||||
98 | |||||
99 | sub default_record_format { | ||||
100 | my $self = shift; | ||||
101 | my $format = shift; | ||||
102 | |||||
103 | $_load_args{RecordFormat} = $format if ($format); | ||||
104 | |||||
105 | return $_load_args{RecordFormat}; | ||||
106 | } | ||||
107 | |||||
108 | |||||
109 | =head2 as_xml() | ||||
110 | |||||
111 | Returns a MARC::Record object serialized in XML. You can pass an optional format | ||||
112 | parameter to tell MARC::File::XML what type of record (USMARC, UNIMARC, UNIMARCAUTH) you are | ||||
113 | serializing. | ||||
114 | |||||
115 | print $record->as_xml([$format]); | ||||
116 | |||||
117 | =cut | ||||
118 | |||||
119 | sub MARC::Record::as_xml { | ||||
120 | my $record = shift; | ||||
121 | my $format = shift || $_load_args{RecordFormat}; | ||||
122 | return( MARC::File::XML::encode( $record, $format ) ); | ||||
123 | } | ||||
124 | |||||
125 | =head2 as_xml_record([$format]) | ||||
126 | |||||
127 | Returns a MARC::Record object serialized in XML without a collection wrapper. | ||||
128 | You can pass an optional format parameter to tell MARC::File::XML what type of | ||||
129 | record (USMARC, UNIMARC, UNIMARCAUTH) you are serializing. | ||||
130 | |||||
131 | print $record->as_xml_record('UNIMARC'); | ||||
132 | |||||
133 | =cut | ||||
134 | |||||
135 | sub MARC::Record::as_xml_record { | ||||
136 | my $record = shift; | ||||
137 | my $format = shift || $_load_args{RecordFormat}; | ||||
138 | return( MARC::File::XML::encode( $record, $format, 1 ) ); | ||||
139 | } | ||||
140 | |||||
141 | =head2 new_from_xml([$encoding, $format]) | ||||
142 | |||||
143 | If you have a chunk of XML and you want a record object for it you can use | ||||
144 | this method to generate a MARC::Record object. You can pass an optional | ||||
145 | encoding parameter to specify which encoding (UTF-8 or MARC-8) you would like | ||||
146 | the resulting record to be in. You can also pass a format parameter to specify | ||||
147 | the source record type, such as UNIMARC, UNIMARCAUTH, USMARC or MARC21. | ||||
148 | |||||
149 | my $record = MARC::Record->new_from_xml( $xml, $encoding, $format ); | ||||
150 | |||||
151 | Note: only works for single record XML chunks. | ||||
152 | |||||
153 | =cut | ||||
154 | |||||
155 | sub MARC::Record::new_from_xml { | ||||
156 | my $xml = shift; | ||||
157 | ## to allow calling as MARC::Record::new_from_xml() | ||||
158 | ## or MARC::Record->new_from_xml() | ||||
159 | $xml = shift if ( ref($xml) || ($xml eq "MARC::Record") ); | ||||
160 | |||||
161 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
162 | my $format = shift || $_load_args{RecordFormat}; | ||||
163 | return( MARC::File::XML::decode( $xml, $enc, $format ) ); | ||||
164 | } | ||||
165 | |||||
166 | =pod | ||||
167 | |||||
168 | If you want to write records as XML to a file you can use out() with write() | ||||
169 | to serialize more than one record as XML. | ||||
170 | |||||
171 | =head2 out() | ||||
172 | |||||
173 | A constructor for creating a MARC::File::XML object that can write XML to a | ||||
174 | file. You must pass in the name of a file to write XML to. If the $encoding | ||||
175 | parameter or the DefaultEncoding (see above) is set to UTF-8 then the binmode | ||||
176 | of the output file will be set appropriately. | ||||
177 | |||||
178 | my $file = MARC::File::XML->out( $filename [, $encoding] ); | ||||
179 | |||||
180 | =cut | ||||
181 | |||||
182 | sub out { | ||||
183 | my ( $class, $filename, $enc ) = @_; | ||||
184 | my $fh = IO::File->new( ">$filename" ) or croak( $! ); | ||||
185 | $enc ||= $_load_args{DefaultEncoding}; | ||||
186 | |||||
187 | if ($enc =~ /^utf-?8$/oi) { | ||||
188 | $fh->binmode(':utf8'); | ||||
189 | } else { | ||||
190 | $fh->binmode(':raw'); | ||||
191 | } | ||||
192 | |||||
193 | my %self = ( | ||||
194 | filename => $filename, | ||||
195 | fh => $fh, | ||||
196 | header => 0, | ||||
197 | encoding => $enc | ||||
198 | ); | ||||
199 | return( bless \%self, ref( $class ) || $class ); | ||||
200 | } | ||||
201 | |||||
202 | =head2 write() | ||||
203 | |||||
204 | Used in tandem with out() to write records to a file. | ||||
205 | |||||
206 | my $file = MARC::File::XML->out( $filename ); | ||||
207 | $file->write( $record1 ); | ||||
208 | $file->write( $record2 ); | ||||
209 | |||||
210 | =cut | ||||
211 | |||||
212 | sub write { | ||||
213 | my ( $self, $record, $enc ) = @_; | ||||
214 | if ( ! $self->{ fh } ) { | ||||
215 | croak( "MARC::File::XML object not open for writing" ); | ||||
216 | } | ||||
217 | if ( ! $record ) { | ||||
218 | croak( "must pass write() a MARC::Record object" ); | ||||
219 | } | ||||
220 | ## print the XML header if we haven't already | ||||
221 | if ( ! $self->{ header } ) { | ||||
222 | $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding}; | ||||
223 | $self->{ fh }->print( header( $enc ) ); | ||||
224 | $self->{ header } = 1; | ||||
225 | } | ||||
226 | ## print out the record | ||||
227 | $self->{ fh }->print( record( $record ) ) || croak( $! ); | ||||
228 | return( 1 ); | ||||
229 | } | ||||
230 | |||||
231 | =head2 close() | ||||
232 | |||||
233 | When writing records to disk the filehandle is automatically closed when you | ||||
234 | the MARC::File::XML object goes out of scope. If you want to close it explicitly | ||||
235 | use the close() method. | ||||
236 | |||||
237 | =cut | ||||
238 | |||||
239 | sub close { | ||||
240 | my $self = shift; | ||||
241 | if ( $self->{ fh } ) { | ||||
242 | $self->{ fh }->print( footer() ) if $self->{ header }; | ||||
243 | $self->{ fh } = undef; | ||||
244 | $self->{ filename } = undef; | ||||
245 | $self->{ header } = undef; | ||||
246 | } | ||||
247 | return( 1 ); | ||||
248 | } | ||||
249 | |||||
250 | ## makes sure that the XML file is closed off | ||||
251 | |||||
252 | sub DESTROY { | ||||
253 | shift->close(); | ||||
254 | } | ||||
255 | |||||
256 | =pod | ||||
257 | |||||
258 | If you want to generate batches of records as XML, but don't want to write to | ||||
259 | disk you'll have to use header(), record() and footer() to generate the | ||||
260 | different portions. | ||||
261 | |||||
262 | $xml = join( "\n", | ||||
263 | MARC::File::XML::header(), | ||||
264 | MARC::File::XML::record( $record1 ), | ||||
265 | MARC::File::XML::record( $record2 ), | ||||
266 | MARC::File::XML::record( $record3 ), | ||||
267 | MARC::File::XML::footer() | ||||
268 | ); | ||||
269 | |||||
270 | =head2 header() | ||||
271 | |||||
272 | Returns a string of XML to use as the header to your XML file. | ||||
273 | |||||
274 | =cut | ||||
275 | |||||
276 | sub header { | ||||
277 | my $enc = shift; | ||||
278 | $enc = shift if ( $enc && (ref($enc) || ($enc eq "MARC::File::XML")) ); | ||||
279 | $enc ||= 'UTF-8'; | ||||
280 | return( <<MARC_XML_HEADER ); | ||||
281 | <?xml version="1.0" encoding="$enc"?> | ||||
282 | <collection | ||||
283 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
284 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
285 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
286 | MARC_XML_HEADER | ||||
287 | } | ||||
288 | |||||
289 | =head2 footer() | ||||
290 | |||||
291 | Returns a string of XML to use at the end of your XML file. | ||||
292 | |||||
293 | =cut | ||||
294 | |||||
295 | sub footer { | ||||
296 | return( "</collection>" ); | ||||
297 | } | ||||
298 | |||||
299 | =head2 record() | ||||
300 | |||||
301 | Returns a chunk of XML suitable for placement between the header and the footer. | ||||
302 | |||||
303 | =cut | ||||
304 | |||||
305 | sub record { | ||||
306 | my $record = shift; | ||||
307 | my $format = shift; | ||||
308 | my $include_full_record_header = shift; | ||||
309 | my $enc = shift; | ||||
310 | |||||
311 | $format ||= $_load_args{RecordFormat}; | ||||
312 | |||||
313 | my $_transcode = 0; | ||||
314 | my $ldr = $record->leader; | ||||
315 | my $original_encoding = substr($ldr,9,1); | ||||
316 | |||||
317 | # Does the record think it is already Unicode? | ||||
318 | if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) { | ||||
319 | # If not, we'll make it so | ||||
320 | $_transcode++; | ||||
321 | substr($ldr,9,1,'a'); | ||||
322 | $record->leader( $ldr ); | ||||
323 | } | ||||
324 | |||||
325 | my @xml = (); | ||||
326 | |||||
327 | if ($include_full_record_header) { | ||||
328 | push @xml, <<HEADER | ||||
329 | <?xml version="1.0" encoding="$enc"?> | ||||
330 | <record | ||||
331 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
332 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
333 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
334 | HEADER | ||||
335 | |||||
336 | } else { | ||||
337 | push( @xml, "<record>" ); | ||||
338 | } | ||||
339 | |||||
340 | push( @xml, " <leader>" . escape( $record->leader ) . "</leader>" ); | ||||
341 | |||||
342 | foreach my $field ( $record->fields() ) { | ||||
343 | my ($tag) = escape( $field->tag() ); | ||||
344 | if ( $field->is_control_field() ) { | ||||
345 | my $data = $field->data; | ||||
346 | push( @xml, qq( <controlfield tag="$tag">) . | ||||
347 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) ); | ||||
348 | } else { | ||||
349 | my ($i1) = escape( $field->indicator( 1 ) ); | ||||
350 | my ($i2) = escape( $field->indicator( 2 ) ); | ||||
351 | push( @xml, qq( <datafield tag="$tag" ind1="$i1" ind2="$i2">) ); | ||||
352 | foreach my $subfield ( $field->subfields() ) { | ||||
353 | my ( $code, $data ) = ( escape( $$subfield[0] ), $$subfield[1] ); | ||||
354 | push( @xml, qq( <subfield code="$code">). | ||||
355 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) ); | ||||
356 | } | ||||
357 | push( @xml, " </datafield>" ); | ||||
358 | } | ||||
359 | } | ||||
360 | push( @xml, "</record>\n" ); | ||||
361 | |||||
362 | if ($_transcode) { | ||||
363 | substr($ldr,9,1,$original_encoding); | ||||
364 | $record->leader( $ldr ); | ||||
365 | } | ||||
366 | |||||
367 | return( join( "\n", @xml ) ); | ||||
368 | } | ||||
369 | |||||
370 | 1 | 2µs | my %ESCAPES = ( | ||
371 | '&' => '&', | ||||
372 | '<' => '<', | ||||
373 | '>' => '>', | ||||
374 | ); | ||||
375 | 1 | 4µs | my $_base_escape_regex = join( '|', map { "\Q$_\E" } keys %ESCAPES ); | ||
376 | 1 | 22µs | 2 | 13µs | my $ESCAPE_REGEX = qr/$_base_escape_regex/; # spent 12µs making 1 call to MARC::File::XML::CORE:regcomp
# spent 1µs making 1 call to MARC::File::XML::CORE:qr |
377 | |||||
378 | sub escape { | ||||
379 | my $string = shift; | ||||
380 | return '' if ! defined $string or $string eq ''; | ||||
381 | $string =~ s/($ESCAPE_REGEX)/$ESCAPES{$1}/oge; | ||||
382 | return( $string ); | ||||
383 | } | ||||
384 | |||||
385 | sub _next { | ||||
386 | my $self = shift; | ||||
387 | my $fh = $self->{ fh }; | ||||
388 | |||||
389 | ## return undef at the end of the file | ||||
390 | return if eof($fh); | ||||
391 | |||||
392 | ## get a chunk of xml for a record | ||||
393 | local $/ = 'record>'; | ||||
394 | my $xml = <$fh>; | ||||
395 | |||||
396 | ## do we have enough? | ||||
397 | $xml .= <$fh> if $xml !~ m!</([^:]+:){0,1}record>$!; | ||||
398 | ## trim stuff before the start record element | ||||
399 | $xml =~ s/.*?<(([^:]+:){0,1})record.*?>/<$1record>/s; | ||||
400 | |||||
401 | ## return undef if there isn't a good chunk of xml | ||||
402 | return if ( $xml !~ m|<(([^:]+:){0,1})record>.*</\1record>|s ); | ||||
403 | |||||
404 | ## if we have a namespace prefix, restore the declaration | ||||
405 | if ($xml =~ /<([^:]+:)record>/) { | ||||
406 | $xml =~ s!<([^:]+):record>!<$1:record xmlns:$1="http://www.loc.gov/MARC21/slim">!; | ||||
407 | } | ||||
408 | |||||
409 | ## return the chunk of xml | ||||
410 | return( $xml ); | ||||
411 | } | ||||
412 | |||||
413 | sub _parser { | ||||
414 | $parser ||= XML::LibXML->new( | ||||
415 | ext_ent_handler => sub { | ||||
416 | die "External entities are not supported\n"; | ||||
417 | } | ||||
418 | ); | ||||
419 | return $parser; | ||||
420 | } | ||||
421 | |||||
422 | =head2 decode() | ||||
423 | |||||
424 | You probably don't ever want to call this method directly. If you do | ||||
425 | you should pass in a chunk of XML as the argument. | ||||
426 | |||||
427 | It is normally invoked by a call to next(), see L<MARC::Batch> or L<MARC::File>. | ||||
428 | |||||
429 | =cut | ||||
430 | |||||
431 | sub decode { | ||||
432 | my $self = shift; | ||||
433 | my $text; | ||||
434 | my $location = ''; | ||||
435 | |||||
436 | if ( ref($self) =~ /^MARC::File/ ) { | ||||
437 | $location = 'in record '.$self->{recnum}; | ||||
438 | $text = shift; | ||||
439 | } else { | ||||
440 | $location = 'in record 1'; | ||||
441 | $text = $self=~/MARC::File/ ? shift : $self; | ||||
442 | } | ||||
443 | |||||
444 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
445 | my $format = shift || $_load_args{RecordFormat}; | ||||
446 | |||||
447 | my $parser = _parser(); | ||||
448 | my $xml = $parser->parse_string($text); | ||||
449 | |||||
450 | my $root = $xml->documentElement; | ||||
451 | croak('MARCXML document has no root element') unless defined $root; | ||||
452 | if ($root->localname eq 'collection') { | ||||
453 | my @records = $root->getChildrenByLocalName('record'); | ||||
454 | croak('MARCXML document has no record element') unless @records; | ||||
455 | $root = $records[0]; | ||||
456 | } | ||||
457 | |||||
458 | my $rec = MARC::Record->new(); | ||||
459 | my @leaders = $root->getElementsByLocalName('leader'); | ||||
460 | my $transcode_to_marc8 = 0; | ||||
461 | if (@leaders) { | ||||
462 | my $leader = $leaders[0]->textContent; | ||||
463 | |||||
464 | # this bit is rather questionable | ||||
465 | $transcode_to_marc8 = substr($leader, 9, 1) eq 'a' && decideMARC8Binary($format, $enc) ? 1 : 0; | ||||
466 | substr($leader, 9, 1) = ' ' if $transcode_to_marc8; | ||||
467 | |||||
468 | $rec->leader($leader); | ||||
469 | } | ||||
470 | |||||
471 | my @fields = (); | ||||
472 | foreach my $elt ($root->getChildrenByLocalName('*')) { | ||||
473 | if ($elt->localname eq 'controlfield') { | ||||
474 | push @fields, MARC::Field->new($elt->getAttribute('tag'), $elt->textContent); | ||||
475 | } elsif ($elt->localname eq 'datafield') { | ||||
476 | my @sfs = (); | ||||
477 | foreach my $sfelt ($elt->getChildrenByLocalName('subfield')) { | ||||
478 | push @sfs, $sfelt->getAttribute('code'), | ||||
479 | $transcode_to_marc8 ? utf8_to_marc8($sfelt->textContent()) : $sfelt->textContent(); | ||||
480 | } | ||||
481 | push @fields, MARC::Field->new( | ||||
482 | $elt->getAttribute('tag'), | ||||
483 | $elt->getAttribute('ind1'), | ||||
484 | $elt->getAttribute('ind2'), | ||||
485 | @sfs | ||||
486 | ); | ||||
487 | } | ||||
488 | } | ||||
489 | $rec->append_fields(@fields); | ||||
490 | return $rec; | ||||
491 | |||||
492 | } | ||||
493 | |||||
494 | =head2 MARC::File::XML->set_parser($parser) | ||||
495 | |||||
496 | Pass a XML::LibXML parser to MARC::File::XML | ||||
497 | for it to use. This is optional, meant for | ||||
498 | use by applications that maintain a shared | ||||
499 | parser object or which require that external | ||||
500 | entities be processed. Note that the latter | ||||
501 | is a potential security risk; see | ||||
502 | L<https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing>. | ||||
503 | |||||
504 | =cut | ||||
505 | |||||
506 | sub set_parser { | ||||
507 | my $self = shift; | ||||
508 | |||||
509 | $parser = shift; | ||||
510 | undef $parser unless ref($parser) =~ /XML::LibXML/; | ||||
511 | } | ||||
512 | |||||
513 | sub decideMARC8Binary { | ||||
514 | my $format = shift; | ||||
515 | my $enc = shift; | ||||
516 | |||||
517 | return 0 if (defined($format) && lc($format) =~ /^unimarc/o); | ||||
518 | return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o); | ||||
519 | return 1; | ||||
520 | } | ||||
521 | |||||
522 | |||||
523 | =head2 encode() | ||||
524 | |||||
525 | You probably want to use the as_xml() method on your MARC::Record object | ||||
526 | instead of calling this directly. But if you want to you just need to | ||||
527 | pass in the MARC::Record object you wish to encode as XML, and you will be | ||||
528 | returned the XML as a scalar. | ||||
529 | |||||
530 | =cut | ||||
531 | |||||
532 | sub encode { | ||||
533 | my $record = shift; | ||||
534 | my $format = shift || $_load_args{RecordFormat}; | ||||
535 | my $without_collection_header = shift; | ||||
536 | my $enc = shift || $_load_args{DefaultEncoding}; | ||||
537 | |||||
538 | if (lc($format) =~ /^unimarc/o) { | ||||
539 | $enc = _unimarc_encoding( $format => $record ); | ||||
540 | } | ||||
541 | |||||
542 | my @xml = (); | ||||
543 | push( @xml, header( $enc ) ) unless ($without_collection_header); | ||||
544 | # verbose, but naming the header output flags this way to avoid | ||||
545 | # the potential confusion identified in CPAN bug #34082 | ||||
546 | # http://rt.cpan.org/Public/Bug/Display.html?id=34082 | ||||
547 | my $include_full_record_header = ($without_collection_header) ? 1 : 0; | ||||
548 | push( @xml, record( $record, $format, $include_full_record_header, $enc ) ); | ||||
549 | push( @xml, footer() ) unless ($without_collection_header); | ||||
550 | |||||
551 | return( join( "\n", @xml ) ); | ||||
552 | } | ||||
553 | |||||
554 | sub _unimarc_encoding { | ||||
555 | my $f = shift; | ||||
556 | my $r = shift; | ||||
557 | |||||
558 | my $pos = 26; | ||||
559 | $pos = 13 if (lc($f) eq 'unimarcauth'); | ||||
560 | |||||
561 | my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 ); | ||||
562 | |||||
563 | if ($enc eq '01' || $enc eq '03') { | ||||
564 | return 'ISO-8859-1'; | ||||
565 | } elsif ($enc eq '50') { | ||||
566 | return 'UTF-8'; | ||||
567 | } else { | ||||
568 | die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100\$a -> " . $r->subfield(100 => 'a'); | ||||
569 | } | ||||
570 | } | ||||
571 | |||||
572 | =head1 TODO | ||||
573 | |||||
574 | =over 4 | ||||
575 | |||||
576 | =item * Support for callback filters in decode(). | ||||
577 | |||||
578 | =back | ||||
579 | |||||
580 | =head1 SEE ALSO | ||||
581 | |||||
582 | =over 4 | ||||
583 | |||||
584 | =item L<http://www.loc.gov/standards/marcxml/> | ||||
585 | |||||
586 | =item L<MARC::File::USMARC> | ||||
587 | |||||
588 | =item L<MARC::Batch> | ||||
589 | |||||
590 | =item L<MARC::Record> | ||||
591 | |||||
592 | =back | ||||
593 | |||||
594 | =head1 AUTHORS | ||||
595 | |||||
596 | =over 4 | ||||
597 | |||||
598 | =item * Ed Summers <ehs@pobox.com> | ||||
599 | |||||
600 | =back | ||||
601 | |||||
602 | =cut | ||||
603 | |||||
604 | 1 | 5µs | 1; | ||
# spent 1µs within MARC::File::XML::CORE:qr which was called:
# once (1µs+0s) by C4::Biblio::BEGIN@29 at line 376 | |||||
# spent 12µs within MARC::File::XML::CORE:regcomp which was called:
# once (12µs+0s) by C4::Biblio::BEGIN@29 at line 376 |