| Filename | /usr/share/perl5/MARC/File/XML.pm |
| Statements | Executed 32 statements in 3.49ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 9.90ms | 31.5ms | MARC::File::XML::BEGIN@9 |
| 1 | 1 | 1 | 2.80ms | 44.5ms | MARC::File::XML::BEGIN@11 |
| 1 | 1 | 1 | 14µs | 19µs | MARC::File::XML::BEGIN@3 |
| 1 | 1 | 1 | 12µs | 12µs | MARC::File::XML::CORE:regcomp (opcode) |
| 1 | 1 | 1 | 12µs | 3.90ms | MARC::File::XML::BEGIN@12 |
| 1 | 1 | 1 | 10µs | 66µs | MARC::File::XML::BEGIN@6 |
| 1 | 1 | 1 | 7µs | 27µs | MARC::File::XML::BEGIN@13 |
| 1 | 1 | 1 | 7µs | 17µs | MARC::File::XML::BEGIN@4 |
| 1 | 1 | 1 | 7µs | 23µs | MARC::File::XML::BEGIN@7 |
| 1 | 1 | 1 | 6µs | 35µs | MARC::File::XML::BEGIN@5 |
| 1 | 1 | 1 | 4µs | 4µs | MARC::File::XML::BEGIN@8 |
| 1 | 1 | 1 | 3µs | 3µs | MARC::File::XML::BEGIN@14 |
| 1 | 1 | 1 | 3µs | 3µs | MARC::File::XML::import |
| 1 | 1 | 1 | 1µs | 1µs | MARC::File::XML::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::DESTROY |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::__ANON__[:417] |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_next |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_parser |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_unimarc_encoding |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::close |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::decideMARC8Binary |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::decode |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::default_record_format |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::encode |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::escape |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::footer |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::header |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::out |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::record |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::set_parser |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::write |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml_record |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::new_from_xml |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package MARC::File::XML; | ||||
| 2 | |||||
| 3 | 2 | 24µs | 2 | 24µs | # spent 19µs (14+5) within MARC::File::XML::BEGIN@3 which was called:
# once (14µs+5µs) by C4::Biblio::BEGIN@29 at line 3 # spent 19µs making 1 call to MARC::File::XML::BEGIN@3
# spent 5µs making 1 call to warnings::import |
| 4 | 2 | 24µs | 2 | 26µs | # spent 17µs (7+10) within MARC::File::XML::BEGIN@4 which was called:
# once (7µs+10µs) by C4::Biblio::BEGIN@29 at line 4 # spent 17µs making 1 call to MARC::File::XML::BEGIN@4
# spent 10µs making 1 call to strict::import |
| 5 | 2 | 23µs | 2 | 65µs | # spent 35µs (6+30) within MARC::File::XML::BEGIN@5 which was called:
# once (6µs+30µs) by C4::Biblio::BEGIN@29 at line 5 # spent 35µs making 1 call to MARC::File::XML::BEGIN@5
# spent 30µs making 1 call to vars::import |
| 6 | 2 | 24µs | 2 | 123µs | # spent 66µs (10+57) within MARC::File::XML::BEGIN@6 which was called:
# once (10µs+57µs) by C4::Biblio::BEGIN@29 at line 6 # spent 66µs making 1 call to MARC::File::XML::BEGIN@6
# spent 57µs making 1 call to base::import |
| 7 | 2 | 21µs | 2 | 38µs | # spent 23µs (7+16) within MARC::File::XML::BEGIN@7 which was called:
# once (7µs+16µs) by C4::Biblio::BEGIN@29 at line 7 # spent 23µs making 1 call to MARC::File::XML::BEGIN@7
# spent 16µs making 1 call to Exporter::import |
| 8 | 2 | 18µs | 1 | 4µs | # spent 4µs within MARC::File::XML::BEGIN@8 which was called:
# once (4µs+0s) by C4::Biblio::BEGIN@29 at line 8 # spent 4µs making 1 call to MARC::File::XML::BEGIN@8 |
| 9 | 2 | 757µs | 2 | 31.6ms | # spent 31.5ms (9.90+21.6) within MARC::File::XML::BEGIN@9 which was called:
# once (9.90ms+21.6ms) by C4::Biblio::BEGIN@29 at line 9 # spent 31.5ms making 1 call to MARC::File::XML::BEGIN@9
# spent 119µs making 1 call to XML::LibXML::import |
| 10 | |||||
| 11 | 2 | 827µs | 2 | 44.6ms | # spent 44.5ms (2.80+41.7) within MARC::File::XML::BEGIN@11 which was called:
# once (2.80ms+41.7ms) by C4::Biblio::BEGIN@29 at line 11 # spent 44.5ms making 1 call to MARC::File::XML::BEGIN@11
# spent 34µs making 1 call to Exporter::import |
| 12 | 2 | 34µs | 2 | 7.80ms | # spent 3.90ms (12µs+3.89) within MARC::File::XML::BEGIN@12 which was called:
# once (12µs+3.89ms) by C4::Biblio::BEGIN@29 at line 12 # spent 3.90ms making 1 call to MARC::File::XML::BEGIN@12
# spent 3.89ms making 1 call to Exporter::import |
| 13 | 2 | 21µs | 2 | 47µs | # spent 27µs (7+20) within MARC::File::XML::BEGIN@13 which was called:
# once (7µs+20µs) by C4::Biblio::BEGIN@29 at line 13 # spent 27µs making 1 call to MARC::File::XML::BEGIN@13
# spent 20µs making 1 call to Exporter::import |
| 14 | 2 | 1.67ms | 1 | 3µs | # spent 3µs within MARC::File::XML::BEGIN@14 which was called:
# once (3µs+0s) by C4::Biblio::BEGIN@29 at line 14 # spent 3µs making 1 call to MARC::File::XML::BEGIN@14 |
| 15 | |||||
| 16 | 1 | 500ns | $VERSION = '1.0.3'; | ||
| 17 | |||||
| 18 | 1 | 100ns | our $parser; | ||
| 19 | |||||
| 20 | # spent 3µs within MARC::File::XML::import which was called:
# once (3µs+0s) by C4::Biblio::BEGIN@29 at line 29 of C4/Biblio.pm | ||||
| 21 | 1 | 300ns | my $class = shift; | ||
| 22 | 1 | 700ns | %_load_args = @_; | ||
| 23 | 1 | 600ns | $_load_args{ DefaultEncoding } ||= 'UTF-8'; | ||
| 24 | 1 | 3µs | $_load_args{ RecordFormat } ||= 'USMARC'; | ||
| 25 | } | ||||
| 26 | |||||
| 27 | =head1 NAME | ||||
| 28 | |||||
| 29 | MARC::File::XML - Work with MARC data encoded as XML | ||||
| 30 | |||||
| 31 | =head1 SYNOPSIS | ||||
| 32 | |||||
| 33 | ## Loading with USE options | ||||
| 34 | use MARC::File::XML ( BinaryEncoding => 'utf8', RecordFormat => 'UNIMARC' ); | ||||
| 35 | |||||
| 36 | ## Setting the record format without USE options | ||||
| 37 | MARC::File::XML->default_record_format('USMARC'); | ||||
| 38 | |||||
| 39 | ## reading with MARC::Batch | ||||
| 40 | my $batch = MARC::Batch->new( 'XML', $filename ); | ||||
| 41 | my $record = $batch->next(); | ||||
| 42 | |||||
| 43 | ## or reading with MARC::File::XML explicitly | ||||
| 44 | my $file = MARC::File::XML->in( $filename ); | ||||
| 45 | my $record = $file->next(); | ||||
| 46 | |||||
| 47 | ## serialize a single MARC::Record object as XML | ||||
| 48 | print $record->as_xml(); | ||||
| 49 | |||||
| 50 | ## write a bunch of records to a file | ||||
| 51 | my $file = MARC::File::XML->out( 'myfile.xml' ); | ||||
| 52 | $file->write( $record1 ); | ||||
| 53 | $file->write( $record2 ); | ||||
| 54 | $file->write( $record3 ); | ||||
| 55 | $file->close(); | ||||
| 56 | |||||
| 57 | ## instead of writing to disk, get the xml directly | ||||
| 58 | my $xml = join( "\n", | ||||
| 59 | MARC::File::XML::header(), | ||||
| 60 | MARC::File::XML::record( $record1 ), | ||||
| 61 | MARC::File::XML::record( $record2 ), | ||||
| 62 | MARC::File::XML::footer() | ||||
| 63 | ); | ||||
| 64 | |||||
| 65 | =head1 DESCRIPTION | ||||
| 66 | |||||
| 67 | The MARC-XML distribution is an extension to the MARC-Record distribution for | ||||
| 68 | working with MARC21 data that is encoded as XML. The XML encoding used is the | ||||
| 69 | MARC21slim schema supplied by the Library of Congress. More information may | ||||
| 70 | be obtained here: http://www.loc.gov/standards/marcxml/ | ||||
| 71 | |||||
| 72 | You must have MARC::Record installed to use MARC::File::XML. In fact | ||||
| 73 | once you install the MARC-XML distribution you will most likely not use it | ||||
| 74 | directly, but will have an additional file format available to you when you | ||||
| 75 | use MARC::Batch. | ||||
| 76 | |||||
| 77 | This version of MARC-XML supersedes an the versions ending with 0.25 which | ||||
| 78 | were used with the MARC.pm framework. MARC-XML now uses MARC::Record | ||||
| 79 | exclusively. | ||||
| 80 | |||||
| 81 | If you have any questions or would like to contribute to this module please | ||||
| 82 | sign on to the perl4lib list. More information about perl4lib is available | ||||
| 83 | at L<http://perl4lib.perl.org>. | ||||
| 84 | |||||
| 85 | =head1 METHODS | ||||
| 86 | |||||
| 87 | When you use MARC::File::XML your MARC::Record objects will have two new | ||||
| 88 | additional methods available to them: | ||||
| 89 | |||||
| 90 | =head2 MARC::File::XML->default_record_format([$format]) | ||||
| 91 | |||||
| 92 | Sets or returns the default record format used by MARC::File::XML. Valid | ||||
| 93 | formats are B<MARC21>, B<USMARC>, B<UNIMARC> and B<UNIMARCAUTH>. | ||||
| 94 | |||||
| 95 | MARC::File::XML->default_record_format('UNIMARC'); | ||||
| 96 | |||||
| 97 | =cut | ||||
| 98 | |||||
| 99 | sub default_record_format { | ||||
| 100 | my $self = shift; | ||||
| 101 | my $format = shift; | ||||
| 102 | |||||
| 103 | $_load_args{RecordFormat} = $format if ($format); | ||||
| 104 | |||||
| 105 | return $_load_args{RecordFormat}; | ||||
| 106 | } | ||||
| 107 | |||||
| 108 | |||||
| 109 | =head2 as_xml() | ||||
| 110 | |||||
| 111 | Returns a MARC::Record object serialized in XML. You can pass an optional format | ||||
| 112 | parameter to tell MARC::File::XML what type of record (USMARC, UNIMARC, UNIMARCAUTH) you are | ||||
| 113 | serializing. | ||||
| 114 | |||||
| 115 | print $record->as_xml([$format]); | ||||
| 116 | |||||
| 117 | =cut | ||||
| 118 | |||||
| 119 | sub MARC::Record::as_xml { | ||||
| 120 | my $record = shift; | ||||
| 121 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 122 | return( MARC::File::XML::encode( $record, $format ) ); | ||||
| 123 | } | ||||
| 124 | |||||
| 125 | =head2 as_xml_record([$format]) | ||||
| 126 | |||||
| 127 | Returns a MARC::Record object serialized in XML without a collection wrapper. | ||||
| 128 | You can pass an optional format parameter to tell MARC::File::XML what type of | ||||
| 129 | record (USMARC, UNIMARC, UNIMARCAUTH) you are serializing. | ||||
| 130 | |||||
| 131 | print $record->as_xml_record('UNIMARC'); | ||||
| 132 | |||||
| 133 | =cut | ||||
| 134 | |||||
| 135 | sub MARC::Record::as_xml_record { | ||||
| 136 | my $record = shift; | ||||
| 137 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 138 | return( MARC::File::XML::encode( $record, $format, 1 ) ); | ||||
| 139 | } | ||||
| 140 | |||||
| 141 | =head2 new_from_xml([$encoding, $format]) | ||||
| 142 | |||||
| 143 | If you have a chunk of XML and you want a record object for it you can use | ||||
| 144 | this method to generate a MARC::Record object. You can pass an optional | ||||
| 145 | encoding parameter to specify which encoding (UTF-8 or MARC-8) you would like | ||||
| 146 | the resulting record to be in. You can also pass a format parameter to specify | ||||
| 147 | the source record type, such as UNIMARC, UNIMARCAUTH, USMARC or MARC21. | ||||
| 148 | |||||
| 149 | my $record = MARC::Record->new_from_xml( $xml, $encoding, $format ); | ||||
| 150 | |||||
| 151 | Note: only works for single record XML chunks. | ||||
| 152 | |||||
| 153 | =cut | ||||
| 154 | |||||
| 155 | sub MARC::Record::new_from_xml { | ||||
| 156 | my $xml = shift; | ||||
| 157 | ## to allow calling as MARC::Record::new_from_xml() | ||||
| 158 | ## or MARC::Record->new_from_xml() | ||||
| 159 | $xml = shift if ( ref($xml) || ($xml eq "MARC::Record") ); | ||||
| 160 | |||||
| 161 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
| 162 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 163 | return( MARC::File::XML::decode( $xml, $enc, $format ) ); | ||||
| 164 | } | ||||
| 165 | |||||
| 166 | =pod | ||||
| 167 | |||||
| 168 | If you want to write records as XML to a file you can use out() with write() | ||||
| 169 | to serialize more than one record as XML. | ||||
| 170 | |||||
| 171 | =head2 out() | ||||
| 172 | |||||
| 173 | A constructor for creating a MARC::File::XML object that can write XML to a | ||||
| 174 | file. You must pass in the name of a file to write XML to. If the $encoding | ||||
| 175 | parameter or the DefaultEncoding (see above) is set to UTF-8 then the binmode | ||||
| 176 | of the output file will be set appropriately. | ||||
| 177 | |||||
| 178 | my $file = MARC::File::XML->out( $filename [, $encoding] ); | ||||
| 179 | |||||
| 180 | =cut | ||||
| 181 | |||||
| 182 | sub out { | ||||
| 183 | my ( $class, $filename, $enc ) = @_; | ||||
| 184 | my $fh = IO::File->new( ">$filename" ) or croak( $! ); | ||||
| 185 | $enc ||= $_load_args{DefaultEncoding}; | ||||
| 186 | |||||
| 187 | if ($enc =~ /^utf-?8$/oi) { | ||||
| 188 | $fh->binmode(':utf8'); | ||||
| 189 | } else { | ||||
| 190 | $fh->binmode(':raw'); | ||||
| 191 | } | ||||
| 192 | |||||
| 193 | my %self = ( | ||||
| 194 | filename => $filename, | ||||
| 195 | fh => $fh, | ||||
| 196 | header => 0, | ||||
| 197 | encoding => $enc | ||||
| 198 | ); | ||||
| 199 | return( bless \%self, ref( $class ) || $class ); | ||||
| 200 | } | ||||
| 201 | |||||
| 202 | =head2 write() | ||||
| 203 | |||||
| 204 | Used in tandem with out() to write records to a file. | ||||
| 205 | |||||
| 206 | my $file = MARC::File::XML->out( $filename ); | ||||
| 207 | $file->write( $record1 ); | ||||
| 208 | $file->write( $record2 ); | ||||
| 209 | |||||
| 210 | =cut | ||||
| 211 | |||||
| 212 | sub write { | ||||
| 213 | my ( $self, $record, $enc ) = @_; | ||||
| 214 | if ( ! $self->{ fh } ) { | ||||
| 215 | croak( "MARC::File::XML object not open for writing" ); | ||||
| 216 | } | ||||
| 217 | if ( ! $record ) { | ||||
| 218 | croak( "must pass write() a MARC::Record object" ); | ||||
| 219 | } | ||||
| 220 | ## print the XML header if we haven't already | ||||
| 221 | if ( ! $self->{ header } ) { | ||||
| 222 | $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding}; | ||||
| 223 | $self->{ fh }->print( header( $enc ) ); | ||||
| 224 | $self->{ header } = 1; | ||||
| 225 | } | ||||
| 226 | ## print out the record | ||||
| 227 | $self->{ fh }->print( record( $record ) ) || croak( $! ); | ||||
| 228 | return( 1 ); | ||||
| 229 | } | ||||
| 230 | |||||
| 231 | =head2 close() | ||||
| 232 | |||||
| 233 | When writing records to disk the filehandle is automatically closed when you | ||||
| 234 | the MARC::File::XML object goes out of scope. If you want to close it explicitly | ||||
| 235 | use the close() method. | ||||
| 236 | |||||
| 237 | =cut | ||||
| 238 | |||||
| 239 | sub close { | ||||
| 240 | my $self = shift; | ||||
| 241 | if ( $self->{ fh } ) { | ||||
| 242 | $self->{ fh }->print( footer() ) if $self->{ header }; | ||||
| 243 | $self->{ fh } = undef; | ||||
| 244 | $self->{ filename } = undef; | ||||
| 245 | $self->{ header } = undef; | ||||
| 246 | } | ||||
| 247 | return( 1 ); | ||||
| 248 | } | ||||
| 249 | |||||
| 250 | ## makes sure that the XML file is closed off | ||||
| 251 | |||||
| 252 | sub DESTROY { | ||||
| 253 | shift->close(); | ||||
| 254 | } | ||||
| 255 | |||||
| 256 | =pod | ||||
| 257 | |||||
| 258 | If you want to generate batches of records as XML, but don't want to write to | ||||
| 259 | disk you'll have to use header(), record() and footer() to generate the | ||||
| 260 | different portions. | ||||
| 261 | |||||
| 262 | $xml = join( "\n", | ||||
| 263 | MARC::File::XML::header(), | ||||
| 264 | MARC::File::XML::record( $record1 ), | ||||
| 265 | MARC::File::XML::record( $record2 ), | ||||
| 266 | MARC::File::XML::record( $record3 ), | ||||
| 267 | MARC::File::XML::footer() | ||||
| 268 | ); | ||||
| 269 | |||||
| 270 | =head2 header() | ||||
| 271 | |||||
| 272 | Returns a string of XML to use as the header to your XML file. | ||||
| 273 | |||||
| 274 | =cut | ||||
| 275 | |||||
| 276 | sub header { | ||||
| 277 | my $enc = shift; | ||||
| 278 | $enc = shift if ( $enc && (ref($enc) || ($enc eq "MARC::File::XML")) ); | ||||
| 279 | $enc ||= 'UTF-8'; | ||||
| 280 | return( <<MARC_XML_HEADER ); | ||||
| 281 | <?xml version="1.0" encoding="$enc"?> | ||||
| 282 | <collection | ||||
| 283 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
| 284 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
| 285 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
| 286 | MARC_XML_HEADER | ||||
| 287 | } | ||||
| 288 | |||||
| 289 | =head2 footer() | ||||
| 290 | |||||
| 291 | Returns a string of XML to use at the end of your XML file. | ||||
| 292 | |||||
| 293 | =cut | ||||
| 294 | |||||
| 295 | sub footer { | ||||
| 296 | return( "</collection>" ); | ||||
| 297 | } | ||||
| 298 | |||||
| 299 | =head2 record() | ||||
| 300 | |||||
| 301 | Returns a chunk of XML suitable for placement between the header and the footer. | ||||
| 302 | |||||
| 303 | =cut | ||||
| 304 | |||||
| 305 | sub record { | ||||
| 306 | my $record = shift; | ||||
| 307 | my $format = shift; | ||||
| 308 | my $include_full_record_header = shift; | ||||
| 309 | my $enc = shift; | ||||
| 310 | |||||
| 311 | $format ||= $_load_args{RecordFormat}; | ||||
| 312 | |||||
| 313 | my $_transcode = 0; | ||||
| 314 | my $ldr = $record->leader; | ||||
| 315 | my $original_encoding = substr($ldr,9,1); | ||||
| 316 | |||||
| 317 | # Does the record think it is already Unicode? | ||||
| 318 | if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) { | ||||
| 319 | # If not, we'll make it so | ||||
| 320 | $_transcode++; | ||||
| 321 | substr($ldr,9,1,'a'); | ||||
| 322 | $record->leader( $ldr ); | ||||
| 323 | } | ||||
| 324 | |||||
| 325 | my @xml = (); | ||||
| 326 | |||||
| 327 | if ($include_full_record_header) { | ||||
| 328 | push @xml, <<HEADER | ||||
| 329 | <?xml version="1.0" encoding="$enc"?> | ||||
| 330 | <record | ||||
| 331 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
| 332 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | ||||
| 333 | xmlns="http://www.loc.gov/MARC21/slim"> | ||||
| 334 | HEADER | ||||
| 335 | |||||
| 336 | } else { | ||||
| 337 | push( @xml, "<record>" ); | ||||
| 338 | } | ||||
| 339 | |||||
| 340 | push( @xml, " <leader>" . escape( $record->leader ) . "</leader>" ); | ||||
| 341 | |||||
| 342 | foreach my $field ( $record->fields() ) { | ||||
| 343 | my ($tag) = escape( $field->tag() ); | ||||
| 344 | if ( $field->is_control_field() ) { | ||||
| 345 | my $data = $field->data; | ||||
| 346 | push( @xml, qq( <controlfield tag="$tag">) . | ||||
| 347 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) ); | ||||
| 348 | } else { | ||||
| 349 | my ($i1) = escape( $field->indicator( 1 ) ); | ||||
| 350 | my ($i2) = escape( $field->indicator( 2 ) ); | ||||
| 351 | push( @xml, qq( <datafield tag="$tag" ind1="$i1" ind2="$i2">) ); | ||||
| 352 | foreach my $subfield ( $field->subfields() ) { | ||||
| 353 | my ( $code, $data ) = ( escape( $$subfield[0] ), $$subfield[1] ); | ||||
| 354 | push( @xml, qq( <subfield code="$code">). | ||||
| 355 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) ); | ||||
| 356 | } | ||||
| 357 | push( @xml, " </datafield>" ); | ||||
| 358 | } | ||||
| 359 | } | ||||
| 360 | push( @xml, "</record>\n" ); | ||||
| 361 | |||||
| 362 | if ($_transcode) { | ||||
| 363 | substr($ldr,9,1,$original_encoding); | ||||
| 364 | $record->leader( $ldr ); | ||||
| 365 | } | ||||
| 366 | |||||
| 367 | return( join( "\n", @xml ) ); | ||||
| 368 | } | ||||
| 369 | |||||
| 370 | 1 | 2µs | my %ESCAPES = ( | ||
| 371 | '&' => '&', | ||||
| 372 | '<' => '<', | ||||
| 373 | '>' => '>', | ||||
| 374 | ); | ||||
| 375 | 1 | 4µs | my $_base_escape_regex = join( '|', map { "\Q$_\E" } keys %ESCAPES ); | ||
| 376 | 1 | 22µs | 2 | 13µs | my $ESCAPE_REGEX = qr/$_base_escape_regex/; # spent 12µs making 1 call to MARC::File::XML::CORE:regcomp
# spent 1µs making 1 call to MARC::File::XML::CORE:qr |
| 377 | |||||
| 378 | sub escape { | ||||
| 379 | my $string = shift; | ||||
| 380 | return '' if ! defined $string or $string eq ''; | ||||
| 381 | $string =~ s/($ESCAPE_REGEX)/$ESCAPES{$1}/oge; | ||||
| 382 | return( $string ); | ||||
| 383 | } | ||||
| 384 | |||||
| 385 | sub _next { | ||||
| 386 | my $self = shift; | ||||
| 387 | my $fh = $self->{ fh }; | ||||
| 388 | |||||
| 389 | ## return undef at the end of the file | ||||
| 390 | return if eof($fh); | ||||
| 391 | |||||
| 392 | ## get a chunk of xml for a record | ||||
| 393 | local $/ = 'record>'; | ||||
| 394 | my $xml = <$fh>; | ||||
| 395 | |||||
| 396 | ## do we have enough? | ||||
| 397 | $xml .= <$fh> if $xml !~ m!</([^:]+:){0,1}record>$!; | ||||
| 398 | ## trim stuff before the start record element | ||||
| 399 | $xml =~ s/.*?<(([^:]+:){0,1})record.*?>/<$1record>/s; | ||||
| 400 | |||||
| 401 | ## return undef if there isn't a good chunk of xml | ||||
| 402 | return if ( $xml !~ m|<(([^:]+:){0,1})record>.*</\1record>|s ); | ||||
| 403 | |||||
| 404 | ## if we have a namespace prefix, restore the declaration | ||||
| 405 | if ($xml =~ /<([^:]+:)record>/) { | ||||
| 406 | $xml =~ s!<([^:]+):record>!<$1:record xmlns:$1="http://www.loc.gov/MARC21/slim">!; | ||||
| 407 | } | ||||
| 408 | |||||
| 409 | ## return the chunk of xml | ||||
| 410 | return( $xml ); | ||||
| 411 | } | ||||
| 412 | |||||
| 413 | sub _parser { | ||||
| 414 | $parser ||= XML::LibXML->new( | ||||
| 415 | ext_ent_handler => sub { | ||||
| 416 | die "External entities are not supported\n"; | ||||
| 417 | } | ||||
| 418 | ); | ||||
| 419 | return $parser; | ||||
| 420 | } | ||||
| 421 | |||||
| 422 | =head2 decode() | ||||
| 423 | |||||
| 424 | You probably don't ever want to call this method directly. If you do | ||||
| 425 | you should pass in a chunk of XML as the argument. | ||||
| 426 | |||||
| 427 | It is normally invoked by a call to next(), see L<MARC::Batch> or L<MARC::File>. | ||||
| 428 | |||||
| 429 | =cut | ||||
| 430 | |||||
| 431 | sub decode { | ||||
| 432 | my $self = shift; | ||||
| 433 | my $text; | ||||
| 434 | my $location = ''; | ||||
| 435 | |||||
| 436 | if ( ref($self) =~ /^MARC::File/ ) { | ||||
| 437 | $location = 'in record '.$self->{recnum}; | ||||
| 438 | $text = shift; | ||||
| 439 | } else { | ||||
| 440 | $location = 'in record 1'; | ||||
| 441 | $text = $self=~/MARC::File/ ? shift : $self; | ||||
| 442 | } | ||||
| 443 | |||||
| 444 | my $enc = shift || $_load_args{BinaryEncoding}; | ||||
| 445 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 446 | |||||
| 447 | my $parser = _parser(); | ||||
| 448 | my $xml = $parser->parse_string($text); | ||||
| 449 | |||||
| 450 | my $root = $xml->documentElement; | ||||
| 451 | croak('MARCXML document has no root element') unless defined $root; | ||||
| 452 | if ($root->localname eq 'collection') { | ||||
| 453 | my @records = $root->getChildrenByLocalName('record'); | ||||
| 454 | croak('MARCXML document has no record element') unless @records; | ||||
| 455 | $root = $records[0]; | ||||
| 456 | } | ||||
| 457 | |||||
| 458 | my $rec = MARC::Record->new(); | ||||
| 459 | my @leaders = $root->getElementsByLocalName('leader'); | ||||
| 460 | my $transcode_to_marc8 = 0; | ||||
| 461 | if (@leaders) { | ||||
| 462 | my $leader = $leaders[0]->textContent; | ||||
| 463 | |||||
| 464 | # this bit is rather questionable | ||||
| 465 | $transcode_to_marc8 = substr($leader, 9, 1) eq 'a' && decideMARC8Binary($format, $enc) ? 1 : 0; | ||||
| 466 | substr($leader, 9, 1) = ' ' if $transcode_to_marc8; | ||||
| 467 | |||||
| 468 | $rec->leader($leader); | ||||
| 469 | } | ||||
| 470 | |||||
| 471 | my @fields = (); | ||||
| 472 | foreach my $elt ($root->getChildrenByLocalName('*')) { | ||||
| 473 | if ($elt->localname eq 'controlfield') { | ||||
| 474 | push @fields, MARC::Field->new($elt->getAttribute('tag'), $elt->textContent); | ||||
| 475 | } elsif ($elt->localname eq 'datafield') { | ||||
| 476 | my @sfs = (); | ||||
| 477 | foreach my $sfelt ($elt->getChildrenByLocalName('subfield')) { | ||||
| 478 | push @sfs, $sfelt->getAttribute('code'), | ||||
| 479 | $transcode_to_marc8 ? utf8_to_marc8($sfelt->textContent()) : $sfelt->textContent(); | ||||
| 480 | } | ||||
| 481 | push @fields, MARC::Field->new( | ||||
| 482 | $elt->getAttribute('tag'), | ||||
| 483 | $elt->getAttribute('ind1'), | ||||
| 484 | $elt->getAttribute('ind2'), | ||||
| 485 | @sfs | ||||
| 486 | ); | ||||
| 487 | } | ||||
| 488 | } | ||||
| 489 | $rec->append_fields(@fields); | ||||
| 490 | return $rec; | ||||
| 491 | |||||
| 492 | } | ||||
| 493 | |||||
| 494 | =head2 MARC::File::XML->set_parser($parser) | ||||
| 495 | |||||
| 496 | Pass a XML::LibXML parser to MARC::File::XML | ||||
| 497 | for it to use. This is optional, meant for | ||||
| 498 | use by applications that maintain a shared | ||||
| 499 | parser object or which require that external | ||||
| 500 | entities be processed. Note that the latter | ||||
| 501 | is a potential security risk; see | ||||
| 502 | L<https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing>. | ||||
| 503 | |||||
| 504 | =cut | ||||
| 505 | |||||
| 506 | sub set_parser { | ||||
| 507 | my $self = shift; | ||||
| 508 | |||||
| 509 | $parser = shift; | ||||
| 510 | undef $parser unless ref($parser) =~ /XML::LibXML/; | ||||
| 511 | } | ||||
| 512 | |||||
| 513 | sub decideMARC8Binary { | ||||
| 514 | my $format = shift; | ||||
| 515 | my $enc = shift; | ||||
| 516 | |||||
| 517 | return 0 if (defined($format) && lc($format) =~ /^unimarc/o); | ||||
| 518 | return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o); | ||||
| 519 | return 1; | ||||
| 520 | } | ||||
| 521 | |||||
| 522 | |||||
| 523 | =head2 encode() | ||||
| 524 | |||||
| 525 | You probably want to use the as_xml() method on your MARC::Record object | ||||
| 526 | instead of calling this directly. But if you want to you just need to | ||||
| 527 | pass in the MARC::Record object you wish to encode as XML, and you will be | ||||
| 528 | returned the XML as a scalar. | ||||
| 529 | |||||
| 530 | =cut | ||||
| 531 | |||||
| 532 | sub encode { | ||||
| 533 | my $record = shift; | ||||
| 534 | my $format = shift || $_load_args{RecordFormat}; | ||||
| 535 | my $without_collection_header = shift; | ||||
| 536 | my $enc = shift || $_load_args{DefaultEncoding}; | ||||
| 537 | |||||
| 538 | if (lc($format) =~ /^unimarc/o) { | ||||
| 539 | $enc = _unimarc_encoding( $format => $record ); | ||||
| 540 | } | ||||
| 541 | |||||
| 542 | my @xml = (); | ||||
| 543 | push( @xml, header( $enc ) ) unless ($without_collection_header); | ||||
| 544 | # verbose, but naming the header output flags this way to avoid | ||||
| 545 | # the potential confusion identified in CPAN bug #34082 | ||||
| 546 | # http://rt.cpan.org/Public/Bug/Display.html?id=34082 | ||||
| 547 | my $include_full_record_header = ($without_collection_header) ? 1 : 0; | ||||
| 548 | push( @xml, record( $record, $format, $include_full_record_header, $enc ) ); | ||||
| 549 | push( @xml, footer() ) unless ($without_collection_header); | ||||
| 550 | |||||
| 551 | return( join( "\n", @xml ) ); | ||||
| 552 | } | ||||
| 553 | |||||
| 554 | sub _unimarc_encoding { | ||||
| 555 | my $f = shift; | ||||
| 556 | my $r = shift; | ||||
| 557 | |||||
| 558 | my $pos = 26; | ||||
| 559 | $pos = 13 if (lc($f) eq 'unimarcauth'); | ||||
| 560 | |||||
| 561 | my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 ); | ||||
| 562 | |||||
| 563 | if ($enc eq '01' || $enc eq '03') { | ||||
| 564 | return 'ISO-8859-1'; | ||||
| 565 | } elsif ($enc eq '50') { | ||||
| 566 | return 'UTF-8'; | ||||
| 567 | } else { | ||||
| 568 | die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100\$a -> " . $r->subfield(100 => 'a'); | ||||
| 569 | } | ||||
| 570 | } | ||||
| 571 | |||||
| 572 | =head1 TODO | ||||
| 573 | |||||
| 574 | =over 4 | ||||
| 575 | |||||
| 576 | =item * Support for callback filters in decode(). | ||||
| 577 | |||||
| 578 | =back | ||||
| 579 | |||||
| 580 | =head1 SEE ALSO | ||||
| 581 | |||||
| 582 | =over 4 | ||||
| 583 | |||||
| 584 | =item L<http://www.loc.gov/standards/marcxml/> | ||||
| 585 | |||||
| 586 | =item L<MARC::File::USMARC> | ||||
| 587 | |||||
| 588 | =item L<MARC::Batch> | ||||
| 589 | |||||
| 590 | =item L<MARC::Record> | ||||
| 591 | |||||
| 592 | =back | ||||
| 593 | |||||
| 594 | =head1 AUTHORS | ||||
| 595 | |||||
| 596 | =over 4 | ||||
| 597 | |||||
| 598 | =item * Ed Summers <ehs@pobox.com> | ||||
| 599 | |||||
| 600 | =back | ||||
| 601 | |||||
| 602 | =cut | ||||
| 603 | |||||
| 604 | 1 | 5µs | 1; | ||
# spent 1µs within MARC::File::XML::CORE:qr which was called:
# once (1µs+0s) by C4::Biblio::BEGIN@29 at line 376 | |||||
# spent 12µs within MARC::File::XML::CORE:regcomp which was called:
# once (12µs+0s) by C4::Biblio::BEGIN@29 at line 376 |