1# $Id: LibXML.pm,v 1.1.1.2 2007/10/10 23:04:13 ahuda Exp $ 2 3package XML::LibXML; 4 5use strict; 6use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS 7 $skipDTD $skipXMLDeclaration $setTagCompression 8 $MatchCB $ReadCB $OpenCB $CloseCB 9 ); 10use Carp; 11 12use XML::LibXML::Common qw(:encoding :libxml); 13 14use constant XML_XMLNS_NS => 'http://www.w3.org/2000/xmlns/'; 15use constant XML_XML_NS => 'http://www.w3.org/XML/1998/namespace'; 16 17use XML::LibXML::NodeList; 18use XML::LibXML::XPathContext; 19use IO::Handle; # for FH reads called as methods 20 21BEGIN { 22 23$VERSION = "1.65"; # VERSION TEMPLATE: DO NOT CHANGE 24require Exporter; 25require DynaLoader; 26@ISA = qw(DynaLoader Exporter); 27 28#-------------------------------------------------------------------------# 29# export information # 30#-------------------------------------------------------------------------# 31%EXPORT_TAGS = ( 32 all => [qw( 33 XML_ELEMENT_NODE 34 XML_ATTRIBUTE_NODE 35 XML_TEXT_NODE 36 XML_CDATA_SECTION_NODE 37 XML_ENTITY_REF_NODE 38 XML_ENTITY_NODE 39 XML_PI_NODE 40 XML_COMMENT_NODE 41 XML_DOCUMENT_NODE 42 XML_DOCUMENT_TYPE_NODE 43 XML_DOCUMENT_FRAG_NODE 44 XML_NOTATION_NODE 45 XML_HTML_DOCUMENT_NODE 46 XML_DTD_NODE 47 XML_ELEMENT_DECL 48 XML_ATTRIBUTE_DECL 49 XML_ENTITY_DECL 50 XML_NAMESPACE_DECL 51 XML_XINCLUDE_END 52 XML_XINCLUDE_START 53 encodeToUTF8 54 decodeFromUTF8 55 XML_XMLNS_NS 56 XML_XML_NS 57 )], 58 libxml => [qw( 59 XML_ELEMENT_NODE 60 XML_ATTRIBUTE_NODE 61 XML_TEXT_NODE 62 XML_CDATA_SECTION_NODE 63 XML_ENTITY_REF_NODE 64 XML_ENTITY_NODE 65 XML_PI_NODE 66 XML_COMMENT_NODE 67 XML_DOCUMENT_NODE 68 XML_DOCUMENT_TYPE_NODE 69 XML_DOCUMENT_FRAG_NODE 70 XML_NOTATION_NODE 71 XML_HTML_DOCUMENT_NODE 72 XML_DTD_NODE 73 XML_ELEMENT_DECL 74 XML_ATTRIBUTE_DECL 75 XML_ENTITY_DECL 76 XML_NAMESPACE_DECL 77 XML_XINCLUDE_END 78 XML_XINCLUDE_START 79 )], 80 encoding => [qw( 81 encodeToUTF8 82 decodeFromUTF8 83 )], 84 ns => [qw( 85 XML_XMLNS_NS 86 XML_XML_NS 87 )], 88 ); 89 90@EXPORT_OK = ( 91 @{$EXPORT_TAGS{all}}, 92 ); 93 94@EXPORT = ( 95 @{$EXPORT_TAGS{all}}, 96 ); 97 98#-------------------------------------------------------------------------# 99# initialization of the global variables # 100#-------------------------------------------------------------------------# 101$skipDTD = 0; 102$skipXMLDeclaration = 0; 103$setTagCompression = 0; 104 105$MatchCB = undef; 106$ReadCB = undef; 107$OpenCB = undef; 108$CloseCB = undef; 109 110#-------------------------------------------------------------------------# 111# bootstrapping # 112#-------------------------------------------------------------------------# 113bootstrap XML::LibXML $VERSION; 114undef &AUTOLOAD; 115 116} # BEGIN 117 118#-------------------------------------------------------------------------# 119# test exact version (up to patch-level) # 120#-------------------------------------------------------------------------# 121{ 122 my ($runtime_version) = LIBXML_RUNTIME_VERSION() =~ /^(\d+)/; 123 if ( $runtime_version < LIBXML_VERSION ) { 124 warn "Warning: XML::LibXML compiled against libxml2 ".LIBXML_VERSION. 125 ", but runtime libxml2 is older $runtime_version\n"; 126 } 127} 128 129#-------------------------------------------------------------------------# 130# parser constructor # 131#-------------------------------------------------------------------------# 132sub new { 133 my $class = shift; 134 my %options = @_; 135 if ( not exists $options{XML_LIBXML_KEEP_BLANKS} ) { 136 $options{XML_LIBXML_KEEP_BLANKS} = 1; 137 } 138 139 if ( defined $options{catalog} ) { 140 $class->load_catalog( $options{catalog} ); 141 delete $options{catalog}; 142 } 143 144 my $self = bless \%options, $class; 145 if ( defined $options{Handler} ) { 146 $self->set_handler( $options{Handler} ); 147 } 148 149 $self->{XML_LIBXML_EXT_DTD} = 1; 150 $self->{_State_} = 0; 151 return $self; 152} 153 154#-------------------------------------------------------------------------# 155# DOM Level 2 document constructor # 156#-------------------------------------------------------------------------# 157 158sub createDocument { 159 my $self = shift; 160 if (!@_ or $_[0] =~ m/^\d\.\d$/) { 161 # for backward compatibility 162 return XML::LibXML::Document->new(@_); 163 } 164 else { 165 # DOM API: createDocument(namespaceURI, qualifiedName, doctype?) 166 my $doc = XML::LibXML::Document-> new; 167 my $el = $doc->createElementNS(shift, shift); 168 $doc->setDocumentElement($el); 169 $doc->setExternalSubset(shift) if @_; 170 return $doc; 171 } 172} 173 174#-------------------------------------------------------------------------# 175# callback functions # 176#-------------------------------------------------------------------------# 177 178sub input_callbacks { 179 my $self = shift; 180 my $icbclass = shift; 181 182 if ( defined $icbclass ) { 183 $self->{XML_LIBXML_CALLBACK_STACK} = $icbclass; 184 } 185 return $self->{XML_LIBXML_CALLBACK_STACK}; 186} 187 188sub match_callback { 189 my $self = shift; 190 if ( ref $self ) { 191 if ( scalar @_ ) { 192 $self->{XML_LIBXML_MATCH_CB} = shift; 193 $self->{XML_LIBXML_CALLBACK_STACK} = undef; 194 } 195 return $self->{XML_LIBXML_MATCH_CB}; 196 } 197 else { 198 $MatchCB = shift if scalar @_; 199 return $MatchCB; 200 } 201} 202 203sub read_callback { 204 my $self = shift; 205 if ( ref $self ) { 206 if ( scalar @_ ) { 207 $self->{XML_LIBXML_READ_CB} = shift; 208 $self->{XML_LIBXML_CALLBACK_STACK} = undef; 209 } 210 return $self->{XML_LIBXML_READ_CB}; 211 } 212 else { 213 $ReadCB = shift if scalar @_; 214 return $ReadCB; 215 } 216} 217 218sub close_callback { 219 my $self = shift; 220 if ( ref $self ) { 221 if ( scalar @_ ) { 222 $self->{XML_LIBXML_CLOSE_CB} = shift; 223 $self->{XML_LIBXML_CALLBACK_STACK} = undef; 224 } 225 return $self->{XML_LIBXML_CLOSE_CB}; 226 } 227 else { 228 $CloseCB = shift if scalar @_; 229 return $CloseCB; 230 } 231} 232 233sub open_callback { 234 my $self = shift; 235 if ( ref $self ) { 236 if ( scalar @_ ) { 237 $self->{XML_LIBXML_OPEN_CB} = shift; 238 $self->{XML_LIBXML_CALLBACK_STACK} = undef; 239 } 240 return $self->{XML_LIBXML_OPEN_CB}; 241 } 242 else { 243 $OpenCB = shift if scalar @_; 244 return $OpenCB; 245 } 246} 247 248sub callbacks { 249 my $self = shift; 250 if ( ref $self ) { 251 if (@_) { 252 my ($match, $open, $read, $close) = @_; 253 @{$self}{qw(XML_LIBXML_MATCH_CB XML_LIBXML_OPEN_CB XML_LIBXML_READ_CB XML_LIBXML_CLOSE_CB)} = ($match, $open, $read, $close); 254 $self->{XML_LIBXML_CALLBACK_STACK} = undef; 255 } 256 else { 257 return @{$self}{qw(XML_LIBXML_MATCH_CB XML_LIBXML_OPEN_CB XML_LIBXML_READ_CB XML_LIBXML_CLOSE_CB)}; 258 } 259 } 260 else { 261 if (@_) { 262 ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ) = @_; 263 } 264 else { 265 return ( $MatchCB, $OpenCB, $ReadCB, $CloseCB ); 266 } 267 } 268} 269 270#-------------------------------------------------------------------------# 271# member variable manipulation # 272#-------------------------------------------------------------------------# 273sub validation { 274 my $self = shift; 275 $self->{XML_LIBXML_VALIDATION} = shift if scalar @_; 276 return $self->{XML_LIBXML_VALIDATION}; 277} 278 279sub recover { 280 my $self = shift; 281 $self->{XML_LIBXML_RECOVER} = shift if scalar @_; 282 return $self->{XML_LIBXML_RECOVER}; 283} 284 285sub recover_silently { 286 my $self = shift; 287 my $arg = shift; 288 (($arg == 1) ? $self->recover(2) : $self->recover($arg)) if defined($arg); 289 return ($self->recover() == 2) ? 1 : 0; 290} 291 292sub expand_entities { 293 my $self = shift; 294 $self->{XML_LIBXML_EXPAND_ENTITIES} = shift if scalar @_; 295 return $self->{XML_LIBXML_EXPAND_ENTITIES}; 296} 297 298sub keep_blanks { 299 my $self = shift; 300 $self->{XML_LIBXML_KEEP_BLANKS} = shift if scalar @_; 301 return $self->{XML_LIBXML_KEEP_BLANKS}; 302} 303 304sub pedantic_parser { 305 my $self = shift; 306 $self->{XML_LIBXML_PEDANTIC} = shift if scalar @_; 307 return $self->{XML_LIBXML_PEDANTIC}; 308} 309 310sub line_numbers { 311 my $self = shift; 312 $self->{XML_LIBXML_LINENUMBERS} = shift if scalar @_; 313 return $self->{XML_LIBXML_LINENUMBERS}; 314} 315 316sub no_network { 317 my $self = shift; 318 $self->{XML_LIBXML_NONET} = shift if scalar @_; 319 return $self->{XML_LIBXML_NONET}; 320} 321 322sub load_ext_dtd { 323 my $self = shift; 324 $self->{XML_LIBXML_EXT_DTD} = shift if scalar @_; 325 return $self->{XML_LIBXML_EXT_DTD}; 326} 327 328sub complete_attributes { 329 my $self = shift; 330 $self->{XML_LIBXML_COMPLETE_ATTR} = shift if scalar @_; 331 return $self->{XML_LIBXML_COMPLETE_ATTR}; 332} 333 334sub expand_xinclude { 335 my $self = shift; 336 $self->{XML_LIBXML_EXPAND_XINCLUDE} = shift if scalar @_; 337 return $self->{XML_LIBXML_EXPAND_XINCLUDE}; 338} 339 340sub base_uri { 341 my $self = shift; 342 $self->{XML_LIBXML_BASE_URI} = shift if scalar @_; 343 return $self->{XML_LIBXML_BASE_URI}; 344} 345 346sub gdome_dom { 347 my $self = shift; 348 $self->{XML_LIBXML_GDOME} = shift if scalar @_; 349 return $self->{XML_LIBXML_GDOME}; 350} 351 352sub clean_namespaces { 353 my $self = shift; 354 $self->{XML_LIBXML_NSCLEAN} = shift if scalar @_; 355 return $self->{XML_LIBXML_NSCLEAN}; 356} 357 358#-------------------------------------------------------------------------# 359# set the optional SAX(2) handler # 360#-------------------------------------------------------------------------# 361sub set_handler { 362 my $self = shift; 363 if ( defined $_[0] ) { 364 $self->{HANDLER} = $_[0]; 365 366 $self->{SAX_ELSTACK} = []; 367 $self->{SAX} = {State => 0}; 368 } 369 else { 370 # undef SAX handling 371 $self->{SAX_ELSTACK} = []; 372 delete $self->{HANDLER}; 373 delete $self->{SAX}; 374 } 375} 376 377#-------------------------------------------------------------------------# 378# helper functions # 379#-------------------------------------------------------------------------# 380sub _auto_expand { 381 my ( $self, $result, $uri ) = @_; 382 383 $result->setBaseURI( $uri ) if defined $uri; 384 385 if ( defined $self->{XML_LIBXML_EXPAND_XINCLUDE} 386 and $self->{XML_LIBXML_EXPAND_XINCLUDE} == 1 ) { 387 $self->{_State_} = 1; 388 eval { $self->processXIncludes($result); }; 389 my $err = $@; 390 $self->{_State_} = 0; 391 if ($err) { 392 $self->_cleanup_callbacks(); 393 $result = undef; 394 croak $err; 395 } 396 } 397 return $result; 398} 399 400sub _init_callbacks { 401 my $self = shift; 402 my $icb = $self->{XML_LIBXML_CALLBACK_STACK}; 403 404 unless ( defined $icb ) { 405 $self->{XML_LIBXML_CALLBACK_STACK} = XML::LibXML::InputCallback->new(); 406 $icb = $self->{XML_LIBXML_CALLBACK_STACK}; 407 } 408 409 my $mcb = $self->match_callback(); 410 my $ocb = $self->open_callback(); 411 my $rcb = $self->read_callback(); 412 my $ccb = $self->close_callback(); 413 414 if ( defined $mcb and defined $ocb and defined $rcb and defined $ccb ) { 415 $icb->register_callbacks( [$mcb, $ocb, $rcb, $ccb] ); 416 } 417 418 $icb->init_callbacks(); 419} 420 421sub _cleanup_callbacks { 422 my $self = shift; 423 $self->{XML_LIBXML_CALLBACK_STACK}->cleanup_callbacks(); 424 my $mcb = $self->match_callback(); 425 $self->{XML_LIBXML_CALLBACK_STACK}->unregister_callbacks( [$mcb] ); 426} 427 428sub __read { 429 read($_[0], $_[1], $_[2]); 430} 431 432sub __write { 433 if ( ref( $_[0] ) ) { 434 $_[0]->write( $_[1], $_[2] ); 435 } 436 else { 437 $_[0]->write( $_[1] ); 438 } 439} 440 441# currently this is only used in the XInlcude processor 442# but in the future, all parsing functions should turn to 443# the new libxml2 parsing API internally and this will 444# become handy 445sub _parser_options { 446 my ($self,$opts)=@_; 447 $opts = {} unless ref $opts; 448 my $flags = 0; 449 $flags |= 1 if exists $opts->{recover} ? $opts->{recover} : $self->recover; 450 $flags |= 2 if exists $opts->{expand_entities} ? $opts->{expand_entities} : $self->expand_entities; 451 $flags |= 4 if exists $opts->{load_ext_dtd} ? $opts->{load_ext_dtd} : $self->load_ext_dtd; 452 $flags |= 8 if exists $opts->{complete_attributes} ? $opts->{complete_attributes} : $self->complete_attributes; 453 $flags |= 16 if exists $opts->{validation} ? $opts->{validation} : $self->validation; 454 $flags |= 32 if $opts->{suppress_errors}; 455 $flags |= 64 if $opts->{suppress_warnings}; 456 $flags |= 128 if exists $opts->{pedantic_parser} ? $opts->{pedantic_parser} : $self->pedantic_parser; 457 $flags |= 256 if exists $opts->{no_blanks} ? $opts->{no_blanks} : !$self->keep_blanks(); 458 $flags |= 1024 if exists $opts->{expand_xinclude} ? $opts->{expand_xinclude} : $self->expand_xinclude; 459 $flags |= 2048 if exists $opts->{no_network} ? $opts->{no_network} : $self->no_network; 460 $flags |= 8192 if exists $opts->{clean_namespaces} ? $opts->{clean_namespaces} : $self->clean_namespaces; 461 $flags |= 16384 if $opts->{no_cdata}; 462 $flags |= 32768 if $opts->{no_xinclude_nodes}; 463 return ($flags); 464} 465 466 467#-------------------------------------------------------------------------# 468# parsing functions # 469#-------------------------------------------------------------------------# 470# all parsing functions handle normal as SAX parsing at the same time. 471# note that SAX parsing is handled incomplete! use XML::LibXML::SAX for 472# complete parsing sequences 473#-------------------------------------------------------------------------# 474sub parse_string { 475 my $self = shift; 476 croak("parse_string is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 477 croak("parse already in progress") if $self->{_State_}; 478 479 unless ( defined $_[0] and length $_[0] ) { 480 croak("Empty String"); 481 } 482 483 $self->{_State_} = 1; 484 my $result; 485 486 $self->_init_callbacks(); 487 488 if ( defined $self->{SAX} ) { 489 my $string = shift; 490 $self->{SAX_ELSTACK} = []; 491 492 eval { $result = $self->_parse_sax_string($string); }; 493 494 my $err = $@; 495 $self->{_State_} = 0; 496 if ($err) { 497 chomp $err; 498 $self->_cleanup_callbacks(); 499 croak $err; 500 } 501 } 502 else { 503 eval { $result = $self->_parse_string( @_ ); }; 504 505 my $err = $@; 506 $self->{_State_} = 0; 507 if ($err) { 508 chomp $err; 509 $self->_cleanup_callbacks(); 510 croak $err; 511 } 512 513 $result = $self->_auto_expand( $result, $self->{XML_LIBXML_BASE_URI} ); 514 } 515 $self->_cleanup_callbacks(); 516 517 return $result; 518} 519 520sub parse_fh { 521 my $self = shift; 522 croak("parse_fh is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 523 croak("parse already in progress") if $self->{_State_}; 524 $self->{_State_} = 1; 525 my $result; 526 527 $self->_init_callbacks(); 528 529 if ( defined $self->{SAX} ) { 530 $self->{SAX_ELSTACK} = []; 531 eval { $self->_parse_sax_fh( @_ ); }; 532 my $err = $@; 533 $self->{_State_} = 0; 534 if ($err) { 535 chomp $err; 536 $self->_cleanup_callbacks(); 537 croak $err; 538 } 539 } 540 else { 541 eval { $result = $self->_parse_fh( @_ ); }; 542 my $err = $@; 543 $self->{_State_} = 0; 544 if ($err) { 545 chomp $err; 546 $self->_cleanup_callbacks(); 547 croak $err; 548 } 549 550 $result = $self->_auto_expand( $result, $self->{XML_LIBXML_BASE_URI} ); 551 } 552 553 $self->_cleanup_callbacks(); 554 555 return $result; 556} 557 558sub parse_file { 559 my $self = shift; 560 croak("parse_file is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 561 croak("parse already in progress") if $self->{_State_}; 562 $self->{_State_} = 1; 563 my $result; 564 565 $self->_init_callbacks(); 566 567 if ( defined $self->{SAX} ) { 568 $self->{SAX_ELSTACK} = []; 569 eval { $self->_parse_sax_file( @_ ); }; 570 my $err = $@; 571 $self->{_State_} = 0; 572 if ($err) { 573 chomp $err; 574 $self->_cleanup_callbacks(); 575 croak $err; 576 } 577 } 578 else { 579 eval { $result = $self->_parse_file(@_); }; 580 my $err = $@; 581 $self->{_State_} = 0; 582 if ($err) { 583 chomp $err; 584 $self->_cleanup_callbacks(); 585 croak $err; 586 } 587 588 $result = $self->_auto_expand( $result ); 589 } 590 $self->_cleanup_callbacks(); 591 592 return $result; 593} 594 595sub parse_xml_chunk { 596 my $self = shift; 597 # max 2 parameter: 598 # 1: the chunk 599 # 2: the encoding of the string 600 croak("parse_xml_chunk is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 601 croak("parse already in progress") if $self->{_State_}; my $result; 602 603 unless ( defined $_[0] and length $_[0] ) { 604 croak("Empty String"); 605 } 606 607 $self->{_State_} = 1; 608 609 $self->_init_callbacks(); 610 611 if ( defined $self->{SAX} ) { 612 eval { 613 $self->_parse_sax_xml_chunk( @_ ); 614 615 # this is required for XML::GenericChunk. 616 # in normal case is_filter is not defined, an thus the parsing 617 # will be terminated. in case of a SAX filter the parsing is not 618 # finished at that state. therefore we must not reset the parsing 619 unless ( $self->{IS_FILTER} ) { 620 $result = $self->{HANDLER}->end_document(); 621 } 622 }; 623 } 624 else { 625 eval { $result = $self->_parse_xml_chunk( @_ ); }; 626 } 627 628 $self->_cleanup_callbacks(); 629 630 my $err = $@; 631 $self->{_State_} = 0; 632 if ($err) { 633 chomp $err; 634 croak $err; 635 } 636 637 return $result; 638} 639 640sub parse_balanced_chunk { 641 my $self = shift; 642 $self->_init_callbacks(); 643 my $rv; 644 eval { 645 $rv = $self->parse_xml_chunk( @_ ); 646 }; 647 my $err = $@; 648 $self->_cleanup_callbacks(); 649 if ( $err ) { 650 chomp $err; 651 croak $err; 652 } 653 return $rv 654} 655 656# java style 657sub processXIncludes { 658 my $self = shift; 659 my $doc = shift; 660 my $opts = shift; 661 my $options = $self->_parser_options($opts); 662 if ( $self->{_State_} != 1 ) { 663 $self->_init_callbacks(); 664 } 665 my $rv; 666 eval { 667 $rv = $self->_processXIncludes($doc || " ", $options); 668 }; 669 my $err = $@; 670 if ( $self->{_State_} != 1 ) { 671 $self->_cleanup_callbacks(); 672 } 673 674 if ( $err ) { 675 chomp $err; 676 croak $err; 677 } 678 return $rv; 679} 680 681# perl style 682sub process_xincludes { 683 my $self = shift; 684 my $doc = shift; 685 my $opts = shift; 686 my $options = $self->_parser_options($opts); 687 688 my $rv; 689 $self->_init_callbacks(); 690 eval { 691 $rv = $self->_processXIncludes($doc || " ", $options); 692 }; 693 my $err = $@; 694 $self->_cleanup_callbacks(); 695 if ( $err ) { 696 chomp $err; 697 croak $@; 698 } 699 return $rv; 700} 701 702#-------------------------------------------------------------------------# 703# HTML parsing functions # 704#-------------------------------------------------------------------------# 705 706sub _html_options { 707 my ($self,$opts)=@_; 708 $opts = {} unless ref $opts; 709 # return (undef,undef) unless ref $opts; 710 my $flags = 0; 711 $flags |= 1 if exists $opts->{recover} ? $opts->{recover} : $self->recover; 712 $flags |= 32 if $opts->{suppress_errors}; 713 $flags |= 64 if $opts->{suppress_warnings}; 714 $flags |= 128 if exists $opts->{pedantic_parser} ? $opts->{pedantic_parser} : $self->pedantic_parser; 715 $flags |= 256 if exists $opts->{no_blanks} ? $opts->{no_blanks} : !$self->keep_blanks; 716 $flags |= 2048 if exists $opts->{no_network} ? $opts->{no_network} : !$self->no_network; 717 return ($opts->{URI},$opts->{encoding},$flags); 718} 719 720sub parse_html_string { 721 my ($self,$str,$opts) = @_; 722 croak("parse_html_string is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 723 croak("parse already in progress") if $self->{_State_}; 724 725 unless ( defined $str and length $str ) { 726 croak("Empty String"); 727 } 728 $self->{_State_} = 1; 729 my $result; 730 731 $self->_init_callbacks(); 732 eval { 733 $result = $self->_parse_html_string( $str, 734 $self->_html_options($opts) 735 ); 736 }; 737 my $err = $@; 738 $self->{_State_} = 0; 739 if ($err) { 740 chomp $err; 741 $self->_cleanup_callbacks(); 742 croak $err; 743 } 744 745 $self->_cleanup_callbacks(); 746 747 return $result; 748} 749 750sub parse_html_file { 751 my ($self,$file,$opts) = @_; 752 croak("parse_html_file is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 753 croak("parse already in progress") if $self->{_State_}; 754 $self->{_State_} = 1; 755 my $result; 756 757 $self->_init_callbacks(); 758 eval { $result = $self->_parse_html_file($file, 759 $self->_html_options($opts) 760 ); }; 761 my $err = $@; 762 $self->{_State_} = 0; 763 if ($err) { 764 chomp $err; 765 $self->_cleanup_callbacks(); 766 croak $err; 767 } 768 769 $self->_cleanup_callbacks(); 770 771 return $result; 772} 773 774sub parse_html_fh { 775 my ($self,$fh,$opts) = @_; 776 croak("parse_html_fh is not a class method! Create a parser object with XML::LibXML->new first!") unless ref $self; 777 croak("parse already in progress") if $self->{_State_}; 778 $self->{_State_} = 1; 779 780 my $result; 781 $self->_init_callbacks(); 782 eval { $result = $self->_parse_html_fh( $fh, 783 $self->_html_options($opts) 784 ); }; 785 my $err = $@; 786 $self->{_State_} = 0; 787 if ($err) { 788 chomp $err; 789 $self->_cleanup_callbacks(); 790 croak $err; 791 } 792 $self->_cleanup_callbacks(); 793 794 return $result; 795} 796 797#-------------------------------------------------------------------------# 798# push parser interface # 799#-------------------------------------------------------------------------# 800sub init_push { 801 my $self = shift; 802 803 if ( defined $self->{CONTEXT} ) { 804 delete $self->{CONTEXT}; 805 } 806 807 if ( defined $self->{SAX} ) { 808 $self->{CONTEXT} = $self->_start_push(1); 809 } 810 else { 811 $self->{CONTEXT} = $self->_start_push(0); 812 } 813} 814 815sub push { 816 my $self = shift; 817 818 $self->_init_callbacks(); 819 820 if ( not defined $self->{CONTEXT} ) { 821 $self->init_push(); 822 } 823 824 eval { 825 foreach ( @_ ) { 826 $self->_push( $self->{CONTEXT}, $_ ); 827 } 828 }; 829 my $err = $@; 830 $self->_cleanup_callbacks(); 831 if ( $err ) { 832 chomp $err; 833 croak $err; 834 } 835} 836 837# this function should be promoted! 838# the reason is because libxml2 uses xmlParseChunk() for this purpose! 839sub parse_chunk { 840 my $self = shift; 841 my $chunk = shift; 842 my $terminate = shift; 843 844 if ( not defined $self->{CONTEXT} ) { 845 $self->init_push(); 846 } 847 848 if ( defined $chunk and length $chunk ) { 849 $self->_push( $self->{CONTEXT}, $chunk ); 850 } 851 852 if ( $terminate ) { 853 return $self->finish_push(); 854 } 855} 856 857 858sub finish_push { 859 my $self = shift; 860 my $restore = shift || 0; 861 return undef unless defined $self->{CONTEXT}; 862 863 my $retval; 864 865 if ( defined $self->{SAX} ) { 866 eval { 867 $self->_end_sax_push( $self->{CONTEXT} ); 868 $retval = $self->{HANDLER}->end_document( {} ); 869 }; 870 } 871 else { 872 eval { $retval = $self->_end_push( $self->{CONTEXT}, $restore ); }; 873 } 874 delete $self->{CONTEXT}; 875 my $err = $@; 876 if ( $err ) { 877 chomp $err; 878 croak( $err ); 879 } 880 return $retval; 881} 882 8831; 884 885#-------------------------------------------------------------------------# 886# XML::LibXML::Node Interface # 887#-------------------------------------------------------------------------# 888package XML::LibXML::Node; 889 890sub isSupported { 891 my $self = shift; 892 my $feature = shift; 893 return $self->can($feature) ? 1 : 0; 894} 895 896sub getChildNodes { my $self = shift; return $self->childNodes(); } 897 898sub childNodes { 899 my $self = shift; 900 my @children = $self->_childNodes(); 901 return wantarray ? @children : XML::LibXML::NodeList->new_from_ref(\@children , 1); 902} 903 904sub attributes { 905 my $self = shift; 906 my @attr = $self->_attributes(); 907 return wantarray ? @attr : XML::LibXML::NamedNodeMap->new( @attr ); 908} 909 910 911sub findnodes { 912 my ($node, $xpath) = @_; 913 my @nodes = $node->_findnodes($xpath); 914 if (wantarray) { 915 return @nodes; 916 } 917 else { 918 return XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 919 } 920} 921 922sub findvalue { 923 my ($node, $xpath) = @_; 924 my $res; 925 eval { 926 $res = $node->find($xpath); 927 }; 928 if ( $@ ) { 929 die $@; 930 } 931 return $res->to_literal->value; 932} 933 934sub find { 935 my ($node, $xpath) = @_; 936 my ($type, @params) = $node->_find($xpath); 937 if ($type) { 938 return $type->new(@params); 939 } 940 return undef; 941} 942 943sub setOwnerDocument { 944 my ( $self, $doc ) = @_; 945 $doc->adoptNode( $self ); 946} 947 948sub toStringC14N { 949 my ($self, $comments, $xpath) = (shift, shift, shift); 950 return $self->_toStringC14N( $comments || 0, 951 (defined $xpath ? $xpath : undef), 952 0, 953 undef ); 954} 955sub toStringEC14N { 956 my ($self, $comments, $xpath, $inc_prefix_list) = @_; 957 if (defined($inc_prefix_list) and !UNIVERSAL::isa($inc_prefix_list,'ARRAY')) { 958 croak("toStringEC14N: inclusive_prefix_list must be undefined or ARRAY"); 959 } 960 return $self->_toStringC14N( $comments || 0, 961 (defined $xpath ? $xpath : undef), 962 1, 963 (defined $inc_prefix_list ? $inc_prefix_list : undef)); 964} 965 966*serialize_c14n = \&toStringC14N; 967*serialize_exc_c14n = \&toStringEC14N; 968 9691; 970 971#-------------------------------------------------------------------------# 972# XML::LibXML::Document Interface # 973#-------------------------------------------------------------------------# 974package XML::LibXML::Document; 975 976use vars qw(@ISA); 977@ISA = ('XML::LibXML::Node'); 978 979sub actualEncoding { 980 my $doc = shift; 981 my $enc = $doc->encoding; 982 return (defined $enc and length $enc) ? $enc : 'UTF-8'; 983} 984 985sub setDocumentElement { 986 my $doc = shift; 987 my $element = shift; 988 989 my $oldelem = $doc->documentElement; 990 if ( defined $oldelem ) { 991 $doc->removeChild($oldelem); 992 } 993 994 $doc->_setDocumentElement($element); 995} 996 997sub toString { 998 my $self = shift; 999 my $flag = shift; 1000 1001 my $retval = ""; 1002 1003 if ( defined $XML::LibXML::skipXMLDeclaration 1004 and $XML::LibXML::skipXMLDeclaration == 1 ) { 1005 foreach ( $self->childNodes ){ 1006 next if $_->nodeType == XML::LibXML::XML_DTD_NODE() 1007 and $XML::LibXML::skipDTD; 1008 $retval .= $_->toString; 1009 } 1010 } 1011 else { 1012 $flag ||= 0 unless defined $flag; 1013 $retval = $self->_toString($flag); 1014 } 1015 1016 return $retval; 1017} 1018 1019sub serialize { 1020 my $self = shift; 1021 return $self->toString( @_ ); 1022} 1023 1024#-------------------------------------------------------------------------# 1025# bad style xinclude processing # 1026#-------------------------------------------------------------------------# 1027sub process_xinclude { 1028 my $self = shift; 1029 my $opts = shift; 1030 XML::LibXML->new->processXIncludes( $self, $opts ); 1031} 1032 1033sub insertProcessingInstruction { 1034 my $self = shift; 1035 my $target = shift; 1036 my $data = shift; 1037 1038 my $pi = $self->createPI( $target, $data ); 1039 my $root = $self->documentElement; 1040 1041 if ( defined $root ) { 1042 # this is actually not correct, but i guess it's what the user 1043 # intends 1044 $self->insertBefore( $pi, $root ); 1045 } 1046 else { 1047 # if no documentElement was found we just append the PI 1048 $self->appendChild( $pi ); 1049 } 1050} 1051 1052sub insertPI { 1053 my $self = shift; 1054 $self->insertProcessingInstruction( @_ ); 1055} 1056 1057#-------------------------------------------------------------------------# 1058# DOM L3 Document functions. 1059# added after robins implicit feature requst 1060#-------------------------------------------------------------------------# 1061*getElementsByTagName = \&XML::LibXML::Element::getElementsByTagName; 1062*getElementsByTagNameNS = \&XML::LibXML::Element::getElementsByTagNameNS; 1063*getElementsByLocalName = \&XML::LibXML::Element::getElementsByLocalName; 1064 10651; 1066 1067#-------------------------------------------------------------------------# 1068# XML::LibXML::DocumentFragment Interface # 1069#-------------------------------------------------------------------------# 1070package XML::LibXML::DocumentFragment; 1071 1072use vars qw(@ISA); 1073@ISA = ('XML::LibXML::Node'); 1074 1075sub toString { 1076 my $self = shift; 1077 my $retval = ""; 1078 if ( $self->hasChildNodes() ) { 1079 foreach my $n ( $self->childNodes() ) { 1080 $retval .= $n->toString(@_); 1081 } 1082 } 1083 return $retval; 1084} 1085 1086*serialize = \&toString; 1087 10881; 1089 1090#-------------------------------------------------------------------------# 1091# XML::LibXML::Element Interface # 1092#-------------------------------------------------------------------------# 1093package XML::LibXML::Element; 1094 1095use vars qw(@ISA); 1096@ISA = ('XML::LibXML::Node'); 1097use XML::LibXML qw(:ns :libxml); 1098use Carp; 1099 1100sub setNamespace { 1101 my $self = shift; 1102 my $n = $self->nodeName; 1103 if ( $self->_setNamespace(@_) ){ 1104 if ( scalar @_ < 3 || $_[2] == 1 ){ 1105 $self->setNodeName( $n ); 1106 } 1107 return 1; 1108 } 1109 return 0; 1110} 1111 1112sub getAttribute { 1113 my $self = shift; 1114 my $name = $_[0]; 1115 if ( $name =~ /^xmlns(?::|$)/ ) { 1116 # user wants to get a namespace ... 1117 (my $prefix = $name )=~s/^xmlns:?//; 1118 $self->_getNamespaceDeclURI($prefix); 1119 } 1120 else { 1121 $self->_getAttribute(@_); 1122 } 1123} 1124 1125sub setAttribute { 1126 my ( $self, $name, $value ) = @_; 1127 if ( $name =~ /^xmlns(?::|$)/ ) { 1128 # user wants to set the special attribute for declaring XML namespace ... 1129 1130 # this is fine but not exactly DOM conformant behavior, btw (according to DOM we should 1131 # probably declare an attribute which looks like XML namespace declaration 1132 # but isn't) 1133 (my $nsprefix = $name )=~s/^xmlns:?//; 1134 my $nn = $self->nodeName; 1135 if ( $nn =~ /^\Q${nsprefix}\E:/ ) { 1136 # the element has the same prefix 1137 $self->setNamespaceDeclURI($nsprefix,$value) || 1138 $self->setNamespace($value,$nsprefix,1); 1139 ## 1140 ## We set the namespace here. 1141 ## This is helpful, as in: 1142 ## 1143 ## | $e = XML::LibXML::Element->new('foo:bar'); 1144 ## | $e->setAttribute('xmlns:foo','http://yoyodine') 1145 ## 1146 } 1147 else { 1148 # just modify the namespace 1149 $self->setNamespaceDeclURI($nsprefix, $value) || 1150 $self->setNamespace($value,$nsprefix,0); 1151 } 1152 } 1153 else { 1154 $self->_setAttribute($name, $value); 1155 } 1156} 1157 1158sub getAttributeNS { 1159 my $self = shift; 1160 my ($nsURI, $name) = @_; 1161 croak("invalid attribute name") if !defined($name) or $name eq q{}; 1162 if ( defined($nsURI) and $nsURI eq XML_XMLNS_NS ) { 1163 $self->_getNamespaceDeclURI($name eq 'xmlns' ? undef : $name); 1164 } 1165 else { 1166 $self->_getAttributeNS(@_); 1167 } 1168} 1169 1170sub setAttributeNS { 1171 my ($self, $nsURI, $qname, $value)=@_; 1172 unless (defined $qname and length $qname) { 1173 croak("bad name"); 1174 } 1175 if (defined($nsURI) and $nsURI eq XML_XMLNS_NS) { 1176 if ($qname !~ /^xmlns(?::|$)/) { 1177 croak("NAMESPACE ERROR: Namespace declartions must have the prefix 'xmlns'"); 1178 } 1179 $self->setAttribute($qname,$value); # see implementation above 1180 return; 1181 } 1182 if ($qname=~/:/ and not (defined($nsURI) and length($nsURI))) { 1183 croak("NAMESPACE ERROR: Attribute without a prefix cannot be in a namespace"); 1184 } 1185 if ($qname=~/^xmlns(?:$|:)/) { 1186 croak("NAMESPACE ERROR: 'xmlns' prefix and qualified-name are reserved for the namespace ".XML_XMLNS_NS); 1187 } 1188 if ($qname=~/^xml:/ and not (defined $nsURI and $nsURI eq XML_XML_NS)) { 1189 croak("NAMESPACE ERROR: 'xml' prefix is reserved for the namespace ".XML_XML_NS); 1190 } 1191 $self->_setAttributeNS( defined $nsURI ? $nsURI : undef, $qname, $value ); 1192} 1193 1194sub getElementsByTagName { 1195 my ( $node , $name ) = @_; 1196 my $xpath = $name eq '*' ? "descendant::*" : "descendant::*[name()='$name']"; 1197 my @nodes = $node->_findnodes($xpath); 1198 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1199} 1200 1201sub getElementsByTagNameNS { 1202 my ( $node, $nsURI, $name ) = @_; 1203 my $xpath; 1204 if ( $name eq '*' ) { 1205 if ( $nsURI eq '*' ) { 1206 $xpath = "descendant::*"; 1207 } else { 1208 $xpath = "descendant::*[namespace-uri()='$nsURI']"; 1209 } 1210 } elsif ( $nsURI eq '*' ) { 1211 $xpath = "descendant::*[local-name()='$name']"; 1212 } else { 1213 $xpath = "descendant::*[local-name()='$name' and namespace-uri()='$nsURI']"; 1214 } 1215 my @nodes = $node->_findnodes($xpath); 1216 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1217} 1218 1219sub getElementsByLocalName { 1220 my ( $node,$name ) = @_; 1221 my $xpath; 1222 if ($name eq '*') { 1223 $xpath = "descendant::*"; 1224 } else { 1225 $xpath = "descendant::*[local-name()='$name']"; 1226 } 1227 my @nodes = $node->_findnodes($xpath); 1228 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1229} 1230 1231sub getChildrenByTagName { 1232 my ( $node, $name ) = @_; 1233 my @nodes; 1234 if ($name eq '*') { 1235 @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() } 1236 $node->childNodes(); 1237 } else { 1238 @nodes = grep { $_->nodeName eq $name } $node->childNodes(); 1239 } 1240 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1241} 1242 1243sub getChildrenByLocalName { 1244 my ( $node, $name ) = @_; 1245 my @nodes; 1246 if ($name eq '*') { 1247 @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() } 1248 $node->childNodes(); 1249 } else { 1250 @nodes = grep { $_->nodeType == XML_ELEMENT_NODE() and 1251 $_->localName eq $name } $node->childNodes(); 1252 } 1253 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1254} 1255 1256sub getChildrenByTagNameNS { 1257 my ( $node, $nsURI, $name ) = @_; 1258 my @nodes = $node->_getChildrenByTagNameNS($nsURI,$name); 1259 return wantarray ? @nodes : XML::LibXML::NodeList->new_from_ref(\@nodes, 1); 1260} 1261 1262sub appendWellBalancedChunk { 1263 my ( $self, $chunk ) = @_; 1264 1265 my $local_parser = XML::LibXML->new(); 1266 my $frag = $local_parser->parse_xml_chunk( $chunk ); 1267 1268 $self->appendChild( $frag ); 1269} 1270 12711; 1272 1273#-------------------------------------------------------------------------# 1274# XML::LibXML::Text Interface # 1275#-------------------------------------------------------------------------# 1276package XML::LibXML::Text; 1277 1278use vars qw(@ISA); 1279@ISA = ('XML::LibXML::Node'); 1280 1281sub attributes { return undef; } 1282 1283sub deleteDataString { 1284 my $node = shift; 1285 my $string = shift; 1286 my $all = shift; 1287 my $data = $node->nodeValue(); 1288 $string =~ s/([\\\*\+\^\{\}\&\?\[\]\(\)\$\%\@])/\\$1/g; 1289 if ( $all ) { 1290 $data =~ s/$string//g; 1291 } 1292 else { 1293 $data =~ s/$string//; 1294 } 1295 $node->setData( $data ); 1296} 1297 1298sub replaceDataString { 1299 my ( $node, $left, $right,$all ) = @_; 1300 1301 #ashure we exchange the strings and not expressions! 1302 $left =~ s/([\\\*\+\^\{\}\&\?\[\]\(\)\$\%\@])/\\$1/g; 1303 my $datastr = $node->nodeValue(); 1304 if ( $all ) { 1305 $datastr =~ s/$left/$right/g; 1306 } 1307 else{ 1308 $datastr =~ s/$left/$right/; 1309 } 1310 $node->setData( $datastr ); 1311} 1312 1313sub replaceDataRegEx { 1314 my ( $node, $leftre, $rightre, $flags ) = @_; 1315 return unless defined $leftre; 1316 $rightre ||= ""; 1317 1318 my $datastr = $node->nodeValue(); 1319 my $restr = "s/" . $leftre . "/" . $rightre . "/"; 1320 $restr .= $flags if defined $flags; 1321 1322 eval '$datastr =~ '. $restr; 1323 1324 $node->setData( $datastr ); 1325} 1326 13271; 1328 1329package XML::LibXML::Comment; 1330 1331use vars qw(@ISA); 1332@ISA = ('XML::LibXML::Text'); 1333 13341; 1335 1336package XML::LibXML::CDATASection; 1337 1338use vars qw(@ISA); 1339@ISA = ('XML::LibXML::Text'); 1340 13411; 1342 1343#-------------------------------------------------------------------------# 1344# XML::LibXML::Attribute Interface # 1345#-------------------------------------------------------------------------# 1346package XML::LibXML::Attr; 1347use vars qw( @ISA ) ; 1348@ISA = ('XML::LibXML::Node') ; 1349 1350sub setNamespace { 1351 my ($self,$href,$prefix) = @_; 1352 my $n = $self->nodeName; 1353 if ( $self->_setNamespace($href,$prefix) ) { 1354 $self->setNodeName($n); 1355 return 1; 1356 } 1357 1358 return 0; 1359} 1360 13611; 1362 1363#-------------------------------------------------------------------------# 1364# XML::LibXML::Dtd Interface # 1365#-------------------------------------------------------------------------# 1366# this is still under construction 1367# 1368package XML::LibXML::Dtd; 1369use vars qw( @ISA ); 1370@ISA = ('XML::LibXML::Node'); 1371 13721; 1373 1374#-------------------------------------------------------------------------# 1375# XML::LibXML::PI Interface # 1376#-------------------------------------------------------------------------# 1377package XML::LibXML::PI; 1378use vars qw( @ISA ); 1379@ISA = ('XML::LibXML::Node'); 1380 1381sub setData { 1382 my $pi = shift; 1383 1384 my $string = ""; 1385 if ( scalar @_ == 1 ) { 1386 $string = shift; 1387 } 1388 else { 1389 my %h = @_; 1390 $string = join " ", map {$_.'="'.$h{$_}.'"'} keys %h; 1391 } 1392 1393 # the spec says any char but "?>" [17] 1394 $pi->_setData( $string ) unless $string =~ /\?>/; 1395} 1396 13971; 1398 1399#-------------------------------------------------------------------------# 1400# XML::LibXML::Namespace Interface # 1401#-------------------------------------------------------------------------# 1402package XML::LibXML::Namespace; 1403 1404# this is infact not a node! 1405sub prefix { return "xmlns"; } 1406sub getPrefix { return "xmlns"; } 1407sub getNamespaceURI { return "http://www.w3.org/2000/xmlns/" }; 1408 1409sub getNamespaces { return (); } 1410 1411sub nodeName { 1412 my $self = shift; 1413 my $nsP = $self->localname; 1414 return ( defined($nsP) && length($nsP) ) ? "xmlns:$nsP" : "xmlns"; 1415} 1416sub name { goto &nodeName } 1417sub getName { goto &nodeName } 1418 1419sub isEqualNode { 1420 my ( $self, $ref ) = @_; 1421 if ( ref($ref) eq "XML::LibXML::Namespace" ) { 1422 return $self->_isEqual($ref); 1423 } 1424 return 0; 1425} 1426 1427sub isSameNode { 1428 my ( $self, $ref ) = @_; 1429 if ( $$self == $$ref ){ 1430 return 1; 1431 } 1432 return 0; 1433} 1434 14351; 1436 1437#-------------------------------------------------------------------------# 1438# XML::LibXML::NamedNodeMap Interface # 1439#-------------------------------------------------------------------------# 1440package XML::LibXML::NamedNodeMap; 1441 1442use XML::LibXML::Common qw(:libxml); 1443 1444sub new { 1445 my $class = shift; 1446 my $self = bless { Nodes => [@_] }, $class; 1447 $self->{NodeMap} = { map { $_->nodeName => $_ } @_ }; 1448 return $self; 1449} 1450 1451sub length { return scalar( @{$_[0]->{Nodes}} ); } 1452sub nodes { return $_[0]->{Nodes}; } 1453sub item { $_[0]->{Nodes}->[$_[1]]; } 1454 1455sub getNamedItem { 1456 my $self = shift; 1457 my $name = shift; 1458 1459 return $self->{NodeMap}->{$name}; 1460} 1461 1462sub setNamedItem { 1463 my $self = shift; 1464 my $node = shift; 1465 1466 my $retval; 1467 if ( defined $node ) { 1468 if ( scalar @{$self->{Nodes}} ) { 1469 my $name = $node->nodeName(); 1470 if ( $node->nodeType() == XML_NAMESPACE_DECL ) { 1471 return; 1472 } 1473 if ( defined $self->{NodeMap}->{$name} ) { 1474 if ( $node->isSameNode( $self->{NodeMap}->{$name} ) ) { 1475 return; 1476 } 1477 $retval = $self->{NodeMap}->{$name}->replaceNode( $node ); 1478 } 1479 else { 1480 $self->{Nodes}->[0]->addSibling($node); 1481 } 1482 1483 $self->{NodeMap}->{$name} = $node; 1484 push @{$self->{Nodes}}, $node; 1485 } 1486 else { 1487 # not done yet 1488 # can this be properly be done??? 1489 warn "not done yet\n"; 1490 } 1491 } 1492 return $retval; 1493} 1494 1495sub removeNamedItem { 1496 my $self = shift; 1497 my $name = shift; 1498 my $retval; 1499 if ( $name =~ /^xmlns/ ) { 1500 warn "not done yet\n"; 1501 } 1502 elsif ( exists $self->{NodeMap}->{$name} ) { 1503 $retval = $self->{NodeMap}->{$name}; 1504 $retval->unbindNode; 1505 delete $self->{NodeMap}->{$name}; 1506 $self->{Nodes} = [grep {not($retval->isSameNode($_))} @{$self->{Nodes}}]; 1507 } 1508 1509 return $retval; 1510} 1511 1512sub getNamedItemNS { 1513 my $self = shift; 1514 my $nsURI = shift; 1515 my $name = shift; 1516 return undef; 1517} 1518 1519sub setNamedItemNS { 1520 my $self = shift; 1521 my $nsURI = shift; 1522 my $node = shift; 1523 return undef; 1524} 1525 1526sub removeNamedItemNS { 1527 my $self = shift; 1528 my $nsURI = shift; 1529 my $name = shift; 1530 return undef; 1531} 1532 15331; 1534 1535package XML::LibXML::_SAXParser; 1536 1537# this is pseudo class!!! and it will be removed as soon all functions 1538# moved to XS level 1539 1540use XML::SAX::Exception; 1541 1542# these functions will use SAX exceptions as soon i know how things really work 1543sub warning { 1544 my ( $parser, $message, $line, $col ) = @_; 1545 my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, 1546 ColumnNumber => $col, 1547 Message => $message, ); 1548 $parser->{HANDLER}->warning( $error ); 1549} 1550 1551sub error { 1552 my ( $parser, $message, $line, $col ) = @_; 1553 1554 my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, 1555 ColumnNumber => $col, 1556 Message => $message, ); 1557 $parser->{HANDLER}->error( $error ); 1558} 1559 1560sub fatal_error { 1561 my ( $parser, $message, $line, $col ) = @_; 1562 my $error = XML::SAX::Exception::Parse->new( LineNumber => $line, 1563 ColumnNumber => $col, 1564 Message => $message, ); 1565 $parser->{HANDLER}->fatal_error( $error ); 1566} 1567 15681; 1569 1570package XML::LibXML::RelaxNG; 1571 1572sub new { 1573 my $class = shift; 1574 my %args = @_; 1575 1576 my $self = undef; 1577 if ( defined $args{location} ) { 1578 $self = $class->parse_location( $args{location} ); 1579 } 1580 elsif ( defined $args{string} ) { 1581 $self = $class->parse_buffer( $args{string} ); 1582 } 1583 elsif ( defined $args{DOM} ) { 1584 $self = $class->parse_document( $args{DOM} ); 1585 } 1586 1587 return $self; 1588} 1589 15901; 1591 1592package XML::LibXML::Schema; 1593 1594sub new { 1595 my $class = shift; 1596 my %args = @_; 1597 1598 my $self = undef; 1599 if ( defined $args{location} ) { 1600 $self = $class->parse_location( $args{location} ); 1601 } 1602 elsif ( defined $args{string} ) { 1603 $self = $class->parse_buffer( $args{string} ); 1604 } 1605 1606 return $self; 1607} 1608 16091; 1610 1611#-------------------------------------------------------------------------# 1612# XML::LibXML::InputCallback Interface # 1613#-------------------------------------------------------------------------# 1614package XML::LibXML::InputCallback; 1615 1616use vars qw($_CUR_CB @_GLOBAL_CALLBACKS @_CB_STACK); 1617 1618$_CUR_CB = undef; 1619 1620@_GLOBAL_CALLBACKS = (); 1621@_CB_STACK = (); 1622 1623#-------------------------------------------------------------------------# 1624# global callbacks # 1625#-------------------------------------------------------------------------# 1626sub _callback_match { 1627 my $uri = shift; 1628 my $retval = 0; 1629 1630 # loop through the callbacks and and find the first matching 1631 # The callbacks are stored in execution order (reverse stack order) 1632 # any new global callbacks are shifted to the callback stack. 1633 foreach my $cb ( @_GLOBAL_CALLBACKS ) { 1634 1635 # callbacks have to return 1, 0 or undef, while 0 and undef 1636 # are handled the same way. 1637 # in fact, if callbacks return other values, the global match 1638 # assumes silently that the callback failed. 1639 1640 $retval = $cb->[0]->($uri); 1641 1642 if ( defined $retval and $retval == 1 ) { 1643 # make the other callbacks use this callback 1644 $_CUR_CB = $cb; 1645 unshift @_CB_STACK, $cb; 1646 last; 1647 } 1648 } 1649 1650 return $retval; 1651} 1652 1653sub _callback_open { 1654 my $uri = shift; 1655 my $retval = undef; 1656 1657 # the open callback has to return a defined value. 1658 # if one works on files this can be a file handle. But 1659 # depending on the needs of the callback it also can be a 1660 # database handle or a integer labeling a certain dataset. 1661 1662 if ( defined $_CUR_CB ) { 1663 $retval = $_CUR_CB->[1]->( $uri ); 1664 1665 # reset the callbacks, if one callback cannot open an uri 1666 if ( not defined $retval or $retval == 0 ) { 1667 shift @_CB_STACK; 1668 $_CUR_CB = $_CB_STACK[0]; 1669 } 1670 } 1671 1672 return $retval; 1673} 1674 1675sub _callback_read { 1676 my $fh = shift; 1677 my $buflen = shift; 1678 1679 my $retval = undef; 1680 1681 if ( defined $_CUR_CB ) { 1682 $retval = $_CUR_CB->[2]->( $fh, $buflen ); 1683 } 1684 1685 return $retval; 1686} 1687 1688sub _callback_close { 1689 my $fh = shift; 1690 my $retval = 0; 1691 1692 if ( defined $_CUR_CB ) { 1693 $retval = $_CUR_CB->[3]->( $fh ); 1694 shift @_CB_STACK; 1695 $_CUR_CB = $_CB_STACK[0]; 1696 } 1697 1698 return $retval; 1699} 1700 1701#-------------------------------------------------------------------------# 1702# member functions and methods # 1703#-------------------------------------------------------------------------# 1704 1705sub new { 1706 my $CLASS = shift; 1707 return bless {'_CALLBACKS' => []}, $CLASS; 1708} 1709 1710# add a callback set to the callback stack 1711# synopsis: $icb->register_callbacks( [$match_cb, $open_cb, $read_cb, $close_cb] ); 1712sub register_callbacks { 1713 my $self = shift; 1714 my $cbset = shift; 1715 1716 # test if callback set is complete 1717 if ( ref $cbset eq "ARRAY" and scalar( @$cbset ) == 4 ) { 1718 unshift @{$self->{_CALLBACKS}}, $cbset; 1719 } 1720} 1721 1722# remove a callback set to the callback stack 1723# if a callback set is passed, this function will check for the match function 1724sub unregister_callbacks { 1725 my $self = shift; 1726 my $cbset = shift; 1727 if ( ref $cbset eq "ARRAY" and scalar( @$cbset ) == 4 ) { 1728 $self->{_CALLBACKS} = [grep { $_->[0] != $cbset->[0] } @{$self->{_CALLBACKS}}]; 1729 } 1730 else { 1731 shift @{$self->{_CALLBACKS}}; 1732 } 1733} 1734 1735# make libxml2 use the callbacks 1736sub init_callbacks { 1737 my $self = shift; 1738 1739 $_CUR_CB = undef; 1740 @_CB_STACK = (); 1741 1742 @_GLOBAL_CALLBACKS = @{ $self->{_CALLBACKS} }; 1743 1744 if ( defined $XML::LibXML::match_cb and 1745 defined $XML::LibXML::open_cb and 1746 defined $XML::LibXML::read_cb and 1747 defined $XML::LibXML::close_cb ) { 1748 push @_GLOBAL_CALLBACKS, [$XML::LibXML::match_cb, 1749 $XML::LibXML::open_cb, 1750 $XML::LibXML::read_cb, 1751 $XML::LibXML::close_cb]; 1752 } 1753 1754 $self->lib_init_callbacks(); 1755} 1756 1757# reset libxml2's callbacks 1758sub cleanup_callbacks { 1759 my $self = shift; 1760 1761 $_CUR_CB = undef; 1762 @_GLOBAL_CALLBACKS = (); 1763 @_CB_STACK = (); 1764 1765 $self->lib_cleanup_callbacks(); 1766} 1767 17681; 1769 1770__END__ 1771