use strict; use warnings; # should be 43. use Test::More tests => 43; use XML::LibXML; use IO::File; # TEST ok(1, ' TODO : Add test name'); my $html = "example/test.html"; my $parser = XML::LibXML->new(); { my $doc = $parser->parse_html_file($html); # TEST ok($doc, ' TODO : Add test name'); } my $fh; open $fh, '<', $html or die "Can't open '$html': $!"; my $string; { local $/; $string = <$fh>; } seek($fh, 0, 0); # TEST ok($string, ' TODO : Add test name'); my $doc = $parser->parse_html_string($string); # TEST ok($doc, ' TODO : Add test name'); undef $doc; $doc = $parser->parse_html_fh($fh); # TEST ok($doc, ' TODO : Add test name'); $fh->close(); # parsing HTML's CGI calling links my $strhref = < foo

test EOHTML my $htmldoc; $parser->recover(1); eval { local $SIG{'__WARN__'} = sub { }; $htmldoc = $parser->parse_html_string( $strhref ); }; # ok( not $@ ); # TEST ok( $htmldoc, ' TODO : Add test name' ); # parse_html_string with encoding # encodings SKIP: { if (! eval { require Encode; }) { skip("Encoding related tests require Encode", 14); } use utf8; my $utf_str = "ěščř"; # w/o 'meta' charset $strhref = <

$utf_str

EOHTML # TEST ok( Encode::is_utf8($strhref), ' TODO : Add test name' ); $htmldoc = $parser->parse_html_string( $strhref ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); $htmldoc = $parser->parse_html_string( $strhref, { encoding => 'UTF-8' } ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); my $iso_str = Encode::encode('iso-8859-2', $strhref); $htmldoc = $parser->parse_html_string( $iso_str, { encoding => 'iso-8859-2' } ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); # w/ 'meta' charset $strhref = <

$utf_str

EOHTML $htmldoc = $parser->parse_html_string( $strhref, { encoding => 'UTF-8' }); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); $iso_str = Encode::encode('iso-8859-2', $strhref); $htmldoc = $parser->parse_html_string( $iso_str ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); $htmldoc = $parser->parse_html_string( $iso_str, { encoding => 'iso-8859-2', URI => 'foo' } ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); # TEST is($htmldoc->URI, 'foo', ' TODO : Add test name'); } # parse example/enc_latin2.html # w/ 'meta' charset { use utf8; my $utf_str = "ěščř"; my $test_file = 'example/enc_latin2.html'; my $fh; $htmldoc = $parser->parse_html_file( $test_file ); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); $htmldoc = $parser->parse_html_file( $test_file, { encoding => 'iso-8859-2', URI => 'foo' }); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); # TEST is($htmldoc->URI, 'foo', ' TODO : Add test name'); open $fh, '<', $test_file or die "Cannot open '$test_file' for reading - $!"; $htmldoc = $parser->parse_html_fh( $fh ); close $fh; # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); open $fh, '<', $test_file or die "Cannot open '$test_file' for reading - $!"; $htmldoc = $parser->parse_html_fh( $fh, { encoding => 'iso-8859-2', URI => 'foo', }); close $fh; # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->URI, 'foo', ' TODO : Add test name'); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); SKIP: { my $num_tests = 2; # LibXML_read_perl doesn't play well with encoding layers. Skip # unconditionally for now. skip("skipping until LibXML_read_perl is fixed", $num_tests); if (1000*$] < 5008) { skip("skipping for Perl < 5.8", $num_tests); } elsif (20627 > XML::LibXML::LIBXML_VERSION) { skip("skipping for libxml2 < 2.6.27", $num_tests); } # translate to UTF8 on perl-side open $fh, '<:encoding(iso-8859-2)', $test_file or die "Cannot open '$test_file' for reading - $!"; $htmldoc = $parser->parse_html_fh( $fh, { encoding => 'UTF-8' }); close $fh; # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); } } # parse example/enc2_latin2.html # w/o 'meta' charset { use utf8; my $utf_str = "ěščř"; my $test_file = 'example/enc2_latin2.html'; my $fh; $htmldoc = $parser->parse_html_file( $test_file, { encoding => 'iso-8859-2' }); # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); open $fh, '<', $test_file or die "Cannot open '$test_file' for reading - $!"; $htmldoc = $parser->parse_html_fh( $fh, { encoding => 'iso-8859-2' }); close $fh; # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); SKIP: { my $num_tests = 2; # LibXML_read_perl doesn't play well with encoding layers. Skip # unconditionally for now. skip("skipping until LibXML_read_perl is fixed", $num_tests); if (1000*$] < 5008) { skip("skipping for Perl < 5.8", $num_tests); } # translate to UTF8 on perl-side open my $fh, '<:encoding(iso-8859-2)', $test_file or die "Cannot open '$test_file' for reading - $!"; $htmldoc = $parser->parse_html_fh( $fh, { encoding => 'UTF-8' } ); close $fh; # TEST ok( $htmldoc && $htmldoc->getDocumentElement, ' TODO : Add test name' ); # TEST is($htmldoc->findvalue('//p/text()'), $utf_str, ' TODO : Add test name'); } } { # 44715 my $html = <<'EOF'; Test & Test some more

Meet you at the café?

How about this one?

EOF my $parser = XML::LibXML->new; eval { $doc = $parser->parse_html_string( $html => { recover => 1, suppress_errors => 1 } ); }; # TEST ok (!$@, 'No exception was thrown.'); # TEST ok ($doc, ' Parsing was successful.'); my $root = $doc && $doc->documentElement; my $val = $root && $root->findvalue('//input[@id="foo"]/@value'); # TEST is ($val, 'working', 'XPath'); } { # 70878 # HTML_PARSE_NODEFDTD SKIP: { skip("LibXML version is below 20708", 2) unless ( XML::LibXML::LIBXML_VERSION >= 20708 ); my $html = q(); my $p = XML::LibXML->new; # TEST like( $p->parse_html_string( $html, { recover => 2, no_defdtd => 1, encoding => 'UTF-8' } )->toStringHTML, qr/^\Q\E/, 'do not add a default DOCTYPE' ); # TEST like ( $p->parse_html_string( $html, { recover => 2, encoding => 'UTF-8' } )->toStringHTML, qr/^\Q