use strict;
use warnings;

require HTML::Parser;

# Markup fragments fed to the parser, one per kind of event under test.
# Their exact byte layout matters: every offset, length and tokenpos value
# in @tests below is measured against these strings (e.g. $decl is 52
# bytes long and its quoted literal starts at offset 22 with length 8).
my $decl  = '<!ENTITY nbsp   CDATA "&#160;" -- no-break space -->';
my $com1  = '<!-- Comment -->';
my $com2  = '<!-- Comment -- -- Comment -->';
my $start = '<a href="foo">';
my $end   = '</a>';
my $empty = "<IMG SRC='foo'/>";
my $proc  = '<? something completely different ?>';

# Argspec identifiers requested from the parser, in the order in which the
# columns of each expected event row are listed in @tests.
my @argspec = qw(
    self offset length
    event tagname tag token0
    text
    is_cdata dtext
    tokens
    tokenpos
    attr
    attrseq
);

# The default handler appends one array per event to @result.
my @result = ();
my $p = HTML::Parser->new(
    default_h      => [ \@result, join(',', @argspec) ],
    strict_comment => 1,
    xml_mode       => 1,
);

# Flat list of (input string, expected events) pairs.  Each expected event
# is an array whose columns follow @argspec.
my @tests = (
    $decl => [
        [ $p, 0, 52, 'declaration', 'ENTITY', '!ENTITY', 'ENTITY',
          '<!ENTITY nbsp   CDATA "&#160;" -- no-break space -->',
          undef, undef,
          [ 'ENTITY', 'nbsp', 'CDATA', '"&#160;"', '-- no-break space --' ],
          [ 2, 6, 9, 4, 16, 5, 22, 8, 31, 20 ],
          undef, undef ],
    ],
    $com1 => [
        [ $p, 0, 16, 'comment', ' Comment ', '# Comment ', ' Comment ',
          '<!-- Comment -->',
          undef, undef,
          [ ' Comment ' ],
          [ 4, 9 ],
          undef, undef ],
    ],
    $com2 => [
        [ $p, 0, 30, 'comment', ' Comment ', '# Comment ', ' Comment ',
          '<!-- Comment -- -- Comment -->',
          undef, undef,
          [ ' Comment ', ' Comment ' ],
          [ 4, 9, 18, 9 ],
          undef, undef ],
    ],
    $start => [
        [ $p, 0, 14, 'start', 'a', 'a', 'a',
          '<a href="foo">',
          undef, undef,
          [ 'a', 'href', '"foo"' ],
          [ 1, 1, 3, 4, 8, 5 ],
          { 'href', 'foo' }, [ 'href' ] ],
    ],
    $end => [
        [ $p, 0, 4, 'end', 'a', '/a', 'a',
          '</a>',
          undef, undef,
          [ 'a' ],
          [ 2, 1 ],
          undef, undef ],
    ],
    # An empty element yields a start event followed by a zero-length end
    # event positioned right after the tag.
    $empty => [
        [ $p, 0, 16, 'start', 'IMG', 'IMG', 'IMG',
          "<IMG SRC='foo'/>",
          undef, undef,
          [ 'IMG', 'SRC', "'foo'" ],
          [ 1, 3, 5, 3, 9, 5 ],
          { 'SRC', 'foo' }, [ 'SRC' ] ],
        [ $p, 16, 0, 'end', 'IMG', '/IMG', 'IMG',
          '',
          undef, undef,
          [ 'IMG' ],
          undef,
          undef, undef ],
    ],
    $proc => [
        [ $p, 0, 36, 'process', ' something completely different ',
          '? something completely different ',
          ' something completely different ',
          '<? something completely different ?>',
          undef, undef,
          [ ' something completely different ' ],
          [ 2, 32 ],
          undef, undef ],
    ],
    "$end\n$end" => [
        [ $p, 0, 4, 'end', 'a', '/a', 'a',
          '</a>',
          undef, undef,
          [ 'a' ],
          [ 2, 1 ],
          undef, undef ],
        [ $p, 4, 1, 'text', undef, undef, undef,
          "\n",
          '', "\n",
          undef,
          undef,
          undef, undef ],
        [ $p, 5, 4, 'end', 'a', '/a', 'a',
          '</a>',
          undef, undef,
          [ 'a' ],
          [ 2, 1 ],
          undef, undef ],
    ],
);

# @tests holds pairs, so the plan is half its element count.
my $n = @tests / 2;
print "1..$n\n";

# string_tag(@pieces)
#
# Render a list of values as one comma-separated string so that two event
# rows can be compared with a plain string comparison:
#   undef                -> undef
#   all-digit scalar     -> printed bare
#   any other scalar     -> wrapped in single quotes
#   array reference      -> [ ... ] rendered recursively
#   hash reference       -> { ... } rendered recursively (key/value list)
#   any other reference  -> <ref-type-or-class>
# The arguments are copied first, so the caller's data is not modified.
sub string_tag {
    my (@pieces) = @_;

    foreach my $part (@pieces) {
        if (!defined $part) {
            $part = 'undef';
        }
        elsif (!ref $part) {
            $part = "'$part'" if $part !~ /^\d+$/;
        }
        elsif ('ARRAY' eq ref $part) {
            $part = '[' . join(', ', string_tag(@$part)) . ']';
        }
        elsif ('HASH' eq ref $part) {
            $part = '{' . join(',', string_tag(%$part)) . '}';
        }
        else {
            # Blessed objects (such as the parser itself) are reduced to
            # their class name so both sides render identically.
            $part = '<' . ref($part) . '>';
        }
    }

    return join(", ", @pieces);
}

my $i = 0;
while (@tests) {
    my ($html, $expected) = splice @tests, 0, 2;
    ++$i;

    print "-" x 50, " $i\n";
    print "$html\n";
    print "-" x 50, " $i\n";

    @result = ();
    $p->parse($html)->eof;

    # Drop the document bracket events.  The emptiness guard keeps an empty
    # result from being autovivified into a bogus [] element by the
    # subscript below.
    shift @result if @result && $result[0][3] eq "start_document";
    pop @result   if @result && $result[-1][3] eq "end_document";

    # Compare results for each element expected; stop at the first
    # mismatch so that "not " is emitted exactly once before "ok".
    foreach my $event (@$expected) {
        my $want = string_tag($event);
        my $got  = string_tag(shift @result);
        print "        $got\n";
        if ($want ne $got) {
            print "Expected: $want\n";
            print "not ";
            last;
        }
    }

    print "ok $i\n";
}