=head1 NAME
robot-rules.t
=head1 DESCRIPTION
Test a number of different A</robots.txt> files against a number
of different User-agents.
=cut
require WWW::RobotRules;
use Carp;
use strict;
print "1..50\n";
my $content1 = <<EOM;
User-agent: *
Disallow: /private
Disallow: http://foo/also_private
User-agent: MOMspider
Disallow:
EOM
my $content2 = <<EOM;
User-agent: MOMspider
Disallow: /private
EOM
my $content3 = <<EOM;
EOM
my $content4 = <<EOM;
User-agent: *
Disallow: /private
Disallow: mailto:foo
User-agent: MOMspider
Disallow: /this
User-agent: Another
Disallow: /that
User-agent: SvartEnke1
Disallow: http://fOO
Disallow: http://bar
User-Agent: SvartEnke2
Disallow: ftp://foo
Disallow: http://foo:8080/
Disallow: http://bar/
EOM
my $content5 = <<EOM;
User-agent: *
Disallow: /
User-agent: Belle
Disallow: /west-wing/ User-agent: Beast
Disallow:
EOM
my $content6 = <<EOM;
User-agent: Beast
Disallow:
User-agent: Belle
Disallow: /west-wing/ User-agent: *
Disallow: /
EOM
my @tests1 = (
[$content1, 'MOMspider' =>
1 => 'http://foo/private' => 1,
2 => 'http://foo/also_private' => 1,
],
[$content1, 'Wubble' =>
3 => 'http://foo/private' => 0,
4 => 'http://foo/also_private' => 0,
5 => 'http://foo/other' => 1,
],
[$content2, 'MOMspider' =>
6 => 'http://foo/private' => 0,
7 => 'http://foo/other' => 1,
],
[$content2, 'Wubble' =>
8 => 'http://foo/private' => 1,
9 => 'http://foo/also_private' => 1,
10 => 'http://foo/other' => 1,
],
[$content3, 'MOMspider' =>
11 => 'http://foo/private' => 1,
12 => 'http://foo/other' => 1,
],
[$content3, 'Wubble' =>
13 => 'http://foo/private' => 1,
14 => 'http://foo/other' => 1,
],
[$content4, 'MOMspider' =>
15 => 'http://foo/private' => 1,
16 => 'http://foo/this' => 0,
17 => 'http://foo/that' => 1,
],
[$content4, 'Another' =>
18 => 'http://foo/private' => 1,
19 => 'http://foo/this' => 1,
20 => 'http://foo/that' => 0,
],
[$content4, 'Wubble' =>
21 => 'http://foo/private' => 0,
22 => 'http://foo/this' => 1,
23 => 'http://foo/that' => 1,
],
[$content4, 'Another/1.0' =>
24 => 'http://foo/private' => 1,
25 => 'http://foo/this' => 1,
26 => 'http://foo/that' => 0,
],
[$content4, "SvartEnke1" =>
27 => "http://foo/" => 0,
28 => "http://foo/this" => 0,
29 => "http://bar/" => 1,
],
[$content4, "SvartEnke2" =>
30 => "http://foo/" => 1,
31 => "http://foo/this" => 1,
32 => "http://bar/" => 1,
],
[$content4, "MomSpiderJr" => 33 => 'http://foo/private' => 1,
34 => 'http://foo/also_private' => 1,
35 => 'http://foo/this/' => 0,
],
[$content4, "SvartEnk" => 36 => "http://foo/" => 1,
37 => "http://foo/private/" => 0,
38 => "http://bar/" => 1,
],
[$content5, 'Villager/1.0' =>
39 => 'http://foo/west-wing/' => 0,
40 => 'http://foo/' => 0,
],
[$content5, 'Belle/2.0' =>
41 => 'http://foo/west-wing/' => 0,
42 => 'http://foo/' => 1,
],
[$content5, 'Beast/3.0' =>
43 => 'http://foo/west-wing/' => 1,
44 => 'http://foo/' => 1,
],
[$content6, 'Villager/1.0' =>
45 => 'http://foo/west-wing/' => 0,
46 => 'http://foo/' => 0,
],
[$content6, 'Belle/2.0' =>
47 => 'http://foo/west-wing/' => 0,
48 => 'http://foo/' => 1,
],
[$content6, 'Beast/3.0' =>
49 => 'http://foo/west-wing/' => 1,
50 => 'http://foo/' => 1,
],
);
my $t;
for $t (@tests1) {
my ($content, $ua) = splice(@$t, 0, 2);
my $robotsrules = new WWW::RobotRules($ua);
$robotsrules->parse('http://foo/robots.txt', $content);
my ($num, $path, $expected);
while(($num, $path, $expected) = splice(@$t, 0, 3)) {
my $allowed = $robotsrules->allowed($path);
$allowed = 1 if $allowed;
if($allowed != $expected) {
$robotsrules->dump;
confess "Test Failed: $ua => $path ($allowed != $expected)";
}
print "ok $num\n";
}
}