use strict;
use warnings;
package Mail::SpamAssassin::HTML;
use HTML::Parser 3.43 ();
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Constants qw(:sa);
use vars qw($re_loose $re_strict $re_other @ISA @EXPORT @EXPORT_OK);
require Exporter;
@ISA = qw(HTML::Parser Exporter);
@EXPORT = qw(get_results name_to_rgb);
@EXPORT_OK = qw();
my %elements = map {; $_ => 1 }
qw( a abbr acronym address area b base bdo big blockquote body br button caption cite code col colgroup dd del dfn div dl dt em fieldset form h1 h2 h3 h4 h5 h6 head hr html i img input ins kbd label legend li link map meta noscript object ol optgroup option p param pre q samp script select small span strong style sub sup table tbody td textarea tfoot th thead title tr tt ul var ),
qw( applet basefont center dir font frame frameset iframe isindex menu noframes s strike u ),
qw( nobr x-sigsep x-tab ),
;
my %tricks = map {; $_ => 1 }
qw( bgsound embed listing plaintext xmp ),
;
my %elements_text_style = map {; $_ => 1 }
qw( body font table tr th td big small basefont marquee span ),
;
my %elements_whitespace = map {; $_ => 1 }
qw( br div li th td dt dd p hr blockquote pre embed listing plaintext xmp ),
;
my %elements_uri = map {; $_ => 1 }
qw( body table tr td a area link img frame iframe embed script form base bgsound ),
;
my %ok_attributes;
$ok_attributes{basefont}{$_} = 1 for qw( color face size );
$ok_attributes{body}{$_} = 1 for qw( text bgcolor link alink vlink background );
$ok_attributes{font}{$_} = 1 for qw( color face size );
$ok_attributes{marquee}{$_} = 1 for qw( bgcolor background );
$ok_attributes{table}{$_} = 1 for qw( bgcolor );
$ok_attributes{td}{$_} = 1 for qw( bgcolor );
$ok_attributes{th}{$_} = 1 for qw( bgcolor );
$ok_attributes{tr}{$_} = 1 for qw( bgcolor );
$ok_attributes{span}{$_} = 1 for qw( style );
sub new {
my ($class) = @_;
my $self = $class->SUPER::new(
api_version => 3,
handlers => [
start_document => ["html_start", "self"],
start => ["html_tag", "self,tagname,attr,'+1'"],
end_document => ["html_end", "self"],
end => ["html_tag", "self,tagname,attr,'-1'"],
text => ["html_text", "self,dtext"],
comment => ["html_comment", "self,text"],
declaration => ["html_declaration", "self,text"],
],
marked_sections => 1);
$self;
}
sub html_start {
my ($self) = @_;
$self->put_results(html => 1);
$self->{basefont} = 3;
my %default = (tag => "default",
fgcolor => "#000000",
bgcolor => "#ffffff",
size => $self->{basefont});
push @{ $self->{text_style} }, \%default;
}
sub html_end {
my ($self) = @_;
delete $self->{text_style};
my @uri = ();
if (defined $self->{uri}) {
@uri = keys %{$self->{uri}};
}
$self->put_results(uri => \@uri);
$self->put_results(anchor => $self->{anchor});
$self->put_results(uri_detail => $self->{uri});
$self->put_results(uri_truncated => $self->{uri_truncated});
$self->put_results(image_area => $self->{image_area});
$self->put_results(length => $self->{length});
$self->put_results(min_size => $self->{min_size});
$self->put_results(max_size => $self->{max_size});
if (exists $self->{tags}) {
$self->put_results(closed_extra_ratio =>
($self->{closed_extra} / $self->{tags}));
}
$self->put_results(comment => $self->{comment});
$self->put_results(script => $self->{script});
$self->put_results(title => $self->{title});
$self->put_results(inside => $self->{inside});
if (exists $self->{backhair}) {
$self->put_results(backhair_count => scalar keys %{ $self->{backhair} });
}
if (exists $self->{elements} && exists $self->{tags}) {
$self->put_results(bad_tag_ratio =>
($self->{tags} - $self->{elements}) / $self->{tags});
}
if (exists $self->{elements_seen} && exists $self->{tags_seen}) {
$self->put_results(non_element_ratio =>
($self->{tags_seen} - $self->{elements_seen}) /
$self->{tags_seen});
}
if (exists $self->{tags} && exists $self->{obfuscation}) {
$self->put_results(obfuscation_ratio =>
$self->{obfuscation} / $self->{tags});
}
}
sub put_results {
my $self = shift;
my %results = @_;
while (my ($k, $v) = each %results) {
$self->{results}{$k} = $v;
}
}
sub get_results {
my ($self) = @_;
return $self->{results};
}
sub get_rendered_text {
my $self = shift;
my %options = @_;
return join('', @{ $self->{text} }) unless keys %options;
my $mask;
while (my ($k, $v) = each %options) {
next if !defined $self->{"text_$k"};
if (!defined $mask) {
$mask |= $v ? $self->{"text_$k"} : ~ $self->{"text_$k"};
}
else {
$mask &= $v ? $self->{"text_$k"} : ~ $self->{"text_$k"};
}
}
my $text = '';
my $i = 0;
for (@{ $self->{text} }) { $text .= $_ if vec($mask, $i++, 1); }
return $text;
}
sub parse {
my ($self, $text) = @_;
$self->{image_area} = 0;
$self->{title_index} = -1;
$self->{max_size} = 3; $self->{min_size} = 3; $self->{closed_html} = 0;
$self->{closed_body} = 0;
$self->{closed_extra} = 0;
$self->{text} = [];
$self->{length} += $1 if (length($text) =~ m/^(\d+)$/);
$text =~ s/ / /g;
$text =~ s/<(\w+)\s*\/>/<$1>/gi;
{
local $SIG{__WARN__} = sub {
warn @_ unless (defined $_[0] && $_[0] =~ /^Parsing of undecoded UTF-/);
};
$self->SUPER::parse($text);
}
$self->SUPER::eof;
return $self->{text};
}
sub html_tag {
my ($self, $tag, $attr, $num) = @_;
my $maybe_namespace = ($tag =~ m@^(?:o|st\d):[\w-]+/?$@);
if (exists $elements{$tag} || $maybe_namespace) {
$self->{elements}++;
$self->{elements_seen}++ if !exists $self->{inside}{$tag};
}
$self->{tags}++;
$self->{tags_seen}++ if !exists $self->{inside}{$tag};
$self->{inside}{$tag} += $num;
if ($self->{inside}{$tag} < 0) {
$self->{inside}{$tag} = 0;
$self->{closed_extra}++;
}
return if $maybe_namespace;
if (exists $elements{$tag} || exists $tricks{$tag}) {
text_style(@_) if exists $elements_text_style{$tag};
html_whitespace(@_) if exists $elements_whitespace{$tag};
if ($num == 1) {
html_uri(@_) if exists $elements_uri{$tag};
html_tests(@_);
}
else {
$self->{closed_html} = 1 if $tag eq "html";
$self->{closed_body} = 1 if $tag eq "body";
}
}
}
sub html_whitespace {
my ($self, $tag) = @_;
if ($tag eq "br" || $tag eq "div") {
$self->display_text("\n", whitespace => 1);
}
elsif ($tag =~ /^(?:li|t[hd]|d[td]|embed)$/) {
$self->display_text(" ", whitespace => 1);
}
elsif ($tag =~ /^(?:p|hr|blockquote|pre|listing|plaintext|xmp)$/) {
$self->display_text("\n\n", whitespace => 1);
}
}
sub push_uri {
my ($self, $type, $uri) = @_;
$uri = $self->canon_uri($uri);
my $target = target_uri($self->{base_href} || "", $uri);
if (length $uri) {
$self->{uri}->{$uri}->{types}->{$type} = 1;
}
}
sub canon_uri {
my ($self, $uri) = @_;
$uri =~ s/^\s+//;
$uri =~ s/\s+$//;
if (length $uri > MAX_URI_LENGTH) {
$self->{'uri_truncated'} = 1;
$uri = substr $uri, 0, MAX_URI_LENGTH;
}
return $uri;
}
sub html_uri {
my ($self, $tag, $attr) = @_;
if ($tag =~ /^(?:body|table|tr|td)$/) {
if (defined $attr->{background}) {
$self->push_uri($tag, $attr->{background});
}
}
elsif ($tag =~ /^(?:a|area|link)$/) {
if (defined $attr->{href}) {
$self->push_uri($tag, $attr->{href});
}
}
elsif ($tag =~ /^(?:img|frame|iframe|embed|script|bgsound)$/) {
if (defined $attr->{src}) {
$self->push_uri($tag, $attr->{src});
}
}
elsif ($tag eq "form") {
if (defined $attr->{action}) {
$self->push_uri($tag, $attr->{action});
}
}
elsif ($tag eq "base") {
if (my $uri = $attr->{href}) {
$uri = $self->canon_uri($uri);
$self->push_uri($tag, $uri);
if ($uri =~ m@^(?:https?|ftp):/{0,2}@i) {
$uri =~ s@^([a-z]+:/{0,2}[^/]+/.*?)[^/\.]+\.[^/\.]{2,4}$@$1@i;
$uri .= "/" unless $uri =~ m@/$@;
$self->{base_href} = $uri;
}
}
}
}
sub close_table_tag {
my ($self, $tag) = @_;
return unless grep { $_->{tag} eq $tag } @{ $self->{text_style} };
my $top;
while (@{ $self->{text_style} } && ($top = $self->{text_style}[-1]->{tag})) {
if (($tag eq "td" && ($top eq "font" || $top eq "td")) ||
($tag eq "tr" && $top =~ /^(?:font|td|tr)$/))
{
pop @{ $self->{text_style} };
}
else {
last;
}
}
}
sub close_tag {
my ($self, $tag) = @_;
return if !grep { $_->{tag} eq $tag } @{ $self->{text_style} };
while (my %current = %{ pop @{ $self->{text_style} } }) {
last if $current{tag} eq $tag;
}
}
sub text_style {
my ($self, $tag, $attr, $num) = @_;
$tag = "td" if $tag eq "th";
if ($num == 1) {
if ($tag eq "body") {
}
if ($tag eq "basefont" &&
exists $attr->{size} && $attr->{size} =~ /^\s*(\d+)/)
{
$self->{basefont} = $1;
return;
}
$self->close_table_tag($tag) if ($tag eq "td" || $tag eq "tr");
my %new = %{ $self->{text_style}[-1] };
$new{tag} = $tag;
if ($tag eq "big") {
$new{size} += 1;
push @{ $self->{text_style} }, \%new;
return;
}
if ($tag eq "small") {
$new{size} -= 1;
push @{ $self->{text_style} }, \%new;
return;
}
for my $name (keys %$attr) {
next unless exists $ok_attributes{$tag}{$name};
if ($name eq "text" || $name eq "color") {
$new{fgcolor} = name_to_rgb($attr->{$name});
}
elsif ($name eq "size") {
if ($attr->{size} =~ /^\s*([+-]\d+)/) {
$new{size} = $self->{basefont} + $1;
}
elsif ($attr->{size} =~ /^\s*(\d+)/) {
$new{size} = $1;
}
}
elsif ($name eq 'style') {
$new{style} = $attr->{style};
my @parts = split(/;/, $new{style});
foreach (@parts) {
if (/^\s*(background-)?color:\s*(.+)\s*$/i) {
my $whcolor = $1 ? 'bgcolor' : 'fgcolor';
my $value = lc $2;
if ($value =~ /rgb/) {
$value =~ tr/0-9,//cd;
my @rgb = split(/,/, $value);
splice @rgb, 3;
for(my $i=0; $i<3; $i++) {
if (!defined $rgb[$i]) {
$_ = 0;
}
elsif ($rgb[$i] > 255) {
$rgb[$i] = 255;
}
}
$new{$whcolor} = sprintf("#%02x%02x%02x", @rgb);
}
else {
$new{$whcolor} = name_to_rgb($value);
}
}
elsif (/^\s*([a-z_-]+)\s*:\s*(\S.*?)\s*$/i) {
$new{'style_'.$1} = $2;
}
}
}
elsif ($name eq "bgcolor") {
$attr->{bgcolor} = name_to_rgb($attr->{bgcolor});
}
else {
$new{$name} = $attr->{$name};
}
if ($new{size} > $self->{max_size}) {
$self->{max_size} = $new{size};
}
elsif ($new{size} < $self->{min_size}) {
$self->{min_size} = $new{size};
}
}
push @{ $self->{text_style} }, \%new;
}
else {
if ($tag ne "body") {
$self->close_tag($tag);
}
}
}
sub html_font_invisible {
my ($self, $text) = @_;
my $fg = $self->{text_style}[-1]->{fgcolor};
my $bg = $self->{text_style}[-1]->{bgcolor};
my $size = $self->{text_style}[-1]->{size};
my $display = $self->{text_style}[-1]->{style_display};
my $visibility = $self->{text_style}[-1]->{style_visibility};
if (substr($fg,-6) eq substr($bg,-6)) {
return 1;
}
elsif ($fg =~ /^\ my ($r1, $g1, $b1) = (hex($1), hex($2), hex($3));
if ($bg =~ /^\ my ($r2, $g2, $b2) = (hex($1), hex($2), hex($3));
my $r = ($r1 - $r2);
my $g = ($g1 - $g2);
my $b = ($b1 - $b2);
my $distance = ((0.2126*$r)**2 + (0.7152*$g)**2 + (0.0722*$b)**2)**0.5;
if ($distance < 12) {
$self->put_results(font_low_contrast => 1);
return 1;
}
}
}
if ($size <= 1) {
return 1;
}
if ($display && lc $display eq 'none') {
return 1;
}
if ($visibility && lc $visibility eq 'hidden') {
return 1;
}
return 0;
}
sub html_tests {
my ($self, $tag, $attr, $num) = @_;
if ($tag eq "font" && exists $attr->{face}) {
if ($attr->{face} !~ /^[a-z][a-z -]*[a-z](?:,\s*[a-z][a-z -]*[a-z])*$/i) {
$self->put_results(font_face_bad => 1);
}
}
if ($tag eq "img" && exists $self->{inside}{a} && $self->{inside}{a} > 0) {
$self->{uri}->{$self->{anchor_last}}->{anchor_text}->[-1] .= "<img>\n";
$self->{anchor}->[-1] .= "<img>\n";
}
if ($tag eq "img" && exists $attr->{width} && exists $attr->{height}) {
my $width = 0;
my $height = 0;
my $area = 0;
if ($attr->{width} =~ /^(\d+)(\%)?$/) {
$width = $1;
$width *= 8 if (defined $2 && $2 eq "%");
}
if ($attr->{height} =~ /^(\d+)(\%)?$/) {
$height = $1;
$height *= 6 if (defined $2 && $2 eq "%");
}
$width = 200 if $width <= 0;
$height = 200 if $height <= 0;
if ($width > 0 && $height > 0) {
$area = $width * $height;
$self->{image_area} += $area;
}
}
if ($tag eq "form" && exists $attr->{action}) {
$self->put_results(form_action_mailto => 1) if $attr->{action} =~ /mailto:/i
}
if ($tag eq "object" || $tag eq "embed") {
$self->put_results(embeds => 1);
}
if ($tag eq "a") {
$self->{anchor_last} = (exists $attr->{href} ? $self->canon_uri($attr->{href}) : "");
push(@{$self->{uri}->{$self->{anchor_last}}->{anchor_text}}, '');
push(@{$self->{anchor}}, '');
}
if ($tag eq "title") {
$self->{title_index}++;
$self->{title}->[$self->{title_index}] = "";
}
if ($tag eq "meta" &&
exists $attr->{'http-equiv'} &&
exists $attr->{content} &&
$attr->{'http-equiv'} =~ /Content-Type/i &&
$attr->{content} =~ /\bcharset\s*=\s*["']?([^"']+)/i)
{
$self->{charsets} .= exists $self->{charsets} ? " $1" : $1;
}
}
sub display_text {
my $self = shift;
my $text = shift;
my %display = @_;
# Unless it's specified to be invisible, then it's not invisible. ;)
if (!exists $display{invisible}) {
$display{invisible} = 0;
}
if ($display{whitespace}) {
# trim trailing whitespace from previous element if it was not whitespace
if (@{ $self->{text} } &&
(!defined $self->{text_whitespace} ||
!vec($self->{text_whitespace}, $#{$self->{text}}, 1)))
{
$self->{text}->[-1] =~ s/ $//;
}
}
else {
$text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g;
# trim leading whitespace if previous element was whitespace
if (@{ $self->{text} } &&
defined $self->{text_whitespace} &&
vec($self->{text_whitespace}, $#{$self->{text}}, 1))
{
$text =~ s/^ //;
}
}
push @{ $self->{text} }, $text;
while (my ($k, $v) = each %display) {
my $textvar = "text_".$k;
if (!exists $self->{$textvar}) { $self->{$textvar} = ''; }
vec($self->{$textvar}, $#{$self->{text}}, 1) = $v;
}
}
sub html_text {
my ($self, $text) = @_;
# text that is not part of body
if (exists $self->{inside}{script} && $self->{inside}{script} > 0)
{
push @{ $self->{script} }, $text;
return;
}
if (exists $self->{inside}{style} && $self->{inside}{style} > 0) {
return;
}
# text that is part of body and also stored separately
if (exists $self->{inside}{a} && $self->{inside}{a} > 0) {
# this doesn't worry about nested anchors
$self->{uri}->{$self->{anchor_last}}->{anchor_text}->[-1] .= $text;
$self->{anchor}->[-1] .= $text;
}
if (exists $self->{inside}{title} && $self->{inside}{title} > 0) {
$self->{title}->[$self->{title_index}] .= $text;
}
my $invisible_for_bayes = 0;
if ($text =~ /[^ \t\n\r\f\x0b\xa0]/) {
$invisible_for_bayes = $self->html_font_invisible($text);
}
if (exists $self->{text}->[-1]) {
if ($text =~ /^[^\s\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]/s &&
$self->{text}->[-1] =~ /[^\s\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]\z/s)
{
$self->{obfuscation}++;
}
if ($self->{text}->[-1] =~
/\b([^\s\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]{1,7})\z/s)
{
my $start = length($1);
if ($text =~ /^([^\s\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]{1,7})\b/s) {
$self->{backhair}->{$start . "_" . length($1)}++;
}
}
}
if ($invisible_for_bayes) {
$self->display_text($text, invisible => 1);
}
else {
$self->display_text($text);
}
}
sub html_comment {
my ($self, $text) = @_;
push @{ $self->{comment} }, $text;
}
sub html_declaration {
my ($self, $text) = @_;
if ($text =~ /^<!doctype/i) {
my $tag = "!doctype";
$self->{elements}++;
$self->{tags}++;
$self->{inside}{$tag} = 0;
}
}
my %html_color = (
aqua => 0x00ffff,
black => 0x000000,
blue => 0x0000ff,
fuchsia => 0xff00ff,
gray => 0x808080,
green => 0x008000,
lime => 0x00ff00,
maroon => 0x800000,
navy => 0x000080,
olive => 0x808000,
purple => 0x800080,
red => 0xff0000,
silver => 0xc0c0c0,
teal => 0x008080,
white => 0xffffff,
yellow => 0xffff00,
aliceblue => 0xf0f8ff,
antiquewhite => 0xfaebd7,
aqua => 0x00ffff,
aquamarine => 0x7fffd4,
azure => 0xf0ffff,
beige => 0xf5f5dc,
bisque => 0xffe4c4,
black => 0x000000,
blanchedalmond => 0xffebcd,
blue => 0x0000ff,
blueviolet => 0x8a2be2,
brown => 0xa52a2a,
burlywood => 0xdeb887,
cadetblue => 0x5f9ea0,
chartreuse => 0x7fff00,
chocolate => 0xd2691e,
coral => 0xff7f50,
cornflowerblue => 0x6495ed,
cornsilk => 0xfff8dc,
crimson => 0xdc143c,
cyan => 0x00ffff,
darkblue => 0x00008b,
darkcyan => 0x008b8b,
darkgoldenrod => 0xb8860b,
darkgray => 0xa9a9a9,
darkgreen => 0x006400,
darkgrey => 0xa9a9a9,
darkkhaki => 0xbdb76b,
darkmagenta => 0x8b008b,
darkolivegreen => 0x556b2f,
darkorange => 0xff8c00,
darkorchid => 0x9932cc,
darkred => 0x8b0000,
darksalmon => 0xe9967a,
darkseagreen => 0x8fbc8f,
darkslateblue => 0x483d8b,
darkslategray => 0x2f4f4f,
darkslategrey => 0x2f4f4f,
darkturquoise => 0x00ced1,
darkviolet => 0x9400d3,
deeppink => 0xff1493,
deepskyblue => 0x00bfff,
dimgray => 0x696969,
dimgrey => 0x696969,
dodgerblue => 0x1e90ff,
firebrick => 0xb22222,
floralwhite => 0xfffaf0,
forestgreen => 0x228b22,
fuchsia => 0xff00ff,
gainsboro => 0xdcdcdc,
ghostwhite => 0xf8f8ff,
gold => 0xffd700,
goldenrod => 0xdaa520,
gray => 0x808080,
green => 0x008000,
greenyellow => 0xadff2f,
grey => 0x808080,
honeydew => 0xf0fff0,
hotpink => 0xff69b4,
indianred => 0xcd5c5c,
indigo => 0x4b0082,
ivory => 0xfffff0,
khaki => 0xf0e68c,
lavender => 0xe6e6fa,
lavenderblush => 0xfff0f5,
lawngreen => 0x7cfc00,
lemonchiffon => 0xfffacd,
lightblue => 0xadd8e6,
lightcoral => 0xf08080,
lightcyan => 0xe0ffff,
lightgoldenrodyellow => 0xfafad2,
lightgray => 0xd3d3d3,
lightgreen => 0x90ee90,
lightgrey => 0xd3d3d3,
lightpink => 0xffb6c1,
lightsalmon => 0xffa07a,
lightseagreen => 0x20b2aa,
lightskyblue => 0x87cefa,
lightslategray => 0x778899,
lightslategrey => 0x778899,
lightsteelblue => 0xb0c4de,
lightyellow => 0xffffe0,
lime => 0x00ff00,
limegreen => 0x32cd32,
linen => 0xfaf0e6,
magenta => 0xff00ff,
maroon => 0x800000,
mediumaquamarine => 0x66cdaa,
mediumblue => 0x0000cd,
mediumorchid => 0xba55d3,
mediumpurple => 0x9370db,
mediumseagreen => 0x3cb371,
mediumslateblue => 0x7b68ee,
mediumspringgreen => 0x00fa9a,
mediumturquoise => 0x48d1cc,
mediumvioletred => 0xc71585,
midnightblue => 0x191970,
mintcream => 0xf5fffa,
mistyrose => 0xffe4e1,
moccasin => 0xffe4b5,
navajowhite => 0xffdead,
navy => 0x000080,
oldlace => 0xfdf5e6,
olive => 0x808000,
olivedrab => 0x6b8e23,
orange => 0xffa500,
orangered => 0xff4500,
orchid => 0xda70d6,
palegoldenrod => 0xeee8aa,
palegreen => 0x98fb98,
paleturquoise => 0xafeeee,
palevioletred => 0xdb7093,
papayawhip => 0xffefd5,
peachpuff => 0xffdab9,
peru => 0xcd853f,
pink => 0xffc0cb,
plum => 0xdda0dd,
powderblue => 0xb0e0e6,
purple => 0x800080,
red => 0xff0000,
rosybrown => 0xbc8f8f,
royalblue => 0x4169e1,
saddlebrown => 0x8b4513,
salmon => 0xfa8072,
sandybrown => 0xf4a460,
seagreen => 0x2e8b57,
seashell => 0xfff5ee,
sienna => 0xa0522d,
silver => 0xc0c0c0,
skyblue => 0x87ceeb,
slateblue => 0x6a5acd,
slategray => 0x708090,
slategrey => 0x708090,
snow => 0xfffafa,
springgreen => 0x00ff7f,
steelblue => 0x4682b4,
tan => 0xd2b48c,
teal => 0x008080,
thistle => 0xd8bfd8,
tomato => 0xff6347,
turquoise => 0x40e0d0,
violet => 0xee82ee,
wheat => 0xf5deb3,
white => 0xffffff,
whitesmoke => 0xf5f5f5,
yellow => 0xffff00,
yellowgreen => 0x9acd32,
);
sub name_to_rgb {
my $color = lc $_[0];
my $hex = $html_color{$color};
if (defined $hex) {
return sprintf("#%06x", $hex);
}
$color =~ s/^ $color .= "0" x (3 - (length($color) % 3)) if (length($color) % 3);
my $length = length($color) / 3;
my @colors = ($color =~ /(.{$length})(.{$length})(.{$length})/);
@colors = map { s/.*(.{8})$/$1/; s/(..).*/$1/; s/^(.)$/0$1/; $_; } @colors;
$color = join("", @colors);
$color =~ tr/0-9a-f/0/c;
return "#" . $color;
}
use constant URI_STRICT => 0;
sub _parse_uri {
my ($u) = @_;
my %u;
($u{scheme}, $u{authority}, $u{path}, $u{query}, $u{fragment}) =
$u =~ m|^(?:([^:/? return %u;
}
sub _remove_dot_segments {
my ($input) = @_;
my $output = "";
$input =~ s@^(?:\.\.?/)@/@;
while ($input) {
if ($input =~ s@^/\.(?:$|/)@/@) {
}
elsif ($input =~ s@^/\.\.(?:$|/)@/@) {
$output =~ s@/?[^/]*$@@;
}
elsif ($input =~ s@(/?[^/]*)@@) {
$output .= $1;
}
}
return $output;
}
sub _merge_uri {
my ($base_authority, $base_path, $r_path) = @_;
if (defined $base_authority && !$base_path) {
return "/" . $r_path;
}
else {
if ($base_path =~ m|/|) {
$base_path =~ s|(?<=/)[^/]*$||;
}
else {
$base_path = "";
}
return $base_path . $r_path;
}
}
sub target_uri {
my ($base, $r) = @_;
my %r = _parse_uri($r); my %base = _parse_uri($base); my %t;
if ((not URI_STRICT) and
(defined $r{scheme} && defined $base{scheme}) and
($r{scheme} eq $base{scheme}))
{
undef $r{scheme};
}
if (defined $r{scheme}) {
$t{scheme} = $r{scheme};
$t{authority} = $r{authority};
$t{path} = _remove_dot_segments($r{path});
$t{query} = $r{query};
}
else {
if (defined $r{authority}) {
$t{authority} = $r{authority};
$t{path} = _remove_dot_segments($r{path});
$t{query} = $r{query};
}
else {
if ($r{path} eq "") {
$t{path} = $base{path};
if (defined $r{query}) {
$t{query} = $r{query};
}
else {
$t{query} = $base{query};
}
}
else {
if ($r{path} =~ m|^/|) {
$t{path} = _remove_dot_segments($r{path});
}
else {
$t{path} = _merge_uri($base{authority}, $base{path}, $r{path});
$t{path} = _remove_dot_segments($t{path});
}
$t{query} = $r{query};
}
$t{authority} = $base{authority};
}
$t{scheme} = $base{scheme};
}
$t{fragment} = $r{fragment};
my $result = "";
if ($t{scheme}) {
$result .= $t{scheme} . ":";
}
elsif (defined $t{authority}) {
if ($t{authority} =~ /^www\d*\./i) {
$result .= "http:";
}
elsif ($t{authority} =~ /^ftp\d*\./i) {
$result .= "ftp:";
}
}
if ($t{authority}) {
$result .= "//" . $t{authority};
}
$result .= $t{path};
if (defined $t{query}) {
$result .= "?" . $t{query};
}
if (defined $t{fragment}) {
$result .= "#" . $t{fragment};
}
return $result;
}
1;
__END__