sub usage {
die "build/mkrules [--src srcdir] [--exit_on_no_src] [--out outputdir]\n";
}
my $RULE_KEYWORDS_RE = qr{
header|rawbody|body|full|uri
|meta|mimeheader|describe|tflags
|reuse|score|urirhssub|uridnsbl
}x;
use strict;
use File::Find;
use File::Copy;
use File::Basename;
use Getopt::Long;
use lib 'lib';
use Mail::SpamAssassin;
use vars qw(
@opt_srcs $opt_out $opt_sandboxout $opt_manifest
$opt_manifestskip $opt_listpromotable $opt_active
$opt_activeout $default_file_header
$opt_rulemetadata $opt_exit_on_no_src
);
GetOptions("src=s" => \@opt_srcs,
"out=s",
"sandboxout=s",
"activeout=s",
"active=s",
"manifest=s",
"manifestskip=s",
"rulemetadata=s",
"exit_on_no_src",
);
if (!@opt_srcs) {
foreach ( 'rulescode', 'rulesrc' ) {
if (-d $_) {
push(@opt_srcs, $_);
}
}
}
if (!$opt_manifest && -f "MANIFEST") {
$opt_manifest = "MANIFEST";
}
if (!$opt_manifestskip && -f "MANIFEST.SKIP") {
$opt_manifestskip = "MANIFEST.SKIP";
}
if (!$opt_active && -f "rules/active.list") {
$opt_active = "rules/active.list";
}
if ($opt_exit_on_no_src) {
my $foundone = 0;
foreach my $src (@opt_srcs) {
if (-d $src) { $foundone++; last; }
}
if (!$foundone) {
print "no source directory found: exiting\n";
exit 0;
}
}
die "no src" unless (@opt_srcs >= 1);
my $promolist;
die "no out" unless ($opt_out);
die "unreadable out" unless (-d $opt_out);
die "unreadable active" unless (-f $opt_active);
$opt_sandboxout ||= "70_sandbox.cf";
$opt_activeout ||= "72_active.cf";
my $needs_compile = { };
my $found_output = { };
my $current_src;
my $newest_src_mtime = 0;
my $newest_out_mtime = 0;
my $default_file_header = join('', <DATA>);
foreach my $src (@opt_srcs) {
if (!-d $src) {
warn "WARNING: unreadable src '$src'\n";
next;
}
$current_src = $src;
File::Find::find ({
wanted => \&src_wanted,
no_chdir => 1
}, $src);
}
File::Find::find ({
wanted => \&out_wanted,
no_chdir => 1
}, $opt_out);
my $found_all_pm_files = 1;
foreach my $f (keys %{$needs_compile}) {
next unless ($f =~ /\.pm$/i);
if (!exists $found_output->{basename $f}) {
$found_all_pm_files = 0;
}
}
if ($newest_src_mtime && $newest_out_mtime
&& $newest_src_mtime < $newest_out_mtime
&& -f $opt_out.'/'.$opt_sandboxout
&& -f $opt_out.'/'.$opt_activeout
&& $found_all_pm_files)
{
print "mkrules: no rules updated\n";
exit 0;
}
my $rules = { };
my $file_manifest = { };
my $file_manifest_skip = [ ];
if ($opt_manifest) {
read_manifest($opt_manifest);
}
if ($opt_manifestskip) {
read_manifest_skip($opt_manifestskip);
}
my $active_rules = { };
read_active($opt_active);
my $seen_rules = { };
my $renamed_rules = { };
my $output_files = { };
my $output_file_text = { };
my $files_to_lint = { };
my $COMMENTS = '!comments!';
my $ALWAYS_PUBLISH = '!always_publish!';
read_all_rules($needs_compile);
read_rules_from_output_dir();
compile_output_files();
lint_output_files();
write_output_files();
exit;
sub lint_output_files {
foreach my $file (keys %{$files_to_lint}) {
my $text = join("\n", "file start $file", $output_file_text->{$file}, "file end $file");
if (lint_rule_text($text) != 0) {
warn "\nERROR: LINT FAILED, suppressing output: $file\n\n";
$output_file_text->{$file} = '';
}
}
}
sub lint_rule_text {
my ($text) = @_;
my $pretext = q{
loadplugin Mail::SpamAssassin::Plugin::Check
use_bayes 0
};
my $mailsa = Mail::SpamAssassin->new({
rules_filename => "./rules",
local_tests_only => 1,
dont_copy_prefs => 1,
config_text => $pretext.$text
});
my $errors = 0;
$mailsa->{lint_callback} = sub {
my %opts = @_;
return if ($opts{msg} =~ /
(?:score\sset\sfor\snon-existent|description\sexists)
/x);
warn "lint: $opts{msg}";
if ($opts{iserror}) {
$errors++;
}
};
$mailsa->lint_rules();
$mailsa->finish();
return $errors; }
sub src_wanted {
my $path = $File::Find::name;
my @st = stat $path;
if ($st[9] && $st[9] > $newest_src_mtime) {
$newest_src_mtime = $st[9];
}
return if (!-f $path);
return if ($path =~ /sandbox/ && !/(?:\d.*\.cf|\.pm)$/i);
return if ($path =~ /\.svn/);
my $dir = $path;
$dir =~ s/^${current_src}[\/\\\:]//s;
$dir =~ s/([^\/\\\:]+)$//;
my $filename = $1;
my $f = "$current_src/$dir$filename";
my $t;
$t = "$opt_out/$filename";
$needs_compile->{$f} = {
f => $f,
t => $t,
dir => $dir,
filename => $filename
};
}
sub out_wanted {
my $path = $File::Find::name;
return unless (-f $path);
return if ($path =~ /\.svn/);
return unless ($path =~ /\.(?:cf|pm)$/i);
my @st = stat $path;
if ($st[9] && $st[9] > $newest_out_mtime) {
$newest_out_mtime = $st[9];
}
my $dir = $path;
$dir =~ s/^${current_src}[\/\\\:]//s;
$dir =~ s/([^\/\\\:]+)$//;
my $filename = $1;
if ($path =~ /\.pm$/i) {
$found_output->{$filename} = 1;
}
}
sub read_all_rules {
my ($sources) = @_;
foreach my $f (sort {
my ($ae) = $a =~ /\.(cf|pm)$/;
my ($be) = $b =~ /\.(cf|pm)$/;
return $be cmp $ae || $a cmp $b;
} keys %$sources)
{
my $entry = $needs_compile->{$f};
my $t = $entry->{t};
my $needs_rebuild = 1;
if ($entry->{filename} =~ /\.pm$/) {
plugin_file_compile($entry);
}
elsif ($entry->{dir} =~ /sandbox/) {
rule_file_compile($f, $t, $entry->{filename}, 1);
}
elsif ($entry->{dir} =~ /extra/) {
next;
}
else {
if ($needs_rebuild) {
rule_file_compile($f, $t, $entry->{filename}, 0);
}
}
}
}
sub rule_file_compile {
my ($f, $t, $filename, $issandbox) = @_;
open (IN, "<$f") or die "cannot read $f";
my $rule_order = [ ];
my $lastrule = $COMMENTS;
if (!defined $rules->{$ALWAYS_PUBLISH}) {
$rules->{$ALWAYS_PUBLISH} = rule_entry_create();
}
my $current_conditional;
my $current_comments = '';
while (<IN>) {
my $orig = $_;
s/^
s/
next if (/^$/);
my $lang = '';
if (s/^lang\s+(\S+)\s+//) {
$lang = $1;
}
if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/)
{
my $type = $1;
my $name = $2;
my $val = $3;
my $origname = $name;
if ($issandbox) {
$name = sandbox_rule_name_avoid_collisions($name, $f);
}
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{origname} = $origname;
$rules->{$name}->{cond} ||= $current_conditional;
$rules->{$name}->{text} .= $current_comments . $orig;
$rules->{$name}->{srcfile} = $f;
if ($current_conditional && $current_conditional =~ /ifplugin\s+(\S+)/) {
$rules->{$name}->{ifplugin} = $1;
}
if ($type =~ /^
(?:header|rawbody|body|full|uri|meta|mimeheader)
$/x)
{
$rules->{$name}->{found_definition} = 1;
}
elsif ($type eq 'tflags') {
if ($val =~ /\buserconf\b/) {
$rules->{$name}->{forceactive} = 1;
}
$val =~ s/\s+/ /gs;
if ($rules->{$name}->{tflags}) {
$rules->{$name}->{tflags} .= ' '.$val;
} else {
$rules->{$name}->{tflags} = $val;
}
}
$current_comments = '';
$lastrule = $name;
push (@$rule_order, $name);
}
elsif (/^
(pubfile|publish)
\s+(\S+)\s*(.*?)$
/x)
{
my $command = $1;
my $name = $2;
my $val = $3;
my $origname = $name;
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
if ($command eq 'publish') {
if (!defined $val || $val eq '') { $val = '1'; }
}
elsif ($command eq 'pubfile') {
if (!filename_in_manifest($opt_out.'/'.$val)) {
warn "$val: WARNING: not listed in manifest file, using default\n";
next; }
}
$rules->{$name}->{$command} = $val;
if ($rules->{$name}->{publish}) {
$rules->{$name}->{forceactive} = 1;
}
}
elsif (/^
(if|ifplugin)
\s+(.*?)$
/x)
{
$current_conditional = $orig;
}
elsif (/^endif\b/x)
{
undef $current_conditional;
}
elsif (/^require_version\s*(\S+)\b/) {
}
elsif (/^loadplugin\s*(\S+)\b/) {
my $name = 'loadplugin_'.$1;
unless ($rules->{$name}) {
$rules->{$name} = rule_entry_create();
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{iscommand} = 1;
}
if (/^loadplugin\s*\S+\s+(\S+)/) {
my $fname = $1;
my $fpath = dirname($f)."/".$fname;
if (!-f $fpath) {
warn "$f: WARNING: plugin code file '$fpath' not found, line ignored: $orig";
next;
}
if ($fpath =~ /sandbox/i) {
$rules->{$name}->{sandbox_plugin} = 1;
}
if (!filename_in_manifest($opt_out.'/'.$fname)) {
warn "$f: WARNING: '$opt_out/$fname' not listed in manifest file, making 'tryplugin': $orig";
$orig =~ s/^\s*loadplugin\b/tryplugin/;
}
}
$rules->{$name}->{text} .= $orig;
unshift (@$rule_order, $name);
}
else {
my $name = $_;
/^\s*(\S+)/ and $name = $1;
$name =~ s/\s+/ /gs;
my $forceactive = 1;
if (/^test\s*/) {
$forceactive = 0;
$name = $_; $name =~ s/\s+/ /gs;
}
my $cond;
if ($current_conditional) {
$name = $current_conditional; $name =~ s/\s+/ /gs;
$cond = $current_conditional;
}
if ($issandbox) {
$name .= "_sandbox";
}
unless ($rules->{$name}) {
$rules->{$name} = rule_entry_create();
}
$rules->{$name}->{cond} ||= $cond;
$rules->{$name}->{issandbox} = $issandbox;
$rules->{$name}->{forceactive} = $forceactive;
$rules->{$name}->{iscommand} = 1;
$rules->{$name}->{text} .= $orig;
unshift (@$rule_order, $name);
}
}
close IN;
if ($current_comments) {
$rules->{$COMMENTS}->{text} .= $current_comments;
}
copy_to_output_buffers($rule_order, $issandbox, $f, $filename);
foreach my $name (@$rule_order) {
$seen_rules->{$name} = 1;
}
}
sub read_rules_from_output_dir {
return unless ($opt_rulemetadata);
foreach my $file (<$opt_out/*.cf>) {
next unless ($file =~ /\d\d_\S+\.cf$/);
next if (pubfile_is_activeout($file));
next if (pubfile_is_sandboxout($file));
read_output_file($file);
}
}
sub read_output_file {
my ($file) = @_;
open (IN, "<$file") or warn "cannot read $file";
while (<IN>) {
my $orig = $_;
s/^
s/
next if (/^$/);
my $lang = '';
if (s/^lang\s+(\S+)\s+//) {
$lang = $1;
}
if (/^tflags\s+(\S+)\s+(.*)$/) {
my $name = $1;
my $val = $2;
$val =~ s/\s+/ /gs;
if ($rules->{$name}->{tflags}) {
$rules->{$name}->{tflags} .= ' '.$val;
} else {
$rules->{$name}->{tflags} = $val;
}
}
}
close IN;
}
sub copy_to_output_buffers {
my ($rule_order, $issandbox, $f, $filename) = @_;
foreach my $pubfile ($opt_out.'/'.$opt_sandboxout,
$opt_out.'/'.$opt_activeout)
{
$output_files->{$pubfile} = {
header => $default_file_header
};
}
my %already_done = ();
my $copied_active = 0;
my $copied_other = 0;
foreach my $name (@$rule_order)
{
next if exists $already_done{$name};
$already_done{$name} = undef;
my $text = $rules->{$name}->{text};
if (!$text) {
next; }
my $srcfile = $rules->{$name}->{srcfile};
my $pubfile = pubfile_for_rule($rules, $name);
my $is_active = 0;
if (pubfile_is_activeout($pubfile)) {
$is_active++;
}
my $cond = $rules->{$name}->{cond};
my $pluginclass = $rules->{$name}->{ifplugin};
if ($cond)
{
my $ifplugin_text_name = "loadplugin_".($pluginclass || "");
if ($pluginclass && $rules->{$ifplugin_text_name})
{
if ($rules->{$ifplugin_text_name}->{sandbox_plugin}) {
$pubfile = $opt_out.'/'.$opt_sandboxout;
$is_active = 0;
}
$rules->{$ifplugin_text_name}->{output_file} = $pubfile;
}
$rules->{$name}->{output_text} = "\n".$cond.$text."endif\n";
} else {
$rules->{$name}->{output_text} = $text;
}
$rules->{$name}->{output_file} = $pubfile;
$output_files->{$pubfile} = {
header => $default_file_header
};
if ($is_active) {
$copied_active++;
} else {
$copied_other++;
}
}
print "$f: $copied_active active rules, ".
"$copied_other other\n";
}
sub pubfile_for_rule {
my ($rules, $name) = @_;
my $pubfile;
if ($rules->{$name}->{publish}) {
$pubfile = $opt_out.'/'.$opt_activeout;
}
if (!$pubfile) {
if ($active_rules->{$name} || $rules->{$name}->{forceactive} || (!$rules->{$name}->{found_definition} && !$rules->{$name}->{iscommand}))
{
$pubfile = $opt_out.'/'.$opt_activeout;
}
elsif ($rules->{$name}->{issandbox}) {
$pubfile = $opt_out.'/'.$opt_sandboxout;
}
else {
warn "oops? inactive rule, non-sandbox, shouldn't be possible anymore";
$pubfile = $opt_out.'/'.$opt_sandboxout;
}
}
return $pubfile;
}
sub plugin_file_compile {
my ($entry) = @_;
return if $opt_listpromotable;
if (0 && -e $entry->{t}) {
warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n";
}
else {
copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!";
}
}
sub compile_output_files {
my $always = $rules->{$ALWAYS_PUBLISH}->{output_text};
foreach my $file (keys %$output_files) {
$output_file_text->{$file} = $output_files->{$file}->{header};
if ($always && pubfile_is_activeout($file)) {
$output_file_text->{$file} .= $always;
}
}
my @rulenames = sort {
if ($a =~ /^loadplugin_/) {
return -1;
}
elsif ($b =~ /^loadplugin_/) {
return 1;
}
return $a cmp $b;
} keys %$rules;
my %seen = ();
foreach my $rule (@rulenames) {
fix_up_rule_dependencies($rule);
}
foreach my $rule (@rulenames) {
my $pubfile = $rules->{$rule}->{output_file};
next unless ($pubfile && pubfile_is_activeout($pubfile));
fix_up_rule_dependencies($rule);
}
my $rulemd = '';
foreach my $rule (@rulenames) {
next if ($rule =~ /^__/);
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
next unless defined ($text);
$rulemd .= get_rulemetadata_string($rule);
$output_file_text->{$pubfile} .= "##{ $rule\n".
$text.
"##} ".$rule."\n\n";
}
foreach my $rule (@rulenames) {
next unless ($rule =~ /^__/);
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
next unless defined ($text);
$rulemd .= get_rulemetadata_string($rule);
$output_file_text->{$pubfile} .= $text;
}
foreach my $file (keys %$output_files) {
$files_to_lint->{$file} = 1;
}
if ($opt_rulemetadata) {
open (RULEMD, ">".$opt_rulemetadata)
or die "cannot write rulemd to $opt_rulemetadata";
print RULEMD "<rulemds>", $rulemd, "</rulemds>\n";
close RULEMD or die "cannot close rulemd to $opt_rulemetadata";
}
}
sub get_rulemetadata_string {
my ($rule) = @_;
return '' unless ($opt_rulemetadata);
my $mod = 0;
my $srcfile = '';
if ($rules->{$rule}->{found_definition}) {
$srcfile = $rules->{$rule}->{srcfile} || '';
if ($srcfile) {
my @s = stat $srcfile;
if (@s) {
$mod = $s[9];
}
}
}
my $tf = $rules->{$rule}->{tflags} || '';
return "<rulemetadata>".
"<name>$rule</name>".
"<src>$srcfile</src>".
"<srcmtime>$mod</srcmtime>".
"<tf>$tf</tf>".
"</rulemetadata>\n";
}
sub fix_up_rule_dependencies {
my $rule = shift;
my $pubfile = $rules->{$rule}->{output_file};
my $text = $rules->{$rule}->{output_text};
return unless $text;
while ($text =~ /^\s*meta\s+(.*)$/mg) {
my $line = $1;
while ($line =~ /\b([_A-Za-z0-9]+)\b/g) {
my $rule2 = $1;
sed_renamed_rule_names(\$rule2);
next unless ($rules->{$rule2} && $rules->{$rule2}->{output_file});
my $pubfile2 = $rules->{$rule2}->{output_file};
next if (pubfile_is_activeout($pubfile2));
$rules->{$rule2}->{output_file} = $pubfile;
}
}
}
sub pubfile_is_activeout {
return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_activeout\E$/);
return 0;
}
sub pubfile_is_sandboxout {
return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_sandboxout\E$/);
return 0;
}
sub write_output_files {
foreach my $pubfile (sort keys %$output_files) {
if (-f $pubfile) {
unlink $pubfile or die "cannot remove output file '$pubfile'";
}
if (!filename_in_manifest($pubfile)) {
warn "$pubfile: WARNING: not listed in manifest file\n";
}
my $text = $output_file_text->{$pubfile};
if ($text) {
open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
sed_renamed_rule_names(\$text);
print OUT $text;
close OUT or die "cannot close output file '$pubfile'";
}
else {
print "$pubfile: no rules promoted\n";
open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
close OUT or die "cannot close output file '$pubfile'";
}
}
}
sub rule_entry_create {
return {
text => '',
publish => 0
};
}
sub sandbox_rule_name_avoid_collisions {
my ($rule, $path) = @_;
my $new;
my $newreason;
my $dowarn = 0;
return $rule if $opt_listpromotable;
return $rule if $active_rules->{$rule};
return $rule if $rules->{$rule}->{forceactive};
if ($rule !~ /^(?:T_|__)/) {
$new = "T_".$rule;
$newreason = "missing T_ prefix";
}
elsif (!exists $seen_rules->{$rule}) {
return $rule;
}
else {
$new = $path;
$new =~ s/[^A-Za-z0-9]+/_/gs;
$new =~ s/_+/_/gs;
$new =~ s/^_//;
$new =~ s/_$//;
$new = $rule.'_'.$new;
$newreason = "collision with existing rule";
$dowarn = 1;
}
if (!$renamed_rules->{$new}) {
$renamed_rules->{$new} = $rule;
if ($dowarn) {
warn "WARNING: $rule: renamed as $new due to $newreason\n";
}
}
return $new;
}
sub sed_renamed_rule_names {
my ($textref) = @_;
foreach my $new (keys %{$renamed_rules}) {
my $rule = $renamed_rules->{$new};
$$textref =~ s/\b${rule}\b/${new}/gs;
}
}
sub read_manifest {
my ($mfest) = @_;
open (IN, "<$mfest") or warn "cannot read $mfest";
while (<IN>) {
next if /^ /^\s*(.*?)\s*$/ and $file_manifest->{$1} = 1;
}
close IN;
}
sub read_manifest_skip {
my ($mfest) = @_;
open (IN, "<$mfest") or warn "cannot read $mfest";
while (<IN>) {
next if /^ /^\s*(.*?)\s*$/ and push (@{$file_manifest_skip}, qr/$1/);
}
close IN;
}
sub read_active {
my ($fname) = @_;
open (IN, "<$fname") or die "cannot read $fname";
while (<IN>) {
s/ /^(\S+)/ and $active_rules->{$1} = 1;
}
close IN;
}
sub filename_in_manifest {
my ($fname) = @_;
return 1 if ($file_manifest->{$fname});
foreach my $skipre (@{$file_manifest_skip}) {
return 1 if ($fname =~ $skipre);
}
return 0;
}
__DATA__
require_version @@VERSION@@