# <@LICENSE>
# Copyright 2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

=head1 NAME

Mail::SpamAssassin::Conf - SpamAssassin configuration file

=head1 SYNOPSIS

  # a comment

  rewrite_header Subject          *****SPAM*****

  full PARA_A_2_C_OF_1618         /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i
  describe PARA_A_2_C_OF_1618     Claims compliance with senate bill 1618

  header FROM_HAS_MIXED_NUMS      From =~ /\d+[a-z]+\d+\S*@/i
  describe FROM_HAS_MIXED_NUMS    From: contains numbers mixed in with letters

  score A_HREF_TO_REMOVE          2.0

  lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com

=head1 DESCRIPTION

SpamAssassin is configured using traditional UNIX-style configuration files,
loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin>
directories.

The C<#> character starts a comment, which continues until end of line.
B<NOTE:> using the C<#> character in the regular expression rules requires
escaping.  i.e.: C<\#>

Whitespace in the files is not significant, but please note that starting a
line with whitespace is deprecated, as we reserve its use for multi-line rule
definitions, at some point in the future.

Currently, each rule or configuration setting must fit on one line; multi-line
settings are not supported yet.

Paths can use C<~> to refer to the user's home directory.

Where appropriate below, default values are listed in parentheses.

=head1 USER PREFERENCES

The following options can be used in both site-wide (C<local.cf>) and
user-specific (C<user_prefs>) configuration files to customize how
SpamAssassin handles incoming email messages.

=cut

package Mail::SpamAssassin::Conf;

use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::NetSet;
use Mail::SpamAssassin::Constants qw(:sa);
use Mail::SpamAssassin::Conf::Parser;
use File::Spec;

use strict;
use bytes;

# Package globals, declared in three groups: module bookkeeping, the
# configuration value types / handler sentinels, and the rule-type ids.
use vars qw(@ISA $VERSION $DEFAULT_COMMANDS);

use vars qw(
  $CONF_TYPE_STRING $CONF_TYPE_BOOL $CONF_TYPE_NUMERIC
  $CONF_TYPE_HASH_KEY_VALUE $CONF_TYPE_ADDRLIST $CONF_TYPE_TEMPLATE
  $INVALID_VALUE $MISSING_REQUIRED_VALUE
);

use vars qw(
  $TYPE_HEAD_TESTS    $TYPE_HEAD_EVALS
  $TYPE_BODY_TESTS    $TYPE_BODY_EVALS
  $TYPE_FULL_TESTS    $TYPE_FULL_EVALS
  $TYPE_RAWBODY_TESTS $TYPE_RAWBODY_EVALS
  $TYPE_URI_TESTS     $TYPE_URI_EVALS
  $TYPE_META_TESTS    $TYPE_RBL_EVALS
);

@ISA = ();

# Rule-type ids.  An odd id marks an eval test.  These are plain package
# variables rather than constants so that the Parser can share them.
# TODO: move to Constants.pm?
$TYPE_HEAD_TESTS    = 0x08;
$TYPE_HEAD_EVALS    = 0x09;
$TYPE_BODY_TESTS    = 0x0a;
$TYPE_BODY_EVALS    = 0x0b;
$TYPE_FULL_TESTS    = 0x0c;
$TYPE_FULL_EVALS    = 0x0d;
$TYPE_RAWBODY_TESTS = 0x0e;
$TYPE_RAWBODY_EVALS = 0x0f;
$TYPE_URI_TESTS     = 0x10;
$TYPE_URI_EVALS     = 0x11;
$TYPE_META_TESTS    = 0x12;
$TYPE_RBL_EVALS     = 0x13;

# Names of the per-type rule tables, in the order the original code lists them.
my @rule_types = qw(
  body_tests uri_tests uri_evals head_tests
  head_evals body_evals full_tests full_evals
  rawbody_tests rawbody_evals rbl_evals meta_tests
);

$VERSION = 'bogus';     # avoid CPAN.pm picking up version strings later

# these are variables instead of constants so that other classes can
# access them; if they're constants, they'd have to go in Constants.pm
# TODO: move to Constants.pm?
$CONF_TYPE_STRING = 1;
$CONF_TYPE_BOOL = 2;
$CONF_TYPE_NUMERIC = 3;
$CONF_TYPE_HASH_KEY_VALUE = 4;
$CONF_TYPE_ADDRLIST = 5;
$CONF_TYPE_TEMPLATE = 6;
# Sentinel values; presumably returned by setting handlers to flag a
# missing or malformed argument -- confirm against Conf::Parser.
$MISSING_REQUIRED_VALUE = -998;
$INVALID_VALUE = -999;

# set to "1" by the test suite code, to record regression tests
# $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;

# search for "sub new {" to find the start of the code
###########################################################################

# Builds the list of configuration command definitions in @cmds, one
# hashref per setting.  Does nothing if $DEFAULT_COMMANDS is already
# defined, so the list is only built once.
# NOTE(review): the end of this sub is outside the reviewed range; what is
# finally done with @cmds is not visible here.
sub set_default_commands {
  return if (defined $DEFAULT_COMMANDS);

  # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
  # push each config item like this, to avoid a POD bug; it can't just accept
  # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
  my @cmds = ();

=head2 SCORING OPTIONS

=over 4

=item required_score n.nn (default: 5)

Set the score required before a mail is considered spam.  C<n.nn> can
be an integer or a real number.  5.0 is the default setting, and is
quite aggressive; it would be suitable for a single-user setup, but if
you're an ISP installing SpamAssassin, you should probably set the
default to be more conservative, like 8.0 or 10.0.  It is not
recommended to automatically delete or discard messages marked as
spam, as your users B<will> complain, but if you choose to do so, only
delete messages with an exceptionally high score such as 15.0 or
higher.  This option was previously known as C<required_hits> and that
name is still accepted, but is deprecated.

=cut

  # Numeric setting; 'required_hits' is kept as an alias for the old name.
  push (@cmds, {
    setting => 'required_score',
    aliases => ['required_hits'],       # backwards compat
    default => 5,
    type => $CONF_TYPE_NUMERIC
  });

=item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]

Assign scores (the number of points for a hit) to a given test.
Scores can be positive or negative real numbers or integers.
C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
that test; for example, 'FROM_ENDS_IN_NUMS'.

If only one valid score is listed, then that score is always used
for a test.

If four valid scores are listed, then the score that is used depends
on how SpamAssassin is being used. The first score is used when
both Bayes and network tests are disabled (score set 0). The second
score is used when Bayes is disabled, but network tests are enabled
(score set 1). The third score is used when Bayes is enabled and
network tests are disabled (score set 2). The fourth score is used
when Bayes is enabled and network tests are enabled (score set 3).

Setting a rule's score to 0 will disable that rule from running.

If any of the score values are surrounded by parentheses '()', then
all of the scores in the line are considered to be relative to the
already set score.  ie: '(3)' means increase the score for this
rule by 3 points in all score sets.  '(3) (0) (3) (0)' means increase
the score for this rule by 3 in score sets 0 and 2 only.

Score values must be plain numbers.  A line containing any other value
is reported as a configuration error and skipped, rather than being
treated as a score of 0.

If no score is given for a test by the end of the configuration, a
default score is assigned: a score of 1.0 is used for all tests,
except those whose names begin with 'T_' (this is used to indicate a
rule in testing) which receive 0.01.

Note that test names which begin with '__' are indirect rules used
to compose meta-match rules and can also act as prerequisites to
other rules.  They are not scored or listed in the 'tests hit'
reports, but assigning a score of 0 to an indirect rule will disable
it from running.

=cut

  push (@cmds, {
    setting => 'score',
    is_frequent => 1,
    # Handler for "score RULE n.nn [n.nn n.nn n.nn]".
    #   $self  - the configuration object being filled in
    #   $key   - the setting name (unused here)
    #   $value - everything after the setting name
    #   $line  - the full config line, used only in error messages
    # Stores one score per score set in $self->{scoreset}->[0..3]->{RULE}.
    # On any error nothing is stored and $self->{errors} is incremented.
    code => sub {
      my ($self, $key, $value, $line) = @_;

      # Single reporting path for every rejected line: warn when linting,
      # debug-log otherwise, and always count the error.
      my $reject = sub {
        my ($msg) = @_;
        if ($self->{lint_rules}) {
          warn $msg."\n";
        }
        else {
          dbg ($msg);
        }
        $self->{errors}++;
        return;
      };

      my($rule, @scores) = split(/\s+/, $value);

      unless (@scores) {
        $reject->("Score configuration option without actual scores, skipping: $line");
        return;
      }

      # Figure out if we're doing relative scores, remove the parens if we
      # are, and make sure every listed value really is a number.  Without
      # this check a typo such as "score FOO abc" would be coerced to 0 and
      # silently disable the rule.
      my $relative = 0;
      foreach my $score (@scores) {
        if ($score =~ s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
          $relative = 1;
        }
        unless ($score =~ /^[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?$/) {
          $reject->("Non-numeric score in SpamAssassin configuration, ".
                    "skipping: $line");
          return;
        }
      }

      # A relative adjustment needs an existing score to adjust.
      if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
        $reject->("Relative score without previous setting in SpamAssassin ".
                  "configuration, skipping: $line");
        return;
      }

      # Unless exactly four scores were given, the first one is used for
      # all four score sets.
      if (@scores != 4) {
        @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
      }

      # Set the actual scoreset values appropriately
      for my $index (0..3) {
        my $score = $relative ?
          $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
          $scores[$index];

        $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
      }
      return;
    }
  });

=back

=head2 WHITELIST AND BLACKLIST OPTIONS

=over 4

=item whitelist_from add@ress.com

Used to specify addresses which send mail that is often tagged (incorrectly) as
spam; it also helps if they are addresses of big companies with lots of
lawyers.  This way, if spammers impersonate them, they'll get into big trouble,
so it doesn't provide a shortcut around SpamAssassin.  If you want to whitelist
your own domain, be aware that spammers will often impersonate the domain of
the recipient.  The recommended solution is to instead use
C<whitelist_from_rcvd> as explained below.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters are not.
Regular expressions are not used for security reasons.

Multiple addresses per line, separated by spaces, is OK.  Multiple
C<whitelist_from> lines is also OK.

The headers checked for whitelist addresses are as follows: if C<Resent-From>
is set, use that; otherwise check all addresses taken from the following
set of headers:

	Envelope-Sender
	Resent-Sender
	X-Envelope-From
	From

In addition, the "envelope sender" data, taken from the SMTP envelope
data where this is available, is looked up.

e.g.

  whitelist_from joe@example.com fred@example.com
  whitelist_from *@example.com

=cut

  push (@cmds, {
    setting => 'whitelist_from',
    type => $CONF_TYPE_ADDRLIST
  });

=item unwhitelist_from add@ress.com

Used to override a default whitelist_from entry, so for example a distribution
whitelist_from can be overridden in a local.cf file, or an individual user can
override a whitelist_from entry in their own C<user_prefs> file.
The specified email address has to match exactly the address previously
used in a whitelist_from line.

e.g.

  unwhitelist_from joe@example.com fred@example.com
  unwhitelist_from *@example.com

=cut

  # Removal command: operates on the 'whitelist_from' list.
  push (@cmds, {
    command => 'unwhitelist_from',
    setting => 'whitelist_from',
    code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
  });

=item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net

Use this to supplement the whitelist_from addresses with a check against the
Received headers. The first parameter is the address to whitelist, and the
second is a string to match the relay's rDNS.

This string is matched against the reverse DNS lookup used during the handover
from the internet to your internal network's mail exchangers.  It can
either be the full hostname, or the domain component of that hostname.  In
other words, if the host that connected to your MX had an IP address that
mapped to 'sendinghost.spamassassin.org', you should specify
C<sendinghost.spamassassin.org> or just C<spamassassin.org> here.

Note that this requires that C<internal_networks> be correct.  For simple cases,
it will be, but for a complex network, or running with DNS checks off
or with C<-L>, you may get better results by setting that parameter.

e.g.

  whitelist_from_rcvd joe@example.com  example.com
  whitelist_from_rcvd *@axkit.org      sergeant.org

=item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net

Same as C<whitelist_from_rcvd>, but used for the default whitelist entries
in the SpamAssassin distribution.  The whitelist score is lower, because
these are often targets for spammer spoofing.

=cut

  # Both *_rcvd settings take the same arguments; each handler hands the
  # whitespace-separated fields to the parser under its own list name.
  push @cmds, {
    setting => 'whitelist_from_rcvd',
    code    => sub {
      my ($conf, $setting, $args, $raw_line) = @_;
      my @fields = split(/\s+/, $args);
      $conf->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd', @fields);
    },
  };

  push @cmds, {
    setting => 'def_whitelist_from_rcvd',
    code    => sub {
      my ($conf, $setting, $args, $raw_line) = @_;
      my @fields = split(/\s+/, $args);
      $conf->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd', @fields);
    },
  };

=item whitelist_allows_relays add@ress.com

Specify addresses which are in C<whitelist_from_rcvd> that sometimes
send through a mail relay other than the listed ones. By default mail
with a From address that is in C<whitelist_from_rcvd> that does not match
the relay will trigger a forgery rule. Including the address in
C<whitelist_allows_relays> prevents that.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters are not.
Regular expressions are not used for security reasons.

Multiple addresses per line, separated by spaces, is OK.  Multiple
C<whitelist_allows_relays> lines is also OK.

The specified email address does not have to match exactly the address
previously used in a whitelist_from_rcvd line as it is compared to the
address in the header.

e.g.

  whitelist_allows_relays joe@example.com fred@example.com
  whitelist_allows_relays *@example.com

=cut

  push @cmds, {
    setting => 'whitelist_allows_relays',
    type    => $CONF_TYPE_ADDRLIST,
  };

=item unwhitelist_from_rcvd add@ress.com

Used to override a default whitelist_from_rcvd entry, so for example a
distribution whitelist_from_rcvd can be overridden in a local.cf file,
or an individual user can override a whitelist_from_rcvd entry in
their own C<user_prefs> file.

The specified email address has to match exactly the address previously
used in a whitelist_from_rcvd line.

e.g.

  unwhitelist_from_rcvd joe@example.com fred@example.com
  unwhitelist_from_rcvd *@axkit.org

=cut

  # Removal applies to both the user list and the distribution default list.
  push @cmds, {
    setting => 'unwhitelist_from_rcvd',
    code    => sub {
      my ($conf, $setting, $args, $raw_line) = @_;
      my $parser = $conf->{parser};
      my @addrs  = split (/\s+/, $args);
      $parser->remove_from_addrlist_rcvd('whitelist_from_rcvd', @addrs);
      $parser->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', @addrs);
    },
  };

=item blacklist_from add@ress.com

Used to specify addresses which send mail that is often tagged (incorrectly) as
non-spam, but which the user doesn't want.  Same format as C<whitelist_from>.

=cut

  push @cmds, {
    setting => 'blacklist_from',
    type    => $CONF_TYPE_ADDRLIST,
  };

=item unblacklist_from add@ress.com

Used to override a default blacklist_from entry, so for example a distribution
blacklist_from can be overridden in a local.cf file, or an individual user can
override a blacklist_from entry in their own C<user_prefs> file.

e.g.

  unblacklist_from joe@example.com fred@example.com
  unblacklist_from *@spammer.com

=cut

  # Removal command: operates on the 'blacklist_from' list.
  push @cmds, {
    command => 'unblacklist_from',
    setting => 'blacklist_from',
    code    => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value,
  };

=item whitelist_to add@ress.com

If the given address appears as a recipient in the message headers
(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
be whitelisted.  Useful if you're deploying SpamAssassin system-wide,
and don't want some users to have their mail filtered.  Same format
as C<whitelist_from>.

There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to>
and C<all_spam_to>.  Users in the first level may still get some spammish
mails blocked, but users in C<all_spam_to> should never get mail blocked.

The headers checked for whitelist addresses are as follows: if C<Resent-To> or
C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
following set of headers:

	To
	Cc
	Apparently-To
	Delivered-To
	Envelope-Recipients
	Apparently-Resent-To
	X-Envelope-To
	Envelope-To
	X-Delivered-To
	X-Original-To
	X-Rcpt-To
	X-Real-To

=item more_spam_to add@ress.com

See above.

=item all_spam_to add@ress.com

See above.

=cut

  push (@cmds, {
    setting => 'whitelist_to',
    type => $CONF_TYPE_ADDRLIST
  });
  push (@cmds, {
    setting => 'more_spam_to',
    type => $CONF_TYPE_ADDRLIST
  });
  push (@cmds, {
    setting => 'all_spam_to',
    type => $CONF_TYPE_ADDRLIST
  });

=item blacklist_to add@ress.com

If the given address appears as a recipient in the message headers
(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
be blacklisted.  Same format as C<blacklist_from>.

=cut

  push (@cmds, {
    setting => 'blacklist_to',
    type => $CONF_TYPE_ADDRLIST
  });

=back

=head2 BASIC MESSAGE TAGGING OPTIONS

=over 4

=item rewrite_header { subject | from | to } STRING

By default, suspected spam messages will not have the C<Subject>,
C<From> or C<To> lines tagged to indicate spam. By setting this option,
the header will be tagged with C<STRING> to indicate that a message is
spam. For the From or To headers, this will take the form of an RFC 2822
comment following the address in parentheses. For the Subject header,
this will be prepended to the original subject. Note that you should
only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
unless C<report_safe> is 0. Otherwise, you may not be able to remove
the SpamAssassin markup via the normal methods. Parentheses are not
permitted in STRING if rewriting the From or To headers. (They will be
converted to square brackets.)

Any header name other than Subject, From or To is reported as a
configuration error and the line is ignored.

=cut

  push (@cmds, {
    setting => 'rewrite_header',
    # Handler for "rewrite_header HEADER STRING".
    #   $self  - the configuration object being filled in
    #   $key   - the setting name (unused here)
    #   $value - "HEADER STRING" as written in the config line
    #   $line  - the full config line (unused here)
    # Stores STRING in $self->{rewrite_header}->{HEADER} for From, Subject
    # and To; any other header name is counted as an error.
    code => sub {
      my ($self, $key, $value, $line) = @_;
      my($hdr, $string) = split(/\s+/, $value, 2);

      # Header names are matched case-insensitively; normalise to "Xxxx".
      # An empty value leaves no header name at all.
      $hdr = defined $hdr ? ucfirst(lc($hdr)) : '';

      # We only deal with From, Subject, and To ...
      if ($hdr =~ /^(?:From|Subject|To)$/) {
        # From/To markup is placed inside an RFC 2822 comment, so parens in
        # the string itself are turned into square brackets.
        if ($hdr ne 'Subject' && defined $string) {
          $string =~ tr/()/[]/;
        }
        $self->{rewrite_header}->{$hdr} = $string;
        return;
      }

      # Unsupported header: report it the same way the score handler reports
      # bad lines, so that lint runs see it and the error is counted.
      my $msg = "rewrite_header: ignoring $hdr, not From, Subject, or To";
      if ($self->{lint_rules}) {
        warn $msg."\n";
      }
      else {
        dbg ($msg);
      }
      $self->{errors}++;
      return;
    }
  });

=item add_header { spam | ham | all } header_name string

Customized headers can be added to the specified type of messages (spam,
ham, or "all" to add to either).
All headers begin with C (so a C Foo will generate a header called X-Spam-Foo). header_name is restricted to the character set [A-Za-z0-9_-]. C can contain tags as explained below in the B