# 20_uri_tests.cf   [plain text]


# SpamAssassin rules file: URI tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

# Tie this rules file to one SpamAssassin release; see require_version in
# 'perldoc Mail::SpamAssassin::Conf' for what happens on a mismatch.
# NOTE(review): @@VERSION@@ looks like a placeholder that is substituted
# before the file is installed -- confirm against the build scripts.
require_version @@VERSION@@

# Host part starts with a run of seven or more digits, e.g. an address
# written as one plain decimal number (http://3232235777/).
uri NUMERIC_HTTP_ADDR		/^https?\:\/\/\d{7,}/is
describe NUMERIC_HTTP_ADDR	Uses a numeric IP address in URL

# Four dot-separated digit groups right after the scheme, optionally preceded
# by a "...@" userinfo prefix.  The digit groups are not range-checked, so
# this tests the shape of the host rather than validating an IPv4 address.
uri NORMAL_HTTP_TO_IP		/^https?\:\/\/(?:\S*\@)?\d+\.\d+\.\d+\.\d+/i
describe NORMAL_HTTP_TO_IP	Uses a dotted-decimal IP address in URL
 	
# Theo sez:
# Have gotten FPs off this, and whitespace can't be in the host, so the
# host part is matched with [^\/\s]*, which stops at the first "/" or
# whitespace character.  Example of text that produced an FP:
# %    Visit my homepage: http://i.like.foo.com %
# The %-escape itself must be "%" followed by two hex digits.
uri HTTP_ESCAPED_HOST		/^https?\:\/\/[^\/\s]*%[0-9a-fA-F][0-9a-fA-F]/
describe HTTP_ESCAPED_HOST	Uses %-escapes inside a URL's hostname

# Control character (0x00-0x1f) before the first "/" or whitespace.
# note: do not match \t, \n or \r -- 0x09, 0x0a and 0x0d are deliberately
# left out of the character class.
uri HTTP_CTRL_CHARS_HOST	/^https?\:\/\/[^\/\s]*[\x00-\x08\x0b\x0c\x0e-\x1f]/
describe HTTP_CTRL_CHARS_HOST	Uses control sequences inside a URL hostname

# look for URI with escaped 0-9, A-Z, or a-z characters (all other safe
# characters have been well-tested, but are sometimes unnecessarily escaped
# in nonspam; requiring "http" or "https" also reduces false positives).
# The three alternatives cover, case-insensitively:
#   3\d           %30-%39            digits 0-9
#   [46][1-9a-f]  %41-%4f, %61-%6f   letters A-O and a-o
#   [57][\da]     %50-%5a, %70-%7a   letters P-Z and p-z
uri HTTP_EXCESSIVE_ESCAPES	/^https?:\/\/\S*%(?:3\d|[46][1-9a-f]|[57][\da])/i
describe HTTP_EXCESSIVE_ESCAPES	Completely unnecessary %-escapes inside a URL

# bug 1801
# Dotted-decimal host (optionally after a "...@" prefix) followed, within at
# most 20 further characters, by one of "cgi", "click", "ads" or "id=".
uri IP_LINK_PLUS	/^https?\:\/\/(?:\S*\@)?\d+\.\d+\.\d+\.\d+.{0,20}(?:cgi|click|ads|id\=)/i
describe IP_LINK_PLUS	Dotted-decimal IP address followed by CGI

# "remove" anywhere after the first "/" that follows the host part.
# The pattern has no /i flag, so both the scheme and "remove" must be
# lower case to hit.
uri REMOVE_PAGE			/^https?:\/\/[^\/]+\/.*?remove/
describe REMOVE_PAGE		URL of page called "remove"
 	
# mailto: target that is a run of letters followed by two or more digits
# and then "@" (e.g. mailto:bob1234@...); anything further after the digits
# and before "@" prevents a match.
uri MAILTO_TO_SPAM_ADDR		/^mailto:[a-z]+\d{2,}\@/is
describe MAILTO_TO_SPAM_ADDR	Includes a link to a likely spammer email
 	
# Any mailto: URI containing "remove" (case-insensitive), whether in the
# address itself or in anything that follows it.
uri MAILTO_TO_REMOVE		/^mailto:.*?remove/is
describe MAILTO_TO_REMOVE	Includes a 'remove' email address

# allow ports 80 and 443 which are http and https, respectively, plus 8080:
# the three negative lookbehinds after \d+ exempt exactly these ports.
# we don't want to hit http://www.cnn.com:USArticle1840@www.liquidshirts.com/
# though, which actually doesn't have a weird port in it.
# The port digits must be followed by "/", whitespace or end of string.
uri WEIRD_PORT			m{https?://[^/\s]+?:\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\s|$)}
describe WEIRD_PORT		Uses non-standard port number for HTTP

# looks for a (maybe empty) username and (optional) password in an url:
# an "@" reached from the scheme without crossing "/" or whitespace, with an
# optional ":password" part in front of it.
uri USERPASS			m{https?://[^/\s]*?(?::[^/\s]+?)?\@}
describe USERPASS               URL contains username and (optional) password

# URI whose last character is the pound sign.  The sign is written
# backslash-escaped in both lines because an unescaped one would start a
# comment (see 'perldoc Mail::SpamAssassin::Conf').
uri URI_IS_POUND		m{\#$}
describe URI_IS_POUND		Filename is just a '\#'; probably a JS trick

# "bargain", optionally followed by "s"/"z" or by "-" plus further
# non-whitespace, then ".com" or ".biz".  Case-sensitive (no /i flag).
# The optional suffix group is non-capturing: the captured text was never
# used, and (?:...) is the grouping style used by the other rules here.
uri BARGAIN_URL			/bargain(?:[sz]|-\S+)?\.(?:com|biz)/
describe BARGAIN_URL		Includes a link to a likely spammer domain

# this is somewhat loose, but results are good
# (unanchored: hits ".biz" followed by "/" or end of string anywhere in the
# URI, not only at the end of the host part)
uri BIZ_TLD  			/\.biz(?:\/|$)/i
describe BIZ_TLD		Contains an URL in the BIZ top-level domain    

# http(s) or mailto URI whose first "/"-free segment ends in ".info",
# followed by "/" or end of string.
uri INFO_TLD			/^(?:https?:\/\/|mailto:)[^\/]+\.info(?:\/|$)/i
describe INFO_TLD		Contains an URL in the INFO top-level domain    

# Matt Cline
# Pretty good for most folks, except for jm: I have a really stupid
# e-commerce bunch obfuscating their URLs with this for some reason. screw 'em
# jm: hesitant to remove this outright; it should be good against phishers
#uri      HTTP_ENTITIES_HOST	m{https?://[^\s\">/]*\&\#[\da-f]+}i
#describe HTTP_ENTITIES_HOST	URI obscured with character entities

# rd.yahoo.com URI whose path starts with four or more digits, or with the
# whole word "partner" or "dir".
uri YAHOO_RD_REDIR		m{^https?\://rd\.yahoo\.com/(?:[0-9]{4,}|partner\b|dir\b)}i
describe YAHOO_RD_REDIR		Has Yahoo Redirect URI

# Any URI on drs.yahoo.com, regardless of path.
uri YAHOO_DRS_REDIR		m{^https?://drs\.yahoo\.com/}i
describe YAHOO_DRS_REDIR	Has Yahoo Redirect URI

# "offer", optionally followed by "s"/"z" or by "-" plus further
# non-whitespace, then ".com", ".biz" or ".bz" (case-insensitive).
# The optional suffix group is non-capturing: the captured text was never
# used, and (?:...) is the grouping style used by the other rules here.
uri URI_OFFERS			m/offer(?:[sz]|-\S+)?\.(?:com|bi?z)/i
describe URI_OFFERS		Message has link to company offers

# "4you" (case-insensitive) before the first "/" that follows the scheme,
# for http(s) and mailto URIs.
uri URI_4YOU			m@^(?:https?://|mailto:)[^\/]*4you@i
describe URI_4YOU		Message has URI 4you

# 0 nonspam hits, hundreds of spam hits.  Serious problems there
# Unanchored: hits "terra.es/" wherever it appears in the URI, including
# as the tail of a longer host name or inside the path.
uri TERRA_ES			/terra\.es\//i
describe TERRA_ES		Contains URI to a document hosted at 'terra.es'

# "www" hidden as "%77%77%77", "ww%77", etc.
# note: *not* anchored to start of string, to catch use of redirectors
# .{0,2} allows up to two arbitrary characters between "http://" and the
# first "%77".  Only "http://" is matched (not "https://"), and the pattern
# is case-sensitive.
uri HTTP_77			/http:\/\/.{0,2}\%77/
describe HTTP_77		Contains an URL-encoded hostname (HTTP77)

# affiliateid, aff_id, aff_sub_id etc.
# \w+ needs at least one word character between "aff" and "id=", so a bare
# "affid=" does not hit.  Unanchored and case-insensitive.
uri URI_AFFILIATE		/aff\w+id=/i
describe URI_AFFILIATE		Contains a URI with an affiliate ID code

# really a URI rule, but declared with "header" and decided by the eval
# function check_for_http_redirector(), which is defined outside this file.
header URI_REDIRECTOR		eval:check_for_http_redirector()
describe URI_REDIRECTOR		Message has HTTP redirector URI