mass-find-nonspam   [plain text]


#!/usr/bin/perl -w
#
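# Used to clean out spamtraps: finds messages whose From, Message-Id or
# Received headers match one of the killfile patterns defined below.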

# Killfile of Message-Id patterns.  wanted() matches this (unanchored, and
# case-sensitively) against all Message-Id header values of a message joined
# with single spaces.  Compiled with /x, so the layout whitespace below is
# not part of the pattern.  Note that the two bare dots after "fmail" match
# any two characters.
my $killfile_message_ids = qr{(?:
    <\d\d\d\d\S+\@dev\.lifetimetv\.com>
    |<\d+\.\S+\@fmail..\.real-net\.net>
    |<3D53A59C000A4029\@mta3n\.bluewin\.ch>
    |<\S+JavaMail\._accucast\@ecacmail01\.hsn\.com>
    |<LYRIS-\d+-\d+-\S+\@newsletter\.zeit\.de>
    |<LISTMANAGERSQL-\S+\@mailcontrol\.bellevuedata\.com>
    |<LISTMANAGER-\S+\@lister\.masie\.com>
    |<CA-.*\@cert\.org>
    |<SEAEMS\S+\@\S+\.drugstore\.com>
)}x;

# Killfile of Received-header patterns.  wanted() matches this (unanchored,
# and case-sensitively) against all Received header values of a message
# joined with single spaces.  Compiled with /x, so layout whitespace is
# ignored; literal spaces are written as \s, or protected by \Q...\E (which
# backslash-escapes them so that they survive /x).
#
# Dots that belong to literal host names or addresses are escaped so that
# they match only a real ".".  Deliberately left loose:
#   - "postmaster_neto.net": the sibling spamtrap addresses spell dots as
#     "_", so the wildcard may be covering both spellings.
#     NOTE(review): confirm the real address and tighten if possible.
#   - ".Postfix.": the bare dots stand in for the characters surrounding the
#     word (presumably parentheses).
my $killfile_receiveds = qr{(?:
    for\s<(?:gfhj|sdhbgsfhg|dth|cnariani|KarenCedar|lorin\.stein|fsg\.publicity|richard\.deitch|davidh)\@fsg\.com>
    |for\s<(?:elizabek|reekie)\@vineyard\.net>
    |for\s<(?:stu)\@westernchief\.com>
    |for\s:include:\S+/MapQuest/
    |for\s<ccastell\@RESQNET\.COM>
    |for\s<ccastell\@resqnet\.com>
    |for\s<jmarotta\@resqnet\.com>
    |for\s<tlai\@spamtraps\.taint\.org>
    |for\s<cmyers_highvision_net\@spamtraps\.taint\.org>
    |for\s<biffhero\@spamtraps\.taint\.org>
    |for\s<fortress\S*\@shangri-la\.dropbear\.id\.au>
    |for\s<jeffrey_thompsonic_com\@spamtraps\.taint\.org>
    |for\s<postmaster_neto.net\@spamtraps\.taint\.org>
    |for\s<spamistehsuk\@spamtraps\.taint\.org>
    |for\s<scott_nwds_com\@spamtraps\.taint\.org>
    |for\s<spamz0rsaretehsuks\@spamtraps\.taint\.org>
    |for\s<vineyard\.gazette\@spamtraps\.taint\.org>
    |<\S+\@listserv\.computerworld\.com>
    |\(envelope-from\s<away_\S+\@newsletters\.away\.com>\)
    |\(envelope-from\s<news\@zonelabs\.rsc02\.com>\)
    |\(envelope-from\s<officedepot\d\@officedepot\.rsc01\.com>\)
    |\(envelope-from\sBounces_\S+\@CWMAILIN\.COMPUTERWORLD\.COM\)
    |\(envelope-from\sYour_Astrologer\@mailer1\.astrology\.com\)
    |\(envelope-from\sbounce-gozilla-newsletters-\S+\@pro\.netatlantic\.com\)
    |\(envelope-from\sbounce-webtorials-\S+\@lists\.netline\.com\)
    |\(envelope-from\sdivx-return-\S+\@lists\.divx\.com\)
    |\(envelope-from\sfirewalls-bounce\@isc\.org\)
    |\(envelope-from\sfoxsportsflash-return\@mailings\.foxsports\.com\)
    |\(envelope-from\snews\@mail\d\.globalscape\.com\)
    |\(envelope-from\sowner-nolist-\S+\@LISTSERV\.BELIEFNET\.COM\)
    |\(envelope-from\swebmaster\@mail2fans\.com\)
    |\(envelope-from\swwf\S+-errors\S+\@bounce\.wwf\.com\)
    |\sby\semail\.ebgames\.com\s
    |by\smail\.scope\.ie
    |by\soutgoing\.securityfocus\.com\s.Postfix.
    |by\ssebastian\.icelandair\.is
    |envelope-from\sowner-announce\@hq\.lp\.org
    |envelope-from\ssecurity-advisories\@freebsd\.org
    |from\s\Qapplenews.lists.apple.com (applenews.lists.apple.com [17.254.0\E
    |from\s\Qmail.fuckedcompany.com ([66.221.38.222])\E
    |from\sFDLN01\.fed\.com\s
    |from\smpm\S+\.mypoints\.com\s\(mpm\S+\.mypoints\.com
    |from\sHDX\S+\.hmdelivery\.com\s\(hdx\S+\.hmdelivery\.com
    |from\smarketing\.sonypictures\.com\s\(zt01\.sonypictures\.com
    |from\sLISTSERV\.DLI\.COM\sby\sLISTSERV\.DLI\.COM\s
    |from\sLLin\s\(llin\.hq\.cmp\.com
    |from\s\S+\s\S+\sby\s(?:anclsmtp\d\d|myfamlsmtp\d\d)\.myfamily\.com
    |from\sabv-sfo\S+\.CNET\.COM
    |from\sapplenews\.lists\.apple\.com\s\(applenews\.lists\.apple\.com
    |from\sbounce\.winxpnews\.com
    |from\sexim-colo-01\.whoc\.theplanet\.co\.uk\s
    |from\sivillage-\d+\.ivillage\.com\s
    |from\smacromedia\.com
    |from\strafford\.com\sby\slists\.aktiv\.com
)}x;

# Killfile of From-header patterns.  wanted() matches this (unanchored, and
# case-sensitively) against the From header of a message.  Compiled with /x,
# so the layout whitespace below is not part of the pattern.  Most entries
# require the surrounding angle brackets; two entries (Computerworld_WebAppDev
# and newsletter@rp-online.de) are bare addresses and match with or without
# them.  The bare dot in "df." matches any one character.
my $killfile_froms = qr{(?:
    <listsupport\@internet\.com>
    |<journal\@wrox\.com>
    |<burpeeseed\@List\.burpee\.com>
    |<preferredcustomer\@photoworks\.com>
    |<proedigitaldigest\@proe\.com>
    |<subscriptions\@mcafee\.com>
    |Computerworld_WebAppDev\@Computerworld\.com
    |<news\@real-net\.net>
    |<Online.*\@newsletter\.online\.com>
    |<no-reply\@no-more-viruses-please\.com>
    |<Joke-Of-The-Day-return-.*\@mta03\.optamail\.com>
    |<replyem\@jcpenneyeservices\.com>
    |<Cooking\.com\.\S+\@mail\.cooking\.com>
    |<cio\@UPDATE\.CIO\.COM>
    |<DirectTV\S+\@vmadmin\.com>
    |<\S+\@icelandair\.is>
    |<urnews\@unixreview\.email-publisher\.com>
    |<\S+\@lists\.techtarget\.com>
    |<gcn\@eletters\.gcn\.com>
    |<subscriber\.email\@df.\.jobserve\.com>
    |newsletter\@rp-online\.de
    |<newsletters\@cotswoldoutdoor\.com>
)}x;

###########################################################################

use lib "lib";
use lib "../lib";
use lib "../../lib";
use Mail::SpamAssassin::ArchiveIterator;

# Main program.
#
# Usage:
#   mass-find-nonspam DIR [DIR ...]   print the id of every message under the
#                                     given locations that matches a killfile
#   mass-find-nonspam --single        read one message from STDIN; exit status
#                                     is 0 if it matches a killfile, 1 if not

use strict;

# Flush output immediately, so that an interrupted run never leaves a
# partially written line behind.
$| = 1;

my $format = "dir";		# for now. TODO

use vars qw( $opt_single 
);
use Getopt::Long;
GetOptions("single");

# NOTE(review): the meaning of opt_j / opt_n / opt_all is defined by
# ArchiveIterator and cannot be seen from this file -- confirm against its
# documentation before changing them.
my $iter = Mail::SpamAssassin::ArchiveIterator->new ({
        'opt_j' => 1,
        'opt_n' => 1,
        'opt_all' => 1,
  });

if ($opt_single) {
  my @data = <STDIN>;
  # wanted() unpacks four arguments (an ignored first one, then id, time and
  # the message data), so the leading placeholder is required: without it the
  # array reference ends up in $time and no message data is parsed.
  wanted (undef, "stdin", 0, \@data);
  # wanted() exits with status 0 itself when a killfile matches; getting here
  # therefore means "no match".
  exit 1;

} else {
  # Every command-line argument becomes one "ham:<format>:<location>" target.
  my @targets = ();
  foreach my $location (@ARGV) {
    push (@targets, "ham:$format:$location");
  }
  # wanted() does all the work; the second callback is a deliberate no-op.
  $iter->set_functions (\&wanted, sub { });
  $iter->run (@targets);
}

#foreach my $from (sort {$count{$b} <=> $count{$a}} keys %count) {
#print "$count{$from}   $from $lastsubject{$from}  $lastid{$from}\n";
#}
exit;

###########################################################################

# Return every value of header $hdr in message $ma, joined with single
# spaces.  A false result (no such header, or a value that Perl treats as
# false) is returned as the empty string.
sub get_or_empty {
  my ($ma, $hdr) = @_;

  # get_header() is called in list context here so that all values are kept.
  my @values = $ma->get_header ($hdr);
  my $joined = join (' ', @values);

  return $joined ? $joined : '';
}

# Per-message callback: check one message against the three killfiles.
#
# Arguments:
#   (ignored) - first argument, unused here
#   $id       - identifier printed when the message matches
#   $time     - unused here; kept so the argument positions stay aligned
#   $dataref  - message data, handed unchanged to Mail::SpamAssassin->parse
#
# A message matches when its From header matches $killfile_froms, or its
# Message-Id headers match $killfile_message_ids, or its Received headers
# match $killfile_receiveds.  Messages without a From header are skipped.
# On a match the process exits with status 0 in --single mode; otherwise
# "$id\n" is printed.
#
# NOTE(review): Mail::SpamAssassin itself is not loaded explicitly in the
# visible part of this file; presumably ArchiveIterator pulls it in -- confirm.
sub wanted {
  my (undef, $id, $time, $dataref) = @_;

  my $ma = Mail::SpamAssassin->parse ($dataref);
  my $from = $ma->get_header ("From");
  unless (defined $from) {
    $ma->finish();
    return;
  }

  # The killfiles are precompiled qr// objects, so they are used directly
  # (no /o needed).  Evaluation short-circuits exactly as before: From first,
  # then Message-Id, then Received.
  my $matched = ($from =~ $killfile_froms
    || get_or_empty($ma,"Message-Id") =~ $killfile_message_ids
    || get_or_empty($ma,"Received") =~ $killfile_receiveds);

  # Finish the message before acting on the result, so that the --single
  # exit below no longer skips this call as it used to.
  $ma->finish();

  if ($matched) {
    exit 0 if $opt_single;
    print "$id\n";
  }

  # Disabled per-sender tally, kept for reference:
  #chomp $from;
  #$count{$from}++;
  #chomp ($lastsubject{$from} = $ma->get_header("Subject"));
  #chomp ($lastid{$from} = $id);

  return;
}