compute-current-tcr   [plain text]


#!/usr/bin/perl
#
# Scores every message listed in spam.log and ham.log using the rule scores
# from ../rules/50_scores.cf, then prints the TCR (ratio of the baseline
# weighted error rate to the filter's weighted error rate), the spam recall
# and the spam precision for the current score set.

use strict;
use warnings;

# Weight given to legitimate mail when computing the weighted error rates.
my $lambda = 1;

# A message whose summed rule score reaches this value is filed as spam.
my $threshold = 5;

# Rule name => score, as read from the scores file.
my %scores;

# Classification counters; add_point() increments them.
#   nspam / nlegit            - messages that really are spam / legitimate
#   n<actual><filed-as>       - breakdown of how each of those was filed
my ($nspam,  $nspamspam,  $nspamlegit)  = (0, 0, 0);
my ($nlegit, $nlegitspam, $nlegitlegit) = (0, 0, 0);

my $scores_file = '../rules/50_scores.cf';
open(my $scores_fh, '<', $scores_file)
    or die "cannot open $scores_file: $!\n";
while (my $line = <$scores_fh>) {
    next unless $line =~ /^score\s+([^\s]*)\s+([-0-9.]*)/;
    my ($rule, $value) = ($1, $2);

    # An empty score field contributes 0 to a sum, so store it as 0 rather
    # than as an empty string that would warn when added.
    $scores{$rule} = length($value) ? $value : 0;
}
close($scores_fh);

_tally_log('spam.log', 1);
_tally_log('ham.log',  0);

# Weighted error rate of the filter and of the "file nothing as spam"
# baseline; both share the same denominator.
my $weighted_total = $lambda * $nlegit + $nspam;
my $werr      = _ratio($lambda * $nlegitspam + $nspamlegit, $weighted_total);
my $werr_base = _ratio($nspam, $weighted_total);

$werr ||= 0.000001;     # avoid / by 0
my $tcr = $werr_base / $werr;

my $sr = _ratio($nspamspam, $nspam) * 100.0;
my $sp = _ratio($nspamspam, $nspamspam + $nlegitspam) * 100.0;

printf ("TCR: %3.6f   SpamRecall: %3.6f%%   SpamPrecision: %3.6f%%\n",
                $tcr, $sr, $sp);
exit;

# Reads the log at $path, sums the scores of the rules listed on each line and
# records the outcome via add_point().  $isspam says whether the messages in
# this log really are spam (1) or legitimate (0).
sub _tally_log {
    my ($path, $isspam) = @_;

    open(my $fh, '<', $path) or die "cannot open $path: $!\n";
    while (my $line = <$fh>) {
        next if $line =~ /^\#/;

        # The last field of a log line is the comma-separated list of rules
        # that hit.  A line that does not parse is treated as "no rules hit"
        # instead of trusting whatever $1 happens to hold.
        my $hits = ($line =~ /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/) ? $1 : '';

        my $score = 0.0;
        for my $rule (split /,/, $hits) {
            next unless defined $scores{$rule};
            $score += $scores{$rule};
        }

        add_point($isspam, ($score >= $threshold ? 1 : 0), $score);
    }
    close($fh);
    return;
}

# Returns $num / $den, or 0 when $den is zero, so that empty logs or a run in
# which nothing is filed as spam report 0 instead of dying with
# "Illegal division by zero".
sub _ratio {
    my ($num, $den) = @_;
    return $den ? $num / $den : 0;
}

# Records one classified message in the file-level counters.
#
#   $isspam       - true when the message really is spam
#   $filedasspam  - true when the message was filed as spam
#   $score        - the message's score; accepted but not used here
#
# Bumps $nspam or $nlegit for the message's real class, then the matching
# n<actual><filed-as> counter for how it was filed.
sub add_point {
  my ($isspam, $filedasspam, $score) = @_;

  unless ($isspam) {
    $nlegit++;
    return $filedasspam ? $nlegitspam++ : $nlegitlegit++;
  }

  $nspam++;
  return $filedasspam ? $nspamspam++ : $nspamlegit++;
}