use vars qw($opt_a $opt_h $opt_t);
use Getopt::Std;
getopts("aht");
my $prog = $0;
$prog =~ s@.*/@@;
sub usage {
my $status = shift;
my $out = $status ? STDERR : STDOUT;
print $out <<EOF;
usage: $prog [options] [mass-check results files]
-a show all entries (normally, reverses of pairs are not shown)
-h print this help
-t ignore T_ tests
Do not abuse this tool. Just because a test highly correlates with
another test does not mean you can simply remove one or merge them
without further consideration. You need to also look at hit rates,
false positives, false negatives, and actually compare the tests.
Some overlap is often good, especially if the tests have different
characteristics.
EOF
exit($status);
}
usage(0) if $opt_h;
if ($ push(@ARGV, "-");
}
my %solo;
my %pair;
foreach $file (@ARGV) {
read_file($file);
}
print "COUNT\tPAIR/A\tPAIR/B\tA,B\n";
foreach $k (sort { $pair{$b} <=> $pair{$a} } keys %pair) {
my ($a, $b) = split(/ /, $k);
my $a_pct = $pair{$k} / $solo{$a};
my $b_pct = $pair{$k} / $solo{$b};
if ($opt_a) {
printf "%d\t%.3f\t%.3f\t%s,%s\n", $pair{$k},$a_pct,$b_pct,$a,$b;
printf "%d\t%.3f\t%.3f\t%s,%s\n", $pair{$k},$b_pct,$a_pct,$b,$a;
}
else {
if (($a_pct > $b_pct) || ($a_pct == $b_pct && $a lt $b)) {
printf "%d\t%.3f\t%.3f\t%s,%s\n", $pair{$k},$a_pct,$b_pct,$a,$b;
}
else {
printf "%d\t%.3f\t%.3f\t%s,%s\n", $pair{$k},$b_pct,$a_pct,$b,$a;
}
}
}
sub read_file {
my ($input) = @_;
open(FILE, $input) || die "open failed: $input";
my $line = 0;
while(<FILE>) {
next if /^ if (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
my @tests = split(/,/, $1);
@tests = grep { !/^T_/ } @tests if $opt_t;
my $i = 0;
for my $a (@tests) {
$solo{$a}++;
$pair{"$a $_"}++ for @tests[(++$i) .. $ }
}
else {
die "$prog: error in input format in $input\n";
}
}
close(FILE);
}