uniq-mailbox   [plain text]


#!/usr/bin/perl -w -i.bak
#
# uniq-mailbox mbox [...]
#
# removes duplicate mails found in any of the mailboxes, rewriting
# them as it goes.  backups saved as mbox.bak if any modifications
# are made.
#
# support for .tar and .gz mboxes: none. unpack them first ;)

###########################################################################

use English;

my $lastargv = '';
my $found_dups = 0;
my $num_mails = 0;
my $skipmail = 0;

my %seen = ();

while (<>) {
  if ($lastargv ne $ARGV) {
    report();
    $found_dups = 0;
    $num_mails = 0;
    $lastargv = $ARGV;
  }

  if (/^From /) {
    $in_header = 1;
    $skipmail = 0;
    @lines = ();
  }

  if ($skipmail) { next; }

  if ($in_header) {
    push (@lines, $_);
    if (/^Message-[iI][dD]: (.*)$/) {
      if (exists $seen{$1}) {
        $found_dups++;
        @lines = ();
        $skipmail = 1;
      }

      $seen{$1} = 1;
      $num_mails++;
    }

    if (/^$/) {
      if (!$skipmail) { print @lines; @lines = (); }
      $in_header = 0;
    }

  } else {
    print;
  }
}

report();
exit;

sub report {
  return unless ($lastargv =~ /\S/);
  if ($found_dups) {
    warn "found $found_dups duplicate mails: $lastargv ($num_mails messages)\n";
  } else {
    warn "no duplicate mails: $lastargv ($num_mails messages)\n";
    unlink ("$lastargv.bak");   # not needed
  }
}