simphosts   [plain text]


#!/usr/bin/perl
#
# $Id: simphosts,v 1.1.1.1 1999/10/04 22:25:52 wsanchez Exp $
#
# Simplify hosts(5) format file.
#   Marion Hakanson (hakanson@cse.ogi.edu)
#   Oregon Graduate Institute of Science and Technology
#
# Copyright (c) 1990, Marion Hakanson.
#
# You may distribute under the terms of the GNU General Public License
# as specified in the README file that comes with the dnsparse kit.
#
# The input file is read, and the mappings are stored up in the order
# encountered (earlier ones will override any later ones which may
# overlap or otherwise be redundant), in the same manner as that used
# by the bsd host-table-based resolution code.  When the entire file
# has been processed, each address is printed (in the order that they
# were encountered) as follows:
#    addr			first-canon-for-addr
#
# Next, the remaining (not yet printed) names are sorted, and each
# one is printed, one-per-line, as follows:
#    first-addr-for-name	first-canon-for-name name
# (if the two names are identical, only one is printed).
#
# Thus, if we've done the job right, the resulting table (also in legal
# hosts(5) format) should be functionally equivalent to the original,
# yet in a canonical form which can be compared textually without
# having semantically meaningless differences cloud the issue.
#
# Note that if one runs the result through this script another time,
# one should end up with the same result yet again.

# Select the input source and attach it to the HOSTS filehandle, which the
# rest of the script reads from.  The first command-line argument names the
# file to read; with no argument (or the conventional "-") standard input
# is duplicated instead.
if ( @ARGV && $ARGV[0] ne '-' ) {
  # Three-argument open: the file name is taken literally, so a name such
  # as ">file" or "cmd |" can neither truncate a file nor run a command.
  open(HOSTS, '<', $ARGV[0]) || die "Cannot open $ARGV[0]: $!, aborted";
} else {
  open(HOSTS, '<&', \*STDIN) || die 'Cannot dup STDIN, aborted';
}

# Copy the leading run of comment lines (the file's header) to the output
# unchanged.  The loop stops at the first non-comment line and leaves that
# line in $_ for the parsing code that follows; if the input runs out
# first, the failed read leaves $_ undefined.
while ( <HOSTS> ) {
  last unless ( /^\s*#/ );
  print;
}

# Stop only when the input held nothing but header comments (or nothing at
# all).  Testing eof(HOSTS) here would also be true when the line just read
# into $_ is the last line of the file, and that line would be silently
# dropped.
exit unless ( defined($_) );

# Parse the body of the table.  $_ already holds the first line after the
# header (set by the loop above), so it is processed before any further
# line is read.
#
# Results accumulate in package variables read by the output code:
#   %addr_to_name   address -> canonical name from the address's first line
#   @addrlist       addresses, in order of first appearance
#   %name_to_addr   name -> address of the first line mentioning the name
#   %name_to_canon  name -> canonical name of that same first line
#   @namelist       names, in order of first appearance
do {
  # chomp, not chop: a final line lacking a newline must keep its last
  # character.
  chomp;
  # Drop comments, whether they fill the whole line or trail an entry.
  s/#.*//;
  my ($addr, @names) = split;
  # Blank and comment-only lines yield no fields, and an address with no
  # name maps nothing; recording either would emit a bogus output line.
  if ( defined($addr) && @names ) {
    # The first name on a line is the canonical name for that line.
    my $canon = $names[0];
    # Earlier lines win: only the first line seen for an address counts.
    unless ( exists($addr_to_name{$addr}) ) {
      $addr_to_name{$addr} = $canon;
      push(@addrlist, $addr);
    }
    foreach my $name ( @names ) {
      # Likewise only the first line mentioning a name is recorded; both
      # of its mappings are always stored together.
      unless ( exists($name_to_addr{$name}) ) {
        $name_to_addr{$name} = $addr;
        $name_to_canon{$name} = $canon;
        push(@namelist, $name);
      }
    }
  }
} while (<HOSTS>);


# Section one: every address, in the order first seen, followed by the
# canonical name recorded for it.
my %already_listed;
for my $ip (@addrlist) {
  my $primary = $addr_to_name{$ip};
  print $ip . "\t" . $primary . "\n";
  # Remember the name so the second section does not list it again.
  $already_listed{$primary} = 1;
}

# Divider between the two sections
print "#\n";

# Section two: the names, in sorted order, one per line.
@namelist = sort @namelist;

for my $host (@namelist) {
  # Names already emitted as a canonical name in section one are skipped.
  next if $already_listed{$host};

  my $ip         = $name_to_addr{$host};
  my $line_canon = $name_to_canon{$host};
  my $addr_canon = $addr_to_name{$ip};

  # Assemble the whole line first, then emit it in one go.
  my $line = "$ip\t$line_canon";

  # The name itself is added only when it differs from the canonical name.
  $line .= " $host" unless $host eq $line_canon;

  # The canonical name seen with this name disagrees with the one recorded
  # for its address: keep the entry as found, but flag it.
  $line .= "\t# HEY! $addr_canon should be canonical here"
    unless $addr_canon eq $line_canon;

  print "$line\n";
}