manlint   [plain text]


#!/usr/bin/perl -w

# manlint - report "errors" in man page(s).

# USAGE:
#  manlint [list of files to check]
#
# EXAMPLE:
#  manlint /usr/man/man*/*.* | less

# An error is anything not known to be a safe construct in a man page;
# see man(7) for more information.
# Currently it's excessively paranoid, but that's the point -- this
# program assumes there's a problem, and if it isn't we can add that to the
# ruleset so that what's safe is explicitly spelled out.
# Currently this program only examines tmac.an based pages, the normal
# kind encountered in Linux.  This is different from the BSD mandoc format,
# which is used by a number of man pages.

# (C) 1999 David A. Wheeler (dwheeler@ida.org)

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


require 5.002;  # Requires Perl 5.002 because functions are prototyped.

# First, set up configuration.

$debug     = 0;   # When true, process_line prints every macro it finds.

# Error counters: $errs covers the file being processed, $totalerrs all files.
$errs      = 0;
$totalerrs = 0;

# Per-category file tallies reported by the main loop.
$goodfiles = 0;
$badfiles  = 0;
$skipfiles = 0;

$filename  = '';  # Name of the file most recently announced by the main loop.

# TODO: Add options to select a small or large safe set, to print only
# whether a file fails instead of the details, and to auto-skip BSD files.

# Table of "safe" macros and requests.  Each key is a macro name and each
# value is the maximum number of parameters it may be given
# (-1 = any number, 0 = no parameters allowed).
%safemacros = (
  'TH' => 5,

  # Font control: any number of parameters.
  (map { ($_ => -1) } qw(B BI BR I IB IR RB RI SB SM)),

  # Other tmac.an macros, grouped by their parameter limit.
  (map { ($_ => 0) } qw(LP P PP RE DT)),
  (map { ($_ => 1) } qw(SH SS RS HP TP PD)),
  'IP' => 2,

  # IX (indexing) is allowed.
  'IX' => -1,

  # UR, UN and UE permit embedded URIs.
  'UR' => 1,
  'UN' => 1,
  'UE' => 0,

  # Allowed troff requests.
  '\\"' => -1,   # troff comments
  'ps'  => 1,    # Point size
  'ft'  => 1,    # Font commands (not recommended, may be ignored in some cases)
  'hy'  => 1,    # Hyphenation (probably ignored in translation)
  'nh'  => 1,    # Again, hyphenation likely ignored.
  'bp'  => 0,    # Force page break; optional parameter forbidden.
  'ne'  => 1,    # Need lines (likely to be ignored in translation)
  'br'  => 0,
  'nf'  => 0,    # No-fill; insert breaks at end of each line.
  'fi'  => 0,
  'ig'  => 1,
  '.'   => 0,    # Standard end-of-ignore/end-of-definition.
  'ce'  => 1,    # Center next N lines
  'ad'  => 1,
  'na'  => 0,
  'if'  => -1,   # LIMITED VERSION.
  'ie'  => -1,   # LIMITED VERSION.
  'el'  => -1,
  'so'  => 1,    # Handle 'so' for shared man pages
  'sp'  => 1,    # Vertical space - only permit positive values.
  'de'  => 1,    # Handling 'macro define' is a pain, but many pages require it.
  'ds'  => -1,   # Allow string defines.
  'in'  => 1,    # Require that every indent be paired with a negative indent.
  'ti'  => 1,    # Temporary indent may be ignored
  'tr'  => 1,    # Translations limited to the entries of %allowed_tr.
);

# Parameters accepted for the ft (font) troff command.  The empty string is
# listed so that ft with no parameter at all is accepted as well.
%allowed_ft_parameter =
  map { ($_ => 1) } (qw(1 2 3 4 R I B P CW), '');

# The only parameter strings accepted for the tr troff command; process_line
# reports any other tr parameter as forbidden.
%allowed_tr = map { ($_ => 1) } (
 '\\(ts"',
 '\\(is\'',
 '\\(if`',
 '\\(pd"',
 '\\(*W-|\(bv\*(Tr',
 '\\*(Tr',
);

sub problem($) {
 # Print one problem report, prefixed with the name of the file currently
 # being read ($ARGV), and count it against that file in $errs.
 my ($message) = @_;
 print $ARGV . ": " . $message . "\n";
 $errs++;
}

sub clean_state {
  # Reset the per-file state: forget every macro recorded in
  # %defined_macros and clear the "file was skipped" flag.
  undef(%defined_macros);
  $is_skipped = 0;
}

sub _process_so_file {
 # Check the file named by a .so request: every line of it is run through
 # process_line(), so problems in the included file are reported against
 # the page currently being processed.
 # Parameter: the file name as written after .so.
 my $so_name = shift;

 # Guard against pages that include each other in a cycle.
 local $so_depth = ($so_depth || 0) + 1;
 if ($so_depth > 10) {
   problem("so nesting too deep at $so_name");
   return;
 }

 # Man pages conventionally name .so targets relative to the top of the
 # manual tree (e.g. "man3/foo.3"), so besides the name as given also try
 # it relative to the directory two levels above the current input file.
 my @candidates = ($so_name);
 if (($so_name !~ m:^/:) && defined($ARGV)
     && ($ARGV =~ m:^(.*)/[^/]+/[^/]+$:)) {
   push(@candidates, "$1/$so_name");
 }

 # A glob-local handle and an explicit "< " mode keep this working on the
 # old Perl versions this script supports while still forcing read mode.
 local *SO_FILE;
 my $opened = 0;
 my $candidate;
 foreach $candidate (@candidates) {
   if ((-f $candidate) && open(SO_FILE, "< $candidate")) {
     $opened = 1;
     last;
   }
 }
 if (!$opened) {
   problem("cannot read so file $so_name");
   return;
 }

 local $_;   # Do not clobber the caller's current line.
 while (<SO_FILE>) {
   process_line();
   last if $is_skipped;   # BSD page detected; nothing more to check.
 }
 close(SO_FILE);
}

sub process_line {
 # Process line already read in $_ (default input line).
 #
 # A line starting with "." or "'" is a macro line; the macro name is
 # checked against %defined_macros (macros the page defined itself via .de)
 # and %safemacros, and anything unknown is reported through problem().
 # A page using the BSD .Dd macro is reported once, the rest of the file is
 # skipped (ARGV is closed) and $is_skipped is set.
 # Takes no parameters; the return value is not meaningful.
 my $macro;
 my $parameters;
 if (m/^[.']\s*([^\s]+)\s*(.*)?/) {
   $macro = $1;
   $parameters = $2;
   print "Found macro: #${macro}#\n" if $debug;
   # Is this the BSD macro set and not a tmac.an set?  Compare the whole
   # name so that a macro merely containing "Dd" is not mistaken for it.
   if ($macro eq 'Dd') {
      problem("Uses BSD mandoc conventions instead of tmac.an");
      $errs--; # Patch up error count.
      close(ARGV); # Skip the rest of this file.
      $is_skipped = 1;
      return;
   }
   if ($macro =~ m/\\"/) {return;} # Skip troff comments.
   if (exists($defined_macros{$macro})) {
     return;  # ??? Should examine the macro parameters.
   }
   if (exists($safemacros{$macro}) ) {
     # ??? Check parameter count.
     # ??? Check that .TH is the first macro (note: bash.1, etc., break this)
     if ( ($macro eq 'if') || ($macro eq 'ie' )) {
       # Only permit checking 't' or 'n' for now.
       if ($parameters =~ m/^[tn]\s/) {
          $_ = $parameters;
          s/^[tn]\s+//;
          process_line();  # Re-examine line without the if statement.
       } else {
         problem("unsafe use of if/ie");
       }
       # ??? sp: only no-parameter or positive values.
     } elsif ($macro eq 'de') {
       # Only record a macro when a name is really present; a failed match
       # would leave a stale $1 ("de") and hide all later definitions.
       if ($parameters =~ m/^([^\s]+)/) {
         $is_defining = $1;
         $defined_macros{$is_defining} = 1;
       } else {
         problem("de without a macro name");
       }
     } elsif ($macro eq 'so') {
       if ($parameters =~ m/^([^\s]+)/) {
         _process_so_file($1);
       } else {
         problem("so without a file name");
       }
     } elsif (($macro eq 'ft') && (defined($parameters))
            && (! exists($allowed_ft_parameter{$parameters}))) {
       problem("forbidden ft parameter $parameters");
     } elsif (($macro eq 'tr') && (defined($parameters))
            && (! exists($allowed_tr{$parameters}))) {
       problem("forbidden tr parameter $parameters");
     }
    # ??? 'in': Require that every indent be paired with a negative indent.
    # ??? For macros with text after them, check their text's escapes.
   } else {
     problem("unsafe macro $macro");
   }
 } else {
 # ??? Regular text; check escape clauses.
 }
}


# Main loop: Process files, looking for errors.

clean_state();

while (<>) {
 # Announce each new input file together with the running totals.
 if ($ARGV ne $filename) {
   print "Processing $ARGV; up to now good=$goodfiles bad=$badfiles skip=$skipfiles\n";
   $filename=$ARGV;
 }
 process_line();
} continue {
 if (eof) {    # End of processing this file.
  close ARGV;  # Perl magic to get line #s to be accurate.
  # Fold this file's result into the totals, then reset the per-file state.
  $totalerrs += $errs;
  if ($errs) { $badfiles++ } else {
      if ($is_skipped) {$skipfiles++} else {$goodfiles++};
  }
  $errs = 0;
  clean_state();
 }
}

print "Number of good files = $goodfiles\n";
print "Number of bad files = $badfiles\n";
print "Number of skipped files = $skipfiles\n";

# Exit with the total number of errors found.  $errs cannot be used here
# because it is reset to 0 after every file; the value is capped at 255 so
# that a large count cannot wrap around to a "success" status of 0.
exit(($totalerrs > 255) ? 255 : $totalerrs);

# ??? Handle .so better (esp. the error messages)
# currently error messages don't report the traceback & they should.