#! /usr/bin/env python """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline. The output file has an extension of '.bkm' instead of '.out', since hyperref already uses that extension. """ import getopt import os import re import string import sys # Ench item in an entry is a tuple of: # # Section #, Title String, Page #, List of Sub-entries # # The return value of parse_toc() is such a tuple. cline_re = r"""^ \\contentsline\ \{([a-z]*)} # type of section in $1 \{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number (.*)} # title string \{(\d+)}$""" # page number cline_rx = re.compile(cline_re, re.VERBOSE) OUTER_TO_INNER = -1 _transition_map = { ('chapter', 'section'): OUTER_TO_INNER, ('section', 'subsection'): OUTER_TO_INNER, ('subsection', 'subsubsection'): OUTER_TO_INNER, ('subsubsection', 'subsection'): 1, ('subsection', 'section'): 1, ('section', 'chapter'): 1, ('subsection', 'chapter'): 2, ('subsubsection', 'section'): 2, ('subsubsection', 'chapter'): 3, } INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection") def parse_toc(fp, bigpart=None): toc = top = [] stack = [toc] level = bigpart or 'chapter' lineno = 0 while 1: line = fp.readline() if not line: break lineno = lineno + 1 m = cline_rx.match(line) if m: stype, snum, title, pageno = m.group(1, 2, 3, 4) title = clean_title(title) entry = (stype, snum, title, string.atoi(pageno), []) if stype == level: toc.append(entry) else: if stype not in INCLUDED_LEVELS: # we don't want paragraphs & subparagraphs continue direction = _transition_map[(level, stype)] if direction == OUTER_TO_INNER: toc = toc[-1][-1] stack.insert(0, toc) toc.append(entry) else: for i in range(direction): del stack[0] toc = stack[0] toc.append(entry) level = stype else: sys.stderr.write("l.%s: " + line) return top hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}") raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}") title_rx = re.compile(r"\\([a-zA-Z])+\s+") title_trans = string.maketrans("", "") def clean_title(title): title = raisebox_rx.sub("", title) title = hackscore_rx.sub(r"\\_", title) pos = 0 while 1: m = title_rx.search(title, pos) if m: start = m.start() if title[start:start+15] != "\\textunderscore": title = title[:start] + title[m.end():] pos = start + 1 else: break title = string.translate(title, title_trans, "{}") return title def write_toc(toc, fp): for entry in toc: write_toc_entry(entry, fp, 0) def write_toc_entry(entry, fp, layer): stype, snum, title, pageno, toc = entry s = "\\pdfoutline goto name{page%03d}" % pageno if toc: s = "%s count -%d" % (s, len(toc)) if snum: title = "%s %s" % (snum, title) s = "%s {%s}\n" % (s, title) fp.write(s) for entry in toc: write_toc_entry(entry, fp, layer + 1) def process(ifn, ofn, bigpart=None): toc = parse_toc(open(ifn), bigpart) write_toc(toc, open(ofn, "w")) def main(): bigpart = None opts, args = getopt.getopt(sys.argv[1:], "c:") if opts: bigpart = opts[0][1] if not args: usage() sys.exit(2) for filename in args: base, ext = os.path.splitext(filename) ext = ext or ".toc" process(base + ext, base + ".bkm", bigpart) if __name__ == "__main__": main()