# include_analyzer.py [plain text]
"""The skeleton for an include analyzer.
This module defines the basic caches and helper functions for an
include analyzer.
"""
__author__ = "Nils Klarlund"
import os
import glob
import basics
import macro_eval
import parse_file
import parse_command
import statistics
import cache_basics
import mirror_path
import compress_files
# Module-level shortcuts for names defined in the `basics` module, so the
# code below can refer to them without the `basics.` prefix.
# Debug is called as Debug(level, message, ...) to emit diagnostics.
Debug = basics.Debug
# Debug level passed to Debug() for trace messages.
DEBUG_TRACE = basics.DEBUG_TRACE
# Exception raised when a file named on the command line cannot be found.
NotCoveredError = basics.NotCoveredError
class IncludeAnalyzer(object):
    """The skeleton, including caches, of an include analyzer.

    This class owns the caches and drives the processing of a compilation
    command.  The actual include closure computation is left to
    RunAlgorithm, which is to be overridden by a derived class.
    """

    def _InitializeAllCaches(self):
        """(Re)build every cache and every helper object that depends on one.

        Called from __init__ and from ClearStatCaches, so all state assigned
        here is discarded and recreated whenever the stat caches are reset.
        Requires self.client_root_keeper to be set already.
        """
        self.file_cache = {}
        self.symbol_table = {}
        caches = self.caches = (
            cache_basics.SetUpCaches(self.client_root_keeper.client_root))

        # Expose the individual caches as attributes of the analyzer.
        self.includepath_map = caches.includepath_map
        self.directory_map = caches.directory_map
        self.realpath_map = caches.realpath_map
        self.canonical_path = caches.canonical_path
        self.dirname_cache = caches.dirname_cache
        self.compiler_defaults = caches.compiler_defaults
        self.systemdir_prefix_cache = caches.systemdir_prefix_cache
        self.simple_build_stat = caches.simple_build_stat
        self.build_stat_cache = caches.build_stat_cache

        # Also re-export the Is* helpers provided by the cache set-up.
        self.IsIncludepathIndex = caches.IsIncludepathIndex
        self.IsSearchdirIndex = caches.IsSearchdirIndex
        self.IsCurrdirIndex = caches.IsCurrdirIndex
        self.IsRealpathIndex = caches.IsRealpathIndex
        self.IsFilepathPair = caches.IsFilepathPair

        # Helper objects that are built on top of the caches above.
        self.mirror_path = mirror_path.MirrorPath(self.simple_build_stat,
                                                  self.canonical_path,
                                                  self.realpath_map,
                                                  self.systemdir_prefix_cache)
        self.parse_file = parse_file.ParseFile(self.includepath_map)
        self.compress_files = compress_files.CompressFiles(
            self.includepath_map,
            self.directory_map,
            self.realpath_map)

        self.mirrored = set()

        # Bookkeeping of the search directory lists seen by
        # ProcessCompilationCommand since the last cache reset.
        self.quote_dirs_set = set()
        self.angle_dirs_set = set()
        self.include_dir_pairs = set()

    def __init__(self, client_root_keeper, stat_reset_triggers=None):
        """Create the client root for generation 1 and set up all caches.

        Arguments:
          client_root_keeper: an object providing the attribute client_root
            and the method ClientRootMakedir(generation)
          stat_reset_triggers: a dictionary mapping a glob expression to a
            dictionary from path to stamp, as used by DoStatResetTriggers;
            None (the default) means there are no triggers
        """
        # A fresh dictionary per instance: a literal {} default would be one
        # object shared by every analyzer constructed without this argument.
        if stat_reset_triggers is None:
            stat_reset_triggers = {}
        self.generation = 1
        self.client_root_keeper = client_root_keeper
        self.client_root_keeper.ClientRootMakedir(self.generation)
        self.stat_reset_triggers = stat_reset_triggers
        self.translation_unit = "unknown translation unit"
        self.timer = None
        # Remembered so that DoCompilationCommand can evaluate the stat reset
        # triggers from the directory the analyzer was created in.
        self.include_server_cwd = os.getcwd()
        self._InitializeAllCaches()

    def _ProcessFileFromCommandLine(self, fpath, currdir, kind, search_list):
        """Return closure of fpath, a file named on the command line.

        Such files come from the command line, either as the file to
        compile, or from a "-include" command line option.

        Precondition:
          self.currdir_idx has been set (see ProcessCompilationCommand).

        Arguments:
          fpath: a filepath (as a string)
          currdir: a string
          kind: a string ("translation unit" or "include file") used for an
            error message if fpath is not found
          search_list: a tuple of directory indices (for "include file"
            kind files)
        Returns:
          an include closure calculated by RunAlgorithm
        Raises:
          NotCoveredError: if fpath could not be resolved
        """
        # An absolute path is split into directory and filename; a relative
        # path is looked up as is, with an empty directory part.
        if os.path.isabs(fpath):
            file_dirpath, file_filename = os.path.split(fpath)
        else:
            file_dirpath, file_filename = "", fpath

        fpath_resolved_pair, fpath_real = self.build_stat_cache.Resolve(
            self.includepath_map.Index(file_filename),
            self.currdir_idx,
            self.directory_map.Index(file_dirpath),
            search_list,
            self.currdir_idx)
        if fpath_resolved_pair is None:
            raise NotCoveredError("Could not find %s '%s'." % (kind, fpath),
                                  send_email=False)

        # Mirror the path of the file under the client root.
        self.mirror_path.DoPath(
            os.path.join(currdir, fpath),
            self.currdir_idx,
            self.client_root_keeper.client_root)

        return self.RunAlgorithm(fpath_resolved_pair, fpath_real)

    def ProcessCompilationCommand(self, currdir, parsed_command):
        """Do the include analysis for parsed_command.

        Precondition:
          currdir == os.getcwd()

        Arguments:
          currdir: a string denoting an absolute filepath when command is run
          parsed_command: the value returned by ParseCommandArgs
        Returns:
          an include closure as described in RunAlgorithm
        """
        Debug(DEBUG_TRACE, "ProcessCompilationCommand: %s, %s"
              % (currdir, parsed_command))

        assert isinstance(currdir, str)
        statistics.parse_file_counter_last = statistics.parse_file_counter
        (self.quote_dirs, self.angle_dirs,
         self.include_files, translation_unit,
         self.result_file_prefix, self.d_opts) = parsed_command

        statistics.translation_unit = translation_unit
        self.translation_unit = translation_unit
        self.currdir_idx = self.directory_map.Index(currdir)

        # Record the search directory lists of this command.
        self.include_dir_pairs.add((self.quote_dirs, self.angle_dirs))
        self.quote_dirs_set.add(self.quote_dirs)
        self.angle_dirs_set.add(self.angle_dirs)
        statistics.quote_path_total += len(self.quote_dirs)
        statistics.angle_path_total += len(self.angle_dirs)

        # The closure of the command is the union of the closures of each
        # forced include file and of the translation unit itself.
        total_closure = {}
        for include_file in self.include_files:
            total_closure.update(
                self._ProcessFileFromCommandLine(
                    self.includepath_map.string[include_file],
                    currdir,
                    "include file",
                    self.quote_dirs))
        total_closure.update(
            self._ProcessFileFromCommandLine(translation_unit,
                                             currdir,
                                             "translation unit",
                                             ()))
        return total_closure

    def DoStatResetTriggers(self):
        """Reset stat caches if a glob evaluates differently from earlier.

        More precisely, if a path of a glob comes in or out of existence or
        has a new stamp, then reset stat caches.

        At most one reset is performed per call: the method returns as soon
        as the first new or changed path has been recorded, and any further
        differences are picked up by later calls.  Paths that went out of
        existence are only examined when no path is new or changed.
        """
        trigger_map = self.stat_reset_triggers
        # (glob_expr, path) pairs that were recorded earlier but that the
        # glob no longer yields.
        vanished = []
        for glob_expr in trigger_map:
            known_stamps = trigger_map[glob_expr]
            current_paths = glob.glob(glob_expr)
            for path in current_paths:
                new_stamp = basics.Stamp(path)
                if path in known_stamps:
                    if new_stamp != known_stamps[path]:
                        Debug(basics.DEBUG_WARNING,
                              "Path '%s' changed. Clearing caches.",
                              path)
                        known_stamps[path] = new_stamp
                        self.ClearStatCaches()
                        return
                else:
                    Debug(basics.DEBUG_WARNING,
                          "Path '%s' came into existence. Clearing caches.",
                          path)
                    known_stamps[path] = new_stamp
                    self.ClearStatCaches()
                    return
            still_present = set(current_paths)
            vanished.extend((glob_expr, path)
                            for path in known_stamps
                            if path not in still_present)

        # Now, look at what happened to the paths that went out of existence.
        if vanished:
            Debug(basics.DEBUG_WARNING,
                  "Path '%s' no longer exists. Clearing caches.",
                  vanished[0][1])
            # Forget the vanished paths.  If they stayed in the trigger map,
            # every subsequent call would find them missing again and clear
            # the caches over and over.
            for glob_expr, path in vanished:
                del trigger_map[glob_expr][path]
            self.ClearStatCaches()

    def DoCompilationCommand(self, cmd, currdir, client_root_keeper):
        """Parse and process the command; then gather files and links.

        The working directory of the process is changed: first to the
        directory the analyzer was created in (to evaluate the stat reset
        triggers), then to currdir.

        Arguments:
          cmd: the compilation command, a sequence of strings
          currdir: a string, the directory in which the command is run
          client_root_keeper: passed on to the file compressor
        Returns:
          the concatenation of the compressed files of the include closure,
          the compressed hmap entries, the links (system links and mirrored
          links) and the dummy files forcing directories to exist
        """
        self.translation_unit = "unknown translation unit"

        os.chdir(self.include_server_cwd)
        self.DoStatResetTriggers()
        os.chdir(currdir)

        parsed_command = (
            parse_command.ParseCommandArgs(cmd,
                                           currdir,
                                           self.includepath_map,
                                           self.directory_map,
                                           self.compiler_defaults,
                                           self.timer))
        # Only the quote directories are needed here; everything else is
        # consumed by ProcessCompilationCommand.
        (quote_dirs, unused_angle_dirs, unused_include_files,
         unused_source_file, unused_result_file_prefix,
         unused_Dopts) = parsed_command

        # Fetched before the analysis, exactly as the caches stand after the
        # stat reset triggers have been evaluated.
        realpath_map = self.realpath_map

        include_closure = (
            self.ProcessCompilationCommand(currdir, parsed_command))

        # Quote directories whose name ends in '.hmap/' are shipped as well;
        # they get an entry with an empty value list in a separate closure.
        hmap_closure = {}
        for dir_idx in quote_dirs:
            dir_str = self.directory_map.string[dir_idx]
            if dir_str.endswith('.hmap/'):
                hmap_closure[
                    realpath_map.Index(os.path.abspath(dir_str))] = []

        if self.timer:
            self.timer.Cancel()

        translation_unit = self.translation_unit
        links = self.compiler_defaults.system_links + self.mirror_path.Links()
        files = self.compress_files.Compress(include_closure,
                                             client_root_keeper)
        hmaps = self.compress_files.Compress(hmap_closure, client_root_keeper)
        forcing_files = self._ForceDirectoriesToExist()
        files_and_links = files + hmaps + links + forcing_files

        # NOTE(review): ExactDependencies, WriteDependencies and
        # VerifyExactDependencies are not defined in the visible part of this
        # file -- confirm that they are in scope when these options are set.
        if basics.opt_verify:
            exact_no_system_header_dependency_set = (
                ExactDependencies(" ".join(cmd),
                                  realpath_map,
                                  self.systemdir_prefix_cache,
                                  translation_unit))
            if basics.opt_write_include_closure:
                WriteDependencies(exact_no_system_header_dependency_set,
                                  self.result_file_prefix + '.d_exact',
                                  realpath_map)
            VerifyExactDependencies(include_closure,
                                    exact_no_system_header_dependency_set,
                                    realpath_map,
                                    translation_unit)
        if basics.opt_write_include_closure:
            WriteDependencies(include_closure,
                              self.result_file_prefix + '.d_approx',
                              realpath_map)
        return files_and_links

    def _ForceDirectoriesToExist(self):
        """Force any needed directories to exist.

        In rare cases, the source files may contain #include "foo/../bar",
        but may not contain any other files from the "foo" directory.
        In such cases, we invent a dummy file in (the mirrored copy of)
        each such directory, just to force the distccd server to create the
        directory, so that the C compiler won't get an error when it tries
        to resolve that #include.

        Returns:
          A list of files to pass as dummy inputs.
        """
        must_exist_dirs = self.mirror_path.MustExistDirs()
        special_name = 'forcing_technique_271828'
        forcing_files = [d + '/' + special_name
                         for d in must_exist_dirs]
        for forcing_file in forcing_files:
            # Append mode creates the file if it is missing and leaves an
            # existing one untouched.
            open(forcing_file, "a").close()
        return forcing_files

    def RunAlgorithm(self, filepath_resolved_pair, filepath_real_idx):
        """Run FindNode on filepath; then compute include closure.

        Arguments:
          filepath_resolved_pair: (directory_idx, includepath_idx)
          filepath_real_idx: the realpath_map index corresponding to
            filepath_resolved_pair
        Returns:
          include_closure: a dictionary.
        Raises:
          NotImplementedError: always, in this base class.

        The include_closure consists of entries of the form

          realpath_idx: [(searchdir_idx_1, includepath_idx_1),
                         (searchdir_idx_2, includepath_idx_2), ...]

        where searchdir_i is an absolute path.  realpath_idx is a realpath
        index corresponding to a single #include (more exactly, it's the
        index of the path that the #include resolves to).

        This include closure calculation omits any system header files,
        that is, header files found in a systemdir (recall systemdirs are
        those searchdirs that are built into the preprocessor, such as
        "/usr/include").  It concentrates only on header files users might
        edit.

        The keys are the most important part of the include_closure; the
        values are used only to munge the preprocessor output to give more
        useful filenames via the #line directive.  The issue here is that
        source files in the distcc system are not in their "proper"
        locations: for instance, /usr/X11R6/include/X11.h might be in
        /tmp/distcc/usr/X11R6/include/X11.h rather than in
        /usr/X11R6/include.

        As the example above suggests, relative position of .h files is
        preserved in distcc-land, so if the #include ends up being a
        relative include, we do not need to do any munging, so we don't
        bother to store anything in the value-list corresponding to
        realpath_idx.  If, however, the #include ends up being an absolute
        include, we do store the "real" name (as an index-pair) in the
        list.  For debugging purposes, we may store more than one "real"
        name if there are several, which can happen when multiple symlinks
        point to the same place.
        TODO(csilvers): change the code to only store one.

        Here's a concrete example: suppose we're trying to resolve
        #include "bar.h", and the searchdir_list is ["reldir/foo",
        "/usr/foo"].  If "<cwd>/reldir/foo/bar.h" exists, then
        realpath_idx will resolve to that, and the preprocessor will emit
        code like "#line 1 reldir/foo/bar.h".  That's correct as-is, no
        munging needed, so we don't bother to put a value in the
        include_closure entry for this realpath.

        If, however, "<cwd>/reldir/foo/bar.h" does not exist, but
        "/usr/foo/bar.h" exists, then realpath_idx will resolve to that,
        and the preprocessor will emit code like "#line 1
        /tmp/distcc/usr/foo/bar.h".  We'll want to munge that to be
        "/usr/foo/bar.h", so we do put a value in the include_closure
        entry for this realpath, to tell us what to munge to.

        (Note we *could* use realpath to tell us the "real" filename,
        without needing a separate index-pair, but that's not as
        user-friendly, since realpath is the filename after symlinks are
        resolved.  Thus, on some setups the realpath of /usr/foo/bar.h
        could be /netapp1/mnt/foo/bar.h or something equally unhelpful.)

        This method to be overridden by derived class.
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception keep working.
        raise NotImplementedError("RunAlgorithm not implemented.")

    def ClearStatCaches(self):
        """Clear caches used for, or dependent on, stats.

        Starts a new generation: the generation counter is incremented, the
        client root for the new generation is created, and all caches are
        rebuilt from scratch.
        """
        self.generation += 1
        self.client_root_keeper.ClientRootMakedir(self.generation)
        self._InitializeAllCaches()