compiler_defaults.py   [plain text]


#! /usr/bin/python2.4

# Copyright 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
# USA.

 
"""Divination of built-in system directories used by compiler installation.

It is undesirable for the distcc-pump to send header files that reside
under the built-in search path.  In a correct compiler installation,
these files must already be present on the server. This module lets
the distcc-pump run the compiler in a special mode that allows the
built-in system directories to be revealed.

The current code is tested only for gcc 4.1.1.

TODO(klarlund) Find out what other versions this code works for.
TODO(klarlund) The include server halts if the built-in system
directories cannot be determined. Should this be improved upon?
"""

__author__ = "Nils Klarlund"


import os
import re
import sys
import basics
import shutil
import subprocess

Debug = basics.Debug
DEBUG_TRACE = basics.DEBUG_TRACE
DEBUG_DATA = basics.DEBUG_DATA
NotCoveredError = basics.NotCoveredError

  
def _RealPrefix(path, in_dir):
  """Determine longest directory prefix and whether path contains a symlink.

  Given an absolute path PATH, figure out the longest prefix of PATH where
  every component of the prefix is a directory -- not a file or symlink.

  Checking does not begin until entering in_dir, to allow for directory
  hierarchies that may contain insignificant symbolic links.  This handles
  the case where /tmp is a symbolic link, as on Mac OS X.  in_dir must be
  a parent of path.

  Args:
    path: a string starting with '/'
  Returns:
    a pair consisting of
    - the prefix
    - a bool, which is True iff PATH contained a symlink within in_dir.
  """
  assert in_dir == path[:len(in_dir)] and path[len(in_dir)] == '/'
  prefix = "/"
  parts = path.split('/')
  while prefix != path:
    part = parts.pop(0)
    last_prefix = prefix
    prefix = os.path.join(prefix, part)
    if len(prefix) < len(in_dir):
      continue
    if os.path.islink(prefix):
      return last_prefix, True
    if not os.path.isdir(prefix):
      return last_prefix, False
  return path, False


def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links):
  """Create a link under client root that will resolve to system dir on server.

  See comments for CompilerDefaults class for rationale.

  Args:
    system_dir: a path such as /usr/include or
                /usr/lib/gcc/i486-linux-gnu/4.0.3/include
    client_root: a path such as /dev/shm/tmpX.include_server-X-1
    system_links: a list of paths under client_root; each denotes a symlink

  The link is created only if necessary. So,
    /usr/include/gcc/i486-linux-gnu/4.0.3/include
  is not created if
    /usr/include
  is already in place, since it's a prefix of the longer path.

  If a link is created, the symlink name will be appended to system_links.

  For example, if system_dir is '/usr/include' and client_root is
  '/dev/shm/tmpX.include_server-X-1', then this function will create a
  symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points
  to ../../../../../../../../../../../../usr/include, and it will append
  '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links.
  """
  if not system_dir.startswith('/'):
    raise ValueError("Expected absolute path, but got '%s'." % system_dir)
  if os.path.realpath(system_dir) != system_dir:
    raise NotCoveredError(
        "Default compiler search path '%s' must be a realpath." %s)
  rooted_system_dir = client_root + system_dir
  # Typical values for rooted_system_dir:
  #  /dev/shm/tmpX.include_server-X-1/usr/include
  real_prefix, is_link = _RealPrefix(rooted_system_dir, client_root)
  parent = os.path.dirname(rooted_system_dir)
  if real_prefix == rooted_system_dir:
    # rooted_system_dir already exists as a real (non-symlink) path.
    # Make rooted_system_dir a link.
    #
    # For example, this could happen if /usr/include/c++/4.0 and
    # /usr/include are both default system directories.
    # First we'd call this function with /usr/include/c++/4.0,
    # and it would call os.mkdirdirs() to create
    # /dev/shm/tmpX.include_server-X-1/usr/include/c++,
    # and then it would create a symlink named 4.0 within that.
    # Then we'd call this function again with /usr/include.
    # In this case, we can replace the whole subtree with a single symlink
    # at /dev/shm/tmpX.include_server-X-1/usr/include.
    shutil.rmtree(rooted_system_dir)
    system_links[:] = filter(lambda path :
                             not path.startswith(rooted_system_dir),
                             system_links)
  elif real_prefix == parent:
    # The really constructed path does not extend beyond the parent directory,
    # so we're all set to create the link if it's not already there.
    if os.path.exists(rooted_system_dir):
      assert os.path.islink(rooted_system_dir)
      return
  elif not is_link:
    os.makedirs(parent)
  else:
    # A link above real_prefix has already been created with this routine.
    return
  assert _RealPrefix(parent, client_root) == (parent, False), parent
  depth = len([c for c in system_dir if c == '/'])
  # The more directories on the path system_dir, the more '../' need to
  # appended. We add enough '../' to get to the root directory. It's OK
  # if we have too many, since '..' in the root directory points back to
  # the root directory.
  # TODO(klarlund,fergus): do this in a more principled way.
  # This probably requires changing the protocol.
  os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth)
             + system_dir[1:],  # remove leading '/'
             rooted_system_dir)
  system_links.append(rooted_system_dir)


def _SystemSearchdirsGCC(compiler, language, sysroot_args, canonical_lookup):
  """Run gcc on empty file; parse output to figure out default paths.

  This function works only for gcc, and only some versions at that.

  Arguments:
    compiler: a filepath (the first argument on the distcc command line)
    language: 'c' or 'c++' or other item in basics.LANGUAGES
    sysroot_args: list of -isysroot or --sysroot args for gcc's command line.
    canonical_lookup: a function that maps strings to their realpaths
  Returns:
    list of pairs of system search dirs and their type for this compiler and
    language

  """

  # We are trying to wring the following kind of text out of the
  # compiler:
  #--------------------
  # blah. blah.
  # ...
  # blah. blah.
  # #include "..." search starts here:
  # #include <...> search starts here:
  #  /usr/local/include
  #  /usr/lib/gcc/i486-linux-gnu/4.0.3/include
  #  /usr/include
  # End of search list.
  # blah. blah.
  #------------

  command = [compiler, "-x", language ] + sysroot_args + [ "-v", "-c",
             "/dev/null", "-o", "/dev/null"]

  try:
    # We clear the environment, because otherwise, directories
    # declared by CPATH, for example, will be incorporated into the
    # result. (See the CPP manual for the meaning of CPATH.)  The only
    # thing we keep is PATH, so we can be sure to find the compiler.
    # NOTE: having the full PATH can be tricky: what if there's a gcc
    # -> distcc symlink somewhere on the PATH, before the real gcc?
    # We think the right thing will happen here, but it's complicated.
    # TODO(csilvers): it's possible we could need to pass in some
    # other environment vars, like LD_LIBRARY_PATH.  Instead of adding
    # in more env-vars by hand, consider just removing from os.environ
    # all the env-vars that are meaningful to gcc, such as CPATH.  See
    # http://docs.freebsd.org/info/gcc/gcc.info.Environment_Variables.html,
    # or the "Environment Variables Affecting GCC" section of the gcc
    # info page.
    if 'PATH' in os.environ:
      trimmed_env = {'PATH': os.environ['PATH']}
    else:
      trimmed_env = {}
    p = subprocess.Popen(command,
                         shell=False,
                         stdin=None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         env=trimmed_env)
    out = p.communicate()[0]
  except (IOError, OSError), why:
    raise NotCoveredError (
             ( "Couldn't determine default system include directories\n"
             + "for compiler '%s', language '%s':\n"
             + "error executing '%s': %s.")
             % (compiler, language, command, why))

  if p.returncode != 0:
    raise NotCoveredError(
             ( "Couldn't determine default system include directories\n"
             + "for compiler '%s', language '%s':\n"
             + "command '%s' exited with status '%d'.\n Command output:\n%s") %
             (compiler, language, command, p.returncode, out))

  match_obj = re.search(
    r"%s\n(.*?)\n%s"  # don't ask
    % ("#include <...> search starts here:", "End of search list"),
    out,
    re.MULTILINE + re.DOTALL)

  parse_error_msg = \
           ( "Couldn't determine default system include directories\n"
           + "for compiler '%s', language '%s':\n"
           + "couldn't parse output of '%s'.\nReceived:\n%s") % \
           (compiler, language, command, out)

  if match_obj == None:
    raise NotCoveredError(parse_error_msg)

  directories = []
  for directory in match_obj.group(1).split('\n'):
    if not directory.startswith(' '):
      raise NotCoveredError(parse_error_msg)

    directory = directory[1:]
    directory_type = basics.INCLUDE_DIR_NORMAL

    # For our purposes, it doesn't matter whether the directory is a framework
    # directory (-F) or an include directory (-I).  This only applies when
    # using Apple GCC.
    framework_string = ' (framework directory)'
    if directory.endswith(framework_string):
      directory = directory[:-len(framework_string)]
      directory_type = basics.INCLUDE_DIR_FRAMEWORKS


    directories.append((canonical_lookup(directory), directory_type))

  return directories


class CompilerDefaults(object):
  """Records and caches the default search dirs and creates symlink farm.

  This function works only for gcc, and only some versions at that,
  because we parse the output from gcc to determine the default search dirs.

  The 'default' searchdirs are those on the search-path that are built in, that
  is known to the preprocessor, as opposed to being set on the commandline via
  -I et al.

  When we pass an option such as -I/foo/bar to the server,
  the server will rewrite it to say -I/server/path/root/foo/bar,
  where /server/path/root is the temporary directory on the server
  that corresponds to root on the client (e.g. typically /dev/shm/distccd_nnn).
  This causes problems in this case of -I options such as -I/usr/include/foo,
  where the path contains a 'default' search directory (in this case
  /usr/include) as a prefix.
  Header files under the system default directories are assumed to exist
  on the server, and it would be expensive to send them to the server
  unnecessarily (we measured it, and it slowed down the build of Samba by 20%).
  So for -I options like -I/usr/include/foo, we want the server
  to use /usr/include/foo on the server, not /server/path/root/usr/include/foo.

  Because the server unconditionally rewrites include search
  paths on the command line to be relative to the server root, we must take
  corrective action when identifying default system dirs: references to files
  under these relocated system directories must be redirected to the absolute
  location where they're actually found.

  To do so, we create a symlink forest under client_root.
  This will contain symlinks of the form

    usr/include -> ../../../../../../../../../../../../usr/include

  After being sent to the server, the server will rewrite them as

    /server/path/root/usr/include ->
       /server/path/root/../../../../../../../../../../../../usr/include

  which will make

     /server/path/root/usr/include

  become a symlink to

     /usr/include

  Consequently, an include search directory such as -I /usr/include/foo will
  work on the server, even after it has been rewritten to:

    -I /server/path/root/usr/include/foo
  """

  def __init__(self, canonical_lookup, client_root):
    """Constructor.

    Instance variables:
      system_dirs_real_paths: a dictionary such that
        system_dirs_real_paths[c][lang] is a list of directory paths
        (strings) for compiler c and language lang
      system_dirs_default: a list of all such strings, subjected to
        realpath-ification, for all c, lang, and sysroot
      client_root: a path such as /dev/shm/tmpX.include_server-X-1
      system_links: locations under client_root representing system default dirs
    """
    self.canonical_lookup = canonical_lookup
    self.system_dirs_default_all = set([])
    self.system_dirs_default = {}
    self.system_links = []
    self.client_root = client_root

  def SetSystemDirsDefaults(self, compiler, language, sysroot_info, timer=None):
    """Set instance variables according to compiler, and make symlink farm.

    Arguments:
      compiler: a filepath (the first argument on the distcc command line)
      language: 'c' or 'c++' or other item in basics.LANGUAGES
      sysroot_info: pair with the compiler's system root settings in string
        form (for logging or using as a dictionary key) and as list of args
        for use in gcc's command line.  Examples:
          ( '-isysroot /b/sandbox/', [ '-sysroot', '/b/sandbox/' ] )
        or
          ( '--sysroot=/b/sandbox/', [ '-sysroot=/b/sandbox/' ] )
        or
          ( '', [] )
      timer: a basis.IncludeAnalyzerTimer or None

    The timer will be disabled during this routine because the select involved
    in Popen calls does not handle SIGALRM.

    See also the class documentation for this class.
    """
    assert isinstance(compiler, str)
    assert isinstance(language, str)
    assert isinstance(sysroot_info, tuple)
    assert len(sysroot_info) == 2
    assert isinstance(sysroot_info[0], str)
    assert isinstance(sysroot_info[1], list)
    sysroot = sysroot_info[0]
    Debug(DEBUG_TRACE,
          "SetSystemDirsDefaults with CC, LANG, SYSROOT: %s, %s, %s" %
          (compiler, language, sysroot))
    if compiler in self.system_dirs_default:
      if language in self.system_dirs_default[compiler]:
        if sysroot in self.system_dirs_default[compiler][language]:
          return
      else:
        self.system_dirs_default[compiler][language] = {}
    else:
      self.system_dirs_default[compiler] = { language: {} }
    try:
      if timer:
        # We have to disable the timer because the select system call that is
        # executed when calling the compiler through Popen gives up if presented
        # with a SIGALRM.
        timer.Stop()
      self.system_dirs_default[compiler][language][sysroot] = (
        _SystemSearchdirsGCC(compiler, language, sysroot_info[1],
                             self.canonical_lookup))
      Debug(DEBUG_DATA,
            "system_dirs_default[%s][%s][%s]: %s" %
            (compiler, language, sysroot,
             self.system_dirs_default[compiler][language][sysroot]))
      # Now summarize what we know and add to system_dirs_default_all.
      self.system_dirs_default_all |= (
          set([ d for (d, t) in
                  self.system_dirs_default[compiler][language][sysroot] ]))
      # Construct the symlink farm for the compiler default dirs.
      for (sys_dir, t) in self.system_dirs_default[compiler][language][sysroot]:
        _MakeLinkFromMirrorToRealLocation(sys_dir, self.client_root,
                                          self.system_links)
    finally:
      if timer:
        timer.Start()