"""Evaluation of macros acccording to an overapproximation semantics.
This module generally follows CPP semantics for the evalution of macros. But we
treat every define as a possible one, because we don't know whether it is
actually executed when a file is really preprocessed. Our semantics is thus
multi-valued: an expression (that is, a string) is evaluated to the set of
poosible values it can have according to arbitrary choices of #defines in effect
among those encountered in the program text.
An example explains the general idea. If we have:
#define A x
#define A y
#define B 1
#define B 2
Then the expression
A.B
evaluates to one of the following:
x.1
y.1
x.2
y.2
The set {"x.1", "y.1", "x.2", "y.2"} is the value of EvalExpr("A.B",
symbol_table), where symbol_table is the dictionary in which we have
stored the four #define's.
Currently, we will be satisfied with
A A
evaluating to
x x
x y
y x
y y
although a sharper semantic modelling would yield only:
x x
y y
How to Read This Code
---------------------
An understanding of the C preprocessor is necessary. See "The GNU C Preprocessor
Internals" (http://gcc.gnu.org/onlinedocs/cppinternals). Especially the section
"Macro Expansion Algorithm" is informative.
Whitespace Insertion and Other Deficiencies
-------------------------------------------
CPP inserts whitespaces and sometimes doesn't according to very complicated
rules. We do not insert whitespaces.
Also, we retokenize each intermediate expansion.
For actual arguments of macros, we also we do not do the right thing for
parentheses inside single quotes, which is to ignore them.
There are probably several more deviations from CPP semantics.
These deviations should not matter for most common included computes.
What If the Include Processor is Wrong
--------------------------------------
Assume that we have
#include very_complicated_call(anfhis,fifj)
If the include processor produces spurious expansions like:
"whatda.c"
2 + 2
"5 * 3"
then file whada.c if found in search directory becomes part of the include
closure. So does the file "5 * 3". But 2 + 2 does not have the shape of an a
filepath in an include: the filepath must be in quotes or angle brackets.
These spurious files are not harmful to preprocessing on the server.
If the include server omits calculating the expansion
"right_file.h"
then the compilation on the server will fail. The client, according to the logic
of dcc_build_somewhere, will then perform a local compilation.
Symbol Table
------------
The symbol table is a dictionary, whose entries are of the form
symbol: definition_list
Each definition in definition_list is either
- a string, denoting the expansion of an object-like macro, or
- a pair ([param_1,...,param_n], rhs), which denotes a function-like macro,
whose formal parameters are param_1,.., param_n and whose expansion is rhs,
before the substitution of formal parameters for actual parameters.
"""
__author__ = "Nils Klarlund"
import re
import basics
import parse_file
import statistics
Debug = basics.Debug
DEBUG_TRACE = basics.DEBUG_TRACE
DEBUG_TRACE1 = basics.DEBUG_TRACE1
DEBUG_TRACE2 = basics.DEBUG_TRACE2
NotCoveredError = basics.NotCoveredError
SINGLE_POUND_RE = re.compile(r"\B#\s*(\S*)") DOUBLE_POUND_RE = re.compile(r"##")
SYMBOL_RE = re.compile(r"\b\w+\b")
def _BigUnion(list_of_sets):
"""Return the set that is the union of the sets or lists in list_of_sets."""
result = []
for s in list_of_sets:
result.extend(list(s))
return set(result)
def _PrependToSet(expr, expr_set):
"""Return the set consiting of expr + element with element in expr_set."""
return set([ expr + expr_ for expr_ in expr_set ])
def _SubstituteSymbolInString(x, y, str):
"""Return the string that results from substituting x for y in str."""
Debug(DEBUG_TRACE2,
"""_SubstituteSymbolInString: x: "%s", y: "%s", str:"%s" """,
x, y, str)
sub_re = re.compile(r"\b%s\b" % re.escape(x))
Debug(DEBUG_TRACE2,
"""_SubstituteSymbolInString (result): "%s" """, sub_re.sub(y, str))
return sub_re.sub(y, str)
def _ParseArgs(string, pos):
"""Split stuff according to commas at outer level in parenthesized string.
If string[pos:] does not start with '(', return (None, pos). If string[pos:]
starts with '(' and there is a balanced parenthesis structure ending
at pos_end, then return (args, pos_end), where args is
string[pos:pos_end] hacked into segments between commas at outer
level. So "(a,m(c, n(d)), c)...." results in (["a", "m(c, n(d))", "
c"], 17) being returned.
"""
open_parens = 0
if not pos < len(string) or string[pos] != '(':
return (None, pos)
commas = [pos]
pos_end = None
inside_quotes = False
for i in range(pos, len(string)):
if inside_quotes:
if string[i] == '"' and string[i-1] != r'\\':
inside_quotes = False
continue
if string[i]==',' and open_parens==1:
commas.append(i)
elif string[i]=='(':
open_parens += 1
elif string[i]==')':
open_parens -= 1
if open_parens == 0:
pos_end = i
break
elif string[i] == '"' and string[i-1] != r'\\':
inside_quotes = True
if not pos_end:
return (None, pos)
commas.append(pos_end) args_list = []
for i in range(len(commas) - 1):
args_list.append(string[commas[i] + 1 : commas[i + 1]])
return (args_list, pos_end + 1)
def _MassageAccordingToPoundSigns(string):
"""Perform 'stringification (#) and concatenation (##)."""
return SINGLE_POUND_RE.sub(r'"\1"', DOUBLE_POUND_RE.sub("", string))
def _EvalExprHelper(expr, symbol_table, disabled):
if __debug__:
Debug(DEBUG_TRACE2, "EvalExprHelper: expr: %s", expr)
"""Evaluate according to an overapproximation macro substitution semantics.
Arguments:
expr: a string
symbol_table: { symbol: [rhs,
...,
([param_1,...,param_n], rhs),
...],
... }, where rhs and param_i are strings
disabled: set of disabled symbols (see "The GNU C Preprocessor
Internals")
Returns: [ expr_1, expr_2, ...], which is a non-empy list of
strings, namely the expansions of expr.
"""
def _ReEvalRecursivelyForExpansion(expansion, after):
"""Reevalute the expansion that is the result of finding a match for a
macro.
Arguments:
symbol (outer scope): the name of the matched macro that resulted in
expansion; it is the same as match.group()
match (outer scope): the match data for the symbol
expansion: the expansion we are substituting for the match
after: the string after the expansion
Modifies:
value_set: the set of all possible expansions of expr
The value set is updated according to recursive evaluations of the string
that results from inserting expansion between expr[:match.start()] and
expr[match.end():] (for symbol-like macro) or expr[args_end:] (for
function-like macro).
The idea is to form a set of strings from a cross product of two string sets
descriping all possibly expansions before and after the match.
There are two recursions involved. First, we evaluate after to find all
possible values of what follows the match. This recursion does not involve a
larger disabled set. Each resulting string is named after_eval_expr. Second,
we evaluate expansion concatenated with each after_eval_expr value. In
these evaluations, symbol is added to the disabled set.
"""
if __debug__:
Debug(DEBUG_TRACE2,
("_ReEvalRecursivelyForExpansion: expr: %s\n" +
" before: %s\n expansion: %s\n after: %s")
% (expr, expr[:match.start()], expansion, after))
value_set.update(
_PrependToSet(expr[:match.start()],
_BigUnion([ _EvalExprHelper(expansion + after_expansion,
symbol_table,
disabled | set([symbol]))
for after_expansion in
_EvalExprHelper(after,
symbol_table,
disabled) ])))
def _EvalMacro(definition, disabled):
"""Evaluate symbol according to definition.
Here definition is either object-like or function-like.
"""
value_set.update(
_PrependToSet(expr[:match.end()],
_EvalExprHelper(expr[match.end():],
symbol_table,
disabled)))
if isinstance(definition, str):
_ReEvalRecursivelyForExpansion(definition, expr[match.end():])
elif isinstance(definition, tuple):
(lhs, rhs) = definition if not args_list or len(lhs) != len(args_list):
return
args_expand = [ _EvalExprHelper(arg, symbol_table, disabled)
for arg in args_list ]
expansions = [rhs]
for i in range(len(args_expand)):
expansions = [ _SubstituteSymbolInString(lhs[i], arg, expansion)
for expansion in expansions
for arg in args_expand[i] ]
for expansion in expansions:
real_expansion = _MassageAccordingToPoundSigns(expansion)
_ReEvalRecursivelyForExpansion(real_expansion, expr[args_end:])
else:
assert False, "Definition '%s' is unexpected." % definition
match = SYMBOL_RE.search(expr)
if not match:
return set([expr])
else:
symbol = match.group()
(args_list, args_end) = _ParseArgs(expr, match.end())
Debug(DEBUG_TRACE2,
"EvalExprHelper (inside): expr: %s\n" +
" symbol: %s\n args_list: %s\n" +
" before: %s\n",
expr, symbol, args_list, expr[:match.start()])
if symbol not in symbol_table:
return _PrependToSet(expr[:match.end()],
_EvalExprHelper(expr[match.end():],
symbol_table,
disabled))
else:
value_set = set([expr])
if symbol not in disabled:
defs = symbol_table[symbol]
for definition in defs:
_EvalMacro(definition, disabled)
return value_set
def EvalExpression(expr, symbol_table):
"""Calculate sets of possibly values of expr given symbol_table.
Arguments:
expr: any string to be macro expanded
symbol_table: { symbol: {rhs, ..}, ,...,
symbol:{((param_1,...,param_n), rhs), ... }
Returns:
[ expr_1, expr_2, ...], a list of strings: the possible expansions of expr.
"""
if __debug__:
Debug(DEBUG_TRACE, "EvalExpression: expr: %s", expr)
r = set(_EvalExprHelper(expr, symbol_table, set([])))
if __debug__:
Debug(DEBUG_TRACE, "EvalExpression: return: %s", r)
return r
def ResolveExpr(includepath_map_index,
resolve,
expr,
currdir_idx,
searchdir_idx,
quote_dirs,
angle_dirs,
symbol_table):
"""Evaluate and resolve possible values for expr using symbol table.
Determine all possible values of expr. Those that are of the form "filepath"
or <filepath> are resolved against (file_dir_idx, quote_dirs) or angle_dirs,
respectively. The set of resolvants is returned along with a list of all
symbols that occurs in possible evaluations of expr.
Arguments:
includepath_map_index: the Index function of an includepath map
resolve: a Resolve method of a BuildStatCache object
expr: any string to be macro expanded
currdir_idx: a directory index
searchdir_idx: a directory index (used for resolving quote-includes)
quote_dirs: a directory index list
angle_dirs: a directory index list
symbol_table: as described in module macro_expr
Returns:
a pair(files, symbols), where files is a list of (filepath pair,
realpath index), namely those files that are successful resolutions of
possible double-quoted and angle-quoted values of expr, and symbols is
the set of all identifiers occurring in expression or its possible
expansions
Raises:
NotCoveredError
"""
if __debug__:
Debug(DEBUG_TRACE, "ResolveExpr: %s, %s, %s",
expr, searchdir_idx, angle_dirs)
resolved_files = []
symbols = []
statistics.resolve_expr_counter += 1
for val in EvalExpression(expr, symbol_table):
match_result = parse_file.INCLUDE_STRING_RE.match(val)
if match_result:
if match_result.group('quote'):
resolved = resolve(includepath_map_index(match_result.group('quote')),
currdir_idx, searchdir_idx, quote_dirs, currdir_idx)
elif match_result.group('angle'):
resolved = resolve(includepath_map_index(match_result.group('angle')),
currdir_idx, None, angle_dirs, currdir_idx)
resolved_files.append(resolved)
else:
symbols.extend(SYMBOL_RE.findall(val))
if __debug__:
Debug(DEBUG_TRACE, "ResolveExpr: return: %s", resolved_files)
return (resolved_files, set(symbols))