# ----------------------------------------------------------------------------- # cpp.py # # Author: David Beazley (http://www.dabeaz.com) # Copyright (C) 2007 # All rights reserved # # This module implements an ANSI-C style lexical preprocessor for PLY. # ----------------------------------------------------------------------------- from __future__ import generators
import copy
import os
import sys
# Some Python 3 compatibility shims
if sys.version_info.major < 3:
    # Python 2: text may be either a byte string or a unicode string.
    STRING_TYPES = (str, unicode)
else:
    # Python 3: there is a single text type, and xrange() is spelled range().
    STRING_TYPES = str
    xrange = range
# ----------------------------------------------------------------------------- # Default preprocessor lexer definitions. These tokens are enough to get # a basic preprocessor working. Other modules may import these if they want # -----------------------------------------------------------------------------
# Character constant 'c' or L'c'
# NOTE: the raw string below is the token's regular expression (it must stay
# the function's first statement), not descriptive documentation.
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # Keep the lexer's line counter in step with any newlines in the constant.
    t.lexer.lineno += t.value.count("\n")
    return t
# Comment
# NOTE: the raw string below is the token's regular expression (it must stay
# the function's first statement), not descriptive documentation.
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    # replace with one space or a number of '\n', so that the comment turns
    # into whitespace spanning the same number of lines
    t.type = 'CPP_WS'
    t.value = '\n' * ncr if ncr else ' '
    return t
# Line comment
# NOTE: the raw string below is the token's regular expression (it must stay
# the function's first statement), not descriptive documentation.
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # Account for the newline swallowed by the comment (there is none when the
    # comment ends at end of input), as the other multi-line tokens do.
    t.lexer.lineno += t.value.count("\n")
    # replace with '\n'
    t.type = 'CPP_WS'
    t.value = '\n'
    return t
# ------------------------------------------------------------------ # Macro object # # This object holds information about preprocessor macros # # .name - Macro name (string) # .value - Macro value (a list of tokens) # .arglist - List of argument names # .variadic - Boolean indicating whether or not variadic macro # .vararg - Name of the variadic parameter # # When a macro is created, the macro replacement token sequence is # pre-scanned and used to create patch lists that are later used # during macro expansion # ------------------------------------------------------------------
class Macro(object):
    """Record describing one preprocessor macro.

    Attributes:
        name:     macro name (string)
        value:    replacement token list
        arglist:  list of parameter names, or None
        variadic: whether the macro is variadic
        vararg:   name of the variadic parameter (the last entry of
                  ``arglist``), or None for a non-variadic macro
        source:   initialised to None
    """

    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        # Always define .vararg so that reading it on a non-variadic macro
        # yields None instead of raising AttributeError.
        self.vararg = arglist[-1] if variadic else None
        self.source = None
# ------------------------------------------------------------------ # Preprocessor object # # Object representing a preprocessor. Contains macro definitions, # include directories, and other information # ------------------------------------------------------------------
# ----------------------------------------------------------------------------- # tokenize() # # Utility function. Given a string of text, tokenize into a list of tokens # -----------------------------------------------------------------------------
# --------------------------------------------------------------------- # error() # # Report a preprocessor error/warning of some kind # ----------------------------------------------------------------------
# ---------------------------------------------------------------------- # lexprobe() # # This method probes the preprocessor lexer object to discover # the token types of symbols that are important to the preprocessor. # If this works right, the preprocessor will simply "work" # with any suitable lexer regardless of how tokens have been named. # ----------------------------------------------------------------------
def lexprobe(self):
    """Probe ``self.lexer`` for the token types the preprocessor relies on.

    Sample inputs are fed to the lexer and the type of the first token
    returned is recorded in ``self.t_ID``, ``self.t_INTEGER`` (plus
    ``self.t_INTEGER_TYPE``, the Python type of the integer token's value),
    ``self.t_STRING``, ``self.t_SPACE``, ``self.t_NEWLINE`` and the pair
    ``self.t_WS``.  A message is printed for every probe that fails; the
    whitespace attributes are set to None in that case, the others are left
    unset.
    """
    # Determine the token type for identifiers
    self.lexer.input("identifier")
    tok = self.lexer.token()
    if not tok or tok.value != "identifier":
        print("Couldn't determine identifier type")
    else:
        self.t_ID = tok.type

    # Determine the token type for integers
    self.lexer.input("12345")
    tok = self.lexer.token()
    if not tok or int(tok.value) != 12345:
        print("Couldn't determine integer type")
    else:
        self.t_INTEGER = tok.type
        self.t_INTEGER_TYPE = type(tok.value)

    # Determine the token type for strings enclosed in double quotes
    self.lexer.input("\"filename\"")
    tok = self.lexer.token()
    if not tok or tok.value != "\"filename\"":
        print("Couldn't determine string type")
    else:
        self.t_STRING = tok.type

    # Determine the token type for whitespace--if any
    self.lexer.input(" ")
    tok = self.lexer.token()
    if not tok or tok.value != " ":
        self.t_SPACE = None
    else:
        self.t_SPACE = tok.type

    # Determine the token type for newlines
    self.lexer.input("\n")
    tok = self.lexer.token()
    if not tok or tok.value != "\n":
        self.t_NEWLINE = None
        print("Couldn't determine token for newlines")
    else:
        self.t_NEWLINE = tok.type

    self.t_WS = (self.t_SPACE, self.t_NEWLINE)

    # Check for other characters used by the preprocessor
    chars = [ '<','>','#','##','\\','(',')',',','.']
    for c in chars:
        self.lexer.input(c)
        tok = self.lexer.token()
        if not tok or tok.value != c:
            print("Unable to lex '%s' required for preprocessor" % c)
# ---------------------------------------------------------------------- # add_path() # # Adds a search path to the preprocessor. # ----------------------------------------------------------------------
def add_path(self,path):
    """Append *path* to ``self.path``, the preprocessor's list of search paths."""
    self.path.append(path)
# ---------------------------------------------------------------------- # group_lines() # # Given an input string, this function splits it into lines. Trailing whitespace # is removed. Any line ending with \ is grouped with the next line. This # function forms the lowest level of the preprocessor---grouping into text into # a line-by-line format. # ----------------------------------------------------------------------
def group_lines(self,input):
    """Tokenize *input* and yield it one logical line at a time.

    Trailing whitespace is removed from every physical line and a line ending
    in a backslash is joined with the line that follows it (the consumed line
    is left empty so the total line count does not change).  The result is
    tokenized with a clone of ``self.lexer``; each yielded item is a list of
    tokens ending with the whitespace token that contains the newline.  A
    final unterminated line is yielded as well.
    """
    lex = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    for i in range(len(lines)):
        j = i+1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1]+lines[j]
            lines[j] = ""
            j += 1

    # Hand the spliced text to the cloned lexer; without this the lexer has
    # nothing to tokenize and the line splicing above is wasted.  The clone
    # inherits the line counter of self.lexer, so restart it at line 1.
    lex.input("\n".join(lines))
    lex.lineno = 1

    current_line = []
    while True:
        tok = lex.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line
# ---------------------------------------------------------------------- # tokenstrip() # # Remove leading/trailing whitespace tokens from a token list # ----------------------------------------------------------------------
def tokenstrip(self,tokens):
    """Strip leading and trailing whitespace tokens from *tokens*.

    A token counts as whitespace when its type is in ``self.t_WS``.  The list
    is modified in place and the same list object is returned.
    """
    # Drop the leading run of whitespace tokens
    i = 0
    while i < len(tokens) and tokens[i].type in self.t_WS:
        i += 1
    del tokens[:i]

    # Drop the trailing run of whitespace tokens
    i = len(tokens)-1
    while i >= 0 and tokens[i].type in self.t_WS:
        i -= 1
    del tokens[i+1:]
    return tokens
# ---------------------------------------------------------------------- # collect_args() # # Collects comma separated arguments from a list of tokens. The arguments # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) # where tokencount is the number of tokens consumed, args is a list of arguments, # and positions is a list of integers containing the starting index of each # argument. Each argument is represented by a list of tokens. # # When collecting arguments, leading and trailing whitespace is removed # from each argument. # # This function properly handles nested parenthesis and commas---these do not # define new arguments. # ----------------------------------------------------------------------
def collect_args(self,tokenlist):
    """Collect the parenthesised, comma separated arguments in *tokenlist*.

    Returns a tuple ``(tokencount, args, positions)``: the number of tokens
    consumed up to and including the closing ')', the list of arguments (each
    a token list with leading/trailing whitespace stripped), and the list of
    starting indexes of the arguments.  Nested parentheses, and the commas
    inside them, do not start a new argument.  When the opening or closing
    parenthesis is missing the problem is reported through ``self.error`` and
    ``(0, [], [])`` is returned.
    """
    args = []
    positions = []
    current_arg = []
    nesting = 1              # the opening '(' located below counts as level 1
    tokenlen = len(tokenlist)

    # Search for the opening '('.
    i = 0
    while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
        i += 1

    if (i < tokenlen) and (tokenlist[i].value == '('):
        positions.append(i+1)
    else:
        self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
        return 0, [], []

    i += 1

    while i < tokenlen:
        t = tokenlist[i]
        if t.value == '(':
            current_arg.append(t)
            nesting += 1
        elif t.value == ')':
            nesting -= 1
            if nesting == 0:
                # Closing parenthesis of the argument list itself
                if current_arg:
                    args.append(self.tokenstrip(current_arg))
                    positions.append(i)
                return i+1,args,positions
            current_arg.append(t)
        elif t.value == ',' and nesting == 1:
            # Top-level comma: the current argument is complete
            args.append(self.tokenstrip(current_arg))
            positions.append(i+1)
            current_arg = []
        else:
            current_arg.append(t)
        i += 1

    # Missing end argument
    self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
    return 0, [],[]
# ---------------------------------------------------------------------- # macro_prescan() # # Examine the macro value (token sequence) and identify patch points # This is used to speed up macro expansion later on---we'll know # right away where to apply patches to the value to form the expansion # ----------------------------------------------------------------------
def macro_prescan(self,macro):
    """Pre-scan ``macro.value`` and record where arguments must be patched in.

    Three lists are attached to *macro*:

    ``patch``            tuples ``(kind, argnum, index)`` where kind is 'c'
                         for an argument next to a '##' and 'e' for a normal
                         argument; sorted by descending index
    ``str_patch``        tuples ``(argnum, index)`` for '#arg' conversions
    ``var_comma_patch``  indexes of a ',' that directly precedes '##' and the
                         variadic argument

    ``macro.value`` is edited in place: a '#' before an argument and a '##'
    before an argument are deleted, and a '#'-prefixed argument token is
    replaced by a copy whose type is ``self.t_STRING``.
    """
    macro.patch     = []             # Standard macro arguments
    macro.str_patch = []             # String conversion expansion
    macro.var_comma_patch = []       # Variadic macro comma patch
    i = 0
    while i < len(macro.value):
        if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
            argnum = macro.arglist.index(macro.value[i].value)
            # Conversion of argument to a string
            if i > 0 and macro.value[i-1].value == '#':
                macro.value[i] = copy.copy(macro.value[i])
                macro.value[i].type = self.t_STRING
                del macro.value[i-1]
                # The deletion moved the argument to index i-1; index i now
                # holds the following token, so i is not advanced.
                macro.str_patch.append((argnum,i-1))
                continue
            # Concatenation
            elif (i > 0 and macro.value[i-1].value == '##'):
                macro.patch.append(('c',argnum,i-1))
                del macro.value[i-1]
                continue
            elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                macro.patch.append(('c',argnum,i))
                i += 1
                continue
            # Standard expansion
            else:
                macro.patch.append(('e',argnum,i))
        elif macro.value[i].value == '##':
            if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                    ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                    (macro.value[i+1].value == macro.vararg):
                macro.var_comma_patch.append(i-1)
        i += 1
    # Highest index first, so that applying one patch does not shift the
    # positions of the patches still to be applied.
    macro.patch.sort(key=lambda x: x[2],reverse=True)
# ---------------------------------------------------------------------- # macro_expand_args() # # Given a Macro and list of arguments (each a token list), this method # returns an expanded version of a macro. The return value is a token sequence # representing the replacement macro tokens # ----------------------------------------------------------------------
def macro_expand_args(self,macro,args):
    """Return the replacement token list of *macro* for the given *args*.

    *args* holds one token list per macro parameter.  The patch lists stored
    on *macro* (``str_patch``, ``var_comma_patch``, ``patch``) say where the
    arguments go.  ``macro.value`` itself is left untouched; the result is
    built from copies of its tokens.
    """
    # Make a copy of the macro token sequence
    rep = [copy.copy(_x) for _x in macro.value]

    # Make string expansion patches.  These do not alter the length of the replacement sequence
    str_expansion = {}
    for argnum, i in macro.str_patch:
        if argnum not in str_expansion:
            str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
        rep[i] = copy.copy(rep[i])
        rep[i].value = str_expansion[argnum]

    # Make the variadic macro comma patch.  If the variadic macro argument is
    # empty, the recorded commas are marked for removal.
    comma_patch = False
    if macro.variadic and not args[-1]:
        for i in macro.var_comma_patch:
            rep[i] = None
            comma_patch = True

    # Make all other patches.   The order of these matters.  It is assumed that the patch list
    # has been sorted in reverse order of patch location since replacements will cause the
    # size of the replacement sequence to expand from the patch point.
    expanded = { }
    for ptype, argnum, i in macro.patch:
        # Concatenation.   Argument is left unexpanded
        if ptype == 'c':
            rep[i:i+1] = args[argnum]
        # Normal expansion.  Argument is macro expanded first
        elif ptype == 'e':
            if argnum not in expanded:
                expanded[argnum] = self.expand_macros(args[argnum])
            rep[i:i+1] = expanded[argnum]

    # Get rid of removed comma if necessary
    if comma_patch:
        rep = [_i for _i in rep if _i]

    return rep
# ---------------------------------------------------------------------- # expand_macros() # # Given a list of tokens, this function performs macro expansion. # The expanded argument is a dictionary that contains macros already # expanded. This is used to prevent infinite recursion. # ----------------------------------------------------------------------
def expand_macros(self,tokens,expanded=None):
    """Macro-expand *tokens* in place and return the same list.

    *expanded* maps the names of the macros currently being expanded to True;
    a name present in it is not expanded again, which prevents infinite
    recursion.  Every token produced by an expansion receives the line number
    of the macro name it replaces.  An identifier ``__LINE__`` that is not a
    defined macro is turned into an integer token holding its line number.
    """
    if expanded is None:
        expanded = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == self.t_ID:
            if t.value in self.macros and t.value not in expanded:
                # Yes, we found a macro match
                expanded[t.value] = True

                m = self.macros[t.value]
                if not m.arglist:
                    # A simple macro
                    ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                    for e in ex:
                        e.lineno = t.lineno
                    tokens[i:i+1] = ex
                    i += len(ex)
                else:
                    # A macro with arguments
                    j = i + 1
                    while j < len(tokens) and tokens[j].type in self.t_WS:
                        j += 1
                    # The bounds check avoids an IndexError when the macro
                    # name is the last (non-whitespace) token.
                    if j < len(tokens) and tokens[j].value == '(':
                        tokcount,args,positions = self.collect_args(tokens[j:])
                        if not m.variadic and len(args) != len(m.arglist):
                            self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                            i = j + tokcount
                        elif m.variadic and len(args) < len(m.arglist)-1:
                            if len(m.arglist) > 2:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                            else:
                                self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                            i = j + tokcount
                        else:
                            if m.variadic:
                                if len(args) == len(m.arglist)-1:
                                    args.append([])
                                else:
                                    # Everything from the start of the variadic
                                    # argument up to the closing ')' becomes one
                                    # argument, commas included.
                                    args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                    del args[len(m.arglist):]

                            # Get macro replacement text
                            rep = self.macro_expand_args(m,args)
                            rep = self.expand_macros(rep,expanded)
                            for r in rep:
                                r.lineno = t.lineno
                            tokens[i:j+tokcount] = rep
                            i += len(rep)
                    else:
                        # The name of a function-like macro that is not
                        # followed by '(' is an ordinary word.  Step past it;
                        # otherwise the same token would be examined forever.
                        i += 1

                del expanded[t.value]
                continue
            elif t.value == '__LINE__':
                t.type = self.t_INTEGER
                t.value = self.t_INTEGER_TYPE(t.lineno)

        i += 1
    return tokens
# ---------------------------------------------------------------------- # evalexpr() # # Evaluate an expression token sequence for the purposes of evaluating # integral expressions. # ----------------------------------------------------------------------
def evalexpr(self,tokens):
    """Evaluate the token list of an integral expression and return the result.

    ``defined NAME`` / ``defined(NAME)`` are replaced by 1 or 0 depending on
    whether NAME is in ``self.macros``, the remaining tokens are macro
    expanded, identifiers that are still left count as 0 and trailing suffixes
    are stripped from integers.  The resulting text is evaluated as a Python
    expression after translating ``&&``, ``||`` and ``!``.  If evaluation
    fails, the error is reported through ``self.error`` and 0 is returned.
    """
    # Search for defined macros
    i = 0
    while i < len(tokens):
        if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
            j = i + 1
            needparen = False
            # Plain "0"/"1" instead of the Python 2 long literals "0L"/"1L":
            # they work for string and for int token values alike.
            result = "0"
            while j < len(tokens):
                if tokens[j].type in self.t_WS:
                    j += 1
                    continue
                elif tokens[j].type == self.t_ID:
                    if tokens[j].value in self.macros:
                        result = "1"
                    else:
                        result = "0"
                    if not needparen:
                        break
                elif tokens[j].value == '(':
                    needparen = True
                elif tokens[j].value == ')':
                    break
                else:
                    self.error(self.source,tokens[i].lineno,"Malformed defined()")
                j += 1
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE(result)
            del tokens[i+1:j+1]
        i += 1

    tokens = self.expand_macros(tokens)
    for i,t in enumerate(tokens):
        if t.type == self.t_ID:
            # An identifier that survived macro expansion evaluates to 0
            tokens[i] = copy.copy(t)
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE("0")
        elif t.type == self.t_INTEGER:
            tokens[i] = copy.copy(t)
            # Strip off any trailing suffixes
            tokens[i].value = str(tokens[i].value)
            while tokens[i].value and tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                tokens[i].value = tokens[i].value[:-1]

    expr = "".join([str(x.value) for x in tokens])
    expr = expr.replace("&&"," and ")
    expr = expr.replace("||"," or ")
    expr = expr.replace("!"," not ")
    # The previous replacement also rewrote the '!' of '!='; restore it.
    expr = expr.replace(" not ="," !=")
    try:
        # NOTE(review): eval() executes text derived from the file being
        # preprocessed -- do not run this on untrusted input.
        result = eval(expr)
    except Exception:
        # An empty expression has no token to take the line number from.
        lineno = tokens[0].lineno if tokens else 0
        self.error(self.source,lineno,"Couldn't evaluate expression")
        result = 0
    return result
for x in lines: for i,tok in enumerate(x): if tok.type notin self.t_WS: break if tok.value == '#': # Preprocessor directive
# insert necessary whitespace instead of eaten tokens for tok in x: if tok.type in self.t_WS and'\n'in tok.value:
chunk.append(tok)
dirtokens = self.tokenstrip(x[i+1:]) if dirtokens:
name = dirtokens[0].value
args = self.tokenstrip(dirtokens[1:]) else:
name = ""
args = []
if name == 'define': if enable: for tok in self.expand_macros(chunk): yield tok
chunk = []
self.define(args) elif name == 'include': if enable: for tok in self.expand_macros(chunk): yield tok
chunk = []
oldfile = self.macros['__FILE__'] for tok in self.include(args): yield tok
self.macros['__FILE__'] = oldfile
self.source = source elif name == 'undef': if enable: for tok in self.expand_macros(chunk): yield tok
chunk = []
self.undef(args) elif name == 'ifdef':
ifstack.append((enable,iftrigger)) if enable: ifnot args[0].value in self.macros:
enable = False
iftrigger = False else:
iftrigger = True elif name == 'ifndef':
ifstack.append((enable,iftrigger)) if enable: if args[0].value in self.macros:
enable = False
iftrigger = False else:
iftrigger = True elif name == 'if':
ifstack.append((enable,iftrigger)) if enable:
result = self.evalexpr(args) ifnot result:
enable = False
iftrigger = False else:
iftrigger = True elif name == 'elif': if ifstack: if ifstack[-1][0]: # We only pay attention if outer "if" allows this if enable: # If already true, we flip enable False
enable = False elifnot iftrigger: # If False, but not triggered yet, we'll check expression
result = self.evalexpr(args) if result:
enable = True
iftrigger = True else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
elif name == 'else': if ifstack: if ifstack[-1][0]: if enable:
enable = False elifnot iftrigger:
enable = True
iftrigger = True else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
elif name == 'endif': if ifstack:
enable,iftrigger = ifstack.pop() else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") else: # Unknown preprocessor directive pass
else: # Normal text if enable:
chunk.extend(x)
for tok in self.expand_macros(chunk): yield tok
chunk = []
def include(self,tokens):
    """Process the argument tokens of an include directive (generator).

    The file name is taken from ``<...>`` or from a string token; any other
    argument is macro expanded first.  The search order is ``self.path``,
    the current directory, ``self.temp_path`` for the ``<...>`` form and
    ``self.temp_path``, the current directory, ``self.path`` for the quoted
    form.  The first file that can be read is passed to ``self.parsegen`` and
    the tokens it produces are yielded.  While that file is processed its
    directory is placed at the front of ``self.temp_path``.  Problems are
    reported with print() and end the generator without yielding anything.
    """
    # Try to extract the filename and then process an include file
    if not tokens:
        return
    if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
        tokens = self.expand_macros(tokens)
        if not tokens:
            # The argument expanded to nothing, so there is no file name.
            print("Malformed #include statement")
            return

    if tokens[0].value == '<':
        # Include <...>
        i = 1
        while i < len(tokens):
            if tokens[i].value == '>':
                break
            i += 1
        else:
            print("Malformed #include <...>")
            return
        filename = "".join([x.value for x in tokens[1:i]])
        path = self.path + [""] + self.temp_path
    elif tokens[0].type == self.t_STRING:
        filename = tokens[0].value[1:-1]
        path = self.temp_path + [""] + self.path
    else:
        print("Malformed #include statement")
        return

    for p in path:
        iname = os.path.join(p,filename)
        # Only a failure to read this candidate moves on to the next search
        # directory; errors raised while processing the file propagate.
        try:
            with open(iname,"r") as f:
                data = f.read()
        except IOError:
            continue
        dname = os.path.dirname(iname)
        if dname:
            self.temp_path.insert(0,dname)
        try:
            for tok in self.parsegen(data,filename):
                yield tok
        finally:
            # Undo the temp_path change even if the consumer stops early or
            # processing raises.
            if dname:
                del self.temp_path[0]
        break
    else:
        print("Couldn't find '%s'" % filename)
# ---------------------------------------------------------------------- # define() # # Define a new macro # ----------------------------------------------------------------------
def define(self,tokens):
    """Define a new macro from *tokens* and store it in ``self.macros``.

    *tokens* is either the text following a define directive, which is
    tokenized with ``self.tokenize``, or the corresponding token list.  Three
    forms are recognised: a bare name (empty value), a name followed by
    whitespace and a value, and a name directly followed by a parenthesised
    parameter list.  A parameter written ``...`` is named ``__VA_ARGS__``; a
    parameter written ``name...`` is variadic as well.  Malformed definitions
    are reported with print() and define nothing.
    """
    if isinstance(tokens,STRING_TYPES):
        tokens = self.tokenize(tokens)

    linetok = tokens
    try:
        name = linetok[0]
        if len(linetok) > 1:
            mtype = linetok[1]
        else:
            mtype = None
        if not mtype:
            m = Macro(name.value,[])
            self.macros[name.value] = m
        elif mtype.type in self.t_WS:
            # A normal macro
            m = Macro(name.value,self.tokenstrip(linetok[2:]))
            self.macros[name.value] = m
        elif mtype.value == '(':
            # A macro with arguments
            tokcount, args, positions = self.collect_args(linetok[1:])
            variadic = False
            for a in args:
                if variadic:
                    print("No more arguments may follow a variadic argument")
                    break
                astr = "".join([str(_i.value) for _i in a])
                if astr == "...":
                    variadic = True
                    a[0].type = self.t_ID
                    a[0].value = '__VA_ARGS__'
                    del a[1:]
                    continue
                elif astr[-3:] == "..." and a[0].type == self.t_ID:
                    variadic = True
                    del a[1:]
                    # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                    # of macro expansion
                    if a[0].value[-3:] == '...':
                        a[0].value = a[0].value[:-3]
                    continue
                if len(a) > 1 or a[0].type != self.t_ID:
                    print("Invalid macro argument")
                    break
            else:
                # Reached only when every parameter was accepted (no break)
                mvalue = self.tokenstrip(linetok[1+tokcount:])
                # Remove the whitespace on either side of each '##'
                i = 0
                while i < len(mvalue):
                    if i+1 < len(mvalue):
                        if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                            del mvalue[i]
                            continue
                        elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                            del mvalue[i+1]
                    i += 1
                m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                self.macro_prescan(m)
                self.macros[name.value] = m
        else:
            print("Bad macro definition")
    except LookupError:
        # Raised by indexing an empty token list or an empty parameter
        print("Bad macro definition")
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.