# -----------------------------------------------------------------------------
# ply: lex.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
__version__ = '3.10'
__tabversion__ = '3.10'

import re
import sys
import types
import copy
import os
import inspect

# Tuple of types treated as "a string" by the isinstance() checks in this
# module (lexer input, literals, state names, string rules).
try:
    # Python 2: byte strings and unicode strings are distinct types.
    StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Python 3: the types module no longer provides StringType.
    StringTypes = (str, bytes)

# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
    """Lexing failure carrying the unprocessed input.

    ``args`` holds the message only; ``text`` holds the second constructor
    argument (the callers in this file pass the remaining input string).
    """

    def __init__(self, message, s):
        # Delegating to Exception stores (message,) as self.args.
        super(LexError, self).__init__(message)
        self.text = s
# Token class.  This class is used to represent the tokens produced.
class LexToken(object):
    """Plain attribute container for one token.

    The string form reads the ``type``, ``value``, ``lineno`` and ``lexpos``
    attributes, which must have been assigned by whoever created the token.
    """

    def __str__(self):
        fields = (self.type, self.value, self.lineno, self.lexpos)
        return 'LexToken(%s,%r,%d,%d)' % fields

    def __repr__(self):
        # Same text as str(); routed through str() so an overridden
        # __str__ is honoured.
        return str(self)
# This object is a stand-in for a logging object created by the
# logging module.
class PlyLogger(object):
    """Minimal logger that writes messages to a file-like object.

    Provides the ``critical``/``warning``/``error``/``info``/``debug`` methods
    that the rest of this module calls on its log objects.  Each takes a
    ``%``-style format string followed by its arguments and writes one line
    to ``f``.
    """

    def __init__(self, f):
        # f only needs a write(str) method.
        self.f = f

    def _emit(self, prefix, msg, args):
        # Interpolate only when arguments were supplied, so an already
        # formatted message containing a literal '%' does not raise.
        text = (msg % args) if args else msg
        self.f.write(prefix + text + '\n')

    def critical(self, msg, *args, **kwargs):
        self._emit('', msg, args)

    def warning(self, msg, *args, **kwargs):
        self._emit('WARNING: ', msg, args)

    def error(self, msg, *args, **kwargs):
        self._emit('ERROR: ', msg, args)

    # info and debug messages are written without a prefix.
    info = critical
    debug = critical
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger stand-in that swallows everything.

    Every attribute lookup and every call evaluates to the instance itself,
    so any chain such as ``log.error(...)`` is a no-op.
    """

    def __getattribute__(self, attr_name):
        # Whatever is asked for, hand back the logger itself.
        return self

    def __call__(self, *call_args, **call_kwargs):
        # Calling the logger (or any "method" obtained above) does nothing.
        return self
# -----------------------------------------------------------------------------
#                        === Lexing Engine ===
#
# The following Lexer class implements the lexer runtime.   There are only
# a few public methods and attributes:
#
#    input()          -  Store a new string in the lexer
#    token()          -  Get the next token
#    clone()          -  Clone the lexer
#
#    lineno           -  Current line number
#    lexpos           -  Current position in the input string
# -----------------------------------------------------------------------------
class Lexer:
    """Lexer runtime.

    Holds, per lexer state, a list of compiled master regular expressions
    together with tables that map regex group numbers to rule functions and
    token types.  ``input()`` stores the text to scan, ``token()`` returns
    the next token, ``begin()``/``push_state()``/``pop_state()`` switch
    states and ``clone()`` makes a copy.  The object is also an iterator
    over its tokens.
    """

    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re, findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
        self.lexstate = 'INITIAL'     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexstateeoff = {}        # Dictionary of eof functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lexeoff = None           # EOF rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ''           # Ignored characters
        self.lexliterals = ''         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexoptimize = False      # Optimized mode

    def clone(self, object=None):
        """Return a copy of this lexer.

        The copy is shallow except for the state stack, which is duplicated
        so that push_state()/pop_state() on one lexer does not affect the
        other.  If ``object`` is given, every rule function, error function
        and eof function is looked up again by name on ``object`` and the
        clone is attached to it.
        """
        c = copy.copy(self)
        c.lexstatestack = list(self.lexstatestack)

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object.  In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.
        if object:
            newtab = {}
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            # Unused group slot or a plain string rule: nothing to rebind.
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                newtab[key] = newre
            c.lexstatere = newtab

            # A state may map to None when it has no error/eof rule.
            c.lexstateerrorf = {}
            for key, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[key] = getattr(object, ef.__name__) if ef else None
            c.lexstateeoff = {}
            for key, ef in self.lexstateeoff.items():
                c.lexstateeoff[key] = getattr(object, ef.__name__) if ef else None
            c.lexmodule = object

            # lexre/lexerrorf/lexeoff still alias the old tables after the
            # shallow copy; re-enter the current state to pick up the new ones.
            if c.lexstate in c.lexstatere:
                c.begin(c.lexstate)
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, lextab, outputdir=''):
        """Write the lexer tables to ``<outputdir>/<last part of lextab>.py``.

        Raises IOError if ``lextab`` is a module object rather than a name.
        The file defines every ``_lex*`` variable that readtab() reads.
        """
        if isinstance(lextab, types.ModuleType):
            raise IOError("Won't overwrite existing lextab module")
        basetabmodule = lextab.split('.')[-1]
        filename = os.path.join(outputdir, basetabmodule) + '.py'
        with open(filename, 'w') as tf:
            tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__))
            tf.write('_tabversion = %s\n' % repr(__tabversion__))
            # Sorted so the generated file does not depend on set ordering.
            tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens))))
            tf.write('_lexreflags = %s\n' % repr(self.lexreflags))
            tf.write('_lexliterals = %s\n' % repr(self.lexliterals))
            tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo))

            # Rewrite the lexstatere table, replacing function objects with function names
            tabre = {}
            for statename, lre in self.lexstatere.items():
                titem = []
                for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]):
                    titem.append((retext, _funcs_to_names(func, renames)))
                tabre[statename] = titem

            tf.write('_lexstatere = %s\n' % repr(tabre))
            tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore))

            taberr = {}
            for statename, ef in self.lexstateerrorf.items():
                taberr[statename] = ef.__name__ if ef else None
            tf.write('_lexstateerrorf = %s\n' % repr(taberr))

            tabeof = {}
            for statename, ef in self.lexstateeoff.items():
                tabeof[statename] = ef.__name__ if ef else None
            tf.write('_lexstateeoff = %s\n' % repr(tabeof))

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        """Load lexer tables from ``tabfile`` and enter the INITIAL state.

        ``tabfile`` is either a module object or an importable module name.
        ``fdict`` maps rule names to the rule functions.  Raises ImportError
        if the module cannot be imported or was written with a different
        table version.
        """
        if isinstance(tabfile, types.ModuleType):
            lextab = tabfile
        else:
            import importlib
            lextab = importlib.import_module(tabfile)

        if getattr(lextab, '_tabversion', '0.0') != __tabversion__:
            raise ImportError('Inconsistent PLY version')

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lextokens_all = self.lextokens | set(self.lexliterals)
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for statename, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for pat, func_name in lre:
                titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict)))
                txtitem.append(pat)
            # Without these two assignments begin() would find no states.
            self.lexstatere[statename] = titem
            self.lexstateretext[statename] = txtitem

        # writetab() stores None for a state without an error/eof rule.
        self.lexstateerrorf = {}
        for statename, ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[statename] = fdict[ef] if ef else None

        self.lexstateeoff = {}
        for statename, ef in lextab._lexstateeoff.items():
            self.lexstateeoff[statename] = fdict[ef] if ef else None

        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        """Store ``s`` as the text to scan and rewind to position 0.

        Raises ValueError if ``s[:1]`` is not one of StringTypes.
        """
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c, StringTypes):
            raise ValueError('Expected a string')
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        """Make ``state`` current; raises ValueError if it is not defined."""
        if state not in self.lexstatere:
            raise ValueError('Undefined state')
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, '')
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexeoff = self.lexstateeoff.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        self.lexpos += n

    # ------------------------------------------------------------
    # opttoken() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        """Return the next token, or None at end of input.

        At end of input the EOF rule of the current state, if any, is called
        and its result returned.  Raises LexError for an illegal character
        that no error rule skips, or (outside optimized mode) when a rule
        function returns a token whose type is not in ``lextokens_all``.
        Raises RuntimeError if no input was ever supplied.
        """
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m:
                    continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                i = m.lastindex
                func, tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it
                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos         # This is here in case user has updated lexpos.
                    lexignore = self.lexignore   # This is here in case there was a state change
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if newtok.type not in self.lextokens_all:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.__code__.co_filename, func.__code__.co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = 'error'
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok:
                        continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        # End of input: give the EOF rule of the current state a chance to
        # produce a token (or supply more input).
        if self.lexeoff:
            tok = LexToken()
            tok.type = 'eof'
            tok.value = ''
            tok.lineno = self.lineno
            tok.lexpos = lexpos
            tok.lexer = self
            self.lexpos = lexpos
            newtok = self.lexeoff(tok)
            return newtok

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError('No input string given with input()')
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        """Return the next token; raise StopIteration at end of input."""
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next
# -----------------------------------------------------------------------------
#                           ==== Lex Builder ===
#
# The functions and classes below are used to collect lexing information
# and build a Lexer object from it.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# _get_regex(func)
#
# Returns the regular expression assigned to a function either as a doc string
# or as a .regex attribute attached by the @TOKEN decorator.
# -----------------------------------------------------------------------------
def _get_regex(func):
    """Return ``func.regex`` when that attribute exists, else ``func.__doc__``."""
    docstring = func.__doc__
    return getattr(func, 'regex', docstring)
# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
    """Return a new dict of the names visible in the frame ``levels`` up.

    The result is a copy of that frame's globals, overlaid with its locals
    when the two differ; locals win on a name clash.
    """
    frame = sys._getframe(levels)
    global_names = frame.f_globals
    local_names = frame.f_locals

    symbols = global_names.copy()
    if global_names != local_names:
        symbols.update(local_names)
    return symbols
# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------
def _funcs_to_names(funclist, namelist):
    """Replace the function in each ``(func, tokentype)`` entry by its name.

    Entries are paired positionally with ``namelist``.  An entry that is
    falsy, or whose first element is falsy, is copied through unchanged.
    """
    return [
        (symbol, entry[1]) if entry and entry[0] else entry
        for entry, symbol in zip(funclist, namelist)
    ]
# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------
def _names_to_funcs(namelist, fdict):
    """Replace the name in each ``(name, tokentype)`` entry by ``fdict[name]``.

    An entry that is falsy, or whose first element is falsy, is copied
    through unchanged.  A name missing from ``fdict`` raises KeyError.
    """
    return [
        (fdict[entry[0]], entry[1]) if entry and entry[0] else entry
        for entry in namelist
    ]
# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression.  Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------
def _form_master_re(relist, reflags, ldict, toknames):
    """Compile ``relist`` into one or more master regular expressions.

    Returns three parallel lists: ``(compiled_re, group_index_to_rule)``
    pairs, the regex source strings, and the group-index-to-symbol-name
    tables.  An empty ``relist`` yields three empty lists.  If the joined
    pattern does not compile, the list is halved and each half is tried
    separately; a single pattern that still fails re-raises the compile
    error.
    """
    if not relist:
        # Three values, because every caller unpacks three.
        return [], [], []

    regex = '|'.join(relist)
    try:
        lexre = re.compile(regex, reflags)
    except Exception:
        if len(relist) == 1:
            # Nothing left to split: report the failure instead of
            # recursing on the same one-element list forever.
            raise
        m = len(relist) // 2
        llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames)
        rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames)
        return (llist + rlist), (lre + rre), (lnames + rnames)

    # Build the index to function map for the matching engine
    lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
    lexindexnames = lexindexfunc[:]

    for f, i in lexre.groupindex.items():
        handle = ldict.get(f, None)
        if isinstance(handle, (types.FunctionType, types.MethodType)):
            lexindexfunc[i] = (handle, toknames[f])
            lexindexnames[i] = f
        elif handle is not None:
            lexindexnames[i] = f
            if f.find('ignore_') > 0:
                # String rule named ...ignore_...: matched text is discarded.
                lexindexfunc[i] = (None, None)
            else:
                lexindexfunc[i] = (None, toknames[f])

    return [(lexre, lexindexfunc)], [regex], [lexindexnames]
# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.  For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def _statetoken(s, names):
    """Split a rule name into ``(states, tokenname)``.

    The leading underscore-separated parts after ``t`` that are keys of
    ``names`` (or the word ``ANY``) are the states; the rest, re-joined with
    underscores, is the token name.  With no state prefix the states are
    ``('INITIAL',)``.  ``ANY`` stands for every state in ``names``.
    """
    parts = s.split('_')
    for i, part in enumerate(parts[1:], 1):
        if part not in names and part != 'ANY':
            break

    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    # 'ANY' is not itself a state name; callers index per-state tables with
    # each returned state, so expand it to all known states.
    if 'ANY' in states:
        states = tuple(names)

    tokenname = '_'.join(parts[i:])
    return (states, tokenname)
# -----------------------------------------------------------------------------
# LexerReflect()
#
# This class represents information needed to build a lexer as extracted from a
# user's input file.
# -----------------------------------------------------------------------------
class LexerReflect(object):
    """Collects and validates the lexer specification found in ``ldict``.

    ``ldict`` is a dictionary of symbols (``tokens``, ``literals``,
    ``states`` and every ``t_`` rule).  The ``get_*`` methods extract the
    information, the ``validate_*`` methods check it.  Problems are reported
    through ``log`` and recorded by setting ``self.error`` to True.
    """

    def __init__(self, ldict, log=None, reflags=0):
        self.ldict = ldict
        self.error_func = None
        self.tokens = []
        self.reflags = reflags
        self.stateinfo = {'INITIAL': 'inclusive'}
        self.modules = set()
        self.error = False
        self.log = PlyLogger(sys.stderr) if log is None else log

    # Get all of the basic information
    def get_all(self):
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step and return the accumulated error flag."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        tokens = self.ldict.get('tokens', None)
        if tokens is None:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        seen = set()
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'", n)
                self.error = True
            if n in seen:
                self.log.warning("Token '%s' multiply defined", n)
            seen.add(n)

    # Get the literals specifier
    def get_literals(self):
        self.literals = self.ldict.get('literals', '')
        if not self.literals:
            self.literals = ''

    # Validate literals
    def validate_literals(self):
        try:
            for c in self.literals:
                if not isinstance(c, StringTypes) or len(c) > 1:
                    self.log.error('Invalid literal %s. Must be a single character', repr(c))
                    self.error = True
        except TypeError:
            # self.literals is not iterable.
            self.log.error('Invalid literals specification. literals must be a sequence of characters')
            self.error = True

    def get_states(self):
        """Read ``states`` from ldict and record each valid one in stateinfo."""
        self.states = self.ldict.get('states', None)
        # Build statemap
        if not self.states:
            return
        if not isinstance(self.states, (tuple, list)):
            self.log.error('states must be defined as a tuple or list')
            self.error = True
            return
        for s in self.states:
            if not isinstance(s, tuple) or len(s) != 2:
                self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s))
                self.error = True
                continue
            name, statetype = s
            if not isinstance(name, StringTypes):
                self.log.error('State name %s must be a string', repr(name))
                self.error = True
                continue
            if not (statetype == 'inclusive' or statetype == 'exclusive'):
                self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name)
                self.error = True
                continue
            if name in self.stateinfo:
                self.log.error("State '%s' already defined", name)
                self.error = True
                continue
            self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)
    def get_rules(self):
        tsymbols = [f for f in self.ldict if f[:2] == 't_']

        # Now build up a list of functions and a list of strings
        self.toknames = {}        # Mapping of symbols to token names
        self.funcsym = {}         # Symbols defined as functions
        self.strsym = {}          # Symbols defined as strings
        self.ignore = {}          # Ignore strings by state
        self.errorf = {}          # Error functions by state
        self.eoff = {}            # EOF functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error('No rules of the form t_rulename are defined')
            self.error = True
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f, self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t, '__call__'):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'eof':
                    for s in states:
                        self.eoff[s] = t
                elif tokname == 'ignore':
                    line = t.__code__.co_firstlineno
                    file = t.__code__.co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__)
                    self.error = True
                else:
                    for s in states:
                        self.funcsym[s].append((f, t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if '\\' in t:
                        self.log.warning("%s contains a literal backslash '\\'", f)
                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = True
                else:
                    for s in states:
                        self.strsym[s].append((f, t))
            else:
                self.log.error('%s not defined as a function or string', f)
                self.error = True

        # Sort the functions by line number
        for f in self.funcsym.values():
            f.sort(key=lambda x: x[1].__code__.co_firstlineno)

        # Sort the strings by regular expression length
        for s in self.strsym.values():
            s.sort(key=lambda x: len(x[1]), reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        for state in self.stateinfo:
            # Validate all rules defined by functions
            for fname, f in self.funcsym[state]:
                line = f.__code__.co_firstlineno
                file = f.__code__.co_filename
                module = inspect.getmodule(f)
                self.modules.add(module)

                # Bound methods carry an extra leading "self" argument.
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = f.__code__.co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                    self.error = True
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                    self.error = True
                    continue

                if not _get_regex(f):
                    self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__)
                    self.error = True
                    continue

                try:
                    c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags)
                    if c.match(''):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__)
                        self.error = True
                except re.error as e:
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e)
                    if '#' in _get_regex(f):
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__)
                    self.error = True

            # Validate all rules defined by strings
            for name, r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = True
                    continue

                if tokname not in self.tokens and tokname.find('ignore_') < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname)
                    self.error = True
                    continue

                try:
                    c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
                    if c.match(''):
                        self.log.error("Regular expression for rule '%s' matches empty string", name)
                        self.error = True
                except re.error as e:
                    self.log.error("Invalid regular expression for rule '%s'. %s", name, e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name)
                    self.error = True

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'", state)
                self.error = True

            # Validate the error function
            efunc = self.errorf.get(state, None)
            if efunc:
                f = efunc
                line = f.__code__.co_firstlineno
                file = f.__code__.co_filename
                module = inspect.getmodule(f)
                self.modules.add(module)

                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = f.__code__.co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__)
                    self.error = True

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__)
                    self.error = True

        for module in self.modules:
            self.validate_module(module)

    # -----------------------------------------------------------------------------
    # validate_module()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file.  This is done using a simple regular expression
    # match on each line in the source code of the given module.
    # -----------------------------------------------------------------------------
    def validate_module(self, module):
        """Report every ``t_`` name that is defined more than once in ``module``.

        Does nothing when the module's source cannot be obtained.
        """
        try:
            lines, linen = inspect.getsourcelines(module)
        except (IOError, OSError, TypeError):
            # No source available (or module is None): nothing to check.
            return

        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = {}
        linen += 1
        for line in lines:
            m = fre.match(line) or sre.match(line)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    filename = inspect.getsourcefile(module)
                    self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev)
                    self.error = True
            linen += 1
def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab',
        reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None):
    """Build a Lexer from the rules found in a module, object or the caller.

    Rules are taken from ``object`` if given, else from ``module``, else from
    the globals/locals of the calling frame.  Unless ``optimize`` is set the
    specification is validated first and SyntaxError is raised on failure.
    With ``optimize`` and a ``lextab``, cached tables are read from that
    module when possible and written back (into ``outputdir``) after a fresh
    build.  The new lexer is returned and also published through the
    module-level names ``lexer``, ``token`` and ``input``.

    ``nowarn`` is accepted but not used by this function.
    """
    global lexer, token, input

    if lextab is None:
        lextab = 'lextab'

    ldict = None
    lexobj = Lexer()
    lexobj.lexoptimize = optimize

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object:
        module = object

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        ldict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in ldict:
            ldict['__file__'] = sys.modules[ldict['__module__']].__file__
    else:
        # Must be called directly from lex(): 2 frames up is lex()'s caller.
        ldict = get_caller_module_dict(2)

    # Determine if the module is package of a package or not.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = ldict.get('__package__')
    if pkg and isinstance(lextab, str):
        if '.' not in lextab:
            lextab = pkg + '.' + lextab

    # Collect parser information from the dictionary
    linfo = LexerReflect(ldict, log=errorlog, reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    if optimize and lextab:
        try:
            lexobj.readtab(lextab, ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj
        except ImportError:
            # No usable cached table: fall through and build from scratch.
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info('lex: tokens = %r', linfo.tokens)
        debuglog.info('lex: literals = %r', linfo.literals)
        debuglog.info('lex: states = %r', linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = set(linfo.tokens)

    # Get literals specification
    if isinstance(linfo.literals, (list, tuple)):
        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
    else:
        lexobj.lexliterals = linfo.literals

    # Lexer.token() checks returned token types against this set.
    lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals)

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    regexs = {}
    # Build the master regular expressions
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first
        for fname, f in linfo.funcsym[state]:
            regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f)))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state)

        # Now add all of the simple rules
        for name, r in linfo.strsym[state]:
            regex_list.append('(?P<%s>%s)' % (name, r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state)

        regexs[state] = regex_list

    # Build the master regular expressions
    if debug:
        debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====')

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i, text in enumerate(re_text):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text)

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state, stype in stateinfo.items():
        if state != 'INITIAL' and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere['INITIAL']
    lexobj.lexretext = lexobj.lexstateretext['INITIAL']
    lexobj.lexreflags = reflags

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '')

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get('INITIAL', None)
    if not lexobj.lexerrorf:
        errorlog.warning('No t_error rule is defined')

    # Set up eof functions
    lexobj.lexstateeoff = linfo.eoff
    lexobj.lexeoff = linfo.eoff.get('INITIAL', None)

    # Check state information for ignore and error rules
    for s, stype in stateinfo.items():
        if stype == 'exclusive':
            if s not in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if s not in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            # Inclusive states inherit INITIAL's error and ignore rules.
            if s not in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get('INITIAL', None)
            if s not in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get('INITIAL', '')

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        if outputdir is None:
            # If no output directory is set, the location of the output files
            # is determined according to the following rules:
            #     - If lextab specifies a package, files go into that package directory
            #     - Otherwise, files go in the same directory as the specifying module
            if isinstance(lextab, types.ModuleType):
                srcfile = lextab.__file__
            else:
                if '.' not in lextab:
                    srcfile = ldict['__file__']
                else:
                    import importlib
                    parts = lextab.split('.')
                    pkgname = '.'.join(parts[:-1])
                    pkgmodule = importlib.import_module(pkgname)
                    srcfile = getattr(pkgmodule, '__file__', '')
            outputdir = os.path.dirname(srcfile)
        try:
            lexobj.writetab(lextab, outputdir)
        except IOError as e:
            # Failing to cache the tables is not fatal.
            errorlog.warning("Couldn't write lextab module %r. %s", lextab, e)

    return lexobj
# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------
def runmain(lexer=None, data=None):
    """Tokenize ``data`` and print one ``(type,value,lineno,lexpos)`` line per token.

    When ``data`` is empty it is read from the file named by ``sys.argv[1]``,
    or from standard input if no argument was given.  When ``lexer`` is not
    supplied the module-level ``input`` and ``token`` functions are used.
    """
    if not data:
        try:
            filename = sys.argv[1]
        except IndexError:
            sys.stdout.write('Reading from standard input (type EOF to end):\n')
            data = sys.stdin.read()
        else:
            with open(filename) as f:
                data = f.read()

    _input = lexer.input if lexer else input
    _input(data)

    _token = lexer.token if lexer else token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos))
# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regex expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------
def TOKEN(r):
    """Return a decorator that stores a regex on the function as ``.regex``.

    ``r`` is either the regex itself or a callable; for a callable, the
    regex is taken from it with _get_regex() at decoration time.
    """
    def set_regex(f):
        f.regex = _get_regex(r) if hasattr(r, '__call__') else r
        return f

    return set_regex


# Alternative spelling of the TOKEN decorator
Token = TOKEN
# NOTE: The text below is footer residue from the web page this file was
# copied from (translated from German); it is not part of PLY.
#
# Measurement V0.5
# Processing time: 0.4 seconds (preprocessed)
#
# The information on this web page was carefully compiled to the best of our
# knowledge. However, neither completeness, nor correctness, nor quality of
# the information provided is guaranteed.
#
# Remark: The syntax colouring and the measurement are still experimental.