""" Data structures for representing grammars. """
from __future__ import annotations # mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls
import copy import typing import dataclasses from dataclasses import dataclass from .ordered import OrderedSet, OrderedFrozenSet from . import types
# *** What is a grammar? ****************************************************** # # A grammar is a dictionary mapping nonterminal names to lists of right-hand # sides. Each right-hand side (also called a "production") is a list whose # elements can include terminals, nonterminals, Optional elements, LookaheadRules, # and NoLineTerminatorHere. # # The most common elements are terminals and nonterminals, so a grammar usually # looks something like this: def example_grammar() -> Grammar:
rules: typing.Dict[typing.Union[str, InitNt, Nt], LenientNtDef] = { 'expr': [
['term'],
['expr', '+', 'term'],
['expr', '-', 'term'],
], 'term': [
['unary'],
['term', '*', 'unary'],
['term', '/', 'unary'],
], 'unary': [
['prim'],
['-', 'unary'],
], 'prim': [
['NUM'],
['VAR'],
['(', 'expr', ')'],
],
}
# The goal nonterminals are the nonterminals we're actually interested in # parsing. Here we want to parse expressions; all the other nonterminals # are interesting only as the building blocks of expressions. # # Variable terminals are terminal symbols that can have several different # values, like a VAR token that could be any identifier, or a NUM token # that could be any number. return Grammar(rules, goal_nts=['expr'], variable_terminals=['NUM', 'VAR'])
Condition = typing.Tuple[str, bool]
# A production consists of a left side, an optional condition, a right side, # and a reducer. A `Production` object includes everything except the left # side. Incorporating reducers lets us transform a grammar while preserving # behavior. # # The production `expr ::= term` is represented by # `Production(["term"], 0)`. # # The production `expr ::= expr + term => add($0, $2)` is represented by # `Production(["expr", "+", "term"], CallMethod("add", (0, 2))`. #
@dataclass class Production:
__slots__ = ['body', 'reducer', 'condition']
body: typing.List[Element]
reducer: ReduceExprOrAccept
condition: typing.Optional[Condition]
# *** Reduce expressions ****************************************************** # # Reduce expressions ("reducers" for short) are a little language used to # specify what happens when a production is matched. A reduce expression is # one of these: # # * An integer evaluates to one of the syntactic components of the # production. For example, if the production we're reducing is # `sum ::= sum + term`, the integer `0` evaluates the `sum` to the left of # the plus sign, and `2` means the `term` on the right. (The integer # `1` would refer to the `+` token itself, but that's not super useful.) # # These integers are not quite indexes into `production.body`, because # LookaheadRule, ErrorSymbol, and NoLineTerminatorHere elements don't # count: in the production `stmt ::= [lookahead != "let"] expr ";"`, `0` is # the expr, and `1` is the semicolon token. See `is_concrete_element(e)`. # # * CallMethod objects pass values to a builder method and return the result. # The `args` are nested reduce expressions. # # * None is an expression used as a placeholder when an optional symbol is # omitted. # # * Some(expr) is used when an optional symbol is found and parsed. # In Python, this just expands to the same thing as `expr`, but in Rust # this expands to a use of `Option::Some()`. # # In addition, the special reducer 'accept' means stop parsing. This is # used only in productions for init nonterminals, created automatically by # Grammar.__init__(). It's not a reduce expression, so it can't be nested.
@dataclass(frozen=True) class CallMethod: """Express a method call, and give it a given set of arguments. A trait is
added as the parser should implement this trait to call this method."""
* self.variable_terminals - OrderedFrozenSet[str] - Terminals that carry
data, like (in JS) numeric literals and RegExps.
* self.terminals - OrderedFrozenSet[str] - All terminals used in the
language, including those in self.variable_terminals and
self.synthetic_terminals.
* self.synthetic_terminals - {str: OrderedFrozenSet[str]} - Maps terminals
to sets of terminals. An entry `name: set` in this dictionary means
that `name` is shorthand for"one of the terminals in `set`".
* self.nonterminals - {LenientNt: NtDef} - Keys are either (str|InitNt), early in the pipeline, or Nt objects later on. Values are the NtDef objects
that contain the actual Productions.
* self.methods - {str: MethodType} - Type information for methods.
* self.init_nts - [InitNt or Nt] - The list of all elements of
self.nonterminals.keys() that are init nonterminals.
# This constructor supports passing in a sort of jumbled blob of # strings, lists, and actual objects, and normalizes it all to a more # typeful structure. Being able to interpret simple # "list-of-lists-of-strings" input is super useful for tests. # # We don't check here that the grammar is LR, that it's cycle-free, or # any other nice properties.
# Copy/infer the arguments.
my_nonterminals: typing.Dict[LenientNt, LenientNtDef] = dict(nonterminals.items()) if goal_nts isNone: # Default to the first nonterminal in the dictionary.
my_goal_nts = [] for name in my_nonterminals:
my_goal_nts.append(name) break else:
my_goal_nts = list(goal_nts)
self.variable_terminals = OrderedFrozenSet(variable_terminals) if synthetic_terminals isNone:
synthetic_terminals = {} else:
synthetic_terminals = {
name: OrderedFrozenSet(set) for name, set in synthetic_terminals.items()
} for synthetic_key, values in synthetic_terminals.items(): if synthetic_key in my_nonterminals: raise ValueError( "invalid grammar: {!r} is both a terminal and a nonterminal"
.format(synthetic_key)) for t in values: if t in my_nonterminals: raise ValueError( "invalid grammar: {!r} is both ".format(t)
+ "a representation of a synthetic terminal and a nonterminal") if t in synthetic_terminals: # Nested synthetic terminals. Throw, for now. (This # should pretty much just work, except expand_terminal # doesn't support it; and we don't check for cycles # where a synthetic terminal includes itself directly # or indirectly.) raise ValueError( "unsupported: synthetic terminals can't include other " "synthetic terminals; {!r} includes {!r}"
.format(synthetic_key, t)) # self.synthetic_terminals = synthetic_terminals
self.synthetic_terminals = {}
keys_are_nt = isinstance(next(iter(my_nonterminals)), Nt)
key_type: typing.Union[typing.Type, typing.Tuple[typing.Type, ...]]
key_type = Nt if keys_are_nt else (str, InitNt)
self._cache = {}
# Gather some information just by looking at keys (without examining # every production). # # str_to_nt maps the name of each non-parameterized # nonterminal to `Nt(name)`, a cache.
str_to_nt: typing.Dict[typing.Union[str, InitNt], Nt] = {} # nt_params lists the names of each nonterminal's parameters (empty # tuple for non-parameterized nts).
nt_params: typing.Dict[typing.Union[str, InitNt], typing.Tuple[str, ...]] = {} for key in my_nonterminals: ifnot isinstance(key, key_type): raise ValueError( "invalid grammar: conflicting key types in nonterminals dict - " "expected either all str or all Nt, got {!r}"
.format(key.__class__.__name__))
nt_name: typing.Union[str, InitNt]
param_names: typing.Tuple[str, ...] if keys_are_nt: assert isinstance(key, Nt)
nt_name = key.name
param_names = tuple(name for name, value in key.args) else: assert isinstance(key, (str, InitNt))
nt_name = key
param_names = ()
my_nt = my_nonterminals[key] if isinstance(my_nt, NtDef):
param_names = tuple(my_nt.params) if nt_name notin nt_params:
nt_params[nt_name] = param_names else: if nt_params[nt_name] != param_names: raise ValueError( "conflicting parameter name lists for nt {!r}: " "both {!r} and {!r}"
.format(nt_name, nt_params[nt_name], param_names)) if param_names == () and nt_name notin str_to_nt:
str_to_nt[nt_name] = self.intern(Nt(nt_name))
# Validate, desugar, and copy the grammar. As a side effect, calling # validate_element on every element of the grammar populates # all_terminals.
all_terminals: OrderedSet[typing.Union[str, End]] = OrderedSet(self.variable_terminals)
all_terminals.add(End())
def note_terminal(t: str) -> None: """Add t (and all representations of it, if synthetic) to all_terminals.""" if t notin all_terminals:
all_terminals.add(t) if t in self.synthetic_terminals: for k in self.synthetic_terminals[t]:
note_terminal(k)
# Note: i and j are normally list indexes, but they are sometimes the # special string '?'. It's OK because they are used only in error # messages. def validate_element(
nt: LenientNt,
i: typing.Union[int, str],
j: typing.Union[int, str],
e: Element,
context_params: typing.Tuple[str, ...]
) -> Element: if isinstance(e, str): if e in nt_params: if nt_params[e] != (): raise ValueError( "invalid grammar: missing parameters for {!r} " "in production `grammar[{!r}][{}][{}].inner`: {!r}"
.format(e, nt, i, j, nt_params[e])) return str_to_nt[e] else:
note_terminal(e) return e elif isinstance(e, Optional): ifnot isinstance(e.inner, (str, Nt)): raise TypeError( "invalid grammar: unrecognized element " "in production `grammar[{!r}][{}][{}].inner`: {!r}"
.format(nt, i, j, e.inner))
inner = validate_element(nt, i, j, e.inner, context_params) return self.intern(Optional(inner)) elif isinstance(e, Literal): ifnot isinstance(e.text, str): raise TypeError( "invalid grammar: unrecognized element " "in production `grammar[{!r}][{}][{}].text`: {!r}"
.format(nt, i, j, e.text)) return self.intern(e) elif isinstance(e, UnicodeCategory): ifnot isinstance(e.cat_prefix, str): raise TypeError( "invalid grammar: unrecognized element " "in production `grammar[{!r}][{}][{}].cat_prefix`: {!r}"
.format(nt, i, j, e.cat_prefix)) return self.intern(e) elif isinstance(e, Exclude): ifnot isinstance(e.inner, (str, Nt)): raise TypeError( "invalid grammar: unrecognized element " "in production `grammar[{!r}][{}][{}].inner`: {!r}"
.format(nt, i, j, e.inner))
exclusion_list = [] for value in e.exclusion_list: ifnot isinstance(value, (str, Nt)): raise TypeError( "invalid grammar: unrecognized element " "in production `grammar[{!r}][{}][{}].exclusion_list`: {!r}"
.format(nt, i, j, value))
value = validate_element(nt, i, j, value, context_params)
exclusion_list.append(value)
inner = validate_element(nt, i, j, e.inner, context_params) return self.intern(Exclude(inner, tuple(exclusion_list))) elif isinstance(e, Nt): # Either the application or the original parameterized # production must be present in the dictionary. if e notin my_nonterminals and e.name notin my_nonterminals: raise ValueError( "invalid grammar: unrecognized nonterminal " "in production `grammar[{!r}][{}][{}]`: {!r}"
.format(nt, i, j, e.name))
args = tuple(pair[0] for pair in e.args) if e.name in nt_params and args != nt_params[e.name]: raise ValueError( "invalid grammar: wrong arguments passed to {!r} " "in production `grammar[{!r}][{}][{}]`: " "passed {!r}, expected {!r}"
.format(e.name, nt, i, j,
args, nt_params[e.name])) for param_name, arg_expr in e.args: if isinstance(arg_expr, Var): if arg_expr.name notin context_params: raise ValueError( "invalid grammar: undefined variable {!r} " "in production `grammar[{!r}][{}][{}]`"
.format(arg_expr.name, nt, i, j)) return self.intern(e) elif isinstance(e, (LookaheadRule, End, ErrorSymbol)): return self.intern(e) elif e is NoLineTerminatorHere: return e elif isinstance(e, CallMethod): return self.intern(e) else: raise TypeError( "invalid grammar: unrecognized element in production " "`grammar[{!r}][{}][{}]`: {!r}"
.format(nt, i, j, e)) assertFalse, "unreachable"
def check_reduce_expr(
nt: LenientNt,
i: int,
rhs: Production,
expr: ReduceExprOrAccept) -> None: if isinstance(expr, int):
concrete_len = sum(1 for e in rhs.body if is_concrete_element(e)) ifnot (0 <= expr < concrete_len): raise ValueError( "invalid grammar: element number {} out of range for " "production {!r} in grammar[{!r}][{}].reducer ({!r})"
.format(expr, nt, rhs.body, i, rhs.reducer)) elif isinstance(expr, CallMethod): ifnot isinstance(expr.method, str): raise TypeError( "invalid grammar: method names must be strings, " "not {!r}, in grammar[{!r}[{}].reducer"
.format(expr.method, nt, i)) ifnot expr.method.isidentifier():
name, space, pn = expr.method.partition(' ') if space == ' 'and name.isidentifier() and pn.isdigit(): pass else: raise ValueError( "invalid grammar: invalid method name {!r} " "(not an identifier), in grammar[{!r}[{}].reducer"
.format(expr.method, nt, i)) for arg_expr in expr.args:
check_reduce_expr(nt, i, rhs, arg_expr) elif expr isNone: pass elif isinstance(expr, Some):
check_reduce_expr(nt, i, rhs, expr.inner) else: raise TypeError( "invalid grammar: unrecognized reduce expression {!r} " "in grammar[{!r}][{}].reducer"
.format(expr, nt, i))
def copy_rhs(
nt: LenientNt,
i: int,
sole_production: bool,
rhs: LenientProduction,
context_params: typing.Tuple[str, ...]) -> Production: if isinstance(rhs, list): # Bare list, no reducer. Desugar to a Production, inferring a # reasonable default reducer.
nargs = sum(1 for e in rhs if is_concrete_element(e))
reducer: ReduceExpr if len(rhs) == 1 and nargs == 1:
reducer = 0 # don't call a method, just propagate the value else: # Call a method named after the production. If the # nonterminal has exactly one production, there's no need # to include the production index `i` to the method name. if sole_production:
method = str(nt) else:
method = '{}_{}'.format(nt, i)
reducer = CallMethod(method, tuple(range(nargs)))
rhs = Production(rhs, reducer)
ifnot isinstance(rhs, Production): raise TypeError( "invalid grammar: grammar[{!r}][{}] should be " "a Production or list of grammar symbols, not {!r}"
.format(nt, i, rhs))
if rhs.condition isnotNone:
param, value = rhs.condition if param notin context_params: raise TypeError( "invalid grammar: undefined parameter {!r} " "in conditional for grammar[{!r}][{}]"
.format(param, nt, i)) if rhs.reducer != 'accept':
check_reduce_expr(nt, i, rhs, rhs.reducer) assert isinstance(rhs.body, list) return rhs.copy_with(body=[
validate_element(nt, i, j, e, context_params) for j, e in enumerate(rhs.body)
])
def copy_nt_def(
nt: LenientNt,
nt_def: typing.Union[NtDef, typing.List[LenientProduction]],
) -> NtDef:
rhs_list: typing.Sequence[LenientProduction] if isinstance(nt_def, NtDef): for i, param in enumerate(nt_def.params): ifnot isinstance(param, str): raise TypeError( "invalid grammar: parameter {} of {} should be " "a string, not {!r}"
.format(i + 1, nt, param))
params = nt_def.params
rhs_list = nt_def.rhs_list
ty = nt_def.type else:
params = ()
rhs_list = nt_def
ty = None
ifnot isinstance(rhs_list, list): raise TypeError( "invalid grammar: grammar[{!r}] should be either a " "list of right-hand sides or NtDef, not {!r}"
.format(nt, type(rhs_list).__name__))
sole_production = len(rhs_list) == 1
productions = [copy_rhs(nt, i, sole_production, rhs, params) for i, rhs in enumerate(rhs_list)] return NtDef(params, productions, ty)
def check_nt_key(nt: LenientNt) -> None: if isinstance(nt, str): ifnot nt.isidentifier(): raise ValueError( "invalid grammar: nonterminal names must be identifiers, not {!r}"
.format(nt)) if nt in self.variable_terminals or nt in self.synthetic_terminals: raise TypeError( "invalid grammar: {!r} is both a nonterminal and a variable terminal"
.format(nt)) elif isinstance(nt, Nt): assert keys_are_nt # checked earlier ifnot (isinstance(nt.name, (str, InitNt)) and isinstance(nt.args, tuple)): raise TypeError( "invalid grammar: expected str or Nt(name=str, " "args=tuple) keys in nonterminals dict, got {!r}"
.format(nt))
check_nt_key(nt.name) for pair in nt.args: if (not isinstance(pair, tuple) or len(pair) != 2 ornot isinstance(pair[0], str) ornot isinstance(pair[1], bool)): raise TypeError( "invalid grammar: expected tuple((str, bool)) args, got {!r}"
.format(nt)) elif isinstance(nt, InitNt): # Users don't include init nonterminals when initially creating # a Grammar. They are automatically added below. But if this # Grammar is being created by hacking on a previous Grammar, it # will already have them. ifnot isinstance(nt.goal, Nt): raise TypeError( "invalid grammar: InitNt.goal should be a nonterminal, " "got {!r}"
.format(nt)) # nt.goal is a "use", not a "def". Check it like a use. # Bogus question marks appear in error messages :-|
validate_element(nt, '?', '?', nt.goal, ()) if nt.goal notin my_goal_nts: raise TypeError( "invalid grammar: nonterminal referenced by InitNt " "is not in the list of goals: {!r}"
.format(nt)) else: raise TypeError( "invalid grammar: expected string keys in nonterminals dict, got {!r}"
.format(nt))
def validate_nt(
nt: LenientNt,
nt_def: LenientNtDef
) -> typing.Tuple[LenientNt, NtDef]:
check_nt_key(nt) if isinstance(nt, InitNt): # Check the form of init productions. Initially these look like # [[goal]], but after the pipeline goes to work, they can be # [[Optional(goal)]] or [[], [goal]]. ifnot isinstance(nt_def, NtDef): raise TypeError( "invalid grammar: key {!r} must map to " "value of type NtDef, not {!r}"
.format(nt, nt_def))
rhs_list = nt_def.rhs_list
g = nt.goal if (rhs_list != [Production([g], 0),
Production([Nt(nt, ()), End()], 'accept')] and rhs_list != [Production([Optional(g)], 0),
Production([Nt(nt, ()), End()], 'accept')] and rhs_list != [Production([End()], 'accept'),
Production([g, End()], 'accept'),
Production([Nt(nt, ()), End()], 'accept')]): raise ValueError( "invalid grammar: grammar[{!r}] is not one of " "the expected forms: got {!r}"
.format(nt, rhs_list))
return nt, copy_nt_def(nt, nt_def)
self.nonterminals = {} for nt1, nt_def1 in my_nonterminals.items():
nt, nt_def = validate_nt(nt1, nt_def1)
self.nonterminals[nt] = nt_def for syn_term_name, t_set in synthetic_terminals.items():
nt_def = NtDef((syn_term_name,), [Production([e], 0) for e in t_set], None)
nt, nt_def = validate_nt(syn_term_name, nt_def)
self.nonterminals[nt] = nt_def # Remove synthetic terminals from the list of terminals.
all_terminals.remove(syn_term_name)
self.terminals = OrderedFrozenSet(all_terminals)
# Check types of reduce expressions and infer method types. But if the # caller passed in precalculated type info, skip it -- otherwise we # would redo type checking many times as we make minor changes to the # Grammar along the pipeline. if method_types isNone:
types.infer_types(self) else: for nt, nt_def in self.nonterminals.items(): assert isinstance(nt_def, NtDef) assert isinstance(nt_def.type, types.Type)
self.methods = method_types
# Synthesize "init" nonterminals.
self.init_nts = [] for goal in my_goal_nts: # Convert str goals to Nt objects and validate. if isinstance(goal, str):
ok = goal in str_to_nt if ok:
goal = str_to_nt[goal] elif isinstance(goal, Nt): if keys_are_nt:
ok = goal in my_nonterminals else:
ok = goal.name in my_nonterminals ifnot ok: raise ValueError( "goal nonterminal {!r} is undefined".format(goal)) assert isinstance(goal, Nt)
# Weird, but the key of an init nonterminal really is # `Nt(InitNt(Nt(goal_name, goal_args)), ())`. It takes no arguments, # but it refers to a goal that might take arguments.
init_nt = InitNt(goal)
init_key: LenientNt = init_nt
goal_nt = Nt(init_nt, ()) if keys_are_nt:
init_key = goal_nt if init_key notin self.nonterminals:
self.nonterminals[init_key] = NtDef(
(),
[Production([goal], 0),
Production([goal_nt, End()], 'accept')],
types.NoReturnType)
self.init_nts.append(goal_nt)
# Add the various execution backends which would rely on the same parse table.
self.exec_modes = exec_modes
self.type_to_modes = type_to_modes
# The argument is a list of extension.GrammarExtensions. The annotation is # vague because this module does not import the extension module. It would # be a cyclic dependency. def patch(self, extensions: typing.List) -> None: assert self.type_to_modes isnotNone assert self.exec_modes isnotNone if extensions == []: return # Copy of nonterminals which would be mutated by the patches.
nonterminals = copy.copy(self.nonterminals) for ext in extensions: # Add the given trait to the execution mode, depending on which # type it got implemented for. for mode in self.type_to_modes[ext.target.for_type]:
self.exec_modes[mode].add(ext.target.trait) # Apply grammar transformations.
ext.apply_patch(self, nonterminals) # Replace with the modified version of nonterminals
self.nonterminals = nonterminals
def intern(self, obj: Internable) -> Internable: """Return a shared copy of the immutable object `obj`.
This saves memory and consistent use allows code to use `is` for
equality testing. """ try: return self._cache[obj] except KeyError:
self._cache[obj] = obj return obj
# Terminals are tokens that must appear verbatim in the input wherever they # appear in the grammar, like the operators '+' '-' *' '/' and brackets '(' ')' # in the example grammar. def is_terminal(self, element: object) -> bool: return type(element) is str
def expand_set_of_terminals(
self,
terminals: typing.Iterable[typing.Union[str, None, ErrorSymbol]]
) -> OrderedSet[typing.Union[str, None, ErrorSymbol]]: """Copy a set of terminals, replacing any synthetic terminals with their representations.
Returns a new OrderedSet.
terminals - an iterable of terminals and/or other unique elements like Noneor ErrorSymbol. """
result: OrderedSet[typing.Union[str, None, ErrorSymbol]] = OrderedSet() for t in terminals: if isinstance(t, str) and t in self.synthetic_terminals:
result |= self.expand_set_of_terminals(self.synthetic_terminals[t]) else:
result.add(t) return result
def goals(self) -> typing.List[Nt]: """Return a list of this grammar's goal nonterminals.""" return [init_nt.name.goal for init_nt in self.init_nts] # type: ignore
def with_nonterminals(
self,
nonterminals: typing.Mapping[LenientNt, LenientNtDef]
) -> Grammar: """Return a copy of self with the same attributes except different nonterminals.""" if self.methods isnotNone: for nt_def in nonterminals.values(): assert isinstance(nt_def, NtDef) assert nt_def.type isnotNone return Grammar(
nonterminals,
goal_nts=self.goals(),
variable_terminals=self.variable_terminals,
synthetic_terminals=self.synthetic_terminals,
method_types=self.methods,
exec_modes=self.exec_modes,
type_to_modes=self.type_to_modes)
# === A few methods for dumping pieces of grammar.
def element_to_str(self, e: Element) -> str: if isinstance(e, Nt): return e.pretty() elif self.is_terminal(e): assert isinstance(e, str) if e in self.variable_terminals or e in self.synthetic_terminals: return e return'"' + repr(e)[1:-1] + '"' elif isinstance(e, Optional): return self.element_to_str(e.inner) + "?" elif isinstance(e, LookaheadRule): if len(e.set) == 1:
op = "=="if e.positive else"!="
s = repr(list(e.set)[0]) else:
op = "in"if e.positive else"not in"
s = '{' + repr(list(e.set))[1:-1] + '}' return"[lookahead {} {}]".format(op, s) elif isinstance(e, End): return"" elif e is NoLineTerminatorHere: return"[no LineTerminator here]" elif isinstance(e, CallMethod): return"{{ {} }}".format(expr_to_str(e)) else: return str(e)
def symbols_to_str(self, rhs: typing.Iterable[Element]) -> str: return" ".join(self.element_to_str(e) for e in rhs)
def rhs_to_str(self, rhs: LenientProduction) -> str: if isinstance(rhs, Production): if rhs.condition isNone:
prefix = '' else:
param, value = rhs.condition if value isTrue:
condition = "+" + param elif value isFalse:
condition = "~" + param else:
condition = "{} == {!r}".format(param, value)
prefix = "#[if {}] ".format(condition) return prefix + self.rhs_to_str(rhs.body) elif len(rhs) == 0: return"[empty]" else: return self.symbols_to_str(rhs)
def production_to_str(
self,
nt: LenientNt,
rhs: LenientProduction,
*reducer: ReduceExpr
) -> str: # As we have two ways of representing productions at the moment, just # take multiple arguments :( return"{} ::= {}{}".format(
self.nt_to_str(nt),
self.rhs_to_str(rhs), "".join(" => " + expr_to_str(expr) for expr in reducer))
# The type of `item` is `lr0.LRItem`. No annotation because this module # does not import `lr0`. It would be a cyclic dependency. def lr_item_to_str(self, prods: typing.List, item: typing.Any) -> str:
prod = prods[item.prod_index] if item.lookahead isNone:
la = [] else:
la = [self.element_to_str(item.lookahead)] return"{} ::= {} >> {{{}}}".format(
self.element_to_str(prod.nt), " ".join([self.element_to_str(e) for e in prod.rhs[:item.offset]]
+ ["\N{MIDDLE DOT}"]
+ la
+ [self.element_to_str(e) for e in prod.rhs[item.offset:]]), ", ".join( "$"if t isNoneelse self.element_to_str(t) for t in item.followed_by)
)
def item_set_to_str(
self,
prods: typing.List,
item_set: OrderedFrozenSet
) -> str: return"{{{}}}".format( ", ".join(self.lr_item_to_str(prods, item) for item in item_set)
)
def expand_terminal(self, t: str) -> OrderedFrozenSet[str]: return self.synthetic_terminals.get(t) or OrderedFrozenSet([t])
def compatible_elements(self, e1: Element, e2: Element) -> bool: # "type: ignore" because mypy doesn't know that `self.is_terminal(e1)` # means `e1` is a terminal, and thus `self.expand_terminal(e1)` is OK. return (e1 == e2 or (self.is_terminal(e1) and self.is_terminal(e2) and len(self.expand_terminal(e1) # type: ignore
& self.expand_terminal(e2)) > 0)) # type: ignore
def compatible_sequences(
self,
seq1: typing.Sequence[Element],
seq2: typing.Sequence[Element]) -> bool: """True if the two sequences could be the same terminals.""" return (len(seq1) == len(seq1) and all(self.compatible_elements(e1, e2) for e1, e2 in zip(seq1, seq2)))
def dump(self) -> None: for nt, nt_def in self.nonterminals.items():
left_side = self.nt_to_str(nt) if nt_def.params:
left_side += "[" + ", ".join(nt_def.params) + "]"
print(left_side + " ::=") for rhs in nt_def.rhs_list:
print(" ", self.rhs_to_str(rhs))
print()
def dump_type_info(self) -> None: for nt, nt_def in self.nonterminals.items():
print(nt, nt_def.type) for name, mty in self.methods.items():
print("fn {}({}) -> {}"
.format(name, ", ".join(str(ty) for ty in mty.argument_types),
str(mty.return_type)))
@dataclass(frozen=True) class InitNt: """InitNt(goal) is the name of the init nonterminal for the given goal.
One init nonterminal is created internally for each goal symbol in the grammar.
The idea is to have a nonterminal that the user has no control over, that is
never used in any production, but *only* as an entry point for the grammar,
that always has a single production "init_nt ::= goal_nt". This predictable
structure makes it easier to get into and out of parsing at run time.
When an init nonterminal is matched, we take the "accept" action rather than
a "reduce" action. """
goal: Nt
# *** Elements **************************************************************** # # Elements are the things that can appear in the .body list of a Production: # # * Strings represent terminals (see `Grammar.is_terminal`) # # * `Nt` objects refer to nonterminals. # # * `Optional` objects represent optional elements. # # * `LookaheadRule` objects are like lookahead assertions in regular # expressions. # # * The `NoLineTerminatorHere` singleton object can appear between two other # symbols to rule out line breaks between them. # # * `ErrorSymbol` objects never match anything produced by the lexer. Instead # they match an ErrorToken that's artificially injected into the token # stream at runtime, by the parser itself, just before a token that does # not match anything else.
def is_concrete_element(e: Element) -> bool: """True if parsing the element `e` produces a value.
A production's concrete elements can be used in reduce expressions. """ returnnot isinstance(e, (LookaheadRule, ErrorSymbol, NoLineTerminatorHereClass))
# Nonterminals in the ECMAScript grammar can be parameterized; NtParameter is # the type of the parameters. # # For example, `BindingIdentifier[?Yield, ?Await]` is represented as # `Nt('BindingIdentifier', (('Yield', Var('Yield')), ('Await', Var('Await'))))`. # # A nonterminal-parameter-expression is represented by either a Var object or # the actual value, a boolean. (In theory, parameters don't *have* to be # boolean; all the code would probably work for anything hashable. In practice, # all parameters in the ECMAScript grammar are boolean.)
NtParameter = typing.Hashable
class Nt: """Nt(name, ((param0, arg0), ...)) - An invocation of a nonterminal.
Nonterminals are like lambdas. Each nonterminal in a grammar is defined by an
NtDef which has 0 or more parameters.
Parameter names `param0...` are strings. The actual arguments `arg0...` are
NtParameters (see above). """
def pretty(self) -> str: """Unique version of this Nt to use in the Python runtime.
Also used in debug/verbose output. """ def arg_to_str(name: str, value: NtParameter) -> str: if value isTrue: return'+' + name elif value isFalse: return'~' + name elif isinstance(value, Var): if value.name == name: return'?' + value.name return name + "=" + value.name else: return name + "=" + repr(value)
if isinstance(self.name, InitNt): return"Start_" + self.name.goal.pretty() if len(self.args) == 0: return self.name return"{}[{}]".format(self.name, ", ".join(arg_to_str(name, value) for name, value in self.args))
@dataclass(frozen=True) class Optional: """Optional(nt) matches either nothing or the given nt.
Optional elements are expanded out before states are calculated, so the
core of the algorithm never sees them. """
inner: Element
@dataclass(frozen=True) class Literal: """Literal(str) matches a sequence of characters.
Literal elements are sequences of characters which are expected to appear
verbatim in the input. """
text: str
@dataclass(frozen=True) class UnicodeCategory: """UnicodeCategory(str) matches any character with a category matching
the cat_prefix.
UnicodeCategory elements are a set of literal elements which correspond to a
given unicode cat_prefix. """
cat_prefix: str
@dataclass(frozen=True) class LookaheadRule: """LookaheadRule(set, pos) imposes a lookahead restriction on whatever follows.
It never consumes any tokens itself. Instead, the right-hand side
[LookaheadRule(frozenset(['a', 'b']), False), 'Thing']
matches a Thing that does not start with the token `a` or `b`. """
set: typing.FrozenSet[str]
positive: bool
# A lookahead restriction really just specifies a set of allowed terminals. # # - No lookahead restriction at all is equivalent to a rule specifying all terminals. # # - A positive lookahead restriction explicitly lists all allowed tokens. # # - A negative lookahead restriction instead specifies the set of all tokens # except a few. # def lookahead_contains(rule: typing.Optional[LookaheadRule], t: str) -> bool: """True if the given lookahead restriction `rule` allows the terminal `t`.""" return (rule isNone or (t in rule.set if rule.positive else t notin rule.set))
def lookahead_intersect(
a: typing.Optional[LookaheadRule],
b: typing.Optional[LookaheadRule]
) -> typing.Optional[LookaheadRule]: """Returns a single rule enforcing both `a` and `b`, allowing only terminals that pass both.""" if a isNone: return b elif b isNone: return a elif a.positive: if b.positive: return LookaheadRule(a.set & b.set, True) else: return LookaheadRule(a.set - b.set, True) else: if b.positive: return LookaheadRule(b.set - a.set, True) else: return LookaheadRule(a.set | b.set, False)
class NoLineTerminatorHereClass: def __str__(self) -> str: return'NoLineTerminatorHere'
# Optional elements. These are expanded out before states are calculated, # so the core of the algorithm never sees them.
@dataclass(frozen=True) class Exclude: """Exclude(nt1, nt2) matches if nt1 matches and nt2 does not."""
inner: Element
exclusion_list: typing.Tuple[Element, ...]
# End. This is used to represent the terminal which is infinitely produced by # the lexer when input end is reached.
@dataclass(frozen=True) class End: """End() represents the end of the input content."""
# Special grammar symbol that can be consumed to handle a syntax error. # # The error code is passed to an error-handling routine at run time which # decides if the error is recoverable or not.
@dataclass(frozen=True) class ErrorSymbol: """Special grammar symbol that can be consumed to handle a syntax error."""
error_code: int
@dataclass class NtDef: """Definition of a nonterminal.
Instances have three attributes:
.params - Tuple of strings, the names of the parameters.
.rhs_list - List of Production objects. Arguments to Nt elements in the
productions can be Var(s) where `s in params`, indicating that parameter
should be passed through unchanged.
.type - The type of runtime value produced by parsing an instance of this
nonterminal, orNone.
An NtDef is a sort of lambda.
Some langauges have constructs that are allowed or disallowed in particular
situations. For example, in many languages `return` statements are allowed
only inside functions or methods. The ECMAScript standard (5.1.5 "Grammar
Notation") offers this example of the notation it uses to specify this sort
of thing:
# The Grammar constructor is very lax about the types you pass to it. It can # accept a `Dict[str, List[List[str]]]`, for example; it copies the data into # NtDef and Production objects. # # The `Lenient` types below are the relaxed types that Grammar() actually # accepts.
LenientNt = typing.Union[Nt, str, InitNt]
LenientProduction = typing.Union[Production, typing.List[Element]]
LenientNtDef = typing.Union[NtDef, typing.List[LenientProduction]]
Messung V0.5
¤ Dauer der Verarbeitung: 0.6 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.