"""parse_pgen.py - Parse grammars written in the pgen parser specification language.
I'm not sure I want to keep this pgen mini-language around; ignore this for now. """
import sys
from collections import namedtuple

from .lexer import LexicalGrammar
from .grammar import Grammar, Production, CallMethod, is_concrete_element, Optional
from . import gen
from . import parse_pgen_generated
def list_of(e, allow_comments=False):
    """Return productions defining a left-recursive list nonterminal for `e`.

    The list nonterminal is named by appending 's' to `e` (e.g. `token` ->
    `tokens`). If `allow_comments` is true, an extra production lets a single
    COMMENT token reduce to an empty list.
    """
    nt = e + 's'
    prods = [
        Production([e], CallMethod('single', (0,))),
        Production([nt, e], CallMethod('append', (0, 1))),
    ]
    if allow_comments:
        prods.append(Production(['COMMENT'], CallMethod('empty', (0,))))
    return prods
def call_method(name, body):
    """Build a CallMethod reducer for the production body `body`.

    Collects the indexes (among concrete elements of `body`) of elements
    that are not in `discards` — presumably a module-level set of symbols
    to drop, defined elsewhere in this file (TODO confirm).
    """
    arg_indexes = []
    current = 0
    for e in body:
        if is_concrete_element(e):
            if e not in discards:
                arg_indexes.append(current)
            current += 1
    # The mangled source lacked a return statement, so the collected
    # indexes were silently discarded and the function returned None.
    return CallMethod(name, tuple(arg_indexes))
def prod(self, symbols, reducer):
    """Return a (symbols, reducer) production pair.

    If `reducer` is None it defaults to 0, but only when `symbols` contains
    exactly one concrete element; otherwise an explicit reducer is required
    and a ValueError is raised.
    """
    if reducer is None:
        if sum(1 for e in symbols if is_concrete_element(e)) == 1:
            reducer = 0
        else:
            raise ValueError("reducer required for {!r}".format(symbols))
    return (symbols, reducer)
def optional(self, sym):
    """Wrap `sym` to mark it as an optional grammar element."""
    return Optional(sym)
def ident(self, sym):
    """Pass an identifier token through unchanged."""
    return sym
def str(self, sym):
    """Convert a double-quoted string token into a Literal.

    NOTE(review): the asserts validate the token shape but are stripped
    under `python -O`; kept as-is to preserve the exception type callers
    may rely on. Escape sequences are not handled ("a bit sloppy").
    """
    assert len(sym) > 1
    assert sym[0] == '"'
    assert sym[-1] == '"'
    chars = sym[1:-1]  # This is a bit sloppy.
    return Literal(chars)
def check_grammar(result):
    """Validate a parsed pgen grammar and normalize its productions.

    `result` is a (tokens, nonterminals, goal_nts) triple, where `tokens`
    is a list of (name, image) pairs (image is None for variable tokens)
    and `nonterminals` maps names to lists of productions.

    Raises ValueError on redeclared tokens, duplicate token images, a name
    declared as both token and nonterminal, or undeclared symbols in a
    production. Returns (tokens, out, goal_nts) with each production body
    rewritten so that literal tokens are replaced by their images.
    """
    tokens, nonterminals, goal_nts = result
    tokens_by_name = {}
    tokens_by_image = {}
    for name, image in tokens:
        if name in tokens_by_name:
            raise ValueError("token `{}` redeclared".format(name))
        tokens_by_name[name] = image
        if image is not None and image in tokens_by_image:
            raise ValueError("multiple tokens look like \"{}\"".format(image))
        tokens_by_image[image] = name
        if name in nonterminals:
            raise ValueError("`{}` is declared as both a token and a nonterminal (pick one)".format(name))

    def check_element(nt, i, e):
        # Validate one element of production `i` of nonterminal `nt`,
        # returning its normalized form.
        if isinstance(e, Optional):
            return Optional(check_element(nt, i, e.inner))
        elif isinstance(e, Literal):
            if e.chars not in tokens_by_image:
                raise ValueError("in {} production {}: undeclared token \"{}\"".format(nt, i, e.chars))
            return e.chars
        else:
            assert isinstance(e, str), e.__class__.__name__
            if e in nonterminals:
                return e
            elif e in tokens_by_name:
                image = tokens_by_name[e]
                if image is not None:
                    return image
                return e
            else:
                raise ValueError("in {} production {}: undeclared symbol {}".format(nt, i, e))

    out = {nt: [] for nt in nonterminals}
    for nt, rhs_list in nonterminals.items():
        for i, p in enumerate(rhs_list):
            out_rhs = [check_element(nt, i, e) for e in p.body]
            out[nt].append(p.copy_with(body=out_rhs))
    return (tokens, out, goal_nts)
def load_grammar(filename):
    """Parse the pgen grammar file `filename` and return a Grammar.

    Reads the file, feeds it through the generated pgen parser driven by
    an AstBuilder, validates the result with check_grammar, and builds a
    Grammar whose variable terminals are the tokens declared without an
    image (image is None).
    """
    with open(filename) as f:
        text = f.read()
    parser = parse_pgen_generated.Parser(builder=AstBuilder())
    lexer = pgen_lexer(parser, filename=filename)
    lexer.write(text)
    result = lexer.close()
    tokens, nonterminals, goals = check_grammar(result)
    variable_terminals = [name for name, image in tokens if image is None]
    return Grammar(nonterminals,
                   goal_nts=goals,
                   variable_terminals=variable_terminals)
# NOTE(review): the following German website-disclaimer text appears to have
# been pasted in accidentally and is not part of this module; it is commented
# out to keep the file syntactically valid. English translation:
# "The information on this website has been carefully compiled to the best of
# our knowledge. However, no guarantee is given of the completeness,
# correctness, or quality of the information provided.
# Note: the colored syntax highlighting and the measurement are still
# experimental."