"" "Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains ENBF-inspired grammar representing
the implementation.
"" "
import ast
from typing import Any, List, NamedTuple, Optional, Tuple, Union
from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
    """Base class for nodes of a parsed marker expression.

    Wraps a single string and provides the shared `str`/`repr` behavior;
    subclasses decide how the value is serialized.
    """

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        # e.g. <Variable('os_name')>
        return f"<{type(self).__name__}('{self}')>"

    def serialize(self) -> str:
        # Subclasses must provide their own serialization.
        raise NotImplementedError
class Variable(Node):
    """A marker variable (e.g. an environment marker name)."""

    def serialize(self) -> str:
        # Variables serialize as their bare text.
        return str(self)
class Value(Node):
    """A literal string value in a marker expression."""

    def serialize(self) -> str:
        # Values serialize surrounded by double quotes.
        return f'"{self}"'
class Op(Node):
    """A comparison operator in a marker expression."""

    def serialize(self) -> str:
        # Operators serialize as their bare text.
        return str(self)
# Either side of a marker comparison: a variable or a literal value.
MarkerVar = Union[Variable, Value]
# A single comparison: (left operand, operator, right operand).
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# The precise recursive types would be:
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]
class ParsedRequirement(NamedTuple):
    """Structured result of parsing a dependency specifier string."""

    # Package name (required; first token of the specifier).
    name: str
    # Direct URL from the "name @ url" form; empty string when absent.
    url: str
    # Extra names from the bracketed extras list; empty list when absent.
    extras: List[str]
    # Raw version-specifier text; empty string when absent.
    specifier: str
    # Parsed marker expression after ";", or None when no marker is given.
    marker: Optional[MarkerList]
# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse a dependency specifier string into a ParsedRequirement."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    ).text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    # Nothing may follow the details; anything left is a syntax error.
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns a ``(url, specifier, marker)`` triple.  ``url`` and ``specifier``
    default to the empty string and ``marker`` to ``None`` when the
    corresponding part is absent from the input.
    """
    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        # URL form: "name @ url [; marker]".
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # A marker after a URL must be separated from it by whitespace.
        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        # Specifier form: "name specifiers [; marker]".
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            # Tailor the error message to whether a specifier was present.
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?

    ``span_start`` and ``after`` feed the syntax-error report when the
    expected semicolon is missing.
    """
    if tokenizer.check("SEMICOLON"):
        tokenizer.read()
    else:
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )

    result = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return result
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?

    Returns an empty list when no bracketed extras group is present.
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    if not tokenizer.check("IDENTIFIER"):
        return []

    names: List[str] = [tokenizer.read().text]

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two adjacent identifiers means the separator was forgotten.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        if not tokenizer.check("COMMA"):
            break
        tokenizer.read()  # the comma

        tokenizer.consume("WS")
        names.append(
            tokenizer.expect("IDENTIFIER", expected="extra name after comma").text
        )

    return names
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?

    Returns the raw text of the version specifiers (possibly empty).
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        specifiers_text = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return specifiers_text
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the concatenated raw text of all matched specifiers, with the
    separating commas included; empty string when no SPECIFIER is present.
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        # Trailing tokens only legal after == / != are rejected here with a
        # span covering the offending specifier.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                # +1 presumably extends the span over the trailing token;
                # NOTE(review): the local-label branch below does not add 1 —
                # confirm the asymmetry is intentional.
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma text itself in the returned string.
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")
    return parsed_specifiers
# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    """Parse a marker expression string into a MarkerList."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_marker(tokenizer)
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*

    Returns a flat list alternating atoms and boolean-operator strings,
    e.g. ``[atom, "and", atom, "or", atom]``.
    """
    parts: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        operator = tokenizer.read().text
        parts.append(operator)
        parts.append(_parse_marker_atom(tokenizer))
    return parts
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")
    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        # Simple case: a single comparison.
        atom: MarkerAtom = _parse_marker_item(tokenizer)
    else:
        # Parenthesized sub-expression; recurse into the full marker grammar.
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            atom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    tokenizer.consume("WS")
    return atom
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?

    Returns the (lhs, operator, rhs) triple of one comparison.
    """
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    operator = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (lhs, operator, rhs)
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dotted variable names are normalized to underscores.
        name = tokenizer.read().text.replace(".", "_")
        return process_env_var(name)
    if tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
def process_env_var(env_var: str) -> Variable:
    """Wrap a marker variable name, canonicalizing the implementation aliases.

    Both ``python_implementation`` and ``platform_python_implementation`` map
    to the single ``platform_python_implementation`` variable.
    """
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    return Variable(env_var)
def process_python_str(python_str: str) -> Value:
    """Safely evaluate a quoted Python string literal and wrap it as a Value."""
    return Value(str(ast.literal_eval(python_str)))
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")

    if tokenizer.check("NOT"):
        # "not" must be followed by whitespace and then "in".
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")

    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)

    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of "
        "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )