# SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: 2021 Taneli Hukkinen # Licensed to PSF under a Contributor Agreement.
from __future__ import annotations
from collections.abc import Iterable import string from types import MappingProxyType from typing import Any, BinaryIO, NamedTuple
from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
) from ._types import Key, ParseFloat, Pos
# Every ASCII control character: code points 0 through 31, plus DEL (127).
ASCII_CTRL = frozenset(map(chr, (*range(32), 127)))

# Neither of the two sets below contains the quotation mark or the backslash;
# the parser functions currently handle those as separate cases.
# Tab is excluded from both sets; newline is additionally excluded from the
# multi-line variant.
ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - {"\t"}
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - {"\t", "\n"}
class TOMLDecodeError(ValueError):
    """Raised when a document is not valid TOML.

    Subclasses `ValueError`, so callers may catch either type.
    """
def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    Args:
        __fp: A file-like object whose ``read()`` returns ``bytes``.
        parse_float: Callable forwarded unchanged to `loads`.

    Returns:
        Whatever `loads` returns for the decoded document.

    Raises:
        TypeError: If ``read()`` returns an object without a ``decode``
            method (for example ``str`` from a file opened in text mode).
    """
    raw = __fp.read()
    try:
        # `bytes.decode()` with no arguments decodes as UTF-8.
        text = raw.decode()
    except AttributeError:
        # A text-mode file yields `str`, which has no `.decode()`. Replace the
        # AttributeError with an actionable message and hide the noisy context.
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:  # noqa: C901
    """Parse TOML from a string.

    Args:
        __s: The TOML document.
        parse_float: Callable used for TOML floats. It is wrapped with
            `make_safe_parse_float` before use.

    Returns:
        The parsed document as a dict.

    Raises:
        TypeError: If ``__s`` is not a ``str`` (e.g. ``bytes`` or ``None``).
        TOMLDecodeError: Via `suffixed_err`, on an invalid statement or when
            a statement is not followed by a newline or end of document.
    """
    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        # Non-str input (bytes raise TypeError here, most other objects
        # AttributeError): report the real problem instead of a confusing
        # error from `.replace`.
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None
    pos = 0
    out = Output(NestedDict(), Flags())
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)

    # Parse one statement at a time
    # (typically means one line in TOML source)
    while True:
        # 1. Skip line leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Parse rules. Expect one of the following:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        #    Skip trailing whitespace when applicable.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            try:
                second_char: str | None = src[pos + 1]
            except IndexError:
                second_char = None
            # Pending flags are applied before any new header is processed.
            out.flags.finalize_pending()
            if second_char == "[":
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise suffixed_err(src, pos, "Invalid statement")

        # 3. Skip comment
        pos = skip_comment(src, pos)

        # 4. Expect end of line or end of file
        try:
            char = src[pos]
        except IndexError:
            break
        if char != "\n":
            raise suffixed_err(
                src, pos, "Expected newline or end of document after a statement"
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags are stored in a tree that mirrors key nesting. Each node is a dict
    with three entries: ``"flags"`` (apply to that exact key only),
    ``"recursive_flags"`` (apply to the key and everything below it) and
    ``"nested"`` (child nodes, keyed by key part).
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST = 1

    def __init__(self) -> None:
        # Root of the flag tree: key part -> node dict (see class docstring).
        self._flags: dict[str, dict] = {}
        # (key, flag) pairs queued by `add_pending`, applied by
        # `finalize_pending`.
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue `flag` for `key`; it takes effect on `finalize_pending`."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag (non-recursively) and empty the queue."""
        for key, flag in self._pending_flags:
            self.set(key, flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Remove the node for `key`, dropping its flags and all nested flags.

        Does nothing when `key` or any of its parents has no node.
        """
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return
            cont = cont[k]["nested"]
        cont.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Set `flag` on `key`, creating missing nodes along the path.

        With ``recursive=True`` the flag also applies to every key nested
        under `key` (see `is_`).
        """
        cont = self._flags
        key_parent, key_stem = key[:-1], key[-1]
        for k in key_parent:
            if k not in cont:
                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
            cont = cont[k]["nested"]
        if key_stem not in cont:
            cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
        cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return True if `flag` applies to `key`.

        A flag applies when it is set on `key` itself (recursively or not)
        or set recursively on any parent of `key`.
        """
        if not key:
            return False  # document root has no flags
        cont = self._flags
        for k in key[:-1]:
            if k not in cont:
                return False
            inner_cont = cont[k]
            if flag in inner_cont["recursive_flags"]:
                return True
            cont = inner_cont["nested"]
        key_stem = key[-1]
        if key_stem in cont:
            cont = cont[key_stem]
            return flag in cont["flags"] or flag in cont["recursive_flags"]
        return False
class NestedDict:
    """A dict of nested dicts/lists addressed by tuple keys."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict:
        """Return the dict found by walking `key`, creating missing dicts.

        Args:
            key: Path of key parts to follow from the root dict.
            access_lists: When true, a list met on the path is entered
                through its last item.

        Raises:
            KeyError: If a step on the path resolves to something that is
                not a dict.
        """
        cont: Any = self.dict
        for k in key:
            if k not in cont:
                cont[k] = {}
            cont = cont[k]
            if access_lists and isinstance(cont, list):
                cont = cont[-1]
            if not isinstance(cont, dict):
                raise KeyError("There is no nest behind this key")
        return cont

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list if absent.

        Raises:
            KeyError: If `key` already holds something other than a list, or
                if the parent path cannot be resolved to a dict.
        """
        cont = self.get_or_create_nest(key[:-1])
        last_key = key[-1]
        if last_key in cont:
            list_ = cont[last_key]
            if not isinstance(list_, list):
                raise KeyError("An object other than list found behind this key")
            list_.append({})
        else:
            cont[last_key] = [{}]
class Output(NamedTuple):
    """Pair of the parsed data and the flags recorded for its keys."""

    # Nested dict that accumulates the parsed document.
    data: NestedDict
    # Flags attached to keys/namespaces of `data`.
    flags: Flags
if out.flags.is_(key, Flags.FROZEN): raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}") # Free the namespace now that it points to another empty list item...
out.flags.unset_all(key) # ...but this key precisely is still prohibited from table declaration
out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False) try:
out.data.append_nest_to_list(key) except KeyError: raise suffixed_err(src, pos, "Cannot overwrite a value") fromNone
ifnot src.startswith("]]", pos): raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration") return pos + 2, key
relative_path_cont_keys = (header + key[:i] for i in range(1, len(key))) for cont_key in relative_path_cont_keys: # Check that dotted key syntax does not redefine an existing table if out.flags.is_(cont_key, Flags.EXPLICIT_NEST): raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}") # Containers in the relative path can't be opened with the table syntax or # dotted key/value syntax in following table sections.
out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)
# Add at maximum two extra apostrophes/quotes if the end sequence # is 4 or 5 chars long instead of just 3. ifnot src.startswith(delim, pos): return pos, result
pos += 1 ifnot src.startswith(delim, pos): return pos, result + delim
pos += 1 return pos, result + (delim * 2)
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic (double-quoted) string.

    Args:
        src: The full source text.
        pos: Index at which scanning starts (presumably just past the
            opening delimiter; verify against callers).
        multiline: Whether a triple-quoted string is parsed. Selects which
            illegal-character set and which escape parser are used, and
            whether the terminator is one quote or three.

    Returns:
        A tuple of the position just past the closing delimiter and the
        parsed string content.

    Raises:
        TOMLDecodeError: Via `suffixed_err`, if the source ends before the
            string is closed or an illegal character is found.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
    result = ""
    # Start of the current run of literal characters not yet copied to result.
    start_pos = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise suffixed_err(src, pos, "Unterminated string") from None
        if char == '"':
            if not multiline:
                return pos + 1, result + src[start_pos:pos]
            if src.startswith('"""', pos):
                return pos + 3, result + src[start_pos:pos]
            # A lone quote inside a multi-line string is ordinary content.
            pos += 1
            continue
        if char == "\\":
            # Flush the literal run, then let the escape parser consume the
            # escape sequence and report where it ended.
            result += src[start_pos:pos]
            pos, parsed_escape = parse_escapes(src, pos)
            result += parsed_escape
            start_pos = pos
            continue
        if char in error_on:
            raise suffixed_err(src, pos, f"Illegal character {char!r}")
        pos += 1
# Dates and times
datetime_match = RE_DATETIME.match(src, pos) if datetime_match: try:
datetime_obj = match_to_datetime(datetime_match) except ValueError as e: raise suffixed_err(src, pos, "Invalid date or datetime") from e return datetime_match.end(), datetime_obj
localtime_match = RE_LOCALTIME.match(src, pos) if localtime_match: return localtime_match.end(), match_to_localtime(localtime_match)
# Integers and "normal" floats. # The regex will greedily match any type starting with a decimal # char, so needs to be located after handling of dates and times.
number_match = RE_NUMBER.match(src, pos) if number_match: return number_match.end(), match_to_number(number_match, parse_float)
# Special floats
first_three = src[pos : pos + 3] if first_three in {"inf", "nan"}: return pos + 3, parse_float(first_three)
first_four = src[pos : pos + 4] if first_four in {"-inf", "+inf", "-nan", "+nan"}: return pos + 4, parse_float(first_four)
raise suffixed_err(src, pos, "Invalid value")
def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
    """Return a `TOMLDecodeError` where error message is suffixed with
    coordinates in source.

    The exception is returned, not raised, so call sites write
    ``raise suffixed_err(...)``.

    Args:
        src: The full source text.
        pos: Index in `src` where the problem was detected.
        msg: Human-readable description of the problem.
    """

    def coord_repr(src: str, pos: Pos) -> str:
        """Describe `pos` as a 1-based line/column, or as end of document."""
        if pos >= len(src):
            return "end of document"
        line = src.count("\n", 0, pos) + 1
        if line == 1:
            column = pos + 1
        else:
            # Distance from the last newline before `pos` is the 1-based column.
            column = pos - src.rindex("\n", 0, pos)
        return f"line {line}, column {column}"

    return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    `parse_float` must not return dicts or lists, because these types
    would be mixed with parsed TOML tables and arrays, thus confusing
    the parser. The returned decorated callable raises `ValueError`
    instead of returning illegal types.

    Args:
        parse_float: Callable that converts a float string to a value.

    Returns:
        ``float`` itself when `parse_float` is ``float``; otherwise a wrapper
        that validates the type of each result.
    """
    # The default `float` callable never returns illegal types. Optimize it.
    if parse_float is float:  # type: ignore[comparison-overlap]
        return float

    def safe_parse_float(float_str: str) -> Any:
        float_value = parse_float(float_str)
        if isinstance(float_value, (dict, list)):
            raise ValueError("parse_float must not return dicts or lists")
        return float_value

    return safe_parse_float
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.