# Scanner produces tokens of the following types: # STREAM-START # STREAM-END # DIRECTIVE(name, value) # DOCUMENT-START # DOCUMENT-END # BLOCK-SEQUENCE-START # BLOCK-MAPPING-START # BLOCK-END # FLOW-SEQUENCE-START # FLOW-MAPPING-START # FLOW-SEQUENCE-END # FLOW-MAPPING-END # BLOCK-ENTRY # FLOW-ENTRY # KEY # VALUE # ALIAS(value) # ANCHOR(value) # TAG(value) # SCALAR(value, plain, style) # # Read comments in the Scanner code for more details. #
__all__ = ['Scanner', 'ScannerError']
from .error import MarkedYAMLError from .tokens import *
class ScannerError(MarkedYAMLError):
    # Raised on tokenization failures; the mark-annotated message
    # formatting is inherited from MarkedYAMLError.
    pass
class SimpleKey:
    """Record describing a potential simple key.

    See the simple-keys treatment notes in the Scanner code below.
    """

    def __init__(self, token_number, required, index, line, column, mark):
        # token_number: position of the key token within the token stream;
        # required: whether a ':' must follow for the document to be valid;
        # index/line/column: stream position where the candidate key starts;
        # mark: saved mark for error reporting.
        (self.token_number, self.required, self.index,
         self.line, self.column, self.mark) = (
            token_number, required, index, line, column, mark)
class Scanner:
def __init__(self): """Initialize the scanner.""" # It is assumed that Scanner and Reader will have a common descendant. # Reader do the dirty work of checking for BOM and converting the # input data to Unicode. It also adds NUL to the end. # # Reader supports the following methods # self.peek(i=0) # peek the next i-th character # self.prefix(l=1) # peek the next l characters # self.forward(l=1) # read the next l characters and move the pointer.
# Had we reached the end of the stream?
self.done = False
# The number of unclosed '{' and '['. `flow_level == 0` means block # context.
self.flow_level = 0
# List of processed tokens that are not yet emitted.
self.tokens = []
# Add the STREAM-START token.
self.fetch_stream_start()
# Number of tokens that were emitted through the `get_token` method.
self.tokens_taken = 0
# The current indentation level.
self.indent = -1
# Past indentation levels.
self.indents = []
# Variables related to simple keys treatment.
# A simple key is a key that is not denoted by the '?' indicator. # Example of simple keys: # --- # block simple key: value # ? not a simple key: # : { flow simple key: value } # We emit the KEY token before all keys, so when we find a potential # simple key, we try to locate the corresponding ':' indicator. # Simple keys should be limited to a single line and 1024 characters.
# Can a simple key start at the current position? A simple key may # start: # - at the beginning of the line, not counting indentation spaces # (in block context), # - after '{', '[', ',' (in the flow context), # - after '?', ':', '-' (in the block context). # In the block context, this flag also signifies if a block collection # may start at the current position.
self.allow_simple_key = True
# Keep track of possible simple keys. This is a dictionary. The key # is `flow_level`; there can be no more that one possible simple key # for each level. The value is a SimpleKey record: # (token_number, required, index, line, column, mark) # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), # '[', or '{' tokens.
self.possible_simple_keys = {}
# Public methods.
def check_token(self, *choices): # Check if the next token is one of the given types. while self.need_more_tokens():
self.fetch_more_tokens() if self.tokens: ifnot choices: returnTrue for choice in choices: if isinstance(self.tokens[0], choice): returnTrue returnFalse
def peek_token(self): # Return the next token, but do not delete if from the queue. # Return None if no more tokens. while self.need_more_tokens():
self.fetch_more_tokens() if self.tokens: return self.tokens[0] else: returnNone
def get_token(self): # Return the next token. while self.need_more_tokens():
self.fetch_more_tokens() if self.tokens:
self.tokens_taken += 1 return self.tokens.pop(0)
# Private methods.
    def need_more_tokens(self):
        # Decide whether `fetch_more_tokens` must run before a token can be
        # handed out.  Returns a falsy value (explicit False, or implicit
        # None on fall-through) when the head of the queue is safe to emit.
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
    def fetch_more_tokens(self):
        # Dispatch on the next significant character and append the
        # corresponding token(s) to `self.tokens`.

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.column)

        # Peek the next character.
        ch = self.peek()

        # Is it the end of stream?
        if ch == '\0':
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == '%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == '-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == '.' and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        #if ch == '\uFEFF':
        #    return self.fetch_bom()    <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == '[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == '{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == ']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == '}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ',':
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch == '-' and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == '?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == ':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == '*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == '&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == '!':
            return self.fetch_tag()

        # Is it a literal scalar?  Literal scalars exist in block context only.
        if ch == '|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?  Folded scalars exist in block context only.
        if ch == '>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == '\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == '\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError("while scanning for the next token", None,
                "found character %r that cannot start any token" % ch,
                self.get_mark())
# Simple keys treatment.
def next_possible_simple_key(self): # Return the number of the nearest possible simple key. Actually we # don't need to loop through the whole dictionary. We may replace it # with the following code: # if not self.possible_simple_keys: # return None # return self.possible_simple_keys[ # min(self.possible_simple_keys.keys())].token_number
min_token_number = None for level in self.possible_simple_keys:
key = self.possible_simple_keys[level] if min_token_number isNoneor key.token_number < min_token_number:
min_token_number = key.token_number return min_token_number
def stale_possible_simple_keys(self): # Remove entries that are no longer possible simple keys. According to # the YAML specification, simple keys # - should be limited to a single line, # - should be no longer than 1024 characters. # Disabling this procedure will allow simple keys of any length and # height (may cause problems if indentation is broken though). for level in list(self.possible_simple_keys):
key = self.possible_simple_keys[level] if key.line != self.line \ or self.index-key.index > 1024: if key.required: raise ScannerError("while scanning a simple key", key.mark, "could not find expected ':'", self.get_mark()) del self.possible_simple_keys[level]
    def save_possible_simple_key(self):
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position: in
        # block context, a key sitting exactly at the current indent level
        # must be followed by ':'.
        required = not self.flow_level and self.indent == self.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken+len(self.tokens)
            key = SimpleKey(token_number, required,
                    self.index, self.line, self.column, self.get_mark())
            self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self): # Remove the saved possible key position at the current flow level. if self.flow_level in self.possible_simple_keys:
key = self.possible_simple_keys[self.flow_level]
if key.required: raise ScannerError("while scanning a simple key", key.mark, "could not find expected ':'", self.get_mark())
del self.possible_simple_keys[self.flow_level]
# Indentation functions.
    def unwind_indent(self, column):
        # Pop indentation levels deeper than `column`, emitting a BLOCK-END
        # token for each popped level.

        ## In flow context, tokens should respect indentation.
        ## Actually the condition should be `self.indent >= column` according to
        ## the spec. But this condition will prohibit intuitively correct
        ## constructions such as
        ## key : {
        ## }
        #if self.flow_level and self.indent > column:
        #    raise ScannerError(None, None,
        #            "invalid indentation or unclosed '[' or '{'",
        #            self.get_mark())

        # In the flow context, indentation is ignored. We make the scanner less
        # restrictive than the specification requires.
        if self.flow_level:
            return

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            mark = self.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(mark, mark))
def add_indent(self, column): # Check if we need to increase indentation. if self.indent < column:
self.indents.append(self.indent)
self.indent = column returnTrue returnFalse
# Fetchers.
def fetch_stream_start(self): # We always add STREAM-START as the first token and STREAM-END as the # last token.
# Are we allowed to start a new entry? ifnot self.allow_simple_key: raise ScannerError(None, None, "sequence entries are not allowed here",
self.get_mark())
# We may need to add BLOCK-SEQUENCE-START. if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockSequenceStartToken(mark, mark))
# It's an error for the block entry to occur in the flow context, # but we let the parser detect this. else: pass
# Simple keys are allowed after '-'.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Are we allowed to start a key (not necessary a simple)? ifnot self.allow_simple_key: raise ScannerError(None, None, "mapping keys are not allowed here",
self.get_mark())
# We may need to add BLOCK-MAPPING-START. if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after '?' in the block context.
self.allow_simple_key = not self.flow_level
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# If this key starts a new block mapping, we need to add # BLOCK-MAPPING-START. ifnot self.flow_level: if self.add_indent(key.column):
self.tokens.insert(key.token_number-self.tokens_taken,
BlockMappingStartToken(key.mark, key.mark))
# There cannot be two simple keys one after another.
self.allow_simple_key = False
# It must be a part of a complex key. else:
# Block context needs additional checks. # (Do we really need them? They will be caught by the parser # anyway.) ifnot self.flow_level:
# We are allowed to start a complex value if and only if # we can start a simple key. ifnot self.allow_simple_key: raise ScannerError(None, None, "mapping values are not allowed here",
self.get_mark())
# If this value starts a new block mapping, we need to add # BLOCK-MAPPING-START. It will be detected as an error later by # the parser. ifnot self.flow_level: if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after ':' in the block context.
self.allow_simple_key = not self.flow_level
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# A flow scalar could be a simple key.
self.save_possible_simple_key()
# No simple keys after flow scalars.
self.allow_simple_key = False
# Scan and add SCALAR.
self.tokens.append(self.scan_flow_scalar(style))
    def fetch_plain(self):
        # Fetch a plain (unquoted) SCALAR token.

        # A plain scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())
# Checkers.
def check_directive(self):
# DIRECTIVE: ^ '%' ... # The '%' indicator is already checked. if self.column == 0: returnTrue
    def check_document_start(self):
        # DOCUMENT-START:   ^ '---' (' '|'\n')
        # True only at column 0 when '---' is followed by NUL, whitespace
        # or a line break; falls through to implicit None otherwise.
        if self.column == 0:
            if self.prefix(3) == '---'  \
                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                return True
    def check_document_end(self):
        # DOCUMENT-END:     ^ '...' (' '|'\n')
        # True only at column 0 when '...' is followed by NUL, whitespace
        # or a line break; falls through to implicit None otherwise.
        if self.column == 0:
            if self.prefix(3) == '...'  \
                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                return True
# A plain scalar may start with any non-space character except: # '-', '?', ':', ',', '[', ']', '{', '}', # '#', '&', '*', '!', '|', '>', '\'', '\"', # '%', '@', '`'. # # It may also start with # '-', '?', ':' # if it is followed by a non-space character. # # Note that we limit the last rule to the block context (except the # '-' character) because we want the flow context to be space # independent.
ch = self.peek() return ch notin'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ or (self.peek(1) notin'\0 \t\r\n\x85\u2028\u2029' and (ch == '-'or (not self.flow_level and ch in'?:')))
# Scanners.
    def scan_to_next_token(self):
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.

        if self.index == 0 and self.peek() == '\uFEFF':
            self.forward()
        found = False
        while not found:
            # Skip spaces, then a trailing '#' comment, then one line break;
            # stop once a line yields no break.
            while self.peek() == ' ':
                self.forward()
            if self.peek() == '#':
                while self.peek() not in '\0\r\n\x85\u2028\u2029':
                    self.forward()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True
    def scan_directive(self):
        # See the specification for details.
        # Scans '%NAME value...' and returns a DirectiveToken.
        start_mark = self.get_mark()
        self.forward()
        name = self.scan_directive_name(start_mark)
        value = None
        if name == 'YAML':
            value = self.scan_yaml_directive_value(start_mark)
            end_mark = self.get_mark()
        elif name == 'TAG':
            value = self.scan_tag_directive_value(start_mark)
            end_mark = self.get_mark()
        else:
            # Unknown directive: skip its arguments up to the line break.
            end_mark = self.get_mark()
            while self.peek() not in '\0\r\n\x85\u2028\u2029':
                self.forward()
        self.scan_directive_ignored_line(start_mark)
        return DirectiveToken(name, value, start_mark, end_mark)
def scan_directive_name(self, start_mark): # See the specification for details.
length = 0
ch = self.peek(length) while'0' <= ch <= '9'or'A' <= ch <= 'Z'or'a' <= ch <= 'z' \ or ch in'-_':
length += 1
ch = self.peek(length) ifnot length: raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r"
% ch, self.get_mark())
value = self.prefix(length)
self.forward(length)
ch = self.peek() if ch notin'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r"
% ch, self.get_mark()) return value
    def scan_yaml_directive_value(self, start_mark):
        # See the specification for details.
        # Scans 'MAJOR.MINOR' and returns it as an int pair.
        while self.peek() == ' ':
            self.forward()
        major = self.scan_yaml_directive_number(start_mark)
        if self.peek() != '.':
            raise ScannerError("while scanning a directive", start_mark,
                    "expected a digit or '.', but found %r" % self.peek(),
                    self.get_mark())
        self.forward()
        minor = self.scan_yaml_directive_number(start_mark)
        if self.peek() not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a directive", start_mark,
                    "expected a digit or ' ', but found %r" % self.peek(),
                    self.get_mark())
        return (major, minor)
def scan_yaml_directive_number(self, start_mark): # See the specification for details.
ch = self.peek() ifnot ('0' <= ch <= '9'): raise ScannerError("while scanning a directive", start_mark, "expected a digit, but found %r" % ch, self.get_mark())
length = 0 while'0' <= self.peek(length) <= '9':
length += 1
value = int(self.prefix(length))
self.forward(length) return value
def scan_tag_directive_value(self, start_mark): # See the specification for details. while self.peek() == ' ':
self.forward()
handle = self.scan_tag_directive_handle(start_mark) while self.peek() == ' ':
self.forward()
prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix)
    def scan_tag_directive_handle(self, start_mark):
        # See the specification for details.
        # The handle must be followed by a space (the prefix comes next).
        value = self.scan_tag_handle('directive', start_mark)
        ch = self.peek()
        if ch != ' ':
            raise ScannerError("while scanning a directive", start_mark,
                    "expected ' ', but found %r" % ch, self.get_mark())
        return value
    def scan_tag_directive_prefix(self, start_mark):
        # See the specification for details.
        # The prefix must be followed by NUL, space or a line break.
        value = self.scan_tag_uri('directive', start_mark)
        ch = self.peek()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a directive", start_mark,
                    "expected ' ', but found %r" % ch, self.get_mark())
        return value
    def scan_directive_ignored_line(self, start_mark):
        # See the specification for details.
        # Consume trailing spaces and an optional comment, then require and
        # consume the line break that ends the directive line.
        while self.peek() == ' ':
            self.forward()
        if self.peek() == '#':
            while self.peek() not in '\0\r\n\x85\u2028\u2029':
                self.forward()
        ch = self.peek()
        if ch not in '\0\r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a directive", start_mark,
                    "expected a comment or a line break, but found %r"
                    % ch, self.get_mark())
        self.scan_line_break()
    def scan_anchor(self, TokenClass):
        # The specification does not restrict characters for anchors and
        # aliases. This may lead to problems, for instance, the document:
        #   [ *alias, value ]
        # can be interpreted in two ways, as
        #   [ "value" ]
        # and
        #   [ *alias , "value" ]
        # Therefore we restrict aliases to numbers and ASCII letters.
        # TokenClass is AliasToken or AnchorToken, chosen by the caller.
        start_mark = self.get_mark()
        indicator = self.peek()
        if indicator == '*':
            name = 'alias'
        else:
            name = 'anchor'
        self.forward()
        length = 0
        ch = self.peek(length)
        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
                or ch in '-_':
            length += 1
            ch = self.peek(length)
        if not length:
            raise ScannerError("while scanning an %s" % name, start_mark,
                    "expected alphabetic or numeric character, but found %r"
                    % ch, self.get_mark())
        value = self.prefix(length)
        self.forward(length)
        # The anchor/alias name must be terminated by whitespace or one of
        # the flow indicators.
        ch = self.peek()
        if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
            raise ScannerError("while scanning an %s" % name, start_mark,
                    "expected alphabetic or numeric character, but found %r"
                    % ch, self.get_mark())
        end_mark = self.get_mark()
        return TokenClass(value, start_mark, end_mark)
    def scan_tag(self):
        # See the specification for details.
        start_mark = self.get_mark()
        ch = self.peek(1)
        if ch == '<':
            # Verbatim tag: '!<' URI '>'.
            handle = None
            self.forward(2)
            suffix = self.scan_tag_uri('tag', start_mark)
            if self.peek() != '>':
                raise ScannerError("while parsing a tag", start_mark,
                        "expected '>', but found %r" % self.peek(),
                        self.get_mark())
            self.forward()
        elif ch in '\0 \t\r\n\x85\u2028\u2029':
            # Non-specific tag: a lone '!'.
            handle = None
            suffix = '!'
            self.forward()
        else:
            # Look ahead for a second '!' to decide whether the tag uses a
            # named handle ('!handle!suffix') or the primary '!' handle.
            length = 1
            use_handle = False
            while ch not in '\0 \r\n\x85\u2028\u2029':
                if ch == '!':
                    use_handle = True
                    break
                length += 1
                ch = self.peek(length)
            handle = '!'
            if use_handle:
                handle = self.scan_tag_handle('tag', start_mark)
            else:
                handle = '!'
                self.forward()
            suffix = self.scan_tag_uri('tag', start_mark)
        ch = self.peek()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a tag", start_mark,
                    "expected ' ', but found %r" % ch, self.get_mark())
        value = (handle, suffix)
        end_mark = self.get_mark()
        return TagToken(value, start_mark, end_mark)
def scan_block_scalar(self, style): # See the specification for details.
# Scan the header.
self.forward()
chomping, increment = self.scan_block_scalar_indicators(start_mark)
self.scan_block_scalar_ignored_line(start_mark)
# Determine the indentation level and go to the first non-empty line.
min_indent = self.indent+1 if min_indent < 1:
min_indent = 1 if increment isNone:
breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
indent = max(min_indent, max_indent) else:
indent = min_indent+increment-1
breaks, end_mark = self.scan_block_scalar_breaks(indent)
line_break = ''
# Scan the inner part of the block scalar. while self.column == indent and self.peek() != '\0':
chunks.extend(breaks)
leading_non_space = self.peek() notin' \t'
length = 0 while self.peek(length) notin'\0\r\n\x85\u2028\u2029':
length += 1
chunks.append(self.prefix(length))
self.forward(length)
line_break = self.scan_line_break()
breaks, end_mark = self.scan_block_scalar_breaks(indent) if self.column == indent and self.peek() != '\0':
# Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification:
if folded and line_break == '\n' \ and leading_non_space and self.peek() notin' \t': ifnot breaks:
chunks.append(' ') else:
chunks.append(line_break)
# This is Clark Evans's interpretation (also in the spec # examples): # #if folded and line_break == '\n': # if not breaks: # if self.peek() not in ' \t': # chunks.append(' ') # else: # chunks.append(line_break) #else: # chunks.append(line_break) else: break
# Chomp the tail. if chomping isnotFalse:
chunks.append(line_break) if chomping isTrue:
chunks.extend(breaks)
# We are done. return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
    def scan_block_scalar_indicators(self, start_mark):
        # See the specification for details.
        # The chomping ('+'/'-') and indentation (1-9) indicators may appear
        # in either order after '|' or '>'; either or both may be absent.
        chomping = None
        increment = None
        ch = self.peek()
        if ch in '+-':
            if ch == '+':
                chomping = True
            else:
                chomping = False
            self.forward()
            ch = self.peek()
            if ch in '0123456789':
                increment = int(ch)
                if increment == 0:
                    raise ScannerError("while scanning a block scalar", start_mark,
                            "expected indentation indicator in the range 1-9, but found 0",
                            self.get_mark())
                self.forward()
        elif ch in '0123456789':
            increment = int(ch)
            if increment == 0:
                raise ScannerError("while scanning a block scalar", start_mark,
                        "expected indentation indicator in the range 1-9, but found 0",
                        self.get_mark())
            self.forward()
            ch = self.peek()
            if ch in '+-':
                if ch == '+':
                    chomping = True
                else:
                    chomping = False
                self.forward()
        ch = self.peek()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a block scalar", start_mark,
                    "expected chomping or indentation indicators, but found %r"
                    % ch, self.get_mark())
        return chomping, increment
    def scan_block_scalar_ignored_line(self, start_mark):
        # See the specification for details.
        # Consume trailing spaces and an optional comment after the block
        # scalar header, then the line break that ends the header line.
        while self.peek() == ' ':
            self.forward()
        if self.peek() == '#':
            while self.peek() not in '\0\r\n\x85\u2028\u2029':
                self.forward()
        ch = self.peek()
        if ch not in '\0\r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a block scalar", start_mark,
                    "expected a comment or a line break, but found %r" % ch,
                    self.get_mark())
        self.scan_line_break()
    def scan_block_scalar_indentation(self):
        # See the specification for details.
        # Skip leading empty lines, collecting their line breaks and
        # recording the deepest indentation seen.
        chunks = []
        max_indent = 0
        end_mark = self.get_mark()
        while self.peek() in ' \r\n\x85\u2028\u2029':
            if self.peek() != ' ':
                chunks.append(self.scan_line_break())
                end_mark = self.get_mark()
            else:
                self.forward()
                if self.column > max_indent:
                    max_indent = self.column
        return chunks, max_indent, end_mark
    def scan_block_scalar_breaks(self, indent):
        # See the specification for details.
        # Consume empty/indented-blank lines up to `indent`, collecting the
        # line breaks; stops at the first content character.
        chunks = []
        end_mark = self.get_mark()
        while self.column < indent and self.peek() == ' ':
            self.forward()
        while self.peek() in '\r\n\x85\u2028\u2029':
            chunks.append(self.scan_line_break())
            end_mark = self.get_mark()
            while self.column < indent and self.peek() == ' ':
                self.forward()
        return chunks, end_mark
def scan_flow_scalar(self, style): # See the specification for details. # Note that we loose indentation rules for quoted scalars. Quoted # scalars don't need to adhere indentation because " and ' clearly # mark the beginning and the end of them. Therefore we are less # restrictive then the specification requires. We only need to check # that document separators are not included in scalars. if style == '"':
double = True else:
double = False
chunks = []
start_mark = self.get_mark()
quote = self.peek()
self.forward()
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) while self.peek() != quote:
chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.forward()
end_mark = self.get_mark() return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
    def scan_flow_scalar_non_spaces(self, double, start_mark):
        # See the specification for details.
        # NOTE(review): relies on the class-level ESCAPE_REPLACEMENTS and
        # ESCAPE_CODES tables, which are not visible in this chunk — confirm
        # they are defined on the class.
        chunks = []
        while True:
            # Consume the longest run of ordinary characters.
            length = 0
            while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
                length += 1
            if length:
                chunks.append(self.prefix(length))
                self.forward(length)
            ch = self.peek()
            if not double and ch == '\'' and self.peek(1) == '\'':
                # '' inside a single-quoted scalar is an escaped quote.
                chunks.append('\'')
                self.forward(2)
            elif (double and ch == '\'') or (not double and ch in '\"\\'):
                # A quote or backslash that is literal in this quoting style.
                chunks.append(ch)
                self.forward()
            elif double and ch == '\\':
                self.forward()
                ch = self.peek()
                if ch in self.ESCAPE_REPLACEMENTS:
                    # Single-character escape, e.g. \n, \t.
                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                    self.forward()
                elif ch in self.ESCAPE_CODES:
                    # Numeric escape: \xXX, \uXXXX or \UXXXXXXXX.
                    length = self.ESCAPE_CODES[ch]
                    self.forward()
                    for k in range(length):
                        if self.peek(k) not in '0123456789ABCDEFabcdef':
                            raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                    "expected escape sequence of %d hexadecimal numbers, but found %r" %
                                        (length, self.peek(k)), self.get_mark())
                    code = int(self.prefix(length), 16)
                    chunks.append(chr(code))
                    self.forward(length)
                elif ch in '\r\n\x85\u2028\u2029':
                    # Escaped line break: the break itself is discarded.
                    self.scan_line_break()
                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                else:
                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
                            "found unknown escape character %r" % ch, self.get_mark())
            else:
                return chunks
    def scan_flow_scalar_spaces(self, double, start_mark):
        # See the specification for details.
        # Handle a run of spaces/tabs inside a quoted scalar, applying the
        # line-folding rules when the run ends in a line break.
        chunks = []
        length = 0
        while self.peek(length) in ' \t':
            length += 1
        whitespaces = self.prefix(length)
        self.forward(length)
        ch = self.peek()
        if ch == '\0':
            raise ScannerError("while scanning a quoted scalar", start_mark,
                    "found unexpected end of stream", self.get_mark())
        elif ch in '\r\n\x85\u2028\u2029':
            # A single '\n' break folds to one space unless further breaks
            # follow; other break characters are preserved.
            line_break = self.scan_line_break()
            breaks = self.scan_flow_scalar_breaks(double, start_mark)
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                chunks.append(' ')
            chunks.extend(breaks)
        else:
            chunks.append(whitespaces)
        return chunks
    def scan_flow_scalar_breaks(self, double, start_mark):
        # See the specification for details.
        # Collect consecutive line breaks, rejecting document separators.
        chunks = []
        while True:
            # Instead of checking indentation, we check for document
            # separators.
            prefix = self.prefix(3)
            if (prefix == '---' or prefix == '...')   \
                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                raise ScannerError("while scanning a quoted scalar", start_mark,
                        "found unexpected document separator", self.get_mark())
            while self.peek() in ' \t':
                self.forward()
            if self.peek() in '\r\n\x85\u2028\u2029':
                chunks.append(self.scan_line_break())
            else:
                return chunks
    def scan_plain(self):
        # See the specification for details.
        # We add an additional restriction for the flow context:
        #   plain scalars in the flow context cannot contain ',' or '?'.
        # We also keep track of the `allow_simple_key` flag here.
        # Indentation rules are loosed for the flow context.
        chunks = []
        start_mark = self.get_mark()
        end_mark = start_mark
        indent = self.indent+1
        # We allow zero indentation for scalars, but then we need to check for
        # document separators at the beginning of the line.
        #if indent == 0:
        #    indent = 1
        spaces = []
        while True:
            length = 0
            if self.peek() == '#':
                break
            # Extend the current word until a character that terminates a
            # plain scalar in this context.
            while True:
                ch = self.peek(length)
                if ch in '\0 \t\r\n\x85\u2028\u2029'    \
                        or (ch == ':' and
                            self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029'
                                  + (u',[]{}' if self.flow_level else u''))\
                        or (self.flow_level and ch in ',?[]{}'):
                    break
                length += 1
            if length == 0:
                break
            self.allow_simple_key = False
            chunks.extend(spaces)
            chunks.append(self.prefix(length))
            self.forward(length)
            end_mark = self.get_mark()
            spaces = self.scan_plain_spaces(indent, start_mark)
            if not spaces or self.peek() == '#' \
                    or (not self.flow_level and self.column < indent):
                break
        return ScalarToken(''.join(chunks), True, start_mark, end_mark)
    def scan_plain_spaces(self, indent, start_mark):
        # See the specification for details.
        # The specification is really confusing about tabs in plain scalars.
        # We just forbid them completely. Do not use tabs in YAML!
        # Returns None (instead of a chunk list) when a document separator
        # is found, which makes the caller stop the plain scalar.
        chunks = []
        length = 0
        while self.peek(length) in ' ':
            length += 1
        whitespaces = self.prefix(length)
        self.forward(length)
        ch = self.peek()
        if ch in '\r\n\x85\u2028\u2029':
            line_break = self.scan_line_break()
            self.allow_simple_key = True
            prefix = self.prefix(3)
            if (prefix == '---' or prefix == '...')   \
                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                return
            breaks = []
            while self.peek() in ' \r\n\x85\u2028\u2029':
                if self.peek() == ' ':
                    self.forward()
                else:
                    breaks.append(self.scan_line_break())
                    prefix = self.prefix(3)
                    if (prefix == '---' or prefix == '...')   \
                            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                        return
            # Fold a lone '\n' to a space; keep other breaks as-is.
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                chunks.append(' ')
            chunks.extend(breaks)
        elif whitespaces:
            chunks.append(whitespaces)
        return chunks
    def scan_tag_handle(self, name, start_mark):
        # See the specification for details.
        # For some strange reasons, the specification does not allow '_' in
        # tag handles. I have allowed it anyway.
        ch = self.peek()
        if ch != '!':
            raise ScannerError("while scanning a %s" % name, start_mark,
                    "expected '!', but found %r" % ch, self.get_mark())
        length = 1
        ch = self.peek(length)
        if ch != ' ':
            # A named handle: '!' word-chars '!'.
            while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
                    or ch in '-_':
                length += 1
                ch = self.peek(length)
            if ch != '!':
                self.forward(length)
                raise ScannerError("while scanning a %s" % name, start_mark,
                        "expected '!', but found %r" % ch, self.get_mark())
            length += 1
        value = self.prefix(length)
        self.forward(length)
        return value
    def scan_tag_uri(self, name, start_mark):
        # See the specification for details.
        # Note: we do not check if URI is well-formed.
        chunks = []
        length = 0
        ch = self.peek(length)
        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
                or ch in '-;/?:@&=+$,_.!~*\'()[]%':
            if ch == '%':
                # Flush the literal run, then decode the %XX escape(s).
                chunks.append(self.prefix(length))
                self.forward(length)
                length = 0
                chunks.append(self.scan_uri_escapes(name, start_mark))
            else:
                length += 1
            ch = self.peek(length)
        if length:
            chunks.append(self.prefix(length))
            self.forward(length)
            length = 0
        if not chunks:
            raise ScannerError("while parsing a %s" % name, start_mark,
                    "expected URI, but found %r" % ch, self.get_mark())
        return ''.join(chunks)
    def scan_uri_escapes(self, name, start_mark):
        # See the specification for details.
        # Decode a run of %XX escapes as UTF-8 and return the decoded text.
        codes = []
        mark = self.get_mark()
        while self.peek() == '%':
            self.forward()
            for k in range(2):
                if self.peek(k) not in '0123456789ABCDEFabcdef':
                    raise ScannerError("while scanning a %s" % name, start_mark,
                            "expected URI escape sequence of 2 hexadecimal numbers, but found %r"
                            % self.peek(k), self.get_mark())
            codes.append(int(self.prefix(2), 16))
            self.forward(2)
        try:
            value = bytes(codes).decode('utf-8')
        except UnicodeDecodeError as exc:
            raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
        return value
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.