# Source: scanner.py
#
# Language: Python

# -*- coding: utf-8 -*-
# Copyright JS Foundation and other contributors, https://js.foundation/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import, unicode_literals

import re

from .objects import Object
from .compat import xrange, unicode, uchr, uord
from .character import Character, HEX_CONV, OCTAL_CONV
from .messages import Messages
from .token import Token

def hexValue(ch):
    """Look up ``ch`` in the ``HEX_CONV`` table and return the mapped value.

    Callers in this file fold the result into a base-16 accumulator.
    A character missing from the table propagates the lookup error.
    """
    value = HEX_CONV[ch]
    return value

def octalValue(ch):
    """Look up ``ch`` in the ``OCTAL_CONV`` table and return the mapped value.

    Callers in this file fold the result into a base-8 accumulator.
    A character missing from the table propagates the lookup error.
    """
    value = OCTAL_CONV[ch]
    return value

class RegExp(Object):
    """Plain record pairing a regular expression ``pattern`` with its ``flags``."""

    def __init__(self, pattern=None, flags=None):
        self.pattern, self.flags = pattern, flags

class Position(Object):
    """Plain record holding a ``line``, a ``column`` and an ``offset``.

    Every field defaults to ``None`` so a position can be created empty and
    filled in later.
    """

    def __init__(self, line=None, column=None, offset=None):
        self.line, self.column = line, column
        self.offset = offset

class SourceLocation(Object):
    """Plain record holding a ``start``, an ``end`` and a ``source``.

    The scanner in this file fills ``start`` and ``end`` with ``Position``
    objects when it records comment locations.
    """

    def __init__(self, start=None, end=None, source=None):
        self.start, self.end = start, end
        self.source = source

class Comment(Object):
    """Plain record describing one comment found by the scanner.

    As populated by the scanner in this file: ``multiLine`` is a boolean,
    ``slice`` and ``range`` are two-element ``[begin, end]`` index lists
    (``slice`` excludes the comment delimiters, ``range`` includes the
    opening one), and ``loc`` is a ``SourceLocation``.
    """

    def __init__(self, multiLine=None, slice=None, range=None, loc=None):
        self.multiLine, self.slice = multiLine, slice
        self.range, self.loc = range, loc

class RawToken(Object):
    """Plain record for one token produced by the scanner.

    All fields default to ``None``; each ``scan*`` method fills only the
    ones that are relevant to the kind of token it produces.
    """

    def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None):
        # Token classification and payload.
        self.type, self.value = type, value
        # Regular-expression related fields.
        self.pattern, self.flags, self.regex = pattern, flags, regex
        # Set by the numeric and string scanners (octal) and by the
        # template scanner (cooked, head, tail).
        self.octal, self.cooked = octal, cooked
        self.head, self.tail = head, tail
        # Line bookkeeping and the [start, end) index span in the source.
        self.lineNumber, self.lineStart = lineNumber, lineStart
        self.start, self.end = start, end

class ScannerState(Object):
    """Snapshot of the three scanner cursor fields.

    Holds ``index``, ``lineNumber`` and ``lineStart`` exactly as
    ``Scanner.saveState`` captures them and ``Scanner.restoreState`` reads
    them back.
    """

    def __init__(self, index=None, lineNumber=None, lineStart=None):
        self.index = index
        self.lineNumber, self.lineStart = lineNumber, lineStart

class Octal(object):
    """Result pair returned by ``Scanner.octalToDecimal``.

    ``octal`` tells whether the escape counts as an octal escape sequence and
    ``code`` is the accumulated numeric value.
    """

    def __init__(self, octal, code):
        self.octal, self.code = octal, code

class Scanner(object):
    def __init__(self, code, handler):
        """Prepare a scanner over ``code`` that reports errors to ``handler``.

        The source is stored as text with a trailing ``'\\x00'`` sentinel so
        one-character look-ahead at the end of input does not raise.
        ``length`` is the length of the caller's ``code`` and therefore does
        not count the sentinel.
        """
        self.source = unicode(code) + '\x00'
        self.errorHandler = handler
        self.trackComment = self.isModule = False

        size = len(code)
        self.length = size
        self.index = 0
        # Empty input is reported as line 0, anything else starts on line 1.
        self.lineNumber = 0 if size <= 0 else 1
        self.lineStart = 0
        self.curlyStack = []

    def saveState(self):
        """Return a ``ScannerState`` snapshot of the current cursor fields."""
        return ScannerState(self.index, self.lineNumber, self.lineStart)

    def restoreState(self, state):
        self.index = state.index
        self.lineNumber = state.lineNumber
        self.lineStart = state.lineStart

    def eof(self):
        return self.index >= self.length

    def throwUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report ``message`` at the current position via ``errorHandler.throwError``.

        The column passed to the handler is 1-based, computed from the current
        index and the start of the current line. Whatever the handler returns
        is returned to the caller.
        """
        column = self.index - self.lineStart + 1
        return self.errorHandler.throwError(self.index, self.lineNumber, column, message)

    def tolerateUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report ``message`` at the current position via ``errorHandler.tolerateError``.

        The column passed to the handler is 1-based. The handler's result is
        discarded; this method always returns ``None``.
        """
        column = self.index - self.lineStart + 1
        self.errorHandler.tolerateError(self.index, self.lineNumber, column, message)

    # https://tc39.github.io/ecma262/#sec-comments

    def skipSingleLineComment(self, offset):
        """Advance past a single-line comment whose opener was already consumed.

        ``offset`` is the length of the opener that precedes the current
        index (2 for ``//``, 3 for ``-->``, 4 for ``<!--``). Scanning stops
        after the first line terminator, with CRLF treated as one terminator,
        or at the end of input.

        Returns a list that holds one ``Comment`` when ``trackComment`` is
        enabled and is empty otherwise.
        """
        found = []

        if self.trackComment:
            begin = self.index - offset
            location = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - offset
                ),
                end=Position()
            )

        while True:
            if self.eof():
                # The comment runs to the end of the input.
                if self.trackComment:
                    location.end = Position(
                        line=self.lineNumber,
                        column=self.index - self.lineStart
                    )
                    found.append(Comment(
                        multiLine=False,
                        slice=[begin + offset, self.index],
                        range=[begin, self.index],
                        loc=location
                    ))
                return found

            current = self.source[self.index]
            self.index += 1
            if not Character.isLineTerminator(current):
                continue

            if self.trackComment:
                # The terminator itself is not part of the comment.
                last = self.index - 1
                location.end = Position(
                    line=self.lineNumber,
                    column=last - self.lineStart
                )
                found.append(Comment(
                    multiLine=False,
                    slice=[begin + offset, last],
                    range=[begin, last],
                    loc=location
                ))

            # Swallow the '\n' of a CRLF pair so it counts as one line break.
            if current == '\r' and self.source[self.index] == '\n':
                self.index += 1

            self.lineNumber += 1
            self.lineStart = self.index
            return found

    def skipMultiLineComment(self):
        """Advance past a block comment whose ``/*`` opener was already consumed.

        Line terminators inside the comment update ``lineNumber`` and
        ``lineStart``. If the input ends before ``*/`` is found, the problem
        is reported through ``tolerateUnexpectedToken``.

        Returns a list that holds one ``Comment`` when ``trackComment`` is
        enabled and is empty otherwise.
        """
        found = []

        if self.trackComment:
            begin = self.index - 2
            location = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - 2
                ),
                end=Position()
            )

        while not self.eof():
            current = self.source[self.index]

            if Character.isLineTerminator(current):
                # A CRLF pair is consumed together and counts as one line.
                crlf = current == '\r' and self.source[self.index + 1] == '\n'
                self.lineNumber += 1
                self.index += 2 if crlf else 1
                self.lineStart = self.index
                continue

            if current != '*' or self.source[self.index + 1] != '/':
                self.index += 1
                continue

            # Found the closing '*/'.
            self.index += 2
            if self.trackComment:
                location.end = Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart
                )
                found.append(Comment(
                    multiLine=True,
                    slice=[begin + 2, self.index - 2],
                    range=[begin, self.index],
                    loc=location
                ))
            return found

        # Ran off the end of the input: everything after '/*' is the comment.
        if self.trackComment:
            location.end = Position(
                line=self.lineNumber,
                column=self.index - self.lineStart
            )
            found.append(Comment(
                multiLine=True,
                slice=[begin + 2, self.index],
                range=[begin, self.index],
                loc=location
            ))

        self.tolerateUnexpectedToken()
        return found

    def scanComments(self):
        """Skip whitespace, line terminators and comments at the current index.

        Recognises ``//`` and ``/* */`` comments, ``-->`` when only
        whitespace or comments precede it on the line, and ``<!--`` unless
        ``isModule`` is set. Scanning stops at the first character that
        starts none of these.

        Returns the list of ``Comment`` records gathered along the way; the
        list stays empty unless ``trackComment`` is enabled.
        """
        collected = []

        # True while nothing but whitespace/comments has been seen on the line.
        atLineStart = self.index == 0
        while not self.eof():
            ch = self.source[self.index]

            if Character.isWhiteSpace(ch):
                self.index += 1
                continue

            if Character.isLineTerminator(ch):
                self.index += 1
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.lineStart = self.index
                atLineStart = True
                continue

            if ch == '/':  # U+002F is '/'
                following = self.source[self.index + 1]
                if following == '/':
                    self.index += 2
                    skipped = self.skipSingleLineComment(2)
                    if self.trackComment:
                        collected.extend(skipped)

                    atLineStart = True
                elif following == '*':  # U+002A is '*'
                    self.index += 2
                    skipped = self.skipMultiLineComment()
                    if self.trackComment:
                        collected.extend(skipped)

                else:
                    # A lone '/' is a real token, not a comment.
                    break

                continue

            if atLineStart and ch == '-':  # U+002D is '-'
                # U+003E is '>'
                if self.source[self.index + 1:self.index + 3] != '->':
                    break

                # '-->' is a single-line comment
                self.index += 3
                skipped = self.skipSingleLineComment(3)
                if self.trackComment:
                    collected.extend(skipped)

                continue

            if ch == '<' and not self.isModule:  # U+003C is '<'
                if self.source[self.index + 1:self.index + 4] != '!--':
                    break

                self.index += 4  # `<!--`
                skipped = self.skipSingleLineComment(4)
                if self.trackComment:
                    collected.extend(skipped)

                continue

            break

        return collected

    # https://tc39.github.io/ecma262/#sec-future-reserved-words

    def isFutureReservedWord(self, id):
        return id in self.isFutureReservedWord.set
    isFutureReservedWord.set = set((
        'enum',
        'export',
        'import',
        'super',
    ))

    def isStrictModeReservedWord(self, id):
        return id in self.isStrictModeReservedWord.set
    isStrictModeReservedWord.set = set((
        'implements',
        'interface',
        'package',
        'private',
        'protected',
        'public',
        'static',
        'yield',
        'let',
    ))

    def isRestrictedWord(self, id):
        return id in self.isRestrictedWord.set
    isRestrictedWord.set = set((
        'eval', 'arguments',
    ))

    # https://tc39.github.io/ecma262/#sec-keywords

    def isKeyword(self, id):
        return id in self.isKeyword.set
    isKeyword.set = set((
        'if', 'in', 'do',

        'var', 'for', 'new',
        'try', 'let',

        'this', 'else', 'case',
        'void', 'with', 'enum',

        'while', 'break', 'catch',
        'throw', 'const', 'yield',
        'class', 'super',

        'return', 'typeof', 'delete',
        'switch', 'export', 'import',

        'default', 'finally', 'extends',

        'function', 'continue', 'debugger',

        'instanceof',
    ))

    def codePointAt(self, i):
        """Return ``uord`` applied to the (up to) two source characters at ``i``.

        Two characters are passed, presumably so ``uord`` can combine a
        surrogate pair into one code point — confirm against ``compat.uord``.
        """
        window = self.source[i:i + 2]
        return uord(window)

    def scanHexEscape(self, prefix):
        """Read a fixed-width hex escape at the current index.

        Four hex digits are expected when ``prefix`` is ``'u'``, two
        otherwise. Returns the character built by ``uchr`` from the decoded
        value, or ``None`` as soon as a non-hex character or the end of input
        is met. In the failure case the index is left where scanning stopped.
        """
        remaining = 2 if prefix != 'u' else 4
        value = 0

        while remaining > 0:
            if self.eof() or not Character.isHexDigit(self.source[self.index]):
                return None

            digit = self.source[self.index]
            self.index += 1
            value = value * 16 + hexValue(digit)
            remaining -= 1

        return uchr(value)

    def scanUnicodeCodePointEscape(self):
        """Read the body of a braced Unicode escape, after its opening brace.

        Hex digits are consumed up to and including the first non-hex
        character, which must be the closing brace. An empty body, a value
        above 0x10FFFF or a missing closing brace is reported through
        ``throwUnexpectedToken``.

        Returns ``Character.fromCodePoint`` of the decoded value.
        """
        last = self.source[self.index]

        # At least, one hex digit is required.
        if last == '}':
            self.throwUnexpectedToken()

        value = 0
        while not self.eof():
            last = self.source[self.index]
            self.index += 1
            if Character.isHexDigit(last):
                value = value * 16 + hexValue(last)
                continue

            break

        malformed = last != '}' or value > 0x10FFFF
        if malformed:
            self.throwUnexpectedToken()

        return Character.fromCodePoint(value)

    def getIdentifier(self):
        """Scan an identifier made only of plain identifier characters.

        The first character is accepted unconditionally. If a backslash or a
        code unit in the range 0xD800 (inclusive) to 0xDFFF (exclusive) is
        met, the scan is restarted from the beginning by
        ``getComplexIdentifier``.

        Returns the identifier text.
        """
        begin = self.index
        self.index += 1
        while not self.eof():
            ch = self.source[self.index]

            # Backslash (U+005C) marks a Unicode escape sequence; surrogate
            # code units need pair-aware handling. Both go to the slow path.
            # NOTE(review): the upper bound excludes U+DFFF itself.
            if ch == '\\' or 0xD800 <= ord(ch) < 0xDFFF:
                self.index = begin
                return self.getComplexIdentifier()

            if not Character.isIdentifierPart(ch):
                break

            self.index += 1

        return self.source[begin:self.index]

    def getComplexIdentifier(self):
        """Scan an identifier that may contain Unicode escapes.

        Characters are read one code point at a time through ``codePointAt``.
        Each backslash must be followed by ``u`` and then either a braced
        code point escape or a four-digit hex escape; the decoded character
        takes the place of the escape in the result. Malformed escapes are
        reported through ``throwUnexpectedToken``.

        Returns the identifier text with every escape decoded.
        """
        # The first code point is consumed unconditionally.
        cp = self.codePointAt(self.index)
        id = Character.fromCodePoint(cp)
        self.index += len(id)

        # '\u' (U+005C, U+0075) denotes an escaped character.
        if cp == 0x5C:
            if self.source[self.index] != 'u':
                self.throwUnexpectedToken()

            self.index += 1
            if self.source[self.index] == '{':
                self.index += 1
                ch = self.scanUnicodeCodePointEscape()
            else:
                ch = self.scanHexEscape('u')
                # The decoded character must be able to start an identifier.
                if not ch or ch == '\\' or not Character.isIdentifierStart(ch[0]):
                    self.throwUnexpectedToken()

            # The decoded character replaces the backslash read above.
            id = ch

        while not self.eof():
            cp = self.codePointAt(self.index)
            ch = Character.fromCodePoint(cp)
            if not Character.isIdentifierPart(ch):
                break

            id += ch
            self.index += len(ch)

            # '\u' (U+005C, U+0075) denotes an escaped character.
            if cp == 0x5C:
                # Drop the backslash that was just appended.
                id = id[:-1]
                if self.source[self.index] != 'u':
                    self.throwUnexpectedToken()

                self.index += 1
                if self.source[self.index] == '{':
                    self.index += 1
                    ch = self.scanUnicodeCodePointEscape()
                else:
                    ch = self.scanHexEscape('u')
                    # The decoded character must be a valid identifier part.
                    if not ch or ch == '\\' or not Character.isIdentifierPart(ch[0]):
                        self.throwUnexpectedToken()

                id += ch

        return id

    def octalToDecimal(self, ch):
        """Decode an octal escape whose first digit ``ch`` was already consumed.

        Up to two more octal digits are read from the source; the third digit
        is only accepted when ``ch`` is one of ``0``-``3``.

        Returns an ``Octal`` whose ``code`` is the decoded value and whose
        ``octal`` flag is False only for a lone ``0``.
        """
        value = octalValue(ch)

        if self.eof() or not Character.isOctalDigit(self.source[self.index]):
            # Single digit: \0 is not octal escape sequence
            return Octal(ch != '0', value)

        value = value * 8 + octalValue(self.source[self.index])
        self.index += 1

        # 3 digits are only allowed when string starts
        # with 0, 1, 2, 3
        if ch in '0123' and not self.eof() and Character.isOctalDigit(self.source[self.index]):
            value = value * 8 + octalValue(self.source[self.index])
            self.index += 1

        return Octal(True, value)

    # https://tc39.github.io/ecma262/#sec-names-and-keywords

    def scanIdentifier(self):
        """Scan an identifier-like token and classify it.

        The text is classified as ``Token.Keyword``, ``Token.NullLiteral``,
        ``Token.BooleanLiteral`` or ``Token.Identifier``. If a reserved word
        was spelled with escapes (the decoded text is shorter than the source
        span), ``Messages.InvalidEscapedReservedWord`` is reported at the
        start of the token through ``tolerateUnexpectedToken``.

        Returns a ``RawToken`` carrying the type, text and source span.
        """
        begin = self.index

        # Backslash (U+005C) starts an escaped character.
        if self.source[begin] == '\\':
            name = self.getComplexIdentifier()
        else:
            name = self.getIdentifier()

        # There is no keyword or literal with only one character.
        # Thus, it must be an identifier.
        kind = Token.Identifier
        if len(name) != 1:
            if self.isKeyword(name):
                kind = Token.Keyword
            elif name == 'null':
                kind = Token.NullLiteral
            elif name in ('true', 'false'):
                kind = Token.BooleanLiteral

        if kind is not Token.Identifier and begin + len(name) != self.index:
            # Point the error at the start of the token, then resume.
            resume = self.index
            self.index = begin
            self.tolerateUnexpectedToken(Messages.InvalidEscapedReservedWord)
            self.index = resume

        return RawToken(
            type=kind,
            value=name,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=begin,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-punctuators

    def scanPunctuator(self):
        """Scan one punctuator at the current index.

        Single-character punctuators are tried first, then the longest match
        among the 4-, 3-, 2- and 1-character operators. ``curlyStack`` is
        pushed on ``{`` and popped on ``}``. If nothing matches, the index
        has not moved and ``throwUnexpectedToken`` is called.

        Returns a ``RawToken`` of type ``Token.Punctuator``.
        """
        start = self.index

        # Check for most common single-character punctuators.
        str = self.source[self.index]
        if str in (
            '(',
            '{',
        ):
            if str == '{':
                self.curlyStack.append('{')

            self.index += 1

        elif str == '.':
            self.index += 1
            if self.source[self.index] == '.' and self.source[self.index + 1] == '.':
                # Spread operator: ...
                self.index += 2
                str = '...'

        elif str == '}':
            self.index += 1
            # Popping is skipped when the stack is already empty.
            if self.curlyStack:
                self.curlyStack.pop()

        elif str in (
            ')',
            ';',
            ',',
            '[',
            ']',
            ':',
            '?',
            '~',
        ):
            self.index += 1

        else:
            # 4-character punctuator.
            str = self.source[self.index:self.index + 4]
            if str == '>>>=':
                self.index += 4
            else:

                # 3-character punctuators.
                str = str[:3]
                if str in (
                    '===', '!==', '>>>',
                    '<<=', '>>=', '**='
                ):
                    self.index += 3
                else:

                    # 2-character punctuators.
                    str = str[:2]
                    if str in (
                        '&&', '||', '==', '!=',
                        '+=', '-=', '*=', '/=',
                        '++', '--', '<<', '>>',
                        '&=', '|=', '^=', '%=',
                        '<=', '>=', '=>', '**',
                    ):
                        self.index += 2
                    else:

                        # 1-character punctuators.
                        str = self.source[self.index]
                        if str in '<>=!+-*%&|^/':
                            self.index += 1

        # No branch advanced the index: the character is not a punctuator.
        if self.index == start:
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.Punctuator,
            value=str,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-numeric-literals

    def scanHexLiteral(self, start):
        """Scan the digits of a hex literal whose ``0x`` prefix was consumed.

        ``start`` is the index of the literal's first character and is only
        used for the token span. A literal with no digits, or one followed
        directly by an identifier-start character, is reported through
        ``throwUnexpectedToken``.

        Returns a ``RawToken`` of type ``Token.NumericLiteral`` whose value
        is the integer parsed in base 16.
        """
        first = self.index
        while not self.eof() and Character.isHexDigit(self.source[self.index]):
            self.index += 1

        digits = self.source[first:self.index]
        if not digits:
            self.throwUnexpectedToken()

        following = self.source[self.index]
        if Character.isIdentifierStart(following):
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(digits, 16),
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def scanBinaryLiteral(self, start):
        """Scan the digits of a binary literal whose ``0b`` prefix was consumed.

        ``start`` is the index of the literal's first character and is only
        used for the token span. A literal with no digits, or one followed
        directly by an identifier-start character or a decimal digit, is
        reported through ``throwUnexpectedToken``.

        Returns a ``RawToken`` of type ``Token.NumericLiteral`` whose value
        is the integer parsed in base 2.
        """
        first = self.index
        while not self.eof() and self.source[self.index] in ('0', '1'):
            self.index += 1

        digits = self.source[first:self.index]
        if not digits:
            # only 0b or 0B
            self.throwUnexpectedToken()

        if not self.eof():
            following = self.source[self.index]
            if Character.isIdentifierStart(following) or Character.isDecimalDigit(following):
                self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(digits, 2),
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def scanOctalLiteral(self, prefix, start):
        """Scan an octal literal, in either the legacy or the ``0o`` form.

        ``prefix`` is the character after the leading ``0``. When it is an
        octal digit the literal is a legacy octal and that digit is its first
        digit; otherwise it is the ``o``/``O`` marker and is skipped.
        ``start`` is only used for the token span. An ``0o`` literal with no
        digits, or any literal followed directly by an identifier-start
        character or a decimal digit, is reported through
        ``throwUnexpectedToken``.

        Returns a ``RawToken`` of type ``Token.NumericLiteral`` whose
        ``octal`` flag is True only for the legacy form.
        """
        legacy = bool(Character.isOctalDigit(prefix[0]))
        digits = '0' + self.source[self.index] if legacy else ''
        self.index += 1

        while not self.eof():
            ch = self.source[self.index]
            if not Character.isOctalDigit(ch):
                break

            digits += ch
            self.index += 1

        if not legacy and not digits:
            # only 0o or 0O
            self.throwUnexpectedToken()

        following = self.source[self.index]
        if Character.isIdentifierStart(following) or Character.isDecimalDigit(following):
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(digits, 8),
            octal=legacy,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def isImplicitOctalLiteral(self):
        """Decide whether the digits after the current index form a legacy octal.

        Looks ahead from ``index + 1`` without moving the index. Returns
        False if an ``8`` or ``9`` appears before the first non-octal
        character, True otherwise.
        """
        # Implicit octal, unless there is a non-octal digit.
        # (Annex B.1.1 on Numeric Literals)
        position = self.index + 1
        while position < self.length:
            ch = self.source[position]
            if ch in '89':
                return False
            if not Character.isOctalDigit(ch):
                return True
            position += 1
        return True

    def scanNumericLiteral(self):
        """Scan a numeric literal starting at the current index.

        Literals starting with ``0x``, ``0b``, ``0o`` or a legacy octal digit
        sequence are handed to the dedicated scanners. Otherwise the integer
        part, optional fraction and optional exponent are collected and
        parsed with ``float``. An exponent with no digits, or a literal
        followed directly by an identifier-start character, is reported
        through ``throwUnexpectedToken``.

        Returns a ``RawToken`` of type ``Token.NumericLiteral``; the value is
        an ``int`` when the parsed float is integral and a ``float`` otherwise.
        """
        start = self.index
        ch = self.source[start]
        assert Character.isDecimalDigit(ch) or ch == '.', 'Numeric literal must start with a decimal digit or a decimal point'

        num = ''
        if ch != '.':
            num = self.source[self.index]
            self.index += 1
            ch = self.source[self.index]

            # Hex number starts with '0x'.
            # Octal number starts with '0'.
            # Octal number in ES6 starts with '0o'.
            # Binary number in ES6 starts with '0b'.
            if num == '0':
                if ch in ('x', 'X'):
                    self.index += 1
                    return self.scanHexLiteral(start)

                if ch in ('b', 'B'):
                    self.index += 1
                    return self.scanBinaryLiteral(start)

                # The 'o' marker is not skipped here; scanOctalLiteral
                # advances past it.
                if ch in ('o', 'O'):
                    return self.scanOctalLiteral(ch, start)

                if ch and Character.isOctalDigit(ch):
                    # A leading zero followed by an 8 or 9 falls through and
                    # is scanned as a decimal literal instead.
                    if self.isImplicitOctalLiteral():
                        return self.scanOctalLiteral(ch, start)

            # Integer part.
            while Character.isDecimalDigit(self.source[self.index]):
                num += self.source[self.index]
                self.index += 1

            ch = self.source[self.index]

        # Fractional part.
        if ch == '.':
            num += self.source[self.index]
            self.index += 1
            while Character.isDecimalDigit(self.source[self.index]):
                num += self.source[self.index]
                self.index += 1

            ch = self.source[self.index]

        # Exponent: optional sign, then at least one digit.
        if ch in ('e', 'E'):
            num += self.source[self.index]
            self.index += 1

            ch = self.source[self.index]
            if ch in ('+', '-'):
                num += self.source[self.index]
                self.index += 1

            if Character.isDecimalDigit(self.source[self.index]):
                while Character.isDecimalDigit(self.source[self.index]):
                    num += self.source[self.index]
                    self.index += 1

            else:
                self.throwUnexpectedToken()

        if Character.isIdentifierStart(self.source[self.index]):
            self.throwUnexpectedToken()

        # Parse as float first, then narrow to int when nothing is lost.
        value = float(num)
        return RawToken(
            type=Token.NumericLiteral,
            value=int(value) if value.is_integer() else value,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-string-literals

    def scanStringLiteral(self):
        """Scan a single- or double-quoted string literal.

        Escape sequences are decoded into the returned value. A backslash
        followed by a line terminator is a line continuation: it adds nothing
        to the value but updates ``lineNumber`` and ``lineStart``. An
        unescaped line terminator or the end of input before the closing
        quote is reported through ``throwUnexpectedToken`` with the index
        reset to the start of the literal.

        Returns a ``RawToken`` of type ``Token.StringLiteral``; ``octal`` is
        True if any octal escape sequence was decoded.
        """
        start = self.index
        quote = self.source[start]
        assert quote in ('\'', '"'), 'String literal must starts with a quote'

        self.index += 1
        octal = False
        str = ''

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1

            if ch == quote:
                # Clearing ``quote`` marks the literal as properly closed.
                quote = ''
                break
            elif ch == '\\':
                ch = self.source[self.index]
                self.index += 1
                if not ch or not Character.isLineTerminator(ch):
                    if ch == 'u':
                        if self.source[self.index] == '{':
                            self.index += 1
                            str += self.scanUnicodeCodePointEscape()
                        else:
                            unescapedChar = self.scanHexEscape(ch)
                            if not unescapedChar:
                                self.throwUnexpectedToken()

                            str += unescapedChar

                    elif ch == 'x':
                        unescaped = self.scanHexEscape(ch)
                        if not unescaped:
                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)

                        str += unescaped
                    elif ch == 'n':
                        str += '\n'
                    elif ch == 'r':
                        str += '\r'
                    elif ch == 't':
                        str += '\t'
                    elif ch == 'b':
                        str += '\b'
                    elif ch == 'f':
                        str += '\f'
                    elif ch == 'v':
                        str += '\x0B'
                    elif ch in (
                        '8',
                        '9',
                    ):
                        # \8 and \9 are kept as the digit itself, with a
                        # tolerated error.
                        str += ch
                        self.tolerateUnexpectedToken()

                    else:
                        if ch and Character.isOctalDigit(ch):
                            octToDec = self.octalToDecimal(ch)

                            octal = octToDec.octal or octal
                            str += uchr(octToDec.code)
                        else:
                            # Any other escaped character stands for itself.
                            str += ch

                else:
                    # Line continuation: backslash followed by a line
                    # terminator (CRLF counts as one).
                    self.lineNumber += 1
                    if ch == '\r' and self.source[self.index] == '\n':
                        self.index += 1

                    self.lineStart = self.index

            elif Character.isLineTerminator(ch):
                # Unescaped line break: the literal is unterminated.
                break
            else:
                str += ch

        if quote != '':
            # Report the error at the opening quote.
            self.index = start
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.StringLiteral,
            value=str,
            octal=octal,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-template-literal-lexical-components

    def scanTemplate(self):
        """Scan one template literal chunk.

        The chunk starts at the current index and ends at either a closing
        backtick or the ``${`` that opens a substitution. ``head`` is True
        when the chunk starts with a backtick and ``tail`` is True when it
        ends with one. Opening a substitution pushes ``'${'`` on
        ``curlyStack``; a non-head chunk pops one entry when the stack is not
        empty. An unterminated chunk is reported through
        ``throwUnexpectedToken``.

        Returns a ``RawToken`` of type ``Token.Template`` whose ``value`` is
        the raw source text between the delimiters and whose ``cooked`` is
        that text with escapes decoded.
        """
        cooked = ''
        terminated = False
        start = self.index

        head = self.source[start] == '`'
        tail = False
        # Length of the closing delimiter to strip from the raw value:
        # 2 for '${', 1 for a backtick.
        rawOffset = 2

        self.index += 1

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            if ch == '`':
                rawOffset = 1
                tail = True
                terminated = True
                break
            elif ch == '$':
                if self.source[self.index] == '{':
                    self.curlyStack.append('${')
                    self.index += 1
                    terminated = True
                    break

                # A '$' not followed by '{' is ordinary text.
                cooked += ch
            elif ch == '\\':
                ch = self.source[self.index]
                self.index += 1
                if not Character.isLineTerminator(ch):
                    if ch == 'n':
                        cooked += '\n'
                    elif ch == 'r':
                        cooked += '\r'
                    elif ch == 't':
                        cooked += '\t'
                    elif ch == 'u':
                        if self.source[self.index] == '{':
                            self.index += 1
                            cooked += self.scanUnicodeCodePointEscape()
                        else:
                            # On a malformed \uXXXX, rewind and keep the
                            # literal 'u' instead of raising.
                            restore = self.index
                            unescapedChar = self.scanHexEscape(ch)
                            if unescapedChar:
                                cooked += unescapedChar
                            else:
                                self.index = restore
                                cooked += ch

                    elif ch == 'x':
                        unescaped = self.scanHexEscape(ch)
                        if not unescaped:
                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)

                        cooked += unescaped
                    elif ch == 'b':
                        cooked += '\b'
                    elif ch == 'f':
                        cooked += '\f'
                    elif ch == 'v':
                        cooked += '\v'

                    else:
                        if ch == '0':
                            if Character.isDecimalDigit(self.source[self.index]):
                                # Illegal: \01 \02 and so on
                                self.throwUnexpectedToken(Messages.TemplateOctalLiteral)

                            cooked += '\0'
                        elif Character.isOctalDigit(ch):
                            # Illegal: \1 \2
                            self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
                        else:
                            # Any other escaped character stands for itself.
                            cooked += ch

                else:
                    # Line continuation: nothing is added to the cooked text.
                    self.lineNumber += 1
                    if ch == '\r' and self.source[self.index] == '\n':
                        self.index += 1

                    self.lineStart = self.index

            elif Character.isLineTerminator(ch):
                # Every unescaped line terminator (CRLF included) is cooked
                # as a single '\n'.
                self.lineNumber += 1
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineStart = self.index
                cooked += '\n'
            else:
                cooked += ch

        if not terminated:
            self.throwUnexpectedToken()

        if not head:
            if self.curlyStack:
                self.curlyStack.pop()

        return RawToken(
            type=Token.Template,
            value=self.source[start + 1:self.index - rawOffset],
            cooked=cooked,
            head=head,
            tail=tail,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals

    def testRegExp(self, pattern, flags):
        # The BMP character to use as a replacement for astral symbols when
        # translating an ES6 "u"-flagged pattern to an ES5-compatible
        # approximation.
        # Note: replacing with '\uFFFF' enables false positives in unlikely
        # scenarios. For example, `[\u{1044f}-\u{10440}]` is an invalid
        # pattern that would not be detected by this substitution.
        astralSubstitute = '\uFFFF'

        # Replace every Unicode escape sequence with the equivalent
        # BMP character or a constant ASCII code point in the case of
        # astral symbols. (See the above note on `astralSubstitute`
        # for more information.)
        def astralSub(m):
            codePoint = int(m.group(1) or m.group(2), 16)
            if codePoint > 0x10FFFF:
                self.tolerateUnexpectedToken(Messages.InvalidRegExp)
            elif codePoint <= 0xFFFF:
                return uchr(codePoint)
            return astralSubstitute
        pattern = re.sub(r'\\u\{([0-9a-fA-F]+)\}|\\u([a-fA-F0-9]{4})', astralSub, pattern)

        # Replace each paired surrogate with a single ASCII symbol to
        # avoid throwing on regular expressions that are only valid in
        # combination with the "u" flag.
        pattern = re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', astralSubstitute, pattern)

        # Return a regular expression object for this pattern-flag pair, or
        # `null` in case the current environment doesn't support the flags it
        # uses.
        pyflags = 0 | re.M if 'm' in flags else 0 | re.I if 'i' in flags else 0
        try:
            return re.compile(pattern, pyflags)
        except Exception:
            self.tolerateUnexpectedToken(Messages.InvalidRegExp)

    def scanRegExpBody(self):
        """Scan the `/.../` body of a regular expression literal.

        Starts at the opening slash and advances `self.index` past the
        closing slash. A slash inside a `[...]` class does not terminate the
        body, and a backslash takes the following character with it. A line
        terminator inside the body, or running out of input before the
        closing slash, is reported with `Messages.UnterminatedRegExp`.

        Returns the body text without the surrounding slashes.
        """
        opener = self.source[self.index]
        assert opener == '/', 'Regular expression literal must start with a slash'

        pieces = [opener]
        self.index += 1
        inClass = False
        closed = False

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            pieces.append(ch)

            if ch == '\\':
                # An escape consumes the next character unconditionally.
                escaped = self.source[self.index]
                self.index += 1
                # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals
                if Character.isLineTerminator(escaped):
                    self.throwUnexpectedToken(Messages.UnterminatedRegExp)

                pieces.append(escaped)
                continue

            if Character.isLineTerminator(ch):
                self.throwUnexpectedToken(Messages.UnterminatedRegExp)
                continue

            if inClass:
                # Inside a character class only `]` is significant.
                if ch == ']':
                    inClass = False
                continue

            if ch == '/':
                closed = True
                break

            if ch == '[':
                inClass = True

        if not closed:
            self.throwUnexpectedToken(Messages.UnterminatedRegExp)

        # Exclude leading and trailing slash.
        return ''.join(pieces)[1:-1]

    def scanRegExpFlags(self):
        """Scan the flag characters that follow a regular expression body.

        Consumes characters for as long as `Character.isIdentifierPart`
        accepts them and returns the collected flags as a string.

        A backslash among the flags is always reported through
        `tolerateUnexpectedToken()`. When it introduces a backslash-u escape,
        the character returned by `scanHexEscape('u')` is added to the flags;
        if that call returns a falsy value, the index is rewound to just
        after the `u` and a literal 'u' is recorded instead.
        """
        flags = ''
        while not self.eof():
            ch = self.source[self.index]
            if not Character.isIdentifierPart(ch):
                break

            self.index += 1
            if ch == '\\' and not self.eof():
                ch = self.source[self.index]
                if ch == 'u':
                    self.index += 1
                    restore = self.index
                    char = self.scanHexEscape('u')
                    if char:
                        flags += char
                    else:
                        # Not a valid escape: resume right after the 'u'.
                        self.index = restore
                        flags += 'u'

                # Escapes are never legal inside the flags, valid or not.
                self.tolerateUnexpectedToken()
            else:
                flags += ch

        return flags

    def scanRegExp(self):
        """Scan a whole regular expression literal and return its raw token.

        The body and the flags are scanned in that order from the current
        index, and the pair is then passed to `testRegExp`, whose result is
        stored on the token as `regex`.
        """
        begin = self.index

        body = self.scanRegExpBody()
        modifiers = self.scanRegExpFlags()
        compiled = self.testRegExp(body, modifiers)

        return RawToken(
            type=Token.RegularExpression,
            value='',
            pattern=body,
            flags=modifiers,
            regex=compiled,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=begin,
            end=self.index
        )

    def lex(self):
        """Return the next raw token starting at the current index.

        An EOF token is produced when `self.eof()` is true. Otherwise the
        character at the current index selects which specialised scanner
        handles the token; anything not recognised falls through to
        `scanPunctuator`.
        """
        if self.eof():
            here = self.index
            return RawToken(
                type=Token.EOF,
                value='',
                lineNumber=self.lineNumber,
                lineStart=self.lineStart,
                start=here,
                end=here
            )

        ch = self.source[self.index]

        if Character.isIdentifierStart(ch):
            return self.scanIdentifier()

        # Very common: ( and ) and ;
        if ch == '(' or ch == ')' or ch == ';':
            return self.scanPunctuator()

        # String literal starts with single quote (U+0027) or double quote (U+0022).
        if ch == '\'' or ch == '"':
            return self.scanStringLiteral()

        # Dot (.) U+002E can also start a floating-point number, hence the need
        # to check the next character.
        if ch == '.':
            following = self.source[self.index + 1]
            if Character.isDecimalDigit(following):
                return self.scanNumericLiteral()
            return self.scanPunctuator()

        if Character.isDecimalDigit(ch):
            return self.scanNumericLiteral()

        # Template literals start with ` (U+0060) for template head ...
        if ch == '`':
            return self.scanTemplate()

        # ... or } (U+007D) for template middle or template tail, which only
        # applies while the innermost open curly is a `${`.
        if ch == '}' and self.curlyStack and self.curlyStack[-1] == '${':
            return self.scanTemplate()

        # Possible identifier start in a surrogate pair.
        unit = ord(ch)
        if 0xD800 <= unit < 0xDFFF:
            combined = Character.fromCodePoint(self.codePointAt(self.index))
            if Character.isIdentifierStart(combined):
                return self.scanIdentifier()

        return self.scanPunctuator()

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.17 Sekunden (vorverarbeitet am 2026-04-26) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.