Quelle TokenStream.cpp Sprache: C

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// JS lexical scanner.

#include "frontend/TokenStream.h"

#include "mozilla/ArrayUtils.h"
#include "mozilla/Attributes.h"
#include "mozilla/Likely.h"
#include "mozilla/Maybe.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/Span.h"
#include "mozilla/TemplateLib.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"

#include <algorithm>
#include <iterator>
#include <limits>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <type_traits>
#include <utility>

#include "jsnum.h"

#include "frontend/FrontendContext.h"
#include "frontend/Parser.h"
#include "frontend/ParserAtom.h"
#include "frontend/ReservedWords.h"
#include "js/CharacterEncoding.h"  // JS::ConstUTF8CharsZ
#include "js/ColumnNumber.h"  // JS::LimitedColumnNumberOneOrigin, JS::ColumnNumberOneOrigin, JS::TaggedColumnNumberOneOrigin
#include "js/ErrorReport.h"   // JSErrorBase
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/Printf.h"                // JS_smprintf
#include "js/RegExpFlags.h"           // JS::RegExpFlags
#include "js/UniquePtr.h"
#include "util/Text.h"
#include "util/Unicode.h"
#include "vm/FrameIter.h"  // js::{,NonBuiltin}FrameIter
#include "vm/JSContext.h"
#include "vm/Realm.h"

using mozilla::AsciiAlphanumericToNumber;
using mozilla::AssertedCast;
using mozilla::DecodeOneUtf8CodePoint;
using mozilla::IsAscii;
using mozilla::IsAsciiAlpha;
using mozilla::IsAsciiDigit;
using mozilla::IsAsciiHexDigit;
using mozilla::IsTrailingUnit;
using mozilla::MakeScopeExit;
using mozilla::Maybe;
using mozilla::PointerRangeSize;
using mozilla::Span;
using mozilla::Utf8Unit;

using JS::ReadOnlyCompileOptions;
using JS::RegExpFlag;
using JS::RegExpFlags;

// Associates the source spelling of one reserved word with the token kind
// recorded for it.
struct ReservedWordInfo {
  const char* chars;  // C string with reserved word text
  js::frontend::TokenKind tokentype;  // Token kind listed for this word
};

// One entry per reserved word, generated from (and in the order of)
// FOR_EACH_JAVASCRIPT_RESERVED_WORD.  Each entry holds the stringified word
// and its token kind.
static const ReservedWordInfo reservedWords[] = {
#define MAKE_RESERVED_WORD_ENTRY_(text, unusedName, kind) \
  {#text, js::frontend::kind},
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(MAKE_RESERVED_WORD_ENTRY_)
#undef MAKE_RESERVED_WORD_ENTRY_
};

// Enumerators generated from FOR_EACH_JAVASCRIPT_RESERVED_WORD, one per
// reserved word and in list order, so each enumerator's value can be used as
// an index into a table generated from the same list.
enum class ReservedWordsIndex : size_t {
#define RESERVED_WORD_INDEX_(unusedWord, ENUMERATOR, unusedKind) ENUMERATOR,
  FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INDEX_)
#undef RESERVED_WORD_INDEX_
};

// Returns a ReservedWordInfo for the specified characters, or nullptr if the
// string is not a reserved word.
//
// |s| points at |length| code units (|length| must be nonzero).  The actual
// decision logic lives in the generated header included below, which talks to
// this function exclusively through the JSRW_* macros and the three labels
// that follow the include.
template <typename CharT>
static const ReservedWordInfo* FindReservedWord(const CharT* s, size_t length) {
  MOZ_ASSERT(length != 0);

  // Index into |reservedWords| selected by the generated code.
  size_t i;
  const ReservedWordInfo* rw;
  const char* chars;

  // Hooks consumed by the generated header:
  //  - JSRW_LENGTH / JSRW_AT expose the input length and individual units;
  //  - JSRW_GOT_MATCH records an index and jumps to |got_match|, which
  //    returns the entry without further comparison;
  //  - JSRW_TEST_GUESS records an index and jumps to |test_guess|, which
  //    compares the input against the candidate word unit by unit;
  //  - JSRW_NO_MATCH jumps to |no_match|.
#define JSRW_LENGTH() length
#define JSRW_AT(column) s[column]
#define JSRW_GOT_MATCH(index) \
  i = (index);                \
  goto got_match;
#define JSRW_TEST_GUESS(index) \
  i = (index);                 \
  goto test_guess;
#define JSRW_NO_MATCH() goto no_match;
#include "frontend/ReservedWordsGenerated.h"
#undef JSRW_NO_MATCH
#undef JSRW_TEST_GUESS
#undef JSRW_GOT_MATCH
#undef JSRW_AT
#undef JSRW_LENGTH

got_match:
  // The generated code selected entry |i| directly.
  return &reservedWords[i];

test_guess:
  // Entry |i| is only a candidate: compare all |length| input units against
  // the candidate's text, bailing out on the first mismatch.
  rw = &reservedWords[i];
  chars = rw->chars;
  do {
    if (*s++ != static_cast<unsigned char>(*chars++)) {
      goto no_match;
    }
  } while (--length != 0);
  return rw;

no_match:
  return nullptr;
}

// UTF-8 specialization: views the units as unsigned chars and defers to the
// generic implementation for that type.
template <>
MOZ_ALWAYS_INLINE const ReservedWordInfo* FindReservedWord<Utf8Unit>(
    const Utf8Unit* units, size_t length) {
  const auto* bytes = Utf8AsUnsignedChars(units);
  return FindReservedWord(bytes, length);
}

// Looks up the reserved-word entry for |atom| by matching its raw data against
// the well-known atom of every reserved word.  Returns nullptr when no
// reserved word matches.
static const ReservedWordInfo* FindReservedWord(
    const js::frontend::TaggedParserAtomIndex atom) {
  const ReservedWordInfo* found = nullptr;

  switch (atom.rawData()) {
#define RESERVED_WORD_CASE_(unusedWord, NAME, unusedKind)               \
  case js::frontend::TaggedParserAtomIndex::WellKnownRawData::NAME():   \
    found = &reservedWords[static_cast<size_t>(ReservedWordsIndex::NAME)]; \
    break;
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_CASE_)
#undef RESERVED_WORD_CASE_
  }

  return found;
}

// Returns true iff |c| is the ASCII digit '0' or '1'.  The comparison is done
// on the unsigned counterpart of |CharT|.
template <typename CharT>
static constexpr bool IsAsciiBinary(CharT c) {
  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
  if (unit == '0') {
    return true;
  }
  return unit == '1';
}

// Returns true iff |c| is one of the ASCII digits '0' through '7'.  The range
// check is done on the unsigned counterpart of |CharT|.
template <typename CharT>
static constexpr bool IsAsciiOctal(CharT c) {
  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
  return unit >= '0' && unit <= '7';
}

// Converts the ASCII digit |c| to its numeric value by subtracting '0' from
// the unsigned counterpart of |c|.  No validation of |c| is performed here.
template <typename CharT>
static constexpr uint8_t AsciiOctalToNumber(CharT c) {
  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
  return static_cast<uint8_t>(unit - '0');
}

namespace js {

namespace frontend {

// Returns true iff |atom| names a reserved word whose token kind satisfies
// TokenKindIsKeyword.
bool IsKeyword(TaggedParserAtomIndex atom) {
  const ReservedWordInfo* info = FindReservedWord(atom);
  return info != nullptr && TokenKindIsKeyword(info->tokentype);
}

// Returns the token kind recorded for the reserved word |name|, or
// TokenKind::Limit when |name| is not a reserved word.
TokenKind ReservedWordTokenKind(TaggedParserAtomIndex name) {
  const ReservedWordInfo* info = FindReservedWord(name);
  return info ? info->tokentype : TokenKind::Limit;
}

// Returns the spelling of the reserved word named by |name| as a C string, or
// nullptr when |name| is not a reserved word.
const char* ReservedWordToCharZ(TaggedParserAtomIndex name) {
  const ReservedWordInfo* info = FindReservedWord(name);
  if (!info) {
    return nullptr;
  }

  return ReservedWordToCharZ(info->tokentype);
}

// Returns the spelling of the reserved word whose token kind is |tt|.  |tt|
// must not be TokenKind::Name.  Any kind that does not belong to a reserved
// word trips a debug assertion and yields nullptr.
const char* ReservedWordToCharZ(TokenKind tt) {
  MOZ_ASSERT(tt != TokenKind::Name);

  switch (tt) {
#define RESERVED_WORD_TEXT_(text, unusedName, kind) \
  case kind:                                        \
    return #text;
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_TEXT_)
#undef RESERVED_WORD_TEXT_
    default:
      break;
  }

  // Every reserved-word case returned above, so only other kinds get here.
  MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
  return nullptr;
}

// Returns the well-known atom for the reserved word whose token kind is |tt|.
// |tt| must not be TokenKind::Name.  Any kind that does not belong to a
// reserved word trips a debug assertion and yields the null atom index.
TaggedParserAtomIndex TokenStreamAnyChars::reservedWordToPropertyName(
    TokenKind tt) const {
  MOZ_ASSERT(tt != TokenKind::Name);

  switch (tt) {
#define RESERVED_WORD_ATOM_(unusedWord, atomName, kind) \
  case kind:                                            \
    return TaggedParserAtomIndex::WellKnown::atomName();
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_ATOM_)
#undef RESERVED_WORD_ATOM_
    default:
      break;
  }

  // Every reserved-word case returned above, so only other kinds get here.
  MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
  return TaggedParserAtomIndex::null();
}

// Initializes the line-start table with exactly two entries: the offset of
// the first line (|initialOffset|) followed by the MAX_PTR sentinel.
// |initialLineNumber| is stored as the line number of that first line.
SourceCoords::SourceCoords(FrontendContext* fc, uint32_t initialLineNumber,
                           uint32_t initialOffset)
    : lineStartOffsets_(fc), initialLineNum_(initialLineNumber), lastIndex_(0) {
  // This is actually necessary!  Removing it causes compile errors on
  // GCC and clang.  You could try declaring this:
  //
  //   const uint32_t SourceCoords::MAX_PTR;
  //
  // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
  //
  uint32_t maxPtr = MAX_PTR;

  // The first line begins at buffer offset |initialOffset|.  MAX_PTR is the
  // sentinel.  The appends cannot fail because |lineStartOffsets_| has
  // statically-allocated elements.
  MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
  MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
  lineStartOffsets_.infallibleAppend(initialOffset);
  lineStartOffsets_.infallibleAppend(maxPtr);
}

// Records that line |lineNum| starts at |lineStartOffset|.  Returns false only
// when growing |lineStartOffsets_| fails; the table (and its trailing MAX_PTR
// sentinel) is left untouched in that case.
MOZ_ALWAYS_INLINE bool SourceCoords::add(uint32_t lineNum,
                                         uint32_t lineStartOffset) {
  const uint32_t index = indexFromLineNumber(lineNum);
  const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

  MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset);
  MOZ_ASSERT(lineStartOffsets_[sentinelIndex] == MAX_PTR);

  if (index != sentinelIndex) {
    // This newline was seen before (and ungotten), so there is nothing to
    // record; just verify the stored offset is unchanged.  This path can also
    // run after an earlier OOM, hence the index guard.
    MOZ_ASSERT_IF(index < sentinelIndex,
                  lineStartOffsets_[index] == lineStartOffset);
    return true;
  }

  // First time this newline is seen.  Append a fresh sentinel first, and only
  // overwrite the old sentinel slot once that append has succeeded, so that a
  // sentinel is always present.  On failure, tell TokenStream about the OOM.
  uint32_t maxPtr = MAX_PTR;
  if (!lineStartOffsets_.append(maxPtr)) {
    static_assert(std::is_same_v<decltype(lineStartOffsets_.allocPolicy()),
                                 TempAllocPolicy&>,
                  "this function's caller depends on it reporting an "
                  "error on failure, as TempAllocPolicy ensures");
    return false;
  }

  lineStartOffsets_[index] = lineStartOffset;
  return true;
}

// Copies into this table any line-start entries that |other| has beyond the
// ones already present here.  Both tables must begin at the same offset and
// end with the MAX_PTR sentinel.  Returns false if an append fails.
MOZ_ALWAYS_INLINE bool SourceCoords::fill(const SourceCoords& other) {
  MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
  MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
  MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

  const auto& source = other.lineStartOffsets_;
  const size_t sourceLength = source.length();

  // Nothing to copy when this table is already at least as long.
  if (sourceLength <= lineStartOffsets_.length()) {
    return true;
  }

  // Replace our sentinel with the entry |other| has at that position, then
  // append everything that follows it (ending with |other|'s sentinel).
  const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
  lineStartOffsets_[sentinelIndex] = source[sentinelIndex];

  for (size_t next = sentinelIndex + 1; next < sourceLength; next++) {
    if (!lineStartOffsets_.append(source[next])) {
      return false;
    }
  }

  return true;
}

// Returns the index |i| into |lineStartOffsets_| of the line containing
// |offset|, i.e. the index satisfying
// |lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1]|.
//
// The result of the previous lookup is remembered in |lastIndex_|: lookups on
// the same line or on one of the next two lines are answered without a
// search, and anything else falls back to a binary search (started just past
// the cached index when |offset| lies beyond it).  |lastIndex_| is updated as
// a side effect.
MOZ_ALWAYS_INLINE uint32_t
SourceCoords::indexFromOffset(uint32_t offset) const {
  uint32_t iMin, iMax, iMid;

  if (lineStartOffsets_[lastIndex_] <= offset) {
    // If we reach here, offset is on a line the same as or higher than
    // last time.  Check first for the +0, +1, +2 cases, because they
    // typically cover 85--98% of cases.
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is same as last time
    }

    // If we reach here, there must be at least one more entry (plus the
    // sentinel).  Try it.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is one higher than last time
    }

    // The same logic applies here.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is two higher than last time
    }

    // No luck.  Oh well, we have a better-than-default starting point for
    // the binary search.
    iMin = lastIndex_ + 1;
    MOZ_ASSERT(iMin <
               lineStartOffsets_.length() - 1);  // -1 due to the sentinel

  } else {
    // |offset| precedes the cached line: search from the very first line.
    iMin = 0;
  }

  // This is a binary search with deferred detection of equality, which was
  // marginally faster in this case than a standard binary search.
  // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
  // want one before that.
  iMax = lineStartOffsets_.length() - 2;
  while (iMax > iMin) {
    iMid = iMin + (iMax - iMin) / 2;
    if (offset >= lineStartOffsets_[iMid + 1]) {
      iMin = iMid + 1;  // offset is above lineStartOffsets_[iMid]
    } else {
      iMax = iMid;  // offset is below or within lineStartOffsets_[iMid]
    }
  }

  MOZ_ASSERT(iMax == iMin);
  MOZ_ASSERT(lineStartOffsets_[iMin] <= offset);
  MOZ_ASSERT(offset < lineStartOffsets_[iMin + 1]);

  // Remember the answer to speed up the next lookup.
  lastIndex_ = iMin;
  return iMin;
}

// Builds the LineToken for |offset| from the index of the line containing it.
SourceCoords::LineToken SourceCoords::lineToken(uint32_t offset) const {
  const uint32_t lineIndex = indexFromOffset(offset);
  return LineToken(lineIndex, offset);
}

// Sets up the character-type-independent tokenizer state from |options| and
// marks the token kinds that can end an expression.
TokenStreamAnyChars::TokenStreamAnyChars(FrontendContext* fc,
                                         const ReadOnlyCompileOptions& options,
                                         StrictModeGetter* smg)
    : fc(fc),
      options_(options),
      strictModeGetter_(smg),
      filename_(options.filename()),
      longLineColumnInfo_(fc),
      srcCoords(fc, options.lineno, options.scriptSourceOffset),
      lineno(options.lineno),
      mutedErrors(options.mutedErrors()) {
  // |isExprEnding| was initially zeroed, so only the true entries need to be
  // written.
  constexpr TokenKind exprEndingKinds[] = {
      TokenKind::Comma,      TokenKind::Semi,         TokenKind::Colon,
      TokenKind::RightParen, TokenKind::RightBracket, TokenKind::RightCurly,
  };

  for (TokenKind kind : exprEndingKinds) {
    isExprEnding[size_t(kind)] = true;
  }
}

// Forwards |fc| and |parserAtoms| to the TokenStreamCharsShared base and
// constructs |sourceUnits| from the |length| units at |units|, with
// |startOffset| passed through as its starting offset.
template <typename Unit>
TokenStreamCharsBase<Unit>::TokenStreamCharsBase(FrontendContext* fc,
                                                 ParserAtomsTable* parserAtoms,
                                                 const Unit* units,
                                                 size_t length,
                                                 size_t startOffset)
    : TokenStreamCharsShared(fc, parserAtoms),
      sourceUnits(units, length, startOffset) {}

// Appends the UTF-16 units in [cur, end) to the (initially empty)
// |charBuffer|, rewriting every "\r" and "\r\n" as a single "\n".  Returns
// false if an append fails.
bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
                                                        const char16_t* cur,
                                                        const char16_t* end) {
  MOZ_ASSERT(charBuffer.length() == 0);

  while (cur < end) {
    char16_t normalized = *cur;
    ++cur;

    if (normalized == '\r') {
      // Swallow the LF of a CRLF pair so the pair yields one line break.
      if (cur < end && *cur == '\n') {
        ++cur;
      }
      normalized = '\n';
    }

    if (!charBuffer.append(normalized)) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}

// Appends the UTF-8 text in [cur, end) to the (initially empty) |charBuffer|.
// ASCII units are copied with "\r" and "\r\n" rewritten as a single "\n";
// every other lead unit is decoded to a code point first.  The text is
// expected to have been validated already.  Returns false if an append fails.
bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
                                                        const Utf8Unit* cur,
                                                        const Utf8Unit* end) {
  MOZ_ASSERT(charBuffer.length() == 0);

  while (cur < end) {
    const Utf8Unit lead = *cur++;

    if (MOZ_UNLIKELY(!IsAscii(lead))) {
      // Non-ASCII: decode the whole code point, advancing |cur| past it.
      Maybe<char32_t> decoded = DecodeOneUtf8CodePoint(lead, &cur, end);
      MOZ_ASSERT(decoded.isSome(),
                 "provided source text should already have been validated");

      if (!AppendCodePointToCharBuffer(charBuffer, decoded.value())) {
        return false;
      }

      continue;
    }

    char16_t normalized = lead.toUint8();
    if (normalized == '\r') {
      // Swallow the LF of a CRLF pair so the pair yields one line break.
      if (cur < end && *cur == Utf8Unit('\n')) {
        cur++;
      }
      normalized = '\n';
    }

    if (!charBuffer.append(normalized)) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}

// Forwards everything to the TokenStreamChars base, using
// |options.scriptSourceOffset| as the starting offset of the source units.
template <typename Unit, class AnyCharsAccess>
TokenStreamSpecific<Unit, AnyCharsAccess>::TokenStreamSpecific(
    FrontendContext* fc, ParserAtomsTable* parserAtoms,
    const ReadOnlyCompileOptions& options, const Unit* units, size_t length)
    : TokenStreamChars<Unit, AnyCharsAccess>(fc, parserAtoms, units, length,
                                             options.scriptSourceOffset) {}

// Validates the compile options.  Reports JSMSG_BAD_COLUMN_NUMBER and returns
// false when the starting column exceeds the limit at which column numbers
// saturate; returns true otherwise.
bool TokenStreamAnyChars::checkOptions() {
  const auto startColumn = options().column.oneOriginValue();
  if (startColumn <= JS::LimitedColumnNumberOneOrigin::Limit) {
    return true;
  }

  reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
  return false;
}

// Variadic convenience wrapper: packages the arguments following
// |errorNumber| into a va_list and hands them to reportErrorNoOffsetVA.
void TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...) const {
  va_list args;
  va_start(args, errorNumber);

  reportErrorNoOffsetVA(errorNumber, &args);

  va_end(args);
}

void TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber,
                                                va_list* args) const {
  ErrorMetadata metadata;
  computeErrorMetadataNoOffset(&metadata);

  ReportCompileErrorLatin1VA(fc, std::move(metadata), nullptr, errorNumber,
                             args);
}

// Advances the line bookkeeping past an end-of-line: the new line begins at
// |lineStartOffset|.  Returns false (after reporting JSMSG_BAD_LINE_NUMBER)
// if the line number wraps around to zero, or if recording the new line start
// in |srcCoords| fails.
[[nodiscard]] MOZ_ALWAYS_INLINE bool
TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset) {
  prevLinebase = linebase;
  linebase = lineStartOffset;
  ++lineno;

  // A line number of zero after the increment means the counter overflowed.
  if (MOZ_UNLIKELY(lineno == 0)) {
    reportErrorNoOffset(JSMSG_BAD_LINE_NUMBER);
    return false;
  }

  return srcCoords.add(lineno, linebase);
}

#ifdef DEBUG

// Debug-only check that the units at |ptr| are the UTF-16 encoding of the
// code point described by |peeked|, and that |peeked| reports the matching
// length.
template <>
inline void SourceUnits<char16_t>::assertNextCodePoint(
    const PeekedCodePoint<char16_t>& peeked) {
  const char32_t codePoint = peeked.codePoint();

  if (codePoint >= unicode::NonBMPMin) {
    // Non-BMP: expect a lead/trail surrogate pair.
    MOZ_ASSERT(peeked.lengthInUnits() == 2);
    char16_t expectedLead, expectedTrail;
    unicode::UTF16Encode(codePoint, &expectedLead, &expectedTrail);
    MOZ_ASSERT(ptr[0] == expectedLead);
    MOZ_ASSERT(ptr[1] == expectedTrail);
    return;
  }

  // BMP: the single unit is the code point itself.
  MOZ_ASSERT(peeked.lengthInUnits() == 1);
  MOZ_ASSERT(ptr[0] == codePoint);
}

// Debug-only check that the units at |ptr| are the UTF-8 encoding of the code
// point described by |peeked|.
template <>
inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
    const PeekedCodePoint<Utf8Unit>& peeked) {
  const char32_t codePoint = peeked.codePoint();

  // UTF-8 encoding is deliberately reimplemented here rather than shared:
  // this exists purely to cross-check, so an independent implementation is
  // (we think) a virtue.
  constexpr uint8_t TrailTag = 0x80;    // 0b10xx'xxxx marks a trailing unit.
  constexpr char32_t PayloadMask = 0x3F;  // Low six bits carried per trail.

  uint8_t expected[4] = {};
  if (codePoint < 0x80) {
    expected[0] = AssertedCast<uint8_t>(codePoint);
  } else if (codePoint < 0x800) {
    expected[0] = 0xC0 | (codePoint >> 6);
    expected[1] = TrailTag | (codePoint & PayloadMask);
  } else if (codePoint < 0x10000) {
    expected[0] = 0xE0 | (codePoint >> 12);
    expected[1] = TrailTag | ((codePoint >> 6) & PayloadMask);
    expected[2] = TrailTag | (codePoint & PayloadMask);
  } else {
    expected[0] = 0xF0 | (codePoint >> 18);
    expected[1] = TrailTag | ((codePoint >> 12) & PayloadMask);
    expected[2] = TrailTag | ((codePoint >> 6) & PayloadMask);
    expected[3] = TrailTag | (codePoint & PayloadMask);
  }

  MOZ_ASSERT(peeked.lengthInUnits() <= 4);
  for (uint8_t index = 0; index < peeked.lengthInUnits(); index++) {
    MOZ_ASSERT(expected[index] == ptr[index].toUint8());
  }
}

#endif  // DEBUG

// Moves |*ptr| backwards, if necessary, until it no longer points at a UTF-8
// trailing unit.  |*ptr| must not exceed |limit|, and is left alone when it
// equals |limit|.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const Utf8Unit** ptr, const Utf8Unit* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is itself a code point boundary, so nothing needs to move.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  // Step back over trailing units until a non-trailing unit is reached.
#ifdef DEBUG
  size_t stepsBack = 0;
#endif
  for (; MOZ_UNLIKELY(IsTrailingUnit(**ptr)); --*ptr) {
#ifdef DEBUG
    stepsBack++;
#endif
  }

  MOZ_ASSERT(stepsBack < 4,
             "the longest UTF-8 code point is four units, so this should never "
             "retract more than three units");
}

// Moves |*ptr| back by one unit when it points at the trail surrogate of a
// surrogate pair, so that it never splits a two-unit code point.  |*ptr| must
// not exceed |limit|, and is left alone when it equals |limit|.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const char16_t** ptr, const char16_t* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is itself a code point boundary, so nothing needs to move.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  const char16_t* const here = *ptr;

  // Anything other than a trail surrogate already sits on a boundary.
  if (MOZ_LIKELY(!unicode::IsTrailSurrogate(here[0]))) {
    return;
  }

  // Outside test suites feeding garbage WTF-16, the preceding unit is
  // basically guaranteed to be the matching lead surrogate; only retract when
  // it really is one.
  if (MOZ_LIKELY(unicode::IsLeadSurrogate(here[-1]))) {
    *ptr = here - 1;
  }
}

// Computes how far |offset| is into its line, as a column offset.  For UTF-16
// source this is simply the distance in code units from the line start; other
// unit types are handled by computeColumnOffsetForUTF8.
template <typename Unit>
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffset(
    const LineToken lineToken, const uint32_t offset,
    const SourceUnits<Unit>& sourceUnits) const {
  lineToken.assertConsistentOffset(offset);

  const uint32_t lineStartOffset = srcCoords.lineStart(lineToken);
  const uint32_t unitsIntoLine = offset - lineStartOffset;

  if constexpr (std::is_same_v<Unit, char16_t>) {
    // Columns are counted in UTF-16 code units, which is exactly what the
    // unit distance already measures.
    return JS::ColumnNumberUnsignedOffset(unitsIntoLine);
  }

  return computeColumnOffsetForUTF8(lineToken, offset, lineStartOffset,
                                    unitsIntoLine, sourceUnits);
}

// Computes the column offset of |offset| within the line identified by
// |lineToken|, counted in UTF-16 code units.
//
// |start| is the offset at which the line begins and |offsetInLine| is
// |offset - start| (both supplied by the caller).  Two layers of caching
// avoid recounting from the line start on every call:
//
//  * |lastOffsetOfComputedColumn_| / |lastComputedColumnOffset_| remember the
//    most recent answer on the current line (reset whenever the line
//    changes);
//  * for offsets beyond the first chunk of a line, |longLineColumnInfo_|
//    holds a per-line vector of chunk entries, each recording the column
//    offset at the start of a |ColumnChunkLength|-unit chunk and whether the
//    chunk is known to contain only single-unit code points.
//
// Allocation failures while maintaining the chunk table are recovered from
// (via |fc->recoverFromOutOfMemory()|) by counting from the best partial
// position already known.
template <typename Unit>
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffsetForUTF8(
    const LineToken lineToken, const uint32_t offset, const uint32_t start,
    const uint32_t offsetInLine, const SourceUnits<Unit>& sourceUnits) const {
  const uint32_t line = lineNumber(lineToken);

  // Reset the previous offset/column number offset cache for this line, if the
  // previous lookup wasn't on this line.
  if (line != lineOfLastColumnComputation_) {
    lineOfLastColumnComputation_ = line;
    lastChunkVectorForLine_ = nullptr;
    lastOffsetOfComputedColumn_ = start;
    lastComputedColumnOffset_ = JS::ColumnNumberUnsignedOffset::zero();
  }

  // Compute and return the final column number offset from a partially
  // calculated offset/column number offset, using the last-cached
  // offset/column number offset if they're more optimal.
  auto OffsetFromPartial =
      [this, offset, &sourceUnits](
          uint32_t partialOffset,
          JS::ColumnNumberUnsignedOffset partialColumnOffset,
          UnitsType unitsType) {
        MOZ_ASSERT(partialOffset <= offset);

        // If the last lookup on this line was closer to |offset|, use it.
        if (partialOffset < this->lastOffsetOfComputedColumn_ &&
            this->lastOffsetOfComputedColumn_ <= offset) {
          partialOffset = this->lastOffsetOfComputedColumn_;
          partialColumnOffset = this->lastComputedColumnOffset_;
        }

        const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
        const Unit* end = sourceUnits.codeUnitPtrAt(offset);

        size_t offsetDelta =
            AssertedCast<uint32_t>(PointerRangeSize(begin, end));
        partialOffset += offsetDelta;

        // When every code point in the range is known to be a single unit,
        // the unit distance is the column distance; otherwise the UTF-16
        // length of the range has to be counted.
        if (unitsType == UnitsType::GuaranteedSingleUnit) {
          MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta,
                     "guaranteed-single-units also guarantee pointer distance "
                     "equals UTF-16 code unit count");
          partialColumnOffset += JS::ColumnNumberUnsignedOffset(offsetDelta);
        } else {
          partialColumnOffset += JS::ColumnNumberUnsignedOffset(
              AssertedCast<uint32_t>(unicode::CountUTF16CodeUnits(begin, end)));
        }

        // Cache this answer for the next lookup on the same line.
        this->lastOffsetOfComputedColumn_ = partialOffset;
        this->lastComputedColumnOffset_ = partialColumnOffset;
        return partialColumnOffset;
      };

  // We won't add an entry to |longLineColumnInfo_| for lines where the maximum
  // column has offset less than this value.  The most common (non-minified)
  // long line length is likely 80ch, maybe 100ch, so we use that, rounded up to
  // the next power of two for efficient division/multiplication below.
  constexpr uint32_t ColumnChunkLength = mozilla::tl::RoundUpPow2<100>::value;

  // The index within any associated |Vector<ChunkInfo>| of |offset|'s chunk.
  const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
  if (chunkIndex == 0) {
    // We don't know from an |offset| in the zeroth chunk that this line is even
    // long.  First-chunk info is mostly useless, anyway -- we have |start|
    // already.  So if we have *easy* access to that zeroth chunk, use it --
    // otherwise just count pessimally.  (This will still benefit from caching
    // the last column/offset for computations for successive offsets, so it's
    // not *always* worst-case.)
    UnitsType unitsType;
    if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) {
      MOZ_ASSERT((*lastChunkVectorForLine_)[0].columnOffset() ==
                 JS::ColumnNumberUnsignedOffset::zero());
      unitsType = (*lastChunkVectorForLine_)[0].unitsType();
    } else {
      unitsType = UnitsType::PossiblyMultiUnit;
    }

    return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
                             unitsType);
  }

  // If this line has no chunk vector yet, insert one in the hash map.  (The
  // required index is allocated and filled further down.)
  if (!lastChunkVectorForLine_) {
    auto ptr = longLineColumnInfo_.lookupForAdd(line);
    if (!ptr) {
      // This could rehash and invalidate a cached vector pointer, but the outer
      // condition means we don't have a cached pointer.
      if (!longLineColumnInfo_.add(ptr, line, Vector<ChunkInfo>(fc))) {
        // In case of OOM, just count columns from the start of the line.
        fc->recoverFromOutOfMemory();
        return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
                                 UnitsType::PossiblyMultiUnit);
      }
    }

    // Note that adding elements to this vector won't invalidate this pointer.
    lastChunkVectorForLine_ = &ptr->value();
  }

  const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);

  // Returns the offset at which chunk |index| really begins: the naive
  // |start + index * ColumnChunkLength| position, pulled back to the start of
  // the code point it falls within (if it falls inside one).
  auto RetractedOffsetOfChunk = [
#ifdef DEBUG
                                    this,
#endif
                                    start, limit,
                                    &sourceUnits](uint32_t index) {
    MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());

    uint32_t naiveOffset = start + index * ColumnChunkLength;
    const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);

    const Unit* actualPtr = naivePtr;
    RetractPointerToCodePointBoundary(&actualPtr, limit);

#ifdef DEBUG
    if ((*this->lastChunkVectorForLine_)[index].unitsType() ==
        UnitsType::GuaranteedSingleUnit) {
      MOZ_ASSERT(naivePtr == actualPtr, "miscomputed unitsType value");
    }
#endif

    return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
  };

  // Best known starting point from which the final count will proceed.
  uint32_t partialOffset;
  JS::ColumnNumberUnsignedOffset partialColumnOffset;
  UnitsType unitsType;

  auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
  if (chunkIndex < entriesLen) {
    // We've computed the chunk |offset| resides in.  Compute the column number
    // from the chunk.
    partialOffset = RetractedOffsetOfChunk(chunkIndex);
    partialColumnOffset = (*lastChunkVectorForLine_)[chunkIndex].columnOffset();

    // This is exact if |chunkIndex| isn't the last chunk.
    unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType();

    // Otherwise the last chunk is pessimistically assumed to contain multi-unit
    // code points because we haven't fully examined its contents yet -- they
    // may not have been tokenized yet, they could contain encoding errors, or
    // they might not even exist.
    MOZ_ASSERT_IF(chunkIndex == entriesLen - 1,
                  (*lastChunkVectorForLine_)[chunkIndex].unitsType() ==
                      UnitsType::PossiblyMultiUnit);
  } else {
    // Extend the vector from its last entry or the start of the line.  (This is
    // also a suitable partial start point if we must recover from OOM.)
    if (entriesLen > 0) {
      partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
      partialColumnOffset =
          (*lastChunkVectorForLine_)[entriesLen - 1].columnOffset();
    } else {
      partialOffset = start;
      partialColumnOffset = JS::ColumnNumberUnsignedOffset::zero();
    }

    if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
      // As earlier, just start from the greatest offset/column in case of OOM.
      fc->recoverFromOutOfMemory();
      return OffsetFromPartial(partialOffset, partialColumnOffset,
                               UnitsType::PossiblyMultiUnit);
    }

    // OOM is no longer possible now.  \o/

    // The vector always begins with the column of the line start, i.e. zero,
    // with chunk units pessimally assumed not single-unit.
    if (entriesLen == 0) {
      lastChunkVectorForLine_->infallibleAppend(
          ChunkInfo(JS::ColumnNumberUnsignedOffset::zero(),
                    UnitsType::PossiblyMultiUnit));
      entriesLen++;
    }

    // Each iteration measures the current final chunk and appends an entry
    // for the chunk that follows it, until |chunkIndex| has an entry.
    do {
      const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
      const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
          start + std::min(entriesLen++ * ColumnChunkLength, offsetInLine));

      MOZ_ASSERT(begin < chunkLimit);
      MOZ_ASSERT(chunkLimit <= limit);

      static_assert(
          ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength - 1,
          "any retraction below is assumed to never underflow to the "
          "preceding chunk, even for the longest code point");

      // Prior tokenizing ensured that [begin, limit) is validly encoded, and
      // |begin < chunkLimit|, so any retraction here can't underflow.
      RetractPointerToCodePointBoundary(&chunkLimit, limit);

      MOZ_ASSERT(begin < chunkLimit);
      MOZ_ASSERT(chunkLimit <= limit);

      size_t numUnits = PointerRangeSize(begin, chunkLimit);
      size_t numUTF16CodeUnits =
          unicode::CountUTF16CodeUnits(begin, chunkLimit);

      // If this chunk (which will become non-final at the end of the loop) is
      // all single-unit code points, annotate the chunk accordingly.
      if (numUnits == numUTF16CodeUnits) {
        lastChunkVectorForLine_->back().guaranteeSingleUnits();
      }

      partialOffset += numUnits;
      partialColumnOffset += JS::ColumnNumberUnsignedOffset(numUTF16CodeUnits);

      lastChunkVectorForLine_->infallibleEmplaceBack(
          partialColumnOffset, UnitsType::PossiblyMultiUnit);
    } while (entriesLen < chunkIndex + 1);

    // We're at a spot in the current final chunk, and final chunks never have
    // complete units information, so be pessimistic.
    unitsType = UnitsType::PossiblyMultiUnit;
  }

  return OffsetFromPartial(partialOffset, partialColumnOffset, unitsType);
}

// Computes the one-origin, limited column number of |offset| on the line
// identified by |lineToken|.  On the first line the result is additionally
// shifted by the starting column given in the compile options.
template <typename Unit, class AnyCharsAccess>
JS::LimitedColumnNumberOneOrigin
GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
    LineToken lineToken, uint32_t offset) const {
  lineToken.assertConsistentOffset(offset);

  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  const JS::ColumnNumberUnsignedOffset offsetInColumns =
      anyChars.computeColumnOffset(lineToken, offset, this->sourceUnits);

  if (lineToken.isFirstLine()) {
    // Saturate at the limit before applying the starting column.
    if (1 + offsetInColumns.value() > JS::LimitedColumnNumberOneOrigin::Limit) {
      return JS::LimitedColumnNumberOneOrigin::limit();
    }

    return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
        (anyChars.options_.column + offsetInColumns).oneOriginValue());
  }

  // Later lines always start counting from the first column.
  return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
      JS::ColumnNumberOneOrigin() + offsetInColumns);
}

// Writes the line number and column number of |offset| to |*line| and
// |*column| respectively.
template <typename Unit, class AnyCharsAccess>
void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
    uint32_t offset, uint32_t* line,
    JS::LimitedColumnNumberOneOrigin* column) const {
  const TokenStreamAnyChars& shared = anyCharsAccess();

  const auto tokenForLine = shared.lineToken(offset);
  *line = shared.lineNumber(tokenForLine);
  *column = computeColumn(tokenForLine, offset);
}

// Reports a UTF-8 encoding error located at the current source offset.
//
// |relevantUnits| is the number of code units, starting at the current
// offset, that make up the bad sequence: they are consumed from
// |sourceUnits| and listed in hex in a note attached to the error.  It must
// be between 1 and 4 inclusive, because the hex listing is written into a
// fixed-size stack buffer sized for four units.  |errorNumber| and the
// variadic arguments that follow it are forwarded to
// ReportCompileErrorLatin1VA.
//
// If any preparatory step fails (computing the line of context, allocating or
// filling the notes), the function returns without reporting this error.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::internalEncodingError(
    uint8_t relevantUnits, unsigned errorNumber, ...) {
  va_list args;
  va_start(args, errorNumber);

  // The do/while(false) lets every failure path |break| to the single
  // |va_end| below.
  do {
    size_t offset = this->sourceUnits.offset();

    ErrorMetadata err;

    TokenStreamAnyChars& anyChars = anyCharsAccess();

    bool canAddLineOfContext = fillExceptingContext(&err, offset);
    if (canAddLineOfContext) {
      if (!internalComputeLineOfContext(&err, offset)) {
        break;
      }

      // As this is an encoding error, the computed window-end must be
      // identical to the location of the error -- any further on and the
      // window would contain invalid Unicode.
      MOZ_ASSERT_IF(err.lineOfContext != nullptr,
                    err.lineLength == err.tokenOffset);
    }

    auto notes = MakeUnique<JSErrorNotes>();
    if (!notes) {
      ReportOutOfMemory(anyChars.fc);
      break;
    }

    // The largest encoding of a UTF-8 code point is 4 units.  (Encoding an
    // obsolete 5- or 6-byte code point will complain only about a bad lead
    // code unit.)
    constexpr size_t MaxWidth = sizeof("0xHH 0xHH 0xHH 0xHH");

    // Each unit occupies five characters ("0xHH" plus a separator, the last
    // of which becomes the terminator), so the buffer holds exactly this many
    // units.
    constexpr size_t MaxUnits = 4;
    static_assert(MaxWidth == MaxUnits * 5,
                  "the buffer must hold exactly MaxUnits five-character "
                  "entries");

    MOZ_ASSERT(relevantUnits > 0);
    MOZ_ASSERT(relevantUnits <= MaxUnits,
               "more units than this would overflow |badUnitsStr|");

    char badUnitsStr[MaxWidth];
    char* ptr = badUnitsStr;
    while (relevantUnits > 0) {
      byteToString(this->sourceUnits.getCodeUnit().toUint8(), ptr);
      ptr[4] = ' ';

      ptr += 5;
      relevantUnits--;
    }

    // Replace the final separator with the string terminator.
    ptr[-1] = '\0';

    uint32_t line;
    JS::LimitedColumnNumberOneOrigin column;
    computeLineAndColumn(offset, &line, &column);

    if (!notes->addNoteASCII(anyChars.fc, anyChars.getFilename().c_str(), 0,
                             line, JS::ColumnNumberOneOrigin(column),
                             GetErrorMessage, nullptr, JSMSG_BAD_CODE_UNITS,
                             badUnitsStr)) {
      break;
    }

    ReportCompileErrorLatin1VA(anyChars.fc, std::move(err), std::move(notes),
                               errorNumber, &args);
  } while (false);

  va_end(args);
}

// Reports JSMSG_BAD_LEADING_UTF8_UNIT for the invalid lead unit |lead|,
// passing its formatted value as the message argument and treating one unit
// as relevant.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badLeadUnit(
    Utf8Unit lead) {
  char formattedLead[5];
  byteToTerminatedString(lead.toUint8(), formattedLead);

  internalEncodingError(1, JSMSG_BAD_LEADING_UTF8_UNIT, formattedLead);
}

// Report an encoding error for a lead unit that requires more code units
// than are available.  |required| and |remaining| each appear to count the
// lead unit, because the message reports |required - 1| and
// |remaining - 1|.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::notEnoughUnits(
    Utf8Unit lead, uint8_t remaining, uint8_t required) {
  MOZ_ASSERT(required == 2 || required == 3 || required == 4);
  MOZ_ASSERT(remaining < 4);
  MOZ_ASSERT(remaining < required);

  char formattedLead[5];
  byteToTerminatedString(lead.toUint8(), formattedLead);

  // Grammatical glue chosen so the counts read naturally in the message.
  const char* expectedPlural = required == 2 ? "" : "s";
  const char* observedSuffix = remaining == 2 ? " was" : "s were";

  // Both counts are below 4, so a single hex digit from |toHexChar| is also
  // the correct decimal digit.
  const char expectedCount[] = {toHexChar(required - 1), '\0'};
  const char observedCount[] = {toHexChar(remaining - 1), '\0'};

  internalEncodingError(remaining, JSMSG_NOT_ENOUGH_CODE_UNITS, formattedLead,
                        expectedCount, expectedPlural, observedCount,
                        observedSuffix);
}

// Report an encoding error for an invalid trailing code unit.  The offending
// unit is the last of the |unitsObserved| units starting at the next code
// unit to be read.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badTrailingUnit(
    uint8_t unitsObserved) {
  const Utf8Unit* upcoming = this->sourceUnits.addressOfNextCodeUnit();
  uint8_t offendingByte = upcoming[unitsObserved - 1].toUint8();

  char formattedByte[5];
  byteToTerminatedString(offendingByte, formattedByte);

  internalEncodingError(unitsObserved, JSMSG_BAD_TRAILING_UTF8_UNIT,
                        formattedByte);
}

// Report an encoding error for a code unit sequence that is well-formed as
// UTF-8 but encodes a forbidden value.  |reason| is passed through to the
// error message alongside the code point rendered as "0x..." hex.
template <class AnyCharsAccess>
MOZ_COLD void
TokenStreamChars<Utf8Unit, AnyCharsAccess>::badStructurallyValidCodePoint(
    char32_t codePoint, uint8_t codePointLength, const char* reason) {
  // The hex text is produced back-to-front: start at the end of a buffer
  // big enough for the worst case, emit the terminator, then peel nibbles
  // off the low end of |codePoint|, and finally prepend "0x".  |cursor| is
  // always the index of the first character written so far.

  // Four UTF-8 units carry at most 3+6+6+6 payload bits, i.e. 0x1F'FFFF,
  // so this literal (with its '\0') bounds the required storage.
  constexpr size_t MaxHexSize = sizeof("0x1FFFFF");
  char hexBuffer[MaxHexSize];

  size_t cursor = MaxHexSize;
  hexBuffer[--cursor] = '\0';

  // A do-while guarantees at least one digit, so zero prints as "0x0".
  do {
    MOZ_ASSERT(cursor > 0);
    hexBuffer[--cursor] = toHexChar(codePoint & 0xF);
    codePoint >>= 4;
  } while (codePoint != 0);

  MOZ_ASSERT(cursor >= 2);
  hexBuffer[--cursor] = 'x';
  hexBuffer[--cursor] = '0';

  internalEncodingError(codePointLength, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
                        &hexBuffer[cursor], reason);
}

// Decode the non-ASCII code point that starts with the already-consumed
// |lead| and store it in |*codePoint| as decoded, without any further
// adjustment of the value.  Returns false if decoding fails; by then one of
// the error callbacks below has been invoked.
template <class AnyCharsAccess>
[[nodiscard]] bool
TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePointDontNormalize(
    Utf8Unit lead, char32_t* codePoint) {
  // Error callbacks handed to the decoder; each forwards to the matching
  // reporting member function.
  auto reportBadLead = [this, lead]() { this->badLeadUnit(lead); };

  auto reportTooFewUnits = [this, lead](uint8_t available, uint8_t needed) {
    this->notEnoughUnits(lead, available, needed);
  };

  auto reportBadTrailing = [this](uint8_t seen) {
    this->badTrailingUnit(seen);
  };

  auto reportBadCodePoint = [this](char32_t value, uint8_t seen) {
    this->badCodePoint(value, seen);
  };

  auto reportOverlong = [this](char32_t value, uint8_t seen) {
    this->notShortestForm(value, seen);
  };

  // On success the decoder has consumed the code point's units.  On failure
  // it has put |lead| back and called the relevant callback, so there is
  // nothing left to do here but report failure to the caller.
  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> decoded = DecodeOneUtf8CodePointInline(
      lead, &iter, SourceUnitsEnd(), reportBadLead, reportTooFewUnits,
      reportBadTrailing, reportBadCodePoint, reportOverlong);
  if (decoded.isSome()) {
    *codePoint = decoded.value();
    return true;
  }

  return false;
}

// UTF-16: finish reading the non-ASCII code point whose first code unit,
// |lead|, was just consumed.  U+2028/U+2029 are reported as '\n' after
// updating line info; a surrogate pair is combined; anything else, lone
// surrogates included, is returned as-is.  Returns false only when
// updating line info fails.
template <class AnyCharsAccess>
bool TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t lead, char32_t* codePoint) {
  MOZ_ASSERT(lead != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(lead),
             "ASCII code unit/point must be handled separately");
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // Start from the common answer -- the unit is the code point -- and
  // replace it below in the two cases where that is wrong.
  *codePoint = AssertedCast<char32_t>(lead);

  // ECMAScript treats unpaired surrogates as ordinary code points rather
  // than as errors (see
  // <https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type>),
  // so no sequence of 16-bit units is rejected here.

  if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(lead))) {
    // Combine with a following trail surrogate when there is one; an
    // unpaired lead surrogate is returned unchanged.
    const bool trailFollows =
        !this->sourceUnits.atEnd() &&
        unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit());
    if (MOZ_LIKELY(trailFollows)) {
      *codePoint = unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit());
    }

    MOZ_ASSERT(!IsLineTerminator(*codePoint));
    return true;
  }

  // Single-unit code point or lone trail surrogate.
  const bool isUnicodeLineBreak =
      lead == unicode::LINE_SEPARATOR || lead == unicode::PARA_SEPARATOR;
  if (MOZ_LIKELY(!isUnicodeLineBreak)) {
    MOZ_ASSERT(!IsLineTerminator(*codePoint));
    return true;
  }

  if (updateLineInfoForEOL()) {
    *codePoint = '\n';
    return true;
  }

#ifdef DEBUG
  // Store a sentinel in the hope that any use of it misbehaves visibly.
  *codePoint = std::numeric_limits<char32_t>::max();
#endif
  MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
  return false;
}

// UTF-8: finish reading the non-ASCII code point whose lead code unit,
// |unit|, was just consumed.  U+2028/U+2029 are reported as '\n' after
// updating line info.  Returns false on an encoding error (reported through
// the callbacks below) or when updating line info fails.
template <class AnyCharsAccess>
bool TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t unit, char32_t* codePoint) {
  MOZ_ASSERT(unit != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(unit),
             "ASCII code unit/point must be handled separately");

  Utf8Unit lead = Utf8Unit(static_cast<unsigned char>(unit));
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // Error callbacks handed to the decoder; each forwards to the matching
  // reporting member function.
  auto reportBadLead = [this, lead]() { this->badLeadUnit(lead); };

  auto reportTooFewUnits = [this, lead](uint_fast8_t available,
                                        uint_fast8_t needed) {
    this->notEnoughUnits(lead, available, needed);
  };

  auto reportBadTrailing = [this](uint_fast8_t seen) {
    this->badTrailingUnit(seen);
  };

  auto reportBadCodePoint = [this](char32_t value, uint_fast8_t seen) {
    this->badCodePoint(value, seen);
  };

  auto reportOverlong = [this](char32_t value, uint_fast8_t seen) {
    this->notShortestForm(value, seen);
  };

  // Either the whole valid code point is consumed, or |lead| is put back
  // and the matching error callback runs.
  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> decoded = DecodeOneUtf8CodePoint(
      lead, &iter, SourceUnitsEnd(), reportBadLead, reportTooFewUnits,
      reportBadTrailing, reportBadCodePoint, reportOverlong);
  if (decoded.isNothing()) {
    return false;
  }

  const char32_t value = decoded.value();
  const bool isUnicodeLineBreak =
      value == unicode::LINE_SEPARATOR || value == unicode::PARA_SEPARATOR;
  if (MOZ_LIKELY(!isUnicodeLineBreak)) {
    MOZ_ASSERT(!IsLineTerminator(value));
    *codePoint = value;
    return true;
  }

  if (updateLineInfoForEOL()) {
    *codePoint = '\n';
    return true;
  }

#ifdef DEBUG
  // Store a sentinel in the hope that any use of it misbehaves visibly.
  *codePoint = std::numeric_limits<char32_t>::max();
#endif
  MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
  return false;
}

// UTF-16: compute the offset at which the window of context preceding
// |offset| begins.
//
// Walks backward from |offset| and stops at the first of: |startOffset_|,
// |WindowRadius| code units of context, a line terminator, or a surrogate
// that cannot be kept together with its partner inside the window.  The
// returned offset is therefore in [offset - WindowRadius, offset].
template <>
size_t SourceUnits<char16_t>::findWindowStart(size_t offset) const {
  // This is JS's understanding of UTF-16 that allows lone surrogates, so
  // we have to exclude lone surrogates from [windowStart, offset) ourselves.

  const char16_t* const earliestPossibleStart = codeUnitPtrAt(startOffset_);

  const char16_t* const initial = codeUnitPtrAt(offset);
  const char16_t* p = initial;

  // Number of code units currently included before |offset|.
  auto HalfWindowSize = [&p, &initial]() {
    return PointerRangeSize(p, initial);
  };

  while (true) {
    MOZ_ASSERT(earliestPossibleStart <= p);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
      break;
    }

    // Peek at the unit just before the window without including it yet.
    char16_t c = p[-1];

    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
    // string and template literals.  These code points do affect line and
    // column coordinates, even as they encode their literal values.
    if (IsLineTerminator(c)) {
      break;
    }

    // Don't allow invalid UTF-16 in pre-context.  Surrogate pairs are
    // included two units at a time below, so a lead surrogate seen here
    // cannot have its trail surrogate inside the window: stop before it.

    if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
      break;
    }

    // Optimistically include the code unit, reverting below if needed.
    p--;

    // If it's not a surrogate at all, keep going.
    if (MOZ_LIKELY(!unicode::IsTrailSurrogate(c))) {
      continue;
    }

    // Stop if we don't have a usable surrogate pair.
    if (HalfWindowSize() >= WindowRadius ||
        p <= earliestPossibleStart ||      // trail surrogate at low end
        !unicode::IsLeadSurrogate(p[-1]))  // no paired lead surrogate
    {
      // Un-include the trail surrogate so the window holds no lone half.
      p++;
      break;
    }

    // Include the lead surrogate that completes the pair.
    p--;
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset - HalfWindowSize();
}

// UTF-8: compute the offset at which the window of context preceding
// |offset| begins.
//
// Walks backward from |offset| one whole code point at a time and stops at
// the first of: |startOffset_|, |WindowRadius| code units of context, or a
// line terminator.  The returned offset is in
// [offset - WindowRadius, offset].
template <>
size_t SourceUnits<Utf8Unit>::findWindowStart(size_t offset) const {
  // |offset| must be the location of the error or somewhere before it, so we
  // know preceding data is valid UTF-8.

  const Utf8Unit* const earliestPossibleStart = codeUnitPtrAt(startOffset_);

  const Utf8Unit* const initial = codeUnitPtrAt(offset);
  const Utf8Unit* p = initial;

  // Number of code units currently included before |offset|.
  auto HalfWindowSize = [&p, &initial]() {
    return PointerRangeSize(p, initial);
  };

  while (true) {
    MOZ_ASSERT(earliestPossibleStart <= p);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
      break;
    }

    // Peek backward for a line break, and only decrement if there is none.
    uint8_t prev = p[-1].toUint8();

    // First check for the ASCII LineTerminators.
    if (prev == '\r' || prev == '\n') {
      break;
    }

    // Now check for the non-ASCII LineTerminators U+2028 LINE SEPARATOR
    // (0xE2 0x80 0xA8) and U+2029 PARAGRAPH SEPARATOR (0xE2 0x80 0xA9).  If
    // there aren't three code units available, some comparison here will
    // fail before we'd underflow.
    if (MOZ_UNLIKELY((prev == 0xA8 || prev == 0xA9) &&
                     p[-2].toUint8() == 0x80 && p[-3].toUint8() == 0xE2)) {
      break;
    }

    // Rewind over the non-LineTerminator.  This can't underflow
    // |earliestPossibleStart| because it begins a code point.
    while (IsTrailingUnit(*--p)) {
      continue;
    }

    MOZ_ASSERT(earliestPossibleStart <= p);

    // But if we underflowed |WindowRadius|, adjust forward and stop.
    if (HalfWindowSize() > WindowRadius) {
      static_assert(WindowRadius > 3,
                    "skipping over non-lead code units below must not "
                    "advance past |offset|");

      // Drop the code point just included: step past its lead unit and any
      // trailing units so |p| again points at the start of a code point.
      while (IsTrailingUnit(*++p)) {
        continue;
      }

      MOZ_ASSERT(HalfWindowSize() < WindowRadius);
      break;
    }
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset - HalfWindowSize();
}

// UTF-16: compute the offset at which the window of context following
// |offset| ends.
//
// Walks forward from |offset| and stops at the first of: |limit_|,
// |WindowRadius| code units of context, a line terminator, or a surrogate
// that cannot be kept together with its partner inside the window.  The
// returned offset is in [offset, offset + WindowRadius].
template <>
size_t SourceUnits<char16_t>::findWindowEnd(size_t offset) const {
  const char16_t* const initial = codeUnitPtrAt(offset);
  const char16_t* p = initial;

  // Number of code units currently included from |offset| onward.
  auto HalfWindowSize = [&initial, &p]() {
    return PointerRangeSize(initial, p);
  };

  while (true) {
    MOZ_ASSERT(p <= limit_);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
      break;
    }

    char16_t c = *p;

    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
    // string and template literals.  These code points do affect line and
    // column coordinates, even as they encode their literal values.
    if (IsLineTerminator(c)) {
      break;
    }

    // Don't allow invalid UTF-16 in post-context.  Surrogate pairs are
    // consumed two units at a time below, so a trail surrogate seen here
    // has no lead surrogate before it inside the window: stop before it.

    if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(c))) {
      break;
    }

    // Optimistically consume the code unit, ungetting it below if needed.
    p++;

    // If it's not a surrogate at all, keep going.
    if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c))) {
      continue;
    }

    // Retract if the lead surrogate would stand alone at the end of the
    // window.
    if (HalfWindowSize() >= WindowRadius ||  // split pair
        p >= limit_ ||                       // half-pair at end of source
        !unicode::IsTrailSurrogate(*p))      // no paired trail surrogate
    {
      p--;
      break;
    }

    // Consume the trail surrogate that completes the pair.
    p++;
  }

  return offset + HalfWindowSize();
}

// UTF-8: compute the offset at which the window of context following
// |offset| ends.
//
// Walks forward from |offset| one whole code point at a time and stops at
// the first of: |limit_|, a code point that would push the window past
// |WindowRadius| code units, a line terminator, or an encoding error.  The
// returned offset is in [offset, offset + WindowRadius].
template <>
size_t SourceUnits<Utf8Unit>::findWindowEnd(size_t offset) const {
  const Utf8Unit* const initial = codeUnitPtrAt(offset);
  const Utf8Unit* p = initial;

  // Number of code units currently included from |offset| onward.
  auto HalfWindowSize = [&initial, &p]() {
    return PointerRangeSize(initial, p);
  };

  while (true) {
    MOZ_ASSERT(p <= limit_);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
      break;
    }

    // A non-encoding error might be followed by an encoding error within
    // the window radius, so we must validate as we go to not include
    // invalid UTF-8 in the computed window.  What joy!

    Utf8Unit lead = *p;
    if (mozilla::IsAscii(lead)) {
      if (IsSingleUnitLineTerminator(lead)) {
        break;
      }

      p++;
      continue;
    }

    // Non-ASCII: decode the full code point without consuming it.
    PeekedCodePoint<Utf8Unit> peeked = PeekCodePoint(p, limit_);
    if (peeked.isNone()) {
      break;  // encoding error
    }

    char32_t c = peeked.codePoint();
    if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR ||
                     c == unicode::PARA_SEPARATOR)) {
      break;
    }

    MOZ_ASSERT(!IsLineTerminator(c));

    // Never split a code point: include it only if all its units fit.
    uint8_t len = peeked.lengthInUnits();
    if (HalfWindowSize() + len > WindowRadius) {
      break;
    }

    p += len;
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset + HalfWindowSize();
}

// Consume code points until the next code unit to be read is at or past
// |position|, then reset the current token to an empty span at the
// resulting offset and discard any lookahead.  Returns false if reading a
// code point fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::advance(size_t position) {
  const Unit* target = this->sourceUnits.codeUnitPtrAt(position);
  for (;;) {
    if (this->sourceUnits.addressOfNextCodeUnit() >= target) {
      break;
    }

    if (!getCodePoint()) {
      return false;
    }
  }

  TokenStreamAnyChars& anyChars = anyCharsAccess();

  // |currentToken()| hands back a const token; it is deliberately
  // overwritten here.
  Token& current = const_cast<Token&>(anyChars.currentToken());
  current.pos.begin = this->sourceUnits.offset();
  current.pos.end = current.pos.begin;
#ifdef DEBUG
  current.type = TokenKind::Limit;
#endif
  // The token's kind is not meaningful after this; flag it as undefined
  // memory for checking tools.
  MOZ_MAKE_MEM_UNDEFINED(&current.type, sizeof(current.type));

  anyChars.lookahead = 0;
  return true;
}

// Restore the stream to the state saved in |pos|: the read position, the
// line bookkeeping, the flags, the current token and all lookahead tokens.
template <typename Unit, class AnyCharsAccess>
void TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(const Position& pos) {
  this->sourceUnits.setAddressOfNextCodeUnit(pos.buf,
                                             /* allowPoisoned = */ true);

  TokenStreamAnyChars& anyChars = anyCharsAccess();
  anyChars.lineno = pos.lineno;
  anyChars.linebase = pos.linebase;
  anyChars.prevLinebase = pos.prevLinebase;
  anyChars.flags = pos.flags;
  anyChars.lookahead = pos.lookahead;

  // The current token goes in the cursor slot; lookahead token k (counting
  // from 1) goes in the slot |aheadCursor(k)|.
  anyChars.tokens[anyChars.cursor()] = pos.currentToken;
  for (unsigned ahead = 1; ahead <= anyChars.lookahead; ahead++) {
    anyChars.tokens[anyChars.aheadCursor(ahead)] =
        pos.lookaheadTokens[ahead - 1];
  }
}

// Seek to |pos| after first filling this stream's source coordinates from
// those of |other|.  Returns false, without seeking, if that fill fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(
    const Position& pos, const TokenStreamAnyChars& other) {
  if (anyCharsAccess().srcCoords.fill(other.srcCoords)) {
    seekTo(pos);
    return true;
  }

  return false;
}

// Fill |err| for an error that has no source offset: the filename and muted
// flag come from this stream, the line number is zero, the column is
// default-constructed, and no line of context is attached.
void TokenStreamAnyChars::computeErrorMetadataNoOffset(
    ErrorMetadata* err) const {
  // The caller must hand over metadata that has no context line yet.
  MOZ_ASSERT(err->lineOfContext == nullptr);

  err->filename = filename_;
  err->isMuted = mutedErrors;
  err->columnNumber = JS::ColumnNumberOneOrigin();
  err->lineNumber = 0;
}

// Fill in |err|'s muted flag and filename.  The return value is not a
// success indicator: it is true when the location information came from
// this TokenStreamAnyChars, and false when this stream has no filename and
// the filename, line and column were taken from the innermost non-builtin
// frame of the current JSContext instead.
bool TokenStreamAnyChars::fillExceptingContext(ErrorMetadata* err,
                                               uint32_t offset) const {
  err->isMuted = mutedErrors;

  // Without a filename of our own, try to borrow a location from the
  // calling script.
  if (!filename_) {
    if (JSContext* cx = context()->maybeCurrentJSContext()) {
      NonBuiltinFrameIter frame(cx, FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
                                cx->realm()->principals());
      if (!frame.done() && frame.filename()) {
        JS::TaggedColumnNumberOneOrigin taggedColumn;
        err->filename = JS::ConstUTF8CharsZ(frame.filename());
        err->lineNumber = frame.computeLine(&taggedColumn);
        err->columnNumber =
            JS::ColumnNumberOneOrigin(taggedColumn.oneOriginValue());
        return false;
      }
    }
  }

  // Fall back to (or simply use) this stream's own filename.
  err->filename = filename_;
  return true;
}

// UTF-16 specialization.  |addLineOfContext| uses the encoded offset and
// length directly when |Unit| is |char16_t|, so this is not expected to be
// called; it asserts if it ever is and leaves the out-params untouched.
template <>
inline void SourceUnits<char16_t>::computeWindowOffsetAndLength(
    const char16_t* encodedWindow, size_t encodedTokenOffset,
    size_t* utf16TokenOffset, size_t encodedWindowLength,
    size_t* utf16WindowLength) const {
  MOZ_ASSERT_UNREACHABLE("shouldn't need to recompute for UTF-16");
}

// UTF-8 specialization: given a window of valid UTF-8 of
// |encodedWindowLength| units starting at |encodedWindow|, and a token
// starting |encodedTokenOffset| units into it, compute the same token
// offset and window length measured in UTF-16 code units.
template <>
inline void SourceUnits<Utf8Unit>::computeWindowOffsetAndLength(
    const Utf8Unit* encodedWindow, size_t encodedTokenOffset,
    size_t* utf16TokenOffset, size_t encodedWindowLength,
    size_t* utf16WindowLength) const {
  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
             "token offset must be within the window, and the two lambda "
             "calls below presume this ordering of values");

  const Utf8Unit* const windowEnd = encodedWindow + encodedWindowLength;
  const Utf8Unit* const tokenStart = encodedWindow + encodedTokenOffset;
  MOZ_ASSERT(tokenStart <= windowEnd);

  // Running state shared by both counting passes: the scan position and the
  // number of UTF-16 units that the text scanned so far occupies.
  const Utf8Unit* cursor = encodedWindow;
  size_t utf16Units = 0;

  // Advance |cursor| up to |stop|, accumulating into |utf16Units|, and
  // return the running total.
  auto CountThrough = [&utf16Units, &cursor](const Utf8Unit* stop) {
    while (cursor < stop) {
      Utf8Unit lead = *cursor++;
      if (MOZ_LIKELY(IsAscii(lead))) {
        // One ASCII unit is one UTF-16 unit.
        utf16Units++;
        continue;
      }

      Maybe<char32_t> decoded = DecodeOneUtf8CodePoint(lead, &cursor, stop);
      MOZ_ASSERT(decoded.isSome(),
                 "computed window should only contain valid UTF-8");

      // Supplementary code points need a surrogate pair in UTF-16.
      utf16Units += unicode::IsSupplementary(decoded.value()) ? 2 : 1;
    }

    return utf16Units;
  };

  // First pass: from the window start to the token.
  *utf16TokenOffset = CountThrough(tokenStart);

  // Second pass: continue from the token to the window end, so the total
  // covers the whole window.
  *utf16WindowLength = CountThrough(windowEnd);
}

// Attach to |err| a null-terminated UTF-16 copy of the source text around
// |offset| (a window bounded by |findWindowStart| and |findWindowEnd|),
// together with the window's length and the token's offset within it, both
// in UTF-16 code units.  Returns false on allocation failure.  An empty
// window is not an error: |err| is left without a context line.
template <typename Unit>
bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
                                                  uint32_t offset) const {
  // All of the following are measured in source units of type |Unit|.
  const size_t encodedOffset = offset;
  const size_t windowBegin = sourceUnits.findWindowStart(encodedOffset);
  const size_t windowFinish = sourceUnits.findWindowEnd(encodedOffset);

  const size_t encodedWindowLength = windowFinish - windowBegin;
  MOZ_ASSERT(encodedWindowLength <= SourceUnits::WindowRadius * 2);

  // An encoding error at the very start of a line can produce an empty
  // window; a zero-length "line" of context would be useless.
  if (encodedWindowLength == 0) {
    MOZ_ASSERT(err->lineOfContext == nullptr,
               "ErrorMetadata::lineOfContext must be null so we don't "
               "have to set the lineLength/tokenOffset fields");
    return true;
  }

  const Unit* windowUnits = sourceUnits.codeUnitPtrAt(windowBegin);

  CharBuffer contextChars(fc);
  if (!FillCharBufferFromSourceNormalizingAsciiLineBreaks(
          contextChars, windowUnits, windowUnits + encodedWindowLength)) {
    return false;
  }

  // Record the length before the terminator is appended.
  const size_t utf16WindowLength = contextChars.length();

  // Consumers receive a null-terminated string.
  if (!contextChars.append('\0')) {
    return false;
  }

  err->lineOfContext.reset(contextChars.extractOrCopyRawBuffer());
  if (!err->lineOfContext) {
    return false;
  }

  const size_t encodedTokenOffset = encodedOffset - windowBegin;

  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
             "token offset must be inside the window");

  // A code point never takes fewer UTF-8 units than UTF-16 units: ASCII is
  // 1:1, [U+0080, U+10000) is 2-3 UTF-8 units to 1 UTF-16 unit, and
  // [U+10000, U+10FFFF] is 4 UTF-8 units to 2 UTF-16 units.
  //
  // So when the encoded length equals the UTF-16 length (trivially true for
  // Unit=char16_t) the encoded offsets already are the UTF-16 offsets;
  // otherwise they must be converted.
  if constexpr (std::is_same_v<Unit, char16_t>) {
    MOZ_ASSERT(utf16WindowLength == encodedWindowLength,
               "UTF-16 to UTF-16 shouldn't change window length");
    err->lineLength = encodedWindowLength;
    err->tokenOffset = encodedTokenOffset;
  } else {
    static_assert(std::is_same_v<Unit, Utf8Unit>, "should only see UTF-8 here");

    const bool sameLength = utf16WindowLength == encodedWindowLength;
#ifdef DEBUG
    auto unitIsAscii = [](Unit u) { return IsAscii(u); };
    MOZ_ASSERT(std::all_of(windowUnits, windowUnits + encodedWindowLength,
                           unitIsAscii) == sameLength,
               "equal window lengths in UTF-8 should correspond only to "
               "wholly-ASCII text");
#endif
    if (!sameLength) {
      // Non-ASCII text present: recount in UTF-16 units.
      sourceUnits.computeWindowOffsetAndLength(
          windowUnits, encodedTokenOffset, &err->tokenOffset,
          encodedWindowLength, &err->lineLength);
    } else {
      err->lineLength = encodedWindowLength;
      err->tokenOffset = encodedTokenOffset;
    }
  }

  return true;
}

// Fill |err| for an error at |errorOffset|, which is either "no offset", an
// explicit offset, or (any other alternative) the current read position.
// Returns false only if computing the line of context fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::computeErrorMetadata(
    ErrorMetadata* err, const ErrorOffset& errorOffset) const {
  if (errorOffset.is<NoOffset>()) {
    anyCharsAccess().computeErrorMetadataNoOffset(err);
    return true;
  }

  uint32_t offset;
  if (!errorOffset.is<uint32_t>()) {
    offset = this->sourceUnits.offset();
  } else {
    offset = errorOffset.as<uint32_t>();
  }

  // |fillExceptingContext| does not report success or failure: its result
  // says whether this TokenStream can supply a line of context.  When it
  // cannot, there is nothing further to fill in.
  if (!fillExceptingContext(err, offset)) {
    return true;
  }

  // Attach a line of context from this TokenStream to aid debugging.
  return internalComputeLineOfContext(err, offset);
}

// Report JSMSG_ILLEGAL_CHARACTER for |cp|, rendered as "U+" followed by at
// least four uppercase hex digits.  Reports out-of-memory instead if the
// display string cannot be allocated.
template <typename Unit, class AnyCharsAccess>
void TokenStreamSpecific<Unit, AnyCharsAccess>::reportIllegalCharacter(
    int32_t cp) {
  UniqueChars formatted = JS_smprintf("U+%04X", cp);
  if (formatted) {
    error(JSMSG_ILLEGAL_CHARACTER, formatted.get());
    return;
  }

  ReportOutOfMemory(anyCharsAccess().fc);
}

// A '\' has just been consumed: try to match a Unicode escape sequence
// after it.  On a match, skip all code units involved, store the encoded
// code point in |*codePoint|, and return the number of code units skipped
// after the '\'.  Otherwise return 0 with the stream positioned just after
// the '\', as it was on entry.
template <typename Unit, class AnyCharsAccess>
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscape(
    char32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));

  int32_t marker = getCodeUnit();
  if (marker != 'u') {
    // NOTE: |marker| may be EOF here.
    ungetCodeUnit(marker);
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return 0;
  }

  int32_t first = getCodeUnit();

  // "\u{...}": delegate to the variable-length form.  ('{' is not a hex
  // digit, so this test is disjoint from the four-digit case below.)
  if (first == '{') {
    return matchExtendedUnicodeEscape(codePoint);
  }

  // "\uXXXX": |first| is the most significant digit and |lowDigits|
  // receives the value of the remaining three.
  char16_t lowDigits;
  if (IsAsciiHexDigit(first) &&
      this->sourceUnits.matchHexDigits(3, &lowDigits)) {
    *codePoint = (AsciiAlphanumericToNumber(first) << 12) | lowDigits;
    return 5;
  }

  // NOTE: |first| may be EOF here, so this ungets either one or two units.
  ungetCodeUnit(first);
  ungetCodeUnit('u');
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}

// "\u{" has just been consumed: try to match the rest of an extended
// Unicode escape, i.e. any number of leading zeroes, up to six further hex
// digits, and a closing '}'.  On a match, store the value in |*codePoint|
// and return the number of code units consumed after the '\'.  Otherwise
// unskip everything back to just after the '\' and return 0.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchExtendedUnicodeEscape(
    char32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('{'));

  int32_t unit = getCodeUnit();

  // Leading zeroes don't count against the six-digit limit below.
  uint32_t leadingZeroes = 0;
  for (; unit == '0'; unit = getCodeUnit()) {
    leadingZeroes++;
  }

  // Accumulate at most six significant hex digits.
  size_t digits = 0;
  uint32_t code = 0;
  for (; IsAsciiHexDigit(unit) && digits < 6; digits++) {
    code = (code << 4) | AsciiAlphanumericToNumber(unit);
    unit = getCodeUnit();
  }

  // Units consumed since the '\': "u{", every digit, and the final unit
  // read -- unless that was EOF, which doesn't count.
  uint32_t gotten = 2 + leadingZeroes + digits;
  if (unit != EOF) {
    gotten++;
  }

  const bool terminated = unit == '}';
  const bool sawDigit = leadingZeroes > 0 || digits > 0;
  if (terminated && sawDigit && code <= unicode::NonBMPMax) {
    *codePoint = code;
    return gotten;
  }

  this->sourceUnits.unskipCodeUnits(gotten);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}

// Match a Unicode escape that encodes a code point able to start an
// identifier.  Returns the escape's length on success.  Otherwise returns 0
// with the stream positioned just after the '\'.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdStart(
    char32_t* codePoint) {
  const uint32_t escapeLength = matchUnicodeEscape(codePoint);

  if (MOZ_UNLIKELY(escapeLength == 0)) {
    // Not an escape at all; nothing was consumed.
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return 0;
  }

  if (MOZ_UNLIKELY(!unicode::IsIdentifierStart(*codePoint))) {
    // A well-formed escape, but for the wrong kind of code point: undo it.
    this->sourceUnits.unskipCodeUnits(escapeLength);
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return 0;
  }

  return escapeLength;
}

// Match a Unicode escape that encodes a code point able to continue an
// identifier.  Returns true on success.  Otherwise returns false with the
// stream positioned just after the '\'.
template <typename Unit, class AnyCharsAccess>
bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdent(
    char32_t* codePoint) {
  const uint32_t escapeLength = matchUnicodeEscape(codePoint);

  if (MOZ_UNLIKELY(escapeLength == 0)) {
    // Not an escape at all; nothing was consumed.
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return false;
  }

  if (MOZ_UNLIKELY(!unicode::IsIdentifierPart(*codePoint))) {
    // A well-formed escape, but for the wrong kind of code point: undo it.
    this->sourceUnits.unskipCodeUnits(escapeLength);
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return false;
  }

  return true;
}

// Consume one identifier-start code point, given either literally or as a
// Unicode escape, and record in |*sawEscape| which form was used.  Reports
// an error and returns false if the next code point cannot start an
// identifier or the escape is malformed.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool
TokenStreamSpecific<Unit, AnyCharsAccess>::matchIdentifierStart(
    IdentifierEscapes* sawEscape) {
  int32_t first = getCodeUnit();
  if (first == EOF) {
    error(JSMSG_MISSING_PRIVATE_NAME);
    return false;
  }

  if (MOZ_LIKELY(isAsciiCodePoint(first))) {
    if (unicode::IsIdentifierStart(char16_t(first))) {
      *sawEscape = IdentifierEscapes::None;
      return true;
    } else if (first == '\\') {
      char32_t escaped;
      if (matchUnicodeEscapeIdStart(&escaped) != 0) {
        *sawEscape = IdentifierEscapes::SawUnicodeEscape;
        return true;
      }

      // It would be possible to point "into" a mistyped escape (at the 'H'
      // of "\u{41H}", say), but that isn't done, and the unit right after
      // the '\' isn't necessarily the bad one.  So put the '\' back and
      // report the error at the start of the invalid escape.
      ungetCodeUnit('\\');
      error(JSMSG_BAD_ESCAPE);
      return false;
    }
  }

  // Anything else: return the lead unit to the stream and examine the whole
  // code point instead.
  ungetCodeUnit(first);

  PeekedCodePoint<Unit> whole = this->sourceUnits.peekCodePoint();
  const bool startsIdentifier =
      !whole.isNone() && unicode::IsIdentifierStart(whole.codePoint());
  if (!startsIdentifier) {
    error(JSMSG_MISSING_PRIVATE_NAME);
    return false;
  }

  this->sourceUnits.consumeKnownCodePoint(whole);
  *sawEscape = IdentifierEscapes::None;
  return true;
}

// Look for the debugging directive comments "//# sourceURL" and
// "//# sourceMappingURL" ("//@" in place of "//#" is deprecated).
//
// Several JavaScript transpilers work around a crashing bug in IE by
// wrapping a single-line source-mapping comment in a multiline comment.  To
// avoid potentially costly lookahead and backtracking, that case is only
// checked for once a '#' code unit has been seen.
//
// The display URL is tried first and the source-mapping URL only if that
// succeeds.  If either fails, the current token is marked bad and false is
// returned.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirectives(
    bool isMultiline, bool shouldWarnDeprecated) {
  if (getDisplayURL(isMultiline, shouldWarnDeprecated) &&
      getSourceMappingURL(isMultiline, shouldWarnDeprecated)) {
    return true;
  }

  badToken();
  return false;
}

// Replace |*destination| with a freshly allocated, null-terminated copy of
// |charBuffer|'s contents.  Returns false if the allocation fails, in which
// case |*destination| holds the failed (null) allocation.
[[nodiscard]] bool TokenStreamCharsShared::copyCharBufferTo(
    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  // One extra element for the terminator.
  const size_t unitCount = charBuffer.length();
  *destination = fc->getAllocator()->make_pod_array<char16_t>(unitCount + 1);
  if (!*destination) {
    return false;
  }

  // |std::copy| returns the position just past the last element written,
  // which is exactly where the terminator belongs.
  char16_t* terminator =
      std::copy(charBuffer.begin(), charBuffer.end(), destination->get());
  *terminator = '\0';
  return true;
}

// If the upcoming source text matches |directive|, read the value that
// follows it (up to whitespace, EOF, an encoding error, or -- inside a
// multiline comment -- the closing "*/") and store a copy in
// |*destination|.  An absent directive or an empty value is not an error.
// Returns false if issuing the deprecation warning fails or on allocation
// failure.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirective(
    bool isMultiline, bool shouldWarnDeprecated, const char* directive,
    uint8_t directiveLength, const char* errorMsgPragma,
    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  // Nothing to do unless |directive| is next.  (|directive| must be ASCII,
  // so matching it is the same for UTF-8 and UTF-16 source.)
  if (!this->sourceUnits.matchCodeUnits(directive, directiveLength)) {
    return true;
  }

  if (shouldWarnDeprecated &&
      !warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) {
    return false;
  }

  this->charBuffer.clear();

  for (;;) {
    int32_t next = peekCodeUnit();
    if (next == EOF) {
      break;
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(next))) {
      // Encoding errors are deliberately not reported here: the caller's
      // handling of the rest of the comment will encounter them.
      PeekedCodePoint<Unit> whole = this->sourceUnits.peekCodePoint();
      if (whole.isNone() || unicode::IsSpace(whole.codePoint())) {
        break;
      }

      MOZ_ASSERT(!IsLineTerminator(whole.codePoint()),
                 "!IsSpace must imply !IsLineTerminator or else we'll fail to "
                 "maintain line-info/flags for EOL");
      this->sourceUnits.consumeKnownCodePoint(whole);

      if (!AppendCodePointToCharBuffer(this->charBuffer, whole.codePoint())) {
        return false;
      }

      continue;
    }

    // ASCII from here on.
    if (unicode::IsSpace(AssertedCast<Latin1Char>(next))) {
      break;
    }

    consumeKnownCodeUnit(next);

    // Directives may sit in single- or multi-line comments.  In the latter,
    // "*/" ends the comment and hence the value: put the '*' back so the
    // terminator is still there to be seen.
    if (isMultiline && next == '*' && peekCodeUnit() == '/') {
      ungetCodeUnit('*');
      break;
    }

    if (!this->charBuffer.append(next)) {
      return false;
    }
  }

  // A directive with no URL is tolerated -- comments can contain anything.
  if (this->charBuffer.empty()) {
    return true;
  }

  return copyCharBufferTo(destination);
}

// Match comments of the form "//# sourceURL=<url>" or
// "/\* //# sourceURL=<url> *\/" and record the URL as the display URL.
//
// The source text says "sourceURL", but internally this is called the
// "displayURL": it is the name the developer wants the source shown under,
// as distinct from the source's actual URL.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDisplayURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  static constexpr char directiveText[] = " sourceURL=";
  constexpr uint8_t directiveTextLength = js_strlen(directiveText);

  auto* urlSlot = &anyCharsAccess().displayURL_;
  return getDirective(isMultiline, shouldWarnDeprecated, directiveText,
                      directiveTextLength, "sourceURL", urlSlot);
}

template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getSourceMappingURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  // Match comments of the form "//# sourceMappingURL=<url>" or
  // "/\* //# sourceMappingURL=<url> *\/"

  static constexpr char sourceMappingURLDirective[] = " sourceMappingURL=";
  constexpr uint8_t sourceMappingURLDirectiveLength =
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5

¤ Dauer der Verarbeitung: 0.23 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.