Quelle RegExpObject.cpp

Sprache: C

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "vm/RegExpObject.h"

#include "mozilla/MemoryReporting.h"
#include "mozilla/PodOperations.h"

#include <type_traits>

#include "builtin/RegExp.h"
#include "builtin/SelfHostingDefines.h"  // REGEXP_*_FLAG
#include "frontend/FrontendContext.h"    // AutoReportFrontendContext
#include "frontend/TokenStream.h"
#include "gc/HashUtil.h"
#include "irregexp/RegExpAPI.h"
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/friend/StackLimits.h"    // js::ReportOverRecursed
#include "js/Object.h"                // JS::GetBuiltinClass
#include "js/Printer.h"               // js::GenericPrinter
#include "js/RegExp.h"
#include "js/RegExpFlags.h"  // JS::RegExpFlags
#include "util/StringBuilder.h"
#include "util/Unicode.h"
#include "vm/JSONPrinter.h"  // js::JSONPrinter
#include "vm/MatchPairs.h"
#include "vm/PlainObject.h"
#include "vm/RegExpStatics.h"
#include "vm/StringType.h"

#include "vm/JSContext-inl.h"
#include "vm/JSObject-inl.h"
#include "vm/NativeObject-inl.h"
#include "vm/Shape-inl.h"

using namespace js;

using JS::CompileOptions;
using JS::RegExpFlag;
using JS::RegExpFlags;
using mozilla::DebugOnly;
using mozilla::PodCopy;

using JS::AutoCheckCannotGC;

// Compile-time checks that every JS::RegExpFlag bit has the same value as the
// corresponding REGEXP_*_FLAG constant from builtin/SelfHostingDefines.h, so
// that C++ and self-hosted JS agree on the flag encoding.
static_assert(RegExpFlag::HasIndices == REGEXP_HASINDICES_FLAG,
              "self-hosted JS and /d flag bits must agree");
static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
              "self-hosted JS and /g flag bits must agree");
static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
              "self-hosted JS and /i flag bits must agree");
static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
              "self-hosted JS and /m flag bits must agree");
static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
              "self-hosted JS and /s flag bits must agree");
static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
              "self-hosted JS and /u flag bits must agree");
static_assert(RegExpFlag::UnicodeSets == REGEXP_UNICODESETS_FLAG,
              "self-hosted JS and /v flag bits must agree");
static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
              "self-hosted JS and /y flag bits must agree");

// Allocate a RegExpObject of kind |newKind| with prototype |proto| and make
// sure it has the initial custom shape for RegExpObject. Returns nullptr if
// either the allocation or the shape setup fails.
RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
                              HandleObject proto /* = nullptr */) {
  Rooted<RegExpObject*> obj(
      cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
  if (!obj || !SharedShape::ensureInitialCustomShape<RegExpObject>(cx, obj)) {
    return nullptr;
  }

  // The |lastIndex| property must live in the slot the class reports for it.
  MOZ_ASSERT(obj->lookupPure(cx->names().lastIndex)->slot() ==
             RegExpObject::lastIndexSlot());

  return obj;
}

/* MatchPairs */

// Resize this object to hold as many pairs as |copyFrom| and copy them over.
// |copyFrom| must contain at least one pair. Returns false if the resize
// fails.
bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
  const size_t sourceCount = copyFrom.pairCount();
  MOZ_ASSERT(sourceCount > 0);

  const bool resized = allocOrExpandArray(sourceCount);
  if (resized) {
    // allocOrExpandArray has updated pairs_ and pairCount_.
    PodCopy(pairs_, copyFrom.pairs_, pairCount_);
  }
  return resized;
}

// Resize the backing vector to |pairCount| entries (left uninitialized) and
// point pairs_/pairCount_ at the new storage. Returns false, leaving
// pairs_/pairCount_ untouched, if the resize fails.
bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
  const bool resized = vec_.resizeUninitialized(pairCount);
  if (resized) {
    pairs_ = &vec_[0];
    pairCount_ = pairCount;
  }
  return resized;
}

/* RegExpObject */

// Return the RegExpShared already attached to |regexp|, creating and
// attaching one via createShared() when there is none yet.
/* static */
RegExpShared* RegExpObject::getShared(JSContext* cx,
                                      Handle<RegExpObject*> regexp) {
  if (!regexp->hasShared()) {
    return createShared(cx, regexp);
  }
  return regexp->getShared();
}

// If |native| is one of the built-in RegExp flag getters, store the flag bit
// it corresponds to in |*mask| and return true. Otherwise return false and
// leave |*mask| untouched.
/* static */
bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
  // Record the matching flag and report success.
  auto found = [mask](auto flag) {
    *mask = flag;
    return true;
  };

  if (native == regexp_hasIndices) {
    return found(RegExpFlag::HasIndices);
  }
  if (native == regexp_global) {
    return found(RegExpFlag::Global);
  }
  if (native == regexp_ignoreCase) {
    return found(RegExpFlag::IgnoreCase);
  }
  if (native == regexp_multiline) {
    return found(RegExpFlag::Multiline);
  }
  if (native == regexp_dotAll) {
    return found(RegExpFlag::DotAll);
  }
  if (native == regexp_sticky) {
    return found(RegExpFlag::Sticky);
  }
  if (native == regexp_unicode) {
    return found(RegExpFlag::Unicode);
  }
  if (native == regexp_unicodeSets) {
    return found(RegExpFlag::UnicodeSets);
  }

  return false;
}

// Final class-initialization hook referenced from RegExpObjectClassSpec.
// In release builds this does nothing and always succeeds; in DEBUG builds it
// additionally checks the storage of RegExp.prototype.exec.
static bool FinishRegExpClassInit(JSContext* cx, JS::HandleObject ctor,
                                  JS::HandleObject proto) {
#ifdef DEBUG
  // Assert that RegExp.prototype.exec is a data property stored in a dynamic
  // (non-fixed) slot. The optimization in
  // InlinableNativeIRGenerator::tryAttachIntrinsicRegExpExec depends on this.
  Handle<NativeObject*> nproto = proto.as<NativeObject>();
  auto prop = nproto->lookupPure(cx->names().exec);
  MOZ_ASSERT(prop->isDataProperty());
  MOZ_ASSERT(!nproto->isFixedSlot(prop->slot()));
#endif
  return true;
}

// ClassSpec shared by RegExpObject::class_ and RegExpObject::protoClass_. It
// names the constructor/prototype creation hooks, the method and property
// tables from builtin/RegExp, and FinishRegExpClassInit as the last entry.
static const ClassSpec RegExpObjectClassSpec = {
    GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
    GenericCreatePrototype<RegExpObject>,
    js::regexp_static_methods,
    js::regexp_static_props,
    js::regexp_methods,
    js::regexp_properties,
    FinishRegExpClassInit,
};

// JSClass for RegExp instances: reserves RegExpObject::RESERVED_SLOTS slots,
// caches the JSProto_RegExp prototype, and uses RegExpObjectClassSpec.
const JSClass RegExpObject::class_ = {
    "RegExp",
    JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
        JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
    JS_NULL_CLASS_OPS,
    &RegExpObjectClassSpec,
};

// JSClass named "RegExp.prototype". Unlike class_, it declares no reserved
// slots; it shares the same ClassSpec.
const JSClass RegExpObject::protoClass_ = {
    "RegExp.prototype",
    JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
    JS_NULL_CLASS_OPS,
    &RegExpObjectClassSpec,
};

// Create a RegExpObject from a raw character buffer: atomize |chars| and
// delegate to the atom-taking create() overload. Returns nullptr if
// atomization or creation fails. Only instantiated for char16_t.
template <typename CharT>
RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
                                   size_t length, RegExpFlags flags,
                                   NewObjectKind newKind) {
  static_assert(std::is_same_v<CharT, char16_t>,
                "this code may need updating if/when CharT encodes UTF-8");

  Rooted<JSAtom*> patternAtom(cx, AtomizeChars(cx, chars, length));
  if (patternAtom) {
    return create(cx, patternAtom, flags, newKind);
  }
  return nullptr;
}

// Explicit instantiation for two-byte patterns.
template RegExpObject* RegExpObject::create(JSContext* cx,
                                            const char16_t* chars,
                                            size_t length, RegExpFlags flags,
                                            NewObjectKind newKind);

// Create a RegExpObject for |source| and |flags| without running a pattern
// syntax check here. Returns nullptr if allocation fails.
RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
                                                Handle<JSAtom*> source,
                                                RegExpFlags flags,
                                                NewObjectKind newKind) {
  Rooted<RegExpObject*> obj(cx, RegExpAlloc(cx, newKind));
  if (obj) {
    obj->initAndZeroLastIndex(source, flags, cx);
  }
  return obj;
}

// Create a RegExpObject for the atom |source| with |flags|. The pattern is
// syntax-checked first; nullptr is returned if the check or the allocation
// fails. The new object has no RegExpShared attached yet.
RegExpObject* RegExpObject::create(JSContext* cx, Handle<JSAtom*> source,
                                   RegExpFlags flags, NewObjectKind newKind) {
  Rooted<RegExpObject*> regexp(cx);
  {
    // The syntax checker takes a token stream; a dummy one backed by a
    // frontend context is set up just for this call.
    // NOTE(review): |fc| presumably reports any recorded error to |cx| when
    // this scope ends, which is why the work sits in its own block - confirm.
    AutoReportFrontendContext fc(cx);
    CompileOptions dummyOptions(cx);
    frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);

    LifoAllocScope allocScope(&cx->tempLifoAlloc());
    if (!irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(),
                                      dummyTokenStream, source, flags)) {
      return nullptr;
    }

    regexp = RegExpAlloc(cx, newKind);
    if (!regexp) {
      return nullptr;
    }

    regexp->initAndZeroLastIndex(source, flags, cx);

    MOZ_ASSERT(!regexp->hasShared());
  }
  return regexp;
}

// Look up (or create) the zone's RegExpShared for |regexp|'s source and flags
// and attach it to |regexp|. |regexp| must not already have one. Returns
// nullptr, leaving |regexp| unchanged, if the lookup fails.
/* static */
RegExpShared* RegExpObject::createShared(JSContext* cx,
                                         Handle<RegExpObject*> regexp) {
  MOZ_ASSERT(!regexp->hasShared());

  Rooted<JSAtom*> pattern(cx, regexp->getSource());
  RegExpShared* result =
      cx->zone()->regExps().get(cx, pattern, regexp->getFlags());
  if (result) {
    regexp->setShared(result);
    MOZ_ASSERT(regexp->hasShared());
  }
  return result;
}

// Give the still-empty object |self| its initial shape by adding the
// |lastIndex| property in reserved slot LAST_INDEX_SLOT. Returns the
// resulting shared shape, or nullptr if adding the property fails.
SharedShape* RegExpObject::assignInitialShape(JSContext* cx,
                                              Handle<RegExpObject*> self) {
  MOZ_ASSERT(self->empty());

  static_assert(LAST_INDEX_SLOT == 0);

  // Only the Writable flag is passed: lastIndex is writable but
  // non-configurable.
  const bool added = NativeObject::addPropertyInReservedSlot(
      cx, self, cx->names().lastIndex, LAST_INDEX_SLOT,
      {PropertyFlag::Writable});
  if (added) {
    return self->sharedShape();
  }
  return nullptr;
}

// (Re)initialize this object's source and flags without touching lastIndex.
// Any attached RegExpShared is dropped first.
void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
  // If this is a re-initialization with an existing RegExpShared, 'flags'
  // may not match getShared()->flags, so forget the RegExpShared.
  clearShared();

  setSource(source);
  setFlags(flags);
}

// Initialize source and flags via initIgnoringLastIndex(), then reset
// lastIndex with zeroLastIndex().
void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
                                        JSContext* cx) {
  initIgnoringLastIndex(source, flags);
  zeroLastIndex(cx);
}

// Visit every bit set in |flags|, from the lowest bit to the highest.
// For a recognised flag, |known(name, letter)| is called with the flag's
// descriptive name and its one-letter spelling; for any other set bit,
// |unknown(bit)| is called with that single-bit value.
template <typename KnownF, typename UnknownF>
void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
  const uint8_t bits = flags.value();

  for (unsigned shift = 0; shift < 8; shift++) {
    const uint8_t bit = uint8_t(1u << shift);
    if ((bits & bit) == 0) {
      continue;
    }

    switch (bit) {
      case RegExpFlag::HasIndices:
        known("HasIndices", "d");
        break;
      case RegExpFlag::Global:
        known("Global", "g");
        break;
      case RegExpFlag::IgnoreCase:
        known("IgnoreCase", "i");
        break;
      case RegExpFlag::Multiline:
        known("Multiline", "m");
        break;
      case RegExpFlag::DotAll:
        known("DotAll", "s");
        break;
      case RegExpFlag::Unicode:
        known("Unicode", "u");
        break;
      case RegExpFlag::UnicodeSets:
        known("UnicodeSets", "v");
        break;
      case RegExpFlag::Sticky:
        known("Sticky", "y");
        break;
      default:
        unknown(bit);
        break;
    }
  }
}

// Stream |flags| as their one-letter spellings; each unrecognised bit is
// written as '?'.
std::ostream& JS::operator<<(std::ostream& os, RegExpFlags flags) {
  auto writeLetter = [&os](const char*, const char* letter) { os << letter; };
  auto writeUnknown = [&os](uint8_t) { os << '?'; };
  ForEachRegExpFlag(flags, writeLetter, writeUnknown);
  return os;
}

#if defined(DEBUG) || defined(JS_JITSPEW)
// Emit this object's "source", "flags" and "lastIndex" as JSON properties.
void RegExpObject::dumpOwnFields(js::JSONPrinter& json) const {
  js::GenericPrinter& sourceOut = json.beginStringProperty("source");
  getSource()->dumpPropertyName(sourceOut);
  json.endStringProperty();

  // Flags are listed by descriptive name; unknown bits are shown in hex.
  json.beginInlineListProperty("flags");
  auto emitName = [&json](const char* name, const char*) {
    json.value("%s", name);
  };
  auto emitUnknown = [&json](uint8_t bit) {
    json.value("Unknown(%02x)", bit);
  };
  ForEachRegExpFlag(getFlags(), emitName, emitUnknown);
  json.endInlineList();

  js::GenericPrinter& lastIndexOut = json.beginStringProperty("lastIndex");
  getLastIndex().dumpStringContent(lastIndexOut);
  json.endStringProperty();
}

// Print this regexp as /source/flags. Unrecognised flag bits are omitted.
void RegExpObject::dumpOwnStringContent(js::GenericPrinter& out) const {
  out.put("/");
  getSource()->dumpCharsNoQuote(out);
  out.put("/");

  auto putLetter = [&out](const char*, const char* letter) {
    out.put(letter);
  };
  auto skipUnknown = [](uint8_t) {};
  ForEachRegExpFlag(getFlags(), putLetter, skipUnknown);
}
#endif /* defined(DEBUG) || defined(JS_JITSPEW) */

// Latin-1 overload: only LF and CR count as line terminators.
static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}

// Two-byte overload: LF, CR, U+2028 and U+2029 count as line terminators.
static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
  switch (c) {
    case '\n':
    case '\r':
    case 0x2028:
    case 0x2029:
      return true;
    default:
      return false;
  }
}

// Append the escape letter for the Latin-1 line terminator |c| ('n' for LF,
// 'r' for CR); the caller supplies the leading backslash. Returns false if
// the append fails. Crashes if |c| is not a line terminator.
static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
    StringBuilder& sb, const JS::Latin1Char c) {
  if (c == '\n') {
    return sb.append('n');
  }
  if (c == '\r') {
    return sb.append('r');
  }
  MOZ_CRASH("Bad LineTerminator");
}

// Append the escape text for the two-byte line terminator |c|: "n", "r",
// "u2028" or "u2029"; the caller supplies the leading backslash. Returns
// false if the append fails. Crashes if |c| is not a line terminator.
static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuilder& sb,
                                                          const char16_t c) {
  if (c == '\n') {
    return sb.append('n');
  }
  if (c == '\r') {
    return sb.append('r');
  }
  if (c == 0x2028) {
    return sb.append("u2028");
  }
  if (c == 0x2029) {
    return sb.append("u2029");
  }
  MOZ_CRASH("Bad LineTerminator");
}

// Prepare |sb| the first time escaping is needed: switch it to two-byte
// storage for char16_t input, reserve |oldLen + 1| characters, and copy the
// already-scanned prefix [oldChars, it) into it. Returns false on failure.
template <typename CharT>
static MOZ_ALWAYS_INLINE bool SetupBuilder(StringBuilder& sb,
                                           const CharT* oldChars, size_t oldLen,
                                           const CharT* it) {
  constexpr bool isTwoByte = std::is_same_v<CharT, char16_t>;
  if constexpr (isTwoByte) {
    if (!sb.ensureTwoByteChars()) {
      return false;
    }
  }

  const bool reserved = sb.reserve(oldLen + 1);
  if (reserved) {
    const size_t prefixLength = size_t(it - oldChars);
    sb.infallibleAppend(oldChars, prefixLength);
  }
  return reserved;
}

// Scan the pattern [oldChars, oldChars + oldLen) and write an escaped copy
// into |sb|: an unescaped '/' outside a character class gets a backslash in
// front of it, and every line terminator is replaced by its escape sequence.
// Returns false if appending to |sb| fails.
//
// Note: leaves the string builder empty if no escaping need be performed.
template <typename CharT>
static bool EscapeRegExpPattern(StringBuilder& sb, const CharT* oldChars,
                                size_t oldLen) {
  // True while scanning between an unescaped '[' and the matching ']'.
  bool inBrackets = false;
  // True when the previous character was a backslash that itself was not
  // escaped, i.e. the current character is escaped.
  bool previousCharacterWasBackslash = false;

  for (const CharT* it = oldChars; it < oldChars + oldLen; ++it) {
    CharT ch = *it;
    if (!previousCharacterWasBackslash) {
      if (inBrackets) {
        if (ch == ']') {
          inBrackets = false;
        }
      } else if (ch == '/') {
        // There's a forward slash that needs escaping.
        if (sb.empty()) {
          // This is the first char we've seen that needs escaping,
          // copy everything up to this point.
          if (!SetupBuilder(sb, oldChars, oldLen, it)) {
            return false;
          }
        }
        // Only the backslash is added here; the '/' itself is appended by
        // the generic copy below.
        if (!sb.append('\\')) {
          return false;
        }
      } else if (ch == '[') {
        inBrackets = true;
      }
    }

    if (IsRegExpLineTerminator(ch)) {
      // There's LineTerminator that needs escaping.
      if (sb.empty()) {
        // This is the first char we've seen that needs escaping,
        // copy everything up to this point.
        if (!SetupBuilder(sb, oldChars, oldLen, it)) {
          return false;
        }
      }
      // If the terminator was already preceded by a backslash, reuse that
      // backslash instead of adding a second one.
      if (!previousCharacterWasBackslash) {
        if (!sb.append('\\')) {
          return false;
        }
      }
      if (!AppendEscapedLineTerminator(sb, ch)) {
        return false;
      }
    } else if (!sb.empty()) {
      // Once the builder is in use, every ordinary character is copied.
      if (!sb.append(ch)) {
        return false;
      }
    }

    // Update the escape state: a backslash escapes only the next character.
    if (previousCharacterWasBackslash) {
      previousCharacterWasBackslash = false;
    } else if (ch == '\\') {
      previousCharacterWasBackslash = true;
    }
  }

  return true;
}

// ES6 draft rev32 21.2.3.2.4.
//
// Return the escaped form of the pattern |src|. An empty pattern yields the
// emptyRegExp_ name; a pattern needing no escaping is returned as-is.
// Returns nullptr on failure.
JSLinearString* js::EscapeRegExpPattern(JSContext* cx, Handle<JSAtom*> src) {
  // Step 2.
  const size_t length = src->length();
  if (length == 0) {
    return cx->names().emptyRegExp_;
  }

  // The builder stays empty unless the helper finds something to escape.
  JSStringBuilder sb(cx);
  bool escaped;
  {
    JS::AutoCheckCannotGC nogc;
    escaped = src->hasLatin1Chars()
                  ? ::EscapeRegExpPattern(sb, src->latin1Chars(nogc), length)
                  : ::EscapeRegExpPattern(sb, src->twoByteChars(nogc), length);
  }
  if (!escaped) {
    return nullptr;
  }

  // Step 3.
  if (!sb.empty()) {
    return sb.finishString();
  }
  return src;
}

// ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
//
// Build the string "/" + escaped source + "/" + flag letters for |obj|.
// Returns nullptr if the object has no source, if escaping the source fails,
// or if building the result string fails.
JSLinearString* RegExpObject::toString(JSContext* cx,
                                       Handle<RegExpObject*> obj) {
  // Steps 3-4.
  Rooted<JSAtom*> src(cx, obj->getSource());
  if (!src) {
    return nullptr;
  }
  Rooted<JSLinearString*> escapedSrc(cx, EscapeRegExpPattern(cx, src));
  if (!escapedSrc) {
    // EscapeRegExpPattern returns nullptr on failure; it must not be
    // dereferenced below.
    return nullptr;
  }

  // Step 7.
  JSStringBuilder sb(cx);
  size_t len = escapedSrc->length();
  // Reserve room for the source plus the two delimiting slashes so that the
  // slash appends below cannot fail.
  if (!sb.reserve(len + 2)) {
    return nullptr;
  }
  sb.infallibleAppend('/');
  if (!sb.append(escapedSrc)) {
    return nullptr;
  }
  sb.infallibleAppend('/');

  // Steps 5-7. Flags are appended in the order d, g, i, m, s, u, v, y.
  if (obj->hasIndices() && !sb.append('d')) {
    return nullptr;
  }
  if (obj->global() && !sb.append('g')) {
    return nullptr;
  }
  if (obj->ignoreCase() && !sb.append('i')) {
    return nullptr;
  }
  if (obj->multiline() && !sb.append('m')) {
    return nullptr;
  }
  if (obj->dotAll() && !sb.append('s')) {
    return nullptr;
  }
  if (obj->unicode() && !sb.append('u')) {
    return nullptr;
  }
  if (obj->unicodeSets() && !sb.append('v')) {
    return nullptr;
  }
  if (obj->sticky() && !sb.append('y')) {
    return nullptr;
  }

  return sb.finishString();
}

// Return true if |ch| is a regular-expression SyntaxCharacter
// (ES 2016 draft Mar 25, 2016 21.2.1): one of ^ $ \ . * + ? ( ) [ ] { } |
template <typename CharT>
static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
  return ch == '^' || ch == '$' || ch == '\\' || ch == '.' || ch == '*' ||
         ch == '+' || ch == '?' || ch == '(' || ch == ')' || ch == '[' ||
         ch == ']' || ch == '{' || ch == '}' || ch == '|';
}

// Return true if any of the |length| characters starting at |chars| is a
// regular-expression meta character.
template <typename CharT>
bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
  const CharT* const end = chars + length;
  for (const CharT* cursor = chars; cursor != end; ++cursor) {
    if (IsRegExpMetaChar<CharT>(*cursor)) {
      return true;
    }
  }
  return false;
}

// Explicit instantiations for both string encodings.
template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
                                                 size_t length);

template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
                                               size_t length);

// Return true if the linear string |str| contains any regular-expression
// meta character, dispatching on the string's character encoding.
bool js::StringHasRegExpMetaChars(const JSLinearString* str) {
  AutoCheckCannotGC nogc;
  const size_t length = str->length();
  return str->hasLatin1Chars()
             ? HasRegExpMetaChars(str->latin1Chars(nogc), length)
             : HasRegExpMetaChars(str->twoByteChars(nogc), length);
}

/* RegExpShared */

// Construct a RegExpShared for |source| and |flags|. The source atom is
// stored through the CellWithTenuredGCPointer base; the pair count starts at
// zero.
RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
    : CellWithTenuredGCPointer(source), pairCount_(0), flags(flags) {}

// Trace the GC edges held by this RegExpShared: always the source (stored in
// the cell header), then either the pattern atom (Atom kind) or the JIT code
// of every compilation plus the groups template (all other kinds).
void RegExpShared::traceChildren(JSTracer* trc) {
  TraceNullableCellHeaderEdge(trc, this, "RegExpShared source");

  if (kind() == RegExpShared::Kind::Atom) {
    TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
    return;
  }

  for (auto& compilation : compilationArray) {
    TraceNullableEdge(trc, &compilation.jitCode, "RegExpShared code");
  }
  TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
}

void RegExpShared::discardJitCode() {
  for (auto& comp : compilationArray) {
    comp.jitCode = nullptr;
  }

  // We can also purge the tables used by JIT code.
  tables.clearAndFree();
}

// Release the malloc'ed data owned by this cell: the bytecode of each
// compilation, the named-capture index arrays, and the JIT code tables.
void RegExpShared::finalize(JS::GCContext* gcx) {
  for (auto& compilation : compilationArray) {
    if (!compilation.byteCode) {
      continue;
    }
    size_t byteLength = compilation.byteCodeLength();
    gcx->free_(this, compilation.byteCode, byteLength,
               MemoryUse::RegExpSharedBytecode);
  }

  if (namedCaptureIndices_) {
    size_t indicesBytes = numNamedCaptures() * sizeof(uint32_t);
    gcx->free_(this, namedCaptureIndices_, indicesBytes,
               MemoryUse::RegExpSharedNamedCaptureData);
  }

  if (namedCaptureSliceIndices_) {
    size_t sliceBytes = numDistinctNamedCaptures() * sizeof(uint32_t);
    gcx->free_(this, namedCaptureSliceIndices_, sliceBytes,
               MemoryUse::RegExpSharedNamedCaptureSliceData);
  }

  // The tables member is destroyed explicitly here.
  tables.~JitCodeTables();
}

// Make sure |re| is compiled in a form suitable for matching |input|.
// CodeKind::Any resolves to bytecode, or to JIT code when the regexp is
// marked for tier-up or the input is longer than 1000 characters. A JIT
// request falls back to bytecode when native regexps are disabled. Returns
// false if a needed compilation fails.
/* static */
bool RegExpShared::compileIfNecessary(JSContext* cx,
                                      MutableHandleRegExpShared re,
                                      Handle<JSLinearString*> input,
                                      RegExpShared::CodeKind codeKind) {
  using Code = RegExpShared::CodeKind;
  using ReKind = RegExpShared::Kind;

  if (codeKind == Code::Any) {
    // Regexps start out interpreted and are compiled once hot; very long
    // inputs tier up eagerly.
    const bool wantJit = re->markedForTierUp() || input->length() > 1000;
    codeKind = wantJit ? Code::Jitcode : Code::Bytecode;
  }

  // Without native codegen, bytecode is the only option.
  if (!IsNativeRegExpEnabled() && codeKind == Code::Jitcode) {
    codeKind = Code::Bytecode;
  }

  // An unparsed regexp always needs compiling; a parsed RegExp-kind one only
  // if this encoding/code-kind combination has not been compiled yet.
  const bool needsCompile =
      re->kind() == ReKind::Unparsed ||
      (re->kind() == ReKind::RegExp &&
       !re->isCompiled(input->hasLatin1Chars(), codeKind));
  if (!needsCompile) {
    return true;
  }
  return irregexp::CompilePattern(cx, re, input, codeKind);
}

// Run |re| against |input| starting at |start|, writing match positions into
// |matches|. Compiles the regexp on demand. Returns Success,
// Success_NotFound, or Error (allocation failure, compile failure, engine
// error, or too many interrupted attempts).
/* static */
RegExpRunStatus RegExpShared::execute(JSContext* cx,
                                      MutableHandleRegExpShared re,
                                      Handle<JSLinearString*> input,
                                      size_t start, VectorMatchPairs* matches) {
  MOZ_ASSERT(matches);

  // TODO: Add tracelogger support

  /* Compile the code at point-of-use. */
  if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
    return RegExpRunStatus::Error;
  }

  /*
   * Ensure sufficient memory for output vector.
   * No need to initialize it. The RegExp engine fills them in on a match.
   * This single allocation serves both the atom path and the irregexp path
   * below.
   */
  if (!matches->allocOrExpandArray(re->pairCount())) {
    ReportOutOfMemory(cx);
    return RegExpRunStatus::Error;
  }

  if (re->kind() == RegExpShared::Kind::Atom) {
    return RegExpShared::executeAtom(re, input, start, matches);
  }

  uint32_t interruptRetries = 0;
  const uint32_t maxInterruptRetries = 4;
  do {
    DebugOnly<bool> alreadyThrowing = cx->isExceptionPending();
    RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
#ifdef DEBUG
    // Check if we must simulate the interruption
    if (js::irregexp::IsolateShouldSimulateInterrupt(cx->isolate)) {
      js::irregexp::IsolateClearShouldSimulateInterrupt(cx->isolate);
      cx->requestInterrupt(InterruptReason::CallbackUrgent);
    }
#endif
    if (result == RegExpRunStatus::Error) {
      /* Execute can return RegExpRunStatus::Error:
       *
       *  1. If the native stack overflowed
       *  2. If the backtrack stack overflowed
       *  3. If an interrupt was requested during execution.
       *
       * In the first two cases, we want to throw an error. In the
       * third case, we want to handle the interrupt and try again.
       * We cap the number of times we will retry.
       */
      if (cx->isExceptionPending()) {
        // If this regexp is being executed by recovery instructions
        // while bailing out to handle an exception, there may already
        // be an exception pending. If so, just return that exception
        // instead of reporting a new one.
        MOZ_ASSERT(alreadyThrowing);
        return RegExpRunStatus::Error;
      }
      if (cx->hasAnyPendingInterrupt()) {
        if (!CheckForInterrupt(cx)) {
          return RegExpRunStatus::Error;
        }
        if (interruptRetries++ < maxInterruptRetries) {
          // The initial execution may have been interpreted, or the
          // interrupt may have triggered a GC that discarded jitcode.
          // To maximize the chance of succeeding before being
          // interrupted again, we want to ensure we are compiled.
          if (!compileIfNecessary(cx, re, input,
                                  RegExpShared::CodeKind::Jitcode)) {
            return RegExpRunStatus::Error;
          }
          // Retry the match; the loop condition is always true.
          continue;
        }
      }
      // If we have run out of retries, this regexp takes too long to execute.
      ReportOverRecursed(cx);
      return RegExpRunStatus::Error;
    }

    MOZ_ASSERT(result == RegExpRunStatus::Success ||
               result == RegExpRunStatus::Success_NotFound);

    return result;
  } while (true);

  MOZ_CRASH("Unreachable");
}

// Switch a still-unparsed RegExpShared to Atom kind, matching |pattern|
// literally. Atom matches produce exactly one match pair.
void RegExpShared::useAtomMatch(Handle<JSAtom*> pattern) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
  kind_ = RegExpShared::Kind::Atom;
  patternAtom_ = pattern;
  pairCount_ = 1;
}

// Switch a still-unparsed RegExpShared to RegExp kind with |pairCount| match
// pairs, and start the tier-up counter at the configured warm-up threshold.
void RegExpShared::useRegExpMatch(size_t pairCount) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
  kind_ = RegExpShared::Kind::RegExp;
  pairCount_ = pairCount;
  ticks_ = jit::JitOptions.regexpWarmUpThreshold;
}

// Store the named-capture data on |re| (which must not have any yet) and
// account for the memory of the index arrays on the cell. The slice-index
// array is only accounted for when |sliceIndices| is non-null.
/* static */
void RegExpShared::InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
                                           uint32_t numNamedCaptures,
                                           uint32_t numDistinctNamedCaptures,
                                           Handle<PlainObject*> templateObject,
                                           uint32_t* captureIndices,
                                           uint32_t* sliceIndices) {
  MOZ_ASSERT(!re->groupsTemplate_);
  MOZ_ASSERT(!re->namedCaptureIndices_);
  MOZ_ASSERT(!re->namedCaptureSliceIndices_);

  re->numNamedCaptures_ = numNamedCaptures;
  re->numDistinctNamedCaptures_ = numDistinctNamedCaptures;
  re->groupsTemplate_ = templateObject;
  re->namedCaptureIndices_ = captureIndices;
  re->namedCaptureSliceIndices_ = sliceIndices;

  const uint32_t captureBytes = numNamedCaptures * sizeof(uint32_t);
  js::AddCellMemory(re, captureBytes, MemoryUse::RegExpSharedNamedCaptureData);

  if (!sliceIndices) {
    return;
  }
  const uint32_t sliceBytes = numDistinctNamedCaptures * sizeof(uint32_t);
  js::AddCellMemory(re, sliceBytes,
                    MemoryUse::RegExpSharedNamedCaptureSliceData);
}

// Count down the tier-up counter by one, stopping at zero. Only valid for
// RegExp-kind shareds. markedForTierUp() reports true once it reaches zero.
void RegExpShared::tierUpTick() {
  MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
  if (ticks_ > 0) {
    ticks_--;
  }
}

// Return true when this regexp should be compiled to JIT code: native
// regexps are enabled, this is a RegExp-kind shared, and its tier-up counter
// has reached zero.
bool RegExpShared::markedForTierUp() const {
  return IsNativeRegExpEnabled() && kind() == RegExpShared::Kind::RegExp &&
         ticks_ == 0;
}

// When either unicode flag is set and |index| points to a trail surrogate
// that is preceded by a lead surrogate, return the index of that lead
// surrogate. Otherwise return |index| unchanged.
static size_t StepBackToLeadSurrogate(const JSLinearString* input,
                                      size_t index) {
  // Only an interior position of a two-byte string can be the trail half of
  // a surrogate pair.
  const bool canSplitPair =
      index != 0 && index < input->length() && !input->hasLatin1Chars();
  if (!canSplitPair) {
    return index;
  }

  /*
   * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
   * 21.2.2.2 step 2.
   *   Let listIndex be the index into Input of the character that was obtained
   *   from element index of str.
   *
   * The spec matches against decoded Unicode code points, whereas this
   * implementation matches against UTF-16 code units. For step 2 the index
   * therefore has to be moved back by one when it lands on a trail surrogate
   * that has a matching lead surrogate.
   *
   *   var r = /\uD83D\uDC38/ug;
   *   r.lastIndex = 1;
   *   var str = "\uD83D\uDC38";
   *   var result = r.exec(str); // pattern match starts from index 0
   *   print(result.index);      // prints 0
   *
   * Note: This doesn't match the current spec text and results in different
   * values for `result.index` under certain conditions. However, the spec
   * will change to match our implementation's behavior.
   * See https://github.com/tc39/ecma262/issues/128.
   */
  JS::AutoCheckCannotGC nogc;
  const auto* units = input->twoByteChars(nogc);
  const bool splitsPair = unicode::IsTrailSurrogate(units[index]) &&
                          unicode::IsLeadSurrogate(units[index - 1]);
  return splitsPair ? index - 1 : index;
}

// Match an Atom-kind regexp, i.e. search |input| for the literal pattern
// atom, starting at |start|. With the sticky flag the pattern must occur
// exactly at |start|; otherwise the first occurrence at or after |start| is
// used. On success the single match pair in |matches| is filled in.
static RegExpRunStatus ExecuteAtomImpl(RegExpShared* re,
                                       const JSLinearString* input,
                                       size_t start, MatchPairs* matches) {
  MOZ_ASSERT(re->pairCount() == 1);
  const size_t inputLength = input->length();
  const size_t patternLength = re->patternAtom()->length();

  if (re->unicode() || re->unicodeSets()) {
    start = StepBackToLeadSurrogate(input, start);
  }

  if (!re->sticky()) {
    int found = StringFindPattern(input, re->patternAtom(), start);
    if (found == -1) {
      return RegExpRunStatus::Success_NotFound;
    }

    (*matches)[0].start = found;
    (*matches)[0].limit = found + patternLength;
    matches->checkAgainst(inputLength);
    return RegExpRunStatus::Success;
  }

  // Sticky: the match must begin at |start|. A wrapped sum means size_t
  // overflow; a sum past the end means the pattern cannot fit.
  const size_t matchEnd = patternLength + start;
  if (matchEnd < patternLength || matchEnd > inputLength) {
    return RegExpRunStatus::Success_NotFound;
  }
  if (!HasSubstringAt(input, re->patternAtom(), start)) {
    return RegExpRunStatus::Success_NotFound;
  }

  (*matches)[0].start = start;
  (*matches)[0].limit = matchEnd;
  matches->checkAgainst(inputLength);
  return RegExpRunStatus::Success;
}

// Raw-pointer entry point for atom matching. It forwards to ExecuteAtomImpl
// under an AutoUnsafeCallWithABI guard.
// NOTE(review): presumably called directly from JIT code via the ABI call
// mechanism - confirm against callers.
RegExpRunStatus js::ExecuteRegExpAtomRaw(RegExpShared* re,
                                         const JSLinearString* input,
                                         size_t start, MatchPairs* matchPairs) {
  AutoUnsafeCallWithABI unsafe;
  return ExecuteAtomImpl(re, input, start, matchPairs);
}

// Handle-taking wrapper around ExecuteAtomImpl, used by execute() for
// Atom-kind regexps.
/* static */
RegExpRunStatus RegExpShared::executeAtom(MutableHandleRegExpShared re,
                                          Handle<JSLinearString*> input,
                                          size_t start,
                                          VectorMatchPairs* matches) {
  return ExecuteAtomImpl(re, input, start, matches);
}

// Report the malloc'ed memory owned by this cell: the bytecode of every
// compilation, the tables container itself, and each table it holds.
size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
  size_t total = 0;

  for (const auto& comp : compilationArray) {
    if (!comp.byteCode) {
      continue;
    }
    total += mallocSizeOf(comp.byteCode);
  }

  total += tables.sizeOfExcludingThis(mallocSizeOf);
  for (size_t index = 0; index < tables.length(); index++) {
    total += mallocSizeOf(tables[index].get());
  }

  return total;
}

/* RegExpRealm */

// Start with no cached shapes: the optimizable prototype/instance shapes and
// every entry of matchResultShapes_ are null until created on demand.
RegExpRealm::RegExpRealm()
    : optimizableRegExpPrototypeShape_(nullptr),
      optimizableRegExpInstanceShape_(nullptr) {
  for (auto& shape : matchResultShapes_) {
    shape = nullptr;
  }
}

// Build and cache the shape used for match result arrays of the given
// |kind|. A template array is created and the expected properties are
// defined on it in a fixed order, so that each lands in its well-known slot;
// the template's shape is then stored in matchResultShapes_[kind] and
// returned. Must not be called when that entry is already set. Returns
// nullptr on failure.
SharedShape* RegExpRealm::createMatchResultShape(JSContext* cx,
                                                 ResultShapeKind kind) {
  MOZ_ASSERT(!matchResultShapes_[kind]);

  /* Create template array object */
  Rooted<ArrayObject*> templateObject(cx, NewDenseEmptyArray(cx));
  if (!templateObject) {
    return nullptr;
  }

  if (kind == ResultShapeKind::Indices) {
    /* The |indices| array only has a |groups| property. */
    if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
      return nullptr;
    }
    MOZ_ASSERT(templateObject->getLastProperty().slot() == IndicesGroupsSlot);

    matchResultShapes_[kind].set(templateObject->sharedShape());
    return matchResultShapes_[kind];
  }

  // Normal and WithIndices results: define index, input and groups, in that
  // order, asserting after each that it occupies the expected slot.

  /* Set dummy index property */
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().index,
                                UndefinedHandleValue, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  MOZ_ASSERT(templateObject->getLastProperty().slot() ==
             MatchResultObjectIndexSlot);

  /* Set dummy input property */
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().input,
                                UndefinedHandleValue, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  MOZ_ASSERT(templateObject->getLastProperty().slot() ==
             MatchResultObjectInputSlot);

  /* Set dummy groups property */
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
                                UndefinedHandleValue, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  MOZ_ASSERT(templateObject->getLastProperty().slot() ==
             MatchResultObjectGroupsSlot);

  if (kind == ResultShapeKind::WithIndices) {
    /* Set dummy indices property */
    if (!NativeDefineDataProperty(cx, templateObject, cx->names().indices,
                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
      return nullptr;
    }
    MOZ_ASSERT(templateObject->getLastProperty().slot() ==
               MatchResultObjectIndicesSlot);
  }

#ifdef DEBUG
  // For the Normal kind, check the slot layout against the
  // MatchResultObject* constants.
  if (kind == ResultShapeKind::Normal) {
    MOZ_ASSERT(templateObject->numFixedSlots() == 0);
    MOZ_ASSERT(templateObject->numDynamicSlots() ==
               MatchResultObjectNumDynamicSlots);
    MOZ_ASSERT(templateObject->slotSpan() == MatchResultObjectSlotSpan);
  }
#endif

  matchResultShapes_[kind].set(templateObject->sharedShape());

  return matchResultShapes_[kind];
}

// Trace every GC edge owned by this RegExpRealm: the statics object when one
// exists, each entry of |matchResultShapes_|, and the two optimizable-shape
// edges.
void RegExpRealm::trace(JSTracer* trc) {
  // The statics object may be absent, so only trace it when present.
  if (regExpStatics) {
    regExpStatics->trace(trc);
  }

  // Entries may still be null, hence the nullable-edge tracing.
  for (auto& resultShape : matchResultShapes_) {
    TraceNullableEdge(trc, &resultShape, "RegExpRealm::matchResultShapes_");
  }

  TraceNullableEdge(trc, &optimizableRegExpPrototypeShape_,
                    "RegExpRealm::optimizableRegExpPrototypeShape_");
  TraceNullableEdge(trc, &optimizableRegExpInstanceShape_,
                    "RegExpRealm::optimizableRegExpInstanceShape_");
}

// Return the RegExpShared stored in |set_| for the (source, flags) key,
// creating and inserting a new one on a miss. Returns nullptr if allocating
// the new cell or adding it to the set fails.
RegExpShared* RegExpZone::get(JSContext* cx, Handle<JSAtom*> source,
                              RegExpFlags flags) {
  DependentAddPtr<Set> p(cx, set_, Key(source, flags));
  if (p) {
    // Hit: hand back the existing entry.
    return *p;
  }

  auto* shared = cx->newCell<RegExpShared>(source, flags);
  if (!shared) {
    return nullptr;
  }

  // NOTE(review): the key is rebuilt from |source| here rather than reused
  // from the lookup above -- presumably because the allocation in between can
  // GC; confirm before hoisting it into a local.
  if (!p.add(cx, set_, Key(source, flags), shared)) {
    return nullptr;
  }

  return shared;
}

// Report the malloc'd size of this RegExpZone itself plus the storage that
// |set_| reports via sizeOfExcludingThis.
size_t RegExpZone::sizeOfIncludingThis(
    mozilla::MallocSizeOf mallocSizeOf) const {
  const size_t selfBytes = mallocSizeOf(this);
  const size_t setBytes = set_.sizeOfExcludingThis(mallocSizeOf);
  return selfBytes + setBytes;
}

// Construct the zone's set of RegExpShared entries; |zone| is forwarded as
// both constructor arguments of |set_|.
RegExpZone::RegExpZone(Zone* zone) : set_(zone, zone) {}

/* Functions */

// Create a new RegExpObject that reuses |regex|'s shared shape and is
// initialized (via initAndZeroLastIndex) from the source and flags of
// |regex|'s RegExpShared, which the new object then also points at.
//
// Returns nullptr if the object allocation or RegExpObject::getShared fails.
JSObject* js::CloneRegExpObject(JSContext* cx, Handle<RegExpObject*> regex) {
  constexpr gc::AllocKind allocKind = RegExpObject::AllocKind;
  static_assert(gc::GetGCKindSlots(allocKind) == RegExpObject::RESERVED_SLOTS);
  MOZ_ASSERT(regex->asTenured().getAllocKind() == allocKind);

  // Allocate the copy first, using the same shape as the original.
  Rooted<SharedShape*> originalShape(cx, regex->sharedShape());
  Rooted<RegExpObject*> copy(
      cx, NativeObject::create<RegExpObject>(cx, allocKind, gc::Heap::Default,
                                             originalShape));
  if (!copy) {
    return nullptr;
  }

  // Fetch the original's RegExpShared only after the allocation above; the
  // raw pointer is consumed immediately below.
  RegExpShared* sharedData = RegExpObject::getShared(cx, regex);
  if (!sharedData) {
    return nullptr;
  }

  copy->initAndZeroLastIndex(sharedData->getSource(), sharedData->getFlags(),
                             cx);
  copy->setShared(sharedData);

  return copy;
}

template <typename CharT>
static bool ParseRegExpFlags(const CharT* chars, size_t length,
                             RegExpFlags* flagsOut, char16_t* invalidFlag) {
  *flagsOut = RegExpFlag::NoFlags;

  for (size_t i = 0; i < length; i++) {
    uint8_t flag;
    if (!JS::MaybeParseRegExpFlag(chars[i], &flag) || *flagsOut & flag) {
      *invalidFlag = chars[i];
      return false;
    }

    // /u and /v flags are mutually exclusive.
    if (((*flagsOut & RegExpFlag::Unicode) &&
         (flag & RegExpFlag::UnicodeSets)) ||
        ((*flagsOut & RegExpFlag::UnicodeSets) &&
         (flag & RegExpFlag::Unicode))) {
      *invalidFlag = chars[i];
      return false;
    }

    *flagsOut |= flag;
  }

  return true;
}

// Parse the flag string |flagStr| into |*flagsOut|.
//
// The string is linearized and its characters are handed to the file-local
// ParseRegExpFlags helper for the matching character width. When the helper
// rejects a character, that character is converted to UTF-8 and reported with
// JSMSG_BAD_REGEXP_FLAG. Returns false on linearization failure, on an invalid
// flag, or when the UTF-8 conversion for the error message fails.
bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr,
                          RegExpFlags* flagsOut) {
  JSLinearString* flat = flagStr->ensureLinear(cx);
  if (!flat) {
    return false;
  }

  const size_t flagCount = flat->length();

  // Only written by the helper when it rejects a character.
  char16_t offender;
  bool parsed;
  {
    // Raw character pointers are only used inside this no-GC scope.
    AutoCheckCannotGC nogc;
    parsed = flat->hasLatin1Chars()
                 ? ::ParseRegExpFlags(flat->latin1Chars(nogc), flagCount,
                                      flagsOut, &offender)
                 : ::ParseRegExpFlags(flat->twoByteChars(nogc), flagCount,
                                      flagsOut, &offender);
  }
  if (parsed) {
    return true;
  }

  // Build a one-character UTF-8 string naming the bad flag for the message.
  JS::TwoByteChars range(&offender, 1);
  UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str());
  if (utf8) {
    JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
                             JSMSG_BAD_REGEXP_FLAG, utf8.get());
  }
  return false;
}

// Size reported for a RegExpShared node: the arena thing size for the
// REGEXP_SHARED alloc kind plus whatever the object itself reports through
// sizeOfExcludingThis.
JS::ubi::Node::Size JS::ubi::Concrete<RegExpShared>::size(
    mozilla::MallocSizeOf mallocSizeOf) const {
  const auto cellBytes =
      js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED);
  const auto extraBytes = get().sizeOfExcludingThis(mallocSizeOf);
  return cellBytes + extraBytes;
}

/*
* Regular Expressions.
*/
// Create a RegExp object from |length| narrow characters at |bytes|.
//
// The input is first inflated into an owned two-byte buffer; returns nullptr
// if that inflation fails, otherwise whatever RegExpObject::create returns.
JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes,
                                            size_t length, RegExpFlags flags) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  UniqueTwoByteChars inflated(InflateString(cx, bytes, length));
  if (inflated) {
    return RegExpObject::create(cx, inflated.get(), length, flags,
                                GenericObject);
  }

  return nullptr;
}

// Create a RegExp object from |length| two-byte characters at |chars| by
// forwarding directly to RegExpObject::create.
JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx,
                                              const char16_t* chars,
                                              size_t length,
                                              RegExpFlags flags) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  JSObject* regexp =
      RegExpObject::create(cx, chars, length, flags, GenericObject);
  return regexp;
}

// Reset the RegExpStatics of the global |obj| with |input|.
//
// |obj| is treated as a GlobalObject. Returns false if the statics cannot be
// obtained.
JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj,
                                      HandleString input) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);
  cx->check(input);

  RegExpStatics* statics =
      GlobalObject::getRegExpStatics(cx, obj.as<GlobalObject>());
  if (statics) {
    statics->reset(input);
    return true;
  }

  return false;
}

// Clear the RegExpStatics of the global |obj|.
//
// |obj| must be non-null and is treated as a GlobalObject. Returns false if
// the statics cannot be obtained.
JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);
  MOZ_ASSERT(obj);

  RegExpStatics* statics =
      GlobalObject::getRegExpStatics(cx, obj.as<GlobalObject>());
  if (!statics) {
    return false;
  }

  statics->clear();
  return true;
}

// Run the regexp |reobj| against the |length| characters at |chars|, using
// the RegExpStatics of the global |obj|.
//
// The characters are copied into a new JS string, and the work is delegated
// to ExecuteRegExpLegacy together with |indexp|, |test| and |rval|. Returns
// false if the statics cannot be obtained or the string copy fails.
JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj,
                                     HandleObject reobj, const char16_t* chars,
                                     size_t length, size_t* indexp, bool test,
                                     MutableHandleValue rval) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  // Statics are fetched before the input string is allocated.
  RegExpStatics* statics =
      GlobalObject::getRegExpStatics(cx, obj.as<GlobalObject>());
  if (!statics) {
    return false;
  }

  Rooted<JSLinearString*> subject(cx,
                                  NewStringCopyN<CanGC>(cx, chars, length));
  if (!subject) {
    return false;
  }

  Handle<RegExpObject*> regexp = reobj.as<RegExpObject>();
  return ExecuteRegExpLegacy(cx, statics, regexp, subject, indexp, test, rval);
}

// Run the regexp |obj| against the |length| characters at |chars| without a
// RegExpStatics object (nullptr is passed to ExecuteRegExpLegacy).
//
// The characters are copied into a new JS string first; returns false if that
// copy fails.
JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj,
                                              const char16_t* chars,
                                              size_t length, size_t* indexp,
                                              bool test,
                                              MutableHandleValue rval) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  Rooted<JSLinearString*> subject(cx,
                                  NewStringCopyN<CanGC>(cx, chars, length));
  if (!subject) {
    return false;
  }

  Handle<RegExpObject*> regexp = obj.as<RegExpObject>();
  return ExecuteRegExpLegacy(cx, nullptr, regexp, subject, indexp, test, rval);
}

// Store in |*isRegExp| whether GetBuiltinClass classifies |obj| as
// ESClass::RegExp. Returns false, leaving |*isRegExp| untouched, when
// GetBuiltinClass fails.
JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj,
                                      bool* isRegExp) {
  cx->check(obj);

  ESClass builtinClass;
  const bool classified = GetBuiltinClass(cx, obj, &builtinClass);
  if (classified) {
    *isRegExp = (builtinClass == ESClass::RegExp);
  }
  return classified;
}

// Return the flags of the RegExpShared obtained for |obj| through
// RegExpToShared, or RegExpFlag::NoFlags when no RegExpShared is returned.
JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  if (RegExpShared* reShared = RegExpToShared(cx, obj)) {
    return reShared->getFlags();
  }

  return RegExpFlag::NoFlags;
}

// Return the source string of the RegExpShared obtained for |obj| through
// RegExpToShared, or nullptr when no RegExpShared is returned.
JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  if (RegExpShared* reShared = RegExpToShared(cx, obj)) {
    return reShared->getSource();
  }

  return nullptr;
}

// Check the syntax of the |length|-character pattern at |chars| under |flags|
// using irregexp::CheckPatternSyntax.
//
// When this returns true, |error| is undefined if the check succeeded;
// otherwise it holds the exception produced by the failed check, and that
// exception is no longer pending on |cx|.
//
// Returns false when the frontend error cannot be converted to a runtime
// error, when the failure is an out-of-memory or over-recursion condition, or
// when the pending exception cannot be retrieved.
JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars,
                                         size_t length, RegExpFlags flags,
                                         MutableHandleValue error) {
  AssertHeapIsIdle();
  CHECK_THREAD(cx);

  // The syntax checker takes a token stream argument; a dummy one backed by
  // default compile options and a frontend context is supplied here.
  AutoReportFrontendContext fc(cx);
  CompileOptions dummyOptions(cx);
  frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);

  // Scope the use of the context's temporary LifoAlloc to this call.
  LifoAllocScope allocScope(&cx->tempLifoAlloc());

  mozilla::Range<const char16_t> source(chars, length);
  bool success = irregexp::CheckPatternSyntax(
      cx->tempLifoAlloc(), cx->stackLimitForCurrentPrincipal(),
      dummyTokenStream, source, flags);
  // Default result; overwritten below only when an exception is captured.
  error.set(UndefinedValue());
  if (!success) {
    // Turn the error recorded on the frontend context into a runtime error on
    // |cx| before inspecting it.
    if (!fc.convertToRuntimeErrorAndClear()) {
      return false;
    }
    // We can fail because of OOM or over-recursion even if the syntax is valid.
    if (cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()) {
      return false;
    }

    // Hand the exception to the caller through |error| instead of leaving it
    // pending.
    if (!cx->getPendingException(error)) {
      return false;
    }
    cx->clearPendingException();
  }
  return true;
}

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.16 Sekunden (vorverarbeitet am 2026-04-26) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.