// Copyright 2019 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file.
class AlternativeGenerationList; class BoyerMooreLookahead; class GreedyLoopState; class NodeVisitor; class QuickCheckDetails; class RegExpCompiler; class SeqRegExpNode; class Trace; struct PreloadState;
// Returns true if the interests and assumptions of this node // matches the given one. bool Matches(NodeInfo* that) { return (at_end == that->at_end) &&
(follows_word_interest == that->follows_word_interest) &&
(follows_newline_interest == that->follows_newline_interest) &&
(follows_start_interest == that->follows_start_interest);
}
// Updates the interests of this node given the interests of the // node preceding it. void AddFromPreceding(NodeInfo* that) {
at_end |= that->at_end;
follows_word_interest |= that->follows_word_interest;
follows_newline_interest |= that->follows_newline_interest;
follows_start_interest |= that->follows_start_interest;
}
// Sets the interests of this node to include the interests of the // following node. void AddFromFollowing(NodeInfo* that) {
follows_word_interest |= that->follows_word_interest;
follows_newline_interest |= that->follows_newline_interest;
follows_start_interest |= that->follows_start_interest;
}
// These bits are set of this node has to know what the preceding // character was. bool follows_word_interest : 1; bool follows_newline_interest : 1; bool follows_start_interest : 1;
// Any successful match starting from the current node will consume at least // this many characters. This does not necessarily mean that there is a // possible match with exactly this many characters, but we generally try to // get this number as high as possible to allow for early exit on failure.
uint8_t eats_at_least_from_possibly_start;
// Like eats_at_least_from_possibly_start, but with the additional assumption // that start-of-string assertions (^) can't match. This value is greater than // or equal to eats_at_least_from_possibly_start.
uint8_t eats_at_least_from_not_start;
};
class RegExpNode : public ZoneObject { public: explicit RegExpNode(Zone* zone)
: replacement_(nullptr),
on_work_list_(false),
trace_count_(0),
zone_(zone) {
bm_info_[0] = bm_info_[1] = nullptr;
} virtual ~RegExpNode(); virtualvoid Accept(NodeVisitor* visitor) = 0; // Generates a goto to this node or actually generates the code at this point. virtualvoid Emit(RegExpCompiler* compiler, Trace* trace) = 0; // How many characters must this node consume at a minimum in order to // succeed. The not_at_start argument is used to indicate that we know we are // not at the start of the input. In this case anchored branches will always // fail and can be ignored when determining how many characters are consumed // on success. If this node has not been analyzed yet, EatsAtLeast returns 0.
uint32_t EatsAtLeast(bool not_at_start); // Returns how many characters this node must consume in order to succeed, // given that this is a LoopChoiceNode whose counter register is in a // newly-initialized state at the current position in the generated code. For // example, consider /a{6,8}/. Absent any extra information, the // LoopChoiceNode for the repetition must report that it consumes at least // zero characters, because it may have already looped several times. However, // with a newly-initialized counter, it can report that it consumes at least // six characters. virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry(); // Emits some quick code that checks whether the preloaded characters match. // Falls through on certain failure, jumps to the label on possible success. // If the node cannot make a quick check it does nothing and returns false. bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace,
Trace* trace, bool preload_has_checked_bounds,
Label* on_possible_success,
QuickCheckDetails* details_return, bool fall_through_on_failure, ChoiceNode* predecessor); // For a given number of characters this returns a mask and a value. The // next n characters are anded with the mask and compared with the value. // A comparison failure indicates the node cannot match the next n characters. // A comparison success indicates the node may match. virtualvoid GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) = 0; // Fills in quick check details for this node, given that this is a // LoopChoiceNode whose counter register is in a newly-initialized state at // the current position in the generated code. For example, consider /a{6,8}/. // Absent any extra information, the LoopChoiceNode for the repetition cannot // generate any useful quick check because a match might be the (empty) // continuation node. However, with a newly-initialized counter, it can // generate a quick check for several 'a' characters at once. virtualvoid GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); staticconstint kNodeIsTooComplexForGreedyLoops = kMinInt; virtualint GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } // Only returns the successor for a text node of length 1 that matches any // character and that has no guards on it. virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) { return nullptr;
}
// Collects information on the possible code units (mod 128) that can match if // we look forward. This is used for a Boyer-Moore-like string searching // implementation. TODO(erikcorry): This should share more code with // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit // the number of nodes we are willing to look at in order to create this data. staticconstint kRecursionBudget = 200; bool KeepRecursing(RegExpCompiler* compiler); virtualvoid FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
UNREACHABLE();
}
// If we know that the input is one-byte then there are some nodes that can // never match. This method returns a node that can be substituted for // itself, or nullptr if the node can never match. virtual RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) { returnthis;
} // Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated); return replacement_;
}
RegExpNode* set_replacement(RegExpNode* replacement) {
info()->replacement_calculated = true;
replacement_ = replacement; return replacement; // For convenience.
}
// We want to avoid recalculating the lookahead info, so we store it on the // node. Only info that is for this node is stored. We can tell that the // info is for this node when offset == 0, so the information is calculated // relative to this node. void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) { if (offset == 0) set_bm_info(not_at_start, bm);
}
Label* label() { return &label_; } // If non-generic code is generated for a node (i.e. the node is not at the // start of the trace) then it cannot be reused. This variable sets a limit // on how often we allow that to happen before we insist on starting a new // trace and generating generic code for a node that can be reused by flushing // the deferred actions in the current trace and generating a goto. staticconstint kMaxCopiesCodeGenerated = 10;
// TODO(v8:10441): This is a hacky way to avoid exponential code size growth // for very large choice nodes that can be generated by unicode property // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to // have generated the maximum count of code copies already. // We should instead fix this properly, e.g. by using the code size budget // (flush_budget) or by generating property escape matches as calls to a C // function. void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; }
// Saved values for EatsAtLeast results, to avoid recomputation. Filled in // during analysis (valid if info_.been_analyzed is true).
EatsAtLeastInfo eats_at_least_;
// This variable keeps track of how many times code has been generated for // this node (in different traces). We don't keep track of where the // generated code is located unless the code is generated at the start of // a trace, in which case it is generic and can be reused by flushing the // deferred operations in the current trace and generating a goto. int trace_count_;
BoyerMooreLookahead* bm_info_[2];
private: union { struct { int reg; int value;
} u_store_register; struct { int reg;
} u_increment_register; struct { int reg; bool is_capture;
} u_position_register; struct { int stack_pointer_register; int current_position_register; int clear_register_count; int clear_register_from;
ActionNode* success_node; // Only used for positive submatch.
} u_submatch; struct { int start_register; int repetition_register; int repetition_limit;
} u_empty_match_check; struct { int range_from; int range_to;
} u_clear_captures; struct { int flags;
} u_modify_flags;
} data_;
class TextNode : public SeqRegExpNode { public:
TextNode(ZoneList<TextElement>* elms, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
TextNode(RegExpClassRanges* that, bool read_backward, RegExpNode* on_success)
: SeqRegExpNode(on_success),
elms_(zone()->New<ZoneList<TextElement>>(1, zone())),
read_backward_(read_backward) {
elms_->Add(TextElement::ClassRanges(that), zone());
} // Create TextNode for a single character class for the given ranges. static TextNode* CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges, bool read_backward,
RegExpNode* on_success); // Create TextNode for a surrogate pair (i.e. match a sequence of two uc16 // code unit ranges). static TextNode* CreateForSurrogatePair(
Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges, bool read_backward, RegExpNode* on_success); static TextNode* CreateForSurrogatePair(Zone* zone,
ZoneList<CharacterRange>* lead_ranges,
CharacterRange trail, bool read_backward,
RegExpNode* on_success);
TextNode* AsTextNode() override { returnthis; } void Accept(NodeVisitor* visitor) override; void Emit(RegExpCompiler* compiler, Trace* trace) override; void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override;
ZoneList<TextElement>* elements() { return elms_; } bool read_backward() { return read_backward_; } void MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
RegExpFlags flags); int GreedyLoopTextLength() override;
RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) override; void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override; void CalculateOffsets();
RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override; int Length();
private: enum TextEmitPassType {
NON_LATIN1_MATCH, // Check for characters that can never match.
SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
CASE_CHARACTER_MATCH, // Case-independent single character check.
CHARACTER_CLASS_MATCH // Character class.
}; void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, bool preloaded, Trace* trace, bool first_element_checked, int* checked_up_to);
ZoneList<TextElement>* elms_; bool read_backward_;
};
// If true, this node is never checked at the start of the input. // Allows a new trace to start with at_start() set to false. bool not_at_start_; bool being_calculated_;
};
class NegativeLookaroundChoiceNode : public ChoiceNode { public: explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
GuardedAlternative then_do_this,
Zone* zone)
: ChoiceNode(2, zone) {
AddAlternative(this_must_fail);
AddAlternative(then_do_this);
} void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) override; void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override {
continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm,
not_at_start); if (offset == 0) set_bm_info(not_at_start, bm);
} static constexpr int kLookaroundIndex = 0; static constexpr int kContinueIndex = 1;
RegExpNode* lookaround_node() { return alternatives()->at(kLookaroundIndex).node();
}
RegExpNode* continue_node() { return alternatives()->at(kContinueIndex).node();
} // For a negative lookahead we don't emit the quick check for the // alternative that is expected to fail. This is because quick check code // starts by loading enough characters for the alternative that takes fewest // characters, but on a negative lookahead the negative branch did not take // part in that calculation (EatsAtLeast) so the assumptions don't hold. bool try_to_emit_quick_check_for_alternative(bool is_first) override { return !is_first;
}
NegativeLookaroundChoiceNode* AsNegativeLookaroundChoiceNode() override { returnthis;
} void Accept(NodeVisitor* visitor) override;
RegExpNode* FilterOneByte(int depth, RegExpCompiler* compiler) override;
};
private: // AddAlternative is made private for loop nodes because alternatives // should not be added freely, we need to keep track of which node // goes back to the node itself. void AddAlternative(GuardedAlternative node) {
ChoiceNode::AddAlternative(node);
}
// Temporary marker set only while generating quick check details. Represents // whether GetQuickCheckDetails traversed the initialization node for this // loop's counter. If so, we may be able to generate stricter quick checks // because we know the loop node must match at least min_loop_iterations_ // times before the continuation node can match. bool traversed_loop_initialization_node_;
// The minimum number of times the loop_node_ must match before the // continue_node_ might be considered. This value can be temporarily decreased // while generating quick check details, to represent the remaining iterations // after the completed portion of the quick check details. int min_loop_iterations_;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.