/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Forward declarations of types that this header only refers to by pointer,
// reference or as a smart-pointer/template argument.
class nsCycleCollectionTraversalCallback;
class nsHtml5OwningUTF16Buffer;
class nsHtml5Parser;
class nsHtml5Speculation;
class nsHtml5String;
class nsHtml5Tokenizer;
class nsHtml5TreeBuilder;
class nsHtml5TreeOpExecutor;
class nsIChannel;
class nsIInputStream;
class nsIRequest;
class nsIRunnable;
class nsITimer;
class nsIURI;

namespace mozilla {
class EncodingDetector;
template <typename T>
class Buffer;
}  // namespace mozilla

namespace mozilla::dom {
class DocGroup;
}  // namespace mozilla::dom
enum eParserMode {
  /**
   * Parse a document normally as HTML.
   */
  NORMAL,

  /**
   * View document as HTML source.
   */
  VIEW_SOURCE_HTML,

  /**
   * View document as XML source.
   */
  VIEW_SOURCE_XML,

  /**
   * View document as plain text source.
   */
  VIEW_SOURCE_PLAIN,
// NOTE(review): presumably called when aLength bytes can be read from
// aInStream for aRequest — confirm against the implementation.
nsresult OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInStream,
                         uint64_t aSourceOffset, uint32_t aLength);

/**
 * ReentrantMonitorAutoEnter is used for protecting access to
 * nsHtml5StreamParser::mOnStopCalled and should be obtained from
 * nsHtml5StreamListener::mDelegateMonitor.
 *
 * @param aProofOfLock evidence that the caller has entered the monitor
 *                     described above
 */
nsresult OnStopRequest(
    nsIRequest* aRequest, nsresult status,
    const mozilla::ReentrantMonitorAutoEnter& aProofOfLock);
/**
 * Post a runnable to the main thread to perform the speculative load
 * operations without performing the tree operations.
 *
 * This should be called at the end of each data available or stop
 * request runnable running on the parser thread.
 */
void PostLoadFlusher();

/**
 * Pass a buffer to chardetng.
 *
 * @param aBuffer the bytes to hand to the detector
 */
void FeedDetector(mozilla::Span<const uint8_t> aBuffer);

/**
 * Report EOF to chardetng.
 */
void DetectorEof();
/** * Call this method once you've created a parser, and want to instruct it * about what charset to load * * @param aEncoding the charset of a document * @param aCharsetSource the source of the charset
*/ inlinevoid SetDocumentCharset(NotNull<const Encoding*> aEncoding,
nsCharsetSource aSource, bool aForceAutoDetection) {
MOZ_ASSERT(mStreamState == STREAM_NOT_STARTED, "SetDocumentCharset called too late.");
MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
MOZ_ASSERT(!(aForceAutoDetection && aSource >= kCharsetFromOtherComponent), "Can't force with high-ranking source.");
mEncoding = aEncoding;
mCharsetSource = aSource;
mForceAutoDetection = aForceAutoDetection;
mChannelHadCharset = (aSource == kCharsetFromChannel);
}
// NOTE(review): presumably hands out the channel behind mRequest via
// aChannel — confirm against the implementation.
nsresult GetChannel(nsIChannel** aChannel);

/**
 * The owner parser must call this after script execution
 * when no scripts are executing and the document.written
 * buffer has been exhausted.
 *
 * If the first two arguments are nullptr, instead of
 * continuing after scripts, this method commits to an
 * internally-discovered encoding.
 */
void ContinueAfterScriptsOrEncodingCommitment(
    nsHtml5Tokenizer* aTokenizer, nsHtml5TreeBuilder* aTreeBuilder,
    bool aLastWasCR);

/**
 * Continues the stream parser if the charset switch failed.
 */
void ContinueAfterFailedCharsetSwitch();
/**
 * Requests early termination by raising the mTerminated flag.
 */
void Terminate() {
  mTerminated = true;
}
// NOTE(review): presumably releases mFlushTimer — confirm against the
// implementation.
void DropTimer();

/**
 * Sets the URL for View Source title in case this parser ends up being
 * used for View Source. If aURL is a view-source: URL, takes the inner
 * URL. data: URLs are shown with an ellipsis instead of the actual data.
 */
void SetViewSourceTitle(nsIURI* aURL);
/**
 * Flags the stream parser as interrupted. Main thread only (asserted).
 *
 * Before adding any new caller, audit every use of Uninterrupt very
 * carefully: a runnable that is already in flight on the other thread could
 * otherwise cancel the new Interrupt() call too soon.
 */
void Interrupt() {
  MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");

  mInterrupted = true;
}
/** * True when there is a Unicode decoder already
*/ inlinebool HasDecoder() { return !!mUnicodeDecoder; }
/**
 * Returns 0 if
 *  1) there aren't at least 2 buffers in mBufferedBytes or
 *  2) there is no byte '>' in the second buffer.
 * Otherwise, returns the length of the prefix of the second buffer
 * that is long enough to contain the first byte '>' in the second
 * buffer (including the '>' byte).
 */
size_t LengthOfLtContainingPrefixInSecondBuffer();
/**
 * Push bytes from network when there is no Unicode decoder yet.
 *
 * Annotated as requiring mTokenizerMutex.
 */
nsresult SniffStreamBytes(mozilla::Span<const uint8_t> aFromSegment,
                          bool aEof) MOZ_REQUIRES(mTokenizerMutex);

/**
 * Push bytes from network when there is a Unicode decoder already.
 *
 * Annotated as requiring mTokenizerMutex.
 */
nsresult WriteStreamBytes(mozilla::Span<const uint8_t> aFromSegment)
    MOZ_REQUIRES(mTokenizerMutex);
/**
 * Set up the Unicode decoder and write the sniffing buffer into it
 * followed by the current network buffer.
 *
 * Annotated as requiring mTokenizerMutex.
 *
 * @param aPrefix the part of the stream that has already been seen
 *                prior to aFromSegment. In practice, these are the
 *                bytes that are baked into the state of the BOM
 *                and UTF-16 XML declaration-like sniffing state
 *                machine state.
 * @param aFromSegment The current network buffer
 */
nsresult SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
    mozilla::Span<const uint8_t> aPrefix,
    mozilla::Span<const uint8_t> aFromSegment) MOZ_REQUIRES(mTokenizerMutex);
/**
 * Initialize the Unicode decoder, mark the BOM as the source and
 * drop the sniffer.
 *
 * @param aEncoding The encoding for the decoder
 *                  (UTF-16BE, UTF-16LE or UTF-8; the BOM has
 *                  been swallowed)
 */
void SetupDecodingFromBom(NotNull<const Encoding*> aEncoding);
/**
 * When speculatively decoding from file: URL as UTF-8, commit
 * to UTF-8 as the non-speculative encoding and start processing
 * the decoded data.
 */
[[nodiscard]] nsresult CommitLocalFileToEncoding();

/**
 * When speculatively decoding from file: URL as UTF-8, redecode
 * using fallback and then continue normally with the fallback.
 *
 * Annotated as requiring mTokenizerMutex.
 */
[[nodiscard]] nsresult ReDecodeLocalFile() MOZ_REQUIRES(mTokenizerMutex);

/**
 * Potentially guess the encoding using mozilla::EncodingDetector.
 * Returns the guessed encoding and a telemetry-appropriate source.
 *
 * @param aInitial NOTE(review): presumably whether this is the initial
 *                 guess as opposed to a later one — confirm
 */
std::tuple<NotNull<const Encoding*>, nsCharsetSource> GuessEncoding(
    bool aInitial);
/**
 * Become confident or resolve an encoding name to its preferred form.
 *
 * @param aEncoding the value of an internal encoding decl.
 * @return NOTE(review): the signature returns a `const Encoding*`, whereas
 *         the earlier wording of this comment described a bool result with
 *         aEncoding doubling as an out param. Presumably the result is the
 *         encoding the parser needs to start using, or nullptr if the
 *         parser became confident or if the encoding name did not specify
 *         a usable encoding — confirm against the implementation.
 */
const Encoding* PreferredForInternalEncodingDecl(const nsAString& aEncoding);
/**
 * Parser thread entry point for (maybe) flushing the ops and posting
 * a flush runnable back on the main thread.
 */
void TimerFlush();
/**
 * To be invoked whenever a speculation fails; bumps the failure counter
 * that IsSpeculationEnabled() consults.
 */
void MaybeDisableFutureSpeculation() {
  ++mSpeculationFailureCount;
}
/**
 * Tells whether speculation should still be attempted, i.e. whether the
 * number of failed speculations is still below the cut-off.
 *
 * The cut-off of 100 is picked pretty randomly: not too small (so most
 * pages are not affected) but small enough that pathological cases don't
 * end up with failed speculations over and over.
 */
bool IsSpeculationEnabled() {
  constexpr uint32_t kFailureCutOff = 100;
  const uint32_t failuresSoFar = mSpeculationFailureCount;
  return failuresSoFar < kFailureCutOff;
}
/**
 * Dispatch an event to a Quantum DOM main thread-ish thread.
 * (Not the parser thread.)
 *
 * @param aRunnable the runnable to dispatch
 */
nsresult DispatchToMain(already_AddRefed<nsIRunnable>&& aRunnable);
/** * Notify any devtools listeners about content newly received for parsing.
*/ inlinevoid OnNewContent(mozilla::Span<const char16_t> aData);
/** * Notify any devtools listeners after all parse content has been received.
*/ inlinevoid OnContentComplete();
// NOTE(review): presumably the request whose data this parser consumes —
// confirm against the implementation.
nsCOMPtr<nsIRequest> mRequest;

/**
 * The document title to use if this turns out to be a View Source parser.
 */
nsCString mViewSourceTitle;

/**
 * The Unicode decoder. HasDecoder() reports whether this is non-null.
 */
mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;

/**
 * BOM sniffing state
 */
eBomState mBomState;
// encoding-related stuff

/**
 * The source (confidence) of the character encoding in use
 */
nsCharsetSource mCharsetSource;

// NOTE(review): presumably the charset source to adopt together with
// mNeedsEncodingSwitchTo — confirm against the implementation.
nsCharsetSource mEncodingSwitchSource;

/**
 * The character encoding in use
 */
NotNull<const Encoding*> mEncoding;

// NOTE(review): presumably the encoding a pending switch targets, or
// nullptr when no switch is pending — confirm against the implementation.
const Encoding* mNeedsEncodingSwitchTo;

bool mSeenEligibleMetaCharset;

bool mChardetEof;

#ifdef DEBUG
// Debug-only bookkeeping flags.
bool mStartedFeedingDetector;
bool mStartedFeedingDevTools;
#endif
/**
 * Whether reparse is forbidden
 */
bool mReparseForbidden;

/**
 * Whether the Repair Text Encoding menu item was invoked
 */
bool mForceAutoDetection;

/**
 * Whether there was a valid charset parameter on the HTTP layer.
 */
bool mChannelHadCharset;

/**
 * We are in the process of looking for <meta charset>
 */
bool mLookingForMetaCharset;

/**
 * Whether the byte stream started with ASCII <?
 */
bool mStartsWithLtQuestion;

/**
 * If we are viewing XML source and are waiting for a '>' from the network.
 */
bool mLookingForXmlDeclarationForXmlViewSource;

/**
 * Whether template has been pushed or head popped within the first 1024
 * bytes.
 */
bool mTemplatePushedOrHeadPopped;
// Portable parser objects

/**
 * The first buffer in the pending UTF-16 buffer queue
 */
RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;

/**
 * Non-owning pointer to the most recent buffer that contains the most recent
 * remembered greater-than sign. Used only while mLookingForMetaCharset is
 * true. While mLookingForMetaCharset is true, mFirstBuffer is not changed and
 * keeps the whole linked list of buffers alive. This pointer is non-owning to
 * avoid frequent refcounting.
 */
nsHtml5OwningUTF16Buffer* mGtBuffer;

// NOTE(review): presumably the position of the remembered greater-than sign
// within mGtBuffer — confirm against the implementation.
int32_t mGtPos;

/**
 * The last buffer in the pending UTF-16 buffer queue
 */
nsHtml5OwningUTF16Buffer*
    mLastBuffer;  // weak ref; always points to
                  // a buffer of the size
                  // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE

/**
 * The first buffer of the document if looking for <meta charset> or
 * nullptr afterwards.
 */
RefPtr<nsHtml5OwningUTF16Buffer> mFirstBufferOfMetaScan;
/**
 * The tree operation executor
 */
nsHtml5TreeOpExecutor* mExecutor;

/**
 * The HTML5 tree builder
 */
mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder;

/**
 * The HTML5 tokenizer
 */
mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer;

/**
 * Makes sure the main thread can't mess the tokenizer state while it's
 * tokenizing. This mutex also protects the current speculation.
 */
mozilla::Mutex mTokenizerMutex;

/**
 * The scoped atom table
 */
nsHtml5AtomTable mAtomTable;

/**
 * The owner parser.
 */
RefPtr<nsHtml5Parser> mOwner;

/**
 * Whether the last character tokenized was a carriage return (for CRLF)
 */
bool mLastWasCR;
/**
 * For tracking stream life cycle
 */
eHtml5StreamState mStreamState;

/**
 * Whether we are speculating.
 */
bool mSpeculating;

/**
 * Whether the tokenizer has reached EOF. (Reset when stream rewound.)
 */
bool mAtEOF;

/**
 * The speculations. The mutex protects the nsTArray itself.
 * To access the queue of current speculation, mTokenizerMutex must be
 * obtained.
 * The current speculation is the last element
 */
nsTArray<mozilla::UniquePtr<nsHtml5Speculation>> mSpeculations;
// NOTE(review): presumably "the mutex" that the comment on mSpeculations
// refers to — confirm.
mozilla::Mutex mSpeculationMutex;

/**
 * Number of times speculation has failed for this parser.
 */
mozilla::Atomic<uint32_t> mSpeculationFailureCount;

/**
 * Number of bytes already buffered into mBufferedBytes.
 */
uint32_t mNumBytesBuffered;

/**
 * True to terminate early.
 */
mozilla::Atomic<bool> mTerminated;

/**
 * True to release mTokenizerMutex early.
 */
mozilla::Atomic<bool> mInterrupted;
/**
 * The thread this stream parser runs on.
 */
nsCOMPtr<nsISerialEventTarget> mEventTarget;

// NOTE(review): presumably the runnables referred to as "the regular
// flushers" in the comment on mEncodingCommitter — confirm.
nsCOMPtr<nsIRunnable> mExecutorFlusher;
nsCOMPtr<nsIRunnable> mLoadFlusher;

/**
 * This runnable is distinct from the regular flushers to
 * signal the intent of encoding commitment without having to
 * protect mPendingEncodingCommitment in the executor with a
 * mutex.
 */
nsCOMPtr<nsIRunnable> mEncodingCommitter;

/**
 * The generic detector.
 */
mozilla::UniquePtr<mozilla::EncodingDetector> mDetector;

/**
 * The TLD we're loading from or empty if unknown.
 */
nsCString mTLD;

/**
 * Whether the initial charset source was kCharsetFromParentFrame
 */
bool mInitialEncodingWasFromParentFrame;

bool mHasHadErrors;

bool mDetectorHasSeenNonAscii;

/**
 * If true, we are decoding a local file that lacks an encoding
 * declaration and we are not tokenizing yet.
 */
bool mDecodingLocalFileWithoutTokenizing;

/**
 * Whether we are keeping the incoming bytes.
 */
bool mBufferingBytes;
/**
 * Timer for flushing tree ops once in a while when not speculating.
 */
nsCOMPtr<nsITimer> mFlushTimer;

/**
 * Mutex for protecting access to mFlushTimer (but not for the two
 * mFlushTimerFoo booleans below).
 */
mozilla::Mutex mFlushTimerMutex;

/**
 * Keeps track whether mFlushTimer has been armed. Unfortunately,
 * nsITimer doesn't enable querying this from the timer itself.
 */
bool mFlushTimerArmed;

/**
 * False initially and true after the timer has fired at least once.
 */
bool mFlushTimerEverFired;

/**
 * Whether the parser is doing a normal parse, view source or plain text.
 */
eParserMode mMode;
/**
 * If the associated docshell is being watched by the devtools, this is
 * set to the URI associated with the parse. All parse data is sent to the
 * devtools, along with this URI. This URI is cleared out after the parse has
 * been marked as completed.
 */
nsCOMPtr<nsIURI> mURIToSendToDevtools;

/**
 * If content is being sent to the devtools, an encoded UUID for the parser.
 */
nsString mUUIDForDevtools;

/**
 * Prevents multiple calls to OnStopRequest.
 * This field can be accessed from multiple threads and is protected by
 * nsHtml5StreamListener::mDelegateMonitor passed in the OnStopRequest
 */
bool mOnStopCalled{false};

/*
 * Used for telemetry about OnStopRequest vs OnDataFinished
 */
// guarded by nsHtml5StreamListener::mDelegateMonitor
mozilla::TimeStamp mOnStopRequestTime;
mozilla::TimeStamp mOnDataFinishedTime;
};
#endif// nsHtml5StreamParser_h
¤ Dauer der Verarbeitung: 0.21 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.