/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set sw=2 ts=2 et tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * Note that nsHtml5StreamParser implements cycle collecting AddRef and
 * Release. Therefore, nsHtml5StreamParser must never be refcounted from
 * the parser thread!
 *
 * To work around this limitation, runnables posted by the main thread to the
 * parser thread hold their reference to the stream parser in an
 * nsHtml5StreamParserPtr. Upon creation, nsHtml5StreamParserPtr addrefs the
 * object it holds just like a regular nsRefPtr. This is OK, since the
 * creation of the runnable and the nsHtml5StreamParserPtr happens on the
 * main thread.
 *
 * When the runnable is done on the parser thread, the destructor of
 * nsHtml5StreamParserPtr runs there. It doesn't call Release on the held
 * object directly. Instead, it posts another runnable back to the main
 * thread where that runnable calls Release on the wrapped object.
 *
 * When posting runnables in the other direction, the runnables have to be
 * created on the main thread when nsHtml5StreamParser is instantiated and
 * held for the lifetime of the nsHtml5StreamParser. This works, because the
 * same runnable can be dispatched multiple times and currently runnables
 * posted from the parser thread to the main thread don't need to wrap any
 * runnable-specific data. (In the other direction, the runnables most notably
 * wrap the byte data of the stream.)
 */
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner) // hack: count the strongly owned edge wrapped in the runnable if (tmp->mExecutorFlusher) {
NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
} // hack: count the strongly owned edge wrapped in the runnable if (tmp->mLoadFlusher) {
NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
}
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
/**
 * Runnable that makes the tree op executor run its flush loop.
 *
 * The runnable keeps a strong reference to the executor. Running it is a
 * no-op while the executor reports isInList(). In a content process, if a
 * high-priority event is pending for the top-level document before contentful
 * paint, the runnable re-dispatches itself via the document instead of
 * flushing right away; if that dispatch fails, it flushes immediately.
 */
class nsHtml5ExecutorFlusher : public Runnable {
 public:
  explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
      : Runnable("nsHtml5ExecutorFlusher"), mExecutor(aExecutor) {}

  NS_IMETHOD Run() override {
    if (mExecutor->isInList()) {
      // Nothing to do for an executor that is already in the list.
      return NS_OK;
    }

    Document* document = mExecutor->GetDocument();
    const bool earlyPaintMayBePending =
        XRE_IsContentProcess() &&
        nsContentUtils::
            HighPriorityEventPendingForTopLevelDocumentBeforeContentfulPaint(
                document);

    if (earlyPaintMayBePending) {
      // Possible early paint pending, reuse the runnable and try to
      // call RunFlushLoop later.
      nsCOMPtr<nsIRunnable> self = this;
      if (NS_SUCCEEDED(document->Dispatch(self.forget()))) {
        PROFILER_MARKER_UNTYPED("HighPrio blocking parser flushing(1)", DOM);
        return NS_OK;
      }
      // Re-dispatch failed: fall through and flush now.
    }

    mExecutor->RunFlushLoop();
    return NS_OK;
  }

 private:
  RefPtr<nsHtml5TreeOpExecutor> mExecutor;
};
class nsHtml5LoadFlusher : public Runnable { private:
RefPtr<nsHtml5TreeOpExecutor> mExecutor;
// mEncoding and mCharsetSource potentially have come from channel or higher // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. // If we don't find a BOM, the previously set values of mEncoding and // mCharsetSource are not modified by the BOM sniffing here. static uint8_t utf8[] = {0xEF, 0xBB}; static uint8_t utf16le[] = {0xFF}; static uint8_t utf16be[] = {0xFE}; static uint8_t utf16leXml[] = {'<', 0x00, '?', 0x00, 'x'}; static uint8_t utf16beXml[] = {0x00, '<', 0x00, '?', 0x00}; // Buffer for replaying past bytes based on state machine state. If // writing this from scratch, probably wouldn't do it this way, but // let's keep the changes to a minimum. const uint8_t* prefix = utf8;
size_t prefixLength = 0; if (aEof && mBomState == BOM_SNIFFING_NOT_STARTED) { // Avoid handling aEof in the BOM_SNIFFING_NOT_STARTED state below.
mBomState = BOM_SNIFFING_OVER;
} for (size_t i = 0;
(i < aFromSegment.Length() && mBomState != BOM_SNIFFING_OVER) || aEof;
i++) { switch (mBomState) { case BOM_SNIFFING_NOT_STARTED:
MOZ_ASSERT(i == 0, "Bad BOM sniffing state.");
MOZ_ASSERT(!aEof, "Should have checked for aEof above!"); switch (aFromSegment[0]) { case 0xEF:
mBomState = SEEN_UTF_8_FIRST_BYTE; break; case 0xFF:
mBomState = SEEN_UTF_16_LE_FIRST_BYTE; break; case 0xFE:
mBomState = SEEN_UTF_16_BE_FIRST_BYTE; break; case 0x00: if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
mCharsetSource != kCharsetFromChannel) {
mBomState = SEEN_UTF_16_BE_XML_FIRST;
} else {
mBomState = BOM_SNIFFING_OVER;
} break; case'<': if (mCharsetSource < kCharsetFromXmlDeclarationUtf16 &&
mCharsetSource != kCharsetFromChannel) {
mBomState = SEEN_UTF_16_LE_XML_FIRST;
} else {
mBomState = BOM_SNIFFING_OVER;
} break; default:
mBomState = BOM_SNIFFING_OVER; break;
} break; case SEEN_UTF_16_LE_FIRST_BYTE: if (!aEof && aFromSegment[i] == 0xFE) {
SetupDecodingFromBom(UTF_16LE_ENCODING); return WriteStreamBytes(aFromSegment.From(i + 1));
}
prefix = utf16le;
prefixLength = 1 - i;
mBomState = BOM_SNIFFING_OVER; break; case SEEN_UTF_16_BE_FIRST_BYTE: if (!aEof && aFromSegment[i] == 0xFF) {
SetupDecodingFromBom(UTF_16BE_ENCODING); return WriteStreamBytes(aFromSegment.From(i + 1));
}
prefix = utf16be;
prefixLength = 1 - i;
mBomState = BOM_SNIFFING_OVER; break; case SEEN_UTF_8_FIRST_BYTE: if (!aEof && aFromSegment[i] == 0xBB) {
mBomState = SEEN_UTF_8_SECOND_BYTE;
} else {
prefixLength = 1 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_8_SECOND_BYTE: if (!aEof && aFromSegment[i] == 0xBF) {
SetupDecodingFromBom(UTF_8_ENCODING); return WriteStreamBytes(aFromSegment.From(i + 1));
}
prefixLength = 2 - i;
mBomState = BOM_SNIFFING_OVER; break; case SEEN_UTF_16_BE_XML_FIRST: if (!aEof && aFromSegment[i] == '<') {
mBomState = SEEN_UTF_16_BE_XML_SECOND;
} else {
prefix = utf16beXml;
prefixLength = 1 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_BE_XML_SECOND: if (!aEof && aFromSegment[i] == 0x00) {
mBomState = SEEN_UTF_16_BE_XML_THIRD;
} else {
prefix = utf16beXml;
prefixLength = 2 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_BE_XML_THIRD: if (!aEof && aFromSegment[i] == '?') {
mBomState = SEEN_UTF_16_BE_XML_FOURTH;
} else {
prefix = utf16beXml;
prefixLength = 3 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_BE_XML_FOURTH: if (!aEof && aFromSegment[i] == 0x00) {
mBomState = SEEN_UTF_16_BE_XML_FIFTH;
} else {
prefix = utf16beXml;
prefixLength = 4 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_BE_XML_FIFTH: if (!aEof && aFromSegment[i] == 'x') {
SetupDecodingFromUtf16BogoXml(UTF_16BE_ENCODING); return WriteStreamBytes(aFromSegment.From(i + 1));
}
prefix = utf16beXml;
prefixLength = 5 - i;
mBomState = BOM_SNIFFING_OVER; break; case SEEN_UTF_16_LE_XML_FIRST: if (!aEof && aFromSegment[i] == 0x00) {
mBomState = SEEN_UTF_16_LE_XML_SECOND;
} else { if (!aEof && aFromSegment[i] == '?' &&
!(mMode == PLAIN_TEXT || mMode == VIEW_SOURCE_PLAIN)) {
mStartsWithLtQuestion = true;
}
prefix = utf16leXml;
prefixLength = 1 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_LE_XML_SECOND: if (!aEof && aFromSegment[i] == '?') {
mBomState = SEEN_UTF_16_LE_XML_THIRD;
} else {
prefix = utf16leXml;
prefixLength = 2 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_LE_XML_THIRD: if (!aEof && aFromSegment[i] == 0x00) {
mBomState = SEEN_UTF_16_LE_XML_FOURTH;
} else {
prefix = utf16leXml;
prefixLength = 3 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_LE_XML_FOURTH: if (!aEof && aFromSegment[i] == 'x') {
mBomState = SEEN_UTF_16_LE_XML_FIFTH;
} else {
prefix = utf16leXml;
prefixLength = 4 - i;
mBomState = BOM_SNIFFING_OVER;
} break; case SEEN_UTF_16_LE_XML_FIFTH: if (!aEof && aFromSegment[i] == 0x00) {
SetupDecodingFromUtf16BogoXml(UTF_16LE_ENCODING); return WriteStreamBytes(aFromSegment.From(i + 1));
}
prefix = utf16leXml;
prefixLength = 5 - i;
mBomState = BOM_SNIFFING_OVER; break; default:
mBomState = BOM_SNIFFING_OVER; break;
} if (aEof) { break;
}
} // if we get here, there either was no BOM or the BOM sniffing isn't complete // yet
MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, "Should not come here if BOM was found.");
MOZ_ASSERT(mCharsetSource != kCharsetFromXmlDeclarationUtf16, "Should not come here if UTF-16 bogo-XML declaration was found.");
MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, "kCharsetFromOtherComponent is for XSLT.");
if (mBomState == BOM_SNIFFING_OVER) { if (mMode == VIEW_SOURCE_XML && mStartsWithLtQuestion &&
mCharsetSource < kCharsetFromChannel) { // Sniff for XML declaration only.
MOZ_ASSERT(!mLookingForXmlDeclarationForXmlViewSource);
MOZ_ASSERT(!aEof);
MOZ_ASSERT(!mLookingForMetaCharset);
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing); // Maybe we've already buffered a '>'.
MOZ_ASSERT(!mBufferedBytes.IsEmpty(), "How did at least not get buffered?");
Buffer<uint8_t>& first = mBufferedBytes[0]; const Encoding* encoding =
xmldecl_parse(first.Elements(), first.Length()); if (encoding) {
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromXmlDeclaration;
} elseif (memchr(first.Elements(), '>', first.Length())) { // There was a '>', but an encoding still wasn't found.
; // fall through to commit to the UTF-8 default.
} elseif (size_t lengthOfPrefix =
LengthOfLtContainingPrefixInSecondBuffer()) { // This can only happen if the first buffer was a lone '<', because // we come here upon seeing the second byte '?' if the first two bytes // were "<?". That is, the only way how we aren't dealing with the first // buffer is if the first buffer only contained a single '<' and we are // dealing with the second buffer that starts with '?'.
MOZ_ASSERT(first.Length() == 1);
MOZ_ASSERT(mBufferedBytes[1][0] == '?'); // Our scanner for XML declaration-like syntax wants to see a contiguous // buffer, so let's linearize the data. (Ideally, the XML declaration // scanner would be incremental, but this is the rare path anyway.)
Vector<uint8_t> contiguous; if (!contiguous.append(first.Elements(), first.Length())) {
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); return NS_ERROR_OUT_OF_MEMORY;
} if (!contiguous.append(mBufferedBytes[1].Elements(), lengthOfPrefix)) {
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); return NS_ERROR_OUT_OF_MEMORY;
}
encoding = xmldecl_parse(contiguous.begin(), contiguous.length()); if (encoding) {
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromXmlDeclaration;
} // else no XML decl, commit to the UTF-8 default.
} else {
MOZ_ASSERT(mBufferingBytes);
mLookingForXmlDeclarationForXmlViewSource = true; return NS_OK;
}
} elseif (mMode != VIEW_SOURCE_XML &&
(mForceAutoDetection || mCharsetSource < kCharsetFromChannel)) { // In order to use the buffering logic for meta with mForceAutoDetection, // we set mLookingForMetaCharset but still actually potentially ignore the // meta.
mFirstBufferOfMetaScan = mFirstBuffer;
MOZ_ASSERT(mLookingForMetaCharset);
if (mMode == VIEW_SOURCE_HTML) { auto r = mTokenizer->FlushViewSource(); if (r.isErr()) { return r.unwrapErr();
}
} auto r = mTreeBuilder->Flush(); if (r.isErr()) { return r.unwrapErr();
} // Encoding committer flushes the ops on the main thread.
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
nsHtml5Speculation* speculation = new nsHtml5Speculation(
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
mTokenizer->getColumnNumber(), mTreeBuilder->newSnapshot());
MOZ_ASSERT(!mFlushTimerArmed, "How did we end up arming the timer?"); if (mMode == VIEW_SOURCE_HTML) {
mTokenizer->SetViewSourceOpSink(speculation);
mTokenizer->StartViewSourceBodyContents();
} else {
MOZ_ASSERT(mMode != VIEW_SOURCE_XML);
mTreeBuilder->SetOpSink(speculation);
}
mSpeculations.AppendElement(speculation); // adopts the pointer
mSpeculating = true;
} else {
mLookingForMetaCharset = false;
mBufferingBytes = false;
mDecodingLocalFileWithoutTokenizing = false; if (mMode == VIEW_SOURCE_HTML) {
mTokenizer->StartViewSourceBodyContents();
}
}
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource, false); return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
Span(prefix, prefixLength), aFromSegment);
}
// Decode again for (auto&& buffer : mBufferedBytes) {
DoDataAvailable(buffer);
}
if (mMode == VIEW_SOURCE_HTML) { auto r = mTokenizer->FlushViewSource(); if (r.isErr()) { return r.unwrapErr();
}
} auto r = mTreeBuilder->Flush(); if (r.isErr()) { return r.unwrapErr();
} return NS_OK;
}
mBufferingBytes = false;
mForceAutoDetection = false; // To stop feeding the detector
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource, true); if (mMode == VIEW_SOURCE_HTML) { auto r = mTokenizer->FlushViewSource(); if (r.isErr()) { return r.unwrapErr();
}
} auto r = mTreeBuilder->Flush(); if (r.isErr()) { return r.unwrapErr();
} return NS_OK;
}
class MaybeRunCollector : public Runnable { public: explicit MaybeRunCollector(nsIDocShell* aDocShell)
: Runnable("MaybeRunCollector"), mDocShell(aDocShell) {}
/**
 * Handles the start of the network request on the main thread.
 *
 * Records the request, starts the tokenizer/tree builder/executor, emits the
 * mode-specific preamble (plain text or View Source), allocates the first
 * UTF-16 buffer, tries to retarget data delivery to mEventTarget and decides
 * whether bytes need to be buffered for encoding sniffing.
 *
 * @param aRequest the request that is starting; stored in mRequest.
 * @return NS_OK on success; otherwise the failure from WillBuildModel() or the
 *         value returned by mExecutor->MarkAsBroken() for flush/allocation
 *         failures.
 */
nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
  MOZ_RELEASE_ASSERT(STREAM_NOT_STARTED == mStreamState,
                     "Got OnStartRequest when the stream had already started.");
  MOZ_ASSERT(
      !mExecutor->HasStarted(),
      "Got OnStartRequest at the wrong stage in the executor life cycle.");
  MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");

  // To avoid the cost of instantiating the detector when it's not needed,
  // let's instantiate only if we make it out of this method with the
  // intent to use it.
  auto detectorCreator = MakeScopeExit([&] {
    if ((mForceAutoDetection || mCharsetSource < kCharsetFromParentFrame) ||
        !(mMode == LOAD_AS_DATA || mMode == VIEW_SOURCE_XML)) {
      mDetector = mozilla::EncodingDetector::Create();
    }
  });

  mRequest = aRequest;
  mStreamState = STREAM_BEING_READ;

  // For View Source, the parser should run with scripts "enabled" if a normal
  // load would have scripts enabled.
  bool scriptingEnabled =
      mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled();
  mOwner->StartTokenizer(scriptingEnabled);

  MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
  bool isSrcdoc = false;
  nsCOMPtr<nsIChannel> channel;
  nsresult rv = GetChannel(getter_AddRefs(channel));
  if (NS_SUCCEEDED(rv)) {
    isSrcdoc = NS_IsSrcdocChannel(channel);
    if (!isSrcdoc && mCharsetSource <= kCharsetFromFallback) {
      nsCOMPtr<nsIURI> originalURI;
      rv = channel->GetOriginalURI(getter_AddRefs(originalURI));
      if (NS_SUCCEEDED(rv)) {
        if (originalURI->SchemeIs("resource")) {
          mCharsetSource = kCharsetFromBuiltIn;
          mEncoding = UTF_8_ENCODING;
          mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource, false);
        } else {
          nsCOMPtr<nsIURI> currentURI;
          rv = channel->GetURI(getter_AddRefs(currentURI));
          if (NS_SUCCEEDED(rv)) {
            nsCOMPtr<nsIURI> innermost = NS_GetInnermostURI(currentURI);
            if (innermost->SchemeIs("file")) {
              MOZ_ASSERT(mEncoding == UTF_8_ENCODING);
              if (!(mMode == LOAD_AS_DATA || mMode == VIEW_SOURCE_XML)) {
                mDecodingLocalFileWithoutTokenizing = true;
              }
            } else {
              nsAutoCString host;
              innermost->GetAsciiHost(host);
              if (!host.IsEmpty()) {
                // First let's see if the host is DNS-absolute and ends with a
                // dot and get rid of that one.
                if (host.Last() == '.') {
                  host.SetLength(host.Length() - 1);
                }
                int32_t index = host.RFindChar('.');
                if (index != kNotFound) {
                  // We tolerate an IPv4 component as generic "TLD", so don't
                  // bother checking.
                  ToLowerCase(
                      Substring(host, index + 1, host.Length() - (index + 1)),
                      mTLD);
                }
              }
            }
          }
        }
      }
    }
  }
  mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
  mTreeBuilder->setScriptingEnabled(scriptingEnabled);
  mTreeBuilder->SetPreventScriptExecution(
      !((mMode == NORMAL) && scriptingEnabled));
  mTreeBuilder->setAllowDeclarativeShadowRoots(
      mExecutor->GetDocument()->AllowsDeclarativeShadowRoots());
  mTokenizer->start();
  mExecutor->Start();
  mExecutor->StartReadingFromStage();

  if (mMode == PLAIN_TEXT) {
    mTreeBuilder->StartPlainText();
    mTokenizer->StartPlainText();
    MOZ_ASSERT(
        mTemplatePushedOrHeadPopped);  // Needed to force 1024-byte sniffing
    // Flush the ops to put them where ContinueAfterScriptsOrEncodingCommitment
    // can find them.
    auto r = mTreeBuilder->Flush();
    if (r.isErr()) {
      return mExecutor->MarkAsBroken(r.unwrapErr());
    }
  } else if (mMode == VIEW_SOURCE_PLAIN) {
    nsAutoString viewSourceTitle;
    CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle);
    mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length());
    mTreeBuilder->StartPlainTextViewSource(viewSourceTitle);
    mTokenizer->StartPlainText();
    MOZ_ASSERT(
        mTemplatePushedOrHeadPopped);  // Needed to force 1024-byte sniffing
    // Flush the ops to put them where ContinueAfterScriptsOrEncodingCommitment
    // can find them.
    auto r = mTreeBuilder->Flush();
    if (r.isErr()) {
      return mExecutor->MarkAsBroken(r.unwrapErr());
    }
  } else if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    // Generate and flush the View Source document up to and including the
    // pre element start.
    mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
    if (mMode == VIEW_SOURCE_XML) {
      mTokenizer->StartViewSourceBodyContents();
    }
    // Flush the ops to put them where ContinueAfterScriptsOrEncodingCommitment
    // can find them.
    auto r = mTokenizer->FlushViewSource();
    if (r.isErr()) {
      return mExecutor->MarkAsBroken(r.unwrapErr());
    }
  }

  /*
   * If you move the following line, be very careful not to cause
   * WillBuildModel to be called before the document has had its
   * script global object set.
   */
  rv = mExecutor->WillBuildModel();
  NS_ENSURE_SUCCESS(rv, rv);

  RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
      nsHtml5OwningUTF16Buffer::FalliblyCreate(READ_BUFFER_SIZE);
  if (!newBuf) {
    // marks this stream parser as terminated,
    // which prevents entry to code paths that
    // would use mFirstBuffer or mLastBuffer.
    return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
  }
  MOZ_ASSERT(!mFirstBuffer, "How come we have the first buffer set?");
  MOZ_ASSERT(!mLastBuffer, "How come we have the last buffer set?");
  mFirstBuffer = mLastBuffer = newBuf;

  rv = NS_OK;

  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
  if (NS_SUCCEEDED(rv)) {
    nsAutoCString method;
    Unused << httpChannel->GetRequestMethod(method);
    // XXX does Necko have a way to renavigate POST, etc. without hitting
    // the network?
    if (!method.EqualsLiteral("GET")) {
      // This is the old Gecko behavior but the HTML5 spec disagrees.
      // Don't reparse on POST.
      mReparseForbidden = true;
    }
  }

  // Attempt to retarget delivery of data (via OnDataAvailable) to the parser
  // thread, rather than through the main thread.
  nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest =
      do_QueryInterface(mRequest, &rv);
  if (threadRetargetableRequest) {
    rv = threadRetargetableRequest->RetargetDeliveryTo(mEventTarget);
    if (NS_SUCCEEDED(rv)) {
      // Parser thread should be now ready to get data from necko and parse it
      // and main thread might have a chance to process a collector slice.
      // We need to do this asynchronously so that necko may continue processing
      // the request.
      nsCOMPtr<nsIRunnable> runnable =
          new MaybeRunCollector(mExecutor->GetDocument()->GetDocShell());
      mozilla::SchedulerGroup::Dispatch(runnable.forget());
    }
  }

  if (NS_FAILED(rv)) {
    NS_WARNING("Failed to retarget HTML data delivery to the parser thread.");
  }

  if (mCharsetSource == kCharsetFromParentFrame) {
    // Remember this for error reporting.
    mInitialEncodingWasFromParentFrame = true;
    MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
  }

  if (mForceAutoDetection || mCharsetSource < kCharsetFromChannel) {
    mBufferingBytes = true;
    if (mMode != VIEW_SOURCE_XML) {
      // We need to set mLookingForMetaCharset to true here in case the first
      // buffer to arrive is larger than 1024. We need the code that splits
      // the buffers at 1024 bytes to work even in that case.
      mLookingForMetaCharset = true;
    }
  }

  if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
    // we aren't ready to commit to an encoding yet
    // leave converter uninstantiated for now
    return NS_OK;
  }

  MOZ_ASSERT(mEncoding == UTF_8_ENCODING,
             "How come UTF-8-only MIME type didn't set encoding to UTF-8?");

  // We are loading JSON/WebVTT/etc. into a browsing context.
  // There's no need to remove the BOM manually here, because
  // the UTF-8 decoder removes it.
  mReparseForbidden = true;
  mForceAutoDetection = false;

  // Instantiate the converter here to avoid BOM sniffing.
  mDecodingLocalFileWithoutTokenizing = false;
  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  return NS_OK;
}
void nsHtml5StreamParser::DoStopRequest() {
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState, "Stream ended without being open.");
mTokenizerMutex.AssertCurrentThreadOwns();
auto guard = MakeScopeExit([&] { OnContentComplete(); });
MOZ_ASSERT(mUnicodeDecoder, "Should have a decoder after finalizing sniffing.");
// mLastBuffer should always point to a buffer of the size // READ_BUFFER_SIZE. if (!mLastBuffer) {
NS_WARNING("mLastBuffer should not be null!");
MarkAsBroken(NS_ERROR_NULL_POINTER); return;
}
Span<uint8_t> src; // empty span for (;;) { auto dst = mLastBuffer->TailAsSpan(READ_BUFFER_SIZE);
uint32_t result;
size_t read;
size_t written; bool hadErrors; // Do not use structured binding lest deal with [-Werror=unused-variable]
std::tie(result, read, written, hadErrors) =
mUnicodeDecoder->DecodeToUTF16(src, dst, true); if (!(mLookingForMetaCharset || mDecodingLocalFileWithoutTokenizing)) {
OnNewContent(dst.To(written));
} if (hadErrors) {
mHasHadErrors = true;
}
MOZ_ASSERT(read == 0, "How come an empty span was read form?");
mLastBuffer->AdvanceEnd(written); if (result == kOutputFull) {
RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
nsHtml5OwningUTF16Buffer::FalliblyCreate(READ_BUFFER_SIZE); if (!newBuf) {
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); return;
}
mLastBuffer = (mLastBuffer->next = std::move(newBuf));
} else { if (!mLookingForMetaCharset && mDecodingLocalFileWithoutTokenizing) {
MOZ_ASSERT(mNumBytesBuffered < LOCAL_FILE_UTF_8_BUFFER_SIZE);
MOZ_ASSERT(!mStartedFeedingDetector); for (auto&& buffer : mBufferedBytes) {
FeedDetector(buffer);
}
MOZ_ASSERT(!mChardetEof);
DetectorEof(); auto [encoding, source] = GuessEncoding(true);
mCharsetSource = source; if (encoding != mEncoding) {
mEncoding = encoding;
nsresult rv = ReDecodeLocalFile(); if (NS_FAILED(rv)) {
MarkAsBroken(rv); return;
}
DoStopRequest(); return;
}
MOZ_ASSERT(mEncoding == UTF_8_ENCODING);
nsresult rv = CommitLocalFileToEncoding(); if (NS_FAILED(rv)) {
MarkAsBroken(rv); return;
}
} break;
}
}
mStreamState = STREAM_ENDED;
if (IsTerminatedOrInterrupted()) { return;
}
ParseAvailableData();
}
class nsHtml5RequestStopper : public Runnable { private:
nsHtml5StreamParserPtr mStreamParser;
/**
 * Handles the end of the request; may be entered on the main thread or on the
 * parser thread.
 *
 * First call (mOnStopCalled not yet set):
 *  - on the main thread, dispatches an nsHtml5RequestStopper to mEventTarget;
 *  - off the main thread, if the network_send_OnDataFinished_html5parser pref
 *    is set, records mOnDataFinishedTime and performs the stop work directly
 *    under mTokenizerMutex; otherwise clears mOnStopCalled again and returns
 *    early so that the later main-thread call does the work.
 * Subsequent call (mOnStopCalled already set): only records
 * mOnStopRequestTime when mOnDataFinishedTime was recorded.
 *
 * When both timestamps are available, the delay between them is reported to
 * telemetry.
 *
 * @param aRequest if non-null, asserted to be equal to mRequest.
 * @param status not read by this method.
 * @param aProofOfLock not read by this method; presumably proves the caller
 *        holds the owner's monitor -- confirm against callers.
 * @return always NS_OK.
 */
nsresult nsHtml5StreamParser::OnStopRequest(
    nsIRequest* aRequest, nsresult status,
    const mozilla::ReentrantMonitorAutoEnter& aProofOfLock) {
  MOZ_ASSERT_IF(aRequest, mRequest == aRequest);
  if (mOnStopCalled) {
    // OnStopRequest already executed (probably OMT).
    MOZ_ASSERT(NS_IsMainThread(), "Expected to run on main thread");
    if (mOnDataFinishedTime) {
      mOnStopRequestTime = TimeStamp::Now();
    }
  } else {
    mOnStopCalled = true;

    if (MOZ_UNLIKELY(NS_IsMainThread())) {
      MOZ_ASSERT(mOnDataFinishedTime.IsNull(), "stale mOnDataFinishedTime");
      nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this);
      if (NS_FAILED(
              mEventTarget->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) {
        NS_WARNING("Dispatching StopRequest event failed.");
      }
    } else {
      if (StaticPrefs::network_send_OnDataFinished_html5parser()) {
        MOZ_ASSERT(IsParserThread(), "Wrong thread!");
        mOnDataFinishedTime = TimeStamp::Now();
        mozilla::MutexAutoLock autoLock(mTokenizerMutex);
        DoStopRequest();
        PostLoadFlusher();
      } else {
        // Let the MainThread event handle this, even though it will just
        // send it back to this thread, so we can accurately judge the impact
        // of this change. This should eventually be removed
        mOnStopCalled = false;
        // don't record any telemetry for this
        return NS_OK;
      }
    }
  }
  if (!mOnStopRequestTime.IsNull() && !mOnDataFinishedTime.IsNull()) {
    TimeDuration delta = (mOnStopRequestTime - mOnDataFinishedTime);
    MOZ_ASSERT((delta.ToMilliseconds() >= 0),
               "OnDataFinished after OnStopRequest");
    glean::networking::http_content_html5parser_ondatafinished_to_onstop_delay
        .AccumulateRawDuration(delta);
  }
  return NS_OK;
}
/**
 * Accepts an owned byte buffer from the stream and forwards it to
 * DoDataAvailable(), retaining it in mBufferedBytes while bytes are being
 * buffered.
 *
 * While buffering, an incoming buffer that straddles
 * UNCONDITIONAL_META_SCAN_BOUNDARY (when looking for a meta charset) or
 * LOCAL_FILE_UTF_8_BUFFER_SIZE (when decoding a local file without
 * tokenizing) is split into two copies so that DoDataAvailable() sees a
 * buffer ending exactly at the boundary.
 *
 * @param aBuffer the bytes to process; moved into mBufferedBytes when
 *        retained whole.
 */
void nsHtml5StreamParser::DoDataAvailableBuffer(
    mozilla::Buffer<uint8_t>&& aBuffer) {
  if (MOZ_UNLIKELY(!mBufferingBytes)) {
    // Not buffering: process the bytes directly without retaining them.
    DoDataAvailable(aBuffer);
    return;
  }
  if (MOZ_UNLIKELY(mLookingForXmlDeclarationForXmlViewSource)) {
    const uint8_t* elements = aBuffer.Elements();
    size_t length = aBuffer.Length();
    const uint8_t* lt = (const uint8_t*)memchr(elements, '>', length);
    if (!lt) {
      // No '>' yet: keep accumulating.
      mBufferedBytes.AppendElement(std::move(aBuffer));
      return;
    }

    // We found an '>'. Now there either is or isn't an XML decl.
    length = (lt - elements) + 1;
    // NOTE(review): `contiguous` is assembled here but never read in this
    // block as it stands. Confirm against the upstream file whether the code
    // that consumes it went missing from this copy.
    Vector<uint8_t> contiguous;
    for (auto&& buffer : mBufferedBytes) {
      if (!contiguous.append(buffer.Elements(), buffer.Length())) {
        MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
    }
    if (!contiguous.append(elements, length)) {
      MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
      return;
    }

    for (auto&& buffer : mBufferedBytes) {
      DoDataAvailable(buffer);
    }
    DoDataAvailable(aBuffer);
    mBufferedBytes.Clear();
    return;
  }

  CheckedInt<size_t> bufferedPlusLength(aBuffer.Length());
  bufferedPlusLength += mNumBytesBuffered;
  if (!bufferedPlusLength.isValid()) {
    MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
    return;
  }
  // Ensure that WriteStreamBytes() sees buffers ending
  // exactly at the two special boundaries.
  bool metaBoundaryWithinBuffer =
      mLookingForMetaCharset &&
      mNumBytesBuffered < UNCONDITIONAL_META_SCAN_BOUNDARY &&
      bufferedPlusLength.value() > UNCONDITIONAL_META_SCAN_BOUNDARY;
  bool localFileLimitWithinBuffer =
      mDecodingLocalFileWithoutTokenizing &&
      mNumBytesBuffered < LOCAL_FILE_UTF_8_BUFFER_SIZE &&
      bufferedPlusLength.value() > LOCAL_FILE_UTF_8_BUFFER_SIZE;
  if (!metaBoundaryWithinBuffer && !localFileLimitWithinBuffer) {
    // Truncation OK, because we just checked the range.
    mNumBytesBuffered = bufferedPlusLength.value();
    mBufferedBytes.AppendElement(std::move(aBuffer));
    DoDataAvailable(mBufferedBytes.LastElement());
  } else {
    MOZ_RELEASE_ASSERT(
        !(metaBoundaryWithinBuffer && localFileLimitWithinBuffer),
        "How can Necko give us a buffer this large?");
    size_t boundary = metaBoundaryWithinBuffer
                          ? UNCONDITIONAL_META_SCAN_BOUNDARY
                          : LOCAL_FILE_UTF_8_BUFFER_SIZE;
    // Truncation OK, because the constant is small enough.
    size_t overBoundary = bufferedPlusLength.value() - boundary;
    MOZ_RELEASE_ASSERT(overBoundary < aBuffer.Length());
    size_t untilBoundary = aBuffer.Length() - overBoundary;
    auto span = aBuffer.AsSpan();
    auto head = span.To(untilBoundary);
    auto tail = span.From(untilBoundary);
    MOZ_RELEASE_ASSERT(mNumBytesBuffered + untilBoundary == boundary);
    // The following copies may end up being useless, but optimizing
    // them away would add complexity.
    Maybe<Buffer<uint8_t>> maybeHead = Buffer<uint8_t>::CopyFrom(head);
    if (maybeHead.isNothing()) {
      MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
    mNumBytesBuffered = boundary;
    mBufferedBytes.AppendElement(std::move(*maybeHead));
    DoDataAvailable(mBufferedBytes.LastElement());
    // Re-decode may have happened here.

    Maybe<Buffer<uint8_t>> maybeTail = Buffer<uint8_t>::CopyFrom(tail);
    if (maybeTail.isNothing()) {
      MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
    mNumBytesBuffered += tail.Length();
    mBufferedBytes.AppendElement(std::move(*maybeTail));
    DoDataAvailable(mBufferedBytes.LastElement());
  }
  // Do this clean-up here to avoid use-after-free when
  // DoDataAvailable is passed a span pointing into an
  // element of mBufferedBytes.
  if (!mBufferingBytes) {
    mBufferedBytes.Clear();
  }
}
void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState, "DoDataAvailable called when stream not open.");
mTokenizerMutex.AssertCurrentThreadOwns();
if (IsTerminated()) { return;
}
nsresult rv; if (HasDecoder()) { if ((mForceAutoDetection || mCharsetSource < kCharsetFromParentFrame) &&
!mBufferingBytes && !mReparseForbidden &&
!(mMode == LOAD_AS_DATA || mMode == VIEW_SOURCE_XML)) {
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing, "How is mBufferingBytes false if " "mDecodingLocalFileWithoutTokenizing is true?");
FeedDetector(aBuffer);
}
rv = WriteStreamBytes(aBuffer);
} else {
rv = SniffStreamBytes(aBuffer, false);
} if (NS_FAILED(rv)) {
MarkAsBroken(rv); return;
}
if (IsTerminatedOrInterrupted()) { return;
}
if (!mLookingForMetaCharset && mDecodingLocalFileWithoutTokenizing) { return;
}
if (NS_FAILED(rv = mTreeBuilder->IsBroken())) { return rv;
}
// Since we're getting OnDataAvailable directly on the parser thread, // there is no nsHtml5DataAvailable that would call PostLoadFlusher. // Hence, we need to call PostLoadFlusher() before this method returns. // Braces for RAII clarity relative to the mutex despite not being // strictly necessary.
{ auto speculationFlusher = MakeScopeExit([&] { PostLoadFlusher(); });
if (newEncoding == X_USER_DEFINED_ENCODING) { // WebKit/Blink hack for Indian and Armenian legacy sites
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", true,
mTokenizer->getLineNumber());
newEncoding = WINDOWS_1252_ENCODING;
}
if (newEncoding == REPLACEMENT_ENCODING) { // No line number, because the replacement encoding doesn't allow // showing the lines.
mTreeBuilder->MaybeComplainAboutCharset("EncMetaReplacement", true, 0);
}
nsString newEncoding; // Not Auto, because using it to hold nsStringBuffer*
aEncoding.ToString(newEncoding); auto encoding = PreferredForInternalEncodingDecl(newEncoding); if (!encoding) { returnfalse;
}
mSeenEligibleMetaCharset = true;
if (!mLookingForMetaCharset) { if (mInitialEncodingWasFromParentFrame) {
mTreeBuilder->MaybeComplainAboutCharset("EncMetaTooLateFrame", true,
mTokenizer->getLineNumber());
} else {
mTreeBuilder->MaybeComplainAboutCharset("EncMetaTooLate", true,
mTokenizer->getLineNumber());
} returnfalse;
} if (mTemplatePushedOrHeadPopped) {
mTreeBuilder->MaybeComplainAboutCharset("EncMetaAfterHeadInKilobyte", false,
mTokenizer->getLineNumber());
}
mTreeBuilder->FlushLoads(); // Dispatch this runnable unconditionally, because the loads // that need flushing may have been flushed earlier even if the // flush right above here did nothing. (Is this still true?)
nsCOMPtr<nsIRunnable> runnable(mLoadFlusher); if (NS_FAILED(
DispatchToMain(CreateRenderBlockingRunnable(runnable.forget())))) {
NS_WARNING("failed to dispatch load flush event");
}
if ((mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) &&
mTokenizer->ShouldFlushViewSource()) { auto r = mTreeBuilder->Flush(); // delete useless ops
MOZ_ASSERT(r.isOk(), "Should have null sink with View Source");
r = mTokenizer->FlushViewSource(); if (r.isErr()) {
MarkAsBroken(r.unwrapErr()); return;
} if (r.unwrap()) {
nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher); if (NS_FAILED(DispatchToMain(runnable.forget()))) {
NS_WARNING("failed to dispatch executor flush event");
}
}
}
}
void nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() {
MOZ_ASSERT(IsParserThread(), "Wrong thread!"); if (mFlushTimerArmed) { // avoid calling Cancel if the flush timer isn't armed to avoid acquiring // a mutex
{
mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
mFlushTimer->Cancel();
}
mFlushTimerArmed = false;
}
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.43 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.