/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * nsIContentSerializer implementation that can be used with an
 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
 * (e.g. for copy/paste as plaintext).
 */
// Base indentation unit, in columns. The list and <dd> indentation below are
// defined in terms of it.
static const int32_t kTabSize = 4;

/* Indentation of h1, if mHeaderStrategy = kIndentIncreasedWithHeaderLevel or
   = kNumberHeadingsAndIndentSlightly. Indentation of other headers is derived
   from that. */
static const int32_t kIndentSizeHeaders = 2;

/* If mHeaderStrategy = kIndentIncreasedWithHeaderLevel, indent h(x+1) this
   many columns more than h(x). */
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// U+00A0 NO-BREAK SPACE.
static const char16_t kNBSP = 160;

// U+0020 SPACE, the character a no-break space is replaced with.
static const char16_t kSPACE = ' ';
/**
 * Factory for the plain-text content serializer.
 *
 * Allocates a new nsPlainTextSerializer and hands the reference over to the
 * caller through |aSerializer|.
 *
 * @param aSerializer Out-parameter that receives the new serializer.
 * @return NS_OK.
 */
nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
  RefPtr<nsPlainTextSerializer> serializer(new nsPlainTextSerializer());
  serializer.forget(aSerializer);
  return NS_OK;
}
// @param aFlags As defined in nsIDocumentEncoder.idl. staticvoid DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) { // Set the line break character: if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
(aFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
aLineBreak.AssignLiteral(u"\r\n");
} elseif (aFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac
aLineBreak.AssignLiteral(u"\r");
} elseif (aFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM
aLineBreak.AssignLiteral(u"\n");
} else { // Platform/default
aLineBreak.AssignLiteral(NS_ULINEBREAK);
}
}
/**
 * Replaces every no-break space in mContent with an ordinary space, unless
 * the flags ask for no-break spaces to be kept.
 *
 * @param aFlags Encoder flags; when nsIDocumentEncoder::OutputPersistNBSP is
 *               set, mContent is left untouched.
 */
void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
    const int32_t aFlags) {
  if (aFlags & nsIDocumentEncoder::OutputPersistNBSP) {
    return;
  }

  // Replace all nbsp characters with spaces, which the unicode encoder
  // won't do for us.
  mContent.ReplaceChar(kNBSP, kSPACE);
}
if (aUseLineBreaker) { // We advance one line break point at a time from the beginning of the // mContent until we find a width less than or equal to wrap column.
uint32_t width = 0;
intl::LineBreakIteratorUtf16 lineBreakIter(mContent); while (Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) { // Trim space at the tail. UAX#14 doesn't have break opportunity for // ASCII space at the tail. const Maybe<uint32_t> originalNextGoodSpace = nextGoodSpace; while (*nextGoodSpace > 0 &&
mContent.CharAt(*nextGoodSpace - 1) == 0x20) {
nextGoodSpace = Some(*nextGoodSpace - 1);
} if (*nextGoodSpace == 0) { // Restore the original nextGoodSpace.
nextGoodSpace = originalNextGoodSpace;
}
width += GetUnicharStringWidth(Span<const char16_t>(
mContent.get() + goodSpace, *nextGoodSpace - goodSpace)); if (prefixwidth + width > aWrapColumn) { // The next break point makes the width exceeding the wrap column, so // goodSpace is what we want. break;
}
goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
}
return goodSpace;
}
// In this case we don't want strings, especially CJK-ones, to be split. See // bug 333064 for more information. We break only at ASCII spaces. if (aWrapColumn >= prefixwidth) { // Search backward from the adjusted wrap column or from the text end.
goodSpace =
std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1); while (goodSpace >= 0) { if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) { return goodSpace;
}
goodSpace--;
}
}
nsPlainTextSerializer::nsPlainTextSerializer()
: mFloatingLines(-1),
mLineBreakDue(false),
kSpace(u" "_ns) // Init of "constant"
{
mHeadLevel = 0;
mHasWrittenCiteBlockquote = false;
mSpanLevel = 0; for (int32_t i = 0; i <= 6; i++) {
mHeaderCounter[i] = 0;
}
// Flow
mEmptyLines = 1; // The start of the document is an "empty line" in itself,
mInWhitespace = false;
mPreFormattedMail = false;
mPreformattedBlockBoundary = false;
// initialize the tag stack to zero: // The stack only ever contains pointers to static atoms, so they don't // need refcounting.
mTagStack = newconst nsAtom*[TagStackSize];
mTagStackIndex = 0;
mIgnoreAboveIndex = (uint32_t)kNotFound;
if (mFlags & nsIDocumentEncoder::OutputFormatted) { // Get some prefs that controls how we do formatted output
mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
// XXX We should let the caller decide whether to do this or not
mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
mWrapColumn = aWrapColumn;
}
NS_IMETHODIMP
nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn, const Encoding* aEncoding, bool aIsCopying, bool aIsWholeDocument, bool* aNeedsPreformatScanning, nsAString& aOutput) { #ifdef DEBUG // Check if the major control flags are set correctly. if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { // One of OutputFormatted or OutputWrap must be set, but not both.
NS_ASSERTION((aFlags & nsIDocumentEncoder::OutputFormatted) !=
(aFlags & nsIDocumentEncoder::OutputWrap), "If you want format=flowed, you must combine it " "with either nsIDocumentEncoder::OutputFormatted " "or nsIDocumentEncoder::OutputWrap");
}
if (aFlags & nsIDocumentEncoder::OutputFormatted) {
NS_ASSERTION(
!(aFlags & nsIDocumentEncoder::OutputPreformatted), "Can't do formatted and preformatted output at the same time!");
} #endif
MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
(aFlags & nsIDocumentEncoder::OutputFormatFlowed));
// Return true if aElement has 'display:none' or if we just don't know. staticbool IsDisplayNone(Element* aElement) {
RefPtr<const ComputedStyle> computedStyle =
nsComputedDOMStyle::GetComputedStyleNoFlush(aElement); return !computedStyle ||
computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
}
int32_t fragLength = frag->GetLength();
int32_t endoffset =
(aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
nsAutoString textstr; if (frag->Is2b()) {
textstr.Assign(frag->Get2b() + aStartOffset, length);
} else { // AssignASCII is for 7-bit character only, so don't use it constchar* data = frag->Get1b();
CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
}
// Mask the text if the text node is in a password field. if (content->HasFlag(NS_MAYBE_MASKED)) {
TextEditor::MaskString(textstr, *content->AsText(), 0, aStartOffset);
}
// We have to split the string across newlines // to match parser behavior
int32_t start = 0;
int32_t offset = textstr.FindCharInSet(u"\n\r"); while (offset != kNotFound) { if (offset > start) { // Pass in the line
DoAddText(false, Substring(textstr, start, offset - start));
}
// Consume the last bit of the string if there's any left if (start < length) { if (start) {
DoAddText(false, Substring(textstr, start, length - start));
} else {
DoAddText(false, textstr);
}
}
/**
 * Pops the most recent entry from mPreformatStack.
 *
 * Release-asserts that the stack is non-empty, i.e. that a matching push
 * happened before. |aElement| itself is not consulted here.
 *
 * @return NS_OK.
 */
NS_IMETHODIMP
nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
  MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
                     "Tried to pop without previous push.");

  mPreformatStack.pop();

  return NS_OK;
}
nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) { if (IsIgnorableRubyAnnotation(aTag)) { // Ignorable ruby annotation shouldn't be replaced by a placeholder // character, neither any of its descendants.
mIgnoredChildNodeLevel++; return NS_OK;
} if (IsIgnorableScriptOrStyle(mElement)) {
mIgnoredChildNodeLevel++; return NS_OK;
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) { if (mPreformattedBlockBoundary && DoOutput()) { // Should always end a line, but get no more whitespace if (mFloatingLines < 0) mFloatingLines = 0;
mLineBreakDue = true;
}
mPreformattedBlockBoundary = false;
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) { // Raw means raw. Don't even think about doing anything fancy // here like indenting, adding line breaks or any other // characters such as list item bullets, quote characters // around <q>, etc.
return NS_OK;
}
if (mTagStackIndex < TagStackSize) {
mTagStack[mTagStackIndex++] = aTag;
}
if (mIgnoreAboveIndex != (uint32_t)kNotFound) { return NS_OK;
}
// Reset this so that <blockquote type=cite> doesn't affect the whitespace // above random <pre>s below it.
mHasWrittenCiteBlockquote =
mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
bool isInCiteBlockquote = false;
// XXX special-case <blockquote type=cite> so that we don't add additional // newlines before the text. if (aTag == nsGkAtoms::blockquote) {
nsAutoString value;
nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
}
if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
// Check if this tag's content that should not be output if ((aTag == nsGkAtoms::noscript &&
!mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
!mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) { // Ignore everything that follows the current tag in // question until a matching end tag is encountered.
mIgnoreAboveIndex = mTagStackIndex - 1; return NS_OK;
}
if (aTag == nsGkAtoms::body) { // Try to figure out here whether we have a // preformatted style attribute set by Thunderbird. // // Trigger on the presence of a "pre-wrap" in the // style attribute. That's a very simplistic way to do // it, but better than nothing.
nsAutoString style;
int32_t whitespace; if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
(kNotFound != (whitespace = style.Find(u"white-space:")))) { if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) { #ifdef DEBUG_preformatted
printf("Set mPreFormattedMail based on style pre-wrap\n"); #endif
mPreFormattedMail = true;
} elseif (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) { #ifdef DEBUG_preformatted
printf("Set mPreFormattedMail based on style pre\n"); #endif
mPreFormattedMail = true;
}
} else { /* See comment at end of function. */
mInWhitespace = true;
mPreFormattedMail = false;
}
return NS_OK;
}
// Keep this in sync with DoCloseContainer! if (!DoOutput()) { return NS_OK;
}
if (aTag == nsGkAtoms::p)
EnsureVerticalSpace(1); elseif (aTag == nsGkAtoms::pre) { if (GetLastBool(mIsInCiteBlockquote))
EnsureVerticalSpace(0); elseif (mHasWrittenCiteBlockquote) {
EnsureVerticalSpace(0);
mHasWrittenCiteBlockquote = false;
} else
EnsureVerticalSpace(1);
} elseif (aTag == nsGkAtoms::tr) {
PushBool(mHasWrittenCellsForRow, false);
} elseif (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { // We must make sure that the content of two table cells get a // space between them.
// To make the separation between cells most obvious and // importable, we use a TAB. if (mHasWrittenCellsForRow.IsEmpty()) { // We don't always see a <tr> (nor a <table>) before the <td> if we're // copying part of a table
PushBool(mHasWrittenCellsForRow, true); // will never be popped
} elseif (GetLastBool(mHasWrittenCellsForRow)) { // Bypass |Write| so that the TAB isn't compressed away.
AddToLine(u"\t", 1);
mInWhitespace = true;
} else {
SetLastBool(mHasWrittenCellsForRow, true);
}
} elseif (aTag == nsGkAtoms::ul) { // Indent here to support nested lists, which aren't included in li :-(
EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1); // Must end the current line before we change indention
mCurrentLine.mIndentation.mLength += kIndentSizeList;
mULCount++;
} elseif (aTag == nsGkAtoms::ol) {
EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1); if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) { // Must end the current line before we change indention
nsAutoString startAttr;
int32_t startVal = 1; if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
nsresult rv = NS_OK;
startVal = startAttr.ToInteger(&rv); if (NS_FAILED(rv)) {
startVal = 1;
}
}
mOLStack.AppendElement(startVal);
} else {
mOLStack.AppendElement(kOlStackDummyValue);
}
mCurrentLine.mIndentation.mLength += kIndentSizeList; // see ul
} elseif (aTag == nsGkAtoms::li &&
mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) { if (mTagStackIndex > 1 && IsInOL()) { if (!mOLStack.IsEmpty()) {
nsAutoString valueAttr; if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
nsresult rv = NS_OK;
int32_t valueAttrVal = valueAttr.ToInteger(&rv); if (NS_SUCCEEDED(rv)) {
mOLStack.LastElement() = valueAttrVal;
}
} // This is what nsBulletFrame does for OLs:
mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
mOLStack.LastElement()++;
} else {
mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
}
// Else make sure we'll separate block level tags, // even if we're about to leave, before doing any other formatting. elseif (IsCssBlockLevelElement(mElement)) {
EnsureVerticalSpace(0);
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
OpenContainerForOutputFormatted(aTag);
} return NS_OK;
}
/* Container elements are always block elements, so we shouldn't output any whitespace immediately after the container tag even if there's extra whitespace there because the HTML is pretty-printed or something. To ensure that happens, tell the serializer we're
already in whitespace so it won't output more. */
mInWhitespace = true;
}
nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) { if (IsIgnorableRubyAnnotation(aTag)) {
mIgnoredChildNodeLevel--; return NS_OK;
} if (IsIgnorableScriptOrStyle(mElement)) {
mIgnoredChildNodeLevel--; return NS_OK;
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) { if (DoOutput() && IsElementPreformatted() &&
IsCssBlockLevelElement(mElement)) { // If we're closing a preformatted block element, output a line break // when we find a new container.
mPreformattedBlockBoundary = true;
}
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) { // Raw means raw. Don't even think about doing anything fancy // here like indenting, adding line breaks or any other // characters such as list item bullets, quote characters // around <q>, etc.
return NS_OK;
}
if (mTagStackIndex > 0) {
--mTagStackIndex;
}
if (mTagStackIndex >= mIgnoreAboveIndex) { if (mTagStackIndex == mIgnoreAboveIndex) { // We're dealing with the close tag whose matching // open tag had set the mIgnoreAboveIndex value. // Reset mIgnoreAboveIndex before discarding this tag.
mIgnoreAboveIndex = (uint32_t)kNotFound;
} return NS_OK;
}
MOZ_ASSERT(mOutputManager);
// End current line if we're ending a block level tag if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { // We want the output to end with a new line, // but in preformatted areas like text fields, // we can't emit newlines that weren't there. // So add the newline only in the case of formatted output. if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
EnsureVerticalSpace(0);
} else {
mOutputManager->Flush(mCurrentLine);
} // We won't want to do anything with these in formatted mode either, // so just return now: return NS_OK;
}
// Keep this in sync with DoOpenContainer! if (!DoOutput()) { return NS_OK;
}
if (aTag == nsGkAtoms::tr) {
PopBool(mHasWrittenCellsForRow); // Should always end a line, but get no more whitespace if (mFloatingLines < 0) mFloatingLines = 0;
mLineBreakDue = true;
} elseif (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) { // Items that should always end a line, but get no more whitespace if (mFloatingLines < 0) mFloatingLines = 0;
mLineBreakDue = true;
} elseif (aTag == nsGkAtoms::pre) {
mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
mLineBreakDue = true;
} elseif (aTag == nsGkAtoms::ul) {
mOutputManager->Flush(mCurrentLine);
mCurrentLine.mIndentation.mLength -= kIndentSizeList;
--mULCount; if (!IsInOlOrUl()) {
mFloatingLines = 1;
mLineBreakDue = true;
}
} elseif (aTag == nsGkAtoms::ol) {
mOutputManager->Flush(mCurrentLine); // Doing this after decreasing // OLStackIndex would be wrong.
mCurrentLine.mIndentation.mLength -= kIndentSizeList;
MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
mOLStack.RemoveLastElement(); if (!IsInOlOrUl()) {
mFloatingLines = 1;
mLineBreakDue = true;
}
} elseif (aTag == nsGkAtoms::dl) {
mFloatingLines = 1;
mLineBreakDue = true;
} elseif (aTag == nsGkAtoms::dd) {
mOutputManager->Flush(mCurrentLine);
mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
} elseif (aTag == nsGkAtoms::span) {
NS_ASSERTION(mSpanLevel, "Span level will be negative!");
--mSpanLevel;
} elseif (aTag == nsGkAtoms::div) { if (mFloatingLines < 0) mFloatingLines = 0;
mLineBreakDue = true;
} elseif (aTag == nsGkAtoms::blockquote) {
mOutputManager->Flush(mCurrentLine); // Is this needed?
// Pop bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
if (isInCiteBlockquote) {
NS_ASSERTION(mCurrentLine.mCiteQuoteLevel, "CiteQuote level will be negative!");
mCurrentLine.mCiteQuoteLevel--;
mFloatingLines = 0;
mHasWrittenCiteBlockquote = true;
} else {
mCurrentLine.mIndentation.mLength -= kTabSize;
mFloatingLines = 1;
}
mLineBreakDue = true;
} elseif (aTag == nsGkAtoms::q) {
Write(u"\""_ns);
} elseif (IsCssBlockLevelElement(mElement)) { // All other blocks get 1 vertical space after them // in formatted mode, otherwise 0. // This is hard. Sometimes 0 is a better number, but // how to know? if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
EnsureVerticalSpace(1);
} else { if (mFloatingLines < 0) mFloatingLines = 0;
mLineBreakDue = true;
}
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
CloseContainerForOutputFormatted(aTag);
}
void nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText) { // If we don't want any output, just return if (!DoOutput()) { return;
}
if (!aIsLineBreak) { // Make sure to reset this, since it's no longer true.
mHasWrittenCiteBlockquote = false;
}
if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
if (MustSuppressLeaf()) { return;
}
if (aIsLineBreak) { // The only times we want to pass along whitespace from the original // html source are if we're forced into preformatted mode via flags, // or if we're prettyprinting and we're inside a <pre>. // Otherwise, either we're collapsing to minimal text, or we're // prettyprinting to mimic the html format, and in neither case // does the formatting of the html source help us. if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
(mPreFormattedMail && !mSettings.GetWrapColumn()) ||
IsElementPreformatted()) {
EnsureVerticalSpace(mEmptyLines + 1);
} elseif (!mInWhitespace) {
Write(kSpace);
mInWhitespace = true;
} return;
}
if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
if (MustSuppressLeaf()) { return NS_OK;
}
if (aTag == nsGkAtoms::br) { // Another egregious editor workaround, see bug 38194: // ignore the bogus br tags that the editor sticks here and there. // FYI: `brElement` may be `nullptr` if the element is <br> element // of non-HTML element. // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element // is not an HTML element?
HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement); if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
EnsureVerticalSpace(mEmptyLines + 1);
}
} elseif (aTag == nsGkAtoms::hr &&
mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
EnsureVerticalSpace(0);
// Make a line of dashes as wide as the wrap width // XXX honoring percentage would be nice
nsAutoString line;
CreateLineOfDashes(line, mSettings.GetWrapColumn());
Write(line);
EnsureVerticalSpace(0);
} elseif (aTag == nsGkAtoms::img) { /* Output (in decreasing order of preference)
alt, title or nothing */ // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
nsAutoString imageDescription; if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) { // If the alt attribute has an empty value (|alt=""|), output nothing
} elseif (NS_SUCCEEDED(
GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
!imageDescription.IsEmpty()) {
imageDescription = u" ["_ns + imageDescription + u"] "_ns;
}
Write(imageDescription);
}
return NS_OK;
}
/** * Adds as many newline as necessary to get |aNumberOfRows| empty lines * * aNumberOfRows = -1 : Being in the middle of some line of text * aNumberOfRows = 0 : Being at the start of a line * aNumberOfRows = n>0 : Having n empty lines before the current line.
*/ void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) { // If we have something in the indent we probably want to output // it and it's not included in the count for empty lines so we don't // realize that we should start a new line. if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
EndLine(false);
mInWhitespace = true;
}
void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() { if (!mSettings.MayWrap()) { return;
}
// Yes, wrap! // The "+4" is to avoid wrap lines that only would be a couple // of letters too long. We give this bonus only if the // wrapcolumn is more than 20. const uint32_t wrapColumn = mSettings.GetWrapColumn();
uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0; while (!mCurrentLine.mContent.IsEmpty()) { const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth(); // The width of the line as it will appear on the screen (approx.). const uint32_t currentLineContentWidth =
GetUnicharStringWidth(mCurrentLine.mContent); if (currentLineContentWidth + prefixwidth <= wrapColumn + bonuswidth) { break;
}
const int32_t contentLength = mCurrentLine.mContent.Length(); if (goodSpace <= 0 || goodSpace >= contentLength) { // Nothing to do. Hopefully we get more data later to use for a place to // break line. break;
} // Found a place to break // -1 (trim a char at the break position) only if the line break was a // space.
nsAutoString restOfContent; if (nsCRT::IsAsciiSpace(mCurrentLine.mContent.CharAt(goodSpace))) {
mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace - 1);
} else {
mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace);
} // if breaker was U+0020, it has to consider for delsp=yes support constbool breakBySpace = mCurrentLine.mContent.CharAt(goodSpace) == ' ';
mCurrentLine.mContent.Truncate(goodSpace);
EndLine(true, breakBySpace);
mCurrentLine.mContent.Truncate(); // Space stuffing a la RFC 2646 (format=flowed) if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
mCurrentLine.mSpaceStuffed = !restOfContent.IsEmpty() &&
IsSpaceStuffable(restOfContent.get()) && // We space-stuff quoted lines anyway
mCurrentLine.mCiteQuoteLevel == 0;
}
mCurrentLine.mContent.Append(restOfContent);
mEmptyLines = -1;
}
}
/** * This function adds a piece of text to the current stored line. If we are * wrapping text and the stored line will become too long, a suitable * location to wrap will be found and the line that's complete will be * output.
*/ void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
int32_t aLineFragmentLength) { if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
if (mCurrentLine.mContent.IsEmpty()) { if (0 == aLineFragmentLength) { return;
}
if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) { // Space stuffing a la RFC 2646 (format=flowed). // We space-stuff quoted lines anyway
mCurrentLine.mSpaceStuffed =
IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0;
}
mEmptyLines = -1;
}
// The OpenPGP dash-escaped signature separator in inline
// signed messages according to the OpenPGP standard (RFC 2440).
const char kDashEscapedSignatureSeparator[] = "- -- ";
/** * Outputs the contents of mCurrentLine.mContent, and resets line * specific variables. Also adds an indentation and prefix if there is one * specified. Strips ending spaces from the line if it isn't preformatted.
*/ void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) { if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) { // No meaning return;
}
/* In non-preformatted mode, remove spaces from the end of the line for * format=flowed compatibility. Don't do this for these special cases: * "-- ", the signature separator (RFC 2646) shouldn't be touched and * "- -- ", the OpenPGP dash-escaped signature separator in inline * signed messages according to the OpenPGP standard (RFC 2440).
*/ if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
(aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
mCurrentLine.mContent.Trim(" ", false, true, false);
}
if (aSoftLineBreak &&
mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
!mCurrentLine.mIndentation.mLength) { // Add the soft part of the soft linebreak (RFC 2646 4.1) // We only do this when there is no indentation since format=flowed // lines and indentation doesn't work well together.
// If breaker character is ASCII space with RFC 3676 support (delsp=yes), // add twice space. if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
aBreakBySpace) {
mCurrentLine.mContent.AppendLiteral(" ");
} else {
mCurrentLine.mContent.Append(char16_t(' '));
}
}
if (aSoftLineBreak) {
mEmptyLines = 0;
} else { // Hard break if (mCurrentLine.HasContentOrIndentationHeader()) {
mEmptyLines = 0;
} else {
mEmptyLines++;
}
}
// If we don't have anything "real" to output we have to // make sure the indent doesn't end in a space since that // would trick a format=flowed-aware receiver.
mOutputManager->Append(mCurrentLine,
OutputManager::StripTrailingWhitespaces::kMaybe);
mOutputManager->AppendLineBreak();
mCurrentLine.ResetContentAndIndentationHeader();
mInWhitespace = true;
mLineBreakDue = false;
mFloatingLines = -1;
}
/**
 * Creates the calculated and stored indent and text in the indentation. That
 * is quote chars and numbers for numbered lists and such.
 *
 * @param aResult Receives the prefix. It is overwritten only when there is a
 *                cite-quote prefix (mCiteQuoteLevel > 0); otherwise the
 *                indentation and header are appended to whatever it already
 *                holds.
 */
void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
    nsAString& aResult) const {
  // Put the mail quote "> " chars in, if appropriate:
  if (mCiteQuoteLevel > 0) {
    nsAutoString quotes;
    for (int i = 0; i < mCiteQuoteLevel; i++) {
      quotes.Append(char16_t('>'));
    }
    if (!mContent.IsEmpty()) {
      /* Better don't output a space here, if the line is empty,
         in case a receiving format=flowed-aware UA thinks, this were a flowed
         line, which it isn't - it's just empty. (Flowed lines may be joined
         with the following one, so the empty line may be lost completely.) */
      quotes.Append(char16_t(' '));
    }
    aResult = quotes;
  }

  // Indent if necessary. The header text occupies part of the indentation
  // width, so only the remainder is filled with spaces.
  int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
  if (mSpaceStuffed) {
    indentwidth += 1;
  }

  // Don't make empty lines look flowed
  if (indentwidth > 0 && HasContentOrIndentationHeader()) {
    nsAutoString spaces;
    for (int i = 0; i < indentwidth; ++i) {
      spaces.Append(char16_t(' '));
    }
    aResult += spaces;
  }

  if (!mIndentation.mHeader.IsEmpty()) {
    aResult += mIndentation.mHeader;
  }
}
// Returns true if |c| is a line feed, a carriage return, an ASCII space or a
// horizontal tab.
static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
  return ('\n' == c || '\r' == c || ' ' == c || '\t' == c);
}
staticvoid ReplaceVisiblyTrailingNbsps(nsAString& aString) { const int32_t totLen = aString.Length(); for (int32_t i = totLen - 1; i >= 0; i--) {
char16_t c = aString[i]; if (IsLineFeedCarriageReturnBlankOrTab(c)) { continue;
} if (kNBSP == c) {
aString.Replace(i, 1, ' ');
} else { break;
}
}
}
// Put the mail quote "> " chars in, if appropriate. // Have to put it in before every line.
int32_t bol = 0; while (bol < totLen) { bool outputLineBreak = false; bool spacesOnly = true;
// Find one of '\n' or '\r' using iterators since nsAString // doesn't have the old FindCharInSet function.
nsAString::const_iterator iter;
aString.BeginReading(iter);
nsAString::const_iterator done_searching;
aString.EndReading(done_searching);
iter.advance(bol);
int32_t new_newline = bol;
newline = kNotFound; while (iter != done_searching) { if ('\n' == *iter || '\r' == *iter) {
newline = new_newline; break;
} if (' ' != *iter) {
spacesOnly = false;
}
++new_newline;
++iter;
}
// Done searching
nsAutoString stringpart; if (newline == kNotFound) { // No new lines.
stringpart.Assign(Substring(aString, bol, totLen - bol)); if (!stringpart.IsEmpty()) {
char16_t lastchar = stringpart.Last();
mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
}
mEmptyLines = -1;
bol = totLen;
} else { // There is a newline
stringpart.Assign(Substring(aString, bol, newline - bol));
mInWhitespace = true;
outputLineBreak = true;
mEmptyLines = 0;
bol = newline + 1; if ('\r' == *iter && bol < totLen && '\n' == *++iter) { // There was a CRLF in the input. This used to be illegal and // stripped by the parser. Apparently not anymore. Let's skip // over the LF.
bol++;
}
}
#ifdef DEBUG_wrapping
printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen); #endif
}
/** * Write a string. This is the highlevel function to use to get text output. * By using AddToLine, Output, EndLine and other functions it handles quotation, * line wrapping, indentation, whitespace compression and other things.
*/ void nsPlainTextSerializer::Write(const nsAString& aStr) { // XXX Copy necessary to use nsString methods and gain // access to underlying buffer
nsAutoString str(aStr);
#ifdef DEBUG_wrapping
printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
mSettings.GetWrapColumn()); #endif
const int32_t totLen = str.Length();
// If the string is empty, do nothing: if (totLen <= 0) return;
// For Flowed text change nbsp-ses to spaces at end of lines to allow them // to be cut off along with usual spaces if required. (bug #125928) if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
ReplaceVisiblyTrailingNbsps(str);
}
// We have two major codepaths here. One that does preformatted text and one // that does normal formatted text. The one for preformatted text calls // Output directly while the other code path goes through AddToLine. if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
(IsElementPreformatted() && !mPreFormattedMail) ||
(mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) { // No intelligent wrapping.
// This mustn't be mixed with intelligent wrapping without clearing // the mCurrentLine.mContent buffer before!!!
NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
(IsElementPreformatted() && !mPreFormattedMail), "Mixed wrapping data and nonwrapping data on the same line");
MOZ_ASSERT(mOutputManager);
if (!mCurrentLine.mContent.IsEmpty()) {
mOutputManager->Flush(mCurrentLine);
}
ConvertToLinesAndOutput(str); return;
}
// Intelligent handling of text // If needed, strip out all "end of lines" // and multiple whitespace between words
int32_t nextpos; const char16_t* offsetIntoBuffer = nullptr;
int32_t bol = 0; while (bol < totLen) { // Loop over lines // Find a place where we may have to do whitespace compression
nextpos = str.FindCharInSet(u" \t\n\r", bol); #ifdef DEBUG_wrapping
nsAutoString remaining;
str.Right(remaining, totLen - bol);
foo = ToNewCString(remaining); // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, " // "string = '%s'\n", bol, nextpos, totLen, foo);
free(foo); #endif
if (nextpos == kNotFound) { // The rest of the string
offsetIntoBuffer = str.get() + bol;
AddToLine(offsetIntoBuffer, totLen - bol);
bol = totLen;
mInWhitespace = false;
} else { // There's still whitespace left in the string if (nextpos != 0 && (nextpos + 1) < totLen) {
offsetIntoBuffer = str.get() + nextpos; // skip '\n' if it is between CJ chars if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
IS_CJ_CHAR(offsetIntoBuffer[1])) {
offsetIntoBuffer = str.get() + bol;
AddToLine(offsetIntoBuffer, nextpos - bol);
bol = nextpos + 1; continue;
}
} // If we're already in whitespace and not preformatted, just skip it: if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) { // Skip whitespace
bol++; continue;
}
if (nextpos == bol) { // Note that we are in whitespace.
mInWhitespace = true;
offsetIntoBuffer = str.get() + nextpos;
AddToLine(offsetIntoBuffer, 1);
bol++; continue;
}
mInWhitespace = true;
offsetIntoBuffer = str.get() + bol; if (mPreFormattedMail ||
mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) { // Preserve the real whitespace character
nextpos++;
AddToLine(offsetIntoBuffer, nextpos - bol);
bol = nextpos;
} else { // Replace the whitespace with a space
AddToLine(offsetIntoBuffer, nextpos - bol);
AddToLine(kSpace.get(), 1);
bol = nextpos + 1; // Let's eat the whitespace
}
}
} // Continue looping over the string
}
/**
 * Looks up an attribute on the current element (mElement) and copies its
 * value into aValueRet.
 *
 * @param aName     The attribute to look up.
 * @param aValueRet Receives the attribute value on success.
 * @return NS_OK if the attribute was found; NS_ERROR_NOT_AVAILABLE if there
 *         is no current element or the attribute is not specified.
 */
nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
                                                  nsString& aValueRet) const {
  // Short-circuit keeps GetAttr from being called without an element.
  const bool found = mElement && mElement->GetAttr(aName, aValueRet);
  if (found) {
    return NS_OK;
  }

  return NS_ERROR_NOT_AVAILABLE;
}
/** * Returns true, if the element was inserted by Moz' TXT->HTML converter. * In this case, we should ignore it.
*/ bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
nsAutoString value;
nsresult rv = GetAttributeValue(nsGkAtoms::_class, value); return (NS_SUCCEEDED(rv) &&
(StringBeginsWith(value, u"moz-txt"_ns,
nsASCIICaseInsensitiveStringComparator) ||
StringBeginsWith(value, u"\"moz-txt"_ns,
nsASCIICaseInsensitiveStringComparator)));
}
/**
 * Determines whether aElement should be treated as preformatted.
 *
 * Uses the element's computed style (obtained without flushing) when one is
 * available; otherwise falls back to checking whether the element is a
 * <pre> tag.
 */
bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
  RefPtr<const ComputedStyle> style =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);

  if (!style) {
    // Fall back to looking at the tag, in case there is no style information.
    return GetIdForContent(aElement) == nsGkAtoms::pre;
  }

  return style->StyleText()->WhiteSpaceOrNewlineIsSignificant();
}
/**
 * Determines whether aElement is a block-level element.
 *
 * Uses the element's computed display style (obtained without flushing) when
 * one is available; otherwise falls back to the tag-based
 * nsContentUtils::IsHTMLBlockLevelElement() check.
 */
bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
  RefPtr<const ComputedStyle> style =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);

  if (!style) {
    // Fall back to looking at the tag, in case there is no style information.
    return nsContentUtils::IsHTMLBlockLevelElement(aElement);
  }

  return style->StyleDisplay()->IsBlockOutsideStyle();
}
/** * This method is required only to identify LI's inside OL. * Returns TRUE if we are inside an OL tag and FALSE otherwise.
*/ bool nsPlainTextSerializer::IsInOL() const {
int32_t i = mTagStackIndex; while (--i >= 0) { if (mTagStack[i] == nsGkAtoms::ol) returntrue; if (mTagStack[i] == nsGkAtoms::ul) { // If a UL is reached first, LI belongs the UL nested in OL. returnfalse;
}
} // We may reach here for orphan LI's. returnfalse;
}
/**
 * Maps a tag atom to its HTML heading level.
 *
 * @param aTag The tag to classify.
 * @return 0 = no header, 1 = h1, ..., 6 = h6
 */
int32_t HeaderLevel(const nsAtom* aTag) {
  // Entry i holds the atom for heading level i + 1.
  const nsAtom* const headingTags[] = {nsGkAtoms::h1, nsGkAtoms::h2,
                                       nsGkAtoms::h3, nsGkAtoms::h4,
                                       nsGkAtoms::h5, nsGkAtoms::h6};

  int32_t level = 0;
  for (const nsAtom* headingTag : headingTags) {
    ++level;
    if (aTag == headingTag) {
      return level;
    }
  }

  return 0;
}
/* These functions define the column width of an ISO 10646 character * as follows: * * - The null character (U+0000) has a column width of 0. * * - Other C0/C1 control characters and DEL will lead to a return * value of -1. * * - Non-spacing and enclosing combining characters (general * category code Mn or Me in the Unicode database) have a * column width of 0. * * - Spacing characters in the East Asian Wide (W) or East Asian * FullWidth (F) category as defined in Unicode Technical * Report #11 have a column width of 2. * * - All remaining characters (including all printable * ISO 8859-1 and WGL4 characters, Unicode control characters, * etc.) have a column width of 1.
*/
int32_t GetUnicharWidth(char32_t aCh) { /* test for 8-bit control characters */ if (aCh == 0) { return 0;
} if (aCh < 32 || (aCh >= 0x7f && aCh < 0xa0)) { return -1;
}
/* The first combining char in Unicode is U+0300 */ if (aCh < 0x0300) { return 1;
}
auto gc = unicode::GetGeneralCategory(aCh); if (gc == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ||
gc == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) { return 0;
}
/* if we arrive here, ucs is not a combining or C0/C1 control character */
/* fast test for majority of non-wide scripts */ if (aCh < 0x1100) { return 1;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.