Quelle nsPlainTextSerializer.cpp Sprache: C

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* nsIContentSerializer implementation that can be used with an
* nsIDocumentEncoder to convert a DOM into plaintext in a nice way
* (eg for copy/paste as plaintext).
*/

#include "nsPlainTextSerializer.h"

#include <limits>

#include "nsPrintfCString.h"
#include "nsDebug.h"
#include "nsGkAtoms.h"
#include "nsNameSpaceManager.h"
#include "nsTextFragment.h"
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsCRT.h"
#include "mozilla/Casting.h"
#include "mozilla/TextEditor.h"
#include "mozilla/dom/CharacterData.h"
#include "mozilla/dom/Element.h"
#include "mozilla/dom/HTMLBRElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/dom/AbstractRange.h"
#include "nsUnicodeProperties.h"
#include "mozilla/Span.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_converter.h"
#include "nsComputedDOMStyle.h"

namespace mozilla {
class Encoding;
}

using namespace mozilla;
using namespace mozilla::dom;

#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

static const int32_t kTabSize = 4;
static const int32_t kIndentSizeHeaders =
    2; /* Indention of h1, if
        mHeaderStrategy = kIndentIncreasedWithHeaderLevel
        or = kNumberHeadingsAndIndentSlightly. Indention of
        other headers is derived from that. */
static const int32_t kIndentIncrementHeaders =
    2; /* If mHeaderStrategy = kIndentIncreasedWithHeaderLevel,
   indent h(x+1) this many
   columns more than h(x) */
static const int32_t kIndentSizeList = kTabSize;
// Indention of non-first lines of ul and ol
static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
static const char16_t kNBSP = 160;
static const char16_t kSPACE = ' ';

static int32_t HeaderLevel(const nsAtom* aTag);
static int32_t GetUnicharWidth(char32_t ucs);
static int32_t GetUnicharStringWidth(Span<const char16_t> aString);

// Someday may want to make this non-const:
static const uint32_t TagStackSize = 500;

NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)

nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
  RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
  it.forget(aSerializer);
  return NS_OK;
}

// @param aFlags As defined in nsIDocumentEncoder.idl.
static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
  // Set the line break character:
  if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
      (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
    // Windows
    aLineBreak.AssignLiteral(u"\r\n");
  } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
    // Mac
    aLineBreak.AssignLiteral(u"\r");
  } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
    // Unix/DOM
    aLineBreak.AssignLiteral(u"\n");
  } else {
    // Platform/default
    aLineBreak.AssignLiteral(NS_ULINEBREAK);
  }
}

void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
    const int32_t aFlags) {
  if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
    // First, replace all nbsp characters with spaces,
    // which the unicode encoder won't do for us.
    mContent.ReplaceChar(kNBSP, kSPACE);
  }
}

void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
  mContent.Truncate();
  mIndentation.mHeader.Truncate();
}

int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
    const uint32_t aWrapColumn, bool aUseLineBreaker) const {
  MOZ_ASSERT(!mContent.IsEmpty());

  const uint32_t prefixwidth = DeterminePrefixWidth();
  int32_t goodSpace = 0;

  if (aUseLineBreaker) {
    // We advance one line break point at a time from the beginning of the
    // mContent until we find a width less than or equal to wrap column.
    uint32_t width = 0;
    intl::LineBreakIteratorUtf16 lineBreakIter(mContent);
    while (Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) {
      // Trim space at the tail. UAX#14 doesn't have break opportunity for
      // ASCII space at the tail.
      const Maybe<uint32_t> originalNextGoodSpace = nextGoodSpace;
      while (*nextGoodSpace > 0 &&
             mContent.CharAt(*nextGoodSpace - 1) == 0x20) {
        nextGoodSpace = Some(*nextGoodSpace - 1);
      }
      if (*nextGoodSpace == 0) {
        // Restore the original nextGoodSpace.
        nextGoodSpace = originalNextGoodSpace;
      }

      width += GetUnicharStringWidth(Span<const char16_t>(
          mContent.get() + goodSpace, *nextGoodSpace - goodSpace));
      if (prefixwidth + width > aWrapColumn) {
        // The next break point makes the width exceeding the wrap column, so
        // goodSpace is what we want.
        break;
      }
      goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
    }

    return goodSpace;
  }

  // In this case we don't want strings, especially CJK-ones, to be split. See
  // bug 333064 for more information. We break only at ASCII spaces.
  if (aWrapColumn >= prefixwidth) {
    // Search backward from the adjusted wrap column or from the text end.
    goodSpace =
        std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
    while (goodSpace >= 0) {
      if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
        return goodSpace;
      }
      goodSpace--;
    }
  }

  // Search forward from the adjusted wrap column.
  goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
  const int32_t contentLength = mContent.Length();
  while (goodSpace < contentLength &&
         !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
    goodSpace++;
  }

  return goodSpace;
}

nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
                                                    nsAString& aOutput)
    : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
  MOZ_ASSERT(aOutput.IsEmpty());

  DetermineLineBreak(mFlags, mLineBreak);
}

void nsPlainTextSerializer::OutputManager::Append(
    const CurrentLine& aCurrentLine,
    const StripTrailingWhitespaces aStripTrailingWhitespaces) {
  if (IsAtFirstColumn()) {
    nsAutoString quotesAndIndent;
    aCurrentLine.CreateQuotesAndIndent(quotesAndIndent);

    if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
      const bool stripTrailingSpaces = aCurrentLine.mContent.IsEmpty();
      if (stripTrailingSpaces) {
        quotesAndIndent.Trim(" ", false, true, false);
      }
    }

    Append(quotesAndIndent);
  }

  Append(aCurrentLine.mContent);
}

void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
  if (!aString.IsEmpty()) {
    mOutput.Append(aString);
    mAtFirstColumn = false;
  }
}

void nsPlainTextSerializer::OutputManager::AppendLineBreak() {
  mOutput.Append(mLineBreak);
  mAtFirstColumn = true;
}

uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
  return mOutput.Length();
}

nsPlainTextSerializer::nsPlainTextSerializer()
    : mFloatingLines(-1),
      mLineBreakDue(false),
      kSpace(u" "_ns)  // Init of "constant"
{
  mHeadLevel = 0;
  mHasWrittenCiteBlockquote = false;
  mSpanLevel = 0;
  for (int32_t i = 0; i <= 6; i++) {
    mHeaderCounter[i] = 0;
  }

  // Flow
  mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
  mInWhitespace = false;
  mPreFormattedMail = false;

  mPreformattedBlockBoundary = false;

  // initialize the tag stack to zero:
  // The stack only ever contains pointers to static atoms, so they don't
  // need refcounting.
  mTagStack = new const nsAtom*[TagStackSize];
  mTagStackIndex = 0;
  mIgnoreAboveIndex = (uint32_t)kNotFound;

  mULCount = 0;

  mIgnoredChildNodeLevel = 0;
}

nsPlainTextSerializer::~nsPlainTextSerializer() {
  delete[] mTagStack;
  NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
}

nsPlainTextSerializer::Settings::HeaderStrategy
nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
  HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};

  switch (aPrefHeaderStrategy) {
    case 0: {
      result = HeaderStrategy::kNoIndentation;
      break;
    }
    case 1: {
      result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
      break;
    }
    case 2: {
      result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
      break;
    }
    default: {
      NS_WARNING(
          nsPrintfCString("Header strategy pref contains undefined value: %i",
                          aPrefHeaderStrategy)
              .get());
    }
  }

  return result;
}

const int32_t kDefaultHeaderStrategy = 1;

void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
                                           const uint32_t aWrapColumn) {
  mFlags = aFlags;

  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
    // Get some prefs that controls how we do formatted output
    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);

    int32_t headerStrategy =
        Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
    mHeaderStrategy = Convert(headerStrategy);
  }

  mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
                        (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);

  // XXX We should let the caller decide whether to do this or not
  mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;

  mWrapColumn = aWrapColumn;
}

NS_IMETHODIMP
nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
                            const Encoding* aEncoding, bool aIsCopying,
                            bool aIsWholeDocument,
                            bool* aNeedsPreformatScanning, nsAString& aOutput) {
#ifdef DEBUG
  // Check if the major control flags are set correctly.
  if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    // One of OutputFormatted or OutputWrap must be set, but not both.
    NS_ASSERTION((aFlags & nsIDocumentEncoder::OutputFormatted) !=
                     (aFlags & nsIDocumentEncoder::OutputWrap),
                 "If you want format=flowed, you must combine it "
                 "with either nsIDocumentEncoder::OutputFormatted "
                 "or nsIDocumentEncoder::OutputWrap");
  }

  if (aFlags & nsIDocumentEncoder::OutputFormatted) {
    NS_ASSERTION(
        !(aFlags & nsIDocumentEncoder::OutputPreformatted),
        "Can't do formatted and preformatted output at the same time!");
  }
#endif
  MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
             (aFlags & nsIDocumentEncoder::OutputFormatFlowed));

  *aNeedsPreformatScanning = true;
  mSettings.Init(aFlags, aWrapColumn);
  mOutputManager.emplace(mSettings.GetFlags(), aOutput);

  mUseLineBreaker = mSettings.MayWrap() && mSettings.MayBreakLines();

  mLineBreakDue = false;
  mFloatingLines = -1;

  mPreformattedBlockBoundary = false;

  MOZ_ASSERT(mOLStack.IsEmpty());

  return NS_OK;
}

bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
  uint32_t size = aStack.Length();
  if (size == 0) {
    return false;
  }
  return aStack.ElementAt(size - 1);
}

void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
  uint32_t size = aStack.Length();
  if (size > 0) {
    aStack.ElementAt(size - 1) = aValue;
  } else {
    NS_ERROR("There is no \"Last\" value");
  }
}

void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
  aStack.AppendElement(bool(aValue));
}

bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
  return aStack.Length() ? aStack.PopLastElement() : false;
}

bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
    const nsAtom* aTag) const {
  if (mSettings.GetWithRubyAnnotation()) {
    return false;
  }

  return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
         aTag == nsGkAtoms::rtc;
}

// Return true if aElement has 'display:none' or if we just don't know.
static bool IsDisplayNone(Element* aElement) {
  RefPtr<const ComputedStyle> computedStyle =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
  return !computedStyle ||
         computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
}

static bool IsIgnorableScriptOrStyle(Element* aElement) {
  return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
         IsDisplayNone(aElement);
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
                                  int32_t aEndOffset) {
  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
  if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;

  NS_ENSURE_ARG(aText);

  nsresult rv = NS_OK;

  nsIContent* content = aText;
  const nsTextFragment* frag;
  if (!content || !(frag = content->GetText())) {
    return NS_ERROR_FAILURE;
  }

  int32_t fragLength = frag->GetLength();
  int32_t endoffset =
      (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
  NS_ASSERTION(aStartOffset <= endoffset,
               "A start offset is beyond the end of the text fragment!");

  int32_t length = endoffset - aStartOffset;
  if (length <= 0) {
    return NS_OK;
  }

  nsAutoString textstr;
  if (frag->Is2b()) {
    textstr.Assign(frag->Get2b() + aStartOffset, length);
  } else {
    // AssignASCII is for 7-bit character only, so don't use it
    const char* data = frag->Get1b();
    CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
  }

  // Mask the text if the text node is in a password field.
  if (content->HasFlag(NS_MAYBE_MASKED)) {
    TextEditor::MaskString(textstr, *content->AsText(), 0, aStartOffset);
  }

  // We have to split the string across newlines
  // to match parser behavior
  int32_t start = 0;
  int32_t offset = textstr.FindCharInSet(u"\n\r");
  while (offset != kNotFound) {
    if (offset > start) {
      // Pass in the line
      DoAddText(false, Substring(textstr, start, offset - start));
    }

    // Pass in a newline
    DoAddText();

    start = offset + 1;
    offset = textstr.FindCharInSet(u"\n\r", start);
  }

  // Consume the last bit of the string if there's any left
  if (start < length) {
    if (start) {
      DoAddText(false, Substring(textstr, start, length - start));
    } else {
      DoAddText(false, textstr);
    }
  }

  return rv;
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
                                          int32_t aStartOffset,
                                          int32_t aEndOffset) {
  return AppendText(aCDATASection, aStartOffset, aEndOffset);
}

NS_IMETHODIMP
nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
  mPreformatStack.push(IsElementPreformatted(aElement));
  return NS_OK;
}

NS_IMETHODIMP
nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
  MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
                     "Tried to pop without previous push.");
  mPreformatStack.pop();
  return NS_OK;
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendElementStart(Element* aElement,
                                          Element* aOriginalElement) {
  NS_ENSURE_ARG(aElement);

  mElement = aElement;

  nsresult rv;
  nsAtom* id = GetIdForContent(mElement);

  bool isContainer = !FragmentOrElement::IsHTMLVoid(id);

  if (isContainer) {
    rv = DoOpenContainer(id);
  } else {
    rv = DoAddLeaf(id);
  }

  mElement = nullptr;

  if (id == nsGkAtoms::head) {
    ++mHeadLevel;
  }

  return rv;
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendElementEnd(Element* aElement,
                                        Element* aOriginalElement) {
  NS_ENSURE_ARG(aElement);

  mElement = aElement;

  nsresult rv;
  nsAtom* id = GetIdForContent(mElement);

  bool isContainer = !FragmentOrElement::IsHTMLVoid(id);

  rv = NS_OK;
  if (isContainer) {
    rv = DoCloseContainer(id);
  }

  mElement = nullptr;

  if (id == nsGkAtoms::head) {
    NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
    --mHeadLevel;
  }

  return rv;
}

NS_IMETHODIMP
nsPlainTextSerializer::FlushAndFinish() {
  MOZ_ASSERT(mOutputManager);

  mOutputManager->Flush(mCurrentLine);
  return Finish();
}

NS_IMETHODIMP
nsPlainTextSerializer::Finish() {
  mOutputManager.reset();

  return NS_OK;
}

NS_IMETHODIMP
nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
  MOZ_ASSERT(mOutputManager);

  aLength = mOutputManager->GetOutputLength();

  return NS_OK;
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
  return NS_OK;
}

constexpr int32_t kOlStackDummyValue = 0;

nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) {
  if (IsIgnorableRubyAnnotation(aTag)) {
    // Ignorable ruby annotation shouldn't be replaced by a placeholder
    // character, neither any of its descendants.
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(mElement)) {
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    if (mPreformattedBlockBoundary && DoOutput()) {
      // Should always end a line, but get no more whitespace
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
    mPreformattedBlockBoundary = false;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  if (mTagStackIndex < TagStackSize) {
    mTagStack[mTagStackIndex++] = aTag;
  }

  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
  // above random <pre>s below it.
  mHasWrittenCiteBlockquote =
      mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;

  bool isInCiteBlockquote = false;

  // XXX special-case <blockquote type=cite> so that we don't add additional
  // newlines before the text.
  if (aTag == nsGkAtoms::blockquote) {
    nsAutoString value;
    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
  }

  if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);

  // Check if this tag's content that should not be output
  if ((aTag == nsGkAtoms::noscript &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
    // Ignore everything that follows the current tag in
    // question until a matching end tag is encountered.
    mIgnoreAboveIndex = mTagStackIndex - 1;
    return NS_OK;
  }

  if (aTag == nsGkAtoms::body) {
    // Try to figure out here whether we have a
    // preformatted style attribute set by Thunderbird.
    //
    // Trigger on the presence of a "pre-wrap" in the
    // style attribute. That's a very simplistic way to do
    // it, but better than nothing.
    nsAutoString style;
    int32_t whitespace;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
        (kNotFound != (whitespace = style.Find(u"white-space:")))) {
      if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre-wrap\n");
#endif
        mPreFormattedMail = true;
      } else if (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre\n");
#endif
        mPreFormattedMail = true;
      }
    } else {
      /* See comment at end of function. */
      mInWhitespace = true;
      mPreFormattedMail = false;
    }

    return NS_OK;
  }

  // Keep this in sync with DoCloseContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::p)
    EnsureVerticalSpace(1);
  else if (aTag == nsGkAtoms::pre) {
    if (GetLastBool(mIsInCiteBlockquote))
      EnsureVerticalSpace(0);
    else if (mHasWrittenCiteBlockquote) {
      EnsureVerticalSpace(0);
      mHasWrittenCiteBlockquote = false;
    } else
      EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::tr) {
    PushBool(mHasWrittenCellsForRow, false);
  } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
    // We must make sure that the content of two table cells get a
    // space between them.

    // To make the separation between cells most obvious and
    // importable, we use a TAB.
    if (mHasWrittenCellsForRow.IsEmpty()) {
      // We don't always see a <tr> (nor a <table>) before the <td> if we're
      // copying part of a table
      PushBool(mHasWrittenCellsForRow, true);  // will never be popped
    } else if (GetLastBool(mHasWrittenCellsForRow)) {
      // Bypass |Write| so that the TAB isn't compressed away.
      AddToLine(u"\t", 1);
      mInWhitespace = true;
    } else {
      SetLastBool(mHasWrittenCellsForRow, true);
    }
  } else if (aTag == nsGkAtoms::ul) {
    // Indent here to support nested lists, which aren't included in li :-(
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    // Must end the current line before we change indention
    mCurrentLine.mIndentation.mLength += kIndentSizeList;
    mULCount++;
  } else if (aTag == nsGkAtoms::ol) {
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      // Must end the current line before we change indention
      nsAutoString startAttr;
      int32_t startVal = 1;
      if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
        nsresult rv = NS_OK;
        startVal = startAttr.ToInteger(&rv);
        if (NS_FAILED(rv)) {
          startVal = 1;
        }
      }
      mOLStack.AppendElement(startVal);
    } else {
      mOLStack.AppendElement(kOlStackDummyValue);
    }
    mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
  } else if (aTag == nsGkAtoms::li &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    if (mTagStackIndex > 1 && IsInOL()) {
      if (!mOLStack.IsEmpty()) {
        nsAutoString valueAttr;
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
          nsresult rv = NS_OK;
          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
          if (NS_SUCCEEDED(rv)) {
            mOLStack.LastElement() = valueAttrVal;
          }
        }
        // This is what nsBulletFrame does for OLs:
        mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
        mOLStack.LastElement()++;
      } else {
        mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
      }

      mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));

    } else {
      static const char bulletCharArray[] = "*o+#";
      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
      char bulletChar = bulletCharArray[index % 4];
      mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
    }

    mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
  } else if (aTag == nsGkAtoms::dl) {
    EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::dt) {
    EnsureVerticalSpace(0);
  } else if (aTag == nsGkAtoms::dd) {
    EnsureVerticalSpace(0);
    mCurrentLine.mIndentation.mLength += kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    ++mSpanLevel;
  } else if (aTag == nsGkAtoms::blockquote) {
    // Push
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    if (isInCiteBlockquote) {
      EnsureVerticalSpace(0);
      mCurrentLine.mCiteQuoteLevel++;
    } else {
      EnsureVerticalSpace(1);
      mCurrentLine.mIndentation.mLength +=
          kTabSize;  // Check for some maximum value?
    }
  } else if (aTag == nsGkAtoms::q) {
    Write(u"\""_ns);
  }

  // Else make sure we'll separate block level tags,
  // even if we're about to leave, before doing any other formatting.
  else if (IsCssBlockLevelElement(mElement)) {
    EnsureVerticalSpace(0);
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    OpenContainerForOutputFormatted(aTag);
  }
  return NS_OK;
}

void nsPlainTextSerializer::OpenContainerForOutputFormatted(
    const nsAtom* aTag) {
  const bool currentNodeIsConverted = IsCurrentNodeConverted();

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
    EnsureVerticalSpace(2);
    if (mSettings.GetHeaderStrategy() ==
        Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
      // Caching
      int32_t level = HeaderLevel(aTag);
      // Increase counter for current level
      mHeaderCounter[level]++;
      // Reset all lower levels
      int32_t i;

      for (i = level + 1; i <= 6; i++) {
        mHeaderCounter[i] = 0;
      }

      // Construct numbers
      nsAutoString leadup;
      for (i = 1; i <= level; i++) {
        leadup.AppendInt(mHeaderCounter[i]);
        leadup.Append(char16_t('.'));
      }
      leadup.Append(char16_t(' '));
      Write(leadup);
    } else if (mSettings.GetHeaderStrategy() ==
               Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
        // for h(x), run x-1 times
        mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
      }
    }
  } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"^"_ns);
  } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"_"_ns);
  } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"|"_ns);
  } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"*"_ns);
  } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"/"_ns);
  } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"_"_ns);
  }

  /* Container elements are always block elements, so we shouldn't
     output any whitespace immediately after the container tag even if
     there's extra whitespace there because the HTML is pretty-printed
     or something. To ensure that happens, tell the serializer we're
     already in whitespace so it won't output more. */
  mInWhitespace = true;
}

nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) {
  if (IsIgnorableRubyAnnotation(aTag)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(mElement)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    if (DoOutput() && IsElementPreformatted() &&
        IsCssBlockLevelElement(mElement)) {
      // If we're closing a preformatted block element, output a line break
      // when we find a new container.
      mPreformattedBlockBoundary = true;
    }
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  if (mTagStackIndex > 0) {
    --mTagStackIndex;
  }

  if (mTagStackIndex >= mIgnoreAboveIndex) {
    if (mTagStackIndex == mIgnoreAboveIndex) {
      // We're dealing with the close tag whose matching
      // open tag had set the mIgnoreAboveIndex value.
      // Reset mIgnoreAboveIndex before discarding this tag.
      mIgnoreAboveIndex = (uint32_t)kNotFound;
    }
    return NS_OK;
  }

  MOZ_ASSERT(mOutputManager);

  // End current line if we're ending a block level tag
  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
    // We want the output to end with a new line,
    // but in preformatted areas like text fields,
    // we can't emit newlines that weren't there.
    // So add the newline only in the case of formatted output.
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(0);
    } else {
      mOutputManager->Flush(mCurrentLine);
    }
    // We won't want to do anything with these in formatted mode either,
    // so just return now:
    return NS_OK;
  }

  // Keep this in sync with DoOpenContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::tr) {
    PopBool(mHasWrittenCellsForRow);
    // Should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    // Items that should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::pre) {
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::ul) {
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    --mULCount;
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::ol) {
    mOutputManager->Flush(mCurrentLine);  // Doing this after decreasing
                                          // OLStackIndex would be wrong.
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
    mOLStack.RemoveLastElement();
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::dl) {
    mFloatingLines = 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::dd) {
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
    --mSpanLevel;
  } else if (aTag == nsGkAtoms::div) {
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::blockquote) {
    mOutputManager->Flush(mCurrentLine);  // Is this needed?

    // Pop
    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);

    if (isInCiteBlockquote) {
      NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
                   "CiteQuote level will be negative!");
      mCurrentLine.mCiteQuoteLevel--;
      mFloatingLines = 0;
      mHasWrittenCiteBlockquote = true;
    } else {
      mCurrentLine.mIndentation.mLength -= kTabSize;
      mFloatingLines = 1;
    }
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::q) {
    Write(u"\""_ns);
  } else if (IsCssBlockLevelElement(mElement)) {
    // All other blocks get 1 vertical space after them
    // in formatted mode, otherwise 0.
    // This is hard. Sometimes 0 is a better number, but
    // how to know?
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(1);
    } else {
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    CloseContainerForOutputFormatted(aTag);
  }

  return NS_OK;
}

void nsPlainTextSerializer::CloseContainerForOutputFormatted(
    const nsAtom* aTag) {
  const bool currentNodeIsConverted = IsCurrentNodeConverted();

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
    using HeaderStrategy = Settings::HeaderStrategy;
    if ((mSettings.GetHeaderStrategy() ==
         HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
        (mSettings.GetHeaderStrategy() ==
         HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
      mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
    }
    if (mSettings.GetHeaderStrategy() ==
        HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
        // for h(x), run x-1 times
        mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
      }
    }
    EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
    nsAutoString url;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
        !url.IsEmpty()) {
      nsAutoString temp;
      temp.AssignLiteral(" <");
      temp += url;
      temp.Append(char16_t('>'));
      Write(temp);
    }
  } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(kSpace);
  } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"|"_ns);
  } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"*"_ns);
  } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"/"_ns);
  } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"_"_ns);
  }
}

bool nsPlainTextSerializer::MustSuppressLeaf() const {
  if (mIgnoredChildNodeLevel > 0) {
    return true;
  }

  if ((mTagStackIndex > 1 &&
       mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
      (mTagStackIndex > 0 &&
       mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
    // Don't output the contents of SELECT elements;
    // Might be nice, eventually, to output just the selected element.
    // Read more in bug 31994.
    return true;
  }

  return false;
}

void nsPlainTextSerializer::DoAddText() { DoAddText(true, u""_ns); }

void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
                                      const nsAString& aText) {
  // If we don't want any output, just return
  if (!DoOutput()) {
    return;
  }

  if (!aIsLineBreak) {
    // Make sure to reset this, since it's no longer true.
    mHasWrittenCiteBlockquote = false;
  }

  if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);

  if (MustSuppressLeaf()) {
    return;
  }

  if (aIsLineBreak) {
    // The only times we want to pass along whitespace from the original
    // html source are if we're forced into preformatted mode via flags,
    // or if we're prettyprinting and we're inside a <pre>.
    // Otherwise, either we're collapsing to minimal text, or we're
    // prettyprinting to mimic the html format, and in neither case
    // does the formatting of the html source help us.
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
        (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
        IsElementPreformatted()) {
      EnsureVerticalSpace(mEmptyLines + 1);
    } else if (!mInWhitespace) {
      Write(kSpace);
      mInWhitespace = true;
    }
    return;
  }

  Write(aText);
}

void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
  MOZ_ASSERT(aResult.IsEmpty());

  const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25);
  while (aResult.Length() < width) {
    aResult.Append(char16_t('-'));
  }
}

nsresult nsPlainTextSerializer::DoAddLeaf(const nsAtom* aTag) {
  mPreformattedBlockBoundary = false;

  if (!DoOutput()) {
    return NS_OK;
  }

  if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);

  if (MustSuppressLeaf()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::br) {
    // Another egregious editor workaround, see bug 38194:
    // ignore the bogus br tags that the editor sticks here and there.
    // FYI: `brElement` may be `nullptr` if the element is <br> element
    //      of non-HTML element.
    // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
    //     is not an HTML element?
    HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement);
    if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
      EnsureVerticalSpace(mEmptyLines + 1);
    }
  } else if (aTag == nsGkAtoms::hr &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    EnsureVerticalSpace(0);

    // Make a line of dashes as wide as the wrap width
    // XXX honoring percentage would be nice
    nsAutoString line;
    CreateLineOfDashes(line, mSettings.GetWrapColumn());
    Write(line);

    EnsureVerticalSpace(0);
  } else if (aTag == nsGkAtoms::img) {
    /* Output (in decreasing order of preference)
       alt, title or nothing */
    // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
    nsAutoString imageDescription;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
      // If the alt attribute has an empty value (|alt=""|), output nothing
    } else if (NS_SUCCEEDED(
                   GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
               !imageDescription.IsEmpty()) {
      imageDescription = u" ["_ns + imageDescription + u"] "_ns;
    }

    Write(imageDescription);
  }

  return NS_OK;
}

/**
* Adds as many newline as necessary to get |aNumberOfRows| empty lines
*
* aNumberOfRows = -1    :   Being in the middle of some line of text
* aNumberOfRows =  0    :   Being at the start of a line
* aNumberOfRows =  n>0  :   Having n empty lines before the current line.
*/
void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
  // If we have something in the indent we probably want to output
  // it and it's not included in the count for empty lines so we don't
  // realize that we should start a new line.
  if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
    EndLine(false);
    mInWhitespace = true;
  }

  while (mEmptyLines < aNumberOfRows) {
    EndLine(false);
    mInWhitespace = true;
  }
  mLineBreakDue = false;
  mFloatingLines = -1;
}

void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aCurrentLine) {
  if (!aCurrentLine.mContent.IsEmpty()) {
    aCurrentLine.MaybeReplaceNbspsInContent(mFlags);

    Append(aCurrentLine, StripTrailingWhitespaces::kNo);

    aCurrentLine.ResetContentAndIndentationHeader();
  }
}

static bool IsSpaceStuffable(const char16_t* s) {
  return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
          NS_strncmp(s, u"From ", 5) == 0);
}

void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
  if (!mSettings.MayWrap()) {
    return;
  }

  // Yes, wrap!
  // The "+4" is to avoid wrap lines that only would be a couple
  // of letters too long. We give this bonus only if the
  // wrapcolumn is more than 20.
  const uint32_t wrapColumn = mSettings.GetWrapColumn();
  uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0;
  while (!mCurrentLine.mContent.IsEmpty()) {
    const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth();
    // The width of the line as it will appear on the screen (approx.).
    const uint32_t currentLineContentWidth =
        GetUnicharStringWidth(mCurrentLine.mContent);
    if (currentLineContentWidth + prefixwidth <= wrapColumn + bonuswidth) {
      break;
    }

    const int32_t goodSpace =
        mCurrentLine.FindWrapIndexForContent(wrapColumn, mUseLineBreaker);

    const int32_t contentLength = mCurrentLine.mContent.Length();
    if (goodSpace <= 0 || goodSpace >= contentLength) {
      // Nothing to do. Hopefully we get more data later to use for a place to
      // break line.
      break;
    }
    // Found a place to break
    // -1 (trim a char at the break position) only if the line break was a
    // space.
    nsAutoString restOfContent;
    if (nsCRT::IsAsciiSpace(mCurrentLine.mContent.CharAt(goodSpace))) {
      mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace - 1);
    } else {
      mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace);
    }
    // if breaker was U+0020, it has to consider for delsp=yes support
    const bool breakBySpace = mCurrentLine.mContent.CharAt(goodSpace) == ' ';
    mCurrentLine.mContent.Truncate(goodSpace);
    EndLine(true, breakBySpace);
    mCurrentLine.mContent.Truncate();
    // Space stuffing a la RFC 2646 (format=flowed)
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
      mCurrentLine.mSpaceStuffed = !restOfContent.IsEmpty() &&
                                   IsSpaceStuffable(restOfContent.get()) &&
                                   // We space-stuff quoted lines anyway
                                   mCurrentLine.mCiteQuoteLevel == 0;
    }
    mCurrentLine.mContent.Append(restOfContent);
    mEmptyLines = -1;
  }
}

/**
* This function adds a piece of text to the current stored line. If we are
* wrapping text and the stored line will become too long, a suitable
* location to wrap will be found and the line that's complete will be
* output.
*/
void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
                                      int32_t aLineFragmentLength) {
  if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);

  if (mCurrentLine.mContent.IsEmpty()) {
    if (0 == aLineFragmentLength) {
      return;
    }

    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
      // Space stuffing a la RFC 2646 (format=flowed).
      // We space-stuff quoted lines anyway
      mCurrentLine.mSpaceStuffed =
          IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0;
    }
    mEmptyLines = -1;
  }

  mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);

  MaybeWrapAndOutputCompleteLines();
}

// The signature separator (RFC 2646).
const char kSignatureSeparator[] = "-- ";

// The OpenPGP dash-escaped signature separator in inline
// signed messages according to the OpenPGP standard (RFC 2440).
const char kDashEscapedSignatureSeparator[] = "- -- ";

static bool IsSignatureSeparator(const nsAString& aString) {
  return aString.EqualsLiteral(kSignatureSeparator) ||
         aString.EqualsLiteral(kDashEscapedSignatureSeparator);
}

/**
* Outputs the contents of mCurrentLine.mContent, and resets line
* specific variables. Also adds an indentation and prefix if there is one
* specified. Strips ending spaces from the line if it isn't preformatted.
*/
void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) {
  if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) {
    // No meaning
    return;
  }

  /* In non-preformatted mode, remove spaces from the end of the line for
   * format=flowed compatibility. Don't do this for these special cases:
   * "-- ", the signature separator (RFC 2646) shouldn't be touched and
   * "- -- ", the OpenPGP dash-escaped signature separator in inline
   * signed messages according to the OpenPGP standard (RFC 2440).
   */
  if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
      (aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
    mCurrentLine.mContent.Trim(" ", false, true, false);
  }

  if (aSoftLineBreak &&
      mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
      !mCurrentLine.mIndentation.mLength) {
    // Add the soft part of the soft linebreak (RFC 2646 4.1)
    // We only do this when there is no indentation since format=flowed
    // lines and indentation doesn't work well together.

    // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
    // add twice space.
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
        aBreakBySpace) {
      mCurrentLine.mContent.AppendLiteral(" ");
    } else {
      mCurrentLine.mContent.Append(char16_t(' '));
    }
  }

  if (aSoftLineBreak) {
    mEmptyLines = 0;
  } else {
    // Hard break
    if (mCurrentLine.HasContentOrIndentationHeader()) {
      mEmptyLines = 0;
    } else {
      mEmptyLines++;
    }
  }

  MOZ_ASSERT(mOutputManager);

  mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());

  // If we don't have anything "real" to output we have to
  // make sure the indent doesn't end in a space since that
  // would trick a format=flowed-aware receiver.
  mOutputManager->Append(mCurrentLine,
                         OutputManager::StripTrailingWhitespaces::kMaybe);
  mOutputManager->AppendLineBreak();
  mCurrentLine.ResetContentAndIndentationHeader();
  mInWhitespace = true;
  mLineBreakDue = false;
  mFloatingLines = -1;
}

/**
* Creates the calculated and stored indent and text in the indentation. That is
* quote chars and numbers for numbered lists and such.
*/
void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
    nsAString& aResult) const {
  // Put the mail quote "> " chars in, if appropriate:
  if (mCiteQuoteLevel > 0) {
    nsAutoString quotes;
    for (int i = 0; i < mCiteQuoteLevel; i++) {
      quotes.Append(char16_t('>'));
    }
    if (!mContent.IsEmpty()) {
      /* Better don't output a space here, if the line is empty,
         in case a receiving format=flowed-aware UA thinks, this were a flowed
         line, which it isn't - it's just empty. (Flowed lines may be joined
         with the following one, so the empty line may be lost completely.) */
      quotes.Append(char16_t(' '));
    }
    aResult = quotes;
  }

  // Indent if necessary
  int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
  if (mSpaceStuffed) {
    indentwidth += 1;
  }

  // Don't make empty lines look flowed
  if (indentwidth > 0 && HasContentOrIndentationHeader()) {
    nsAutoString spaces;
    for (int i = 0; i < indentwidth; ++i) {
      spaces.Append(char16_t(' '));
    }
    aResult += spaces;
  }

  if (!mIndentation.mHeader.IsEmpty()) {
    aResult += mIndentation.mHeader;
  }
}

static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
  return ('\n' == c || '\r' == c || ' ' == c || '\t' == c);
}

static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
  const int32_t totLen = aString.Length();
  for (int32_t i = totLen - 1; i >= 0; i--) {
    char16_t c = aString[i];
    if (IsLineFeedCarriageReturnBlankOrTab(c)) {
      continue;
    }
    if (kNBSP == c) {
      aString.Replace(i, 1, ' ');
    } else {
      break;
    }
  }
}

void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
  const int32_t totLen = aString.Length();
  int32_t newline{0};

  // Put the mail quote "> " chars in, if appropriate.
  // Have to put it in before every line.
  int32_t bol = 0;
  while (bol < totLen) {
    bool outputLineBreak = false;
    bool spacesOnly = true;

    // Find one of '\n' or '\r' using iterators since nsAString
    // doesn't have the old FindCharInSet function.
    nsAString::const_iterator iter;
    aString.BeginReading(iter);
    nsAString::const_iterator done_searching;
    aString.EndReading(done_searching);
    iter.advance(bol);
    int32_t new_newline = bol;
    newline = kNotFound;
    while (iter != done_searching) {
      if ('\n' == *iter || '\r' == *iter) {
        newline = new_newline;
        break;
      }
      if (' ' != *iter) {
        spacesOnly = false;
      }
      ++new_newline;
      ++iter;
    }

    // Done searching
    nsAutoString stringpart;
    if (newline == kNotFound) {
      // No new lines.
      stringpart.Assign(Substring(aString, bol, totLen - bol));
      if (!stringpart.IsEmpty()) {
        char16_t lastchar = stringpart.Last();
        mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
      }
      mEmptyLines = -1;
      bol = totLen;
    } else {
      // There is a newline
      stringpart.Assign(Substring(aString, bol, newline - bol));
      mInWhitespace = true;
      outputLineBreak = true;
      mEmptyLines = 0;
      bol = newline + 1;
      if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
        // There was a CRLF in the input. This used to be illegal and
        // stripped by the parser. Apparently not anymore. Let's skip
        // over the LF.
        bol++;
      }
    }

    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
      if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
          !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
        stringpart.Trim(" ", false, true, true);
      }
      mCurrentLine.mSpaceStuffed =
          IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart);
    }
    mCurrentLine.mContent.Append(stringpart);

    mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());

    mOutputManager->Append(mCurrentLine,
                           OutputManager::StripTrailingWhitespaces::kNo);
    if (outputLineBreak) {
      mOutputManager->AppendLineBreak();
    }

    mCurrentLine.ResetContentAndIndentationHeader();
  }

#ifdef DEBUG_wrapping
  printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
#endif
}

/**
* Write a string. This is the highlevel function to use to get text output.
* By using AddToLine, Output, EndLine and other functions it handles quotation,
* line wrapping, indentation, whitespace compression and other things.
*/
void nsPlainTextSerializer::Write(const nsAString& aStr) {
  // XXX Copy necessary to use nsString methods and gain
  // access to underlying buffer
  nsAutoString str(aStr);

#ifdef DEBUG_wrapping
  printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
         mSettings.GetWrapColumn());
#endif

  const int32_t totLen = str.Length();

  // If the string is empty, do nothing:
  if (totLen <= 0) return;

  // For Flowed text change nbsp-ses to spaces at end of lines to allow them
  // to be cut off along with usual spaces if required. (bug #125928)
  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
    ReplaceVisiblyTrailingNbsps(str);
  }

  // We have two major codepaths here. One that does preformatted text and one
  // that does normal formatted text. The one for preformatted text calls
  // Output directly while the other code path goes through AddToLine.
  if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
      (IsElementPreformatted() && !mPreFormattedMail) ||
      (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
    // No intelligent wrapping.

    // This mustn't be mixed with intelligent wrapping without clearing
    // the mCurrentLine.mContent buffer before!!!
    NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
                     (IsElementPreformatted() && !mPreFormattedMail),
                 "Mixed wrapping data and nonwrapping data on the same line");
    MOZ_ASSERT(mOutputManager);

    if (!mCurrentLine.mContent.IsEmpty()) {
      mOutputManager->Flush(mCurrentLine);
    }

    ConvertToLinesAndOutput(str);
    return;
  }

  // Intelligent handling of text
  // If needed, strip out all "end of lines"
  // and multiple whitespace between words
  int32_t nextpos;
  const char16_t* offsetIntoBuffer = nullptr;

  int32_t bol = 0;
  while (bol < totLen) {  // Loop over lines
    // Find a place where we may have to do whitespace compression
    nextpos = str.FindCharInSet(u" \t\n\r", bol);
#ifdef DEBUG_wrapping
    nsAutoString remaining;
    str.Right(remaining, totLen - bol);
    foo = ToNewCString(remaining);
    // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
    //        "string = '%s'\n", bol, nextpos, totLen, foo);
    free(foo);
#endif

    if (nextpos == kNotFound) {
      // The rest of the string
      offsetIntoBuffer = str.get() + bol;
      AddToLine(offsetIntoBuffer, totLen - bol);
      bol = totLen;
      mInWhitespace = false;
    } else {
      // There's still whitespace left in the string
      if (nextpos != 0 && (nextpos + 1) < totLen) {
        offsetIntoBuffer = str.get() + nextpos;
        // skip '\n' if it is between CJ chars
        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
            IS_CJ_CHAR(offsetIntoBuffer[1])) {
          offsetIntoBuffer = str.get() + bol;
          AddToLine(offsetIntoBuffer, nextpos - bol);
          bol = nextpos + 1;
          continue;
        }
      }
      // If we're already in whitespace and not preformatted, just skip it:
      if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
          !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
        // Skip whitespace
        bol++;
        continue;
      }

      if (nextpos == bol) {
        // Note that we are in whitespace.
        mInWhitespace = true;
        offsetIntoBuffer = str.get() + nextpos;
        AddToLine(offsetIntoBuffer, 1);
        bol++;
        continue;
      }

      mInWhitespace = true;

      offsetIntoBuffer = str.get() + bol;
      if (mPreFormattedMail ||
          mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
        // Preserve the real whitespace character
        nextpos++;
        AddToLine(offsetIntoBuffer, nextpos - bol);
        bol = nextpos;
      } else {
        // Replace the whitespace with a space
        AddToLine(offsetIntoBuffer, nextpos - bol);
        AddToLine(kSpace.get(), 1);
        bol = nextpos + 1;  // Let's eat the whitespace
      }
    }
  }  // Continue looping over the string
}

/**
* Gets the value of an attribute in a string. If the function returns
* NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
*/
nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
                                                  nsString& aValueRet) const {
  if (mElement) {
    if (mElement->GetAttr(aName, aValueRet)) {
      return NS_OK;
    }
  }

  return NS_ERROR_NOT_AVAILABLE;
}

/**
* Returns true, if the element was inserted by Moz' TXT->HTML converter.
* In this case, we should ignore it.
*/
bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
  nsAutoString value;
  nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
  return (NS_SUCCEEDED(rv) &&
          (StringBeginsWith(value, u"moz-txt"_ns,
                            nsASCIICaseInsensitiveStringComparator) ||
           StringBeginsWith(value, u"\"moz-txt"_ns,
                            nsASCIICaseInsensitiveStringComparator)));
}

// static
nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
  if (!aContent->IsHTMLElement()) {
    return nullptr;
  }

  nsAtom* localName = aContent->NodeInfo()->NameAtom();
  return localName->IsStatic() ? localName : nullptr;
}

bool nsPlainTextSerializer::IsElementPreformatted() const {
  return !mPreformatStack.empty() && mPreformatStack.top();
}

bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
  RefPtr<const ComputedStyle> computedStyle =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
  if (computedStyle) {
    const nsStyleText* textStyle = computedStyle->StyleText();
    return textStyle->WhiteSpaceOrNewlineIsSignificant();
  }
  // Fall back to looking at the tag, in case there is no style information.
  return GetIdForContent(aElement) == nsGkAtoms::pre;
}

bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
  RefPtr<const ComputedStyle> computedStyle =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
  if (computedStyle) {
    const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
    return displayStyle->IsBlockOutsideStyle();
  }
  // Fall back to looking at the tag, in case there is no style information.
  return nsContentUtils::IsHTMLBlockLevelElement(aElement);
}

/**
* This method is required only to identify LI's inside OL.
* Returns TRUE if we are inside an OL tag and FALSE otherwise.
*/
bool nsPlainTextSerializer::IsInOL() const {
  int32_t i = mTagStackIndex;
  while (--i >= 0) {
    if (mTagStack[i] == nsGkAtoms::ol) return true;
    if (mTagStack[i] == nsGkAtoms::ul) {
      // If a UL is reached first, LI belongs the UL nested in OL.
      return false;
    }
  }
  // We may reach here for orphan LI's.
  return false;
}

bool nsPlainTextSerializer::IsInOlOrUl() const {
  return (mULCount > 0) || !mOLStack.IsEmpty();
}

/*
  @return 0 = no header, 1 = h1, ..., 6 = h6
*/
int32_t HeaderLevel(const nsAtom* aTag) {
  if (aTag == nsGkAtoms::h1) {
    return 1;
  }
  if (aTag == nsGkAtoms::h2) {
    return 2;
  }
  if (aTag == nsGkAtoms::h3) {
    return 3;
  }
  if (aTag == nsGkAtoms::h4) {
    return 4;
  }
  if (aTag == nsGkAtoms::h5) {
    return 5;
  }
  if (aTag == nsGkAtoms::h6) {
    return 6;
  }
  return 0;
}

/* These functions define the column width of an ISO 10646 character
* as follows:
*
*    - The null character (U+0000) has a column width of 0.
*
*    - Other C0/C1 control characters and DEL will lead to a return
*      value of -1.
*
*    - Non-spacing and enclosing combining characters (general
*      category code Mn or Me in the Unicode database) have a
*      column width of 0.
*
*    - Spacing characters in the East Asian Wide (W) or East Asian
*      FullWidth (F) category as defined in Unicode Technical
*      Report #11 have a column width of 2.
*
*    - All remaining characters (including all printable
*      ISO 8859-1 and WGL4 characters, Unicode control characters,
*      etc.) have a column width of 1.
*/

int32_t GetUnicharWidth(char32_t aCh) {
  /* test for 8-bit control characters */
  if (aCh == 0) {
    return 0;
  }
  if (aCh < 32 || (aCh >= 0x7f && aCh < 0xa0)) {
    return -1;
  }

  /* The first combining char in Unicode is U+0300 */
  if (aCh < 0x0300) {
    return 1;
  }

  auto gc = unicode::GetGeneralCategory(aCh);
  if (gc == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ||
      gc == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) {
    return 0;
  }

  /* if we arrive here, ucs is not a combining or C0/C1 control character */

  /* fast test for majority of non-wide scripts */
  if (aCh < 0x1100) {
    return 1;
  }

  return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
}

int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
  int32_t width = 0;
  for (auto iter = aString.begin(); iter != aString.end(); ++iter) {
    char32_t c = *iter;
    if (NS_IS_HIGH_SURROGATE(c) && (iter + 1) != aString.end() &&
        NS_IS_LOW_SURROGATE(*(iter + 1))) {
      c = SURROGATE_TO_UCS4(c, *++iter);
    }
    const int32_t w = GetUnicharWidth(c);
    // Taking 1 as the width of non-printable character, for bug 94475.
    width += (w < 0 ? 1 : w);
  }
  return width;
}

quality94%

¤ Dauer der Verarbeitung: 0.38 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.