Quelle DOMtoATK.h

Sprache: C

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <glib.h>
#include <cstdint>
#include "mozilla/a11y/HyperTextAccessibleBase.h"
#include "nsCharTraits.h"
#include "nsString.h"

/**
* ATK offsets are counted in unicode codepoints, while DOM offsets are counted
* in UTF-16 code units.  That makes a difference for non-BMP characters,
* which need two UTF-16 code units to be represented (a pair of surrogates),
* while they are just one unicode character.
*
* To keep synchronization between ATK offsets (unicode codepoints) and DOM
* offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a
* BOM after each non-BMP character (which would otherwise use 2 UTF-16
* code units for only 1 unicode codepoint).
*
* BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but
* that usage is deprecated) normally only appear at the beginning of unicode
* files, but their occurrence within text (notably after cut&paste) is not
* uncommon, and they are thus considered as non-text.
*
* Since the selection requested through ATK may not contain both surrogates
* at the ends of the selection, we need to fetch one more UTF-16 code unit
* on both sides, and get rid of it before returning the string to ATK. The
* ATKStringConverterHelper class maintains this; NewATKString should be used
* to call it properly.
*
* In the end,
* - if the start is between the high and low surrogates, the UTF-8 result
* includes a BOM from it but not the character
* - if the end is between the high and low surrogates, the UTF-8 result
* includes the character but *not* the BOM
* - all non-BMP characters that are fully in the string are in the UTF-8 result
* as character followed by BOM
*/
namespace mozilla {
namespace a11y {

namespace DOMtoATK {

/**
* Converts a string of accessible text into an ATK gchar* string (by adding
* BOMs). This can be used when offsets do not need to be adjusted because
* the ends of the string cannot fall between surrogates.
*
* @param aStr the UTF-16 text to convert.
* @return the converted string.
*         NOTE(review): presumably UTF-8 and owned by the caller (to be
*         released with g_free) -- confirm against the implementation.
*/
gchar* Convert(const nsAString& aStr);

/**
* Add a BOM after each non-BMP character.
*
* @param aDest   string receiving the result.
* @param aSource string to read from.
*                NOTE(review): presumably UTF-8 text, since the BOMs are
*                added after translation from UTF-16 to UTF-8 -- confirm
*                against the implementation.
*/
void AddBOMs(nsACString& aDest, const nsACString& aSource);

/**
* Two-step helper for turning a range of accessible text into an ATK string:
* first widen the requested offsets with AdjustOffsets, then hand the text
* fetched with the widened offsets to ConvertAdjusted.
*/
class ATKStringConverterHelper {
public:
  // Every flag starts out false; see the member initializers below.
  ATKStringConverterHelper() = default;

  /**
   * Widens the requested range by one character on each end so that
   * ConvertUTF16toUTF8 can convert surrogate pairs even when the originally
   * requested offsets fall between the two halves of a pair.
   *
   * @param aStartOffset in/out: requested start offset.
   * @param aEndOffset   in/out: requested end offset.
   * @param count        character count of the text the offsets refer to
   *                     (NewATKString passes the accessible's
   *                     CharacterCount()).
   */
  void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count);

  /**
   * Converts accessible text that was fetched with adjusted offsets into an
   * ATK gchar* string (by adding BOMs).  AdjustOffsets has to be called
   * before fetching the text that is passed in here.
   *
   * @param aStr the text obtained with the adjusted offsets.
   * @return the converted string.
   */
  gchar* ConvertAdjusted(const nsAString& aStr);

private:
  /**
   * Removes the additional characters that were requested through
   * AdjustOffsets.
   */
  gchar* FinishUTF16toUTF8(nsCString& aStr);

#ifdef DEBUG
  // NOTE(review): presumably records that AdjustOffsets has been called so
  // that ConvertAdjusted can check it -- confirm in the implementation.
  bool mAdjusted = false;
#endif
  // NOTE(review): presumably set when AdjustOffsets moved the start / end
  // offset, so that the extra character can be dropped again -- confirm.
  bool mStartShifted = false;
  bool mEndShifted = false;
};

/**
* Fetches the text between aStartOffset and aEndOffset from aAccessible and
* returns it as an ATK string, going through ATKStringConverterHelper so that
* the appropriate BOMs get introduced.
*
* @param aAccessible  the hypertext accessible to read from.
* @param aStartOffset requested start offset.
* @param aEndOffset   requested end offset.
* @return the resulting gchar* string; an empty string (g_strdup("")) when
*         no text could be fetched.
*/
inline gchar* NewATKString(HyperTextAccessibleBase* aAccessible,
                           gint aStartOffset, gint aEndOffset) {
  ATKStringConverterHelper helper;

  // Work on copies: the helper widens the range in place.
  gint adjustedStart = aStartOffset;
  gint adjustedEnd = aEndOffset;
  const gint characterCount = static_cast<gint>(aAccessible->CharacterCount());
  helper.AdjustOffsets(&adjustedStart, &adjustedEnd, characterCount);

  nsAutoString text;
  aAccessible->TextSubstring(adjustedStart, adjustedEnd, text);

  if (text.Length() != 0) {
    return helper.ConvertAdjusted(text);
  }

  // Bogus offsets, or empty string, either way we do not need conversion.
  return g_strdup("");
}

/**
* Returns the character found at aOffset in aAccessible, reading one more
* code unit when needed so that non-BMP characters are returned whole.
*
* - a trailing (low) surrogate at aOffset yields a BOM (U+FEFF);
* - a leading (high) surrogate is combined with the code unit that follows
*   it; if that one is not a trailing surrogate, U+FFFD is returned;
* - anything else is returned unchanged.
*/
inline gunichar ATKCharacter(HyperTextAccessibleBase* aAccessible,
                             gint aOffset) {
  constexpr gunichar kBOM = 0xFEFF;
  constexpr gunichar kMalformed = 0xFFFD;

  // char16_t is unsigned short in Mozilla, gunichar is guint32 in glib.
  const gunichar lead = static_cast<gunichar>(aAccessible->CharAt(aOffset));

  if (!NS_IS_HIGH_SURROGATE(lead)) {
    // Either a trailing surrogate, for which a BOM is returned instead, or a
    // plain BMP character that is passed through as is.
    return NS_IS_LOW_SURROGATE(lead) ? kBOM : lead;
  }

  // Heading surrogate: the next code unit has to be its trailing surrogate.
  const gunichar trail =
      static_cast<gunichar>(aAccessible->CharAt(aOffset + 1));

  if (NS_IS_LOW_SURROGATE(trail)) {
    return SURROGATE_TO_UCS4(lead, trail);
  }

  // It should have been a trailing surrogate... Flag the error.
  return kMalformed;
}

}  // namespace DOMtoATK

}  // namespace a11y
}  // namespace mozilla

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.20 Sekunden (vorverarbeitet am 2026-04-25) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.