Quelle testCharacterEncoding.cpp Sprache: C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/TextUtils.h"

#include <clocale>
#include <cstring>
#include <cwchar>
#include <initializer_list>
#include <iterator>
#include <string>
#include <string_view>

#include "js/CharacterEncoding.h"
#include "jsapi-tests/tests.h"

// Compares two NUL-terminated strings for equality, treating ASCII letters
// case-insensitively. Returns true only when both strings have the same length
// and every character pair matches.
static bool EqualsIgnoreCase(const char* xs, const char* ys) {
  for (; *xs != '\0' && *ys != '\0'; ++xs, ++ys) {
    char lhs = *xs;
    char rhs = *ys;

    // Fold to lower-case only when both sides are ASCII letters; setting bit
    // 0x20 on an ASCII letter yields its lower-case form.
    const bool bothLetters =
        mozilla::IsAsciiAlpha(lhs) && mozilla::IsAsciiAlpha(rhs);
    if (bothLetters) {
      lhs |= 0x20;
      rhs |= 0x20;
    }

    // Any mismatch ends the comparison immediately.
    if (lhs != rhs) {
      return false;
    }
  }

  // A match requires that neither string has characters left over.
  return *xs == '\0' && *ys == '\0';
}

// Scoped locale switch: the constructor tries to install "en_US.UTF-8" as the
// C locale (LC_ALL) and the destructor restores the locale that was active
// when the object was created.
//
// supported() reports whether both the environment's default locale and the
// newly installed locale name a UTF-8 code-page; callers use it to skip tests
// on systems without UTF-8 support.
class ToUTF8Locale {
  // Owned copy of the locale name that was active at construction. The string
  // returned by std::setlocale may be overwritten by any later std::setlocale
  // call, so keeping only the returned pointer would not be safe.
  std::string previousLocale_;

  // True when previousLocale_ holds a value to restore. A separate flag is
  // needed because an empty locale name has its own meaning for setlocale.
  bool hasPreviousLocale_ = false;

  bool supported_ = false;

  // Returns true when |locale| has the form "<name>.<codepage>" and the
  // code-page part equals "UTF-8" (ASCII case-insensitive).
  static bool HasUtf8Codepage(const char* locale) {
    const char* codepage = std::strchr(locale, '.');
    if (!codepage) {
      // The locale name doesn't contain a code-page.
      return false;
    }

    // Skip past the '.'.
    return EqualsIgnoreCase(codepage + 1, "UTF-8");
  }

public:
  ToUTF8Locale() {
    // Store the old locale so we can reset it in the destructor. Copy the
    // name right away, before another std::setlocale call can clobber it.
    if (const char* previous = std::setlocale(LC_ALL, nullptr)) {
      previousLocale_ = previous;
      hasPreviousLocale_ = true;
    }

    // Query the system default locale.
    const char* defaultLocale = std::setlocale(LC_ALL, "");
    if (!defaultLocale) {
      // std::setlocale returns nullptr on failure.
      return;
    }

    // Inspect the default locale now: the next std::setlocale call is allowed
    // to overwrite the string |defaultLocale| points to.
    const bool defaultIsUtf8 = HasUtf8Codepage(defaultLocale);

    // Switch the default locale to be UTF-8 aware.
    const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8");
    if (!newLocale) {
      // std::setlocale returns nullptr on failure.
      return;
    }

    // UTF-8 is supported when the default locale and new locale support it:
    //
    // The default locale needs to support UTF-8, because this test is compiled
    // using the default locale.
    //
    // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at
    // runtime.
    supported_ = defaultIsUtf8 && HasUtf8Codepage(newLocale);
  }

  // The object owns a pending "restore locale" action; copying it would
  // perform that restore more than once.
  ToUTF8Locale(const ToUTF8Locale&) = delete;
  ToUTF8Locale& operator=(const ToUTF8Locale&) = delete;

  // Whether both the default and the UTF-8 locale use a UTF-8 code-page.
  bool supported() const { return supported_; }

  ~ToUTF8Locale() {
    // Restore the previous locale.
    if (hasPreviousLocale_) {
      std::setlocale(LC_ALL, previousLocale_.c_str());
    }
  }
};

// Checks that JS::EncodeNarrowToUtf8 hands back pure-ASCII input unchanged.
BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) {
  // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  //
  // Each literal reaches std::string_view as a const char*, so its length is
  // measured up to the first NUL: "abc\0def" is seen as just "abc".
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    // The conversion must yield a result ...
    CHECK(utf8 != nullptr);
    // ... with the same length as the input ...
    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
    // ... and the same characters (compared as std::string_view).
    CHECK(utf8.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_narrow_to_utf8)

// Checks that JS::EncodeWideToUtf8 maps pure-ASCII wide strings to the same
// characters in the UTF-8 result.
BEGIN_TEST(testCharacterEncoding_wide_to_utf8) {
  // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  //
  // Each literal reaches std::wstring_view as a const wchar_t*, so its length
  // is measured up to the first NUL: L"abc\0def" is seen as just L"abc".
  for (std::wstring_view string : {
           L"",
           L"a",
           L"abc",
           L"abc\0def",
       }) {
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);
    // For ASCII input the output has as many bytes as the input has wide
    // characters.
    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
    // Compare element by element; both sides are widened to char32_t so that
    // wchar_t and char values can be compared directly.
    CHECK(std::equal(
        string.begin(), string.end(), utf8.get(),
        [](wchar_t x, char y) { return char32_t(x) == char32_t(y); }));
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_to_utf8)

// Checks the exact UTF-8 bytes JS::EncodeWideToUtf8 produces for non-ASCII
// wide strings: a two-byte sequence and a four-byte sequence.
BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) {
  // Change the locale to be UTF-8 aware for the emoji string. The locale is
  // restored when |utf8locale| goes out of scope.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  {
    // U+00E4 encodes to the two UTF-8 bytes C3 A4.
    std::wstring_view string = L"ä";
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    CHECK_EQUAL(std::strlen(utf8.get()), 2U);
    // The expected bytes are cast to char so both sides of the comparison
    // have the same type.
    CHECK_EQUAL(utf8[0], char(0xC3));
    CHECK_EQUAL(utf8[1], char(0xA4));
  }
  {
    // U+1F4A9 lies outside the Basic Multilingual Plane and encodes to the
    // four UTF-8 bytes F0 9F 92 A9.
    std::wstring_view string = L"💩";
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    CHECK_EQUAL(std::strlen(utf8.get()), 4U);
    CHECK_EQUAL(utf8[0], char(0xF0));
    CHECK_EQUAL(utf8[1], char(0x9F));
    CHECK_EQUAL(utf8[2], char(0x92));
    CHECK_EQUAL(utf8[3], char(0xA9));
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii)

// Checks that JS::EncodeUtf8ToNarrow hands back pure-ASCII input unchanged.
BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) {
  // Assume the narrow charset is ASCII-compatible. Converting ASCII-only UTF-8
  // to the narrow charset is then a no-op.
  //
  // Each literal reaches std::string_view as a const char*, so its length is
  // measured up to the first NUL: "abc\0def" is seen as just "abc".
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto narrow = JS::EncodeUtf8ToNarrow(cx, string.data());
    // The conversion must yield a result ...
    CHECK(narrow != nullptr);
    // ... with the same length as the input ...
    CHECK_EQUAL(std::strlen(narrow.get()), string.length());
    // ... and the same characters (compared as std::string_view).
    CHECK(narrow.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_utf8_to_narrow)

// Checks that JS::EncodeUtf8ToWide maps pure-ASCII UTF-8 input to the same
// characters in the wide result.
BEGIN_TEST(testCharacterEncoding_utf8_to_wide) {
  // Assume the wide charset is ASCII-compatible. Converting ASCII-only UTF-8
  // to the wide charset then maps every byte to the same character value.
  //
  // Each literal reaches std::string_view as a const char*, so its length is
  // measured up to the first NUL: "abc\0def" is seen as just "abc".
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto wide = JS::EncodeUtf8ToWide(cx, string.data());
    CHECK(wide != nullptr);
    // For ASCII input the output has as many wide characters as the input
    // has bytes.
    CHECK_EQUAL(std::wcslen(wide.get()), string.length());
    // Compare element by element; both sides are widened to char32_t so that
    // char and wchar_t values can be compared directly.
    CHECK(std::equal(
        string.begin(), string.end(), wide.get(),
        [](char x, wchar_t y) { return char32_t(x) == char32_t(y); }));
  }
  return true;
}
END_TEST(testCharacterEncoding_utf8_to_wide)

// Checks that narrow -> UTF-8 -> narrow conversion reproduces the input, for
// ASCII as well as non-ASCII strings.
BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) {
  // Change the locale to be UTF-8 aware for the emoji string. The locale is
  // restored when |utf8locale| goes out of scope.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  for (std::string_view string : {
           "",
           "a",
           "abc",
           "ä",
           "💩",
       }) {
    // First leg: narrow charset to UTF-8.
    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    // Second leg: UTF-8 back to the narrow charset.
    auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get());
    CHECK(narrow != nullptr);

    // The round-tripped string must equal the original input.
    CHECK(narrow.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_narrow_roundtrip)

// Checks that wide -> UTF-8 -> wide conversion reproduces the input, for
// ASCII as well as non-ASCII strings.
BEGIN_TEST(testCharacterEncoding_wide_roundtrip) {
  // Change the locale to be UTF-8 aware for the emoji string. The locale is
  // restored when |utf8locale| goes out of scope.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  for (std::wstring_view string : {
           L"",
           L"a",
           L"abc",
           L"ä",
           L"💩",
       }) {
    // First leg: wide charset to UTF-8.
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    // Second leg: UTF-8 back to the wide charset.
    auto wide = JS::EncodeUtf8ToWide(cx, utf8.get());
    CHECK(wide != nullptr);

    // The round-tripped string must equal the original input (compared as
    // std::wstring_view).
    CHECK(wide.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_roundtrip)

quality85%

¤ Dauer der Verarbeitung: 0.25 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.