Quelle tests.rs

Sprache: Rust

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_normalizer::properties::CanonicalCombiningClassMap;
use icu_normalizer::properties::CanonicalComposition;
use icu_normalizer::properties::CanonicalDecomposition;
use icu_normalizer::properties::Decomposed;
use icu_normalizer::uts46::Uts46Mapper;
use icu_normalizer::ComposingNormalizer;
use icu_normalizer::DecomposingNormalizer;

/// Spot-checks NFD on a few representative inputs: precomposed Latin letters,
/// a musical symbol with a canonical decomposition, the ohm sign, and several
/// characters that only have compatibility decompositions (which NFD must
/// leave unchanged).
#[test]
fn test_nfd_basic() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    assert_eq!(normalizer.normalize("ä"), "a\u{0308}");
    assert_eq!(normalizer.normalize("Ä"), "A\u{0308}");
    assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}");
    assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}");
    // U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to U+1D157 U+1D165.
    // Written with escapes so the supplementary-plane characters cannot be
    // mangled into U+FFFD by tools that mishandle non-BMP text.
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}");
    assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign
    assert_eq!(normalizer.normalize("ﾍﾞ"), "ﾍﾞ"); // half-width unchanged
    assert_eq!(normalizer.normalize("ﾍﾟ"), "ﾍﾟ"); // half-width unchanged
    assert_eq!(normalizer.normalize("ﬁ"), "ﬁ"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged
    assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}

/// Spot-checks NFKD on the same inputs as the NFD test; here the
/// compatibility characters (half-width kana, ligatures, parenthesized
/// Hangul) are expected to be expanded.
#[test]
fn test_nfkd_basic() {
    let normalizer: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    assert_eq!(normalizer.normalize("ä"), "a\u{0308}");
    assert_eq!(normalizer.normalize("Ä"), "A\u{0308}");
    assert_eq!(normalizer.normalize("ệ"), "e\u{0323}\u{0302}");
    assert_eq!(normalizer.normalize("Ệ"), "E\u{0323}\u{0302}");
    // U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to U+1D157 U+1D165.
    // Written with escapes so the supplementary-plane characters cannot be
    // mangled into U+FFFD by tools that mishandle non-BMP text.
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}");
    assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign
    assert_eq!(normalizer.normalize("ﾍﾞ"), "ヘ\u{3099}"); // half-width to full-width
    assert_eq!(normalizer.normalize("ﾍﾟ"), "ヘ\u{309A}"); // half-width to full-width
    assert_eq!(normalizer.normalize("ﬁ"), "fi"); // ligature expanded
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{635}\u{644}\u{649} \u{627}\u{644}\u{644}\u{647} \u{639}\u{644}\u{64A}\u{647} \u{648}\u{633}\u{644}\u{645}");
    // ligature expanded
    assert_eq!(normalizer.normalize("㈎"), "(\u{1100}\u{1161})"); // parenthetical expanded
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}

/// Spot-checks NFC: decomposed Latin sequences are recomposed, a composition
/// exclusion stays decomposed, and compatibility characters are left alone.
#[test]
fn test_nfc_basic() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
    assert_eq!(normalizer.normalize("a\u{0308}"), "ä");
    assert_eq!(normalizer.normalize("A\u{0308}"), "Ä");
    assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ");
    assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ");
    // Composition exclusion: U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to
    // U+1D157 U+1D165 and must not be recomposed. Written with escapes so the
    // supplementary-plane characters cannot be mangled into U+FFFD by tools
    // that mishandle non-BMP text.
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}");

    assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign
    assert_eq!(normalizer.normalize("ﾍﾞ"), "ﾍﾞ"); // half-width unchanged
    assert_eq!(normalizer.normalize("ﾍﾟ"), "ﾍﾟ"); // half-width unchanged
    assert_eq!(normalizer.normalize("ﬁ"), "ﬁ"); // ligature unchanged
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{FDFA}"); // ligature unchanged
    assert_eq!(normalizer.normalize("㈎"), "㈎"); // parenthetical unchanged
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}

/// Spot-checks NFKC: like the NFC test, but compatibility characters are
/// expanded and then recomposed where a composition exists.
#[test]
fn test_nfkc_basic() {
    let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfkc();
    assert_eq!(normalizer.normalize("a\u{0308}"), "ä");
    assert_eq!(normalizer.normalize("A\u{0308}"), "Ä");
    assert_eq!(normalizer.normalize("e\u{0323}\u{0302}"), "ệ");
    assert_eq!(normalizer.normalize("E\u{0323}\u{0302}"), "Ệ");
    // Composition exclusion: U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to
    // U+1D157 U+1D165 and must not be recomposed. Written with escapes so the
    // supplementary-plane characters cannot be mangled into U+FFFD by tools
    // that mishandle non-BMP text.
    assert_eq!(normalizer.normalize("\u{1D15E}"), "\u{1D157}\u{1D165}");

    assert_eq!(normalizer.normalize("\u{2126}"), "Ω"); // ohm sign
    assert_eq!(normalizer.normalize("ﾍﾞ"), "ベ"); // half-width to full-width, then compose
    assert_eq!(normalizer.normalize("ﾍﾟ"), "ペ"); // half-width to full-width, then compose
    assert_eq!(normalizer.normalize("ﬁ"), "fi"); // ligature expanded
    assert_eq!(normalizer.normalize("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(normalizer.normalize("㈎"), "(가)"); // parenthetical expanded and partially recomposed
    assert_eq!(normalizer.normalize("\u{0345}"), "\u{0345}"); // Iota subscript
}

/// Spot-checks `Uts46Mapper::map_normalize`: case mapping plus composition,
/// compatibility expansion, the UTS 46 deviation characters, and the handling
/// of disallowed and ignored characters.
#[test]
fn test_uts46_map_normalize() {
    let mapper: Uts46Mapper = Uts46Mapper::new();
    // Runs the mapper over `s` and gathers the output for comparison.
    let mapped = |s: &str| -> String { mapper.map_normalize(s.chars()).collect() };

    assert_eq!(mapped("a\u{0308}"), "ä");
    assert_eq!(mapped("A\u{0308}"), "ä");
    assert_eq!(mapped("e\u{0323}\u{0302}"), "ệ");
    assert_eq!(mapped("E\u{0323}\u{0302}"), "ệ");
    // Composition exclusion: U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to
    // U+1D157 U+1D165 and must not be recomposed. Written with escapes so the
    // supplementary-plane characters cannot be mangled into U+FFFD by tools
    // that mishandle non-BMP text.
    assert_eq!(mapped("\u{1D15E}"), "\u{1D157}\u{1D165}");

    assert_eq!(mapped("\u{2126}"), "ω"); // ohm sign
    assert_eq!(mapped("ﾍﾞ"), "ベ"); // half-width to full-width, then compose
    assert_eq!(mapped("ﾍﾟ"), "ペ"); // half-width to full-width, then compose
    assert_eq!(mapped("ﬁ"), "fi"); // ligature expanded
    assert_eq!(mapped("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(mapped("㈎"), "(가)"); // parenthetical expanded and partially recomposed

    // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4)
    assert_eq!(mapped("\u{200C}"), "\u{200C}");
    assert_eq!(mapped("\u{200D}"), "\u{200D}");
    assert_eq!(mapped("ß"), "ß");
    assert_eq!(mapped("ς"), "ς");

    // Iota subscript
    assert_eq!(mapped("\u{0345}"), "ι");

    // Disallowed
    assert_eq!(mapped("\u{061C}"), "\u{FFFD}");

    // Ignored
    assert_eq!(mapped("a\u{180B}b"), "ab");
}

/// Spot-checks `Uts46Mapper::normalize_validate` on the same inputs as the
/// `map_normalize` test. The expectations match except for the "ignored"
/// character, which here comes out as U+FFFD instead of being dropped.
#[test]
fn test_uts46_normalize_validate() {
    let mapper: Uts46Mapper = Uts46Mapper::new();
    // Runs the validating normalizer over `s` and gathers the output.
    let validated = |s: &str| -> String { mapper.normalize_validate(s.chars()).collect() };

    assert_eq!(validated("a\u{0308}"), "ä");
    assert_eq!(validated("A\u{0308}"), "ä");
    assert_eq!(validated("e\u{0323}\u{0302}"), "ệ");
    assert_eq!(validated("E\u{0323}\u{0302}"), "ệ");
    // Composition exclusion: U+1D15E MUSICAL SYMBOL HALF NOTE decomposes to
    // U+1D157 U+1D165 and must not be recomposed. Written with escapes so the
    // supplementary-plane characters cannot be mangled into U+FFFD by tools
    // that mishandle non-BMP text.
    assert_eq!(validated("\u{1D15E}"), "\u{1D157}\u{1D165}");

    assert_eq!(validated("\u{2126}"), "ω"); // ohm sign
    assert_eq!(validated("ﾍﾞ"), "ベ"); // half-width to full-width, then compose
    assert_eq!(validated("ﾍﾟ"), "ペ"); // half-width to full-width, then compose
    assert_eq!(validated("ﬁ"), "fi"); // ligature expanded
    assert_eq!(validated("\u{FDFA}"), "\u{0635}\u{0644}\u{0649} \u{0627}\u{0644}\u{0644}\u{0647} \u{0639}\u{0644}\u{064A}\u{0647} \u{0648}\u{0633}\u{0644}\u{0645}");
    // ligature expanded
    assert_eq!(validated("㈎"), "(가)"); // parenthetical expanded and partially recomposed

    // Deviations (UTS 46, 6 Mapping Table Derivation, Step 4)
    assert_eq!(validated("\u{200C}"), "\u{200C}");
    assert_eq!(validated("\u{200D}"), "\u{200D}");
    assert_eq!(validated("ß"), "ß");
    assert_eq!(validated("ς"), "ς");

    // Iota subscript
    assert_eq!(validated("\u{0345}"), "ι");

    // Disallowed
    assert_eq!(validated("\u{061C}"), "\u{FFFD}");

    // Ignored
    assert_eq!(validated("a\u{180B}b"), "a\u{FFFD}b");
}

/// String type from the `arraystring` crate, used as the `&str`/UTF-8 output
/// sink in the `*_to` tests and as the field type returned by `parse_hex`.
/// NOTE(review): `U48` presumably fixes the capacity at 48 bytes — confirm
/// against the `arraystring` documentation.
type StackString = arraystring::ArrayString<arraystring::typenum::U48>;

/// NFD via `normalize_to`, writing into a reused `StackString` sink.
#[test]
fn test_nfd_str_to() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut sink = StackString::new();

    // (input, expected NFD output)
    let cases = [("ä", "a\u{0308}"), ("ệ", "e\u{0323}\u{0302}")];
    for &(input, expected) in cases.iter() {
        sink.clear();
        assert!(nfd.normalize_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    }
}

/// NFD via `normalize_utf8_to`, feeding well-formed UTF-8 bytes and writing
/// into a reused `StackString` sink.
#[test]
fn test_nfd_utf8_to() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut sink = StackString::new();

    // (input, expected NFD output); the input is passed as raw bytes.
    let cases = [("ä", "a\u{0308}"), ("ệ", "e\u{0323}\u{0302}")];
    for &(input, expected) in cases.iter() {
        sink.clear();
        let outcome = nfd.normalize_utf8_to(input.as_bytes(), &mut sink);
        assert!(outcome.is_ok());
        assert_eq!(&sink, expected);
    }
}

/// `ArrayVec` of `u16` code units (const parameter 32), used as the UTF-16
/// input/output buffer in the `*_utf16_*` tests and helpers.
type StackVec = arrayvec::ArrayVec<u16, 32>;

/// NFD via `normalize_utf16_to`, writing into a reused `StackVec` sink.
#[test]
fn test_nfd_utf16_to() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut sink = StackVec::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u16], expected: &[u16]| {
        sink.clear();
        assert!(nfd.normalize_utf16_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    check(&[0x00E4], &[0x0061, 0x0308]);
    check(&[0x1EC7], &[0x0065, 0x0323, 0x0302]);
}

/// NFC via `normalize_to`, writing into a reused `StackString` sink.
#[test]
fn test_nfc_str_to() {
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut sink = StackString::new();

    // (input, expected NFC output)
    let cases = [("a\u{0308}", "ä"), ("e\u{0323}\u{0302}", "ệ")];
    for &(input, expected) in cases.iter() {
        sink.clear();
        assert!(nfc.normalize_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    }
}

/// NFC via `normalize_utf8_to`, feeding well-formed UTF-8 bytes and writing
/// into a reused `StackString` sink.
#[test]
fn test_nfc_utf8_to() {
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut sink = StackString::new();

    // (input, expected NFC output); the input is passed as raw bytes.
    let cases = [("a\u{0308}", "ä"), ("e\u{0323}\u{0302}", "ệ")];
    for &(input, expected) in cases.iter() {
        sink.clear();
        let outcome = nfc.normalize_utf8_to(input.as_bytes(), &mut sink);
        assert!(outcome.is_ok());
        assert_eq!(&sink, expected);
    }
}

/// NFC via `normalize_utf16_to`, writing into a reused `StackVec` sink.
#[test]
fn test_nfc_utf16_to() {
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut sink = StackVec::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u16], expected: &[u16]| {
        sink.clear();
        assert!(nfc.normalize_utf16_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    check(&[0x0061, 0x0308], &[0x00E4]);
    check(&[0x0065, 0x0323, 0x0302], &[0x1EC7]);
}

/// NFC over ill-formed UTF-8: each invalid byte sequence in the input is
/// expected to show up as a single U+FFFD in the output while the
/// surrounding text is still normalized.
#[test]
fn test_nfc_utf8_to_errors() {
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut sink = StackString::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u8], expected: &str| {
        sink.clear();
        assert!(nfc.normalize_utf8_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    // Lone invalid bytes around a composable sequence.
    check(b"\xFFa\xCC\x88\xFF", "\u{FFFD}ä\u{FFFD}");
    check(b"\x80e\xCC\xA3\xCC\x82\x80", "\u{FFFD}ệ\u{FFFD}");
    // Invalid bytes embedded in ASCII runs.
    check(b"aaa\xFFaaa\xFFaaa", "aaa\u{FFFD}aaa\u{FFFD}aaa");
    // Truncated three-byte sequences embedded in ASCII runs.
    check(b"aaa\xE2\x98aaa\xE2\x98aaa", "aaa\u{FFFD}aaa\u{FFFD}aaa");
}

/// NFD over ill-formed UTF-8: each invalid byte sequence in the input is
/// expected to show up as a single U+FFFD in the output while the
/// surrounding text is still normalized.
#[test]
fn test_nfd_utf8_to_errors() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut sink = StackString::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u8], expected: &str| {
        sink.clear();
        assert!(nfd.normalize_utf8_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    // Lone invalid bytes around a decomposable character.
    check(b"\xFF\xC3\xA4\xFF", "\u{FFFD}a\u{0308}\u{FFFD}");
    check(b"\x80\xE1\xBB\x87\x80", "\u{FFFD}e\u{0323}\u{0302}\u{FFFD}");
    // Invalid bytes embedded in ASCII runs.
    check(b"aaa\xFFaaa\xFFaaa", "aaa\u{FFFD}aaa\u{FFFD}aaa");
    // Truncated three-byte sequences embedded in ASCII runs.
    check(b"aaa\xE2\x98aaa\xE2\x98aaa", "aaa\u{FFFD}aaa\u{FFFD}aaa");
}

/// NFC over ill-formed UTF-16: every unpaired surrogate (high `0xD800` or low
/// `0xDC00`) is expected to become U+FFFD, with the rest of the input
/// normalized as usual.
#[test]
fn test_nfc_utf16_to_errors() {
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let mut sink = StackVec::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u16], expected: &[u16]| {
        sink.clear();
        assert!(nfc.normalize_utf16_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    // Unpaired surrogate at the start, followed by a composable pair.
    check(&[0xD800, 0x0061, 0x0308], &[0xFFFD, 0x00E4]);
    check(&[0xDC00, 0x0061, 0x0308], &[0xFFFD, 0x00E4]);

    // Unpaired surrogate after a leading 'a'.
    check(&[0x0061, 0xD800, 0x0061, 0x0308], &[0x0061, 0xFFFD, 0x00E4]);
    check(&[0x0061, 0xDC00, 0x0061, 0x0308], &[0x0061, 0xFFFD, 0x00E4]);

    // Unpaired surrogates in the middle and at the end.
    check(
        &[0x0061, 0xD800, 0x0061, 0x0308, 0xD800],
        &[0x0061, 0xFFFD, 0x00E4, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x0061, 0x0308, 0xDC00],
        &[0x0061, 0xFFFD, 0x00E4, 0xFFFD],
    );

    // Same shape, but with nothing to compose.
    check(
        &[0x0061, 0xD800, 0x0061, 0x0061, 0xD800],
        &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x0061, 0x0061, 0xDC00],
        &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
    );

    // The expected output keeps U+0308 separate from the 'a' before the surrogate.
    check(
        &[0x0061, 0xD800, 0x0308, 0xD800],
        &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x0308, 0xDC00],
        &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
    );
}

/// NFD over ill-formed UTF-16: every unpaired surrogate (high `0xD800` or low
/// `0xDC00`) is expected to become U+FFFD, with the rest of the input
/// normalized as usual.
#[test]
fn test_nfd_utf16_to_errors() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let mut sink = StackVec::new();

    // Normalizes `input` into the (cleared) sink and compares with `expected`.
    let mut check = |input: &[u16], expected: &[u16]| {
        sink.clear();
        assert!(nfd.normalize_utf16_to(input, &mut sink).is_ok());
        assert_eq!(&sink, expected);
    };

    // Unpaired surrogate at the start, followed by a decomposable character.
    check(&[0xD800, 0x00E4], &[0xFFFD, 0x0061, 0x0308]);
    check(&[0xDC00, 0x00E4], &[0xFFFD, 0x0061, 0x0308]);

    // Unpaired surrogate after a leading 'a'.
    check(
        &[0x0061, 0xD800, 0x00E4],
        &[0x0061, 0xFFFD, 0x0061, 0x0308],
    );
    check(
        &[0x0061, 0xDC00, 0x00E4],
        &[0x0061, 0xFFFD, 0x0061, 0x0308],
    );

    // Unpaired surrogates in the middle and at the end.
    check(
        &[0x0061, 0xD800, 0x00E4, 0xD800],
        &[0x0061, 0xFFFD, 0x0061, 0x0308, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x00E4, 0xDC00],
        &[0x0061, 0xFFFD, 0x0061, 0x0308, 0xFFFD],
    );

    // Same shape, but with nothing to decompose.
    check(
        &[0x0061, 0xD800, 0x0061, 0x0061, 0xD800],
        &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x0061, 0x0061, 0xDC00],
        &[0x0061, 0xFFFD, 0x0061, 0x0061, 0xFFFD],
    );

    // A combining mark directly after the unpaired surrogate.
    check(
        &[0x0061, 0xD800, 0x0308, 0xD800],
        &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
    );
    check(
        &[0x0061, 0xDC00, 0x0308, 0xDC00],
        &[0x0061, 0xFFFD, 0x0308, 0xFFFD],
    );
}

use atoi::FromRadix16;

/// Splits the start of `hexes` into five fields. Each field is a run of
/// space-separated hexadecimal scalar values terminated by `;`, and is
/// returned as a string of the corresponding characters.
///
/// Parsing stops right after the fifth `;`; anything that follows is ignored.
///
/// # Panics
///
/// Panics if a parsed value is not a Unicode scalar value, if a field does
/// not fit in a `StackString`, if a value is followed by a byte other than
/// `' '` or `';'`, or if the input ends before the fifth `;`.
fn parse_hex(mut hexes: &[u8]) -> [StackString; 5] {
    let mut fields = [
        StackString::new(),
        StackString::new(),
        StackString::new(),
        StackString::new(),
        StackString::new(),
    ];
    let mut field_index = 0;
    loop {
        // `consumed` is the number of bytes `from_radix_16` read as hex digits.
        let (code_point, consumed) = u32::from_radix_16(hexes);
        let ch = core::char::from_u32(code_point).unwrap();
        fields[field_index].try_push(ch).unwrap();

        let delimiter = hexes[consumed];
        if delimiter == b';' {
            field_index += 1;
            if field_index == fields.len() {
                return fields;
            }
        } else if delimiter != b' ' {
            panic!("Bad format: Garbage");
        }
        // Step past the digits and the single delimiter byte.
        hexes = &hexes[consumed + 1..];
    }
}

/// Runs `data/NormalizationTest.txt` through the iterator API of all four
/// normalizers.
///
/// Every data row has five columns (source, NFC, NFD, NFKC, NFKD) and each
/// normalizer is checked against the matching expected column for every
/// input column. While in `@Part1`, each row's source must be a single
/// character, and every scalar value skipped between consecutive rows (and
/// from the last row up to U+10FFFF once `@Part2` starts) must normalize to
/// itself under all four forms.
#[test]
fn test_conformance() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();

    // Asserts that `c` is left untouched by all four normalization forms.
    let assert_inert = |c: char| {
        assert!(nfd
            .normalize_iter(core::iter::once(c))
            .eq(core::iter::once(c)));
        assert!(nfkd
            .normalize_iter(core::iter::once(c))
            .eq(core::iter::once(c)));
        assert!(nfc
            .normalize_iter(core::iter::once(c))
            .eq(core::iter::once(c)));
        assert!(nfkc
            .normalize_iter(core::iter::once(c))
            .eq(core::iter::once(c)));
    };

    // Column indices: 0 source, 1 NFC, 2 NFD, 3 NFKC, 4 NFKD.
    // Each table lists (input column, expected column) pairs.
    const NFC_CHECKS: [(usize, usize); 5] = [(0, 1), (1, 1), (2, 1), (3, 3), (4, 3)];
    const NFD_CHECKS: [(usize, usize); 5] = [(0, 2), (1, 2), (2, 2), (3, 4), (4, 4)];
    const NFKC_CHECKS: [(usize, usize); 5] = [(0, 3), (1, 3), (2, 3), (3, 3), (4, 3)];
    const NFKD_CHECKS: [(usize, usize); 5] = [(0, 4), (1, 4), (2, 4), (3, 4), (4, 4)];

    let mut last_seen = 0u32;
    let mut section = 0u8;
    let data = include_bytes!("data/NormalizationTest.txt");
    for line in data.split(|b| b == &b'\n') {
        // Blank lines and comments carry no test data.
        if line.is_empty() || line.starts_with(b"#") {
            continue;
        }
        if line.starts_with(b"@Part") {
            section = line[5] - b'0';
            if section == 2 {
                // Everything after the last part-1 row must be inert.
                for c in (last_seen + 1..=0x10FFFF).filter_map(char::from_u32) {
                    assert_inert(c);
                }
            }
            continue;
        }
        let columns = parse_hex(line);
        if section == 1 {
            let mut source_chars = columns[0].chars();
            let only = source_chars.next().unwrap();
            assert_eq!(source_chars.next(), None);
            let only_u = u32::from(only);
            // Scalar values skipped between rows must be inert.
            for c in (last_seen + 1..only_u).filter_map(char::from_u32) {
                assert_inert(c);
            }
            last_seen = only_u;
        }

        // NFC
        for &(from, to) in NFC_CHECKS.iter() {
            assert!(nfc
                .normalize_iter(columns[from].chars())
                .eq(columns[to].chars()));
        }

        // NFD
        for &(from, to) in NFD_CHECKS.iter() {
            assert!(nfd
                .normalize_iter(columns[from].chars())
                .eq(columns[to].chars()));
        }

        // NFKC
        for &(from, to) in NFKC_CHECKS.iter() {
            assert!(nfkc
                .normalize_iter(columns[from].chars())
                .eq(columns[to].chars()));
        }

        // NFKD
        for &(from, to) in NFKD_CHECKS.iter() {
            assert!(nfkd
                .normalize_iter(columns[from].chars())
                .eq(columns[to].chars()));
        }
    }
}

// Commented out, because we don't currently have a way to force a no-op set for testing.
// #[test]
// fn test_hangul() {
//     use icu_collections::codepointinvlist::{CodePointSet, CodePointSetBuilder};
//     use zerofrom::ZeroFrom;
//     let builder = CodePointSetBuilder::new();
//     let set: CodePointSet = builder.build();

//     let normalizer: ComposingNormalizer = ComposingNormalizer::new_nfc();
//     {
//         let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11A7}".chars());
//         // Pessimize passthrough to avoid hiding bugs.
//         norm_iter
//             .decomposition
//             .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set));
//         assert!(norm_iter.eq("A\u{AC00}\u{11A7}".chars()));
//     }
//     {
//         let mut norm_iter = normalizer.normalize_iter("A\u{AC00}\u{11C2}".chars());
//         // Pessimize passthrough to avoid hiding bugs.
//         norm_iter
//             .decomposition
//             .potential_passthrough_and_not_backward_combining = Some(ZeroFrom::zero_from(&set));
//         assert!(norm_iter.eq("A\u{AC1B}".chars()));
//     }
// }

/// Replaces the contents of `sink` with the UTF-16 encoding of `s`.
///
/// Panics if `try_extend_from_slice` reports an error for any character.
fn str_to_utf16(s: &str, sink: &mut StackVec) {
    sink.clear();
    // Scratch space for one character; a `char` needs at most two code units.
    let mut units = [0u16; 2];
    s.chars().for_each(|ch| {
        let encoded = ch.encode_utf16(&mut units);
        sink.try_extend_from_slice(encoded).unwrap();
    });
}

/// Replaces the contents of `sink` with the UTF-16 encoding of `c`.
///
/// Panics if `try_extend_from_slice` reports an error.
fn char_to_utf16(c: char, sink: &mut StackVec) {
    sink.clear();
    // Scratch space; a `char` needs at most two code units.
    let mut units = [0u16; 2];
    let encoded = c.encode_utf16(&mut units);
    sink.try_extend_from_slice(encoded).unwrap();
}

/// Replaces the contents of `sink` with `s`.
///
/// Panics if `try_push_str` reports an error.
fn str_to_str(s: &str, sink: &mut StackString) {
    sink.clear();
    let pushed = sink.try_push_str(s);
    pushed.unwrap();
}

/// Replaces the contents of `sink` with the single character `c`.
///
/// Panics if `try_push` reports an error.
fn char_to_str(c: char, sink: &mut StackString) {
    sink.clear();
    let pushed = sink.try_push(c);
    pushed.unwrap();
}

#[test]
fn test_conformance_utf16() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();

    let mut input = StackVec::new();
    let mut normalized = StackVec::new();
    let mut expected = StackVec::new();

    let mut prev = 0u32;
    let mut part = 0u8;
    let data = include_bytes!("data/NormalizationTest.txt");
    let lines = data.split(|b| b == &b'\n');
    for line in lines {
        if line.is_empty() {
            continue;
        }
        if line.starts_with(b"#") {
            continue;
        }
        if line.starts_with(&b"@Part"[..]) {
            part = line[5] - b'0';
            if part == 2 {
                for u in prev + 1..=0x10FFFF {
                    if let Some(c) = char::from_u32(u) {
                        normalized.clear();
                        char_to_utf16(c, &mut input);
                        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
                        assert_eq!(&normalized, &input);

                        normalized.clear();
                        char_to_utf16(c, &mut input);
                        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
                        assert_eq!(&normalized, &input);

                        normalized.clear();
                        char_to_utf16(c, &mut input);
                        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
                        assert_eq!(&normalized, &input);

                        normalized.clear();
                        char_to_utf16(c, &mut input);
                        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
                        assert_eq!(&normalized, &input);
                    }
                }
            }
            continue;
        }
        let strings = parse_hex(line);
        // 0: source
        // 1: NFC
        // 2: NFD
        // 3: NFKC
        // 4: NFKD
        if part == 1 {
            let mut iter = strings[0].chars();
            let current = iter.next().unwrap();
            assert_eq!(iter.next(), None);
            let current_u = u32::from(current);
            for u in prev + 1..current_u {
                if let Some(c) = char::from_u32(u) {
                    normalized.clear();
                    char_to_utf16(c, &mut input);
                    assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
                    assert_eq!(&normalized, &input);

                    normalized.clear();
                    char_to_utf16(c, &mut input);
                    assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
                    assert_eq!(&normalized, &input);

                    normalized.clear();
                    char_to_utf16(c, &mut input);
                    assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
                    assert_eq!(&normalized, &input);

                    normalized.clear();
                    char_to_utf16(c, &mut input);
                    assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
                    assert_eq!(&normalized, &input);
                }
            }
            prev = current_u;
        }
        // NFC
        normalized.clear();
        str_to_utf16(&strings[0], &mut input);
        str_to_utf16(&strings[1], &mut expected);
        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[1], &mut input);
        str_to_utf16(&strings[1], &mut expected);
        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[2], &mut input);
        str_to_utf16(&strings[1], &mut expected);
        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[3], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[4], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        // NFD
        normalized.clear();
        str_to_utf16(&strings[0], &mut input);
        str_to_utf16(&strings[2], &mut expected);
        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[1], &mut input);
        str_to_utf16(&strings[2], &mut expected);
        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[2], &mut input);
        str_to_utf16(&strings[2], &mut expected);
        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[3], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[4], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        // NFKC
        normalized.clear();
        str_to_utf16(&strings[0], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[1], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[2], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[3], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[4], &mut input);
        str_to_utf16(&strings[3], &mut expected);
        assert!(nfkc.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        // NFKD
        normalized.clear();
        str_to_utf16(&strings[0], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[1], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[2], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[3], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);

        normalized.clear();
        str_to_utf16(&strings[4], &mut input);
        str_to_utf16(&strings[4], &mut expected);
        assert!(nfkd.normalize_utf16_to(&input, &mut normalized).is_ok());
        assert_eq!(&normalized, &expected);
    }
}

/// Runs the UTF-8 entry points of all four normalizers against the
/// conformance data in `data/NormalizationTest.txt`.
///
/// Every data line is checked in all four forms. In addition, scalar values
/// skipped between consecutive Part 1 lines (and, once `@Part2` is reached,
/// all remaining scalar values up to U+10FFFF) are expected to pass through
/// every normalizer unchanged.
#[test]
fn test_conformance_utf8() {
    let nfd = DecomposingNormalizer::new_nfd();
    let nfkd = DecomposingNormalizer::new_nfkd();
    let nfc = ComposingNormalizer::new_nfc();
    let nfkc = ComposingNormalizer::new_nfkc();

    // Scratch buffers shared by every individual check below.
    let mut input = StackString::new();
    let mut normalized = StackString::new();
    let mut expected = StackString::new();

    // Feeds one scalar value through `$normalizer` and requires the output to
    // equal the input.
    macro_rules! assert_char_unchanged_by {
        ($normalizer:ident, $scalar:ident) => {{
            normalized.clear();
            char_to_str($scalar, &mut input);
            assert!($normalizer
                .normalize_utf8_to(input.as_bytes(), &mut normalized)
                .is_ok());
            assert_eq!(&normalized, &input);
        }};
    }

    // Requires one scalar value to be left alone by NFD, NFKD, NFC and NFKC,
    // in that order.
    macro_rules! assert_char_unchanged {
        ($scalar:ident) => {{
            assert_char_unchanged_by!(nfd, $scalar);
            assert_char_unchanged_by!(nfkd, $scalar);
            assert_char_unchanged_by!(nfc, $scalar);
            assert_char_unchanged_by!(nfkc, $scalar);
        }};
    }

    // For each `(source column, expected column)` pair, normalizes the source
    // column with `$normalizer` and compares against the expected column.
    macro_rules! assert_columns {
        ($normalizer:ident, $columns:ident, $pairs:expr) => {{
            let pairs: [(usize, usize); 5] = $pairs;
            for (src, want) in pairs {
                normalized.clear();
                str_to_str(&$columns[src], &mut input);
                str_to_str(&$columns[want], &mut expected);
                assert!($normalizer
                    .normalize_utf8_to(input.as_bytes(), &mut normalized)
                    .is_ok());
                assert_eq!(&normalized, &expected);
            }
        }};
    }

    // Last Part 1 source scalar value seen so far, and the current part number.
    let mut prev = 0u32;
    let mut part = 0u8;
    let data = include_bytes!("data/NormalizationTest.txt");
    for line in data.split(|&byte| byte == b'\n') {
        // Blank lines and comment lines carry no test data.
        if line.is_empty() || line.starts_with(b"#") {
            continue;
        }
        if line.starts_with(&b"@Part"[..]) {
            // The part number is the single digit right after "@Part".
            part = line[5] - b'0';
            if part == 2 {
                // Part 1 is over: everything after its last entry must be
                // unaffected by normalization.
                for c in (prev + 1..=0x10FFFF).filter_map(char::from_u32) {
                    assert_char_unchanged!(c);
                }
            }
            continue;
        }
        let strings = parse_hex(line);
        // Column layout of `strings`:
        // 0: source
        // 1: NFC
        // 2: NFD
        // 3: NFKC
        // 4: NFKD
        if part == 1 {
            // Part 1 sources consist of exactly one scalar value.
            let mut source_chars = strings[0].chars();
            let only = source_chars.next().unwrap();
            assert_eq!(source_chars.next(), None);
            let code_point = u32::from(only);
            // Scalar values skipped since the previous entry must be unchanged.
            for c in (prev + 1..code_point).filter_map(char::from_u32) {
                assert_char_unchanged!(c);
            }
            prev = code_point;
        }

        // NFC
        assert_columns!(nfc, strings, [(0, 1), (1, 1), (2, 1), (3, 3), (4, 3)]);
        // NFD
        assert_columns!(nfd, strings, [(0, 2), (1, 2), (2, 2), (3, 4), (4, 4)]);
        // NFKC
        assert_columns!(nfkc, strings, [(0, 3), (1, 3), (2, 3), (3, 3), (4, 3)]);
        // NFKD
        assert_columns!(nfkd, strings, [(0, 4), (1, 4), (2, 4), (3, 4), (4, 4)]);
    }
}

/// Checks pairwise canonical composition through
/// `CanonicalComposition::compose`: non-composing pairs, ordinary
/// base + combining mark pairs, a composition exclusion, pairs whose second
/// character is a starter, and Hangul syllable composition.
#[test]
fn test_canonical_composition() {
    let comp = CanonicalComposition::new();

    assert_eq!(comp.compose('a', 'b'), None); // Just two starters

    assert_eq!(comp.compose('a', '\u{0308}'), Some('ä'));
    assert_eq!(comp.compose('A', '\u{0308}'), Some('Ä'));
    assert_eq!(comp.compose('ẹ', '\u{0302}'), Some('ệ'));
    assert_eq!(comp.compose('Ẹ', '\u{0302}'), Some('Ệ'));
    assert_eq!(comp.compose('\u{1D157}', '\u{1D165}'), None); // Composition exclusion

    assert_eq!(comp.compose('ে', 'া'), Some('ো')); // Second is starter; BMP

    // Second is starter; non-BMP.
    // The operands are spelled as escapes so that the literals survive lossy
    // re-encoding of this file: U+11131 followed by U+11127 composes to
    // U+1112E (CHAKMA VOWEL SIGN O).
    assert_eq!(
        comp.compose('\u{11131}', '\u{11127}'),
        Some('\u{1112E}')
    );

    assert_eq!(comp.compose('ᄀ', 'ᅡ'), Some('가')); // Hangul LV
    assert_eq!(comp.compose('가', 'ᆨ'), Some('각')); // Hangul LVT
}

/// Checks single-step canonical decomposition through
/// `CanonicalDecomposition::decompose`: two-character expansions (including
/// non-BMP and Hangul cases) and singleton mappings.
#[test]
fn test_canonical_decomposition() {
    let decomp = CanonicalDecomposition::new();

    assert_eq!(
        decomp.decompose('ä'),
        Decomposed::Expansion('a', '\u{0308}')
    );
    assert_eq!(
        decomp.decompose('Ä'),
        Decomposed::Expansion('A', '\u{0308}')
    );
    // Only one decomposition step is applied: the result keeps the
    // precomposed dot-below letter as its first character.
    assert_eq!(
        decomp.decompose('ệ'),
        Decomposed::Expansion('ẹ', '\u{0302}')
    );
    assert_eq!(
        decomp.decompose('Ệ'),
        Decomposed::Expansion('Ẹ', '\u{0302}')
    );
    assert_eq!(
        decomp.decompose('\u{1D15E}'),
        Decomposed::Expansion('\u{1D157}', '\u{1D165}')
    );
    assert_eq!(decomp.decompose('ো'), Decomposed::Expansion('ে', 'া'));
    // Non-BMP expansion, spelled as escapes so that the literals survive
    // lossy re-encoding of this file: U+1112E (CHAKMA VOWEL SIGN O)
    // decomposes to U+11131 followed by U+11127.
    assert_eq!(
        decomp.decompose('\u{1112E}'),
        Decomposed::Expansion('\u{11131}', '\u{11127}')
    );
    assert_eq!(decomp.decompose('가'), Decomposed::Expansion('ᄀ', 'ᅡ'));
    assert_eq!(decomp.decompose('각'), Decomposed::Expansion('가', 'ᆨ'));

    assert_eq!(decomp.decompose('\u{212B}'), Decomposed::Singleton('Å')); // ANGSTROM SIGN
    assert_eq!(decomp.decompose('\u{2126}'), Decomposed::Singleton('Ω')); // OHM SIGN

    assert_eq!(decomp.decompose('\u{1F71}'), Decomposed::Singleton('ά')); // oxia
    assert_eq!(
        decomp.decompose('\u{1F72}'),
        Decomposed::Expansion('ε', '\u{0300}')
    ); // not oxia but in the oxia range
    assert_eq!(
        decomp.decompose('ά'),
        Decomposed::Expansion('α', '\u{0301}')
    ); // tonos
}

/// Cross-checks `CanonicalCombiningClassMap::get32` against the canonical
/// combining class map exposed by `icu_properties` for every value in
/// `0..=0x10FFFF`.
#[test]
fn test_ccc() {
    let normalizer_map = CanonicalCombiningClassMap::new();
    for code_point in 0..=0x10FFFF {
        let from_normalizer = normalizer_map.get32(code_point);
        let from_properties = icu_properties::maps::canonical_combining_class().get32(code_point);
        assert_eq!(from_normalizer, from_properties);
    }
}

/// Smoke test for `ComposingNormalizer::normalize_utf16` in NFC mode.
#[test]
fn test_utf16_basic() {
    let nfc = ComposingNormalizer::new_nfc();

    // A lone ASCII letter comes back as-is.
    let plain_a: &[u16] = &[0x0061];
    assert_eq!(nfc.normalize_utf16(plain_a).as_slice(), plain_a);

    // Two combining marks given as U+0300, U+0323 are expected back in the
    // order U+0323, U+0300.
    let grave_then_dot_below: &[u16] = &[0x0300, 0x0323];
    let dot_below_then_grave: &[u16] = &[0x0323, 0x0300];
    assert_eq!(
        nfc.normalize_utf16(grave_then_dot_below).as_slice(),
        dot_below_then_grave
    );
}

/// NFKD of an accented "DZ" digraph: the precomposed U+01C4 followed by
/// U+0323, and the spelled-out "DZ" + U+030C + U+0323, must both normalize to
/// "DZ" + U+0323 + U+030C.
#[test]
fn test_accented_digraph() {
    let nfkd = DecomposingNormalizer::new_nfkd();
    let expected = "DZ\u{0323}\u{030C}";
    for input in ["\u{01C4}\u{0323}", "DZ\u{030C}\u{0323}"] {
        assert_eq!(nfkd.normalize(input), expected);
    }
}

/// NFD of U+0DDD followed by U+0334: the expected output is the four-scalar
/// sequence U+0DD9, U+0DCF, U+0334, U+0DCA, i.e. the trailing U+0334 ends up
/// ahead of the final U+0DCA.
#[test]
fn test_ddd() {
    let nfd = DecomposingNormalizer::new_nfd();
    let input = "\u{0DDD}\u{0334}";
    let expected = "\u{0DD9}\u{0DCF}\u{0334}\u{0DCA}";
    assert_eq!(nfd.normalize(input), expected);
}

/// Exercises `is_normalized`, `is_normalized_utf8` and `is_normalized_utf16`
/// on all four normalizers with the same inputs expressed as `&str`, UTF-8
/// bytes and UTF-16 code units.
#[test]
fn test_is_normalized() {
    let nfd: DecomposingNormalizer = DecomposingNormalizer::new_nfd();
    let nfkd: DecomposingNormalizer = DecomposingNormalizer::new_nfkd();
    let nfc: ComposingNormalizer = ComposingNormalizer::new_nfc();
    let nfkc: ComposingNormalizer = ComposingNormalizer::new_nfkc();

    // Plain ASCII: normalized in every form.
    let aaa = "aaa";
    assert!(nfd.is_normalized(aaa));
    assert!(nfkd.is_normalized(aaa));
    assert!(nfc.is_normalized(aaa));
    assert!(nfkc.is_normalized(aaa));

    assert!(nfd.is_normalized_utf8(aaa.as_bytes()));
    assert!(nfkd.is_normalized_utf8(aaa.as_bytes()));
    assert!(nfc.is_normalized_utf8(aaa.as_bytes()));
    assert!(nfkc.is_normalized_utf8(aaa.as_bytes()));

    let aaa16 = [0x0061u16, 0x0061u16, 0x0061u16].as_slice();
    assert!(nfd.is_normalized_utf16(aaa16));
    assert!(nfkd.is_normalized_utf16(aaa16));
    assert!(nfc.is_normalized_utf16(aaa16));
    assert!(nfkc.is_normalized_utf16(aaa16));

    // An invalid UTF-8 byte (0xFF) must not make the input count as
    // non-normalized.
    let affa = b"a\xFFa";
    assert!(nfd.is_normalized_utf8(affa));
    assert!(nfkd.is_normalized_utf8(affa));
    assert!(nfc.is_normalized_utf8(affa));
    assert!(nfkc.is_normalized_utf8(affa));

    // Likewise for an unpaired surrogate (0xD800) in UTF-16.
    let a_surrogate_a = [0x0061u16, 0xD800u16, 0x0061u16].as_slice();
    assert!(nfd.is_normalized_utf16(a_surrogate_a));
    assert!(nfkd.is_normalized_utf16(a_surrogate_a));
    assert!(nfc.is_normalized_utf16(a_surrogate_a));
    assert!(nfkc.is_normalized_utf16(a_surrogate_a));

    // U+1D157 followed by U+1D165: already normalized in every form,
    // including the composed ones. The first scalar is written as an escape
    // so that it stays in sync with the surrogate pairs in `note16` below.
    let note = "a\u{1D157}\u{1D165}a";
    assert!(nfd.is_normalized(note));
    assert!(nfkd.is_normalized(note));
    assert!(nfc.is_normalized(note));
    assert!(nfkc.is_normalized(note));

    assert!(nfd.is_normalized_utf8(note.as_bytes()));
    assert!(nfkd.is_normalized_utf8(note.as_bytes()));
    assert!(nfc.is_normalized_utf8(note.as_bytes()));
    assert!(nfkc.is_normalized_utf8(note.as_bytes()));

    // Same text as `note`: 0xD834 0xDD57 is U+1D157, 0xD834 0xDD65 is U+1D165.
    let note16 = [
        0x0061u16, 0xD834u16, 0xDD57u16, 0xD834u16, 0xDD65u16, 0x0061u16,
    ]
    .as_slice();
    assert!(nfd.is_normalized_utf16(note16));
    assert!(nfkd.is_normalized_utf16(note16));
    assert!(nfc.is_normalized_utf16(note16));
    assert!(nfkc.is_normalized_utf16(note16));

    // Precomposed a-umlaut: fine for the composed forms, not for the
    // decomposed ones.
    let umlaut = "aäa";
    assert!(!nfd.is_normalized(umlaut));
    assert!(!nfkd.is_normalized(umlaut));
    assert!(nfc.is_normalized(umlaut));
    assert!(nfkc.is_normalized(umlaut));

    assert!(!nfd.is_normalized_utf8(umlaut.as_bytes()));
    assert!(!nfkd.is_normalized_utf8(umlaut.as_bytes()));
    assert!(nfc.is_normalized_utf8(umlaut.as_bytes()));
    assert!(nfkc.is_normalized_utf8(umlaut.as_bytes()));

    let umlaut16 = [0x0061u16, 0x00E4u16, 0x0061u16].as_slice();
    assert!(!nfd.is_normalized_utf16(umlaut16));
    assert!(!nfkd.is_normalized_utf16(umlaut16));
    assert!(nfc.is_normalized_utf16(umlaut16));
    assert!(nfkc.is_normalized_utf16(umlaut16));

    // Vulgar fraction one half: fine for the canonical forms, not for the
    // compatibility ones.
    let fraction = "a½a";
    assert!(nfd.is_normalized(fraction));
    assert!(!nfkd.is_normalized(fraction));
    assert!(nfc.is_normalized(fraction));
    assert!(!nfkc.is_normalized(fraction));

    assert!(nfd.is_normalized_utf8(fraction.as_bytes()));
    assert!(!nfkd.is_normalized_utf8(fraction.as_bytes()));
    assert!(nfc.is_normalized_utf8(fraction.as_bytes()));
    assert!(!nfkc.is_normalized_utf8(fraction.as_bytes()));

    let fraction16 = [0x0061u16, 0x00BDu16, 0x0061u16].as_slice();
    assert!(nfd.is_normalized_utf16(fraction16));
    assert!(!nfkd.is_normalized_utf16(fraction16));
    assert!(nfc.is_normalized_utf16(fraction16));
    assert!(!nfkc.is_normalized_utf16(fraction16));
}

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.21 Sekunden (vorverarbeitet am 2026-06-20) ¤

Wurzel

Suchen

PVS Prover

Isabelle Prover

NIST Cobol Testsuite

Cephes Mathematical Library

Vienna Development Method

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.