Anforderungen  |   Konzepte  |   Entwurf  |   Entwicklung  |   Qualitätssicherung  |   Lebenszyklus  |   Steuerung
 
 
 
 


Quelle  cpt.rs   Sprache: unbekannt

 
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_collections::codepointtrie::planes::get_planes_trie;
use icu_collections::codepointtrie::Error;
use icu_collections::codepointtrie::*;
use zerovec::ZeroVec;

#[test]
fn planes_trie_deserialize_check_test() {
    // Get expected planes trie from crate::planes::get_planes_trie()

    let exp_planes_trie = get_planes_trie();

    // Compute actual planes trie from planes.toml

    let planes_enum_prop =
        ::toml::from_str::<UnicodeEnumeratedProperty>(include_str!("data/cpt/planes.toml"))
            .unwrap();

    let code_point_trie_struct = planes_enum_prop.code_point_trie.trie_struct;

    let trie_header = CodePointTrieHeader {
        high_start: code_point_trie_struct.high_start,
        shifted12_high_start: code_point_trie_struct.shifted12_high_start,
        index3_null_offset: code_point_trie_struct.index3_null_offset,
        data_null_offset: code_point_trie_struct.data_null_offset,
        null_value: code_point_trie_struct.null_value,
        trie_type: TrieType::try_from(code_point_trie_struct.trie_type_enum_val).unwrap_or_else(
            |_| {
                panic!(
                    "Could not parse trie_type serialized enum value in test data file: {}",
                    code_point_trie_struct.name
                )
            },
        ),
    };

    let data = ZeroVec::from_slice_or_alloc(code_point_trie_struct.data_8.as_ref().unwrap());
    let index = ZeroVec::from_slice_or_alloc(&code_point_trie_struct.index);
    let trie_result: Result<CodePointTrie<u8>, Error> =
        CodePointTrie::try_new(trie_header, index, data);
    let act_planes_trie = trie_result.unwrap();

    // Get check ranges (inversion map-style sequence of range+value) and
    // apply the trie validation test fn on expected and actual tries

    let serialized_ranges: Vec<(u32, u32, u32)> = planes_enum_prop.code_point_map.data.ranges;
    let mut check_ranges: Vec<u32> = vec![];
    for range_tuple in serialized_ranges {
        let range_end = range_tuple.1 + 1;
        let value = range_tuple.2;
        check_ranges.push(range_end);
        check_ranges.push(value);
    }

    check_trie(&act_planes_trie, &check_ranges);
    check_trie(&exp_planes_trie, &check_ranges);
}

#[test]
fn free_blocks_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.16.toml"));
}

#[test]
fn free_blocks_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.32.toml"));
}

#[test]
fn free_blocks_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.8.toml"));
}

#[test]
fn free_blocks_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.small16.toml"));
}

#[test]
fn grow_data_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.16.toml"));
}

#[test]
fn grow_data_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.32.toml"));
}

#[test]
fn grow_data_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.8.toml"));
}

#[test]
fn grow_data_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.small16.toml"));
}

#[test]
fn set1_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.16.toml"));
}

#[test]
fn set1_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.32.toml"));
}

#[test]
fn set1_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.8.toml"));
}

#[test]
fn set1_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.small16.toml"));
}

#[test]
fn set2_overlap_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.16.toml"));
}

#[test]
fn set2_overlap_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.32.toml"));
}

#[test]
fn set2_overlap_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.small16.toml"));
}

#[test]
fn set3_initial_9_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.16.toml"));
}

#[test]
fn set3_initial_9_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.32.toml"));
}

#[test]
fn set3_initial_9_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.8.toml"));
}

#[test]
fn set3_initial_9_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.small16.toml"));
}

#[test]
fn set_empty_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.16.toml"));
}

#[test]
fn set_empty_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.32.toml"));
}

#[test]
fn set_empty_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.8.toml"));
}

#[test]
fn set_empty_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.small16.toml"));
}

#[test]
fn set_single_value_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.16.toml"));
}

#[test]
fn set_single_value_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.32.toml"));
}

#[test]
fn set_single_value_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.8.toml"));
}

#[test]
fn set_single_value_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.small16.toml"));
}

#[test]
fn short_all_same_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.16.toml"));
}

#[test]
fn short_all_same_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.8.toml"));
}

#[test]
fn short_all_same_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.small16.toml"));
}

#[test]
fn small0_in_fast_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.16.toml"));
}

#[test]
fn small0_in_fast_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.32.toml"));
}

#[test]
fn small0_in_fast_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.8.toml"));
}

#[test]
fn small0_in_fast_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.small16.toml"));
}

/// The width of the elements in the data array of a [`CodePointTrie`].
/// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
#[derive(Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ValueWidthEnum {
    Bits16 = 0,
    Bits32 = 1,
    Bits8 = 2,
}

/// Test .get() on CodePointTrie by iterating through each range in
/// check_ranges and assert that the associated
/// value matches the trie value for each code point in the range.
pub fn check_trie<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
    assert_eq!(
        0,
        check_ranges.len() % 2,
        "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
    );

    let mut i: u32 = 0;
    let check_range_tuples = check_ranges.chunks(2);
    // Iterate over each check range
    for range_tuple in check_range_tuples {
        let range_limit = range_tuple[0];
        let range_value = range_tuple[1];
        // Check all values in this range, one-by-one
        while i < range_limit {
            assert_eq!(range_value, trie.get32(i).into(), "trie_get({})", i,);
            i += 1;
        }
    }
}

/// Test .get_range() / .iter_ranges() on CodePointTrie by calling
/// .iter_ranges() on the trie (which returns an iterator that produces values
/// by calls to .get_range) and see if it matches the values in check_ranges.
pub fn test_check_ranges_get_ranges<T: TrieValue + Into<u32>>(
    trie: &CodePointTrie<T>,
    check_ranges: &[u32],
) {
    assert_eq!(
        0,
        check_ranges.len() % 2,
        "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
    );

    let mut trie_ranges = trie.iter_ranges();

    let mut range_start: u32 = 0;
    let check_range_tuples = check_ranges.chunks(2);
    // Iterate over each check range
    for range_tuple in check_range_tuples {
        let range_limit = range_tuple[0];
        let range_value = range_tuple[1];

        // The check ranges array seems to start with a trivial range whose
        // limit is zero. range_start is initialized to 0, so we can skip.
        if range_limit == 0 {
            continue;
        }

        let cpm_range = trie_ranges.next();
        assert!(cpm_range.is_some(), "CodePointTrie iter_ranges() produces fewer ranges than the check_ranges field in testdata has");
        let cpm_range = cpm_range.unwrap();
        let cpmr_start = cpm_range.range.start();
        let cpmr_end = cpm_range.range.end();
        let cpmr_value: u32 = cpm_range.value.into();

        assert_eq!(range_start, *cpmr_start);
        assert_eq!(range_limit, *cpmr_end + 1);
        assert_eq!(range_value, cpmr_value);

        range_start = range_limit;
    }

    assert!(trie_ranges.next().is_none(), "CodePointTrie iter_ranges() produces more ranges than the check_ranges field in testdata has");
}

/// Run above tests that verify the validity of CodePointTrie methods
pub fn run_trie_tests<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
    check_trie(trie, check_ranges);
    test_check_ranges_get_ranges(trie, check_ranges);
}

// The following structs might be useful later for de-/serialization of the
// main `CodePointTrie` struct in the corresponding data provider.

#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct UnicodeEnumeratedProperty {
    pub code_point_map: EnumPropCodePointMap,
    pub code_point_trie: EnumPropSerializedCPT,
}

#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMap {
    pub data: EnumPropCodePointMapData,
}

#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMapData {
    pub long_name: String,
    pub name: String,
    pub ranges: Vec<(u32, u32, u32)>,
}

#[allow(clippy::upper_case_acronyms)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPT {
    #[cfg_attr(any(feature = "serde", test), serde(rename = "struct"))]
    pub trie_struct: EnumPropSerializedCPTStruct,
}

// These structs support the test data dumped as TOML files from ICU.
// Because the properties CodePointMap data will also be dumped from ICU
// using similar functions, some of these structs may be useful to refactor
// into main code at a later point.

#[allow(clippy::upper_case_acronyms)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPTStruct {
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub long_name: String,
    pub name: String,
    pub index: Vec<u16>,
    pub data_8: Option<Vec<u8>>,
    pub data_16: Option<Vec<u16>>,
    pub data_32: Option<Vec<u32>>,
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub index_length: u32,
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub data_length: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "highStart"))]
    pub high_start: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "shifted12HighStart"))]
    pub shifted12_high_start: u16,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "type"))]
    pub trie_type_enum_val: u8,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "valueWidth"))]
    pub value_width_enum_val: u8,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "index3NullOffset"))]
    pub index3_null_offset: u16,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "dataNullOffset"))]
    pub data_null_offset: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "nullValue"))]
    pub null_value: u32,
}

// Given a .toml file dumped from ICU4C test data for UCPTrie, run the test
// data file deserialization into the test file struct, convert and construct
// the `CodePointTrie`, and test the constructed struct against the test file's
// "check ranges" (inversion map ranges) using `check_trie` to verify the
// validity of the `CodePointTrie`'s behavior for all code points.
#[allow(dead_code)]
pub fn run_deserialize_test_from_test_data(test_file: &str) {
    // The following structs are specific to the TOML format files for dumped ICU
    // test data.

    #[derive(serde::Deserialize)]
    pub struct TestFile {
        code_point_trie: TestCodePointTrie,
    }

    #[derive(serde::Deserialize)]
    pub struct TestCodePointTrie {
        // The trie_struct field for test data files is dumped from the same source
        // (ICU4C) using the same function (usrc_writeUCPTrie) as property data
        // for the provider, so we can reuse the same struct here.
        #[serde(rename(deserialize = "struct"))]
        trie_struct: EnumPropSerializedCPTStruct,
        #[serde(rename(deserialize = "testdata"))]
        test_data: TestData,
    }

    #[derive(serde::Deserialize)]
    pub struct TestData {
        #[serde(rename(deserialize = "checkRanges"))]
        check_ranges: Vec<u32>,
    }

    let test_file = ::toml::from_str::<TestFile>(test_file).unwrap();

    let test_struct = test_file.code_point_trie.trie_struct;

    println!(
        "Running CodePointTrie reader logic test on test data file: {}",
        test_struct.name
    );

    let trie_type_enum = match TrieType::try_from(test_struct.trie_type_enum_val) {
        Ok(enum_val) => enum_val,
        _ => {
            panic!(
                "Could not parse trie_type serialized enum value in test data file: {}",
                test_struct.name
            );
        }
    };

    let trie_header = CodePointTrieHeader {
        high_start: test_struct.high_start,
        shifted12_high_start: test_struct.shifted12_high_start,
        index3_null_offset: test_struct.index3_null_offset,
        data_null_offset: test_struct.data_null_offset,
        null_value: test_struct.null_value,
        trie_type: trie_type_enum,
    };

    let index = ZeroVec::from_slice_or_alloc(&test_struct.index);

    match (test_struct.data_8, test_struct.data_16, test_struct.data_32) {
        (Some(data_8), _, _) => {
            let data = ZeroVec::from_slice_or_alloc(&data_8);
            let trie_result: Result<CodePointTrie<u8>, Error> =
                CodePointTrie::try_new(trie_header, index, data);
            assert!(trie_result.is_ok(), "Could not construct trie");
            assert_eq!(
                test_struct.value_width_enum_val,
                ValueWidthEnum::Bits8 as u8
            );
            run_trie_tests(
                &trie_result.unwrap(),
                &test_file.code_point_trie.test_data.check_ranges,
            );
        }

        (_, Some(data_16), _) => {
            let data = ZeroVec::from_slice_or_alloc(&data_16);
            let trie_result: Result<CodePointTrie<u16>, Error> =
                CodePointTrie::try_new(trie_header, index, data);
            assert!(trie_result.is_ok(), "Could not construct trie");
            assert_eq!(
                test_struct.value_width_enum_val,
                ValueWidthEnum::Bits16 as u8
            );
            run_trie_tests(
                &trie_result.unwrap(),
                &test_file.code_point_trie.test_data.check_ranges,
            );
        }

        (_, _, Some(data_32)) => {
            let data = ZeroVec::from_slice_or_alloc(&data_32);
            let trie_result: Result<CodePointTrie<u32>, Error> =
                CodePointTrie::try_new(trie_header, index, data);
            assert!(trie_result.is_ok(), "Could not construct trie");
            assert_eq!(
                test_struct.value_width_enum_val,
                ValueWidthEnum::Bits32 as u8
            );
            run_trie_tests(
                &trie_result.unwrap(),
                &test_file.code_point_trie.test_data.check_ranges,
            );
        }

        (_, _, _) => {
            panic!("Could not match test trie data to a known value width or trie type");
        }
    };
}

[ Dauer der Verarbeitung: 0.27 Sekunden  (vorverarbeitet)  ]

                                                                                                                                                                                                                                                                                                                                                                                                     


Neuigkeiten

     Aktuelles
     Motto des Tages

Software

     Produkte
     Quellcodebibliothek

Aktivitäten

     Artikel über Sicherheit
     Anleitung zur Aktivierung von SSL

Muße

     Gedichte
     Musik
     Bilder

Jenseits des Üblichen ....
    

Besucherstatistik

Besucherstatistik

Monitoring

Montastic status badge