Anforderungen  |   Konzepte  |   Entwurf  |   Entwicklung  |   Qualitätssicherung  |   Lebenszyklus  |   Steuerung
 
 
 
 


SSL spec_test.rs   Sprache: unbekannt

 
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_segmenter::GraphemeClusterSegmenter;
use icu_segmenter::LineSegmenter;
use icu_segmenter::SentenceSegmenter;
use icu_segmenter::WordSegmenter;
use std::char;

struct TestContentIterator(core::str::Split<'static, char>);

struct TestData {
    original_line: &'static str,
    utf8_vec: Vec<char>,
    utf16_vec: Vec<u16>,
    latin1_vec: Vec<u8>,
    break_result_utf8: Vec<usize>,
    break_result_utf16: Vec<usize>,
    break_result_latin1: Option<Vec<usize>>,
}

impl TestContentIterator {
    pub fn new(file: &'static str) -> Self {
        Self(file.split('\n'))
    }
}

impl Iterator for TestContentIterator {
    type Item = TestData;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let line = self.0.next()?;
            if line.is_empty() {
                // EOF
                return None;
            }
            if line.starts_with('#') {
                // Comment
                continue;
            }

            let mut r = line.split('#');
            let r = r.next();
            let v = r.unwrap().split_ascii_whitespace();
            let mut char_break: Vec<_> = Vec::new();
            let mut u8_break: Vec<_> = Vec::new();
            let mut u16_break: Vec<_> = Vec::new();
            let mut char_vec: Vec<_> = Vec::new();
            let mut u8_vec: Vec<_> = Vec::new();
            let mut u16_vec: Vec<_> = Vec::new();

            let mut char_len = 0;
            let mut u8_len = 0;
            let mut u16_len = 0;

            let mut ascii_only = true;
            for (count, item) in v.enumerate() {
                if count % 2 == 1 {
                    let ch = char::from_u32(u32::from_str_radix(item, 16).unwrap()).unwrap();
                    char_vec.push(ch);
                    char_len += ch.len_utf8();

                    if ch as u32 >= 0x100 {
                        ascii_only = false;
                    } else {
                        u8_vec.push(ch as u8);
                        u8_len += 1;
                    }

                    let mut u16_buf = [0; 2];
                    let ch_u16 = ch.encode_utf16(&mut u16_buf);
                    u16_vec.extend_from_slice(ch_u16);
                    u16_len += ch_u16.len();
                } else if item != "\u{00d7}" {
                    assert_eq!(item, "\u{00f7}");
                    char_break.push(char_len);
                    u8_break.push(u8_len);
                    u16_break.push(u16_len);
                }
            }
            return Some(Self::Item {
                original_line: line,
                utf8_vec: char_vec,
                utf16_vec: u16_vec,
                latin1_vec: u8_vec,
                break_result_utf8: char_break,
                break_result_utf16: u16_break,
                break_result_latin1: if ascii_only { Some(u8_break) } else { None },
            });
        }
    }
}

fn line_break_test(file: &'static str) {
    let test_iter = TestContentIterator::new(file);
    let segmenter = LineSegmenter::new_dictionary();
    for (i, mut test) in test_iter.enumerate() {
        let s: String = test.utf8_vec.into_iter().collect();
        let iter = segmenter.segment_str(&s);
        let result: Vec<usize> = iter.collect();
        // NOTE: For consistency with ICU4C and other Segmenters, we return a breakpoint at
        // index 0, despite UAX #14 suggesting otherwise. See issue #3283.
        if test.break_result_utf8.first() != Some(&0) {
            test.break_result_utf8.insert(0, 0);
        }
        if result != test.break_result_utf8 {
            let lb = icu::properties::maps::line_break();
            let lb_name = icu::properties::LineBreak::enum_to_long_name_mapper();
            let mut iter = segmenter.segment_str(&s);
            // TODO(egg): It would be really nice to have Name here.
            println!("  | A | E | Code pt. | Line_Break     | Literal");
            for (i, c) in s.char_indices() {
                let expected_break = test.break_result_utf8.contains(&i);
                let actual_break = result.contains(&i);
                if actual_break {
                    iter.next();
                }
                println!(
                    "{}| {} | {} | {:>8} | {:>18} | {}",
                    if actual_break != expected_break {
                        "��"
                    } else {
                        "  "
                    },
                    if actual_break { "÷" } else { "×" },
                    if expected_break { "÷" } else { "×" },
                    format!("{:04X}", c as u32),
                    lb_name
                        .get(lb.get(c))
                        .unwrap_or(&format!("{:?}", lb.get(c))),
                    c
                )
            }
            println!("Test case #{}", i);
            panic!()
        }

        let iter = segmenter.segment_utf16(&test.utf16_vec);
        let result: Vec<usize> = iter.collect();
        if test.break_result_utf16.first() != Some(&0) {
            test.break_result_utf16.insert(0, 0);
        }
        assert_eq!(
            result, test.break_result_utf16,
            "UTF16: {}",
            test.original_line
        );

        // Test data is Latin-1 character only, it can run for Latin-1 segmenter test.
        if let Some(mut break_result_latin1) = test.break_result_latin1 {
            let iter = segmenter.segment_latin1(&test.latin1_vec);
            if break_result_latin1.first() != Some(&0) {
                break_result_latin1.insert(0, 0);
            }
            let result: Vec<usize> = iter.collect();
            assert_eq!(
                result, break_result_latin1,
                "Latin1: {}",
                test.original_line
            );
        }
    }
}

#[test]
fn run_line_break_test() {
    line_break_test(include_str!("testdata/LineBreakTest.txt"));
}

#[test]
fn run_line_break_extra_test() {
    line_break_test(include_str!("testdata/LineBreakExtraTest.txt"));
}

fn word_break_test(file: &'static str) {
    let test_iter = TestContentIterator::new(file);
    let segmenter = WordSegmenter::new_dictionary();
    for (i, test) in test_iter.enumerate() {
        let s: String = test.utf8_vec.into_iter().collect();
        let iter = segmenter.segment_str(&s);
        let result: Vec<usize> = iter.collect();
        if result != test.break_result_utf8 {
            let wb = icu::properties::maps::word_break();
            let wb_name = icu::properties::WordBreak::enum_to_long_name_mapper();
            let mut iter = segmenter.segment_str(&s);
            // TODO(egg): It would be really nice to have Name here.
            println!("  | A | E | Code pt. |   Word_Break   | State | Literal");
            for (i, c) in s.char_indices() {
                let expected_break = test.break_result_utf8.contains(&i);
                let actual_break = result.contains(&i);
                if actual_break {
                    iter.next();
                }
                println!(
                    "{}| {} | {} | {:>8} | {:>14} | {} | {}",
                    if actual_break != expected_break {
                        "��"
                    } else {
                        "  "
                    },
                    if actual_break { "÷" } else { "×" },
                    if expected_break { "÷" } else { "×" },
                    format!("{:04X}", c as u32),
                    wb_name
                        .get(wb.get(c))
                        .unwrap_or(&format!("{:?}", wb.get(c))),
                    // Placeholder for logging the state if exposed.
                    // Not "?????" to hide from clippy.
                    "?".repeat(5),
                    c
                )
            }
            println!("Test case #{}", i);
            panic!()
        }

        let iter = segmenter.segment_utf16(&test.utf16_vec);
        let result: Vec<usize> = iter.collect();
        assert_eq!(
            result, test.break_result_utf16,
            "UTF16: {}",
            test.original_line
        );

        // Test data is Latin-1 character only, it can run for Latin-1 segmenter test.
        if let Some(break_result_latin1) = test.break_result_latin1 {
            let iter = segmenter.segment_latin1(&test.latin1_vec);
            let result: Vec<usize> = iter.collect();
            assert_eq!(
                result, break_result_latin1,
                "Latin1: {}",
                test.original_line
            );
        }
    }
}

#[test]
fn run_word_break_test() {
    word_break_test(include_str!("testdata/WordBreakTest.txt"));
}

#[test]
fn run_word_break_extra_test() {
    word_break_test(include_str!("testdata/WordBreakExtraTest.txt"));
}

fn grapheme_break_test(file: &'static str) {
    let test_iter = TestContentIterator::new(file);
    let segmenter = GraphemeClusterSegmenter::new();
    for (i, test) in test_iter.enumerate() {
        let s: String = test.utf8_vec.into_iter().collect();
        let iter = segmenter.segment_str(&s);
        let result: Vec<usize> = iter.collect();
        if result != test.break_result_utf8 {
            let gcb = icu::properties::maps::grapheme_cluster_break();
            let gcb_name = icu::properties::GraphemeClusterBreak::enum_to_long_name_mapper();
            let mut iter = segmenter.segment_str(&s);
            // TODO(egg): It would be really nice to have Name here.
            println!("  | A | E | Code pt. |            GCB | State | Literal");
            for (i, c) in s.char_indices() {
                let expected_break = test.break_result_utf8.contains(&i);
                let actual_break = result.contains(&i);
                if actual_break {
                    iter.next();
                }
                println!(
                    "{}| {} | {} | {:>8} | {:>14} | {} | {}",
                    if actual_break != expected_break {
                        "��"
                    } else {
                        "  "
                    },
                    if actual_break { "÷" } else { "×" },
                    if expected_break { "÷" } else { "×" },
                    format!("{:04X}", c as u32),
                    gcb_name
                        .get(gcb.get(c))
                        .unwrap_or(&format!("{:?}", gcb.get(c))),
                    // Placeholder for logging the state if exposed.
                    // Not "?????" to hide from clippy.
                    "?".repeat(5),
                    c
                )
            }
            println!("Test case #{}", i);
            panic!()
        }

        let iter = segmenter.segment_utf16(&test.utf16_vec);
        let result: Vec<usize> = iter.collect();
        assert_eq!(
            result, test.break_result_utf16,
            "UTF16: {}",
            test.original_line
        );

        // Test data is Latin-1 character only, it can run for Latin-1 segmenter test.
        if let Some(break_result_latin1) = test.break_result_latin1 {
            let iter = segmenter.segment_latin1(&test.latin1_vec);
            let result: Vec<usize> = iter.collect();
            assert_eq!(
                result, break_result_latin1,
                "Latin1: {}",
                test.original_line
            );
        }
    }
}

#[test]
fn run_grapheme_break_test() {
    grapheme_break_test(include_str!("testdata/GraphemeBreakTest.txt"));
}

#[test]
fn run_grapheme_break_extra_test() {
    grapheme_break_test(include_str!("testdata/GraphemeBreakExtraTest.txt"));
}

fn sentence_break_test(file: &'static str) {
    let test_iter = TestContentIterator::new(file);
    let segmenter = SentenceSegmenter::new();
    for (i, test) in test_iter.enumerate() {
        let s: String = test.utf8_vec.into_iter().collect();
        let iter = segmenter.segment_str(&s);
        let result: Vec<usize> = iter.collect();
        if result != test.break_result_utf8 {
            let sb = icu::properties::maps::sentence_break();
            let sb_name = icu::properties::SentenceBreak::enum_to_long_name_mapper();
            let mut iter = segmenter.segment_str(&s);
            // TODO(egg): It would be really nice to have Name here.
            println!("  | A | E | Code pt. | Sentence_Break | State | Literal");
            for (i, c) in s.char_indices() {
                let expected_break = test.break_result_utf8.contains(&i);
                let actual_break = result.contains(&i);
                if actual_break {
                    iter.next();
                }
                println!(
                    "{}| {} | {} | {:>8} | {:>14} | {} | {}",
                    if actual_break != expected_break {
                        "��"
                    } else {
                        "  "
                    },
                    if actual_break { "÷" } else { "×" },
                    if expected_break { "÷" } else { "×" },
                    format!("{:04X}", c as u32),
                    sb_name
                        .get(sb.get(c))
                        .unwrap_or(&format!("{:?}", sb.get(c))),
                    // Placeholder for logging the state if exposed.
                    // Not "?????" to hide from clippy.
                    "?".repeat(5),
                    c
                )
            }
            println!("Test case #{}", i);
            panic!()
        }

        let iter = segmenter.segment_utf16(&test.utf16_vec);
        let result: Vec<usize> = iter.collect();
        assert_eq!(
            result, test.break_result_utf16,
            "UTF16: {}",
            test.original_line
        );

        // Test data is Latin-1 character only, it can run for Latin-1 segmenter test.
        if let Some(break_result_latin1) = test.break_result_latin1 {
            let iter = segmenter.segment_latin1(&test.latin1_vec);
            let result: Vec<usize> = iter.collect();
            assert_eq!(
                result, break_result_latin1,
                "Latin1: {}",
                test.original_line
            );
        }
    }
}

#[test]
fn run_sentence_break_test() {
    sentence_break_test(include_str!("testdata/SentenceBreakTest.txt"));
}

#[test]
fn run_sentence_break_extra_test() {
    sentence_break_test(include_str!("testdata/SentenceBreakExtraTest.txt"));
}

#[test]
fn run_sentence_break_random_test() {
    sentence_break_test(include_str!("testdata/SentenceBreakRandomTest.txt"));
}

[ Verzeichnis aufwärts0.51unsichere Verbindung  Übersetzung europäischer Sprachen durch Browser  ]

                                                                                                                                                                                                                                                                                                                                                                                                     


Neuigkeiten

     Aktuelles
     Motto des Tages

Software

     Produkte
     Quellcodebibliothek

Aktivitäten

     Artikel über Sicherheit
     Anleitung zur Aktivierung von SSL

Muße

     Gedichte
     Musik
     Bilder

Jenseits des Üblichen ....
    

Besucherstatistik

Besucherstatistik

Monitoring

Montastic status badge