/*
 * Test vectors for canonical decomposition.
 *
 * Each entry pairs an input UTF-8 byte string (.str) with the byte string
 * it is expected to decompose to (.dec), as described by the comment on
 * the entry.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[10];
	unsigned char dec[10];
} nfdi_test_data[] = {
	/* Trivial sequence */
	{
		/* "aBba" decomposes to itself */
		.str = "aBba",
		.dec = "aBba",
	},
	/* Simple equivalent sequences */
	{
		/*
		 * 'VULGAR FRACTION ONE QUARTER' cannot decompose to
		 * 'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4' on
		 * canonical decomposition
		 */
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},
	{
		/*
		 * 'LATIN SMALL LETTER A WITH DIAERESIS' decomposes to
		 * 'LETTER A' + 'COMBINING DIAERESIS'
		 */
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},
	{
		/*
		 * 'LATIN SMALL LETTER LJ' can't decompose to
		 * 'LETTER L' + 'LETTER J' on canonical decomposition
		 */
		.str = {0xC7, 0x89, 0x00},
		.dec = {0xC7, 0x89, 0x00},
	},
	{
		/* GREEK ANO TELEIA decomposes to MIDDLE DOT */
		.str = {0xCE, 0x87, 0x00},
		.dec = {0xC2, 0xB7, 0x00}
	},
	/* Canonical ordering */
	{
		/*
		 * A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK'
		 * decomposes to
		 * A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT'
		 */
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x0},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x0},
	},
	{
		/*
		 * 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
		 * decomposes to
		 * 'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS'
		 */
		.str = {0xc3, 0xa4, 0xCC, 0xA8, 0x00},
		.dec = {0x61, 0xCC, 0xA8, 0xcc, 0x88, 0x00},
	},
};
/*
 * Test vectors for decomposition combined with case folding.
 *
 * Each entry pairs an input UTF-8 byte string (.str) with the byte string
 * it is expected to fold to (.ncf), as described by the comment on the
 * entry.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[30];
	unsigned char ncf[30];
} nfdicf_test_data[] = {
	/* Trivial sequences */
	{
		/* "ABba" folds to lowercase */
		.str = {0x41, 0x42, 0x62, 0x61, 0x00},
		.ncf = {0x61, 0x62, 0x62, 0x61, 0x00},
	},
	{
		/*
		 * All ASCII folds to lower-case.  Both strings are 29
		 * characters, so the terminator lands in the last array slot.
		 */
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},
	{
		/*
		 * LATIN SMALL LETTER SHARP S folds to
		 * LATIN SMALL LETTER S + LATIN SMALL LETTER S
		 */
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},
	{
		/*
		 * LATIN CAPITAL LETTER A WITH RING ABOVE folds to
		 * LATIN SMALL LETTER A + COMBINING RING ABOVE
		 */
		.str = {0xC3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},
	/* Introduced by Unicode 8.0.0. */
	/*
	 * Cherokee letters are interesting test-cases because they fold
	 * to upper-case.  Before 8.0.0, Cherokee lowercase were
	 * undefined, thus, the folding from LC is not stable between
	 * 7.0.0 -> 8.0.0, but it is from UC.
	 */
	{
		/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A */
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},
	{
		/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE */
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},
	{
		/*
		 * OLD HUNGARIAN CAPITAL LETTER AMB folds to
		 * OLD HUNGARIAN SMALL LETTER AMB
		 */
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},
	/* Introduced by Unicode 9.0.0. */
	{
		/*
		 * OSAGE CAPITAL LETTER CHA folds to
		 * OSAGE SMALL LETTER CHA
		 */
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},
	{
		/*
		 * LATIN CAPITAL LETTER SMALL CAPITAL I folds to
		 * LATIN LETTER SMALL CAPITAL I
		 */
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},
	/* Introduced by Unicode 11.0.0. */
	{
		/*
		 * GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
		 * GEORGIAN LETTER AN (U+10D0)
		 */
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	}
};
static ssize_t utf8len(conststruct unicode_map *um, enum utf8_normalization n, constchar *s)
{ return utf8nlen(um, n, s, (size_t)-1);
}
staticint utf8cursor(struct utf8cursor *u8c, conststruct unicode_map *um, enum utf8_normalization n, constchar *s)
{ return utf8ncursor(u8c, um, n, s, (unsignedint)-1);
}
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) { int len = strlen(nfdi_test_data[i].str); int nlen = strlen(nfdi_test_data[i].dec); int j = 0; unsignedchar c; int ret;
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) { int len = strlen(nfdicf_test_data[i].str); int nlen = strlen(nfdicf_test_data[i].ncf); int j = 0; int ret; unsignedchar c;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.