/*
 * Check data format and format version.
 * The UDataInfo is read at byte offset 4 of inData; anything other than
 * dataFormat "pnam" with formatVersion[0]==2 is rejected.
 */
const UDataInfo *pInfo=
    reinterpret_cast<const UDataInfo *>(
        static_cast<const char *>(inData)+4);
if(!(
    pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
    pInfo->dataFormat[1]==0x6e &&
    pInfo->dataFormat[2]==0x61 &&
    pInfo->dataFormat[3]==0x6d &&
    pInfo->formatVersion[0]==2
)) {
    udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     pInfo->formatVersion[0]);
    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}

// NOTE(review): inBytes, outBytes, numBytesIndexesAndValueMaps, totalSize and
// headerSize are set up by code that is not visible in this excerpt.

// Swap the indexes[] and the valueMaps[].
ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);

// Copy the rest of the data.
// Nothing to copy when input and output are the same buffer.
if(inBytes!=outBytes) {
    uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
                inBytes+numBytesIndexesAndValueMaps,
                totalSize-numBytesIndexesAndValueMaps);
}

// We need not swap anything else:
//
// The ByteTries are already byte-serialized, and are fixed on ASCII.
// (On an EBCDIC machine, the input string is converted to lowercase ASCII
// while matching.)
//
// The name groups are mostly invariant characters, but since we only
// generate, and keep in subversion, ASCII versions of pnames.icu,
// and since only ICU4J uses the pnames.icu data file
// (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
// we just copy those bytes too.
}   // closes a block that was opened before this excerpt
return headerSize+totalSize;
}
/* Unicode properties data swapping ----------------------------------------- */

/*
 * Check data format and format version.
 * Accepted: dataFormat "UPro" with formatVersion[0] in 3..9. For versions
 * below 7, formatVersion[2] and [3] must additionally match UTRIE_SHIFT and
 * UTRIE_INDEX_SHIFT.
 */
pInfo = reinterpret_cast<const UDataInfo*>(static_cast<const char*>(inData) + 4);
if(!(
    pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
    pInfo->dataFormat[1]==0x50 &&
    pInfo->dataFormat[2]==0x72 &&
    pInfo->dataFormat[3]==0x6f &&
    (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=9) &&
    (pInfo->formatVersion[0]>=7 ||
        (pInfo->formatVersion[2]==UTRIE_SHIFT &&
         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
)) {
    udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     pInfo->formatVersion[0]);
    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}

/* the properties file must contain at least the indexes array */
if (length >= 0 && (length - headerSize) < static_cast<int32_t>(sizeof(dataIndexes))) {
    udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                     length-headerSize);
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

/*
 * comments are copied from the data format description in genprops/store.c
 * indexes[] constants are in uprops.h
 */
int32_t dataTop;
if(length>=0) {
    /*
     * NOTE(review): no assignment to outData32 (nor the filling of
     * dataIndexes[]) is visible in this excerpt; confirm both happen
     * before the uses below.
     */
    int32_t *outData32;

    /*
     * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
     * In earlier formatVersions, it is 0 and a lower dataIndexes entry
     * has the top of the last item.
     *
     * The loop walks downwards until it finds a non-zero entry (or reaches
     * index 0) and leaves that entry in dataTop.
     */
    for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}

    /* dataTop counts 32-bit units, hence the factor 4 for the byte comparison */
    if((length-headerSize)<(4*dataTop)) {
        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy everything for inaccessible data (padding) */
    if(inData32!=outData32) {
        uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
    }

    /* swap the indexes[16] */
    ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);

    /*
     * swap the main properties UTrie
     * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
     */
    utrie_swapAnyVersion(ds,
        inData32+UPROPS_INDEX_COUNT,
        4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
        outData32+UPROPS_INDEX_COUNT,
        pErrorCode);

    /*
     * swap the properties and exceptions words
     * P  const uint32_t props32[i1-i0];
     * E  const uint32_t exceptions[i2-i1];
     */
    ds->swapArray32(ds,
        inData32+dataIndexes[UPROPS_PROPS32_INDEX],
        4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
        outData32+dataIndexes[UPROPS_PROPS32_INDEX],
        pErrorCode);

    /*
     * swap the UChars
     * U  const char16_t uchars[2*(i3-i2)];
     */
    ds->swapArray16(ds,
        inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
        4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
        outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
        pErrorCode);

    /*
     * swap the additional UTrie
     * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
     */
    utrie_swapAnyVersion(ds,
        inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
        4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
        outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
        pErrorCode);
/*
 * Fragment of ucase_swap(): size validation and the initial bulk copy.
 * When a real length is supplied (length>=0), the data after the header
 * must hold at least the 16 int32_t indexes.
 */
if(length>=0) {
    length-=headerSize;   /* from here on, length counts only the bytes after the header */
    if(length<16*4) {
        udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

/*
 * read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow)
 * udata_readInt32() is given the swapper ds, so the values are read in the
 * input data's byte order before anything has been swapped.
 */
for(i=0; i<16; ++i) {
    indexes[i]=udata_readInt32(ds, inIndexes[i]);
}

/* get the total length of the data */
size=indexes[UCASE_IX_LENGTH];

if(length>=0) {
    /* the supplied buffer must cover the size announced by the indexes */
    if(length<size) {
        udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /*
     * copy the data for inaccessible bytes
     * (nothing to copy when input and output are the same buffer)
     */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }
    /* NOTE(review): the enclosing if(length>=0) block continues past this excerpt */
/*
 * Fragment of ubidi_swap(): size validation, the initial bulk copy, and the
 * tail of the function.
 * When a real length is supplied (length>=0), the data after the header
 * must hold at least the 16 int32_t indexes.
 */
if(length>=0) {
    length-=headerSize;   /* from here on, length counts only the bytes after the header */
    if(length<16*4) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

/*
 * read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow)
 * udata_readInt32() is given the swapper ds, so the values are read in the
 * input data's byte order before anything has been swapped.
 */
for(i=0; i<16; ++i) {
    indexes[i]=udata_readInt32(ds, inIndexes[i]);
}

/* get the total length of the data */
size=indexes[UBIDI_IX_LENGTH];

if(length>=0) {
    /* the supplied buffer must cover the size announced by the indexes */
    if(length<size) {
        udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /*
     * copy the data for inaccessible bytes
     * (nothing to copy when input and output are the same buffer)
     */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    /*
     * NOTE(review): offset is advanced here but never initialized in the
     * visible code; the swapping steps that set it appear to be missing
     * from this excerpt.
     */

    /* just skip the uint8_t jgArray[] and jgArray2[] */
    count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    offset+=count;
    count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
    offset+=count;

    /* after the last section, the running offset must equal the total size */
    U_ASSERT(offset==size);
}

/* the return value is the header size plus the body size */
return headerSize+size;
}
/* Unicode normalization data swapping -------------------------------------- */

/*
 * Check data format and format version.
 * Accepted: dataFormat "Norm" with formatVersion[0]==2.
 */
pInfo = reinterpret_cast<const UDataInfo*>(static_cast<const char*>(inData) + 4);
if(!(
    pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
    pInfo->dataFormat[1]==0x6f &&
    pInfo->dataFormat[2]==0x72 &&
    pInfo->dataFormat[3]==0x6d &&
    pInfo->formatVersion[0]==2
)) {
    udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     pInfo->formatVersion[0]);
    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}

/* with a real length, the data after the header must hold the 32 int32_t indexes */
if(length>=0) {
    length-=headerSize;   /* from here on, length counts only the bytes after the header */
    if(length<32*4) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

/* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
for(i=0; i<32; ++i) {
    indexes[i]=udata_readInt32(ds, inIndexes[i]);
}

/*
 * calculate the total length of the data
 * (the three *_COUNT entries are multiplied by 2 to get bytes; the
 * *_SIZE entries are added as-is)
 */
size=
    32*4+ /* size of indexes[] */
    indexes[_NORM_INDEX_TRIE_SIZE]+
    indexes[_NORM_INDEX_UCHAR_COUNT]*2+
    indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
    indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
    indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
    indexes[_NORM_INDEX_CANON_SET_COUNT]*2;

if(length>=0) {
    if(length<size) {
        udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /*
     * copy the data for inaccessible bytes
     * (nothing to copy when input and output are the same buffer)
     */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }
    /* NOTE(review): the enclosing if(length>=0) block continues past this excerpt */
/*
 * Fragment of ulayout_swap(): size validation and the initial bulk copy.
 * With a real length, the data after the header must hold at least
 * 12 int32_t indexes.
 */
if (length >= 0) {
    length -= headerSize;  // from here on, length counts only the bytes after the header
    if (length < 12 * 4) {
        udata_printError(ds, "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

// The data itself states how many indexes it carries; fewer than 12 is rejected.
int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]);
if (indexesLength < 12) {
    udata_printError(ds, "ulayout_swap(): too few indexes (%d) for text layout properties data\n",
                     indexesLength);
    *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

// Read the data offsets before swapping anything.
// Only entries ULAYOUT_IX_INPC_TRIE_TOP..ULAYOUT_IX_TRIES_TOP are filled in;
// lower elements of indexes[] stay uninitialized.
int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1];
for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
    indexes[i] = udata_readInt32(ds, inIndexes[i]);
}
int32_t size = indexes[ULAYOUT_IX_TRIES_TOP];

if (length >= 0) {
    if (length < size) {
        udata_printError(ds,
                         "ulayout_swap(): too few bytes (%d after header) "
                         "for all of text layout properties data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    // Copy the data for inaccessible bytes.
    // Nothing to copy when input and output are the same buffer.
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }
    // NOTE(review): the enclosing if (length >= 0) block continues past this excerpt.
/*
 * Fragment of uemoji_swap(): size validation, the initial bulk copy, and
 * the swapping of the indexes, the code point trie and the string tries.
 */
if (length >= 0) {
    length -= headerSize;  // from here on, length counts only the bytes after the header
    // We expect to read at least EmojiProps::IX_TOTAL_SIZE.
    if (length < 14 * 4) {
        udata_printError(ds, "uemoji_swap(): too few bytes (%d after header) for emoji properties data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

// First offset after indexes[].
// It is a byte offset, so dividing by 4 yields the number of int32_t indexes.
int32_t cpTrieOffset = udata_readInt32(ds, inIndexes[EmojiProps::IX_CPTRIE_OFFSET]);
int32_t indexesLength = cpTrieOffset / 4;
if (indexesLength < 14) {
    udata_printError(ds, "uemoji_swap(): too few indexes (%d) for emoji properties data\n",
                     indexesLength);
    *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

// Read the data offsets before swapping anything.
int32_t indexes[EmojiProps::IX_TOTAL_SIZE + 1];
indexes[0] = cpTrieOffset;
for (int32_t i = 1; i <= EmojiProps::IX_TOTAL_SIZE; ++i) {
    indexes[i] = udata_readInt32(ds, inIndexes[i]);
}
int32_t size = indexes[EmojiProps::IX_TOTAL_SIZE];

if (length >= 0) {
    if (length < size) {
        udata_printError(ds,
                         "uemoji_swap(): too few bytes (%d after header) "
                         "for all of emoji properties data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    // Copy the data for inaccessible bytes.
    // Nothing to copy when input and output are the same buffer.
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }

    // Swap the int32_t indexes[].
    int32_t offset = 0;
    int32_t top = cpTrieOffset;
    ds->swapArray32(ds, inBytes, top - offset, outBytes, pErrorCode);
    offset = top;

    // Swap the code point trie.
    // A trie section shorter than 16 bytes is left unswapped.
    top = indexes[EmojiProps::IX_CPTRIE_OFFSET + 1];
    int32_t count = top - offset;
    U_ASSERT(count >= 0);
    if (count >= 16) {
        utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
    }
    offset = top;

    // Swap all of the string tries.
    // They are all serialized as arrays of 16-bit units.
    // One swapArray16() call covers the whole range from the first string
    // trie offset to the offset following the last one.
    offset = indexes[EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET];
    top = indexes[EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET + 1];
    ds->swapArray16(ds, inBytes + offset, top - offset, outBytes + offset, pErrorCode);
    offset = top;
    // NOTE(review): the enclosing if (length >= 0) block continues past this excerpt.
/*
 * Check data format and format version.
 * Accepted: dataFormat "Test" with formatVersion[0]==1.
 */
pInfo = reinterpret_cast<const UDataInfo*>(static_cast<const char*>(inData) + 4);
if(!(
    pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Test" */
    pInfo->dataFormat[1]==0x65 &&
    pInfo->dataFormat[2]==0x73 &&
    pInfo->dataFormat[3]==0x74 &&
    pInfo->formatVersion[0]==1
)) {
    udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     pInfo->formatVersion[0]);
    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}

/*
 * NOTE(review): size (and the adjustment of length for the header that the
 * message below implies) is computed by code not visible in this excerpt.
 */
if(length>=0) {
    /* the supplied buffer must cover the expected size */
    if(length<size) {
        udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
                         length, size);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
    /* NOTE(review): the enclosing if(length>=0) block continues past this excerpt */
/*
 * Preflight the header first; checks for illegal arguments, too.
 * Do not swap the header right away because the format-specific swapper
 * will swap it, get the headerSize again, and also use the header
 * information. Otherwise we would have to pass some of the information
 * and not be able to use the UDataSwapFn signature.
 */
udata_swapDataHeader(ds, inData, -1, nullptr, pErrorCode);

/*
 * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
 * then we could check here for further known magic values and structures.
 */
if(U_FAILURE(*pErrorCode)) {
    return 0; /* the data format was not recognized */
}

/* the UDataInfo is read at byte offset 4 of the data header */
pInfo=(const UDataInfo *)((const char *)inData+4);

{
    /* convert the data format from ASCII to Unicode to the system charset */
    char16_t u[4]={
        pInfo->dataFormat[0], pInfo->dataFormat[1],
        pInfo->dataFormat[2], pInfo->dataFormat[3]
    };
    /*
     * NOTE(review): u is not used in the visible code, dataFormatChars is
     * never filled in, and this block is not closed within the excerpt. The
     * conversion of u into dataFormatChars (and the closing brace) appears
     * to be missing here; confirm against the complete source.
     */

    /*
     * dispatch to the swap function for the dataFormat:
     * the first swapFns[] entry whose 4-byte dataFormat matches handles the
     * data, and its result is returned directly
     */
    for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3],
                                 u_errorName(*pErrorCode));
            } else if(swappedLength<(length-15)) {
                /*
                 * swapped less than expected
                 * (a shortfall of up to 15 bytes is tolerated; this is only
                 * reported, the swapped length is still returned)
                 */
                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                                 swappedLength, length,
                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
                                 dataFormatChars[0], dataFormatChars[1],
                                 dataFormatChars[2], dataFormatChars[3]);
            }

            return swappedLength;
        }
    }

    /* the dataFormat was not recognized */
    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     dataFormatChars[0], dataFormatChars[1],
                     dataFormatChars[2], dataFormatChars[3]);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.