// First check for format version 4+ which has a standard data header.
// NOTE(review): the enclosing function signature is not visible in this chunk;
// `ds` and `inData` are presumably its parameters -- confirm against the full file.
UErrorCode errorCode=U_ZERO_ERROR;
// Probe the header with length -1 and no output buffer. Only the resulting
// error code is inspected; the returned value is deliberately discarded.
(void)udata_swapDataHeader(ds, inData, -1, nullptr, &errorCode);
if(U_SUCCESS(errorCode)) {
    // The UDataInfo structure is read at byte offset 4 of the input data.
    const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
    if(info.dataFormat[0]==0x55 &&   // dataFormat="UCol"
            info.dataFormat[1]==0x43 &&
            info.dataFormat[2]==0x6f &&
            info.dataFormat[3]==0x6c) {
        return true;
    }
}
// Else check for format version 3. const UCATableHeader *inHeader=(const UCATableHeader *)inData;
/* * The collation binary must contain at least the UCATableHeader, * starting with its size field. * sizeof(UCATableHeader)==42*4 in ICU 2.8 * check the length against the header size before reading the size field
*/
UCATableHeader header;
uprv_memset(&header, 0, sizeof(header)); if(length<0) {
header.size=udata_readInt32(ds, inHeader->size);
} elseif((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { returnfalse;
}
/* argument checking in case we were not called from ucol_swap() */ if(U_FAILURE(*pErrorCode)) { return 0;
} if(ds==nullptr || inData==nullptr || length<-1 || (length>0 && outData==nullptr)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0;
}
/* * The collation binary must contain at least the UCATableHeader, * starting with its size field. * sizeof(UCATableHeader)==42*4 in ICU 2.8 * check the length against the header size before reading the size field
*/
uprv_memset(&header, 0, sizeof(header)); if(length<0) {
header.size=udata_readInt32(ds, inHeader->size);
} elseif((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0;
}
header.magic=ds->readUInt32(inHeader->magic); if(!(
header.magic==UCOL_HEADER_MAGIC &&
inHeader->formatVersion[0]==3 /*&&
inHeader->formatVersion[1]>=0*/
)) {
udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
header.magic,
inHeader->formatVersion[0], inHeader->formatVersion[1]);
*pErrorCode=U_UNSUPPORTED_ERROR; return 0;
}
if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
inHeader->isBigEndian, inHeader->charSetFamily);
*pErrorCode=U_INVALID_FORMAT_ERROR; return 0;
}
// Actual swapping happens only when a non-negative length was supplied;
// otherwise control falls through to returning header.size below.
// NOTE(review): `inBytes`, `outBytes`, `outHeader` and `count` are declared
// outside the visible chunk -- confirm their types against the full file.
if(length>=0) {
    /* copy everything, takes care of data that needs no swapping */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, header.size);
    }

    /* swap the necessary pieces in the order of their occurrence in the data */

    /* read more of the UCATableHeader (the size field was read above) */
    header.options=                 ds->readUInt32(inHeader->options);
    header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);
    header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);
    header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);
    header.expansion=               ds->readUInt32(inHeader->expansion);
    header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);
    header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);
    header.contractionSize=         ds->readUInt32(inHeader->contractionSize);
    header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);
    header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);
    header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);
    header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
    header.scriptToLeadByte=        ds->readUInt32(inHeader->scriptToLeadByte);
    header.leadByteToScript=        ds->readUInt32(inHeader->leadByteToScript);

    /* swap the 32-bit integers in the header */
    // First run: everything from the start of the header up to (not including) jamoSpecial.
    ds->swapArray32(ds, inHeader, static_cast<int32_t>(reinterpret_cast<const char*>(&inHeader->jamoSpecial) - reinterpret_cast<const char*>(inHeader)),
                    outHeader, pErrorCode);
    // Second run: the scriptToLeadByte and leadByteToScript fields.
    ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
                    &(outHeader->scriptToLeadByte), pErrorCode);

    /* set the output platform properties */
    outHeader->isBigEndian=ds->outIsBigEndian;
    outHeader->charSetFamily=ds->outCharset;

    /* swap the main trie */
    if(header.mappingPosition!=0) {
        // The trie is taken to span from mappingPosition up to endExpansionCE.
        count=header.endExpansionCE-header.mappingPosition;
        utrie_swap(ds, inBytes + header.mappingPosition, static_cast<int32_t>(count),
                   outBytes+header.mappingPosition, pErrorCode);
    }

    /* swap the max expansion table */
    if(header.endExpansionCECount!=0) {
        ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
                        outBytes+header.endExpansionCE, pErrorCode);
    }

    /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */

    /* swap UCA constants */
    if(header.UCAConsts!=0) {
        /*
         * if UCAConsts!=0 then contractionUCACombos because we are swapping
         * the UCA data file, and we know that the UCA contains contractions
         */
        ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
                        outBytes+header.UCAConsts, pErrorCode);
    }

    /* swap the script to lead bytes */
    if(header.scriptToLeadByte!=0) {
        int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
        int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
        // Byte count: the two 16-bit counts (4) + 4 bytes per index entry + 2 bytes per data entry.
        ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
                        4 + (4 * indexCount) + (2 * dataCount),
                        outBytes+header.scriptToLeadByte, pErrorCode);
    }

    /* swap the lead byte to scripts */
    if(header.leadByteToScript!=0) {
        int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
        int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
        // Byte count: the two 16-bit counts (4) + 2 bytes per index entry + 2 bytes per data entry.
        ds->swapArray16(ds, inBytes+header.leadByteToScript,
                        4 + (2 * indexCount) + (2 * dataCount),
                        outBytes+header.leadByteToScript, pErrorCode);
    }
}

return header.size;
}  // closes the enclosing function, whose opening line is not visible in this chunk
// swap formatVersion 4 or 5 ----------------------------------------------- ***

// The following are copied from CollationDataReader, trading an awkward copy of constants
// for an awkward relocation of the i18n collationdatareader.h file into the common library.
// Keep them in sync!
// NOTE(review): the constants referred to above (IX_TOTAL_SIZE, IX_REORDER_CODES_OFFSET, ...)
// and the enclosing function signature are not visible in this chunk.

// Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
if(0<=length && length<8) {
    udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
                     "(%d after header) for collation data\n",
                     length);
    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

// The first index holds the number of indexes; each index occupies 4 bytes.
int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
if(0<=length && length<(indexesLength*4)) {
    udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
                     "(%d after header) for collation data\n",
                     length);
    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

// Read the indexes that are present (up to IX_TOTAL_SIZE) ...
for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
    indexes[i]=udata_readInt32(ds, inIndexes[i]);
}
// ... and mark the remaining slots up to IX_TOTAL_SIZE with -1.
for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
    indexes[i]=-1;
}
inIndexes=nullptr;  // Make sure we do not accidentally use these instead of indexes[].

// Get the total length of the data.
int32_t size;
if(indexesLength>IX_TOTAL_SIZE) {
    size=indexes[IX_TOTAL_SIZE];
} else if(indexesLength>IX_REORDER_CODES_OFFSET) {
    size=indexes[indexesLength-1];
} else {
    size=indexesLength*4;
}
// Negative length: only report the size, do not swap anything.
if(length<0) { return size; }

if(length<size) {
    udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
                     "(%d after header) for collation data\n",
                     length);
    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0;
}

// Copy the data for inaccessible bytes and arrays of bytes.
if(inBytes!=outBytes) {
    uprv_memcpy(outBytes, inBytes, size);
}

// The following is a modified version of CollationDataReader::read().
// Here we use indexes[] not inIndexes[] because
// the inIndexes[] may not be in this machine's endianness.
int32_t index;  // one of the indexes[] slots
int32_t offset;  // byte offset for the index part
// int32_t length;  // number of bytes in the index part
/* udata_swapDataHeader checks the arguments */
// NOTE(review): the enclosing function signature is not visible in this chunk.
int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
    // Try to swap the old format version which did not have a standard data header.
    // The failure from the header swap is cleared before delegating.
    *pErrorCode=U_ZERO_ERROR;
    return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
}

/* check data format and format version */
// The UDataInfo structure is read at byte offset 4 of the input data.
const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
if(!(
    info.dataFormat[0]==0x55 &&   // dataFormat="UCol"
    info.dataFormat[1]==0x43 &&
    info.dataFormat[2]==0x6f &&
    info.dataFormat[3]==0x6c &&
    (3<=info.formatVersion[0] && info.formatVersion[0]<=5)
)) {
    udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
                     "(format version %02x.%02x) is not recognized as collation data\n",
                     info.dataFormat[0], info.dataFormat[1],
                     info.dataFormat[2], info.dataFormat[3],
                     info.formatVersion[0], info.formatVersion[1]);
    *pErrorCode=U_UNSUPPORTED_ERROR;
    return 0;
}
/* check data format and format version */
pInfo=(const UDataInfo *)((constchar *)inData+4); if(!(
pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
pInfo->dataFormat[1]==0x6e &&
pInfo->dataFormat[2]==0x76 &&
pInfo->dataFormat[3]==0x43 &&
pInfo->formatVersion[0]==2 &&
pInfo->formatVersion[1]>=1
)) {
udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3],
pInfo->formatVersion[0], pInfo->formatVersion[1]);
*pErrorCode=U_UNSUPPORTED_ERROR; return 0;
}
/* * The inverse UCA collation binary must contain at least the InverseUCATableHeader, * starting with its size field. * sizeof(UCATableHeader)==8*4 in ICU 2.8 * check the length against the header size before reading the size field
*/ if(length<0) {
header.byteSize=udata_readInt32(ds, inHeader->byteSize);
} elseif(
((length-headerSize)<(8*4) ||
(uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
) {
udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0;
}
if(length>=0) { /* copy everything, takes care of data that needs no swapping */ if(inBytes!=outBytes) {
uprv_memcpy(outBytes, inBytes, header.byteSize);
}
/* swap the necessary pieces in the order of their occurrence in the data */
/* read more of the InverseUCATableHeader (the byteSize field was read above) */
header.tableSize= ds->readUInt32(inHeader->tableSize);
header.contsSize= ds->readUInt32(inHeader->contsSize);
header.table= ds->readUInt32(inHeader->table);
header.conts= ds->readUInt32(inHeader->conts);
/* swap the 32-bit integers in the header */
ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
0.14
Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.