/*
 * Fragment: the enclosing function header and the rest of the loop body are
 * not visible here.
 *
 * If the shared data has not even been lazily evaluated yet, return 0.
 * The mutex is released before that early return.
 */
umtx_lock(&usprepMutex); if (SHARED_DATA_HASHTABLE == nullptr) {
umtx_unlock(&usprepMutex); return 0;
}
/* Walk every element in the table; the loop ends when uhash_nextElement() returns nullptr. No unlock is visible between the lock above and this loop, so the mutex is still held here. */ while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != nullptr)
{
/* The element's value is read as the cached profile and its key as the UStringPrepKey. */
profile = (UStringPrepProfile *) e->value.pointer;
key = (UStringPrepKey *) e->key.pointer;
/*
 * Fragment of the data-loading routine (presumably loadData(); the header and
 * the declarations of p, _sprepTrie, normUniVer, sprepUniVer and normCorrVer
 * are outside this view).
 *
 * Opens the .spp data, installs it on the profile under usprepMutex unless
 * another thread already did, validates the Unicode versions, and closes any
 * data memory that ended up unused. Returns false on every failure path.
 */

/* open the data outside the mutex block */
// TODO: change the path
dataMemory = udata_openChoice(path, type, name, isSPrepAcceptable, nullptr, errorCode);
if (U_FAILURE(*errorCode)) {
    return false;
}

/* in the mutex block, set the data for this process */
umtx_lock(&usprepMutex);
if (profile->sprepData == nullptr) {
    /* Hand the opened data to the profile and clear the local pointer so
     * that the cleanup further down does not close it. */
    profile->sprepData = dataMemory;
    dataMemory = nullptr;
    uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
    uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
} else {
    /* The profile already has data: re-point p at the installed copy. */
    p = static_cast<const int32_t*>(udata_getMemory(profile->sprepData));
}
umtx_unlock(&usprepMutex);

/* initialize some variables: the mapping data starts after the index block
 * plus indexes[_SPREP_INDEX_TRIE_SIZE] bytes */
profile->mappingData = reinterpret_cast<const uint16_t*>(reinterpret_cast<const uint8_t*>(p + _SPREP_INDEX_TOP) + profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

if (U_FAILURE(*errorCode)) {
    udata_close(dataMemory);
    return false;
}

if (normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
    normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
    ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
   ) {
    *errorCode = U_INVALID_FORMAT_ERROR;
    udata_close(dataMemory);
    return false;
}

profile->isDataLoaded = true;

/* if a different thread set it first, then close the extra data */
if (dataMemory != nullptr) {
    udata_close(dataMemory); /* nullptr if it was set correctly */
}
/*
 * Fragment: the enclosing function header and the declarations of key,
 * keyName and keyPath are not visible here.
 *
 * Looks the profile up in SHARED_DATA_HASHTABLE using a stack key built from
 * name/path. On a miss, a new profile is allocated and loaded without holding
 * the mutex, and the table is then re-checked under the mutex before inserting.
 */
UStringPrepKey stackKey; /* const is cast away to save malloc, strcpy and free calls; the passed-in pointers are only used to fetch the data from the hash table, which is safe */
stackKey.name = const_cast<char*>(name);
stackKey.path = const_cast<char*>(path);
/* fetch the data from the cache; on a hit the reference count is bumped while the mutex is held */
umtx_lock(&usprepMutex);
profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey)); if(profile != nullptr) {
profile->refCount++;
}
umtx_unlock(&usprepMutex);
if(profile == nullptr) { /* cache miss: load the data and put it in the cache */
/* allocation failure is reported as U_MEMORY_ALLOCATION_ERROR with a nullptr return */
LocalMemory<UStringPrepProfile> newProfile; if(newProfile.allocateInsteadAndReset() == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR; return nullptr;
}
/* load the data; give up if loadData() returns false or leaves a failure in *status */ if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ return nullptr;
}
/* get the options: both flags are derived from bits of indexes[_SPREP_OPTIONS] */
newProfile->doNFKC = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
newProfile->checkBiDi = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
umtx_lock(&usprepMutex); // If another thread already inserted the same key/value, refcount and cleanup our thread data
profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey)); if(profile != nullptr) {
profile->refCount++;
/* use the cached profile and unload the one this thread just loaded */
usprep_unload(newProfile.getAlias());
} else { /* initialize the key members */
/* NOTE(review): keyName/keyPath are declared outside this fragment; presumably buffers sized for name/path -- confirm */
key->name = keyName.orphan();
uprv_strcpy(key->name, name); if(path != nullptr){
key->path = keyPath.orphan();
uprv_strcpy(key->path, path);
}
/* release the profile from newProfile; it goes into the table below */
profile = newProfile.orphan();
/* add the data object to the cache with an initial reference count of 1 */
profile->refCount = 1;
uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
}
umtx_unlock(&usprepMutex);
}
/*
 * Body of the trie-word decoder (the function header is outside this view;
 * a call elsewhere in the file has the form getValues(result, value, isIndex)).
 *
 * Classifies trieWord and fills in value / isIndex:
 *   - 0                          -> USPREP_TYPE_LIMIT, value 0, not an index
 *   - >= _SPREP_TYPE_THRESHOLD   -> type is (trieWord - threshold), value 0
 *   - otherwise                  -> USPREP_MAP, with value taken from the
 *                                   bits above the low two; bit 0x02 selects
 *                                   index vs. delta
 *   - (trieWord >> 2) == _SPREP_MAX_INDEX_VALUE overrides to USPREP_DELETE
 * Returns the resulting type.
 */
UStringPrepType type;
if (trieWord == 0) {
    /*
     * Initial value stored in the mapping table
     * just return USPREP_TYPE_LIMIT .. so that
     * the source codepoint is copied to the destination
     */
    type = USPREP_TYPE_LIMIT;
    isIndex = false;
    value = 0;
} else if (trieWord >= _SPREP_TYPE_THRESHOLD) {
    type = static_cast<UStringPrepType>(trieWord - _SPREP_TYPE_THRESHOLD);
    isIndex = false;
    value = 0;
} else {
    /* get the type */
    type = USPREP_MAP;
    /* ascertain if the value is index or delta */
    if (trieWord & 0x02) {
        isIndex = true;
        value = trieWord >> 2; // mask off the lower 2 bits and shift
    } else {
        isIndex = false;
        /* delta: convert to int16_t first, then shift out the low two bits */
        value = static_cast<int16_t>(trieWord);
        value = (value >> 2);
    }

    if ((trieWord >> 2) == _SPREP_MAX_INDEX_VALUE) {
        type = USPREP_DELETE;
        isIndex = false;
        value = 0;
    }
}
return type;
}
// TODO: change to writing to UnicodeString not char16_t * static int32_t
usprep_map( const UStringPrepProfile* profile, const char16_t* src, int32_t srcLength,
char16_t* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status ){
/* 1) Map -- For each character in the input, check if it has a mapping and, if so, replace it with its mapping.
2) Normalize -- Possibly normalize the result of step 1 using Unicode normalization.
3) Prohibit -- Check for any characters that are not allowed in the output. If any are found, return an error.
4) Check bidi -- Possibly check for right-to-left characters, and if any are found, make sure that the whole string satisfies the requirements for bidirectional strings. If the string does not satisfy the requirements for bidirectional strings, return an error. [Unicode3.2] defines several bidirectional categories; each character has one bidirectional category assigned to it. For the purposes of the requirements below, an "RandALCat character" is a character that has Unicode bidirectional categories "R" or "AL"; an "LCat character" is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above definitions; Latin digits (<U+0030> through <U+0039>) are examples of this because they have bidirectional category "EN".
In any profile that specifies bidirectional character handling, all three of the following requirements MUST be met:
1) The characters in section 5.8 MUST be prohibited.
2) If a string contains any RandALCat character, the string MUST NOT contain any LCat character.
3) If a string contains any RandALCat character, a RandALCat character MUST be the first character of the string, and a RandALCat character MUST be the last character of the string.
*/
/*
 * Public entry point for string preparation. Only the signature and the
 * initial status guard are visible in this fragment; the function body
 * continues beyond it.
 *
 * Returns 0 immediately when *status already holds a failure code.
 */
U_CAPI int32_t U_EXPORT2
usprep_prepare( const UStringPrepProfile* profile, const char16_t* src, int32_t srcLength,
                char16_t* dest, int32_t destCapacity,
                int32_t options,
                UParseError* parseError,
                UErrorCode* status ){
// check error status
if(U_FAILURE(*status)){
    return 0;
}
/*
 * Fragments that appear to belong to the body of usprep_prepare(); the
 * declarations of s1, b1, b1Len, b2, b2Index, b2Len, ch and result, and the
 * code between these pieces, are not visible here.
 */
if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string
/* we do not have enough room, so grow the buffer; a null buffer is reported as U_MEMORY_ALLOCATION_ERROR with a return value of 0 */
b1 = s1.getBuffer(b1Len); if(b1==nullptr){
*status = U_MEMORY_ALLOCATION_ERROR; return 0;
}
/* decode the trie word for the current code point */
int16_t value;
UBool isIndex;
UStringPrepType type = getValues(result, value, isIndex);
/* reject the code point if its type is USPREP_PROHIBITED, or if the raw word is below the type threshold and has its lowest bit set */
if( type == USPREP_PROHIBITED ||
((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says the code point is prohibited */)
){
*status = U_STRINGPREP_PROHIBITED_ERROR;
/* the error position passed is b2Index minus the UTF-16 length of ch */
uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); return 0;
}
/*
 * Fragment of usprep_swap() (the header and the declarations of pInfo, inData,
 * ds, length, headerSize, indexes, inIndexes, size, inBytes and outBytes are
 * outside this view; the final if(length>=0) block continues beyond it).
 *
 * Validates the data header, reads the 16 index words, computes the total
 * payload size, and bounds-checks length against it when length >= 0.
 */

/* check data format and format version */
pInfo = (const UDataInfo *)((const char *)inData + 4);
if (!(
    pInfo->dataFormat[0] == 0x53 && /* dataFormat="SPRP" */
    pInfo->dataFormat[1] == 0x50 &&
    pInfo->dataFormat[2] == 0x52 &&
    pInfo->dataFormat[3] == 0x50 &&
    pInfo->formatVersion[0] == 3
)) {
    udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
                     pInfo->dataFormat[0], pInfo->dataFormat[1],
                     pInfo->dataFormat[2], pInfo->dataFormat[3],
                     pInfo->formatVersion[0]);
    *pErrorCode = U_UNSUPPORTED_ERROR;
    return 0;
}

/* a non-negative length is checked against the data; a negative one skips these checks */
if (length >= 0) {
    length -= headerSize;
    /* there must be room for at least the 16 int32_t indexes */
    if (length < 16*4) {
        udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
}

/* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
for (i = 0; i < 16; ++i) {
    indexes[i] = udata_readInt32(ds, inIndexes[i]);
}

/* calculate the total length of the data */
size =
    16*4 + /* size of indexes[] */
    indexes[_SPREP_INDEX_TRIE_SIZE] +
    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];

if (length >= 0) {
    if (length < size) {
        udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
                         length);
        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if (inBytes != outBytes) {
        uprv_memcpy(outBytes, inBytes, size);
    }
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.