/*Extracts the char16_t* to a char* and calls through createConverter */
U_CAPI UConverter* U_EXPORT2
ucnv_openU (const char16_t * name,
UErrorCode * err)
{ char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
/* Copy the string that is represented by the UConverterPlatform enum * @param platformString An output buffer * @param platform An enum representing a platform * @return the length of the copied string.
*/ static int32_t
ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
{ switch (pltfrm)
{ case UCNV_IBM:
uprv_strcpy(platformString, "ibm-"); return 4; case UCNV_UNKNOWN: break;
}
UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
ucnv_getName(cnv, status), cnv, stackBuffer);
if (cnv->sharedData->impl->safeClone != nullptr) { /* call the custom safeClone function for sizing */
bufferSizeNeeded = 0;
cnv->sharedData->impl->safeClone(cnv, nullptr, &bufferSizeNeeded, status); if (U_FAILURE(*status)) {
UTRACE_EXIT_STATUS(*status); return nullptr;
}
} else
{ /* inherent sizing */
bufferSizeNeeded = sizeof(UConverter);
}
if (pBufferSize == nullptr) {
stackBufferSize = 1;
pBufferSize = &stackBufferSize;
} else {
stackBufferSize = *pBufferSize; if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
*pBufferSize = bufferSizeNeeded;
UTRACE_EXIT_VALUE(bufferSizeNeeded); return nullptr;
}
}
/* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed.
*/ if (stackBuffer) {
uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer);
uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1);
ptrdiff_t pointerAdjustment = aligned_p - p; if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) {
stackBuffer = reinterpret_cast<void *>(aligned_p);
stackBufferSize -= static_cast<int32_t>(pointerAdjustment);
} else { /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
stackBufferSize = 1;
}
}
/* Now, see if we must allocate any memory */ if (stackBufferSize < bufferSizeNeeded || stackBuffer == nullptr)
{ /* allocate one here...*/
localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
if(localConverter == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
UTRACE_EXIT_STATUS(*status); return nullptr;
} // If pBufferSize was nullptr as the input, pBufferSize is set to &stackBufferSize in this function. if (pBufferSize != &stackBufferSize) {
*status = U_SAFECLONE_ALLOCATED_WARNING;
}
/* record the fact that memory was allocated */
*pBufferSize = bufferSizeNeeded;
} else { /* just use the stack buffer */
localConverter = (UConverter*) stackBuffer;
allocatedConverter = nullptr;
}
/* now either call the safeclone fcn or not */ if (cnv->sharedData->impl->safeClone != nullptr) { /* call the custom safeClone function */
localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
}
/* In order to speed up the close, only call the callbacks when they have been changed. This performance check will only work when the callbacks are set within a shared library
or from user code that statically links this code. */ /* first, notify the callback functions that the converter is closed */ if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
UConverterToUnicodeArgs toUArgs = { sizeof(UConverterToUnicodeArgs), true,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr
};
/**
 * Returns a single name from the list of available converters.
 *
 * @param n Zero-based index of the requested name. Only values in
 *          [0, 0xffff] are looked up, because the index is narrowed to
 *          uint16_t before being passed on.
 * @return The name produced by ucnv_bld_getAvailableConverter(), or nullptr
 *         if n is out of that range or the lookup reports a failure.
 */
U_CAPI const char* U_EXPORT2
ucnv_getAvailableName (int32_t n)
{
    /* Reject indexes that would not survive the cast to uint16_t below. */
    if (n < 0 || n > 0xffff) {
        return nullptr;
    }

    UErrorCode err = U_ZERO_ERROR;
    const char *name = ucnv_bld_getAvailableConverter(static_cast<uint16_t>(n), &err);

    /* The error code is local: a failed lookup is reported only as nullptr. */
    return U_SUCCESS(err) ? name : nullptr;
}
/*Makes sure that the subChar is within the codepages char length boundaries */ if ((len > converter->sharedData->staticData->maxBytesPerChar)
|| (len < converter->sharedData->staticData->minBytesPerChar))
{
*err = U_ILLEGAL_ARGUMENT_ERROR; return;
}
uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
converter->subCharLen = len; /*sets the new len */
/* * There is currently (2001Feb) no separate API to set/get subChar1. * In order to always have subChar written after it is explicitly set, * we set subChar1 to 0.
*/
converter->subChar1 = 0;
}
/* Let the following functions check all arguments. */
cloneSize = sizeof(cloneBuffer);
clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, nullptr, nullptr, nullptr, err);
length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
ucnv_close(clone); if (U_FAILURE(*err)) { return;
}
/*
 * Choose what gets stored as the substitution string:
 * the already-converted charset bytes (chars) or the original UTF-16 text (s).
 * The preprocessor directives must sit on their own lines; a '#' in the
 * middle of a code line is not a directive and does not compile.
 */
if (cnv->sharedData->impl->writeSub == nullptr
#if !UCONFIG_NO_LEGACY_CONVERSION
    || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
        ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
#endif
) {
    /* The converter is not stateful. Store the charset bytes as a fixed string. */
    subChars = (uint8_t *)chars;
} else {
    /*
     * The converter has a non-default writeSub() function, indicating
     * that it is stateful.
     * Store the Unicode string for on-the-fly conversion for correct
     * state handling.
     */
    if (length > UCNV_ERROR_BUFFER_LENGTH) {
        /*
         * Should not occur. The converter should output at least one byte
         * per char16_t, which means that ucnv_fromUChars() should catch all
         * overflows.
         */
        *err = U_BUFFER_OVERFLOW_ERROR;
        return;
    }
    subChars = (uint8_t *)s;
    if (length < 0) {
        /* a negative length means the string is measured with u_strlen() */
        length = u_strlen(s);
    }
    /* length8 is a byte count: UTF-16 code units times the size of one unit */
    length8 = length * U_SIZEOF_UCHAR;
}
/* * For storing the substitution string, select either the small buffer inside * UConverter or allocate a subChars buffer.
*/ if (length8 > UCNV_MAX_SUBCHAR_LEN) { /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ if (cnv->subChars == (uint8_t *)cnv->subUChars) { /* Allocate a new buffer for the string. */
cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); if (cnv->subChars == nullptr) {
cnv->subChars = (uint8_t *)cnv->subUChars;
*err = U_MEMORY_ALLOCATION_ERROR; return;
}
uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
}
}
/* Copy the substitution string into the UConverter or its subChars buffer. */ if (length8 == 0) {
cnv->subCharLen = 0;
} else {
uprv_memcpy(cnv->subChars, subChars, length8); if (subChars == (uint8_t *)chars) {
cnv->subCharLen = (int8_t)length8;
} else/* subChars == s */ {
cnv->subCharLen = (int8_t)-length;
}
}
/* See comment in ucnv_setSubstChars(). */
cnv->subChar1 = 0;
}
/*resets the internal states of a converter *goal : have the same behaviour as a freshly created converter
*/ staticvoid _reset(UConverter *converter, UConverterResetChoice choice,
UBool callCallback) { if(converter == nullptr) { return;
}
if(callCallback) { /* first, notify the callback functions that the converter is reset */
UErrorCode errorCode;
/* Start from the codepage number recorded in the converter's static data. */
ccsid = converter->sharedData->staticData->codepage;
if (ccsid == 0) {
    /* Rare case. This is for cases like gb18030,
       which doesn't have an IBM canonical name, but does have an IBM alias. */
    const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
    if (U_SUCCESS(*err) && standardName) {
        /* The number is parsed from the text that follows the first '-' in the name. */
        const char *ccsidStr = uprv_strchr(standardName, '-');
        if (ccsidStr) {
            ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
        }
    }
}
return ccsid;
}
/*
 * Compute the adjustment (delta) for the "length" offsets starting at
 * "offsets", then rewrite them in place.
 */
if(sourceIndex>=0) {
    /*
     * adjust each offset by adding the previous sourceIndex
     * minus the length of the input sequence that caused an
     * error, if any
     */
    delta=sourceIndex-errorInputLength;
} else {
    /*
     * set each offset to -1 because this conversion function
     * does not handle offsets
     */
    delta=-1;
}

limit=offsets+length;
if(delta==0) {
    /* most common case, nothing to do */
} else if(delta>0) {
    /* add the delta to each offset (but not if the offset is <0) */
    while(offsets<limit) {
        offset=*offsets;
        if(offset>=0) {
            *offsets=offset+delta;
        }
        ++offsets;
    }
} else /* delta<0 */ {
    /*
     * set each offset to -1 because this conversion function
     * does not handle offsets
     * or the error input sequence started in a previous buffer
     */
    while(offsets<limit) {
        *offsets++=-1;
    }
}
}
/* * Implementation note for m:n conversions * * While collecting source units to find the longest match for m:n conversion, * some source units may need to be stored for a partial match. * When a second buffer does not yield a match on all of the previously stored * source units, then they must be "replayed", i.e., fed back into the converter. * * The code relies on the fact that replaying will not nest - * converting a replay buffer will not result in a replay. * This is because a replay is necessary only after the _continuation_ of a * partial match failed, but a replay buffer is converted as a whole. * It may result in some of its units being stored again for a partial match, * but there will not be a continuation _during_ the replay which could fail. * * It is conceivable that a callback function could call the converter * recursively in a way that causes another replay to be stored, but that * would be an error in the callback function. * Such violations will cause assertion failures in a debug build, * and wrong output, but they will not cause a crash.
*/
/* get the converter implementation function */
sourceIndex=0; if(offsets==nullptr) {
fromUnicode=cnv->sharedData->impl->fromUnicode;
} else {
fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; if(fromUnicode==nullptr) { /* there is no WithOffsets implementation */
fromUnicode=cnv->sharedData->impl->fromUnicode; /* we will write -1 for each offset */
sourceIndex=-1;
}
}
if(cnv->preFromULength>=0) { /* normal mode */
realSource=nullptr;
/* avoid compiler warnings - not otherwise necessary, and the values do not matter */
realSourceLimit=nullptr;
realFlush=false;
realSourceIndex=0;
} else { /* * Previous m:n conversion stored source units from a partial match * and failed to consume all of them. * We need to "replay" them from a temporary buffer and convert them first.
*/
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
/* * set a flag for whether the converter * successfully processed the end of the input * * need not check cnv->preFromULength==0 because a replay (<0) will cause * s<sourceLimit before converterSawEndOfInput is checked
*/
converterSawEndOfInput= static_cast<UBool>(U_SUCCESS(*err) &&
pArgs->flush && pArgs->source==pArgs->sourceLimit &&
cnv->fromUChar32==0);
} else { /* handle error from ucnv_convertEx() */
converterSawEndOfInput=false;
}
/* no callback called yet for this iteration */
calledCallback=false;
/* no sourceIndex adjustment for conversion, only for callback output */
errorInputLength=0;
/* * loop for offsets and error handling * * iterates at most 3 times: * 1. to clean up after the conversion function * 2. after the callback * 3. after the callback again if there was truncated input
*/ for(;;) { /* update offsets if we write any */ if(offsets!=nullptr) {
int32_t length = static_cast<int32_t>(pArgs->target - t); if(length>0) {
_updateOffsets(offsets, length, sourceIndex, errorInputLength);
/* * if a converter handles offsets and updates the offsets * pointer at the end, then pArgs->offset should not change * here; * however, some converters do not handle offsets at all * (sourceIndex<0) or may not update the offsets pointer
*/
pArgs->offsets=offsets+=length;
}
if(cnv->preFromULength<0) { /* * switch the source to new replay units (cannot occur while replaying) * after offset handling and before end-of-input and callback handling
*/ if(realSource==nullptr) {
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
if(U_SUCCESS(*err)) { if(s<pArgs->sourceLimit) { /* * continue with the conversion loop while there is still input left * (continue converting by breaking out of only the inner loop)
*/ break;
} elseif(realSource!=nullptr) { /* switch back from replaying to the real source and continue */
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
sourceIndex=realSourceIndex;
realSource=nullptr; break;
} elseif(pArgs->flush && cnv->fromUChar32!=0) { /* * the entire input stream is consumed * and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*err=U_TRUNCATED_CHAR_FOUND;
calledCallback=false; /* new error condition */
} else { /* input consumed */ if(pArgs->flush) { /* * return to the conversion loop once more if the flush * flag is set and the conversion function has not * successfully processed the end of the input yet * * (continue converting by breaking out of only the inner loop)
*/ if(!converterSawEndOfInput) { break;
}
/* reset the converter without calling the callback function */
_reset(cnv, UCNV_RESET_FROM_UNICODE, false);
}
/* done successfully */ return;
}
}
/* U_FAILURE(*err) */
{
UErrorCode e;
if( calledCallback ||
(e=*err)==U_BUFFER_OVERFLOW_ERROR ||
(e!=U_INVALID_CHAR_FOUND &&
e!=U_ILLEGAL_CHAR_FOUND &&
e!=U_TRUNCATED_CHAR_FOUND)
) { /* * the callback did not or cannot resolve the error: * set output pointers and return * * the check for buffer overflow is redundant but it is * a high-runner case and hopefully documents the intent * well * * if we were replaying, then the replay buffer must be * copied back into the UConverter * and the real arguments must be restored
*/ if(realSource!=nullptr) {
int32_t length;
/* get and write the code point */
codePoint=cnv->fromUChar32;
errorInputLength=0;
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
cnv->invalidUCharLength = static_cast<int8_t>(errorInputLength);
/* set the converter state to deal with the next character */
cnv->fromUChar32=0;
/* call the callback function */
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
cnv->invalidUCharBuffer, errorInputLength, codePoint,
*err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
err);
}
/* * loop back to the offset handling * * this flag will indicate after offset handling * that a callback was called; * if the callback did not resolve the error, then we return
*/
calledCallback=true;
}
}
}
/* * Output the fromUnicode overflow buffer. * Call this function if(cnv->charErrorBufferLength>0). * @return true if overflow
*/ static UBool
ucnv_outputOverflowFromUnicode(UConverter *cnv, char **target, constchar *targetLimit,
int32_t **pOffsets,
UErrorCode *err) {
int32_t *offsets; char *overflow, *t;
int32_t i, length;
/* copy the overflow contents to the target */
*t++=overflow[i++]; if(offsets!=nullptr) {
*offsets++=-1; /* no source index available for old output */
}
}
/* the overflow buffer is completely copied to the target */
cnv->charErrorBufferLength=0;
*target=t; if(offsets!=nullptr) {
*pOffsets=offsets;
} returnfalse;
}
if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
    /*
    Prevent code from going into an infinite loop in case we do hit this
    limit. The limit pointer is expected to be on a char16_t * boundary.
    This also prevents the next argument check from failing.
    */
    sourceLimit = (const char16_t *)(((const char *)sourceLimit) - 1);
}

/*
 * All these conditions should never happen.
 *
 * 1) Make sure that the limits are >= to the address source or target
 *
 * 2) Make sure that the buffer sizes do not exceed the number range for
 * int32_t because some functions use the size (in units or bytes)
 * rather than comparing pointers, and because offsets are int32_t values.
 *
 * size_t is guaranteed to be unsigned and large enough for the job.
 *
 * Return with an error instead of adjusting the limits because we would
 * not be able to maintain the semantics that either the source must be
 * consumed or the target filled (unless an error occurs).
 * An adjustment would be targetLimit=t+0x7fffffff; for example.
 *
 * 3) Make sure that the user didn't incorrectly cast a char16_t * pointer
 * to a char * pointer and provide an incomplete char16_t code unit.
 */
if (sourceLimit<s || targetLimit<t ||
    ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
    ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
    (((const char *)sourceLimit-(const char *)s) & 1) != 0)
{
    *err=U_ILLEGAL_ARGUMENT_ERROR;
    return;
}
/* output the target overflow buffer */ if( cnv->charErrorBufferLength>0 &&
ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
) { /* U_BUFFER_OVERFLOW_ERROR */ return;
} /* *target may have moved, therefore stop using t */
if(!flush && s==sourceLimit && cnv->preFromULength>=0) { /* the overflow buffer is emptied and there is no new input: we are done */ return;
}
/* * Do not simply return with a buffer overflow error if * !flush && t==targetLimit * because it is possible that the source will not generate any output. * For example, the skip callback may be called; * it does not output anything.
*/
/* get the converter implementation function */
sourceIndex=0; if(offsets==nullptr) {
toUnicode=cnv->sharedData->impl->toUnicode;
} else {
toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; if(toUnicode==nullptr) { /* there is no WithOffsets implementation */
toUnicode=cnv->sharedData->impl->toUnicode; /* we will write -1 for each offset */
sourceIndex=-1;
}
}
if(cnv->preToULength>=0) { /* normal mode */
realSource=nullptr;
/* avoid compiler warnings - not otherwise necessary, and the values do not matter */
realSourceLimit=nullptr;
realFlush=false;
realSourceIndex=0;
} else { /* * Previous m:n conversion stored source units from a partial match * and failed to consume all of them. * We need to "replay" them from a temporary buffer and convert them first.
*/
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
/* * set a flag for whether the converter * successfully processed the end of the input * * need not check cnv->preToULength==0 because a replay (<0) will cause * s<sourceLimit before converterSawEndOfInput is checked
*/
converterSawEndOfInput= static_cast<UBool>(U_SUCCESS(*err) &&
pArgs->flush && pArgs->source==pArgs->sourceLimit &&
cnv->toULength==0);
} else { /* handle error from getNextUChar() or ucnv_convertEx() */
converterSawEndOfInput=false;
}
/* no callback called yet for this iteration */
calledCallback=false;
/* no sourceIndex adjustment for conversion, only for callback output */
errorInputLength=0;
/* * loop for offsets and error handling * * iterates at most 3 times: * 1. to clean up after the conversion function * 2. after the callback * 3. after the callback again if there was truncated input
*/ for(;;) { /* update offsets if we write any */ if(offsets!=nullptr) {
int32_t length = static_cast<int32_t>(pArgs->target - t); if(length>0) {
_updateOffsets(offsets, length, sourceIndex, errorInputLength);
/* * if a converter handles offsets and updates the offsets * pointer at the end, then pArgs->offset should not change * here; * however, some converters do not handle offsets at all * (sourceIndex<0) or may not update the offsets pointer
*/
pArgs->offsets=offsets+=length;
}
if(cnv->preToULength<0) { /* * switch the source to new replay units (cannot occur while replaying) * after offset handling and before end-of-input and callback handling
*/ if(realSource==nullptr) {
realSource=pArgs->source;
realSourceLimit=pArgs->sourceLimit;
realFlush=pArgs->flush;
realSourceIndex=sourceIndex;
if(U_SUCCESS(*err)) { if(s<pArgs->sourceLimit) { /* * continue with the conversion loop while there is still input left * (continue converting by breaking out of only the inner loop)
*/ break;
} elseif(realSource!=nullptr) { /* switch back from replaying to the real source and continue */
pArgs->source=realSource;
pArgs->sourceLimit=realSourceLimit;
pArgs->flush=realFlush;
sourceIndex=realSourceIndex;
realSource=nullptr; break;
} elseif(pArgs->flush && cnv->toULength>0) { /* * the entire input stream is consumed * and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*err=U_TRUNCATED_CHAR_FOUND;
calledCallback=false; /* new error condition */
} else { /* input consumed */ if(pArgs->flush) { /* * return to the conversion loop once more if the flush * flag is set and the conversion function has not * successfully processed the end of the input yet * * (continue converting by breaking out of only the inner loop)
*/ if(!converterSawEndOfInput) { break;
}
/* reset the converter without calling the callback function */
_reset(cnv, UCNV_RESET_TO_UNICODE, false);
}
/* done successfully */ return;
}
}
/* U_FAILURE(*err) */
{
UErrorCode e;
if( calledCallback ||
(e=*err)==U_BUFFER_OVERFLOW_ERROR ||
(e!=U_INVALID_CHAR_FOUND &&
e!=U_ILLEGAL_CHAR_FOUND &&
e!=U_TRUNCATED_CHAR_FOUND &&
e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
) { /* * the callback did not or cannot resolve the error: * set output pointers and return * * the check for buffer overflow is redundant but it is * a high-runner case and hopefully documents the intent * well * * if we were replaying, then the replay buffer must be * copied back into the UConverter * and the real arguments must be restored
*/ if(realSource!=nullptr) {
int32_t length;
/* set the converter state to deal with the next character */
cnv->toULength=0;
/* call the callback function */ if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
cnv->toUCallbackReason = UCNV_UNASSIGNED;
}
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
cnv->invalidCharBuffer, errorInputLength,
cnv->toUCallbackReason,
err);
cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
/* * loop back to the offset handling * * this flag will indicate after offset handling * that a callback was called; * if the callback did not resolve the error, then we return
*/
calledCallback=true;
}
}
}
/* * Output the toUnicode overflow buffer. * Call this function if(cnv->UCharErrorBufferLength>0). * @return true if overflow
*/ static UBool
ucnv_outputOverflowToUnicode(UConverter *cnv,
char16_t **target, const char16_t *targetLimit,
int32_t **pOffsets,
UErrorCode *err) {
int32_t *offsets;
char16_t *overflow, *t;
int32_t i, length;
/* copy the overflow contents to the target */
*t++=overflow[i++]; if(offsets!=nullptr) {
*offsets++=-1; /* no source index available for old output */
}
}
/* the overflow buffer is completely copied to the target */
cnv->UCharErrorBufferLength=0;
*target=t; if(offsets!=nullptr) {
*pOffsets=offsets;
} returnfalse;
}
if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
    /*
    Prevent code from going into an infinite loop in case we do hit this
    limit. The limit pointer is expected to be on a char16_t * boundary.
    This also prevents the next argument check from failing.
    */
    targetLimit = (const char16_t *)(((const char *)targetLimit) - 1);
}

/*
 * All these conditions should never happen.
 *
 * 1) Make sure that the limits are >= to the address source or target
 *
 * 2) Make sure that the buffer sizes do not exceed the number range for
 * int32_t because some functions use the size (in units or bytes)
 * rather than comparing pointers, and because offsets are int32_t values.
 *
 * size_t is guaranteed to be unsigned and large enough for the job.
 *
 * Return with an error instead of adjusting the limits because we would
 * not be able to maintain the semantics that either the source must be
 * consumed or the target filled (unless an error occurs).
 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
 *
 * 3) Make sure that the user didn't incorrectly cast a char16_t * pointer
 * to a char * pointer and provide an incomplete char16_t code unit.
 */
if (sourceLimit<s || targetLimit<t ||
    ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
    ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
    (((const char *)targetLimit-(const char *)t) & 1) != 0
) {
    *err=U_ILLEGAL_ARGUMENT_ERROR;
    return;
}
/* output the target overflow buffer */ if( cnv->UCharErrorBufferLength>0 &&
ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
) { /* U_BUFFER_OVERFLOW_ERROR */ return;
} /* *target may have moved, therefore stop using t */
if(!flush && s==sourceLimit && cnv->preToULength>=0) { /* the overflow buffer is emptied and there is no new input: we are done */ return;
}
/* * Do not simply return with a buffer overflow error if * !flush && t==targetLimit * because it is possible that the source will not generate any output. * For example, the skip callback may be called; * it does not output anything.
*/
/* * Make sure that the buffer sizes do not exceed the number range for * int32_t because some functions use the size (in units or bytes) * rather than comparing pointers, and because offsets are int32_t values. * * size_t is guaranteed to be unsigned and large enough for the job. * * Return with an error instead of adjusting the limits because we would * not be able to maintain the semantics that either the source must be * consumed or the target filled (unless an error occurs). * An adjustment would be sourceLimit=t+0x7fffffff; for example.
*/ if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
*err=U_ILLEGAL_ARGUMENT_ERROR; return 0xffff;
}
overflow=cnv->UCharErrorBuffer;
i=0;
length=cnv->UCharErrorBufferLength;
U16_NEXT(overflow, i, length, c);
/* move the remaining overflow contents up to the beginning */ if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
}
if(!U16_IS_LEAD(c) || i<length) { return c;
} /* * Continue if the overflow buffer contained only a lead surrogate, * in case the converter outputs single surrogates from complete * input sequences.
*/
}
/* * flush==true is implied for ucnv_getNextUChar() * * do not simply return even if s==sourceLimit because the converter may * not have seen flush==true before
*/
if(c<0) {
    /*
     * call the native getNextUChar() implementation if we are
     * at a character boundary (toULength==0)
     *
     * unlike with _toUnicode(), getNextUChar() implementations must set
     * U_TRUNCATED_CHAR_FOUND for truncated input,
     * in addition to setting toULength/toUBytes[]
     */
    if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=nullptr) {
        c=cnv->sharedData->impl->getNextUChar(&args, err);
        /* publish how far the native implementation advanced the source */
        *source=s=args.source;
        if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
            /* reset the converter without calling the callback function */
            _reset(cnv, UCNV_RESET_TO_UNICODE, false);
            return 0xffff; /* no output */
        } else if(U_SUCCESS(*err) && c>=0) {
            return c;
        /*
         * else fall through to use _toUnicode() because
         *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
         *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
         */
        }
    }

    /* convert to one char16_t in buffer[0], or handle getNextUChar() errors */
    _toUnicodeWithCallback(&args, err);
    i=0;
    length=(int32_t)(args.target-buffer);
} else {
    /* write the lead surrogate from the overflow buffer */
    buffer[0]=(char16_t)c;
    args.target=buffer+1;
    i=0;
    length=1;
}
/* buffer contents starts at i and ends before length */
if(U_FAILURE(*err)) {
c=0xffff; /* no output */
} elseif(length==0) { /* no input or only state changes */
*err=U_INDEX_OUTOFBOUNDS_ERROR; /* no need to reset explicitly because _toUnicodeWithCallback() did it */
c=0xffff; /* no output */
} else {
c=buffer[0];
i=1; if(!U16_IS_LEAD(c)) { /* consume c=buffer[0], done */
} else { /* got a lead surrogate, see if a trail surrogate follows */
char16_t c2;
if(cnv->UCharErrorBufferLength>0) { /* got overflow output from the conversion */ if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { /* got a trail surrogate, too */
c=U16_GET_SUPPLEMENTARY(c, c2);
/* move the remaining overflow contents up to the beginning */ if((--cnv->UCharErrorBufferLength)>0) {
uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
}
} else { /* c is an unpaired lead surrogate, just return it */
}
} elseif(args.source<sourceLimit) { /* convert once more, to buffer[1] */
args.targetLimit=buffer+2;
_toUnicodeWithCallback(&args, err); if(*err==U_BUFFER_OVERFLOW_ERROR) {
*err=U_ZERO_ERROR;
}
/* * move leftover output from buffer[i..length[ * into the beginning of the overflow buffer
*/ if(i<length) { /* move further overflow back */
int32_t delta=length-i; if((length=cnv->UCharErrorBufferLength)>0) {
uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
length*U_SIZEOF_UCHAR);
}
cnv->UCharErrorBufferLength=(int8_t)(length+delta);
/* * Make sure that the buffer sizes do not exceed the number range for * int32_t. See ucnv_toUnicode() for a more detailed comment.
*/ if(
(sourceLimit!=nullptr && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return;
}
if(sourceLimit==nullptr) { /* get limit of single-byte-NUL-terminated source string */
sourceLimit=uprv_strchr(*source, 0);
}
if(reset) {
ucnv_resetToUnicode(sourceCnv);
ucnv_resetFromUnicode(targetCnv);
*pivotSource=*pivotTarget=pivotStart;
} elseif(targetCnv->charErrorBufferLength>0) { /* output the targetCnv overflow buffer */ if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, nullptr, pErrorCode)) { /* U_BUFFER_OVERFLOW_ERROR */ return;
} /* *target has moved, therefore stop using t */
if( !flush &&
targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
) { /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ return;
}
}
/* * If direct-UTF-8 conversion is available, then we use a smaller * pivot buffer for error handling and partial matches * so that we quickly return to direct conversion. * * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. * * We could reduce the pivot buffer size further, at the cost of * buffer overflows from callbacks. * The pivot buffer should not be smaller than the maximum number of * fromUnicode extension table input UChars * (for m:n conversion, see * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) * or 2 for surrogate pairs. * * Too small a buffer can cause thrashing between pivoting and direct * conversion, with function call overhead outweighing the benefits * of direct conversion.
*/ if(convert!=nullptr && (pivotLimit-pivotStart)>32) {
pivotLimit=pivotStart+32;
}
/* * TODO: Consider separating this function into two functions, * extracting exactly the conversion loop, * for readability and to reduce the set of visible variables. * * Otherwise stop using s and t from here on.
*/
s=t=nullptr;
/* * conversion loop * * The sequence of steps in the loop may appear backward, * but the principle is simple: * In the chain of * source - sourceCnv overflow - pivot - targetCnv overflow - target * empty out later buffers before refilling them from earlier ones. * * The targetCnv overflow buffer is flushed out only once before the loop.
*/ for(;;) { /* * if(pivot not empty or error or replay or flush fromUnicode) { * fromUnicode(pivot -> target); * } * * For pivoting conversion; and for direct conversion for * error callback handling and flushing the replay buffer.
*/ if( *pivotSource<*pivotTarget ||
U_FAILURE(*pErrorCode) ||
targetCnv->preFromULength<0 ||
fromUArgs.flush
) {
fromUArgs.source=*pivotSource;
fromUArgs.sourceLimit=*pivotTarget;
_fromUnicodeWithCallback(&fromUArgs, pErrorCode); if(U_FAILURE(*pErrorCode)) { /* target overflow, or conversion error */
*pivotSource=(char16_t *)fromUArgs.source; break;
}
/* * _fromUnicodeWithCallback() must have consumed the pivot contents * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
*/
}
/* The pivot buffer is empty; reset it so we start at pivotStart. */
*pivotSource=*pivotTarget=pivotStart;
/* * check for end of input and break if done * * Checking both flush and fromUArgs.flush ensures that the converters * have been called with the flush flag set if the ucnv_convertEx() * caller set it.
*/ if( toUArgs.source==sourceLimit &&
sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
(!flush || fromUArgs.flush)
) { /* done successfully */ break;
}
/* * use direct conversion if available * but not if continuing a partial match * or flushing the toUnicode replay buffer
*/ if(convert!=nullptr && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { if(*pErrorCode==U_USING_DEFAULT_WARNING) { /* remove a warning that may be set by this function */
*pErrorCode=U_ZERO_ERROR;
}
convert(&fromUArgs, &toUArgs, pErrorCode); if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { break;
} elseif(U_FAILURE(*pErrorCode)) { if(sourceCnv->toULength>0) { /* * Fall through to calling _toUnicodeWithCallback() * for callback handling. * * The pivot buffer will be reset with * *pivotSource=*pivotTarget=pivotStart; * which indicates a toUnicode error to the caller * (*pivotSource==pivotStart shows no pivot UChars consumed).
*/
} else { /* * Indicate a fromUnicode error to the caller * (*pivotSource>pivotStart shows some pivot UChars consumed).
*/
*pivotSource=*pivotTarget=pivotStart+1; /* * Loop around to calling _fromUnicodeWithCallback() * for callback handling.
*/ continue;
}
} elseif(*pErrorCode==U_USING_DEFAULT_WARNING) { /* * No error, but the implementation requested to temporarily * fall back to pivoting.
*/
*pErrorCode=U_ZERO_ERROR; /* * The following else branches are almost identical to the end-of-input * handling in _toUnicodeWithCallback(). * Avoid calling it just for the end of input.
*/
} elseif(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ /* * the entire input stream is consumed * and there is a partial, truncated input sequence left
*/
/* inject an error and continue with callback handling */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
} else { /* input consumed */ if(flush) { /* reset the converters without calling the callback functions */
_reset(sourceCnv, UCNV_RESET_TO_UNICODE, false);
_reset(targetCnv, UCNV_RESET_FROM_UNICODE, false);
}
/* done successfully */ break;
}
}
/* * toUnicode(source -> pivot); * * For pivoting conversion; and for direct conversion for * error callback handling, continuing partial matches * and flushing the replay buffer. * * The pivot buffer is empty and reset.
*/
toUArgs.target=pivotStart; /* ==*pivotTarget */ /* toUArgs.targetLimit=pivotLimit; already set before the loop */
_toUnicodeWithCallback(&toUArgs, pErrorCode);
*pivotTarget=toUArgs.target; if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { /* pivot overflow: continue with the conversion loop */
*pErrorCode=U_ZERO_ERROR;
} elseif(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { /* conversion error, or there was nothing left to convert */ break;
} /* * else: * _toUnicodeWithCallback() wrote into the pivot buffer, * continue with fromUnicode conversion. * * Set the fromUnicode flush flag if we flush and if toUnicode has * processed the end of the input.
*/ if( flush && toUArgs.source==sourceLimit &&
sourceCnv->preToULength>=0 &&
sourceCnv->UCharErrorBufferLength==0
) {
fromUArgs.flush=true;
}
}
/* * The conversion loop is exited when one of the following is true: * - the entire source text has been converted successfully to the target buffer * - a target buffer overflow occurred * - a conversion error occurred
*/
*source=toUArgs.source;
*target=fromUArgs.target;
/* terminate the target buffer if possible */ if(flush && U_SUCCESS(*pErrorCode)) { if(*target!=targetLimit) {
**target=0; if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
*pErrorCode=U_ZERO_ERROR;
}
} else {
*pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
}
}
}
/* * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing * to it but continue the conversion in order to store in targetCapacity * the number of bytes that was required.
*/ if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
{ char targetBuffer[CHUNK_SIZE];
/* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN * bytes we don't misdetect something
*/ char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; int i = 0;
if (cnv == nullptr) {
*status = U_ILLEGAL_ARGUMENT_ERROR; returnfalse;
}
switch (ucnv_getType(cnv)) { case UCNV_SBCS: case UCNV_DBCS: case UCNV_UTF32_BigEndian: case UCNV_UTF32_LittleEndian: case UCNV_UTF32: case UCNV_US_ASCII: returntrue; default: returnfalse;
}
} #endif
/* * Hey, Emacs, please set the following: * * Local Variables: * indent-tabs-mode: nil * End: *
*/
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.48 Sekunden
(vorverarbeitet am 2026-05-07)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.