/*
 * UTF-8 / CESU-8 to UTF-16 conversion loop (variant without offsets).
 *
 * Reads bytes from mySource and writes char16_t units to myTarget until
 * either the source or the target is exhausted, or an error is set in *err.
 *
 * State kept in cnv across calls for a sequence cut off by the end of input:
 *   cnv->toUnicodeStatus  partially accumulated code point value
 *   cnv->mode             total number of bytes in the sequence
 *   cnv->toULength        number of bytes of the sequence consumed so far
 */

/* Restore size of current sequence */
if (cnv->toULength > 0 && myTarget < targetLimit)
{
    inBytes = cnv->mode;            /* restore # of bytes to consume */
    i = cnv->toULength;             /* restore # of bytes consumed */
    cnv->toULength = 0;

    ch = cnv->toUnicodeStatus;      /* Stores the previously calculated ch from a previous call */
    cnv->toUnicodeStatus = 0;
    goto morebytes;                 /* continue collecting trail bytes inside the loop below */
}

while (mySource < sourceLimit && myTarget < targetLimit)
{
    ch = *(mySource++);
    if (U8_IS_SINGLE(ch))           /* Simple case */
    {
        *(myTarget++) = (char16_t) ch;
    }
    else
    {
        /* store the first char */
        toUBytes[0] = (char)ch;
        inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
        i = 1;

morebytes:
        while (i < inBytes)
        {
            if (mySource < sourceLimit)
            {
                toUBytes[i] = (char) (ch2 = *mySource);
                /*
                 * Stop on a byte that is not a valid trail byte at this position.
                 * The second clause additionally accepts, for CESU-8 only, any
                 * trail byte directly after a 0xed lead byte.
                 */
                if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
                        !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
                {
                    break; /* i < inBytes */
                }
                ch = (ch << 6) + ch2;
                ++mySource;
                i++;
            }
            else
            {
                /* stores a partially calculated target */
                cnv->toUnicodeStatus = ch;
                cnv->mode = inBytes;
                cnv->toULength = (int8_t) i;
                goto donefornow;
            }
        }

        // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
        if (i == inBytes && (!isCESU8 || i <= 3))
        {
            /* Remove the accumulated high bits */
            ch -= offsetsFromUTF8[inBytes];

            /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
            if (ch <= MAXIMUM_UCS2)
            {
                /* fits in 16 bits */
                *(myTarget++) = (char16_t) ch;
            }
            else
            {
                /* write out the surrogates */
                *(myTarget++) = U16_LEAD(ch);
                ch = U16_TRAIL(ch);
                if (myTarget < targetLimit)
                {
                    *(myTarget++) = (char16_t)ch;
                }
                else
                {
                    /* Put in overflow buffer (not handled here) */
                    cnv->UCharErrorBuffer[0] = (char16_t) ch;
                    cnv->UCharErrorBufferLength = 1;
                    *err = U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            }
        }
        else
        {
            /* Invalid or disallowed sequence: record how many bytes were collected. */
            cnv->toULength = (int8_t)i;
            *err = U_ILLEGAL_CHAR_FOUND;
            break;
        }
    }
}

donefornow:
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
{
    /* End of target buffer */
    *err = U_BUFFER_OVERFLOW_ERROR;
}
/*
 * UTF-8 / CESU-8 to UTF-16 conversion loop (variant that also writes offsets).
 *
 * Same decoding logic as the plain loop, but every char16_t written to
 * myTarget is paired with an entry written to myOffsets. offsetNum is the
 * value recorded for the current sequence; it advances by 1 after a
 * single-byte character and by i (bytes consumed) after a multi-byte one.
 *
 * State kept in cnv across calls for a sequence cut off by the end of input:
 *   cnv->toUnicodeStatus  partially accumulated code point value
 *   cnv->mode             total number of bytes in the sequence
 *   cnv->toULength        number of bytes of the sequence consumed so far
 */

/* Restore size of current sequence */
if (cnv->toULength > 0 && myTarget < targetLimit)
{
    inBytes = cnv->mode;            /* restore # of bytes to consume */
    i = cnv->toULength;             /* restore # of bytes consumed */
    cnv->toULength = 0;

    ch = cnv->toUnicodeStatus;      /* Stores the previously calculated ch from a previous call */
    cnv->toUnicodeStatus = 0;
    goto morebytes;                 /* continue collecting trail bytes inside the loop below */
}

while (mySource < sourceLimit && myTarget < targetLimit)
{
    ch = *(mySource++);
    if (U8_IS_SINGLE(ch))           /* Simple case */
    {
        *(myTarget++) = (char16_t) ch;
        *(myOffsets++) = offsetNum++;
    }
    else
    {
        /* store the lead byte and look up the sequence length */
        toUBytes[0] = (char)ch;
        inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
        i = 1;

morebytes:
        while (i < inBytes)
        {
            if (mySource < sourceLimit)
            {
                toUBytes[i] = (char) (ch2 = *mySource);
                /*
                 * Stop on a byte that is not a valid trail byte at this position.
                 * The second clause additionally accepts, for CESU-8 only, any
                 * trail byte directly after a 0xed lead byte.
                 */
                if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
                        !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
                {
                    break; /* i < inBytes */
                }
                ch = (ch << 6) + ch2;
                ++mySource;
                i++;
            }
            else
            {
                /* input ended mid-sequence: save the partial state for the next call */
                cnv->toUnicodeStatus = ch;
                cnv->mode = inBytes;
                cnv->toULength = (int8_t)i;
                goto donefornow;
            }
        }

        // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
        if (i == inBytes && (!isCESU8 || i <= 3))
        {
            /* Remove the accumulated high bits */
            ch -= offsetsFromUTF8[inBytes];

            /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
            if (ch <= MAXIMUM_UCS2)
            {
                /* fits in 16 bits */
                *(myTarget++) = (char16_t) ch;
                *(myOffsets++) = offsetNum;
            }
            else
            {
                /* write out the surrogates */
                *(myTarget++) = U16_LEAD(ch);
                *(myOffsets++) = offsetNum;
                ch = U16_TRAIL(ch);
                if (myTarget < targetLimit)
                {
                    *(myTarget++) = (char16_t)ch;
                    *(myOffsets++) = offsetNum;
                }
                else
                {
                    /*
                     * No room for the trail surrogate: park it in the overflow
                     * buffer. There is no break here; the enclosing while
                     * condition (myTarget < targetLimit) ends the loop.
                     */
                    cnv->UCharErrorBuffer[0] = (char16_t) ch;
                    cnv->UCharErrorBufferLength = 1;
                    *err = U_BUFFER_OVERFLOW_ERROR;
                }
            }
            offsetNum += i;
        }
        else
        {
            /* Invalid or disallowed sequence: record how many bytes were collected. */
            cnv->toULength = (int8_t)i;
            *err = U_ILLEGAL_CHAR_FOUND;
            break;
        }
    }
}

donefornow:
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
{
    /* End of target buffer */
    *err = U_BUFFER_OVERFLOW_ERROR;
}
/*
 * Excerpt: encode one UTF-16 code unit / code point `ch` into myTarget.
 * One- and two-byte forms are written inline; everything else falls into the
 * final else branch, which first tries to pair surrogates (unless in CESU-8
 * mode, i.e. isNotCESU8 is false) and then selects an output pointer.
 * NOTE(review): the `break` statements presumably leave an enclosing loop
 * that is not part of this excerpt - confirm against the full function.
 */
if (ch < 0x80)        /* Single byte */
{
    *(myTarget++) = (uint8_t) ch;
}
else if (ch < 0x800)  /* Double byte */
{
    *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
    if (myTarget < targetLimit)
    {
        *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
    }
    else
    {
        /* no room for the second byte: park it in the overflow buffer */
        cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
        cnv->charErrorBufferLength = 1;
        *err = U_BUFFER_OVERFLOW_ERROR;
    }
}
else {
    /* Check for surrogates */
    if(U16_IS_SURROGATE(ch) && isNotCESU8) {
lowsurrogate:
        if (mySource < sourceLimit) {
            /* test both code units */
            if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
                /* convert and consume this supplementary code point */
                ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
                ++mySource;
                /* exit this condition tree */
            } else {
                /* this is an unpaired trail or lead code unit */
                /* callback(illegal) */
                cnv->fromUChar32 = ch;
                *err = U_ILLEGAL_CHAR_FOUND;
                break;
            }
        } else {
            /* no more input */
            cnv->fromUChar32 = ch;   /* keep the pending unit in the converter */
            break;
        }
    }

    /* Do we write the buffer directly for speed,
    or do we have to be careful about target buffer space? */
    tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
    /* (the else branch is still open here; it continues past this excerpt) */
/*
 * Excerpt: surrogate handling for a UTF-16 unit `ch`, variant that also
 * advances nextSourceIndex. Skipped entirely when isNotCESU8 is false.
 * NOTE(review): no goto to `lowsurrogate` is visible in this excerpt, and the
 * `break` statements presumably leave an enclosing loop outside it - confirm
 * against the full function.
 */
if(U16_IS_SURROGATE(ch) && isNotCESU8) {
lowsurrogate: if (mySource < sourceLimit) { /* test both code units */ if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { /* convert and consume this supplementary code point */
ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
++mySource;
++nextSourceIndex; /* exit this condition tree */
} else { /* this is an unpaired trail or lead code unit */ /* callback(illegal) */
/* remember the offending unit in the converter before reporting the error */
cnv->fromUChar32 = ch;
*err = U_ILLEGAL_CHAR_FOUND; break;
}
} else { /* no more input */
/* keep the pending unit in the converter; no error is set on this path */
cnv->fromUChar32 = ch; break;
}
}
/* Do we write the buffer directly for speed,
or do we have to be careful about target buffer space? */
/* tempPtr is myTarget when at least 4 units of space remain, otherwise tempBuf */
tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
/*
 * Excerpt: handle a multi-byte sequence whose trail bytes extend past
 * args->sourceLimit. The lead byte and any valid trail bytes that are still
 * available are copied into cnv->toUBytes, the count is stored in
 * cnv->toULength, args->source is advanced, and 0xffff is returned.
 * *err is U_TRUNCATED_CHAR_FOUND unless an invalid trail byte is met first,
 * in which case it is U_ILLEGAL_CHAR_FOUND.
 */

/* The byte sequence is longer than the buffer area passed */
if (((const char *)source + countTrailBytes) > args->sourceLimit)
{
    /* check if all of the remaining bytes are trail bytes */
    uint16_t extraBytesToWrite = countTrailBytes + 1;   /* lead byte + trail bytes */
    cnv->toUBytes[0] = myByte;
    i = 1;
    *err = U_TRUNCATED_CHAR_FOUND;
    while(source < (const uint8_t *)args->sourceLimit) {
        uint8_t b = *source;
        if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
            cnv->toUBytes[i++] = b;
            ++source;
        } else {
            /* error even before we run out of input */
            *err = U_ILLEGAL_CHAR_FOUND;
            break;
        }
    }
    cnv->toULength = i;
    args->source = (const char *)source;
    return 0xffff;
}
/*
 * Excerpt: setup for a UTF-8 to UTF-8 conversion.
 * Loads the source/target pointers, picks up any partial character saved in
 * the UTF-8 converter (toULength / mode / toUnicodeStatus), and computes
 * `count`, a single counter bounded by both the available input and the
 * target capacity. If a partial character is pending (c != 0), control jumps
 * to the moreBytes label, which lies outside this excerpt.
 */

/* set up the local pointers */
utf8=pToUArgs->converter;
source=(uint8_t *)pToUArgs->source;
sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
target=(uint8_t *)pFromUArgs->target;
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);

/* get the converter state from the UTF-8 UConverter */
if(utf8->toULength > 0) {
    toULength=oldToULength=utf8->toULength;
    toULimit=(int8_t)utf8->mode;
    c=(UChar32)utf8->toUnicodeStatus;
} else {
    toULength=oldToULength=toULimit=0;
    c = 0;
}

count=(int32_t)(sourceLimit-source)+oldToULength;
if(count<toULimit) {
    /*
     * Not enough input to complete the partial character.
     * Jump to moreBytes below - it will not output to target.
     */
} else if(targetCapacity<toULimit) {
    /*
     * Not enough target capacity to output the partial character.
     * Let the standard converter handle this.
     */
    *pErrorCode=U_USING_DEFAULT_WARNING;
    return;
} else {
    // Use a single counter for source and target, counting the minimum of
    // the source length and the target capacity.
    // Let the standard converter handle edge cases.
    if(count>targetCapacity) {
        count=targetCapacity;
    }

    // The conversion loop checks count>0 only once per character.
    // If the buffer ends with a truncated sequence,
    // then we reduce the count to stop before that,
    // and collect the remaining bytes after the conversion loop.

    // Do not go back into the bytes that will be read for finishing a partial
    // sequence from the previous buffer.
    int32_t length=count-toULength;
    U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
    count=toULength+length;
}

if(c!=0) {
    /* a partial character is pending: clear the saved state and resume it */
    utf8->toUnicodeStatus=0;
    utf8->toULength=0;
    goto moreBytes;
    /* See note in ucnv_SBCSFromUTF8() about this goto. */
}
Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt.
Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.