/**
 * The ESCAPE_BYTE character is used during run-length encoding. It signals
 * a run of identical bytes.
 */
static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
/**
 * Append one byte to the output, packing two bytes into each 16-bit unit.
 * The state parameter carries the pending half of a pair between calls.
 *
 * @param buffer    Current write position in the output.
 * @param buffLimit Position at which writing must stop (writes happen only
 *                  while buffer < buffLimit).
 * @param value     The byte to append.
 * @param state     A two-element array, with state[0] == 0 if this is the
 *                  first byte of a pair, or state[0] != 0 if this is the
 *                  second byte of a pair, in which case state[1] is the
 *                  first byte.
 * @param status    Error code; set to U_BUFFER_OVERFLOW_ERROR when a
 *                  completed pair cannot be stored.
 * @return The (possibly advanced) write position, or NULL when status is
 *         NULL or already indicates a failure on entry.
 */
static uint16_t*
appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
    uint16_t packed;

    if (status == NULL || U_FAILURE(*status)) {
        return NULL;
    }

    if (state[0] == 0) {
        /* First byte of a pair: remember it and wait for its partner. */
        state[1] = value;
        state[0] = 1;
        return buffer;
    }

    /* Second byte of a pair: saved byte goes in the high half, value in the low half. */
    packed = (uint16_t)((state[1] << 8) | value);
    if (buffer >= buffLimit) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    } else {
        *buffer++ = packed;
    }
    state[0] = 0;
    return buffer;
}

/**
 * Encode a run of identical bytes, possibly a degenerate run (of < 4 values).
 *
 * Runs shorter than 4 are written out byte by byte, with ESCAPE_BYTE doubled.
 * Longer runs are written as ESCAPE_BYTE, length, value. A run whose length
 * equals ESCAPE_BYTE is written as one literal value followed by a run of
 * length - 1.
 *
 * @param buffer   Current write position in the output.
 * @param bufLimit Limit passed through to appendEncodedByte.
 * @param value    The byte being repeated.
 * @param length   The length of the run; must be > 0 && <= 0xFF.
 * @param state    Pair-packing state shared with appendEncodedByte.
 * @param status   Error code, checked on entry and passed to every append.
 * @return The value returned by the last appendEncodedByte call, or NULL when
 *         status is NULL or already indicates a failure on entry.
 */
static uint16_t*
encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
    int32_t remaining;

    if (status == NULL || U_FAILURE(*status)) {
        return NULL;
    }

    if (length < 4) {
        /* Degenerate run: emit every byte literally. */
        for (remaining = length; remaining > 0; --remaining) {
            if (value == ESCAPE_BYTE) {
                buffer = appendEncodedByte(buffer, bufLimit, ESCAPE_BYTE, state, status);
            }
            buffer = appendEncodedByte(buffer, bufLimit, value, state, status);
        }
        return buffer;
    }

    if (length == ESCAPE_BYTE) {
        /* A length byte equal to ESCAPE_BYTE would be ambiguous, so peel off one literal. */
        if (value == ESCAPE_BYTE) {
            buffer = appendEncodedByte(buffer, bufLimit, ESCAPE_BYTE, state, status);
        }
        buffer = appendEncodedByte(buffer, bufLimit, value, state, status);
        --length;
    }

    buffer = appendEncodedByte(buffer, bufLimit, ESCAPE_BYTE, state, status);
    buffer = appendEncodedByte(buffer, bufLimit, (char)length, state, status);
    /* Don't need to escape this value */
    buffer = appendEncodedByte(buffer, bufLimit, value, state, status);
    return buffer;
}
/**
 * Encode a run of identical 16-bit values, possibly a degenerate run
 * (of < 4 values).
 *
 * Runs shorter than 4 are emitted value by value, with ESCAPE doubled.
 * Longer runs are emitted as ESCAPE, length, value. A run whose length equals
 * ESCAPE is emitted as one literal value followed by a run of length - 1.
 *
 * Output goes through the APPEND macro, which is defined outside this block;
 * presumably it stores at buffer, advances it, and reports overflow through
 * status -- confirm against its definition.
 *
 * @param buffer   Current write position in the output.
 * @param bufLimit Limit handed to APPEND.
 * @param value    The value being repeated.
 * @param length   The length of the run; must be > 0 && <= 0xFFFF.
 * @param status   Error code handed to APPEND.
 * @return buffer as left by the APPEND invocations.
 */
static uint16_t*
encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
    int32_t remaining;

    if (length < 4) {
        /* Degenerate run: emit every value literally. */
        for (remaining = length; remaining > 0; --remaining) {
            if (value == (int32_t) ESCAPE) {
                APPEND(buffer,bufLimit,ESCAPE, status);
            }
            APPEND(buffer,bufLimit,value,status);
        }
        return buffer;
    }

    if (length == (int32_t) ESCAPE) {
        /* A length equal to ESCAPE would be ambiguous, so peel off one literal. */
        if (value == (int32_t) ESCAPE) {
            APPEND(buffer,bufLimit,ESCAPE,status);
        }
        APPEND(buffer,bufLimit,value,status);
        --length;
    }

    APPEND(buffer,bufLimit,ESCAPE,status);
    APPEND(buffer,bufLimit,(uint16_t) length,status);
    /* Don't need to escape this value */
    APPEND(buffer,bufLimit,(uint16_t)value, status);
    return buffer;
}
/**
 * Construct a string representing a char array. Use run-length encoding.
 * A character represents itself, unless it is the ESCAPE character. Then
 * the following notations are possible:
 *   ESCAPE ESCAPE   ESCAPE literal
 *   ESCAPE n c      n instances of character c
 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
 * If we encounter a run where n == ESCAPE, we represent this as:
 *   c ESCAPE n-1 c
 * The ESCAPE value is chosen so as not to collide with commonly
 * seen values.
 *
 * The visible code writes srcLen into the first two output units (upper
 * 16 bits, then lower 16 bits), each guarded by a buffer < bufLimit check.
 *
 * NOTE(review): the text of this function stops right after the second
 * length unit is written. The run-encoding loop, the else branches, the
 * return statement and the closing braces are not present here, and
 * saveBuffer, runValue, runLength and i are initialized but never used in
 * the visible part. Restore the missing body from the original file.
 */
int32_t
usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
uint16_t* bufLimit = buffer+bufLen;
uint16_t* saveBuffer = buffer; if(buffer < bufLimit){
/* Header unit 1: upper 16 bits of the source length. */
*buffer++ = (uint16_t)(srcLen>>16); if(buffer<bufLimit){
/* The run state is seeded from the first source element. */
uint16_t runValue = src[0];
int32_t runLength = 1; int i=1;
/* Header unit 2: lower 16 bits of the source length. */
*buffer++ = (uint16_t) srcLen;
/**
 * Construct a string representing a byte array. Use run-length encoding.
 * Two bytes are packed into a single char, with a single extra zero byte at
 * the end if needed. A byte represents itself, unless it is the
 * ESCAPE_BYTE. Then the following notations are possible:
 *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
 *   ESCAPE_BYTE n b           n instances of byte b
 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
 *   b ESCAPE_BYTE n-1 b
 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
 * seen values.
 *
 * @return The distance between the final write position and the start of
 *         buffer, as computed by (buffer - saveBuf).
 *
 * NOTE(review): part of this function's body is missing from the visible
 * text. `state` is used but no declaration is visible, the loop over src is
 * absent, and the second `}else{` has no visible matching `if`. Restore the
 * missing section from the original file before relying on this code.
 */
int32_t
byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { const uint16_t* saveBuf = buffer;
uint16_t* bufLimit = buffer+bufLen; if(buffer < bufLimit){
/* Header unit 1: upper 16 bits of the source length. */
*buffer++ = ((uint16_t) (srcLen >> 16));
/* We must save the final byte, if there is one, by padding
 * an extra zero.
 */
if (state[0] != 0) {
buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
}
}else{
/* No room in the output: report overflow. */
*status = U_BUFFER_OVERFLOW_ERROR;
}
}else{
/* No room in the output: report overflow. */
*status = U_BUFFER_OVERFLOW_ERROR;
} return (int32_t) (buffer - saveBuf);
}
/**
 * Construct an array of shorts from a run-length encoded string.
 *
 * NOTE(review): the visible body does not match that description. It unpacks
 * two bytes from each 16-bit unit of src (starting at index 2) and runs a
 * byte-oriented ESCAPE_BYTE state machine. It also uses nextChar, c, node and
 * runLength, none of which are declared in the visible text; `length` is
 * declared but unused, and the function is never closed. The text appears to
 * be missing a section; restore it from the original file.
 */
int32_t
rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
int32_t length = 0;
int32_t ai = 0; int i=2;
for (; ai<tgtLen; ) {
/* This part of the loop places the next byte into the local
 * variable 'b' each time through the loop. It keeps the
 * current character in 'c' and uses the boolean 'nextChar'
 * to see if we've taken both bytes out of 'c' yet.
 * The high byte of 'c' is delivered first, then the low byte.
 */
uint8_t b; if (nextChar) {
c = src[i++];
b = (uint8_t) (c >> 8);
nextChar = false;
} else {
b = (uint8_t) (c & 0xFF);
nextChar = true;
}
/* This part of the loop is a tiny state machine which handles
 * the parsing of the run-length encoding. This would be simpler
 * if we could look ahead, but we can't, so we use 'node' to
 * move between three nodes in the state machine.
 */
switch (node) { case 0: /* Normal idle node */ if (b == ESCAPE_BYTE) {
node = 1;
} else {
target[ai++] = b;
} break; case 1:
/* We have seen one ESCAPE_BYTE; we expect either a second
 * one, or a run length and value.
 */
if (b == ESCAPE_BYTE) {
target[ai++] = ESCAPE_BYTE;
node = 0;
} else {
runLength = b;
node = 2;
} break; case 2:
{ int j=0;
/* We have seen an ESCAPE_BYTE and length byte. We interpret
 * the next byte as the value to be repeated. If the run does not
 * fit in target, status is set to U_BUFFER_OVERFLOW_ERROR and the
 * count written so far is returned.
 */
for (; j<runLength; ++j){ if(ai<tgtLen){
target[ai++] = b;
}else{
*status = U_BUFFER_OVERFLOW_ERROR; return ai;
}
}
node = 0; break;
}
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.