/* The official IDNA ACE prefix is "xn--", stored as UTF-16 code units. */
static const char16_t ACE_PREFIX[] = { 0x0078, 0x006E, 0x002d, 0x002d };
/* Number of code units in ACE_PREFIX. */
#define ACE_PREFIX_LENGTH 4

/* The length of a label should not be more than MAX_LABEL_LENGTH. */
#define MAX_LABEL_LENGTH 63
/* Capacity, in char16_t units, of the stack buffers used while processing one label. */
#define MAX_LABEL_BUFFER_SIZE 100

/* The length of a domain name should not be more than MAX_DOMAIN_NAME_LENGTH. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Parenthesized so that the macro always expands as a single operand;
 * without the parentheses `MAX_IDN_BUFFER_SIZE * n` would evaluate as
 * `MAX_DOMAIN_NAME_LENGTH + (1 * n)`.
 */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)
/** * Ascertain if the given code point is a label separator as * defined by the IDNA RFC * * @param ch The code point to be ascertained * @return true if the char is a label separator * @stable ICU 2.8
*/ staticinline UBool isLabelSeparator(char16_t ch){ switch(ch){ case 0x002e: case 0x3002: case 0xFF0E: case 0xFF61: returntrue; default: returnfalse;
}
}
// Returns the length of the next label in src, excluding the separator.
//
// src       : start of the text to scan.
// srcLength : number of code units available in src, or -1 if src is
//             NUL-terminated.
// limit     : receives the position at which scanning should resume: one
//             past the separator when a separator was found, otherwise the
//             end of the input (the NUL terminator, or src+srcLength).
// done      : set to true when the end of the input was reached without
//             finding a separator. It is never set to false here, so the
//             caller must initialize it before the call.
//
// If *limit was advanced past a separator, the length returned does not
// include that separator.
static inline int32_t
getNextSeparator(char16_t *src, int32_t srcLength,
                 char16_t **limit, UBool *done){
    int32_t i;
    if(srcLength == -1){
        // NUL-terminated input: stop at the terminator or the first separator,
        // whichever comes first.
        for(i=0 ; ;i++){
            if(src[i] == 0){
                *limit = src + i; // point to null
                *done = true;
                return i;
            }
            if(isLabelSeparator(src[i])){
                *limit = src + (i+1); // go past the delimiter
                return i;
            }
        }
    }else{
        // Length-delimited input: never read at or beyond src[srcLength].
        for(i=0;i<srcLength;i++){
            if(isLabelSeparator(src[i])){
                *limit = src + (i+1); // go past the delimiter
                return i;
            }
        }
        // We have not found the delimiter, so the remaining input is the
        // last label (for a non-negative srcLength, i == srcLength here).
        *limit = src+srcLength;
        *done = true;
        return i;
    }
}
// TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; //initialize pointers to stack buffers
char16_t *b1 = b1Stack, *b2 = b2Stack;
int32_t b1Len=0, b2Len,
b1Capacity = MAX_LABEL_BUFFER_SIZE,
b2Capacity = MAX_LABEL_BUFFER_SIZE ,
reqLength=0;
// for step 3 & 4
srcIsASCII = true; for( j=0;j<b1Len;j++){ // check if output of usprep_prepare is all ASCII if(b1[j] > 0x7F){
srcIsASCII = false;
}elseif(isLDHChar(b1[j])==false){ // if the char is in ASCII range verify that it is an LDH character
srcIsLDH = false;
failPos = j;
}
} if(useSTD3ASCIIRules){ // verify 3a and 3b // 3(a) Verify the absence of non-LDH ASCII code points; that is, the // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. // 3(b) Verify the absence of leading and trailing hyphen-minus; that // is, the absence of U+002D at the beginning and end of the // sequence. if( srcIsLDH == false/* source at this point should not contain anyLDH characters */
|| b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
*status = U_IDNA_STD3_ASCII_RULES_ERROR;
/* populate the parseError struct */ if(srcIsLDH==false){ // failPos is always set the index of failure
uprv_syntaxError(b1,failPos, b1Len,parseError);
}elseif(b1[0] == HYPHEN){ // fail position is 0
uprv_syntaxError(b1,0,b1Len,parseError);
}else{ // the last index in the source is always length-1
uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
}
goto CLEANUP;
}
} // Step 4: if the source is ASCII then proceed to step 8 if(srcIsASCII){ if(b1Len <= destCapacity){
u_memmove(dest, b1, b1Len);
reqLength = b1Len;
}else{
reqLength = b1Len; goto CLEANUP;
}
}else{ // step 5 : verify the sequence does not begin with ACE prefix if(!startsWithPrefix(b1,b1Len)){
//step 6: encode the sequence with punycode
// do not preserve the case flags for now! // TODO: Preserve the case while implementing the RFE // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); // uprv_memset(caseFlags,true,b1Len);
if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/
b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR)); if(b2 == nullptr){
*status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
} //error bail out if(U_FAILURE(*status)){ goto CLEANUP;
} // TODO : Reconsider while implementing the case preserve RFE // convert all codepoints to lower case ASCII // toASCIILower(b2,b2Len);
reqLength = b2Len+ACE_PREFIX_LENGTH;
if(reqLength > destCapacity){
*status = U_BUFFER_OVERFLOW_ERROR; goto CLEANUP;
} //Step 7: prepend the ACE prefix
u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); //Step 6: copy the contents in b2 into dest
u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
}else{
*status = U_IDNA_ACE_PREFIX_ERROR; //position of failure is 0
uprv_syntaxError(b1,0,b1Len,parseError); goto CLEANUP;
}
} // step 8: verify the length of label if(reqLength > MAX_LABEL_LENGTH){
*status = U_IDNA_LABEL_TOO_LONG_ERROR;
}
// TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
// step 1: find out if all the codepoints in src are ASCII if(srcLength==-1){
srcLength = 0; for(;src[srcLength]!=0;){ if(src[srcLength]> 0x7f){
srcIsASCII = false;
}/*else if(isLDHChar(src[srcLength])==false){ // here we do not assemble surrogates // since we know that LDH code points // are in the ASCII range only srcIsLDH = false; failPos = srcLength;
}*/
srcLength++;
}
}elseif(srcLength > 0){ for(int32_t j=0; j<srcLength; j++){ if(src[j]> 0x7f){
srcIsASCII = false; break;
}/*else if(isLDHChar(src[j])==false){ // here we do not assemble surrogates // since we know that LDH code points // are in the ASCII range only srcIsLDH = false; failPos = j;
}*/
}
}else{ return 0;
}
if(srcIsASCII == false){ // step 2: process the string
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/
b1 = static_cast<char16_t*>(uprv_malloc(b1Len * U_SIZEOF_UCHAR)); if(b1==nullptr){
*status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
} //bail out on error if(U_FAILURE(*status)){ goto CLEANUP;
}
}else{
//just point src to b1
b1 = const_cast<char16_t*>(src);
b1Len = srcLength;
}
// The RFC states that // <quote> // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // </quote>
if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/
b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR)); if(b2==nullptr){
*status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP;
}
if(*status == U_BUFFER_OVERFLOW_ERROR){ // redo processing of string /* we do not have enough room so grow the buffer*/
b3 = static_cast<char16_t*>(uprv_malloc(b3Len * U_SIZEOF_UCHAR)); if(b3==nullptr){
*status = U_MEMORY_ALLOCATION_ERROR; goto CLEANUP;
}
} //bail out on error if(U_FAILURE(*status)){ goto CLEANUP;
}
//step 7: verify if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ // Cause the original to be returned.
*status = U_IDNA_VERIFICATION_ERROR; goto CLEANUP;
}
//step 8: return output of step 5
reqLength = b2Len; if(b2Len <= destCapacity) {
u_memmove(dest, b2, b2Len);
}
} else{ // See the start of this if statement for why this is commented out. // verify that STD3 ASCII rules are satisfied /*if(useSTD3ASCIIRules == true){ if( srcIsLDH == false // source contains some non-LDH characters || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ *status = U_IDNA_STD3_ASCII_RULES_ERROR;
// populate the parseError struct if(srcIsLDH==false){ // failPos is always set the index of failure uprv_syntaxError(src,failPos, srcLength,parseError); }else if(src[0] == HYPHEN){ // fail position is 0 uprv_syntaxError(src,0,srcLength,parseError); }else{ // the last index in the source is always length-1 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); }
goto CLEANUP; }
}*/ // just return the source //copy the source to destination if(srcLength <= destCapacity){
u_memmove(dest, src, srcLength);
}
reqLength = srcLength;
}
// The RFC states that // <quote> // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // </quote> // So if any step fails lets copy source to destination if(U_FAILURE(*status)){ //copy the source to destination if(dest && srcLength <= destCapacity){ // srcLength should have already been set earlier.
U_ASSERT(srcLength >= 0);
u_memmove(dest, src, srcLength);
}
reqLength = srcLength;
*status = U_ZERO_ERROR;
}
labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
labelReqLength = 0; if(!(labelLen==0 && done)){// make sure this is not a root label separator.
// The RFC states that // <quote> // ToUnicode never fails. If any step fails, then the original input // is returned immediately in that step. // </quote> // _internal_toUnicode will copy the label. /*if(labelLen==0 && done==false){ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; break;
}*/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.