/* * Set up the UNewData and write the converter..
*/ staticvoid
writeConverterData(ConvData *data, constchar *cnvName, constchar *cnvDir, UErrorCode *status);
int main(int argc, char* argv[])
{
ConvData data; char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
U_MAIN_INIT_ARGS(argc, argv);
/* Set up the ICU version number */
UVersionInfo icuVersion;
u_getVersion(icuVersion);
uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
/* preset then read command line options */
options[OPT_DESTDIR].value=u_getDataDirectory();
argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
if(options[OPT_VERSION].doesOccur) {
printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
printf("%s\n", U_COPYRIGHT_STRING); exit(0);
}
/* error handling, printing usage message */ if(argc<0) {
fprintf(stderr, "error in command line argument \"%s\"\n",
argv[-argc]);
} elseif(argc<2) {
argc=-1;
} if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
FILE *stdfile=argc<0 ? stderr : stdout;
fprintf(stdfile, "usage: %s [-options] files...\n" "\tread .ucm codepage mapping files and write .cnv files\n" "options:\n" "\t-h or -? or --help this usage text\n" "\t-V or --version show a version message\n" "\t-c or --copyright include a copyright notice\n" "\t-d or --destdir destination directory, followed by the path\n" "\t-v or --verbose Turn on verbose output\n" "\t-q or --quiet do not display warnings and progress\n" "\t-s or --sourcedir source directory, followed by the path\n",
argv[0]);
fprintf(stdfile, "\t --small Generate smaller .cnv files. They will be\n" "\t significantly smaller but may not be compatible with\n" "\t older versions of ICU and will require heap memory\n" "\t allocation when loaded.\n" "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
/* get the options values */
haveCopyright = options[OPT_COPYRIGHT].doesOccur; constchar *destdir = options[OPT_DESTDIR].value;
VERBOSE = options[OPT_VERBOSE].doesOccur;
QUIET = options[OPT_QUIET].doesOccur;
SMALL = options[OPT_SMALL].doesOccur;
if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
IGNORE_SISO_CHECK = true;
}
/*produces the right destination path for display*/
outFileName.truncate(outBasenameStart); if (outBasenameStart != 0)
{ /* find the last file sepator */ constchar *basename = findBasename(arg);
outFileName.append(basename, localError);
} else
{
outFileName.append(arg, localError);
} if (U_FAILURE(localError)) { return localError;
}
/*removes the extension if any is found*/
int32_t lastDotIndex = outFileName.lastIndexOf('.'); if (lastDotIndex >= outBasenameStart) {
outFileName.truncate(lastDotIndex);
}
/* the basename without extension is the converter name */ if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart); return U_BUFFER_OVERFLOW_ERROR;
}
uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
/*Adds the target extension*/
outFileName.append(CONVERTER_FILE_EXTENSION, localError); if (U_FAILURE(localError)) { return localError;
}
if (U_FAILURE(localError))
{ /* if an error is found, print out an error msg and keep going */
fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
outFileName.data(), arg, u_errorName(localError)); if(U_SUCCESS(err)) {
err = localError;
}
} else
{ /* Insure the static data name matches the file name */ /* Changed to ignore directory and only compare base name
LDH 1/2/08*/ char *p;
p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
if(p == nullptr) /* OK, try alternate */
{
p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); if(p == nullptr)
{
p=cnvName; /* If no separators, no problem */
}
} else
{
p++; /* If found separator, don't include it in compare */
} if(uprv_stricmp(p,data.staticData.name) && !QUIET)
{
fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
cnvName, CONVERTER_FILE_EXTENSION,
data.staticData.name);
}
if (strlen(cnvName) + 1 > UPRV_LENGTHOF(data.staticData.name)) {
fprintf(stderr, "converter name %s too long\n", cnvName); return U_BUFFER_OVERFLOW_ERROR;
}
uprv_strcpy((char*)data.staticData.name, cnvName);
if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
fprintf(stderr, "Error: A converter name must contain only invariant characters.\n" "%s is not a valid converter name.\n",
data.staticData.name); if(U_SUCCESS(err)) {
err = U_INVALID_TABLE_FORMAT;
}
}
/* copy values from the UCMFile to the static data */
staticData->maxBytesPerChar = static_cast<int8_t>(data->ucm->states.maxCharLength);
staticData->minBytesPerChar = static_cast<int8_t>(data->ucm->states.minCharLength);
staticData->conversionType=data->ucm->states.conversionType;
/* * Now that we know the type, copy any 'default' values from the table. * We need not check the type any further because the parser only * recognizes what we have prototypes for. * * For delta (extension-only) tables, copy values from the base file * instead, see createConverter().
*/ if(data->ucm->baseName[0]==0) {
prototype=ucnv_converterStaticData[staticData->conversionType]; if(prototype!=nullptr) { if(staticData->name[0]==0) {
uprv_strcpy((char *)staticData->name, prototype->name);
}
if( staticData->subChar1!=0 &&
(staticData->minBytesPerChar>1 ||
(staticData->conversionType!=UCNV_MBCS &&
staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
) {
fprintf(stderr, "error: defined for a type other than MBCS or EBCDIC_STATEFUL\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
/* return true if a base table was read, false for an extension table */ static UBool
readFile(ConvData *data, constchar* converterName,
UErrorCode *pErrorCode) { char line[1024]; char *end;
FileStream *convFile;
if(0==uprv_strcmp(line, "CHARMAP")) { /* read the extension table */
ucm_readTable(data->ucm, convFile, false, baseStates, pErrorCode);
} else {
fprintf(stderr, "unexpected text after the base mapping table\n");
} break;
}
T_FileStream_close(convFile);
if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
if(dataIsBase) { /* * Build a normal .cnv file with a base table * and an optional extension table.
*/
data->cnvData=MBCSOpen(data->ucm); if(data->cnvData==nullptr) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
} elseif(!data->cnvData->isValid(data->cnvData,
staticData->subChar, staticData->subCharLen)
) {
fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
} elseif(staticData->subChar1!=0 &&
!data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
) {
fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
} elseif(
data->ucm->ext->mappingsLength>0 &&
!ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, false)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
} elseif(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { /* sort the table so that it can be turned into UTF-8-friendly data */
ucm_sortTable(data->ucm->base);
}
if(U_SUCCESS(*pErrorCode)) { if( /* add the base table after ucm_checkBaseExt()! */
!data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
} else { /* * addTable() may have requested moving more mappings to the extension table * if they fit into the base toUnicode table but not into the * base fromUnicode table. * (Especially for UTF-8-friendly fromUnicode tables.) * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them * to be excluded from the extension toUnicode data. * See MBCSOkForBaseFromUnicode() for which mappings do not fit into * the base fromUnicode table.
*/
ucm_moveMappings(data->ucm->base, data->ucm->ext);
ucm_sortTable(data->ucm->ext); if(data->ucm->ext->mappingsLength>0) { /* prepare the extension table, if there is one */
data->extData=CnvExtOpen(data->ucm); if(data->extData==nullptr) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
} elseif(
!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
}
}
} else { /* Build an extension-only .cnv file. */ char baseFilename[500]; char *basename;
initConvData(&baseData);
/* assemble a path/filename for data->ucm->baseName */
uprv_strcpy(baseFilename, converterName);
basename = const_cast<char*>(findBasename(baseFilename));
uprv_strcpy(basename, data->ucm->baseName);
uprv_strcat(basename, ".ucm");
/* read the base table */
dataIsBase=readFile(&baseData, baseFilename, pErrorCode); if(U_FAILURE(*pErrorCode)) { return;
} elseif(!dataIsBase) {
fprintf(stderr, "error: the file \"%s\" is not a base table file\n", baseFilename);
*pErrorCode=U_INVALID_TABLE_FORMAT;
} else { /* prepare the extension table */
data->extData=CnvExtOpen(data->ucm); if(data->extData==nullptr) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
} else { /* fill in gaps in extension file header fields */
UCMapping *m, *mLimit;
uint8_t fallbackFlags;
if(staticData->subCharLen==0) {
uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
staticData->subCharLen=baseData.staticData.subCharLen;
} /* * do not copy subChar1 - * only use what is explicitly specified * because it cannot be unset in the extension file header
*/
if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
} elseif(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
*pErrorCode=U_INVALID_TABLE_FORMAT;
} elseif(
!ucm_checkValidity(data->ucm->ext, baseStates) ||
!ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, false)
) {
*pErrorCode=U_INVALID_TABLE_FORMAT;
} else { if(states->maxCharLength>1) { /* * When building a normal .cnv file with a base table * for an MBCS (not SBCS) table with explicit precision flags, * the MBCSAddTable() function marks some mappings for moving * to the extension table. * They fit into the base toUnicode table but not into the * base fromUnicode table. * (Note: We do have explicit precision flags because they are * required for extension table generation, and * ucm_checkBaseExt() verified it.) * * We do not call MBCSAddTable() here (we probably could) * so we need to do the analysis before building the extension table. * We assume that MBCSAddTable() will build a UTF-8-friendly table. * Redundant mappings in the extension table are ok except they cost some size. * * Do this after ucm_checkBaseExt().
*/ const MBCSData *mbcsData=MBCSGetDummy();
int32_t needsMove=0; for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
m<mLimit;
++m
) { if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
m->f|=MBCS_FROM_U_EXT_FLAG;
m->moveFlag=UCM_MOVE_TO_EXT;
++needsMove;
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.