/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * Copyright (C) 2002-2022 Németh László * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. *
* ***** END LICENSE BLOCK ***** */ /* * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada * And Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All modifications to the source code must be clearly marked as * such. Binary redistributions based on modified source code * must be clearly marked as modified versions in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/
// Builds the hash manager: first reads the settings from the affix file
// (load_config), then loads the dictionary into the hash table (load_tables).
//
// tpath - path of the dictionary file, handed to load_tables()
// apath - path of the affix file, handed to load_config()
// key   - key handed unchanged to both loaders
//
// If load_tables() reports an error, the table is replaced by a single empty
// bucket so that later "% tablesize" computations do not divide by zero.
HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
    : tablesize(0),
      tableptr(NULL),
      flag_mode(FLAG_CHAR),
      complexprefixes(0),
      utf8(0),
      forbiddenword(FORBIDDENWORD)  // forbidden word signing flag
      ,
      numaliasf(0),
      aliasf(NULL),
      aliasflen(0),
      numaliasm(0),
      aliasm(NULL) {
  langnum = 0;
  csconv = 0;
  load_config(apath, key);
  int ec = load_tables(tpath, key);
  if (ec) {
    /* error condition - what should we do here */
    HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
    free(tableptr);
    // keep tablesize to 1 to fix possible division with zero
    tablesize = 1;
    tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
    if (!tableptr) {
      // no table at all: tablesize 0 matches the NULL tableptr
      tablesize = 0;
    }
  }
}
// Releases the hash table, the flag/morphology alias tables and, depending
// on the build configuration, the UTF table or the csconv array.
HashMgr::~HashMgr() {
  if (tableptr) {
    // now pass through hash table freeing up everything
    // go through column by column of the table
    for (int i = 0; i < tablesize; i++) {
      struct hentry* pt = tableptr[i];
      struct hentry* nt = NULL;
      while (pt) {
        nt = pt->next;
        // The flag vector is released here only when no flag aliases are in
        // use, or when the entry carries ONLYUPCASEFLAG.
        if (pt->astr &&
            (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
          arena_free(pt->astr);
        arena_free(pt);
        pt = nt;
      }
    }
    free(tableptr);
  }
  tablesize = 0;

  if (aliasf) {
    for (int j = 0; j < (numaliasf); j++)
      arena_free(aliasf[j]);
    arena_free(aliasf);
    aliasf = NULL;
    if (aliasflen) {
      arena_free(aliasflen);
      aliasflen = NULL;
    }
  }
  if (aliasm) {
    for (int j = 0; j < (numaliasm); j++)
      arena_free(aliasm[j]);
    arena_free(aliasm);
    aliasm = NULL;
  }

#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
  if (utf8)
    free_utf_tbl();
#endif
#endif

#ifdef MOZILLA_CLIENT
  delete[] csconv;
#endif

  // every arena_alloc() must have been matched by an arena_free() by now
  assert(outstanding_arena_allocations == 0);
}
// lookup a root word in the hashtable
struct hentry* HashMgr::lookup(constchar* word) const { struct hentry* dp; if (tableptr) {
dp = tableptr[hash(word)]; if (!dp) return NULL; for (; dp != NULL; dp = dp->next) { if (strcmp(word, dp->word) == 0) return dp;
}
} return NULL;
}
// HashMgr::add_word - creates the hash record for one dictionary word.
//
// NOTE(review): this listing has lost its line breaks and some spaces
// (e.g. "unsignedshort"), so the signature on the next line sits behind a
// "//" marker. The function is also incomplete here: `hpw` is used without a
// visible declaration and the body never reaches its closing brace. Restore
// the missing parts from the original file before building.
//
// What the visible part does:
//  - when the word contains ignored characters, or complexprefixes is set,
//    it works on a copy: ignored characters are removed and, with
//    complexprefixes, the word (and the description, when in_desc is given
//    and aliasm is not set) is reversed;
//  - it allocates one variable-length hentry with arena_alloc() sized for
//    the word plus the description (or a pointer, when aliasm is set) and
//    returns 1 if that allocation fails;
//  - it stores the description (or the alias pointer) after the word and,
//    if the stored data contains MORPH_PHON, turns every ph: field into
//    entries of reptable.
// add a word to the hash table (private) int HashMgr::add_word(const std::string& in_word, int wcl, unsignedshort* aff, int al, const std::string* in_desc, bool onlyupcase, int captype) { const std::string* word = &in_word; const std::string* desc = in_desc;
std::string *word_copy = NULL;
std::string *desc_copy = NULL; if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) {
word_copy = new std::string(in_word);
if (!ignorechars.empty()) { if (utf8) {
wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
} else {
remove_ignored_chars(*word_copy, ignorechars);
}
}
if (complexprefixes) { if (utf8)
wcl = reverseword_utf(*word_copy); else
reverseword(*word_copy);
if (in_desc && !aliasm) {
desc_copy = new std::string(*in_desc);
if (complexprefixes) { if (utf8)
reverseword_utf(*desc_copy); else
reverseword(*desc_copy);
}
desc = desc_copy;
}
}
word = word_copy;
}
bool upcasehomonym = false; int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp =
(struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); if (!hp) { delete desc_copy; delete word_copy; return 1;
}
// store the description string or its pointer if (desc) {
hp->var |= H_OPT; if (aliasm) {
hp->var |= H_OPT_ALIASM;
store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));
} else {
strcpy(hpw + word->size() + 1, desc->c_str());
} if (strstr(HENTRY_DATA(hp), MORPH_PHON)) {
hp->var |= H_OPT_PHON; // store ph: fields (pronounciation, misspellings, old orthography etc.) // of a morphological description in reptable to use in REP replacements. if (reptable.capacity() < (unsignedint)(tablesize/MORPH_PHON_RATIO))
reptable.reserve(tablesize/MORPH_PHON_RATIO);
std::string fields = HENTRY_DATA(hp);
std::string::const_iterator iter = fields.begin();
std::string::const_iterator start_piece = mystrsep(fields, iter); while (start_piece != fields.end()) { if (std::string(start_piece, iter).find(MORPH_PHON) == 0) {
std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1); if (ph.size() > 0) {
std::vector<w_char> w;
size_t strippatt;
std::string wordpart; // dictionary based REP replacement, separated by "->" // for example "pretty ph:prity ph:priti->pretti" to handle // both prity -> pretty and pritier -> prettiest suggestions. if (((strippatt = ph.find("->")) != std::string::npos) &&
(strippatt > 0) && (strippatt < ph.size() - 2)) {
wordpart = ph.substr(strippatt + 2);
ph.erase(ph.begin() + strippatt, ph.end());
} else
wordpart = in_word; // when the ph: field ends with the character *, // strip last character of the pattern and the replacement // to match in REP suggestions also at character changes, // for example, "pretty ph:prity*" results "prit->prett" // REP replacement instead of "prity->pretty", to get // prity->pretty and pritiest->prettiest suggestions. if (ph.at(ph.size()-1) == '*') {
strippatt = 1;
size_t stripword = 0; if (utf8) { while ((strippatt < ph.size()) &&
((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80))
++strippatt; while ((stripword < wordpart.size()) &&
((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80))
++stripword;
}
++strippatt;
++stripword; if ((ph.size() > strippatt) && (wordpart.size() > stripword)) {
ph.erase(ph.size()-strippatt, strippatt);
// NOTE(review): the offset below is computed from in_word.size(), but
// wordpart may have been taken from the text after "->" and then differs
// from in_word - this looks like it should be wordpart.size(); confirm.
wordpart.erase(in_word.size()-stripword, stripword);
}
} // capitalize lowercase pattern for capitalized words to support // good suggestions also for capitalized misspellings, eg. // Wednesday ph:wendsay // results wendsay -> Wednesday and Wendsay -> Wednesday, too. if (captype==INITCAP) {
std::string ph_capitalized; if (utf8) {
u8_u16(w, ph); if (get_captype_utf8(w, langnum) == NOCAP) {
mkinitcap_utf(w, langnum);
u16_u8(ph_capitalized, w);
}
} elseif (get_captype(ph, csconv) == NOCAP)
mkinitcap(ph_capitalized, csconv);
if (ph_capitalized.size() > 0) { // add also lowercase word in the case of German or // Hungarian to support lowercase suggestions lowercased by // compound word generation or derivational suffixes // (for example by adjectival suffix "-i" of geographical // names in Hungarian: // Massachusetts ph:messzecsuzec // messzecsuzeci -> massachusettsi (adjective) // For lowercasing by conditional PFX rules, see // tests/germancompounding test example or the // Hungarian dictionary.) if (langnum == LANG_de || langnum == LANG_hu) {
std::string wordpart_lower(wordpart); if (utf8) {
u8_u16(w, wordpart_lower);
mkallsmall_utf(w, langnum);
u16_u8(wordpart_lower, w);
} else {
mkallsmall(wordpart_lower, csconv);
}
reptable.push_back(replentry());
reptable.back().pattern.assign(ph);
reptable.back().outstrings[0].assign(wordpart_lower);
}
reptable.push_back(replentry());
reptable.back().pattern.assign(ph_capitalized);
reptable.back().outstrings[0].assign(wordpart);
}
}
reptable.push_back(replentry());
reptable.back().pattern.assign(ph);
reptable.back().outstrings[0].assign(wordpart);
}
}
start_piece = mystrsep(fields, iter);
}
}
}
// NOTE(review): the listing ends the function here, without a closing brace
// or return; the remainder of the body is not visible.
// NOTE(review): only the opening of this function survives in this listing;
// the rest of the body (and its closing brace) is missing, and the parameter
// type is fused into "unsignedshort". Restore it from the original file.
// Visible part: a NULL flag vector is treated as having no flags.
int HashMgr::add_hidden_capitalized_word(const std::string& word, int wcl, unsignedshort* flags, int flagslen, const std::string* dp, int captype) { if (flags == NULL)
flagslen = 0;
// detect captype and modify word length for UTF-8 encoding int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) { int len; if (utf8) {
len = u8_u16(workbuf, word);
*captype = get_captype_utf8(workbuf, langnum);
} else {
len = word.size();
*captype = get_captype(word, csconv);
} return len;
}
// Convenience overload: same result as the three-argument version, using a
// temporary conversion buffer that is discarded on return.
int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
  std::vector<w_char> scratch;
  const int length = get_clen_and_captype(word, captype, scratch);
  return length;
}
// remove word (personal dictionary function for standalone applications) int HashMgr::remove(const std::string& word) { struct hentry* dp = lookup(word.c_str()); while (dp) { if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { unsignedshort* flags =
(unsignedshort*)arena_alloc(sizeof(unsignedshort) * (dp->alen + 1)); if (!flags) return 1; for (int i = 0; i < dp->alen; i++)
flags[i] = dp->astr[i];
flags[dp->alen] = forbiddenword;
arena_free(dp->astr);
dp->astr = flags;
dp->alen++;
std::sort(flags, flags + dp->alen);
}
dp = dp->next_homonym;
} return 0;
}
/* remove forbidden flag to add a personal word to the hash */ int HashMgr::remove_forbidden_flag(const std::string& word) { struct hentry* dp = lookup(word.c_str()); if (!dp) return 1; while (dp) { if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen))
dp->alen = 0; // XXX forbidden words of personal dic.
dp = dp->next_homonym;
} return 0;
}
// add a custom dic. word to the hash table (public) int HashMgr::add(const std::string& word) { if (remove_forbidden_flag(word)) { int captype; int al = 0; unsignedshort* flags = NULL; int wcl = get_clen_and_captype(word, &captype);
add_word(word, wcl, flags, al, NULL, false, captype); return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
captype);
} return 0;
}
int HashMgr::add_with_affix(const std::string& word, const std::string& example) { // detect captype and modify word length for UTF-8 encoding struct hentry* dp = lookup(example.c_str());
remove_forbidden_flag(word); if (dp && dp->astr) { int captype; int wcl = get_clen_and_captype(word, &captype); if (aliasf) {
add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
} else { unsignedshort* flags =
(unsignedshort*) arena_alloc(dp->alen * sizeof(unsignedshort)); if (flags) {
memcpy((void*)flags, (void*)dp->astr,
dp->alen * sizeof(unsignedshort));
add_word(word, wcl, flags, dp->alen, NULL, false, captype);
} else return 1;
} return add_hidden_capitalized_word(word, wcl, dp->astr,
dp->alen, NULL, captype);
} return 1;
}
// walk the hash table entry by entry - null at end // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const { if (hp && hp->next != NULL) return hp->next; for (col++; col < tablesize; col++) { if (tableptr[col]) return tableptr[col];
} // null at end and reset to start
col = -1; return NULL;
}
// load a munched word list and build a hash table on the fly int HashMgr::load_tables(constchar* tpath, constchar* key) { // open dictionary file
FileMgr* dict = new FileMgr(tpath, key); if (dict == NULL) return 1;
// first read the first line of file to get hash table size */
std::string ts; if (!dict->getline(ts)) {
HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); delete dict; return 2;
}
mychomp(ts);
/* remove byte order mark */ if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
ts.erase(0, 3);
}
tablesize = atoi(ts.c_str());
int nExtra = 5 + USERWORD;
if (tablesize <= 0 ||
(tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / int(sizeof(struct hentry*)))) {
HUNSPELL_WARNING(
stderr, "error: line 1: missing or bad word count in the dic file\n"); delete dict; return 4;
}
tablesize += nExtra; if ((tablesize % 2) == 0)
tablesize++;
// allocate the hash table
tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); if (!tableptr) { delete dict; return 3;
}
// loop through all words on much list and add to hash // table and create word and affix strings
std::vector<w_char> workbuf;
while (dict->getline(ts)) {
mychomp(ts); // split each line into word and morphological description
size_t dp_pos = 0; while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) { if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) { for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)
; if (dp_pos == 0) { // missing word
dp_pos = std::string::npos;
} else {
++dp_pos;
} break;
}
++dp_pos;
}
// tabulator is the old morphological field separator
size_t dp2_pos = ts.find('\t'); if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {
dp_pos = dp2_pos + 1;
}
// split each line into word and affix char strings // "\/" signs slash in words (not affix separator) // "/" at beginning of the line is word character (not affix separator)
size_t ap_pos = ts.find('/'); while (ap_pos != std::string::npos) { if (ap_pos == 0) {
++ap_pos; continue;
} elseif (ts[ap_pos - 1] != '\\') break; // replace "\/" with "/"
ts.erase(ap_pos - 1, 1);
ap_pos = ts.find('/', ap_pos);
}
unsignedshort* flags; int al; if (ap_pos != std::string::npos && ap_pos != ts.size()) {
std::string ap(ts.substr(ap_pos + 1));
ts.resize(ap_pos); if (aliasf) { int index = atoi(ap.c_str());
al = get_aliasf(index, &flags, dict); if (!al) {
HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
dict->getlinenum());
}
} else {
al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); if (al == -1) {
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); delete dict; return 6;
}
std::sort(flags, flags + al);
}
} else {
al = 0;
flags = NULL;
}
int captype; int wcl = get_clen_and_captype(ts, &captype, workbuf); const std::string *dp_str = dp.empty() ? NULL : &dp; // add the word and its index plus its capitalized form optionally if (add_word(ts, wcl, flags, al, dp_str, false, captype) ||
add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) { delete dict; return 5;
}
}
delete dict; return 0;
}
// the hash function is a simple load and rotate // algorithm borrowed int HashMgr::hash(constchar* word) const { unsignedlong hv = 0; for (int i = 0; i < 4 && *word != 0; i++)
hv = (hv << 8) | (*word++); while (*word != 0) {
ROTATE(hv, ROTATE_LEN);
hv ^= (*word++);
} return (unsignedlong)hv % tablesize;
}
int HashMgr::decode_flags(unsignedshort** result, const std::string& flags, FileMgr* af, bool arena) const { auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; int len; if (flags.empty()) {
*result = NULL; return 0;
} switch (flag_mode) { case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
len = flags.size(); if (len % 2 == 1)
HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
af->getlinenum());
len /= 2;
*result = (unsignedshort*)alloc(len * sizeof(unsignedshort)); if (!*result) return -1; for (int i = 0; i < len; i++) {
(*result)[i] = ((unsignedshort)((unsignedchar)flags[i * 2]) << 8) +
(unsignedchar)flags[i * 2 + 1];
} break;
} case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 // 23 233)
len = 1; unsignedshort* dest; for (size_t i = 0; i < flags.size(); ++i) { if (flags[i] == ',')
len++;
}
*result = (unsignedshort*)alloc(len * sizeof(unsignedshort)); if (!*result) return -1;
dest = *result; constchar* src = flags.c_str(); for (constchar* p = src; *p; p++) { if (*p == ',') { int i = atoi(src); if (i >= DEFAULTFLAGS)
HUNSPELL_WARNING(
stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
*dest = (unsignedshort)i; if (*dest == 0)
HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
af->getlinenum());
src = p + 1;
dest++;
}
} int i = atoi(src); if (i >= DEFAULTFLAGS)
HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
*dest = (unsignedshort)i; if (*dest == 0)
HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
af->getlinenum()); break;
} case FLAG_UNI: { // UTF-8 characters
std::vector<w_char> w;
u8_u16(w, flags);
len = w.size();
*result = (unsignedshort*)alloc(len * sizeof(unsignedshort)); if (!*result) return -1;
memcpy(*result, w.data(), len * sizeof(short)); break;
} default: { // Ispell's one-character flags (erfg -> e r f g) unsignedshort* dest;
len = flags.size();
*result = (unsignedshort*)alloc(len * sizeof(unsignedshort)); if (!*result) return -1;
dest = *result; for (size_t i = 0; i < flags.size(); ++i) {
*dest = (unsignedchar)flags[i];
dest++;
}
}
} return len;
}
bool HashMgr::decode_flags(std::vector<unsignedshort>& result, const std::string& flags, FileMgr* af) const { if (flags.empty()) { returnfalse;
} switch (flag_mode) { case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
size_t len = flags.size(); if (len % 2 == 1)
HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
af->getlinenum());
len /= 2;
result.reserve(result.size() + len); for (size_t i = 0; i < len; ++i) {
result.push_back(((unsignedshort)((unsignedchar)flags[i * 2]) << 8) +
(unsignedchar)flags[i * 2 + 1]);
} break;
} case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 // 23 233) constchar* src = flags.c_str(); for (constchar* p = src; *p; p++) { if (*p == ',') { int i = atoi(src); if (i >= DEFAULTFLAGS)
HUNSPELL_WARNING(
stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
result.push_back((unsignedshort)i); if (result.back() == 0)
HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
af->getlinenum());
src = p + 1;
}
} int i = atoi(src); if (i >= DEFAULTFLAGS)
HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n",
af->getlinenum(), i, DEFAULTFLAGS - 1);
result.push_back((unsignedshort)i); if (result.back() == 0)
HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
af->getlinenum()); break;
} case FLAG_UNI: { // UTF-8 characters
std::vector<w_char> w;
u8_u16(w, flags);
size_t len = w.size();
size_t origsize = result.size();
result.resize(origsize + len);
memcpy(result.data() + origsize, w.data(), len * sizeof(short)); break;
} default: { // Ispell's one-character flags (erfg -> e r f g)
result.reserve(flags.size()); for (size_t i = 0; i < flags.size(); ++i) {
result.push_back((unsignedchar)flags[i]);
}
}
} returntrue;
}
unsignedshort HashMgr::decode_flag(constchar* f) const { unsignedshort s = 0; int i; switch (flag_mode) { case FLAG_LONG:
s = ((unsignedshort)((unsignedchar)f[0]) << 8) + (unsignedchar)f[1]; break; case FLAG_NUM:
i = atoi(f); if (i >= DEFAULTFLAGS)
HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
i, DEFAULTFLAGS - 1);
s = (unsignedshort)i; break; case FLAG_UNI: {
std::vector<w_char> w;
u8_u16(w, f); if (!w.empty())
memcpy(&s, w.data(), 1 * sizeof(short)); break;
} default:
s = *(unsignedchar*)f;
} if (s == 0)
HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); return s;
}
// This function is only called by external consumers, and so using the default // allocator with mystrdup is correct. char* HashMgr::encode_flag(unsignedshort f) const { if (f == 0) return mystrdup("(NULL)");
std::string ch; if (flag_mode == FLAG_LONG) {
ch.push_back((unsignedchar)(f >> 8));
ch.push_back((unsignedchar)(f - ((f >> 8) << 8)));
} elseif (flag_mode == FLAG_NUM) {
std::ostringstream stream;
stream << f;
ch = stream.str();
} elseif (flag_mode == FLAG_UNI) { const w_char* w_c = (const w_char*)&f;
std::vector<w_char> w(w_c, w_c + 1);
u16_u8(ch, w);
} else {
ch.push_back((unsignedchar)(f));
} return mystrdup(ch.c_str());
}
// HashMgr::load_config - reads the affix file line by line and picks up the
// settings this class needs.
//
// NOTE(review): this listing has lost its line breaks and some spaces
// ("constchar"), so the signature on the next line sits behind a "//"
// marker, and the function is cut off inside its read loop: only the BOM
// removal and the FLAG handling are visible. Restore the rest from the
// original file before building.
//
// Visible FLAG handling: a line starting with "FLAG" followed by white space
// selects FLAG_LONG, FLAG_NUM or FLAG_UNI when it contains "long", "num" or
// "UTF-8"; a second FLAG line, or one without a known parameter, is
// reported with a warning.
// read in aff file and set flag mode int HashMgr::load_config(constchar* affpath, constchar* key) { int firstline = 1;
// open the affix file
FileMgr* afflst = new FileMgr(affpath, key); if (!afflst) {
HUNSPELL_WARNING(
stderr, "Error - could not open affix description file %s\n", affpath); return 1;
}
// read in each line ignoring any that do not // start with a known line type indicator
std::string line; while (afflst->getline(line)) {
mychomp(line);
/* remove byte order mark */ if (firstline) {
firstline = 0; if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
line.erase(0, 3);
}
}
// NOTE(review): the comment on the next line mentions the "try string", but
// the code that follows it handles the FLAG parameter.
/* parse in the try string */ if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) { if (flag_mode != FLAG_CHAR) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG " "affix file parameter\n",
afflst->getlinenum());
} if (line.find("long") != std::string::npos)
flag_mode = FLAG_LONG; if (line.find("num") != std::string::npos)
flag_mode = FLAG_NUM; if (line.find("UTF-8") != std::string::npos)
flag_mode = FLAG_UNI; if (flag_mode == FLAG_CHAR) {
HUNSPELL_WARNING(
stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
afflst->getlinenum());
}
}
// NOTE(review): the code ends here, inside the while loop; the remaining
// part of the function is not visible in this listing.
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.