/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #define TH_UNICODE
/* // get c(0), t(0)
*/
c(0) = rstr[0]; /* may be '\0' */ if (!th_isthai(c(0))) return -1;
t(0) = twbtype(c(0)); if (!(t(0) & A)) return -1;
/* // get c(-1), t(-1)
*/ if (left >= 1) {
c(-1) = lstr[-1]; if (!th_isthai(c(-1))) return 0;
t(-1) = twbtype(c(-1)); if (!(t(-1) & A)) return 0; /* handle punctuation marks here */
} else {
c(-1) = 0;
t(-1) = 0;
}
/* // get c(1..2), t(1..2)
*/ for (i = 1; i <= 2; i++) { if (i >= right) {
c(i) = 0;
t(i) = 0;
} else {
c(i) = rstr[i]; /* may be '\0'; */ if (!th_isthai(c(i)))
right = i--; else {
t(i) = twbtype(c(i)); if (!(t(i) & A)) right = i--;
}
}
} /* // get c(-2..-3), t(-2..-3)
*/ for (i = -2, j = -2; i >= -3; j--) { if (j < -left) {
c(i) = 0;
t(i) = 0;
i--;
} else {
c(i) = lstr[j]; if (!th_isthai(c(i)))
left = 0; else {
t(i) = (twb_t)(th_isthai(c(i)) ? twbtype(c(i)) : 0); if (!(t(i) & A))
left = 0; else { if ((t(i + 1) & MT) && ((t(i) & VR) || (t(i + 2) & VR))) {
c(i + 1) = c(i);
t(i + 1) = t(i);
} else
i--;
}
}
}
}
/* // prohibit the unlikely
*/ if ((t(-1) & C) && (t(0) & C)) { if ((t(-1) & CHE) || (t(0) & CHB)) return -1;
} /* // special case : vlao, C/ sara_a|aa, !sara_a
*/ if ((t(-3) & (VLA | VLO)) && (t(-2) & C) && (c(0) != TH_SARA_A) &&
(c(-1) == TH_SARA_A || c(-0) == TH_SARA_AA)) return 0;
/* // prohibit break
*/ if (t(0) & NB) return -1; if (t(-1) & NE) return -1;
/* // apply 100% rules
*/ if (t(-1) & VRE) { if (c(-2) == TH_SARA_AA && c(-1) == TH_SARA_A) return 0; return -1; /* usually too short syllable, part of word */
}
if (t(-2) & VRE) return -1;
if ((t(0) & C) && (t(1) & (VR | MT)) &&
(c(2) != TH_THANTHAKHAT)) { /*?C, NB */ if ((t(-1) & (VRS | VRX)) && c(1) == TH_SARA_I) return -1; /* exception */ if (t(-1) & (V | M)) return 0; /* !C/ C, NB */ if (t(-2) & VRS) return 0; /* VRS, C / C, NB */ if (!(t(0) & C2) && c(1) == TH_SARA_I) { /* / !C2 or /c, sara_i */ if (t(-2) & VRX) return 0; /* VRX, C / C, NB ? 100%? */ if (t(-2) & VC) return 0; /* VC, C / C, NB ? 100% */
}
} if ((t(-1) & VRX) && (t(0) & CC)) return 0; /* VRX/ CC */ if ((t(-2) & VRS) && (t(-1) & C) && (t(0) & (V | M))) return 0; /* VRS, C/ !C */
if ((t(0) & CX) && (t(1) & C2) && (c(2) != TH_THANTHAKHAT)) { if ((t(-2) & A) && (t(-1) & CX)) return 0; /* A, CX / CX, C2 */ if ((t(-2) & CX) && (t(-1) & MT)) return 0; /* CX, MT / CX, C2 */
} /* // apply 90% rules
*/ if (t(0) & VL) return 0; if (t(1) & VL) return -1; if (c(-1) == TH_THANTHAKHAT && c(-2) != TH_RORUA && c(-2) != TH_LOLING) return 0;
/* //return -1; // apply 80% rules
*/ if (t(0) & CHE) { if ((t(-2) & VRS) && (t(-1) & C)) return 0; /* VRS, C/ CHE */ /*if(t(-1) & VRX) return 0; // VRX/ CHE */ if (t(-1) & VC) return 0; /* VC/ CHE */
} if (t(-1) & CHB) { if ((t(0) & C) && (t(1) & VR)) return 0; /* CHB/ CC, VR */ if (t(0) & VC) return 0; /* CHB/ VC */
}
if ((t(-2) & VL) && (t(1) & VR)) { /* VL, C? C, VR */ if (t(-2) & VLI) return 0; /* VLI,C/C,VR .*/ else { /* vlao, C ? C , VR */ if (c(1) == TH_SARA_A) return 2; /* vlao, C, C, sara_a/ */ if (t(-2) & VLO) return 0; /* VLO, C/ C, !sara_a */ if (!(t(1) & VRA)) return 0; /* VLA, C/ C, !vca */
}
} /* C,MT,C */ if ((t(-2) & C) && (t(-1) & MT) && (t(0) & CX)) return 1;
return -1;
}
/*
 * Scan forward from begin[offset] and report where the scan stops.
 *
 *   begin  - start of the text buffer
 *   length - number of th_char units in the buffer; scanning never reads at
 *            or past begin + length, and also stops at a 0 character
 *   offset - index in the buffer at which scanning starts
 *
 * Returns the index (relative to begin) of the position reached.
 *
 * Character classes are whatever th_isthai / th_isspace / th_isalpha report.
 * TrbWordBreakPos is used as follows: a negative result means "no break
 * here, try the next character", zero means "stop here", and a positive
 * result is an extra distance to advance before stopping.
 */
int TrbFollowing(const th_char* begin, int length, int offset)
{
  const th_char* const limit = begin + length;
  const th_char* cur = begin + offset;

  /* Step over leading non-Thai whitespace. */
  for (; cur < limit && *cur; cur++) {
    if (th_isthai(*cur) || !th_isspace(*cur)) break;
  }

  /* A run of non-Thai, non-space characters comes next (if any). */
  if (cur < limit && *cur && !th_isthai(*cur)) {
    int sawAlpha = FALSE;

    for (; cur < limit && *cur; cur++) {
      if (th_isthai(*cur) || th_isspace(*cur)) break;
      if (th_isalpha(*cur)) sawAlpha = TRUE;
    }

    /* The run ends the scan when it held an alphabetic character, reached
     * the end of the buffer, or is followed by non-Thai whitespace.
     * The order of the tests matters: *cur is only read when cur != limit. */
    if (sawAlpha || cur == limit || (!th_isthai(*cur) && th_isspace(*cur))) {
      return cur - begin;
    }
  }

  /* Nothing left, or the current character is not Thai: stop here. */
  if (cur == limit || *cur == 0 || !th_isthai(*cur)) {
    return cur - begin;
  }

  /* Current character is Thai; candidate breaks start after it. */
  cur++;
  if (cur < limit && *cur && th_isthai(*cur)) {
    int brk;

    /* Ask for a break at each successive Thai character until one is
     * accepted (brk >= 0) or the Thai run ends (brk stays negative). */
    for (;;) {
      brk = TrbWordBreakPos(begin, cur - begin, cur, limit - cur);
      if (brk >= 0) break;
      cur++;
      if (cur == limit || *cur == 0 || !th_isthai(*cur)) break;
    }

    if (brk > 0) cur += brk;
  }

  /* Swallow trailing characters that are neither Thai, alphabetic nor
   * whitespace, so they stay attached to the preceding text. */
  for (; cur < limit && *cur; cur++) {
    if (th_isthai(*cur) || th_isalpha(*cur) || th_isspace(*cur)) break;
  }

  return cur - begin;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.