it("handles CJK punctuation and symbols in the extended range", () => {
  // "⺀" is U+2E80, which sits in the CJK Radicals Supplement range.
  // A single such character is expected to weigh exactly one token's worth
  // of characters.
  const radical = "⺀";
  const weighted = estimateStringChars(radical);
  expect(weighted).toBe(CHARS_PER_TOKEN_ESTIMATE);
});
it("does not inflate standard Latin characters", () => {
  // Plain Latin prose must be measured at its raw string length.
  const pangram = "The quick brown fox jumps over the lazy dog";
  const weighted = estimateStringChars(pangram);
  expect(weighted).toBe(pangram.length);
});
it("does not inflate numbers and basic punctuation", () => {
  // Digits, ASCII punctuation and symbols must be measured at their raw
  // string length, just like letters.
  const sample = "123.45, hello! @#$%";
  const weighted = estimateStringChars(sample);
  expect(weighted).toBe(sample.length);
});
it("counts CJK Extension B characters as one code point", () => {
  // U+20000 is a CJK Extension B ideograph. It lies outside the BMP, so
  // UTF-16 represents it as a surrogate pair (string length 2).
  // It is written as a code point escape so the literal cannot be silently
  // dropped by tooling that strips astral characters, which would turn this
  // into a test of the empty string.
  // Result = 1 + 1 * 3 = 4 (exactly CHARS_PER_TOKEN_ESTIMATE)
  const extensionB = "\u{20000}";
  expect(estimateStringChars(extensionB)).toBe(CHARS_PER_TOKEN_ESTIMATE);
});
it("does not collapse non-CJK surrogate pairs like emoji", () => {
  // U+1F600 (grinning face) is used as a representative emoji: it is a
  // surrogate pair in UTF-16, but is not expected to be matched by
  // NON_LATIN_RE, so its weighted length should remain the UTF-16 length (2).
  // Written as a code point escape so the character cannot be stripped from
  // the source, which would leave an empty string under test.
  const emoji = "\u{1F600}";
  expect(estimateStringChars(emoji)).toBe(2);
});
it("keeps mixed CJK and emoji weighting consistent", () => {
  // "你" counts as 4, the emoji (U+1F600, a UTF-16 surrogate pair) remains 2
  // => total 6.
  // The emoji is written as a code point escape so it cannot be stripped from
  // the source, which would leave only "你" under test.
  const mixed = "你\u{1F600}";
  expect(estimateStringChars(mixed)).toBe(6);
});
it("yields ~1 token per CJK char when divided by CHARS_PER_TOKEN_ESTIMATE", () => {
  // N CJK chars should estimate as ~N tokens.
  const cjk = "这是一个测试用的句子呢";
  const estimated = estimateStringChars(cjk);
  const tokens = Math.ceil(estimated / CHARS_PER_TOKEN_ESTIMATE);
  // Each CJK char ≈ 1 token, so the token count should match the string
  // length (every character in the sample is a single UTF-16 code unit).
  expect(tokens).toBe(cjk.length);
});
});
describe("estimateTokensFromChars", () => {
it("divides by CHARS_PER_TOKEN_ESTIMATE and rounds up", () => {
  // Each row is [character count, expected token count].
  // 9 -> 3 pins the round-up behaviour; 0 -> 0 pins the empty input.
  const cases = [
    [8, 2],
    [9, 3],
    [0, 0],
  ];
  for (const [chars, expectedTokens] of cases) {
    expect(estimateTokensFromChars(chars)).toBe(expectedTokens);
  }
});
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.