Quelle ICU4XWordSegmenter.d.ts

Sprache: JAVA

Spracherkennung für: .ts vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

import { FFIError } from "./diplomat-runtime"
import { ICU4XDataProvider } from "./ICU4XDataProvider";
import { ICU4XError } from "./ICU4XError";
import { ICU4XWordBreakIteratorLatin1 } from "./ICU4XWordBreakIteratorLatin1";
import { ICU4XWordBreakIteratorUtf16 } from "./ICU4XWordBreakIteratorUtf16";
import { ICU4XWordBreakIteratorUtf8 } from "./ICU4XWordBreakIteratorUtf8";

/**

* An ICU4X word-break segmenter, capable of finding word breakpoints in strings.

* See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html Rust documentation for `WordSegmenter`} for more information.
*/
export class ICU4XWordSegmenter {

  /**

   * Construct an {@link ICU4XWordSegmenter `ICU4XWordSegmenter`} with automatically selecting the best available LSTM or dictionary payload data.

   * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese, Khmer, Lao, and Thai.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto Rust documentation for `new_auto`} for more information.
   * @throws {@link FFIError}<{@link ICU4XError}>
   */
  static create_auto(provider: ICU4XDataProvider): ICU4XWordSegmenter | never;

  /**

   * Construct an {@link ICU4XWordSegmenter `ICU4XWordSegmenter`} with LSTM payload data for Burmese, Khmer, Lao, and Thai.

   * Warning: {@link ICU4XWordSegmenter `ICU4XWordSegmenter`} created by this function doesn't handle Chinese or Japanese.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm Rust documentation for `new_lstm`} for more information.
   * @throws {@link FFIError}<{@link ICU4XError}>
   */
  static create_lstm(provider: ICU4XDataProvider): ICU4XWordSegmenter | never;

  /**

   * Construct an {@link ICU4XWordSegmenter `ICU4XWordSegmenter`} with dictionary payload data for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary Rust documentation for `new_dictionary`} for more information.
   * @throws {@link FFIError}<{@link ICU4XError}>
   */
  static create_dictionary(provider: ICU4XDataProvider): ICU4XWordSegmenter | never;

  /**

   * Segments a string.

   * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_utf8 Rust documentation for `segment_utf8`} for more information.
   */
  segment_utf8(input: string): ICU4XWordBreakIteratorUtf8;

  /**

   * Segments a string.

   * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according to the WHATWG Encoding Standard.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_utf16 Rust documentation for `segment_utf16`} for more information.
   */
  segment_utf16(input: string): ICU4XWordBreakIteratorUtf16;

  /**

   * Segments a Latin-1 string.

   * See the {@link https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_latin1 Rust documentation for `segment_latin1`} for more information.
   */
  segment_latin1(input: Uint8Array): ICU4XWordBreakIteratorLatin1;
}

¤ Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet am 2026-04-28) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.