Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/C/Firefox/third_party/rust/regex/testdata/ (Browser von der Mozilla Stiftung Version 136.0.1^©) Datei vom 10.2.2025 mit Größe 11 kB

Quelle word-boundary.toml Sprache: unbekannt

Spracherkennung für: .toml vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

# Some of these are cribbed from RE2's test suite.

# These test \b. Below are tests for \B.
# wb1-wb3: a bare ASCII \b (unicode = false). The empty haystack contains no
# word byte and therefore no boundary. In a haystack that is a single run of
# word bytes, the only boundaries are the zero-width positions at its start
# and end.
[[test]]
name = "wb1"
regex = '\b'
haystack = ""
matches = []
unicode = false

[[test]]
name = "wb2"
regex = '\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false

[[test]]
name = "wb3"
regex = '\b'
haystack = "ab"
matches = [[0, 0], [2, 2]]
unicode = false

# wb4-wb6: \b combined with the ^ and $ anchors on "ab". A single anchor pins
# the match to one end of the haystack. Requiring both around one zero-width
# \b (wb6) cannot succeed, because the start (0) and end (2) of "ab" are
# different positions.
[[test]]
name = "wb4"
regex = '^\b'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "wb5"
regex = '\b$'
haystack = "ab"
matches = [[2, 2]]
unicode = false

[[test]]
name = "wb6"
regex = '^\b$'
haystack = "ab"
matches = []
unicode = false

# wb7-wb12: \b next to a literal. A boundary needs a word byte on exactly one
# side (the ends of the haystack count as non-word). Hence the "bar" inside
# "nobar" is rejected by a leading \b (wb7), while the "bar" at the end of
# "foobar" is accepted by a trailing \b (wb11). wb10 and wb12 use "\n" as the
# non-word neighbor.
[[test]]
name = "wb7"
regex = '\bbar\b'
haystack = "nobar bar foo bar"
matches = [[6, 9], [14, 17]]
unicode = false

# Only the last "a" (offset 3) is followed by a non-word byte.
[[test]]
name = "wb8"
regex = 'a\b'
haystack = "faoa x"
matches = [[3, 4]]
unicode = false

[[test]]
name = "wb9"
regex = '\bbar'
haystack = "bar x"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb10"
regex = '\bbar'
haystack = "foo\nbar x"
matches = [[4, 7]]
unicode = false

[[test]]
name = "wb11"
regex = 'bar\b'
haystack = "foobar"
matches = [[3, 6]]
unicode = false

[[test]]
name = "wb12"
regex = 'bar\b'
haystack = "foobar\nxxx"
matches = [[3, 6]]
unicode = false

# wb13-wb21: \b on one or both sides of an alternation. With \b on both sides
# the alternative must be a whole word: the single-letter branch [A-Z] matches
# a lone "X" (wb16) or a lone "N" (wb21), but in "XY" (wb17) each letter has
# another word byte beside it, so no candidate satisfies both boundaries.
[[test]]
name = "wb13"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb14"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb15"
regex = '\b(?:foo|bar|[A-Z])'
haystack = "foo"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb16"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "X"
matches = [[0, 1]]
unicode = false

[[test]]
name = "wb17"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "XY"
matches = []
unicode = false

[[test]]
name = "wb18"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "bar"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb19"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb20"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false

# "foo" in "ffoo" and "bar" in "bbar" are preceded by a word byte, so only the
# standalone "N" at offset 10 qualifies.
[[test]]
name = "wb21"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "ffoo bbar N x"
matches = [[10, 11]]
unicode = false

# wb22-wb23: the trailing \b decides which alternation branch can complete.
# On "foo" (wb23) the first branch "fo" would end between two word bytes, so
# the expected match is the full word via the second branch.
[[test]]
name = "wb22"
regex = '\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false

[[test]]
name = "wb23"
regex = '\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false

# wb24-wb29: repeated \b and \b in front of $. Repeating the zero-width
# assertion changes nothing: wb24/wb25 expect the same matches as the single
# \b tests wb1/wb2. '\b$' matches only at the end of the haystack, and only
# when the last byte is a word byte.
[[test]]
name = "wb24"
regex = '\b\b'
haystack = ""
matches = []
unicode = false

[[test]]
name = "wb25"
regex = '\b\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false

[[test]]
name = "wb26"
regex = '\b$'
haystack = ""
matches = []
unicode = false

[[test]]
name = "wb27"
regex = '\b$'
haystack = "x"
matches = [[1, 1]]
unicode = false

[[test]]
name = "wb28"
regex = '\b$'
haystack = "y x"
matches = [[3, 3]]
unicode = false

# wb29 scopes ASCII mode to the \b alone via (?-u:...) instead of setting
# `unicode = false` for the whole regex, which also contains `.`.
[[test]]
name = "wb29"
regex = '(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]

# wb30-wb40: \b mixed with ^ and $. '^\b$' never matches here: "" has no
# boundary (wb32), and in "x" the start and end are distinct positions (wb33).
# Stacking several anchors or several \b (wb36-wb39) expects the same results
# as the single forms wb32-wb35. The tests whose regex contains `.` scope
# ASCII mode to the \b with (?-u:...) rather than setting `unicode = false`.
[[test]]
name = "wb30"
regex = '^\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false

[[test]]
name = "wb31"
regex = '^\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false

[[test]]
name = "wb32"
regex = '^\b$'
haystack = ""
matches = []
unicode = false

[[test]]
name = "wb33"
regex = '^\b$'
haystack = "x"
matches = []
unicode = false

[[test]]
name = "wb34"
regex = '^(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]

[[test]]
name = "wb35"
regex = '^(?-u:\b).(?-u:\b)$'
haystack = "x"
matches = [[0, 1]]

[[test]]
name = "wb36"
regex = '^^^^^\b$$$$$'
haystack = ""
matches = []
unicode = false

[[test]]
name = "wb37"
regex = '^^^^^(?-u:\b).$$$$$'
haystack = "x"
matches = [[0, 1]]

[[test]]
name = "wb38"
regex = '^^^^^\b$$$$$'
haystack = "x"
matches = []
unicode = false

[[test]]
name = "wb39"
regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
haystack = "x"
matches = [[0, 1]]

# The two boundaries confine the .+ to the word "abc" (bytes 2..5) between
# the "$$" runs.
[[test]]
name = "wb40"
regex = '(?-u:\b).+(?-u:\b)'
haystack = "$$abc$$"
matches = [[2, 5]]

# wb41-wb44: wb41 consists of one-letter words separated by single spaces, so
# every position 0..5 is a boundary. wb43 and wb44 place \b on the other side
# of the anchor and expect the same matches as '^\b' (wb4) and '\b$' (wb5).
[[test]]
name = "wb41"
regex = '\b'
haystack = "a b c"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false

[[test]]
name = "wb42"
regex = '\bfoo\b'
haystack = "zzz foo zzz"
matches = [[4, 7]]
unicode = false

[[test]]
name = "wb43"
regex = '\b^'
haystack = "ab"
matches = [[0, 0]]
unicode = false

[[test]]
name = "wb44"
regex = '$\b'
haystack = "ab"
matches = [[2, 2]]
unicode = false

# Tests for \B. Note that an ASCII \B (i.e. with unicode = false) is not
# allowed if UTF-8 mode is enabled, so we have to disable UTF-8 mode
# (utf8 = false) for most of these tests. This is because an ASCII \B can
# match at non-UTF-8 boundaries.
# nb1-nb8: \B next to a literal or an alternation. \B holds where both
# neighbors are word bytes or both are non-word (the ends of the haystack
# count as non-word). nb2-nb6 and nb8 reuse the haystacks of wb8-wb12 and wb14
# with \B in place of \b.
# In nb1 only the "foo" inside "xfoox" has word bytes on both sides.
[[test]]
name = "nb1"
regex = '\Bfoo\B'
haystack = "n foo xfoox that"
matches = [[7, 10]]
unicode = false
utf8 = false

# Only the first "a" (offset 1) is followed by another word byte.
[[test]]
name = "nb2"
regex = 'a\B'
haystack = "faoa x"
matches = [[1, 2]]
unicode = false
utf8 = false

[[test]]
name = "nb3"
regex = '\Bbar'
haystack = "bar x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb4"
regex = '\Bbar'
haystack = "foo\nbar x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb5"
regex = 'bar\B'
haystack = "foobar"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb6"
regex = 'bar\B'
haystack = "foobar\nxxx"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb7"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foox"
matches = [[0, 3]]
unicode = false
utf8 = false

[[test]]
name = "nb8"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foo\n"
matches = []
unicode = false
utf8 = false

# nb9-nb10: a bare \B. The empty haystack has a single position and it is not
# a boundary, so \B matches there. In "x" both positions (0 and 1) are
# boundaries, so \B matches nowhere.
[[test]]
name = "nb9"
regex = '\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb10"
regex = '\B'
haystack = "x"
matches = []
unicode = false
utf8 = false

# nb11-nb20: \B on one or both sides of an alternation. With \B on both sides
# the candidate must sit strictly inside a longer run of word bytes: "XY"
# (nb13) has no interior letter, while "XYZ" (nb14) has "Y".
[[test]]
name = "nb11"
regex = '\B(?:foo|bar|[A-Z])'
haystack = "foo"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb12"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xXy"
matches = [[1, 2]]
unicode = false
utf8 = false

[[test]]
name = "nb13"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XY"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb14"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XYZ"
matches = [[1, 2]]
unicode = false
utf8 = false

[[test]]
name = "nb15"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "abara"
matches = [[1, 4]]
unicode = false
utf8 = false

# nb16 expects "_" to count as a word byte; nb17 has "\n" in the same spot
# and expects no match.
[[test]]
name = "nb16"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo_"
matches = [[1, 4]]
unicode = false
utf8 = false

[[test]]
name = "nb17"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo\n"
matches = []
unicode = false
utf8 = false

# Only "N" (offset 9) has a word byte on each side; "X" ends the haystack.
[[test]]
name = "nb18"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "foo bar vNX"
matches = [[9, 10]]
unicode = false
utf8 = false

# In "xfoo" only the branch "fo" can be followed by \B, since "foo" would end
# at the end of the haystack.
[[test]]
name = "nb19"
regex = '\B(?:fo|foo)\B'
haystack = "xfoo"
matches = [[1, 3]]
unicode = false
utf8 = false

# In "xfooo" both branches satisfy the trailing \B; the expected match is the
# first branch, "foo".
[[test]]
name = "nb20"
regex = '\B(?:foo|fo)\B'
haystack = "xfooo"
matches = [[1, 4]]
unicode = false
utf8 = false

# nb21-nb26: repeated \B and \B in front of $, on the same haystacks as
# wb24-wb29. \B matches only in the empty haystack. In every non-empty
# haystack here the tested position (the end, or the start for nb26) has a
# word byte on exactly one side and is therefore a boundary.
[[test]]
name = "nb21"
regex = '\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb22"
regex = '\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb23"
regex = '\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb24"
regex = '\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb25"
regex = '\B$'
haystack = "y x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb26"
regex = '\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false

# nb27-nb28: anchored \B before an alternation, the counterparts of wb30 and
# wb31 (haystacks "fo" and "foo"). Offset 0 lies between the start of the
# haystack and the word byte "f", which is a boundary, so '^\B' fails on both
# haystacks and neither test expects a match.
[[test]]
name = "nb27"
regex = '^\B(?:fo|foo)\B'
haystack = "fo"
matches = []
unicode = false
utf8 = false

# Uses "foo" so that this case is not an exact duplicate of nb27.
[[test]]
name = "nb28"
regex = '^\B(?:fo|foo)\B'
haystack = "foo"
matches = []
unicode = false
utf8 = false

# nb29-nb39: \B mixed with ^ and $, the counterparts of wb32-wb39. One rule
# explains every expectation: offset 0 of the empty haystack is not a
# boundary, so an anchored \B matches there; offset 0 of "x" is a boundary, so
# nothing that starts with ^\B can match "x".
[[test]]
name = "nb29"
regex = '^\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb30"
regex = '^\B'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb31"
regex = '^\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb32"
regex = '^\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb33"
regex = '^\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb34"
regex = '^\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb35"
regex = '^\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb36"
regex = '^\B.\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false

# nb37-nb39 repeat nb33, nb35 and nb34 with every anchor written five times.
[[test]]
name = "nb37"
regex = '^^^^^\B$$$$$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false

[[test]]
name = "nb38"
regex = '^^^^^\B.$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false

[[test]]
name = "nb39"
regex = '^^^^^\B$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false

# unicode1* and unicode2* work for both Unicode and ASCII because all matches
# are reported as byte offsets, and « and » do not correspond to word
# boundaries at either the character or byte level.
# Each test comes in two forms: one without a `unicode` key and an
# "-only-ascii" twin with `unicode = false`; both expect identical offsets.
# The offsets show the guillemet occupying two bytes: "x" is at bytes 2..3 in
# "«x" and at bytes 0..1 in "x»".
[[test]]
name = "unicode1"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]

[[test]]
name = "unicode1-only-ascii"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]
unicode = false

[[test]]
name = "unicode2"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]

[[test]]
name = "unicode2-only-ascii"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]
unicode = false

# ASCII word boundaries are completely oblivious to Unicode characters, so
# even though β is a word character, an ASCII \b sees a word boundary between
# it and an adjacent ASCII word character. (The ASCII \b only looks at the
# leading byte of β.) For Unicode \b, the tests are precisely inverted.
# In 'áxβ' the "x" occupies bytes 2..3. The tests without `unicode = false`
# expect \b to fail and \B to succeed around "x"; the "-only-ascii" twins
# expect the exact opposite.
[[test]]
name = "unicode3"
regex = '\bx\b'
haystack = 'áxβ'
matches = []

[[test]]
name = "unicode3-only-ascii"
regex = '\bx\b'
haystack = 'áxβ'
matches = [[2, 3]]
unicode = false

[[test]]
name = "unicode4"
regex = '\Bx\B'
haystack = 'áxβ'
matches = [[2, 3]]

[[test]]
name = "unicode4-only-ascii"
regex = '\Bx\B'
haystack = 'áxβ'
matches = []
unicode = false
utf8 = false

# The same as the unicode5-not* tests further below, but with \b instead of \B
# as a sanity check.
# unicode5*: all four variants expect \b at offsets 0 and 1 only, i.e. on
# either side of the leading "0". The first two use the codepoint U+7EF5E
# after the "0"; the "-noutf8" pair uses '0\xFF\xFF\xFF\xFF' with
# `unescape = true` (presumably the harness decodes the \xFF escapes into raw
# bytes, since a TOML literal string keeps them verbatim -- confirm against
# the test runner).
[[test]]
name = "unicode5"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]

[[test]]
name = "unicode5-only-ascii"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]
unicode = false
utf8 = false

[[test]]
name = "unicode5-noutf8"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
utf8 = false

[[test]]
name = "unicode5-noutf8-only-ascii"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
unicode = false
utf8 = false

# Weird special case to ensure that ASCII \B treats each individual code unit
# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
# \w character class.)
# The haystack is five bytes long: "0" followed by one 4-byte codepoint.
# Offsets 0 and 1 are boundaries in both modes. Without `unicode = false` the
# only expected \B match is at offset 5, the end of the haystack; offsets 2-4
# fall inside the codepoint and are not reported. With `unicode = false`,
# offsets 2-4 are expected as well.
[[test]]
name = "unicode5-not"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[5, 5]]

[[test]]
name = "unicode5-not-only-ascii"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
utf8 = false

# This gets no matches since \B only matches in the presence of valid UTF-8
# when Unicode is enabled, even when UTF-8 mode is disabled.
# `unicode` is left unset here; only UTF-8 mode is switched off. The haystack
# is "0" plus four \xFF escapes with `unescape = true` (presumably decoded to
# raw bytes by the harness -- confirm against the test runner).
[[test]]
name = "unicode5-not-noutf8"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = []
unescape = true
utf8 = false

# But this DOES get matches since \B in ASCII mode only looks at individual
# bytes.
# Expects the same offsets as unicode5-not-only-ascii (2, 3, 4 and 5), but on
# the '0\xFF\xFF\xFF\xFF' haystack rather than on "0" plus a valid codepoint.
[[test]]
name = "unicode5-not-noutf8-only-ascii"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unescape = true
unicode = false
utf8 = false

# Some tests of no particular significance.
# unicode6-unicode8: '\b[0-9]+\b' must match a whole word made of digits. A
# letter attached to either end of the run ("a456" in unicode7, "456a" in
# unicode8) disqualifies it. Those two haystacks are one byte longer than the
# one in unicode6, which moves "789" from 21..24 to 22..25.
[[test]]
name = "unicode6"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456 quux 789"
matches = [[4, 7], [12, 15], [21, 24]]

[[test]]
name = "unicode7"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar a456 quux 789"
matches = [[4, 7], [22, 25]]

[[test]]
name = "unicode8"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456a quux 789"
matches = [[4, 7], [22, 25]]

# A variant of the problem described here:
# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
# Expected result: the empty match at offset 1 (the boundary between "z" and
# "%"), not a match that consumes the "%".
# NOTE(review): `bounds = [1, 2]` presumably restricts the search to that byte
# range of the haystack and `anchored = true` presumably requires the match to
# begin at the start of that range -- confirm against the test harness.
[[test]]
name = "alt-with-assertion-repetition"
regex = '(?:\b|%)+'
haystack = "z%"
bounds = [1, 2]
anchored = true
matches = [[1, 1]]