Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma

Benutzer


products/Sources/formale Sprachen/C/Firefox/third_party/rust/regex/testdata/ (Firefox Browser Version 136.0.1^©) Datei vom 10.2.2025 mit Größe 2 kB

Quelle regex-lite.toml Sprache: unbekannt

Spracherkennung für: .toml vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

# These tests are specifically written to test the regex-lite crate. While it
# largely has the same semantics as the regex crate, there are some differences
# around Unicode support and UTF-8.
#
# To be clear, regex-lite supports far fewer patterns because of its lack of
# Unicode support, nested character classes and character class set operations.
# What we're talking about here are the patterns that both crates support but
# where the semantics might differ.

# regex-lite uses ASCII definitions for Perl character classes, so '\d' is
# limited to the ASCII digits.
#
# The haystack is a single non-ASCII decimal digit (U+1815 MONGOLIAN DIGIT
# FIVE). A Unicode-aware '\d' would match it; the expected result here is no
# match, even with 'unicode = true'.
[[test]]
name = "perl-class-decimal"
regex = '\d'
haystack = '᠕'
matches = []
unicode = true

# regex-lite uses ASCII definitions for Perl character classes, so '\s' is
# limited to ASCII whitespace.
#
# The haystack is U+2000 EN QUAD, written as a TOML basic-string escape. It is
# whitespace according to Unicode but not ASCII whitespace, so the expected
# result is no match, even with 'unicode = true'.
[[test]]
name = "perl-class-space"
regex = '\s'
haystack = "\u2000"
matches = []
unicode = true

# regex-lite uses ASCII definitions for Perl character classes, so '\w' is
# limited to ASCII word characters.
#
# The haystack is U+03B4 GREEK SMALL LETTER DELTA, a letter that a
# Unicode-aware '\w' would match. The expected result is no match, even with
# 'unicode = true'.
[[test]]
name = "perl-class-word"
regex = '\w'
haystack = 'δ'
matches = []
unicode = true

# regex-lite uses the ASCII definition of word for word boundary assertions.
#
# The haystack (U+03B4 GREEK SMALL LETTER DELTA) is not an ASCII word
# character, so no position in it has a word character on either side and '\b'
# is expected to never match.
[[test]]
name = "word-boundary"
regex = '\b'
haystack = 'δ'
matches = []
unicode = true

# regex-lite uses the ASCII definition of word for negated word boundary
# assertions. But note that it should still not split codepoints!
#
# The haystack (U+03B4 GREEK SMALL LETTER DELTA) is two bytes in UTF-8 and is
# not an ASCII word character, so '\B' holds at the start and the end of the
# haystack. Offset 1 falls in the middle of the encoded codepoint and must not
# be reported, which leaves only the empty matches at byte offsets 0 and 2.
[[test]]
name = "word-boundary-negated"
regex = '\B'
haystack = 'δ'
matches = [[0, 0], [2, 2]]
unicode = true

# While we're here, the empty regex---which matches at every
# position---shouldn't split a codepoint either.
#
# The haystack must be exactly one codepoint that takes four bytes in UTF-8
# (here U+1F4A9, encoded as F0 9F 92 A9). The only valid empty matches are then
# at byte offsets 0 and 4; offsets 1-3 fall inside the codepoint. The codepoint
# is written as an escape so that it cannot be damaged by a lossy re-encoding
# of this file.
[[test]]
name = "empty-no-split-codepoint"
regex = ''
haystack = "\U0001F4A9"
matches = [[0, 0], [4, 4]]
unicode = true

# A dot always matches a full codepoint.
#
# The haystack must be exactly one codepoint that takes four bytes in UTF-8
# (here U+1F4A9, encoded as F0 9F 92 A9), so the single expected match spans
# bytes 0..4 even with 'unicode = false'. The codepoint is written as an escape
# so that it cannot be damaged by a lossy re-encoding of this file.
[[test]]
name = "dot-always-matches-codepoint"
regex = '.'
haystack = "\U0001F4A9"
matches = [[0, 4]]
unicode = false

# A negated character class also always matches a full codepoint.
#
# The haystack must be exactly one codepoint that takes four bytes in UTF-8
# (here U+1F4A9, encoded as F0 9F 92 A9), so the single expected match spans
# bytes 0..4 even with 'unicode = false'. The codepoint is written as an escape
# so that it cannot be damaged by a lossy re-encoding of this file.
[[test]]
name = "negated-class-always-matches-codepoint"
regex = '[^a]'
haystack = "\U0001F4A9"
matches = [[0, 4]]
unicode = false

# regex-lite only supports ASCII-aware case insensitive matching.
#
# The haystack is U+017F LATIN SMALL LETTER LONG S, which Unicode simple case
# folding treats as equivalent to 's'. ASCII-only case folding does not relate
# the two, so the expected result is no match even with both 'unicode' and
# 'case-insensitive' enabled.
[[test]]
name = "case-insensitive-is-ascii-only"
regex = 's'
haystack = 'ſ'
matches = []
unicode = true
case-insensitive = true

# Negated word boundaries shouldn't split a codepoint, but they will match
# between invalid UTF-8.
#
# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
# regex-lite. This can't happen in the main API because &str can't contain
# invalid UTF-8.
# [[test]]
# name = "word-boundary-invalid-utf8"
# regex = '\B'
# haystack = '\xFF\xFF\xFF\xFF'
# unescape = true
# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
# unicode = true
# utf8 = false