/* * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/
/** * Converts ASCII alphabet characters [A-Za-z] in the given 's' to * supplementary characters. This method does NOT fully take care * of the regex syntax.
*/ publicstatic String toSupplementaries(String s) { int length = s.length();
StringBuilder sb = new StringBuilder(length * 2);
for (int i = 0; i < length; ) { char c = s.charAt(i++); if (c == '\\') {
sb.append(c); if (i < length) {
c = s.charAt(i++);
sb.append(c); if (c == 'u') { // assume no syntax error
sb.append(s.charAt(i++));
sb.append(s.charAt(i++));
sb.append(s.charAt(i++));
sb.append(s.charAt(i++));
}
}
} elseif ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
sb.append('\ud800').append((char)('\udc00'+c));
} else {
sb.append(c);
}
} return sb.toString();
}
// Regular expression tests

// Following three tests execute from a file.

/**
 * Hands the file "TestCases.txt" to {@code processFile}.
 * NOTE(review): the file format and the checks performed are defined by
 * processFile, which is outside this view.
 *
 * @throws IOException propagated from processFile
 */
@Test
public static void processTestCases() throws IOException {
    processFile("TestCases.txt");
}
// This is for bug6635133
// Test if surrogate pair in Unicode escapes can be handled correctly.
/**
 * Compiles a character class whose range endpoints are written as
 * regex-level Unicode escapes of surrogate pairs and asserts that a
 * supplementary character inside that range is found.
 */
@Test
public static void surrogatesInClassTest() {
    Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
    Matcher matcher = pattern.matcher("\ud834\udd22");

    assertTrue(matcher.find(), "Surrogate pair in Unicode escape");
}
// This is for bug6990617 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode // char encoding is only 2 or 3 digits instead of 4 and the first quoted // char is an octal digit.
@Test publicstaticvoid removeQEQuotingTest() {
Pattern pattern =
Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
// This is for bug 4988891
// Test toMatchResult to see that it is a copy of the Matcher
// that is not affected by subsequent operations on the original
/**
 * Takes a MatchResult snapshot after the first find(), advances the
 * matcher with a second find(), and asserts that the snapshot still
 * reports the first match while a fresh snapshot reports the second.
 */
@Test
public static void toMatchResultTest() {
    Pattern pattern = Pattern.compile("squid");
    Matcher matcher = pattern.matcher("agiantsquidofdestinyasmallsquidoffate");

    // First match: the snapshot must agree with the matcher.
    matcher.find();
    int matcherStart1 = matcher.start();
    MatchResult mr = matcher.toMatchResult();
    assertNotSame(mr, matcher, "Matcher toMatchResult is identical object");
    int resultStart1 = mr.start();
    assertEquals(matcherStart1, resultStart1, "equal matchers don't have equal start indices");

    // Second match: the matcher moves on, the old snapshot must not.
    matcher.find();
    int matcherStart2 = matcher.start();
    int resultStart2 = mr.start();
    assertNotEquals(matcherStart2, resultStart2, "Matcher2 and Result2 should not be equal");
    assertEquals(resultStart1, resultStart2, "Second match result should have the same state");

    // A new snapshot is a distinct object reflecting the second match.
    MatchResult mr2 = matcher.toMatchResult();
    assertNotSame(mr, mr2, "Second Matcher copy should not be identical to the first.");
    assertEquals(mr2.start(), matcherStart2, "mr2 index should equal matcher index");
}
// This is for bug 8074678 // Test the result of toMatchResult throws ISE if no match is availble
@Test publicstaticvoid toMatchResultTest2() {
Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
matcher.find();
MatchResult mr = matcher.toMatchResult();
// This is for bug 5013885 // Must test a slice to see if it reports hitEnd correctly
@Test publicstaticvoid hitEndTest() { // Basic test of Slice node
Pattern p = Pattern.compile("^squidattack");
Matcher m = p.matcher("squack");
m.find();
assertFalse(m.hitEnd(), "Matcher should not be at end of sequence");
m.reset("squid");
m.find();
assertTrue(m.hitEnd(), "Matcher should be at the end of sequence");
// Test Slice, SliceA and SliceU nodes for (int i=0; i<3; i++) { int flags = 0; if (i==1) flags = Pattern.CASE_INSENSITIVE; if (i==2) flags = Pattern.UNICODE_CASE;
p = Pattern.compile("^abc", flags);
m = p.matcher("ad");
m.find();
assertFalse(m.hitEnd(), "Slice node test");
m.reset("ab");
m.find();
assertTrue(m.hitEnd(), "Slice node test");
}
// Test Boyer-Moore node
p = Pattern.compile("catattack");
m = p.matcher("attack");
m.find();
assertTrue(m.hitEnd(), "Boyer-Moore node test");
p = Pattern.compile("catattack");
m = p.matcher("attackattackattackcatatta");
m.find();
assertTrue(m.hitEnd(), "Boyer-More node test");
// 8184706: Matching u+0d at EOL against \R should hit-end
p = Pattern.compile("...\\R");
m = p.matcher("cat" + (char)0x0a);
m.find();
assertFalse(m.hitEnd());
m = p.matcher("cat" + (char)0x0d);
m.find();
assertTrue(m.hitEnd());
m = p.matcher("cat" + (char)0x0d + (char)0x0a);
m.find();
assertFalse(m.hitEnd());
}
// This is for bug 4997476
// It is weird code submitted by customer demonstrating a regression
/**
 * Walks the word boundaries of "word1 word2 word3" using find(int),
 * pairing each boundary with the next one, and asserts that every
 * non-blank segment between two boundaries starts with "word".
 */
@Test
public static void wordSearchTest() {
    String testString = "word1 word2 word3";
    Pattern p = Pattern.compile("\\b");
    Matcher m = p.matcher(testString);
    int position = 0;
    int start;
    while (m.find(position)) {
        start = m.start();
        if (start == testString.length()) {
            break;
        }
        // Look for the next boundary; fall back to end of input.
        if (m.find(start + 1)) {
            position = m.start();
        } else {
            position = testString.length();
        }
        // The segment between two words is a single blank; skip it.
        if (testString.substring(start, position).equals(" ")) {
            continue;
        }
        assertTrue(testString.substring(start, position - 1).startsWith("word"));
    }
}
// This is for bug 4994840
/**
 * Calls find() twice on a lone carriage return with a multiline
 * caret pattern. There are no assertions; the test passes as long as
 * neither call throws.
 */
@Test
public static void caretAtEndTest() {
    // Problem only occurs with multiline patterns
    // containing a beginning-of-line caret "^" followed
    // by an expression that also matches the empty string.
    Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
    Matcher matcher = pattern.matcher("\r");
    matcher.find();
    matcher.find();
}
// This test is for 4979006 // Check to see if word boundary construct properly handles unicode // non spacing marks
@Test publicstaticvoid unicodeWordBoundsTest() {
String spaces = " ";
String wordChar = "a";
String nsm = "\u030a";
/**
 * Resets the matcher to the given input, calls find() twice and
 * asserts the start index of each match.
 *
 * @param input   the text to scan
 * @param matcher the matcher to reset and drive
 * @param a       expected start index of the first match
 * @param b       expected start index of the second match
 */
private static void twoFindIndexes(String input, Matcher matcher, int a, int b) {
    matcher.reset(input);
    matcher.find();
    assertEquals(matcher.start(), a);
    matcher.find();
    assertEquals(matcher.start(), b);
}
// This test is for 6284152 privatestaticvoid check(String regex, String input, String[] expected) {
List<String> result = new ArrayList<>();
Pattern p = Pattern.compile(regex);
Matcher m = p.matcher(input); while (m.find()) {
result.add(m.group());
}
assertEquals(Arrays.asList(expected), result);
}
//boundary at end of the lookbehind sub-regex should work consistently //with the boundary just after the lookbehind sub-regex
check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
check("(?<!abc )\\bfoo", "abc foo", new String[0]);
check("(?<!abc \\b)foo", "abc foo", new String[0]);
//Negative
check("(?<!%.{0,5})foo\\d", "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", new String[] {"foo4", "foo5"});
//Positive greedy
check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
//Positive reluctant
check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
//supplementary
check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", new String[] {"fo\ud800\udc00o"});
check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", new String[] {"fo\ud800\udc00o"});
check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", new String[] {"fo\ud800\udc00o"});
check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", new String[] {"fo\ud800\udc00o"});
}
// This test is for 4938995 // Check to see if weak region boundaries are transparent to // lookahead and lookbehind constructs
@Test publicstaticvoid boundsTest() {
String fullMessage = "catdogcat";
Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
Matcher matcher = pattern.matcher("catdogca");
matcher.useTransparentBounds(true);
// This test is for 4945394
/**
 * Asserts that a literal "$0" occurring once in the input is found
 * exactly once, and that repeated find() calls afterwards keep
 * returning false.
 */
@Test
public static void findFromTest() {
    String message = "This is 40 $0 message.";
    Pattern pat = Pattern.compile("\\$0");
    Matcher match = pat.matcher(message);

    assertTrue(match.find());
    assertFalse(match.find());
    assertFalse(match.find());
}
// This test is for 4872664 and 4892980
/**
 * Exercises negated character classes that mix ASCII members with the
 * non-ASCII characters U+203A and U+203B, through matches(), find() and
 * String.split().
 */
@Test
public static void negatedCharClassTest() {
    Pattern pattern = Pattern.compile("[^>]");
    Matcher matcher = pattern.matcher("\u203A");
    assertTrue(matcher.matches());

    matcher.reset("\u203A");
    assertTrue(matcher.find());

    String s = "for";
    String[] result = s.split("[^fr]");
    assertEquals(result[0], "f");
    assertEquals(result[1], "r");

    s = "f\u203Ar";
    result = s.split("[^fr]");
    assertEquals(result[0], "f");
    assertEquals(result[1], "r");

    // Test adding to bits, subtracting a node, then adding to bits again
    pattern = Pattern.compile("[^f\u203Ar]");
    matcher = pattern.matcher("a");
    assertTrue(matcher.find());
    matcher.reset("f");
    assertFalse(matcher.find());
    matcher.reset("\u203A");
    assertFalse(matcher.find());
    matcher.reset("r");
    assertFalse(matcher.find());
    matcher.reset("\u203B");
    assertTrue(matcher.find());

    // Test subtracting a node, adding to bits, subtracting again
    pattern = Pattern.compile("[^\u203Ar\u203B]");
    matcher = pattern.matcher("a");
    assertTrue(matcher.find());
    matcher.reset("\u203A");
    assertFalse(matcher.find());
    matcher.reset("r");
    assertFalse(matcher.find());
    matcher.reset("\u203B");
    assertFalse(matcher.find());
    matcher.reset("\u203C");
    assertTrue(matcher.find());
}
// This test is for 4628291
/**
 * Asserts that Pattern.toString() returns the source regex, and calls
 * Matcher.toString() in several matcher states. The matcher's string
 * form is unspecified, so those calls are only expected not to throw.
 */
@Test
public static void toStringTest() {
    Pattern pattern = Pattern.compile("b+");
    assertEquals(pattern.toString(), "b+");

    Matcher matcher = pattern.matcher("aaabbbccc");
    String matcherString = matcher.toString(); // unspecified
    matcher.find();
    matcher.toString(); // unspecified
    matcher.region(0,3);
    matcher.toString(); // unspecified
    matcher.reset();
    matcher.toString(); // unspecified
}
// This test is for 4808962
@Test publicstaticvoid literalPatternTest() { int flags = Pattern.LITERAL;
// note: this is case-sensitive.
pattern = Pattern.compile(toSupplementaries("a...b"), flags);
check(pattern, toSupplementaries("a...b"), true);
check(pattern, toSupplementaries("axxxb"), false);
flags |= Pattern.CANON_EQ;
String t = toSupplementaries("test"); //Note: Possible issue
p = Pattern.compile(t + "a\u030a", flags);
check(pattern, t + "a\u030a", false);
check(pattern, t + "\u00e5", false);
}
// This test is for 4803179
// This test is also for 4808962, replacement parts
/**
 * Asserts that String.replaceAll rejects a replacement string whose
 * last character is a lone '$' or a lone backslash.
 */
@Test
public static void literalReplacementTest() {
    // NOTE(review): 'flags' is not used by the statements visible here.
    int flags = Pattern.LITERAL;

    // IAE should be thrown if backslash or '$' is the last character
    // in replacement string
    assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "$"));
    assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "\\"));
}
// This test is for 4757029
@Test publicstaticvoid regionTest() {
Pattern pattern = Pattern.compile("abc");
Matcher matcher = pattern.matcher("abcdefabc");
// Supplementary character test
pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
check(pattern, toSupplementaries("dir1\\dir2"), true);
// This test is for 4792284
@Test publicstaticvoid nonCaptureRepetitionTest() {
String input = "abcdefgh;";
String[] patterns = new String[] { "(?:\\w{4})+;", "(?:\\w{8})*;", "(?:\\w{2}){2,4};", "(?:\\w{4}){2,};", // only matches the ".*?(?:\\w{5})+;", // specified minimum ".*?(?:\\w{9})*;", // number of reps - OK "(?:\\w{4})+?;", // lazy repetition - OK "(?:\\w{4})++;", // possessive repetition - OK "(?:\\w{2,}?)+;", // non-deterministic - OK "(\\w{4})+;", // capturing group - OK
};
for (String pattern : patterns) { // Check find()
check(pattern, 0, input, input, true); // Check matches()
Pattern p = Pattern.compile(pattern);
Matcher m = p.matcher(input);
// This test is for 6358731
@Test publicstaticvoid notCapturedGroupCurlyMatchTest() {
Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
Matcher matcher = pattern.matcher("abcd");
// This test is for 4523620
/*
private static void numOccurrencesTest() throws Exception {
    Pattern pattern = Pattern.compile("aaa");

    if (pattern.numOccurrences("aaaaaa", false) != 2)
        failCount++;
    if (pattern.numOccurrences("aaaaaa", true) != 4)
        failCount++;

    pattern = Pattern.compile("^");
    if (pattern.numOccurrences("aaaaaa", false) != 1)
        failCount++;
    if (pattern.numOccurrences("aaaaaa", true) != 1)
        failCount++;

    report("Number of Occurrences");
}
*/
// This test is for 4776374
@Test publicstaticvoid caretBetweenTerminatorsTest() { int flags1 = Pattern.DOTALL; int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
// This test is for 4727935
@Test publicstaticvoid dollarAtEndTest() { int flags1 = Pattern.DOTALL; int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
// This test is for 4711773
/**
 * Asserts where a multiline '$' matches in a two-line input: first just
 * before the newline, then at end of input. The check is repeated on the
 * toSupplementaries form of a similar input, where the expected offsets
 * are doubled.
 */
@Test
public static void multilineDollarTest() {
    Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
    Matcher matcher = findCR.matcher("first bit\nsecond bit");
    matcher.find();
    assertEquals(matcher.start(), 9);
    matcher.find();
    assertEquals(matcher.start(0), 20);

    // Supplementary character test
    matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars
    matcher.find();
    assertEquals(matcher.start(0), 9*2);
    matcher.find();
    assertEquals(matcher.start(0), 20*2);
}
/**
 * Exercises reluctant quantifiers: a bounded reluctant group
 * repetition against several inputs, and a reluctant '+' whose match
 * must stop at the first possible 'c'. The latter is repeated on
 * supplementary-character input.
 * NOTE(review): the check(...) overloads used here are defined outside
 * this view; their exact assertion semantics are assumed from usage.
 */
@Test
public static void reluctantRepetitionTest() {
    Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
    check(p, "1 word word word 2", true);
    check(p, "1 wor wo w 2", true);
    check(p, "1 word word 2", true);
    check(p, "1 word 2", true);
    check(p, "1 wo w w 2", true);
    check(p, "1 wo w 2", true);
    check(p, "1 wor w 2", true);

    p = Pattern.compile("([a-z])+?c");
    Matcher m = p.matcher("ababcdefdec");
    check(m, "ababc");

    // Supplementary character test
    p = Pattern.compile(toSupplementaries("([a-z])+?c"));
    m = p.matcher(toSupplementaries("ababcdefdec"));
    check(m, toSupplementaries("ababc"));
}
publicstatic Pattern serializedPattern(Pattern p) throws Exception {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(p);
oos.close(); try (ObjectInputStream ois = new ObjectInputStream( new ByteArrayInputStream(baos.toByteArray()))) { return (Pattern)ois.readObject();
}
}
// Supplementary character test
pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
result = matcher.replaceFirst(toSupplementaries("test"));
assertEquals(result, toSupplementaries("testzzzabcczzzabccc"));
matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
result = matcher.replaceFirst(toSupplementaries("test"));
assertEquals(result, toSupplementaries("zzztestzzzabcczzzabccczzz"));
matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
result = matcher.replaceFirst("$1");
assertEquals(result, toSupplementaries("zzzabzzzabcczzzabccczzz"));
matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
result = matcher.replaceFirst("$2");
assertEquals(result, toSupplementaries("zzzccczzzabcczzzabccczzz"));
flags = Pattern.CASE_INSENSITIVE; for (int i = 0; i < patterns.length; i++) {
pattern = Pattern.compile(patterns[i], flags);
matcher = pattern.matcher(texts[i]);
assertEquals(matcher.matches(), expected[i], "<1> Failed at " + i);
}
flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; for (int i = 0; i < patterns.length; i++) {
pattern = Pattern.compile(patterns[i], flags);
matcher = pattern.matcher(texts[i]);
assertTrue(matcher.matches(), "<2> Failed at " + i);
} // flag unicode_case alone should do nothing
flags = Pattern.UNICODE_CASE; for (int i = 0; i < patterns.length; i++) {
pattern = Pattern.compile(patterns[i], flags);
matcher = pattern.matcher(texts[i]);
assertFalse(matcher.matches(), "<3> Failed at " + i);
}
// Special cases: i, I, u+0131 and u+0130
flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
pattern = Pattern.compile("[h-j]+", flags);
assertTrue(pattern.matcher("\u0131\u0130").matches());
}
if (matcher.find()) {
StringBuffer sb = new StringBuffer();
matcher.appendReplacement(sb, r);
matcher.appendTail(sb);
result = sb.toString();
assertEquals(result, "Swap one: 123 = first, second = 456");
}
// Supplementary character test
pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
matcher = pattern.matcher(toSupplementaries("abcd"));
result = matcher.replaceAll("$2$1");
assertEquals(result, toSupplementaries("cdab"));
s1 = toSupplementaries("Swap all: first = 123, second = 456");
s2 = toSupplementaries("Swap one: first = 123, second = 456");
r = toSupplementaries("$3$2$1");
pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
matcher = pattern.matcher(s1);
// Supplementary character test
CharBuffer cbs = CharBuffer.allocate(100);
cbs.put(toSupplementaries("fooXandXboo"));
cbs.flip();
result = patternX.split(cbs);
assertEquals(result[0], toSupplementaries("foo"));
assertEquals(result[1], toSupplementaries("and"));
assertEquals(result[2], toSupplementaries("boo"));
String source = "0123456789"; for (int limit=-2; limit<3; limit++) { for (int x=0; x<10; x++) {
result = source.split(Integer.toString(x), limit); int expectedLength = limit < 1 ? 2 : limit;
if (!result[0].equals(source.substring(0,x))) {
assertEquals(limit, 1);
assertEquals(result[0], source.substring(0,10));
} if (expectedLength > 1) { // Check segment 2
assertEquals(result[1], source.substring(x+1,10));
}
}
}
} // Check the case for no match found for (int limit=-2; limit<3; limit++) {
result = source.split("e", limit);
assertEquals(result.length, 1);
assertEquals(result[0], source);
} // Check the case for limit == 0, source = ""; // split() now returns 0-length for empty source "" see #6559590
source = "";
result = source.split("e", 0);
assertEquals(result.length, 1);
assertEquals(result[0], source);
// Check both split() and splitAsStraem(), especially for zero-lenth // input and zero-lenth match cases
String[][] input = new String[][] {
{ " ", "Abc Efg Hij" }, // normal non-zero-match
{ " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
{ " ", "Abc Efg Hij" }, // non-zero-match in the middle
{ "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
{ "(?=\\p{Lu})", "AbcEfg" },
{ "(?=\\p{Lu})", "Abc" },
{ " ", "" }, // zero-length input
{ ".*", "" },
/**
 * Exercises Matcher.find(int): a normal start offset, an offset equal
 * to the input length (legal, matches '$'), an offset past the end
 * (must throw), and a start offset on supplementary-character input.
 */
@Test
public static void findIntTest() {
    Pattern p = Pattern.compile("blah");
    Matcher m = p.matcher("zzzzblahzzzzzblah");
    boolean result = m.find(2);
    assertTrue(result);

    // The lambda below needs effectively final locals, hence p2/m2.
    final Pattern p2 = Pattern.compile("$");
    final Matcher m2 = p2.matcher("1234567890");
    result = m2.find(10);
    assertTrue(result);
    assertThrows(IndexOutOfBoundsException.class, () -> m2.find(11));

    // Supplementary character test
    p = Pattern.compile(toSupplementaries("blah"));
    m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
    result = m.find(2);
    assertTrue(result);
}
@Test publicstaticvoid emptyPatternTest() {
Pattern p = Pattern.compile(""); final Matcher m = p.matcher("foo");
// Should find empty pattern at beginning of input boolean result = m.find();
assertTrue(result);
assertEquals(m.start(), 0);
// Should not match entire input if input is not empty
m.reset();
result = m.matches();
assertFalse(result);
pattern = Pattern.compile("^", Pattern.MULTILINE);
matcher = pattern.matcher("this is some text");
String result = matcher.replaceAll("X");
assertEquals(result, "Xthis is some text");
pattern = Pattern.compile("^");
matcher = pattern.matcher("this is some text");
result = matcher.replaceAll("X");
assertEquals(result, "Xthis is some text");
pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
matcher = pattern.matcher("this is some text\n");
result = matcher.replaceAll("X");
assertEquals(result, "Xthis is some text\n");
}
for (int i = 1; i < 10; i++) { // Make sure backref 1-9 are always accepted
pattern = Pattern.compile("abcdef\\" + i); // and fail to match if the target group does not exit
check(pattern, "abcdef", false);
}
/**
 * Unicode Technical Report #18, section 2.6 End of Line
 * There is no empty line to be matched in the sequence U+000D U+000A
 * but there is an empty line in the sequence U+000A U+000D.
 *
 * <p>Checks '^' and '$' around CR LF and around U+0085, in default and
 * multiline mode, first on plain input and then on
 * supplementary-character input.
 */
@Test
public static void anchorTest() {
    Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
    Matcher m = p.matcher("blah1\r\nblah2");
    m.find();
    m.find();
    assertEquals(m.group(), "blah2");

    // Test behavior of $ with \r\n at end of input
    p = Pattern.compile(".+$");
    m = p.matcher("blah1\r\n");
    assertTrue(m.find());
    assertEquals(m.group(), "blah1");
    assertFalse(m.find());

    // Test behavior of $ with \r\n at end of input in multiline
    p = Pattern.compile(".+$", Pattern.MULTILINE);
    m = p.matcher("blah1\r\n");
    assertTrue(m.find());
    assertFalse(m.find());

    // Test for $ recognition of U+0085 for bug 4527731
    p = Pattern.compile(".+$", Pattern.MULTILINE);
    m = p.matcher("blah1\u0085");
    assertTrue(m.find());

    // Supplementary character test
    p = Pattern.compile("^.*$", Pattern.MULTILINE);
    m = p.matcher(toSupplementaries("blah1\r\nblah2"));
    m.find();
    m.find();
    assertEquals(m.group(), toSupplementaries("blah2"));

    // Test behavior of $ with \r\n at end of input
    p = Pattern.compile(".+$");
    m = p.matcher(toSupplementaries("blah1\r\n"));
    assertTrue(m.find());
    assertEquals(m.group(), toSupplementaries("blah1"));
    assertFalse(m.find());

    // Test behavior of $ with \r\n at end of input in multiline
    p = Pattern.compile(".+$", Pattern.MULTILINE);
    m = p.matcher(toSupplementaries("blah1\r\n"));
    assertTrue(m.find());
    assertFalse(m.find());

    // Test for $ recognition of U+0085 for bug 4527731
    p = Pattern.compile(".+$", Pattern.MULTILINE);
    m = p.matcher(toSupplementaries("blah1\u0085"));
    assertTrue(m.find());
}
/**
 * A basic sanity test of Matcher.lookingAt().
 *
 * <p>lookingAt() must succeed when the input begins with a match and
 * fail when the first match is further in. Both cases are repeated on
 * supplementary-character input.
 */
@Test
public static void lookingAtTest() {
    Pattern p = Pattern.compile("(ab)(c*)");
    Matcher m = p.matcher("abccczzzabcczzzabccc");

    assertTrue(m.lookingAt());
    assertEquals(m.group(), m.group(0));

    m = p.matcher("zzzabccczzzabcczzzabccczzz");
    assertFalse(m.lookingAt());

    // Supplementary character test
    p = Pattern.compile(toSupplementaries("(ab)(c*)"));
    m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));

    assertTrue(m.lookingAt());
    assertEquals(m.group(), m.group(0));

    m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
    assertFalse(m.lookingAt());
}
/**
 * A basic sanity test of Matcher.matches().
 *
 * <p>matches() must succeed only when the whole input matches. It must
 * fail on inputs where only find() or only lookingAt() would succeed,
 * and an alternation must fall through to the branch that consumes the
 * entire input. All cases are repeated on supplementary-character input.
 */
@Test
public static void matchesTest() {
    // matches()
    Pattern p = Pattern.compile("ulb(c*)");
    Matcher m = p.matcher("ulbcccccc");
    assertTrue(m.matches());

    // find() but not matches()
    m.reset("zzzulbcccccc");
    assertFalse(m.matches());

    // lookingAt() but not matches()
    m.reset("ulbccccccdef");
    assertFalse(m.matches());

    // matches()
    p = Pattern.compile("a|ad");
    m = p.matcher("ad");
    assertTrue(m.matches());

    // Supplementary character test
    // matches()
    p = Pattern.compile(toSupplementaries("ulb(c*)"));
    m = p.matcher(toSupplementaries("ulbcccccc"));
    assertTrue(m.matches());

    // find() but not matches()
    m.reset(toSupplementaries("zzzulbcccccc"));
    assertFalse(m.matches());

    // lookingAt() but not matches()
    m.reset(toSupplementaries("ulbccccccdef"));
    assertFalse(m.matches());

    // matches()
    p = Pattern.compile(toSupplementaries("a|ad"));
    m = p.matcher(toSupplementaries("ad"));
    assertTrue(m.matches());
}
/**
 * A basic sanity test of Pattern.matches().
 *
 * <p>The static convenience method must succeed only on a whole-input
 * match. The first group of assertions uses plain ASCII input; the
 * second group repeats them on supplementary-character input.
 */
@Test
public static void patternMatchesTest() {
    // matches()
    assertTrue(Pattern.matches("ulb(c*)", "ulbcccccc"));

    // find() but not matches()
    assertFalse(Pattern.matches("ulb(c*)", "zzzulbcccccc"));

    // lookingAt() but not matches()
    assertFalse(Pattern.matches("ulb(c*)", "ulbccccccdef"));

    // Supplementary character test
    // matches()
    assertTrue(Pattern.matches(toSupplementaries("ulb(c*)"),
            toSupplementaries("ulbcccccc")));

    // find() but not matches()
    assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
            toSupplementaries("zzzulbcccccc")));

    // lookingAt() but not matches()
    assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
            toSupplementaries("ulbccccccdef")));
}
/** * Canonical equivalence testing. Tests the ability of the engine * to match sequences that are not explicitly specified in the * pattern when they are considered equivalent by the Unicode Standard.
*/
@Test publicstaticvoid ceTest() { // Decomposed char outside char classes
Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
Matcher m = p.matcher("test\u00e5");
assertTrue(m.matches());
m.reset("testa\u030a");
assertTrue(m.matches());
// Composed char outside char classes
p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
m = p.matcher("test\u00e5");
assertTrue(m.matches());
m.reset("testa\u030a");
assertTrue(m.find());
// Decomposed char inside a char class
p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
m = p.matcher("test\u00e5");
assertTrue(m.find());
m.reset("testa\u030a");
assertTrue(m.find());
// Composed char inside a char class
p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
m = p.matcher("test\u00e5");
assertTrue(m.find());
m.reset("testa\u0300");
assertTrue(m.find());
m.reset("testa\u030a");
assertTrue(m.find());
// Marks that cannot legally change order and be equivalent
p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
check(p, "testa\u0308\u0300", true);
check(p, "testa\u0300\u0308", false);
// Marks that can legally change order and be equivalent
p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
check(p, "testa\u0308\u0323", true);
check(p, "testa\u0323\u0308", true);
// Test all equivalences of the sequence a\u0308\u0323\u0300
p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
check(p, "testa\u0308\u0323\u0300", true);
check(p, "testa\u0323\u0308\u0300", true);
check(p, "testa\u0308\u0300\u0323", true);
check(p, "test\u00e4\u0323\u0300", true);
check(p, "test\u00e4\u0300\u0323", true);
/**
 * A basic sanity test of Matcher.replaceAll().
 *
 * <p>Replaces every match with a literal and with a group reference,
 * on plain input and then on supplementary-character input.
 */
@Test
public static void globalSubstitute() {
    // Global substitution with a literal
    Pattern p = Pattern.compile("(ab)(c*)");
    Matcher m = p.matcher("abccczzzabcczzzabccc");
    assertEquals(m.replaceAll("test"), "testzzztestzzztest");

    // Global substitution with groups
    m.reset("zzzabccczzzabcczzzabccczzz");
    String result = m.replaceAll("$1");
    assertEquals(result, "zzzabzzzabzzzabzzz");

    // Supplementary character test
    // Global substitution with a literal
    p = Pattern.compile(toSupplementaries("(ab)(c*)"));
    m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
    assertEquals(m.replaceAll(toSupplementaries("test")),
            toSupplementaries("testzzztestzzztest"));

    // Global substitution with groups
    m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
    result = m.replaceAll("$1");
    assertEquals(result,toSupplementaries("zzzabzzzabzzzabzzz"));
}
/** * Tests the usage of Matcher.appendReplacement() with literal * and group substitutions.
*/
@Test publicstaticvoid stringBufferSubstituteLiteral() { // SB substitution with literal final String blah = "zzzblahzzz"; final Pattern p = Pattern.compile("blah"); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
@Test publicstaticvoid stringBufferSubtituteWithGroups() { // SB substitution with groups final String blah = "zzzabcdzzz"; final Pattern p = Pattern.compile("(ab)(cd)*"); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), "zzzab");
@Test publicstaticvoid stringBufferThreeSubstitution() { // SB substitution with 3 groups final String blah = "zzzabcdcdefzzz"; final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1w$2w$3"));
m.find();
m.appendReplacement(result, "$1w$2w$3");
assertEquals(result.toString(), "zzzabwcdwef");
@Test publicstaticvoid stringBufferSubstituteGroupsThreeMatches() { // SB substitution with groups and three matches // skipping middle match final String blah = "zzzabcdzzzabcddzzzabcdzzz"; final Pattern p = Pattern.compile("(ab)(cd*)"); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1"));
@Test publicstaticvoid stringBufferEscapedDollar() { // Check to make sure escaped $ is ignored
String blah = "zzzabcdcdefzzz";
Pattern p = Pattern.compile("(ab)(cd)*(ef)");
Matcher m = p.matcher(blah);
StringBuffer result = new StringBuffer();
m.find();
m.appendReplacement(result, "$1w\\$2w$3");
assertEquals(result.toString(), "zzzabw$2wef");
/**
 * Asserts that appendReplacement throws IndexOutOfBoundsException when
 * the replacement refers to group 5 of a pattern with three groups.
 */
@Test
public static void stringBufferNonExistentGroup() {
    // Check to make sure a reference to nonexistent group causes error
    final String blah = "zzzabcdcdefzzz";
    final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
    final Matcher m = p.matcher(blah);
    final StringBuffer result = new StringBuffer();
    m.find();
    assertThrows(IndexOutOfBoundsException.class,
            () -> m.appendReplacement(result, "$1w$5w$3"));
}
// Check double digit group references
String blah = "zzz123456789101112zzz";
Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
Matcher m = p.matcher(blah);
StringBuffer result = new StringBuffer();
m.find();
m.appendReplacement(result, "$1w$11w$3");
assertEquals(result.toString(), "zzz1w11w3");
}
/**
 * Asserts that, with only three groups in the pattern, the reference
 * "$15" in a replacement is treated as group 1 followed by a literal
 * '5'.
 */
@Test
public static void stringBufferBackoff() {
    // Check to make sure it backs off $15 to $1 if only three groups
    String blah = "zzzabcdcdefzzz";
    Pattern p = Pattern.compile("(ab)(cd)*(ef)");
    Matcher m = p.matcher(blah);
    StringBuffer result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w$15w$3");
    assertEquals(result.toString(), "zzzabwab5wef");
}
@Test publicstaticvoid stringBufferSupplementaryCharacter(){ // Supplementary character test // SB substitution with literal final String blah = toSupplementaries("zzzblahzzz"); final Pattern p = Pattern.compile(toSupplementaries("blah")); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class,
() -> m.appendReplacement(result, toSupplementaries("blech")));
m.find();
m.appendReplacement(result, toSupplementaries("blech"));
assertEquals(result.toString(), toSupplementaries("zzzblech"));
@Test publicstaticvoid stringBufferSubstitutionWithGroups() { // SB substitution with groups final String blah = toSupplementaries("zzzabcdzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*")); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class,
() -> m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), toSupplementaries("zzzab"));
@Test publicstaticvoid stringBufferSubstituteWithThreeGroups() { // SB substitution with 3 groups final String blah = toSupplementaries("zzzabcdcdefzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class,
() -> m.appendReplacement(result, toSupplementaries("$1w$2w$3")));
@Test publicstaticvoid stringBufferWithGroupsAndThreeMatches() { // SB substitution with groups and three matches // skipping middle match final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)")); final Matcher m = p.matcher(blah); final StringBuffer result = new StringBuffer();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "$1"));
@Test publicstaticvoid stringBufferEnsureDollarIgnored() { // Check to make sure escaped $ is ignored
String blah = toSupplementaries("zzzabcdcdefzzz");
Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
Matcher m = p.matcher(blah);
StringBuffer result = new StringBuffer();
m.find();
m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
assertEquals(result.toString(), toSupplementaries("zzzabw$2wef"));
/**
 * Checks that a reference to a nonexistent group ({@code $5} with only
 * three groups) makes {@code appendReplacement} throw
 * {@link IndexOutOfBoundsException} (supplementary input, StringBuffer).
 */
@Test
public static void stringBufferCheckNonexistentGroupReference() {
    final String blah = toSupplementaries("zzzabcdcdefzzz");
    final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    final Matcher m = p.matcher(blah);
    final StringBuffer result = new StringBuffer();
    m.find();
    assertThrows(IndexOutOfBoundsException.class, () ->
            m.appendReplacement(result, toSupplementaries("$1w$5w$3")));
}
/**
 * Checks double digit group references ({@code $11}) with supplementary
 * characters in the input and replacement, appending to a StringBuffer.
 */
@Test
public static void stringBufferCheckSupplementalDoubleDigitGroupReferences() {
    String blah = toSupplementaries("zzz123456789101112zzz");
    Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    Matcher m = p.matcher(blah);
    StringBuffer result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
    assertEquals(result.toString(), toSupplementaries("zzz1w11w3"));
}
/**
 * Checks that {@code $15} backs off to {@code $1} followed by a literal
 * {@code 5} when only three groups exist (supplementary characters,
 * StringBuffer target).
 */
@Test
public static void stringBufferBackoffSupplemental() {
    String blah = toSupplementaries("zzzabcdcdefzzz");
    Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    Matcher m = p.matcher(blah);
    StringBuffer result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
    assertEquals(result.toString(), toSupplementaries("zzzabwab5wef"));
}
/**
 * Checks that nothing has been appended into the output buffer if the
 * replacement string triggers {@link IllegalArgumentException}
 * (here an illegal group reference {@code $g}).
 */
@Test
public static void stringBufferCheckAppendException() {
    Pattern p = Pattern.compile("(abc)");
    Matcher m = p.matcher("abcd");
    StringBuffer result = new StringBuffer();
    m.find();
    assertThrows(IllegalArgumentException.class,
            () -> m.appendReplacement(result, ("xyz$g")));
    assertEquals(result.length(), 0);
}

/**
 * Tests the usage of Matcher.appendReplacement() with literal
 * and group substitutions.
 */
@Test publicstaticvoid stringBuilderSubstitutionWithLiteral() { // SB substitution with literal final String blah = "zzzblahzzz"; final Pattern p = Pattern.compile("blah"); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "blech"));
@Test publicstaticvoid stringBuilderSubstitutionWithGroups() { // SB substitution with groups final String blah = "zzzabcdzzz"; final Pattern p = Pattern.compile("(ab)(cd)*"); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), "zzzab");
@Test publicstaticvoid stringBuilderSubstitutionWithThreeGroups() { // SB substitution with 3 groups final String blah = "zzzabcdcdefzzz"; final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "$1w$2w$3"));
@Test publicstaticvoid stringBuilderSubstitutionThreeMatch() { // SB substitution with groups and three matches // skipping middle match final String blah = "zzzabcdzzzabcddzzzabcdzzz"; final Pattern p = Pattern.compile("(ab)(cd*)"); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), "zzzab");
@Test publicstaticvoid stringBuilderSubtituteCheckEscapedDollar() { // Check to make sure escaped $ is ignored final String blah = "zzzabcdcdefzzz"; final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
m.find();
m.appendReplacement(result, "$1w\\$2w$3");
assertEquals(result.toString(), "zzzabw$2wef");
/**
 * Checks that a reference to a nonexistent group ({@code $5} with only
 * three groups) makes {@code appendReplacement} throw
 * {@link IndexOutOfBoundsException} (StringBuilder target).
 */
@Test
public static void stringBuilderNonexistentGroupError() {
    final String blah = "zzzabcdcdefzzz";
    final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    assertThrows(IndexOutOfBoundsException.class, () ->
            m.appendReplacement(result, "$1w$5w$3"));
}
/**
 * Checks double digit group references ({@code $11}) when appending to a
 * StringBuilder.
 */
@Test
public static void stringBuilderDoubleDigitGroupReferences() {
    final String blah = "zzz123456789101112zzz";
    final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    m.appendReplacement(result, "$1w$11w$3");
    assertEquals(result.toString(), "zzz1w11w3");
}
/**
 * Checks that {@code $15} backs off to {@code $1} followed by a literal
 * {@code 5} when only three groups exist (StringBuilder target).
 */
@Test
public static void stringBuilderCheckBackoff() {
    final String blah = "zzzabcdcdefzzz";
    final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    m.appendReplacement(result, "$1w$15w$3");
    assertEquals(result.toString(), "zzzabwab5wef");
}
/**
 * Supplementary character test: StringBuilder substitution with a literal
 * replacement. Covers the {@link IllegalStateException} thrown before any
 * match, the text appended by {@code appendReplacement}, and the remainder
 * appended by {@code appendTail}.
 */
@Test
public static void stringBuilderSupplementalLiteralSubstitution() {
    final String blah = toSupplementaries("zzzblahzzz");
    final Pattern p = Pattern.compile(toSupplementaries("blah"));
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    assertThrows(IllegalStateException.class,
            () -> m.appendReplacement(result, toSupplementaries("blech")));
    m.find();
    m.appendReplacement(result, toSupplementaries("blech"));
    assertEquals(result.toString(), toSupplementaries("zzzblech"));
    m.appendTail(result);
    assertEquals(result.toString(), toSupplementaries("zzzblechzzz"));
}
@Test publicstaticvoid stringBuilderSupplementalSubstitutionWithGroups() { // SB substitution with groups final String blah = toSupplementaries("zzzabcdzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*")); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class,
() -> m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), toSupplementaries("zzzab"));
@Test publicstaticvoid stringBuilderSupplementalSubstitutionThreeGroups() { // SB substitution with 3 groups final String blah = toSupplementaries("zzzabcdcdefzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, toSupplementaries("$1w$2w$3")));
m.find();
m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
assertEquals(result.toString(), toSupplementaries("zzzabwcdwef"));
@Test publicstaticvoid stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch() { // SB substitution with groups and three matches // skipping middle match final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)")); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
assertThrows(IllegalStateException.class, () ->
m.appendReplacement(result, "$1"));
m.find();
m.appendReplacement(result, "$1");
assertEquals(result.toString(), toSupplementaries("zzzab"));
@Test publicstaticvoid stringBuilderSupplementalEscapedDollar() { // Check to make sure escaped $ is ignored final String blah = toSupplementaries("zzzabcdcdefzzz"); final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); final Matcher m = p.matcher(blah); final StringBuilder result = new StringBuilder();
m.find();
m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
assertEquals(result.toString(), toSupplementaries("zzzabw$2wef"));
/**
 * Checks that a reference to a nonexistent group ({@code $5} with only
 * three groups) makes {@code appendReplacement} throw
 * {@link IndexOutOfBoundsException} (supplementary input, StringBuilder).
 */
@Test
public static void stringBuilderSupplementalNonExistentGroupError() {
    final String blah = toSupplementaries("zzzabcdcdefzzz");
    final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    assertThrows(IndexOutOfBoundsException.class, () ->
            m.appendReplacement(result, toSupplementaries("$1w$5w$3")));
}
/**
 * Checks double digit group references ({@code $11}) with supplementary
 * characters in the input and replacement, appending to a StringBuilder.
 */
@Test
public static void stringBuilderSupplementalCheckDoubleDigitGroupReferences() {
    final String blah = toSupplementaries("zzz123456789101112zzz");
    final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
    assertEquals(result.toString(), toSupplementaries("zzz1w11w3"));
}
/**
 * Checks that {@code $15} backs off to {@code $1} followed by a literal
 * {@code 5} when only three groups exist (supplementary characters,
 * StringBuilder target).
 */
@Test
public static void stringBuilderSupplementalCheckBackoff() {
    final String blah = toSupplementaries("zzzabcdcdefzzz");
    final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    final Matcher m = p.matcher(blah);
    final StringBuilder result = new StringBuilder();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
    assertEquals(result.toString(), toSupplementaries("zzzabwab5wef"));
}
/**
 * Checks that nothing has been appended into the output buffer if the
 * replacement string triggers {@link IllegalArgumentException}
 * (here an illegal group reference {@code $g}).
 */
@Test
public static void stringBuilderCheckIllegalArgumentException() {
    final Pattern p = Pattern.compile("(abc)");
    final Matcher m = p.matcher("abcd");
    final StringBuilder result = new StringBuilder();
    m.find();
    assertThrows(IllegalArgumentException.class, () ->
            m.appendReplacement(result, ("xyz$g")));
    assertEquals(result.length(), 0);
}
/* * 5 groups of characters are created to make a substitution string. * A base string will be created including random lead chars, the * substitution string, and random trailing chars. * A pattern containing the 5 groups is searched for and replaced with: * random group + random string + random group. * The results are checked for correctness.
*/
@Test publicstaticvoid substitutionBasher() { for (int runs = 0; runs<1000; runs++) { // Create a base string to work in int leadingChars = generator.nextInt(10);
StringBuilder baseBuffer = new StringBuilder(100);
String leadingString = getRandomAlphaString(leadingChars);
baseBuffer.append(leadingString);
// Create 5 groups of random number of random chars // Create the string to substitute // Create the pattern string to search for
StringBuilder bufferToSub = new StringBuilder(25);
StringBuilder bufferToPat = new StringBuilder(50);
String[] groups = new String[5]; for(int i=0; i<5; i++) { int aGroupSize = generator.nextInt(5)+1;
groups[i] = getRandomAlphaString(aGroupSize);
bufferToSub.append(groups[i]);
bufferToPat.append('(');
bufferToPat.append(groups[i]);
bufferToPat.append(')');
}
String stringToSub = bufferToSub.toString();
String pattern = bufferToPat.toString();
// Place sub string into working string at random index
baseBuffer.append(stringToSub);
// Append random chars to end int trailingChars = generator.nextInt(10);
String trailingString = getRandomAlphaString(trailingChars);
baseBuffer.append(trailingString);
String baseString = baseBuffer.toString();
// Create test pattern and matcher
Pattern p = Pattern.compile(pattern);
Matcher m = p.matcher(baseString);
// Reject candidate if pattern happens to start early
m.find(); if (m.start() < leadingChars) continue;
// Reject candidate if more than one match if (m.find()) continue;
// Construct a replacement string with : // random group + random string + random group
StringBuilder bufferToRep = new StringBuilder(); int groupIndex1 = generator.nextInt(5);
bufferToRep.append("$").append(groupIndex1 + 1);
String randomMidString = getRandomAlphaString(5);
bufferToRep.append(randomMidString); int groupIndex2 = generator.nextInt(5);
bufferToRep.append("$").append(groupIndex2 + 1);
String replacement = bufferToRep.toString();
// Do the replacement
String result = m.replaceAll(replacement);
/* * 5 groups of characters are created to make a substitution string. * A base string will be created including random lead chars, the * substitution string, and random trailing chars. * A pattern containing the 5 groups is searched for and replaced with: * random group + random string + random group. * The results are checked for correctness.
*/
@Test publicstaticvoid substitutionBasher2() { for (int runs = 0; runs<1000; runs++) { // Create a base string to work in int leadingChars = generator.nextInt(10);
StringBuilder baseBuffer = new StringBuilder(100);
String leadingString = getRandomAlphaString(leadingChars);
baseBuffer.append(leadingString);
// Create 5 groups of random number of random chars // Create the string to substitute // Create the pattern string to search for
StringBuilder bufferToSub = new StringBuilder(25);
StringBuilder bufferToPat = new StringBuilder(50);
String[] groups = new String[5]; for(int i=0; i<5; i++) { int aGroupSize = generator.nextInt(5)+1;
groups[i] = getRandomAlphaString(aGroupSize);
bufferToSub.append(groups[i]);
bufferToPat.append('(');
bufferToPat.append(groups[i]);
bufferToPat.append(')');
}
String stringToSub = bufferToSub.toString();
String pattern = bufferToPat.toString();
// Place sub string into working string at random index
baseBuffer.append(stringToSub);
// Append random chars to end int trailingChars = generator.nextInt(10);
String trailingString = getRandomAlphaString(trailingChars);
baseBuffer.append(trailingString);
String baseString = baseBuffer.toString();
// Create test pattern and matcher
Pattern p = Pattern.compile(pattern);
Matcher m = p.matcher(baseString);
// Reject candidate if pattern happens to start early
m.find(); if (m.start() < leadingChars) continue;
// Reject candidate if more than one match if (m.find()) continue;
// Construct a replacement string with : // random group + random string + random group
StringBuilder bufferToRep = new StringBuilder(); int groupIndex1 = generator.nextInt(5);
bufferToRep.append("$").append(groupIndex1 + 1);
String randomMidString = getRandomAlphaString(5);
bufferToRep.append(randomMidString); int groupIndex2 = generator.nextInt(5);
bufferToRep.append("$").append(groupIndex2 + 1);
String replacement = bufferToRep.toString();
// Do the replacement
String result = m.replaceAll(replacement);
/**
 * Checks the handling of some escape sequences that the Pattern
 * class should process instead of the java compiler. These are
 * not in the file because the escapes should be processed
 * by the Pattern class when the regex is compiled.
 * Each of the octal, hexadecimal and unicode forms below denotes '#'.
 */
@Test
public static void escapes() {
    // octal escape
    Pattern p = Pattern.compile("\\043");
    Matcher m = p.matcher("#");
    assertTrue(m.find());

    // hexadecimal escape
    p = Pattern.compile("\\x23");
    m = p.matcher("#");
    assertTrue(m.find());

    // unicode escape handed to Pattern, not to the compiler
    p = Pattern.compile("\\u0023");
    m = p.matcher("#");
    assertTrue(m.find());
}
/**
 * Checks the handling of blank input situations. These
 * tests are incompatible with my test file format.
 * A pattern requiring characters must not be found in the empty string,
 * while a pattern that can match zero characters must be found.
 */
@Test
public static void blankInput() {
    Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher("");
    assertFalse(m.find());

    p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
    m = p.matcher("");
    assertTrue(m.find());

    p = Pattern.compile("abc");
    m = p.matcher("");
    assertFalse(m.find());

    p = Pattern.compile("a*");
    m = p.matcher("");
    assertTrue(m.find());
}
/** * Tests the Boyer-Moore pattern matching of a character sequence * on randomly generated patterns.
*/
@Test publicstaticvoid bm() {
doBnM('a');
privatestaticvoid doBnM(int baseCharacter) { for (int i=0; i<100; i++) { // Create a short pattern to search for int patternLength = generator.nextInt(7) + 4;
StringBuilder patternBuffer = new StringBuilder(patternLength);
String pattern;
retry: for (;;) { for (int x=0; x<patternLength; x++) { int ch = baseCharacter + generator.nextInt(26); if (Character.isSupplementaryCodePoint(ch)) {
patternBuffer.append(Character.toChars(ch));
} else {
patternBuffer.append((char)ch);
}
}
pattern = patternBuffer.toString();
// Avoid patterns that start and end with the same substring // See JDK-6854417 for (int x=1; x < pattern.length(); x++) { if (pattern.startsWith(pattern.substring(x))) continue retry;
} break;
}
Pattern p = Pattern.compile(pattern);
// Create a buffer with random ASCII chars that does // not match the sample
String toSearch;
StringBuffer s;
Matcher m = p.matcher(""); do {
s = new StringBuffer(100); for (int x=0; x<100; x++) { int ch = baseCharacter + generator.nextInt(26); if (Character.isSupplementaryCodePoint(ch)) {
s.append(Character.toChars(ch));
} else {
s.append((char)ch);
}
}
toSearch = s.toString();
m.reset(toSearch);
} while (m.find());
// Insert the pattern at a random spot int insertIndex = generator.nextInt(99); if (Character.isLowSurrogate(s.charAt(insertIndex)))
insertIndex++;
s.insert(insertIndex, pattern);
toSearch = s.toString();
// Make sure that the pattern is found
m.reset(toSearch);
assertTrue(m.find());
// Make sure that the match text is the pattern
assertEquals(m.group(), pattern);
// Make sure match occured at insertion point
assertEquals(m.start(), insertIndex);
}
}
/**
 * Tests the matching of slices on randomly generated patterns.
 * The Boyer-Moore optimization is not done on these patterns
 * because it uses unicode case folding.
 */
@Test
public static void slice() {
    // Once with random values below Character.MAX_VALUE, once with random
    // values below Character.MAX_CODE_POINT.
    doSlice(Character.MAX_VALUE);
    doSlice(Character.MAX_CODE_POINT);
}
/**
 * Runs 100 rounds of a randomized search with {@code UNICODE_CASE}: builds
 * a short random pattern of letters and digits, builds a haystack that
 * does not contain it, inserts the pattern at a random position and
 * checks that it is found exactly there.
 *
 * @param maxCharacter exclusive upper bound for the randomly chosen
 *        character values
 */
private static void doSlice(int maxCharacter) {
    for (int i = 0; i < 100; i++) {
        // Create a short pattern to search for
        int patternLength = generator.nextInt(7) + 4;
        StringBuilder patternBuffer = new StringBuilder(patternLength);
        for (int x = 0; x < patternLength; x++) {
            // Draw until a letter or digit comes up
            int randomChar = 0;
            while (!Character.isLetterOrDigit(randomChar))
                randomChar = generator.nextInt(maxCharacter);
            if (Character.isSupplementaryCodePoint(randomChar)) {
                patternBuffer.append(Character.toChars(randomChar));
            } else {
                patternBuffer.append((char) randomChar);
            }
        }
        String pattern = patternBuffer.toString();
        Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);

        // Create a buffer with random chars that does not match the sample
        String toSearch = null;
        StringBuffer s = null;
        Matcher m = p.matcher("");
        do {
            s = new StringBuffer(100);
            for (int x = 0; x < 100; x++) {
                int randomChar = 0;
                while (!Character.isLetterOrDigit(randomChar))
                    randomChar = generator.nextInt(maxCharacter);
                if (Character.isSupplementaryCodePoint(randomChar)) {
                    s.append(Character.toChars(randomChar));
                } else {
                    s.append((char) randomChar);
                }
            }
            toSearch = s.toString();
            m.reset(toSearch);
        } while (m.find());

        // Insert the pattern at a random spot; never split a surrogate pair
        int insertIndex = generator.nextInt(99);
        if (Character.isLowSurrogate(s.charAt(insertIndex)))
            insertIndex++;
        s.insert(insertIndex, pattern);
        toSearch = s.toString();

        // Make sure that the pattern is found
        m.reset(toSearch);
        assertTrue(m.find());

        // Make sure that the match text is the pattern
        assertEquals(m.group(), pattern);

        // Make sure match occurred at insertion point
        assertEquals(m.start(), insertIndex);
    }
}
// Testing examples from a file
/**
 * Goes through the file "TestCases.txt" and creates many patterns
 * described in the file, matching the patterns against input lines in
 * the file, and comparing the results against the correct results
 * also found in the file. The file format is described in comments
 * at the head of the file.
 *
 * NOTE(review): only the first part of this method is visible here. The
 * result comparison described above and the end of the read loop are not
 * shown, and nothing in the visible part closes the input stream.
 *
 * @param fileName name of the test case file, resolved against the
 *        "test.src" system property (default ".")
 * @throws IOException if the file cannot be opened or read
 */ publicstaticvoid processFile(String fileName) throws IOException {
File testCases = new File(System.getProperty("test.src", "."),
fileName);
FileInputStream in = new FileInputStream(testCases);
BufferedReader r = new BufferedReader(new InputStreamReader(in));
// Process next test case.
// aLine only drives the loop (stops at end of file); its content is not
// used in the visible part of the body.
String aLine; while((aLine = r.readLine()) != null) { // Read a line for pattern
String patternString = grabLine(r);
Pattern p = null; try {
p = compileTestPattern(patternString);
} catch (PatternSyntaxException e) {
// A syntax error is acceptable only when the expected-result line of this
// test case starts with "error"; the data and result lines are consumed
// either way so the reader stays aligned with the next test case.
String dataString = grabLine(r);
String expectedResult = grabLine(r); if (expectedResult.startsWith("error")) continue;
String line1 = "----------------------------------------";
String line2 = "Pattern = " + patternString;
String line3 = "Data = " + dataString;
fail(line1 + System.lineSeparator() + line2 + System.lineSeparator() + line3 + System.lineSeparator()); continue;
}
// Read a line for input string
String dataString = grabLine(r);
Matcher m = p.matcher(dataString);
StringBuilder result = new StringBuilder();
// Check for IllegalStateExceptions before a match
preMatchInvariants(m);
boolean found = m.find();
// Run the invariant checks that correspond to the outcome of find().
if (found)
postTrueMatchInvariants(m); else
postFalseMatchInvariants(m);
if (flagString.equals("i")) return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
if (flagString.equals("m")) return Pattern.compile(patternString, Pattern.MULTILINE);
return Pattern.compile(patternString);
}
/** * Reads a line from the input file. Keeps reading lines until a non * empty non comment line is read. If the line contains a \n then * these two characters are replaced by a newline char. If a \\uxxxx * sequence is read then the sequence is replaced by the unicode char.
*/ publicstatic String grabLine(BufferedReader r) throws IOException { int index = 0;
String line = r.readLine(); while (line.startsWith("//") || line.length() < 1)
line = r.readLine(); while ((index = line.indexOf("\\n")) != -1) {
StringBuilder temp = new StringBuilder(line);
temp.replace(index, index+2, "\n");
line = temp.toString();
} while ((index = line.indexOf("\\u")) != -1) {
StringBuilder temp = new StringBuilder(line);
String value = temp.substring(index+2, index+6); char aChar = (char)Integer.parseInt(value, 16);
String unicodeChar = "" + aChar;
temp.replace(index, index+6, unicodeChar);
line = temp.toString();
}
// This is for bug 6919132
@Test publicstaticvoid nonBmpClassComplementTest() {
Pattern p = Pattern.compile("\\P{Lu}");
Matcher m = p.matcher(new String(newint[] {0x1d400}, 0, 1));
assertFalse(m.find() && m.start() == 1);
// from a unicode category
p = Pattern.compile("\\P{Lu}");
m = p.matcher(new String(newint[] {0x1d400}, 0, 1));
assertFalse(m.find());
assertTrue(m.hitEnd());
// block
p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
m = p.matcher(new String(newint[] {0x1d400}, 0, 1));
assertFalse(m.find() && m.start() == 1);
p = Pattern.compile("\\P{sc=GRANTHA}");
m = p.matcher(new String(newint[] {0x11350}, 0, 1));
assertFalse(m.find() && m.start() == 1);
}
for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { if (cp >= 0x30000 && (cp & 0x70) == 0){ continue; // only pick couple code points, they are the same
}
// Unicode Script
Character.UnicodeScript script = Character.UnicodeScript.of(cp);
Matcher m;
String str = new String(Character.toChars(cp)); if (script == lastScript) {
m = lastSM;
m.reset(str);
} else {
m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
}
assertTrue(m.matches());
Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
other.reset(str);
assertFalse(other.matches());
lastSM = m;
lastScript = script;
// Unicode Block
Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); if (block == null) { //System.out.printf("Not a Block: cp=%x%n", cp); continue;
} if (block == lastBlock) {
m = lastBM;
m.reset(str);
} else {
m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
}
assertTrue(m.matches());
other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
other.reset(str);
assertFalse(other.matches());
lastBM = m;
lastBlock = block;
}
}
for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { if (!Character.isValidCodePoint(cp) ||
Character.getType(cp) == Character.UNASSIGNED) continue;
String str = new String(Character.toChars(cp)); // single
String p = "\\N{" + Character.getName(cp) + "}";
assertTrue(Pattern.compile(p).matcher(str).matches()); // class[c]
p = "[\\N{" + Character.getName(cp) + "}]";
assertTrue(Pattern.compile(p).matcher(str).matches());
}
// range for (int i = 0; i < 10; i++) { int start = generator.nextInt(20); int end = start + generator.nextInt(200);
String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
String str; for (int cp = start; cp < end; cp++) {
str = new String(Character.toChars(cp));
assertTrue(Pattern.compile(p).matcher(str).matches());
}
str = new String(Character.toChars(end + 10));
assertFalse(Pattern.compile(p).matcher(str).matches());
}
// slice for (int i = 0; i < 10; i++) { int n = generator.nextInt(256); int[] buf = newint[n];
StringBuilder sb = new StringBuilder(1024); for (int j = 0; j < n; j++) { int cp = generator.nextInt(1000); if (!Character.isValidCodePoint(cp) ||
Character.getType(cp) == Character.UNASSIGNED)
cp = 0x4e00; // just use 4e00
sb.append("\\N{").append(Character.getName(cp)).append("}");
buf[j] = cp;
}
String p = sb.toString();
String str = new String(buf, 0, buf.length);
assertTrue(Pattern.compile(p).matcher(str).matches());
}
}
@Test publicstaticvoid horizontalAndVerticalWSTest() {
String hws = new String (newchar[] {
0x09, 0x20, 0xa0, 0x1680, 0x180e,
0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
0x202f, 0x205f, 0x3000 });
String vws = new String (newchar[] {
0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
assertTrue(Pattern.compile("\\h+").matcher(hws).matches() &&
Pattern.compile("[\\h]+").matcher(hws).matches());
assertTrue(!Pattern.compile("\\H").matcher(hws).find() &&
!Pattern.compile("[\\H]").matcher(hws).find());
assertTrue(Pattern.compile("\\v+").matcher(vws).matches() &&
Pattern.compile("[\\v]+").matcher(vws).matches());
assertTrue(!Pattern.compile("\\V").matcher(vws).find() &&
!Pattern.compile("[\\V]").matcher(vws).find());
String prefix = "abcd";
String suffix = "efgh";
String ng = "A"; for (int i = 0; i < hws.length(); i++) {
String c = String.valueOf(hws.charAt(i));
Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
assertTrue(m.find() && c.equals(m.group()));
m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
assertTrue(m.find() && c.equals(m.group()));
String matcherSubstring = hws.substring(0, i) + ng + hws.substring(i);
m = Pattern.compile("\\H").matcher(matcherSubstring);
assertTrue(m.find() && ng.equals(m.group()));
m = Pattern.compile("[\\H]").matcher(matcherSubstring);
assertTrue(m.find() && ng.equals(m.group()));
} for (int i = 0; i < vws.length(); i++) {
String c = String.valueOf(vws.charAt(i));
Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
assertTrue(m.find() && c.equals(m.group()));
m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
assertTrue(m.find() && c.equals(m.group()));
String matcherSubstring = vws.substring(0, i) + ng + vws.substring(i);
m = Pattern.compile("\\V").matcher(matcherSubstring);
assertTrue(m.find() && ng.equals(m.group()));
m = Pattern.compile("[\\V]").matcher(matcherSubstring);
assertTrue(m.find() && ng.equals(m.group()));
} // \v in range is interpreted as 0x0B. This is the undocumented behavior
assertTrue(Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches());
}
// This test is for 8007395
/**
 * Checks that repeated single-character groups followed by a required
 * '@' part report no match on an input that ends with a supplementary
 * character and contains no '@'.
 */
@Test
public static void groupCurlyNotFoundSuppTest() {
    String input = "test this as \ud83d\ude0d";
    for (String pStr : new String[] {
            "test(.)+(@[a-zA-Z.]+)",
            "test(.)*(@[a-zA-Z.]+)",
            "test([^B])+(@[a-zA-Z.]+)",
            "test([^B])*(@[a-zA-Z.]+)",
            "test(\\P{IsControl})+(@[a-zA-Z.]+)",
            "test(\\P{IsControl})*(@[a-zA-Z.]+)",
    }) {
        Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
                           .matcher(input);
        assertFalse(m.find());
    }
}
// This test is for 8023647
/**
 * Checks a back reference to a repeated group: "abc1c" must match
 * {@code (\w)+1\1} and "abc11" must not.
 */
@Test
public static void groupCurlyBackoffTest() {
    assertFalse(!"abc1c".matches("(\\w)+1\\1") || "abc11".matches("(\\w)+1\\1"));
}
// This test is for 8012646
@Test publicstaticvoid patternAsPredicate() {
Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
// This test is for 8035975
/**
 * Walks every single-bit value of an int and checks that
 * {@code Pattern.compile} rejects each bit that is not one of the flag
 * constants listed below with {@link IllegalArgumentException}.
 */
@Test
public static void invalidFlags() {
    // flag runs over all 32 single-bit values; the shift ends the loop at 0
    for (int flag = 1; flag != 0; flag <<= 1) {
        switch (flag) {
            case Pattern.CASE_INSENSITIVE:
            case Pattern.MULTILINE:
            case Pattern.DOTALL:
            case Pattern.UNICODE_CASE:
            case Pattern.CANON_EQ:
            case Pattern.UNIX_LINES:
            case Pattern.LITERAL:
            case Pattern.UNICODE_CHARACTER_CLASS:
            case Pattern.COMMENTS:
                // valid flag, continue
                break;
            default:
                // effectively final copy for use inside the lambda
                int finalFlag = flag;
                assertThrows(IllegalArgumentException.class, () ->
                        Pattern.compile(".", finalFlag));
        }
    }
}
// This test is for 8158482
/**
 * Compiles patterns that switch each embedded flag on and off again,
 * individually and all together. Passes when no exception is thrown.
 */
@Test
public static void embeddedFlags() {
    //Runs without exception.
    Pattern.compile("(?i).(?-i).");
    Pattern.compile("(?m).(?-m).");
    Pattern.compile("(?s).(?-s).");
    Pattern.compile("(?d).(?-d).");
    Pattern.compile("(?u).(?-u).");
    Pattern.compile("(?c).(?-c).");
    Pattern.compile("(?x).(?-x).");
    Pattern.compile("(?U).(?-U).");
    Pattern.compile("(?imsducxU).(?-imsducxU).");
}
/**
 * Runs the grapheme break test data (the UCD grapheme break test file plus
 * the local "GraphemeTestCases.txt") through {@code \X} and
 * {@code \b{g}}, using both Matcher and Scanner, and finishes with a few
 * sanity checks. Both line streams are closed when the run ends.
 *
 * @throws Exception if a test data file cannot be opened or read
 */
@Test
public static void grapheme() throws Exception {
    // One-element array so the lambda can count lines.
    final int[] lineNumber = new int[1];
    try (Stream<String> ucdLines = Files.lines(UCDFiles.GRAPHEME_BREAK_TEST);
         Stream<String> localLines = Files.lines(
                 Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt"))) {
        Stream.concat(ucdLines, localLines).forEach(ln -> {
            lineNumber[0]++;
            if (ln.length() == 0 || ln.startsWith("#")) {
                return;
            }
            // Strip whitespace, rule annotations and trailing comments
            ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
            String[] strs = ln.split("\u00f7|\u00d7");
            StringBuilder src = new StringBuilder();
            ArrayList<String> graphemes = new ArrayList<>();
            StringBuilder buf = new StringBuilder();
            int offBk = 0;
            for (String str : strs) {
                if (str.length() == 0)  // first empty str
                    continue;
                int cp = Integer.parseInt(str, 16);
                src.appendCodePoint(cp);
                buf.appendCodePoint(cp);
                // offBk now indexes the break marker that follows this value
                offBk += (str.length() + 1);
                if (ln.charAt(offBk) == '\u00f7') {    // DIV
                    graphemes.add(buf.toString());
                    buf = new StringBuilder();
                }
            }
            Pattern p = Pattern.compile("\\X");

            // (1) test \X directly
            Matcher m = p.matcher(src.toString());
            for (String g : graphemes) {
                String group = null;
                if (!m.find() || !(group = m.group()).equals(g)) {
                    fail("Failed pattern \\X [" + ln + "] : "
                            + "expected: " + g + " - actual: " + group
                            + "(line " + lineNumber[0] + ")");
                }
            }
            assertFalse(m.find());

            // test \b{g} without \X via Pattern
            Pattern pbg = Pattern.compile("\\b{g}");
            m = pbg.matcher(src.toString());
            m.find();
            int prev = m.end();
            for (String g : graphemes) {
                String group = null;
                if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
                    fail("Failed pattern \\b{g} [" + ln + "] : "
                            + "expected: " + g + " - actual: " + group
                            + "(line " + lineNumber[0] + ")");
                }
                assertEquals("", m.group());
                prev = m.end();
            }
            assertFalse(m.find());

            // (2) test \b{g} + \X via Scanner
            Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
            for (String g : graphemes) {
                String next = null;
                if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
                    fail("Failed \\b{g} [" + ln + "] : "
                            + "expected: " + g + " - actual: " + next
                            + " (line " + lineNumber[0] + ")");
                }
            }
            assertFalse(s.hasNext(p));

            // test \b{g} without \X via Scanner
            s = new Scanner(src.toString()).useDelimiter("\\b{g}");
            for (String g : graphemes) {
                String next = null;
                if (!s.hasNext() || !(next = s.next()).equals(g)) {
                    fail("Failed \\b{g} [" + ln + "] : "
                            + "expected: " + g + " - actual: " + next
                            + " (line " + lineNumber[0] + ")");
                }
            }
            assertFalse(s.hasNext());
        });
    }

    // some sanity checks
    assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() &&
               Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() &&
               Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches());

    // make sure "\b{n}" still works
    assertTrue(Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches());
}
// Guards against hangs/timeouts caused by exponential backtracking.
// Each row is { regex, input, expected result of Matcher.matches() }; the
// numbers in the comments are the bug ids the row(s) below them reproduce.
// A regression shows up as a test timeout rather than a failed assertion.
@Test
public static void expoBacktracking() {
    Object[][] patternMatchers = {
        // 6328855
        { "(.*\n*)*",
          "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
          false },
        // 6192895
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test A",
          true },
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test \u4e00 ",
          false },
        { " *([a-z0-9]+ *)+",
          "hello world this is a test this is a test this is a test A",
          false },
        // 4771934 [FIXED] #5013651?
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
          true },
        // 4866249 [FIXED]
        { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
          "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
          true },
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
          false },
        // 6345469
        // NOTE(review): the escaped blank in these two regexes looks like the
        // remains of an HTML entity lost in this copy of the file - verify
        // the pattern and input strings against the upstream source.
        { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
          " < br/> < / p> <p> <html> <adfasfdasdf> </p>",
          true }, // --> matched
        { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
          " < br/> < / p> <p> <html> <adfasfdasdf> p </p>",
          false },
        // 5026912
        { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
          "156580451111112225588087755221111111566969655555555",
          false},
        // 6988218
        { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
          "'%)) order by ANGEBOT.ID",
          false}, // find
        // 6693451
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
          true },
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
          false },
        // 7006761
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
        // 8140212
        { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
          "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
          false },
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
        /* not fixed
        //8132141 ---> second level exponential backtracking
        { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
          "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
        */
    };

    for (Object[] pm : patternMatchers) {
        String regex = (String) pm[0];
        String input = (String) pm[1];
        boolean expected = (Boolean) pm[2];
        // Actual value first (TestNG order); the message names the failing
        // row, which the bare two-argument assertion could not do.
        assertEquals(Pattern.compile(regex).matcher(input).matches(),
                     expected,
                     "pattern: " + regex);
    }
}
@Test publicstaticvoid invalidGroupName() { // Invalid start of a group name for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", "\u0060", "\u007b", "\u0416")) { for (String pat : List.of("(?<" + groupName + ">)", "\\k<" + groupName + ">")) { var e = expectThrows(PatternSyntaxException.class, () -> Pattern.compile(pat));
assertTrue(e.getMessage().startsWith( "capturing group name does not start with a"
+ " Latin letter"));
}
} // Invalid char in a group name for (String groupName : List.of("a.", "b\u0040", "c\u005b", "d\u0060", "e\u007b", "f\u0416")) { for (String pat : List.of("(?<" + groupName + ">)", "\\k<" + groupName + ">")) { var e = expectThrows(PatternSyntaxException.class, () ->
Pattern.compile(pat));
assertTrue(e.getMessage().startsWith( "named capturing group is missing trailing '>'"));
}
}
}
// Verifies that a malformed or out-of-range repetition "{...}" is rejected
// with a PatternSyntaxException whose message starts with
// "Illegal repetition".
@Test
public static void illegalRepetitionRange() {
    // huge integers > (2^31 - 1)
    // n is exactly 2^32.
    String n = BigInteger.valueOf(1L << 32)
            .toString();
    // m is 2^31 plus a random non-negative value of up to 80 bits;
    // 'generator' is the random source declared elsewhere in this file.
    String m = BigInteger.valueOf(1L << 31)
            .add(new BigInteger(80, generator))
            .toString();

    for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
            n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
        String pat = ".{" + rep + "}";
        var e = expectThrows(PatternSyntaxException.class,
                () -> Pattern.compile(pat));
        assertTrue(e.getMessage().startsWith("Illegal repetition"));
    }
}
// Compiling a pattern that consists of a single surrogate pair with
// CANON_EQ enabled must complete without throwing; there is nothing else
// to assert.
@Test
public static void surrogatePairWithCanonEq() {
    //Runs without exception
    Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
}
//This test is for 8037397
//Ensure we don't drop nested interior character classes to the right of an
//intersection operator.
//
//NOTE(review): in this copy of the file the declarations of letterCharsMatch
//and digitCharsDontMatch were missing (both were used but never defined, and
//rx/ry were never used). The two loops below are reconstructed from the
//variable names and the assertion messages - compare with the upstream test.
@Test
public static void droppedClassesWithIntersection() {
    String rx = "[A-Z&&[A-Z]0-9]";
    String ry = "[A-Z&&[A-F][G-Z]0-9]";

    // Both patterns must agree on every upper-case ASCII letter: splitting
    // [A-Z] into [A-F][G-Z] on the right of && must not change the result.
    boolean letterCharsMatch = true;
    for (char c = 'A'; c <= 'Z'; c++) {
        String ch = String.valueOf(c);
        if (ch.matches(rx) != ch.matches(ry)) {
            letterCharsMatch = false;
            break;
        }
    }

    // The left operand [A-Z] excludes digits, so neither pattern may match
    // one even though 0-9 appears on the right of the intersection.
    boolean digitCharsDontMatch = true;
    for (char c = '0'; c <= '9'; c++) {
        String ch = String.valueOf(c);
        if (ch.matches(rx) || ch.matches(ry)) {
            digitCharsDontMatch = false;
            break;
        }
    }

    assertTrue(letterCharsMatch, "Compiling intersection pattern is "
            + "dropping a character class in its matcher");
    assertTrue(digitCharsDontMatch, "Compiling intersection pattern is "
            + "matching digits where it should not");
}
//This test is for 8269753
//This is for ensuring that the caret doesn't point at the wrong character
//in a syntax exception message because we previously didn't compensate for
//tabs when rendering the offending string that contained tab characters.
@Test
public static void errorMessageCaretIndentation() {
    // A tab followed by a dangling "**" is an invalid pattern.
    String pattern = "\t**";
    var e = expectThrows(PatternSyntaxException.class,
            () -> Pattern.compile(pattern));
    // The caret line of the message must start with the same tab as the
    // pattern so that the caret lines up under the offending character.
    var sep = System.lineSeparator();
    assertTrue(e.getMessage().contains(sep + "\t ^"));
}
//This test is for 8276694
//Ensure our error message indicates we have an unescaped backslash when we
//encounter one.
@Test
public static void unescapedBackslash() {
    // The regex is a single backslash with nothing after it to escape.
    String pattern = "\\";
    var e = expectThrows(PatternSyntaxException.class,
            () -> Pattern.compile(pattern));
    assertTrue(e.getMessage().contains("Unescaped trailing backslash"));
}
//This test is for 8280403
//Given bad intersection syntax, we should throw a PatternSyntaxException.
@Test
public static void badIntersectionSyntax() {
    // The character class ends with "&&" and has no right-hand operand.
    String pattern = "[˜\\H +F&&]";
    var e = expectThrows(PatternSyntaxException.class,
            () -> Pattern.compile(pattern));
    assertTrue(e.getMessage().contains("Bad intersection syntax"));
}
//This test is for 8264160
//Here we check for inconsistencies between the behavior of \w and the
//behavior of \b. Prior to this fix, the two flags did not behave in a
//consistent way ie \b would recognize non-\w characters as part of a word
//in some cases. This test verifies that the two behave consistently
//for all codepoints we support.
//
//NOTE(review): this method is damaged in this copy of the file. Only the two
//pattern declarations below are present; there are no assertions and no
//closing brace before the next @Test method, and the modifiers are fused
//into one token. Restore the full body from the upstream source before
//relying on this test.
@Test publicstaticvoid wordBoundaryInconsistencies() {
// Matches a single word character using the default (non-Unicode) \w.
Pattern basicWordCharPattern = Pattern.compile("\\w");
// Matches ';', then a word boundary, then any one character (DOTALL lets
// '.' match line terminators as well).
Pattern basicWordCharBoundaryPattern =
Pattern.compile(";\\b.", Pattern.DOTALL);
//This test is for 8281560
//Checks that when the Canonical Equivalence flag is set, the behavior for
//Matcher::hitEnd is equivalent for these similar patterns that saw
//inconsistencies.
@Test
public static void prematureHitEndInNFCCharProperty() {
    var testInput = "a1a1";
    // Same language, written once with literals and once with
    // single-character classes.
    var pat1 = "(a+|1+)";
    var pat2 = "([a]+|[1]+)";

    var matcher1 = Pattern.compile(pat1, Pattern.CANON_EQ).matcher(testInput);
    var matcher2 = Pattern.compile(pat2, Pattern.CANON_EQ).matcher(testInput);

    // Record hitEnd() after every successful find() for each matcher.
    List<Boolean> results1 = new ArrayList<>();
    List<Boolean> results2 = new ArrayList<>();

    while (matcher1.find()) {
        results1.add(matcher1.hitEnd());
    }

    while (matcher2.find()) {
        results2.add(matcher2.hitEnd());
    }

    // Both the number of matches and the hitEnd() value of each must agree.
    assertEquals(results1, results2);
}
//This test is for 8281315
//Checks that we are able to correctly match this case with a backref
//without encountering an IndexOutOfBoundsException.
@Test
public static void iOOBForCIBackrefs() {
    // Three copies of the same supplementary character (U+1F495), each
    // encoded as a surrogate pair.
    String line = "\ud83d\udc95\ud83d\udc95\ud83d\udc95";
    // Case-insensitive back reference repeated at least twice.
    var pattern2 = Pattern.compile("(?i)(.)\\1{2,}");
    assertTrue(pattern2.matcher(line).find());
}
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.103 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.