/*
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
* @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
* 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
* 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694
* 8280403 8264160 8281315
* @library /test/lib
* @library /lib/testlibrary/java/lang
* @build jdk.test.lib.RandomFactory
* @author Mike McCloskey
* @run testng RegExTest
* @key randomness
*/
import java.io.*;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.testng.annotations.Test;
import org.testng.Assert;
import jdk.test.lib.RandomFactory;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertNotSame;
import static org.testng.Assert.assertThrows;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import static org.testng.Assert.expectThrows;
/**
* This is a test class created to check the operation of
* the Pattern and Matcher classes.
*/
public class RegExTest {
// Shared pseudo-random source used by the randomized tests in this class.
// It is obtained from the test library's RandomFactory; the file header says
// the PRNG seed can be set with -Dseed=X (presumably honoured by
// RandomFactory -- confirm there).
private static final Random generator = RandomFactory.getRandom();
// Utility functions
/**
 * Builds a string of {@code length} characters, each one a lowercase ASCII
 * letter ('a'..'z') picked with the shared {@code generator}.
 */
private static String getRandomAlphaString(int length) {
    StringBuilder letters = new StringBuilder(length);
    int remaining = length;
    while (remaining-- > 0) {
        // 'a' is code 97; nextInt(26) selects one of the 26 letters.
        letters.append((char) ('a' + generator.nextInt(26)));
    }
    return letters.toString();
}
/**
 * Advances {@code m} to its next match and asserts that the matched text
 * equals {@code expected}.
 *
 * A missing match is reported as an assertion failure instead of letting
 * {@code m.group()} throw IllegalStateException.
 */
private static void check(Matcher m, String expected) {
    assertTrue(m.find(), "no match found, expected \"" + expected + "\"");
    assertEquals(m.group(), expected);
}
/**
 * Advances {@code m} to its next match and asserts that "matched text
 * equals {@code result}" has the truth value {@code expected}.
 *
 * A missing match is reported as an assertion failure instead of letting
 * {@code m.group()} throw IllegalStateException.
 */
private static void check(Matcher m, String result, boolean expected) {
    assertTrue(m.find(), "no match found while comparing against \"" + result + "\"");
    assertEquals(m.group().equals(result), expected);
}
/**
 * Asserts that searching {@code s} with the compiled pattern {@code p}
 * finds a match exactly when {@code expected} is true.
 */
private static void check(Pattern p, String s, boolean expected) {
    Matcher matcher = p.matcher(s);
    boolean found = matcher.find();
    assertEquals(found, expected);
}
/**
 * Compiles the regex {@code p} and asserts that searching {@code s} with it
 * finds a match exactly when {@code expected} is true.
 */
private static void check(String p, String s, boolean expected) {
    assertEquals(Pattern.compile(p).matcher(s).find(), expected);
}
/**
 * Checks a property class against a single char. {@code p} is the braced
 * property name (for example "{javaDigit}"); when {@code expected} is true
 * the char must match {@code \p} + p, otherwise it must match the negated
 * form {@code \P} + p.
 */
private static void check(String p, char c, boolean expected) {
    String prefix = expected ? "\\p" : "\\P";
    Matcher matcher = Pattern.compile(prefix + p).matcher(String.valueOf(c));
    assertTrue(matcher.find());
}
/**
 * Code point variant of the property check: the input is the string form
 * of {@code codePoint}. When {@code expected} is true it must match
 * {@code \p} + p, otherwise it must match the negated form {@code \P} + p.
 */
private static void check(String p, int codePoint, boolean expected) {
    String prefix;
    if (expected) {
        prefix = "\\p";
    } else {
        prefix = "\\P";
    }
    // Compile before converting the code point, as the original did.
    Pattern property = Pattern.compile(prefix + p);
    String input = new String(Character.toChars(codePoint));
    assertTrue(property.matcher(input).find());
}
/**
 * Compiles {@code p} with {@code flag} and searches {@code input}.
 * When a match is expected, the first match must equal {@code s};
 * otherwise the search must find nothing.
 */
private static void check(String p, int flag, String input, String s,
                          boolean expected)
{
    Pattern compiled = Pattern.compile(p, flag);
    Matcher matcher = compiled.matcher(input);
    if (!expected) {
        check(compiled, input, expected);
        return;
    }
    check(matcher, s, expected);
}
/**
 * Finds the first match of {@code p} in {@code s} and asserts that the
 * named group {@code g} captured {@code expected}, and that the group's
 * start and end offsets point at the first and last characters of
 * {@code expected} inside {@code s}.
 */
private static void check(Pattern p, String s, String g, String expected) {
    Matcher m = p.matcher(s);
    m.find();
    boolean groupIsConsistent = m.group(g).equals(expected)
            && s.charAt(m.start(g)) == expected.charAt(0)
            && s.charAt(m.end(g) - 1) == expected.charAt(expected.length() - 1);
    assertTrue(groupIsConsistent);
}
/**
 * Asserts that replacing the first match of regex {@code p} in {@code s}
 * with the replacement {@code r} yields {@code expected}.
 */
private static void checkReplaceFirst(String p, String s, String r, String expected)
{
    // TestNG's assertEquals takes (actual, expected); keeping that order
    // makes the failure message label the two values correctly.
    String actual = Pattern.compile(p).matcher(s).replaceFirst(r);
    assertEquals(actual, expected);
}
/**
 * Asserts that replacing every match of regex {@code p} in {@code s}
 * with the replacement {@code r} yields {@code expected}.
 */
private static void checkReplaceAll(String p, String s, String r, String expected)
{
    // TestNG's assertEquals takes (actual, expected); keeping that order
    // makes the failure message label the two values correctly.
    String actual = Pattern.compile(p).matcher(s).replaceAll(r);
    assertEquals(actual, expected);
}
/**
 * Asserts that compiling {@code p} is rejected with a
 * PatternSyntaxException.
 */
private static void checkExpectedFail(String p) {
    assertThrows(PatternSyntaxException.class, () -> {
        Pattern.compile(p);
    });
}
/**
 * Rewrites {@code s} so that every ASCII letter [A-Za-z] becomes a
 * supplementary character: the surrogate pair U+D800 followed by
 * U+DC00 plus the letter's code.
 *
 * A backslash and the character after it are copied unchanged; when that
 * character is 'u', the next four characters are copied unchanged as well.
 * Everything else is copied as is. This method does NOT fully take care of
 * the regex syntax, and a truncated unicode escape is not guarded against.
 */
public static String toSupplementaries(String s) {
    final int n = s.length();
    final StringBuilder out = new StringBuilder(n * 2);
    int pos = 0;
    while (pos < n) {
        final char ch = s.charAt(pos++);
        final boolean asciiLetter =
                ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
        if (asciiLetter) {
            out.append('\ud800').append((char) ('\udc00' + ch));
            continue;
        }
        out.append(ch);
        if (ch != '\\' || pos >= n) {
            continue;
        }
        // Escape sequence: keep the escaped character verbatim.
        final char escaped = s.charAt(pos++);
        out.append(escaped);
        if (escaped == 'u') {
            // assume no syntax error: four more characters follow
            for (int k = 0; k < 4; k++) {
                out.append(s.charAt(pos++));
            }
        }
    }
    return out.toString();
}
// Regular expression tests
//Following three tests execute from a file.
// Hands the case file TestCases.txt to processFile.
@Test
public static void processTestCases() throws IOException {
    final String caseFile = "TestCases.txt";
    processFile(caseFile);
}
// Hands the case file BMPTestCases.txt to processFile.
@Test
public static void processBMPTestCases() throws IOException {
    final String caseFile = "BMPTestCases.txt";
    processFile(caseFile);
}
// Hands the case file SupplementaryTestCases.txt to processFile.
@Test
public static void processSupplementaryTestCases() throws IOException {
    final String caseFile = "SupplementaryTestCases.txt";
    processFile(caseFile);
}
// Verifies how the public Pattern and Matcher entry points react to null
// arguments: all of them must throw NullPointerException, except
// Matcher.usePattern, which is documented to throw
// IllegalArgumentException for a null pattern.
@Test
public static void nullArgumentTest() {
    // Static and instance methods of Pattern
    assertThrows(NullPointerException.class, () -> Pattern.compile(null));
    assertThrows(NullPointerException.class, () -> Pattern.matches(null, null));
    assertThrows(NullPointerException.class, () -> Pattern.matches("xyz", null));
    assertThrows(NullPointerException.class, () -> Pattern.quote(null));
    assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").split(null));
    assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").matcher(null));

    // Matcher methods, called on a matcher that holds a successful match
    final Matcher m = Pattern.compile("xyz").matcher("xyz");
    m.matches();
    assertThrows(NullPointerException.class, () -> m.appendTail((StringBuffer) null));
    assertThrows(NullPointerException.class, () -> m.appendTail((StringBuilder)null));
    assertThrows(NullPointerException.class, () -> m.replaceAll((String) null));
    assertThrows(NullPointerException.class, () -> m.replaceAll((Function<MatchResult, String>)null));
    assertThrows(NullPointerException.class, () -> m.replaceFirst((String)null));
    assertThrows(NullPointerException.class, () -> m.replaceFirst((Function<MatchResult, String>) null));
    assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuffer)null, null));
    assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuilder)null, null));

    // Checked before reset(null) so it does not depend on whatever state
    // the matcher is left in after a rejected reset.
    assertThrows(IllegalArgumentException.class, () -> m.usePattern(null));

    assertThrows(NullPointerException.class, () -> m.reset(null));
    assertThrows(NullPointerException.class, () -> Matcher.quoteReplacement(null));
}
// This is for bug 6635133
// A character class range whose endpoints are surrogate pairs written as
// Unicode escapes must match a supplementary character inside that range.
@Test
public static void surrogatesInClassTest() {
    Matcher m = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]")
                       .matcher("\ud834\udd22");
    assertTrue(m.find(), "Surrogate pair in Unicode escape");
}
// This is for bug 6990617
// \Q...\E quoting must be removed correctly when it directly follows an
// octal escape of only two or three digits and the first quoted character
// is itself an octal digit.
@Test
public static void removeQEQuotingTest() {
    final String regex = "\\011\\Q1sometext\\E\\011\\Q2sometext\\E";
    final String input = "\t1sometext\t2sometext";
    assertTrue(Pattern.compile(regex).matcher(input).find(), "Remove Q/E Quoting");
}
// This is for bug 4988891
// toMatchResult() must return a snapshot of the Matcher's current match
// that is not affected by later operations on the Matcher.
@Test
public static void toMatchResultTest() {
    Matcher squidFinder = Pattern.compile("squid")
            .matcher("agiantsquidofdestinyasmallsquidoffate");

    // First match: the snapshot is a distinct object with the same start.
    squidFinder.find();
    final int firstStart = squidFinder.start();
    final MatchResult firstSnapshot = squidFinder.toMatchResult();
    assertNotSame(firstSnapshot, squidFinder, "Matcher toMatchResult is identical object");
    final int snapshotStartBefore = firstSnapshot.start();
    assertEquals(firstStart, snapshotStartBefore, "equal matchers don't have equal start indices");

    // Second match: the matcher moves on, the old snapshot does not.
    squidFinder.find();
    final int secondStart = squidFinder.start();
    final int snapshotStartAfter = firstSnapshot.start();
    assertNotEquals(secondStart, snapshotStartAfter, "Matcher2 and Result2 should not be equal");
    assertEquals(snapshotStartBefore, snapshotStartAfter, "Second match result should have the same state");

    // A new snapshot reflects the second match and is a new object.
    final MatchResult secondSnapshot = squidFinder.toMatchResult();
    assertNotSame(firstSnapshot, secondSnapshot, "Second Matcher copy should not be identical to the first.");
    assertEquals(secondSnapshot.start(), secondStart, "mr2 index should equal matcher index");
}
// This is for bug 8074678
// A MatchResult taken when no match is available must throw
// IllegalStateException from its accessors; one taken from a successful
// match must throw IndexOutOfBoundsException for a non-existent group.
@Test
public static void toMatchResultTest2() {
    // No match available
    Matcher failing = Pattern.compile("nomatch").matcher("hello world");
    failing.find();
    final MatchResult noMatch = failing.toMatchResult();
    assertThrows(IllegalStateException.class, () -> noMatch.start());
    assertThrows(IllegalStateException.class, () -> noMatch.start(2));
    assertThrows(IllegalStateException.class, () -> noMatch.end());
    assertThrows(IllegalStateException.class, () -> noMatch.end(2));
    assertThrows(IllegalStateException.class, () -> noMatch.group());
    assertThrows(IllegalStateException.class, () -> noMatch.group(2));

    // Match available, but the pattern has only one capturing group
    Matcher succeeding = Pattern.compile("(match)").matcher("there is a match");
    succeeding.find();
    final MatchResult oneGroup = succeeding.toMatchResult();
    assertThrows(IndexOutOfBoundsException.class, () -> oneGroup.start(2));
    assertThrows(IndexOutOfBoundsException.class, () -> oneGroup.end(2));
    assertThrows(IndexOutOfBoundsException.class, () -> oneGroup.group(2));
}
// This is for bug 5013885
// Checks that hitEnd() is reported correctly when a literal run of
// characters (a "slice") is matched against short or mismatching input.
@Test
public static void hitEndTest() {
    // Basic test of Slice node
    Matcher m = Pattern.compile("^squidattack").matcher("squack");
    m.find();
    assertFalse(m.hitEnd(), "Matcher should not be at end of sequence");
    m.reset("squid");
    m.find();
    assertTrue(m.hitEnd(), "Matcher should be at the end of sequence");

    // Test Slice, SliceA and SliceU nodes
    // NOTE(review): the third flag value is UNICODE_CASE on its own, which
    // per the Pattern documentation only has an effect together with
    // CASE_INSENSITIVE -- confirm whether the combination was intended.
    final int[] flagVariants = {0, Pattern.CASE_INSENSITIVE, Pattern.UNICODE_CASE};
    for (int flags : flagVariants) {
        m = Pattern.compile("^abc", flags).matcher("ad");
        m.find();
        assertFalse(m.hitEnd(), "Slice node test");
        m.reset("ab");
        m.find();
        assertTrue(m.hitEnd(), "Slice node test");
    }

    // Test Boyer-Moore node
    m = Pattern.compile("catattack").matcher("attack");
    m.find();
    assertTrue(m.hitEnd(), "Boyer-Moore node test");

    m = Pattern.compile("catattack").matcher("attackattackattackcatatta");
    m.find();
    assertTrue(m.hitEnd(), "Boyer-More node test");

    // 8184706: Matching u+0d at EOL against \R should hit-end
    final Pattern lineBreak = Pattern.compile("...\\R");
    m = lineBreak.matcher("cat" + (char)0x0a);
    m.find();
    assertFalse(m.hitEnd());

    m = lineBreak.matcher("cat" + (char)0x0d);
    m.find();
    assertTrue(m.hitEnd());

    m = lineBreak.matcher("cat" + (char)0x0d + (char)0x0a);
    m.find();
    assertFalse(m.hitEnd());
}
// This is for bug 4997476
// Customer-submitted code demonstrating a regression: it walks the word
// boundaries of a string with find(int) and checks each non-blank piece.
@Test
public static void wordSearchTest() {
    final String text = "word1 word2 word3";
    final Matcher boundary = Pattern.compile("\\b").matcher(text);
    int from = 0;
    while (boundary.find(from)) {
        final int begin = boundary.start();
        if (begin == text.length()) {
            break;
        }
        // The next boundary, or the end of the text when there is none.
        from = boundary.find(begin + 1) ? boundary.start() : text.length();
        final String piece = text.substring(begin, from);
        if (piece.equals(" ")) {
            continue;
        }
        assertTrue(text.substring(begin, from - 1).startsWith("word"));
    }
}
// This is for bug 4994840
// The problem only occurs with multiline patterns containing a
// beginning-of-line caret "^" followed by an expression that also matches
// the empty string. The test passes if both find() calls return normally.
@Test
public static void caretAtEndTest() {
    Matcher m = Pattern.compile("^x?", Pattern.MULTILINE).matcher("\r");
    for (int attempt = 0; attempt < 2; attempt++) {
        m.find();
    }
}
// This test is for 4979006
// Check to see if word boundary construct properly handles unicode
// non spacing marks
@Test
public static void unicodeWordBoundsTest() {
    // Two spaces: every layout diagram below starts and ends with "SS",
    // and the expected boundary indices (first boundary at 2) rely on it.
    String spaces = "  ";
    String wordChar = "a";
    String nsm = "\u030a";

    // Checked with a test assertion rather than the Java assert keyword,
    // so the precondition is verified even when -ea is not in effect.
    assertEquals(Character.getType('\u030a'), (int) Character.NON_SPACING_MARK);

    Pattern pattern = Pattern.compile("\\b");
    Matcher matcher = pattern.matcher("");

    // S=other B=word character N=non spacing mark .=word boundary
    // SS.BB.SS
    String input = spaces + wordChar + wordChar + spaces;
    twoFindIndexes(input, matcher, 2, 4);
    // SS.BBN.SS
    input = spaces + wordChar + wordChar + nsm + spaces;
    twoFindIndexes(input, matcher, 2, 5);
    // SS.BN.SS
    input = spaces + wordChar + nsm + spaces;
    twoFindIndexes(input, matcher, 2, 4);
    // SS.BNN.SS
    input = spaces + wordChar + nsm + nsm + spaces;
    twoFindIndexes(input, matcher, 2, 5);
    // SSN.BB.SS
    input = spaces + nsm + wordChar + wordChar + spaces;
    twoFindIndexes(input, matcher, 3, 5);
    // SS.BNB.SS
    input = spaces + wordChar + nsm + wordChar + spaces;
    twoFindIndexes(input, matcher, 2, 5);
    // SSNNSS -- no word character at all, so no boundary is expected
    input = spaces + nsm + nsm + spaces;
    matcher.reset(input);
    assertFalse(matcher.find());
    // SSN.BBN.SS
    input = spaces + nsm + wordChar + wordChar + nsm + spaces;
    twoFindIndexes(input, matcher, 3, 6);
}
/**
 * Resets {@code matcher} to {@code input} and asserts that its first two
 * matches start at index {@code a} and index {@code b} respectively.
 *
 * Each find() is asserted, so a missing match is reported as an assertion
 * failure instead of an IllegalStateException from start().
 */
private static void twoFindIndexes(String input, Matcher matcher, int a,
                                   int b)
{
    matcher.reset(input);
    assertTrue(matcher.find(), "first match not found");
    assertEquals(matcher.start(), a);
    assertTrue(matcher.find(), "second match not found");
    assertEquals(matcher.start(), b);
}
// This test is for 6284152
/**
 * Collects every match of {@code regex} in {@code input}, in order, and
 * asserts that the matched strings are exactly {@code expected}.
 */
private static void check(String regex, String input, String[] expected) {
    List<String> result = new ArrayList<>();
    Matcher m = Pattern.compile(regex).matcher(input);
    while (m.find()) {
        result.add(m.group());
    }
    // TestNG's assertEquals takes (actual, expected).
    assertEquals(result, Arrays.asList(expected));
}
@Test
public static void lookbehindTest() {
//Positive
check("(?<=%.{0,5})foo\\d",
"%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
new String[]{"foo1", "foo2", "foo3"});
//boundary at end of the lookbehind sub-regex should work consistently
//with the boundary just after the lookbehind sub-regex
check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
check("(?, "abc foo", new String[0]);
check("(?, "abc foo", new String[0]);
//Negative
check("(?,
"%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
new String[] {"foo4", "foo5"});
//Positive greedy
check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
//Positive reluctant
check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
//supplementary
check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
new String[] {"fo\ud800\udc00o"});
check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
new String[] {"fo\ud800\udc00o"});
check("(?, "%afo\ud800\udc00o",
new String[] {"fo\ud800\udc00o"});
check("(?, "%afo\ud800\udc00o",
new String[] {"fo\ud800\udc00o"});
}
// This test is for 4938995
// Check to see if weak region boundaries are transparent to
// lookahead and lookbehind constructs
@Test
public static void boundsTest() {
String fullMessage = "catdogcat";
Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
Matcher matcher = pattern.matcher("catdogca");
matcher.useTransparentBounds(true);
assertFalse(matcher.find());
matcher.reset("atdogcat");
assertFalse(matcher.find());
matcher.reset(fullMessage);
assertTrue(matcher.find());
matcher.reset(fullMessage);
matcher.region(0,9);
assertTrue(matcher.find());
matcher.reset(fullMessage);
matcher.region(0,6);
assertTrue(matcher.find());
matcher.reset(fullMessage);
matcher.region(3,6);
assertTrue(matcher.find());
matcher.useTransparentBounds(false);
assertFalse(matcher.find());
// Negative lookahead/lookbehind
pattern = Pattern.compile("(?);
matcher = pattern.matcher("dogcat");
matcher.useTransparentBounds(true);
matcher.region(0,3);
assertFalse(matcher.find());
matcher.reset("catdog");
matcher.region(3,6);
assertFalse(matcher.find());
matcher.useTransparentBounds(false);
matcher.reset("dogcat");
matcher.region(0,3);
assertTrue(matcher.find());
matcher.reset("catdog");
matcher.region(3,6);
assertTrue(matcher.find());
}
// This test is for 4945394
// After the single occurrence of "$0" has been found, every further
// find() must report no match.
@Test
public static void findFromTest() {
    Matcher m = Pattern.compile("\\$0").matcher("This is 40 $0 message.");
    assertTrue(m.find());
    for (int retry = 0; retry < 2; retry++) {
        assertFalse(m.find());
    }
}
// This test is for 4872664 and 4892980
// Negated character classes mixing ASCII members with characters above
// the ASCII range (U+203A, U+203B).
@Test
public static void negatedCharClassTest() {
    assertTrue(Pattern.compile("[^>]").matcher("\u203A").matches());

    Matcher m = Pattern.compile("[^fr]").matcher("a");
    assertTrue(m.find());
    m.reset("\u203A");
    assertTrue(m.find());

    // The same class used as a split delimiter
    for (String subject : new String[] {"for", "f\u203Ar"}) {
        String[] parts = subject.split("[^fr]");
        assertEquals(parts[0], "f");
        assertEquals(parts[1], "r");
    }

    // Test adding to bits, subtracting a node, then adding to bits again
    m = Pattern.compile("[^f\u203Ar]").matcher("a");
    assertTrue(m.find());
    for (String excluded : new String[] {"f", "\u203A", "r"}) {
        m.reset(excluded);
        assertFalse(m.find());
    }
    m.reset("\u203B");
    assertTrue(m.find());

    // Test subtracting a node, adding to bits, subtracting again
    m = Pattern.compile("[^\u203Ar\u203B]").matcher("a");
    assertTrue(m.find());
    for (String excluded : new String[] {"\u203A", "r", "\u203B"}) {
        m.reset(excluded);
        assertFalse(m.find());
    }
    m.reset("\u203C");
    assertTrue(m.find());
}
// This test is for 4628291
// Pattern.toString() must return the regex source. Matcher.toString() is
// unspecified, so it is only called in several matcher states to make
// sure it returns normally.
@Test
public static void toStringTest() {
    Pattern p = Pattern.compile("b+");
    assertEquals(p.toString(), "b+");

    Matcher m = p.matcher("aaabbbccc");
    m.toString();       // freshly created
    m.find();
    m.toString();       // after a find
    m.region(0,3);
    m.toString();       // with a region set
    m.reset();
    m.toString();       // after reset
}
// This test is for 4808962
// Exercises Pattern.LITERAL and Pattern.quote(): metacharacters and \Q..\E
// markers must be matched as plain text, first with BMP input and then with
// the same cases converted by toSupplementaries(). The flags variable
// accumulates further flags as the test proceeds, so statement order matters.
@Test
public static void literalPatternTest() {
int flags = Pattern.LITERAL;
// LITERAL flag and Pattern.quote must both treat "\t$^" as plain characters
Pattern pattern = Pattern.compile("abc\\t$^", flags);
check(pattern, "abc\\t$^", true);
pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
check(pattern, "abc\\t$^", true);
// Under LITERAL, \Q and \E are ordinary text, not quoting markers
pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
check(pattern, "\\Qa^$bcabc\\E", true);
check(pattern, "a^$bcabc", false);
// Without LITERAL: escaped backslashes followed by Q / E
pattern = Pattern.compile("\\\\Q\\\\E");
check(pattern, "\\Q\\E", true);
pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
check(pattern, "abcefg\\Q\\Ehij", true);
pattern = Pattern.compile("\\\\\\Q\\\\E");
check(pattern, "\\\\\\\\", true);
// Pattern.quote applied to text that itself contains \Q and/or \E
pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
check(pattern, "\\Qa^$bcabc\\E", true);
check(pattern, "a^$bcabc", false);
pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
check(pattern, "\\Qabc\\Edef", true);
check(pattern, "abcdef", false);
pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
check(pattern, "abc\\Edef", true);
check(pattern, "abcdef", false);
pattern = Pattern.compile(Pattern.quote("\\E"));
check(pattern, "\\E", true);
// Unbalanced parentheses and quantifiers are harmless under LITERAL
pattern = Pattern.compile("((((abc.+?:)", flags);
check(pattern, "((((abc.+?:)", true);
// LITERAL | MULTILINE: ^ and $ stay literal characters
flags |= Pattern.MULTILINE;
pattern = Pattern.compile("^cat$", flags);
check(pattern, "abc^cat$def", true);
check(pattern, "cat", false);
// ... | CASE_INSENSITIVE: case folding still applies to literal text
flags |= Pattern.CASE_INSENSITIVE;
pattern = Pattern.compile("abcdef", flags);
check(pattern, "ABCDEF", true);
check(pattern, "AbCdEf", true);
// ... | DOTALL: dots stay literal dots
flags |= Pattern.DOTALL;
pattern = Pattern.compile("a...b", flags);
check(pattern, "A...b", true);
check(pattern, "Axxxb", false);
flags |= Pattern.CANON_EQ;
//Note: Possible issue
// NOTE(review): the CANON_EQ pattern is compiled into 'p', but the two
// checks below still use 'pattern' (the literal "a...b" pattern compiled
// above), so 'p' is never matched against anything -- confirm the intent.
Pattern p = Pattern.compile("testa\u030a", flags);
check(pattern, "testa\u030a", false);
check(pattern, "test\u00e5", false);
// Supplementary character test
// Same cases as above with ASCII letters mapped by toSupplementaries();
// flags starts again from LITERAL only.
flags = Pattern.LITERAL;
pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
check(pattern, toSupplementaries("abc\\t$^"), true);
pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
check(pattern, toSupplementaries("abc\\t$^"), true);
pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
check(pattern, toSupplementaries("a^$bcabc"), false);
pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
check(pattern, toSupplementaries("a^$bcabc"), false);
pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
check(pattern, toSupplementaries("abcdef"), false);
pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
check(pattern, toSupplementaries("abc\\Edef"), true);
check(pattern, toSupplementaries("abcdef"), false);
pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
check(pattern, toSupplementaries("((((abc.+?:)"), true);
flags |= Pattern.MULTILINE;
pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
check(pattern, toSupplementaries("abc^cat$def"), true);
check(pattern, toSupplementaries("cat"), false);
// CASE_INSENSITIVE is not added in this half, unlike the BMP half above
flags |= Pattern.DOTALL;
// note: this is case-sensitive.
pattern = Pattern.compile(toSupplementaries("a...b"), flags);
check(pattern, toSupplementaries("a...b"), true);
check(pattern, toSupplementaries("axxxb"), false);
flags |= Pattern.CANON_EQ;
String t = toSupplementaries("test");
//Note: Possible issue
// NOTE(review): as in the BMP half, 'p' is assigned but the checks below
// use 'pattern' (the supplementary "a...b" pattern) -- confirm the intent.
p = Pattern.compile(t + "a\u030a", flags);
check(pattern, t + "a\u030a", false);
check(pattern, t + "\u00e5", false);
}
// This test is for 4803179
// This test is also for 4808962, replacement parts
// An unquoted "$0" replacement is a group reference, whereas the result of
// Matcher.quoteReplacement() must be inserted verbatim.
@Test
public static void literalReplacementTest() {
    final int flags = Pattern.LITERAL;

    Matcher m = Pattern.compile("abc", flags).matcher("zzzabczzz");
    String replacement = "$0";
    assertEquals(m.replaceAll(replacement), "zzzabczzz");

    m.reset();
    assertEquals(m.replaceAll(Matcher.quoteReplacement(replacement)), "zzz$0zzz");

    m.reset();
    replacement = "\\t$\\$";
    assertEquals(m.replaceAll(Matcher.quoteReplacement(replacement)), "zzz\\t$\\$zzz");

    // Supplementary character test
    m = Pattern.compile(toSupplementaries("abc"), flags)
               .matcher(toSupplementaries("zzzabczzz"));
    replacement = "$0";
    assertEquals(m.replaceAll(replacement), toSupplementaries("zzzabczzz"));

    m.reset();
    assertEquals(m.replaceAll(Matcher.quoteReplacement(replacement)),
                 toSupplementaries("zzz$0zzz"));

    m.reset();
    replacement = "\\t$\\$";
    assertEquals(m.replaceAll(Matcher.quoteReplacement(replacement)),
                 toSupplementaries("zzz\\t$\\$zzz"));

    // IAE should be thrown if backslash or '$' is the last character
    // in replacement string
    assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "$"));
    assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "\\"));
}
// This test is for 4757029
// Exercises Matcher.region(): matches must be confined to the region,
// invalid bounds must be rejected, and anchors must follow the
// anchoring-bounds setting. The same matcher is reused throughout, so the
// statements depend on each other's order.
@Test
public static void regionTest() {
Pattern pattern = Pattern.compile("abc");
Matcher matcher = pattern.matcher("abcdefabc");
// Whole input: both occurrences of "abc" are found
matcher.region(0,9);
assertTrue(matcher.find());
assertTrue(matcher.find());
// Region covering exactly the first "abc"
matcher.region(0,3);
assertTrue(matcher.find());
// Region "def": nothing to find
matcher.region(3,6);
assertFalse(matcher.find());
// Region "ab": too short for the pattern
matcher.region(0,2);
assertFalse(matcher.find());
// Negative, reversed or out-of-range bounds must be rejected
expectRegionFail(matcher, 1, -1);
expectRegionFail(matcher, -1, -1);
expectRegionFail(matcher, -1, 1);
expectRegionFail(matcher, 5, 3);
expectRegionFail(matcher, 5, 12);
expectRegionFail(matcher, 12, 12);
// Anchors ^ and $ against region edges
pattern = Pattern.compile("^abc$");
matcher = pattern.matcher("zzzabczzz");
matcher.region(0,9);
assertFalse(matcher.find());
// Anchoring bounds have not been changed yet, and the match succeeds at
// the region edges
matcher.region(3,6);
assertTrue(matcher.find());
// After useAnchoringBounds(false) the same region no longer matches
matcher.region(3,6);
matcher.useAnchoringBounds(false);
assertFalse(matcher.find());
// Supplementary character test
// toSupplementaries() turns each letter into two chars, hence the "*2"
// on every index; odd offsets fall in the middle of a surrogate pair.
pattern = Pattern.compile(toSupplementaries("abc"));
matcher = pattern.matcher(toSupplementaries("abcdefabc"));
matcher.region(0,9*2);
assertTrue(matcher.find());
assertTrue(matcher.find());
matcher.region(0,3*2);
assertTrue(matcher.find());
// Region starting inside the first surrogate pair
matcher.region(1,3*2);
assertFalse(matcher.find());
matcher.region(3*2,6*2);
assertFalse(matcher.find());
matcher.region(0,2*2);
assertFalse(matcher.find());
// Region ending inside the third character's surrogate pair
matcher.region(0,2*2+1);
assertFalse(matcher.find());
expectRegionFail(matcher, 2, -1);
expectRegionFail(matcher, -1, -1);
expectRegionFail(matcher, -1, 2);
expectRegionFail(matcher, 5*2, 3*2);
expectRegionFail(matcher, 5*2, 12*2);
expectRegionFail(matcher, 12*2, 12*2);
pattern = Pattern.compile(toSupplementaries("^abc$"));
matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
matcher.region(0,9*2);
assertFalse(matcher.find());
matcher.region(3*2,6*2);
assertTrue(matcher.find());
// Region edges that split a surrogate pair at the start or at the end
matcher.region(3*2+1,6*2);
assertFalse(matcher.find());
matcher.region(3*2,6*2-1);
assertFalse(matcher.find());
matcher.region(3*2,6*2);
matcher.useAnchoringBounds(false);
assertFalse(matcher.find());
// JDK-8230829
// The region (0,1) contains only the high surrogate of the pair
pattern = Pattern.compile("\\ud800\\udc61");
matcher = pattern.matcher("\ud800\udc61");
matcher.region(0, 1);
assertFalse(matcher.find(), "Matched a surrogate pair" +
" that crosses border of region");
assertTrue(matcher.hitEnd(), "Expected to hit the end when" +
" matching a surrogate pair crossing region");
}
/**
 * Asserts that {@code matcher.region(index1, index2)} is rejected with an
 * IndexOutOfBoundsException or IllegalStateException.
 *
 * Any other outcome fails the test with a message naming the bounds; an
 * unexpected exception is attached as the cause so it is not lost.
 */
private static void expectRegionFail(Matcher matcher, int index1,
                                     int index2)
{
    final String call = "region(" + index1 + ", " + index2 + ")";
    try {
        matcher.region(index1, index2);
        // fail() throws AssertionError, which none of the catch clauses
        // below intercept.
        fail(call + " did not throw");
    } catch (IndexOutOfBoundsException | IllegalStateException expected) {
        // Correct result
    } catch (Exception e) {
        fail(call + " threw unexpected " + e, e);
    }
}
// This test is for 4803197
// Backslashes inside a \Q...\E quoted segment are literal, including a
// backslash placed directly before the closing \E.
@Test
public static void escapedSegmentTest() {
    check(Pattern.compile("\\Qdir1\\dir2\\E"), "dir1\\dir2", true);
    check(Pattern.compile("\\Qdir1\\dir2\\\\E"), "dir1\\dir2\\", true);
    check(Pattern.compile("(\\Qdir1\\dir2\\\\E)"), "dir1\\dir2\\", true);

    // Supplementary character test
    check(Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")),
          toSupplementaries("dir1\\dir2"), true);
    check(Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"),
          toSupplementaries("dir1\\dir2\\"), true);
    check(Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"),
          toSupplementaries("dir1\\dir2\\"), true);
}
// This test is for 4792284
// Repetition applied to groups (non-capturing in all but the last case):
// every pattern below must match the whole input with both find() and
// matches().
@Test
public static void nonCaptureRepetitionTest() {
    final String input = "abcdefgh;";
    final String[] regexes = {
        "(?:\\w{4})+;",
        "(?:\\w{8})*;",
        "(?:\\w{2}){2,4};",
        "(?:\\w{4}){2,};",   // open-ended minimum
        ".*?(?:\\w{5})+;",
        ".*?(?:\\w{9})*;",
        "(?:\\w{4})+?;",     // lazy repetition
        "(?:\\w{4})++;",     // possessive repetition
        "(?:\\w{2,}?)+;",    // non-deterministic
        "(\\w{4})+;",        // capturing group
    };

    for (int k = 0; k < regexes.length; k++) {
        final String regex = regexes[k];

        // Check find()
        check(regex, 0, input, input, true);

        // Check matches()
        Matcher m = Pattern.compile(regex).matcher(input);
        assertTrue(m.matches());
        assertEquals(m.group(0), input);
    }
}
// This test is for 6358731
// When the second alternative is the one that matches, group 1 must be
// left unset and group 2 must hold the matched text.
@Test
public static void notCapturedGroupCurlyMatchTest() {
    Matcher m = Pattern.compile("(abc)+|(abcd)+").matcher("abcd");
    boolean onlySecondGroupCaptured = m.matches()
            && m.group(1) == null
            && m.group(2).equals("abcd");
    assertTrue(onlySecondGroupCaptured);
}
// This test is for 4706545
// Cross-checks the \p{javaXxx} / \P{javaXxx} properties against the
// corresponding java.lang.Character predicates, for 1000 random chars and
// 1000 random supplementary code points.
@Test
public static void javaCharClassTest() {
    for (int round = 0; round < 1000; round++) {
        // A char widens to its own code point, and the string built from
        // a BMP code point is that single char.
        char c = (char) generator.nextInt();
        checkJavaCharProperties(c);
    }

    // Supplementary character test
    final int span = Character.MAX_CODE_POINT
                     - Character.MIN_SUPPLEMENTARY_CODE_POINT;
    for (int round = 0; round < 1000; round++) {
        int cp = generator.nextInt(span)
                 + Character.MIN_SUPPLEMENTARY_CODE_POINT;
        checkJavaCharProperties(cp);
    }
}

// Asserts, for one code point, that each java* property class (or its
// negation) agrees with the matching java.lang.Character predicate.
private static void checkJavaCharProperties(int cp) {
    check("{javaLowerCase}", cp, Character.isLowerCase(cp));
    check("{javaUpperCase}", cp, Character.isUpperCase(cp));
    check("{javaUpperCase}+", cp, Character.isUpperCase(cp));
    check("{javaTitleCase}", cp, Character.isTitleCase(cp));
    check("{javaDigit}", cp, Character.isDigit(cp));
    check("{javaDefined}", cp, Character.isDefined(cp));
    check("{javaLetter}", cp, Character.isLetter(cp));
    check("{javaLetterOrDigit}", cp, Character.isLetterOrDigit(cp));
    check("{javaJavaIdentifierStart}", cp,
          Character.isJavaIdentifierStart(cp));
    check("{javaJavaIdentifierPart}", cp,
          Character.isJavaIdentifierPart(cp));
    check("{javaUnicodeIdentifierStart}", cp,
          Character.isUnicodeIdentifierStart(cp));
    check("{javaUnicodeIdentifierPart}", cp,
          Character.isUnicodeIdentifierPart(cp));
    check("{javaIdentifierIgnorable}", cp,
          Character.isIdentifierIgnorable(cp));
    check("{javaSpaceChar}", cp, Character.isSpaceChar(cp));
    check("{javaWhitespace}", cp, Character.isWhitespace(cp));
    check("{javaISOControl}", cp, Character.isISOControl(cp));
    check("{javaMirrored}", cp, Character.isMirrored(cp));
}
// This test is for 4523620
/*
private static void numOccurrencesTest() throws Exception {
Pattern pattern = Pattern.compile("aaa");
if (pattern.numOccurrences("aaaaaa", false) != 2)
failCount++;
if (pattern.numOccurrences("aaaaaa", true) != 4)
failCount++;
pattern = Pattern.compile("^");
if (pattern.numOccurrences("aaaaaa", false) != 1)
failCount++;
if (pattern.numOccurrences("aaaaaa", true) != 1)
failCount++;
report("Number of Occurrences");
}
*/
// This test is for 4776374
/**
 * Exercises {@code ^} placed after a run of {@code .} wildcards under four
 * flag combinations (DOTALL alone, plus UNIX_LINES and/or MULTILINE), using
 * inputs whose lines are separated by LF, CR LF or U+0085.
 *
 * The same table is run twice: once with the plain word "test" and once with
 * the result of {@code toSupplementaries("test")}. Each row is handed to the
 * five-argument {@code check} helper defined elsewhere in this file; its last
 * argument is the expected outcome.
 */
@Test
public static void caretBetweenTerminatorsTest() {
    int flags1 = Pattern.DOTALL;
    int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
    int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
    int flags4 = Pattern.DOTALL | Pattern.MULTILINE;

    for (int pass = 0; pass < 2; pass++) {
        // pass 0: BMP characters, pass 1: supplementary character test
        String t = (pass == 0) ? "test" : toSupplementaries("test");

        check("^....", flags1, t+"\n"+t, t, true);
        check(".....^", flags1, t+"\n"+t, t, false);
        check(".....^", flags1, t+"\n", t, false);
        check("....^", flags1, t+"\r\n", t, false);

        check("^....", flags2, t+"\n"+t, t, true);
        check("....^", flags2, t+"\n"+t, t, false);
        check(".....^", flags2, t+"\n", t, false);
        check("....^", flags2, t+"\r\n", t, false);

        check("^....", flags3, t+"\n"+t, t, true);
        check(".....^", flags3, t+"\n"+t, t+"\n", true);
        check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
        check(".....^", flags3, t+"\n", t, false);
        check(".....^", flags3, t+"\r\n", t, false);
        check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);

        check("^....", flags4, t+"\n"+t, t, true);
        // This row used flags3, which merely repeated the row in the group
        // above; it belongs to the flags4 group and now uses flags4.
        check(".....^", flags4, t+"\n"+t, t+"\n", true);
        check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
        check(".....^", flags4, t+"\n", t+"\n", false);
        check(".....^", flags4, t+"\r\n", t+"\r", false);
    }
}
// This test is for 4727935
/**
 * Exercises {@code $} near the end of input under DOTALL, DOTALL|UNIX_LINES
 * and DOTALL|MULTILINE, with inputs ending in LF, CR LF or U+0085.
 *
 * The table is run once with plain text and once with the
 * {@code toSupplementaries} form of the same words. Rows are passed to the
 * five-argument {@code check} helper defined elsewhere in this file.
 */
@Test
public static void dollarAtEndTest() {
    final int dotAll = Pattern.DOTALL;
    final int dotAllUnix = Pattern.DOTALL | Pattern.UNIX_LINES;
    final int dotAllMulti = Pattern.DOTALL | Pattern.MULTILINE;

    for (int pass = 0; pass < 2; pass++) {
        // Second pass is the supplementary character test
        final boolean supplementary = (pass == 1);
        final String word = supplementary ? toSupplementaries("test") : "test";
        final String tail = supplementary ? toSupplementaries("blah") : "blah";

        check("....$", dotAll, word + "\n", word, true);
        check("....$", dotAll, word + "\r\n", word, true);
        check(".....$", dotAll, word + "\n", word + "\n", true);
        check(".....$", dotAll, word + "\u0085", word + "\u0085", true);
        check("....$", dotAll, word + "\u0085", word, true);

        check("....$", dotAllUnix, word + "\n", word, true);
        check(".....$", dotAllUnix, word + "\n", word + "\n", true);
        check(".....$", dotAllUnix, word + "\u0085", word + "\u0085", true);
        String lastFour = supplementary ? toSupplementaries("est\u0085") : "est\u0085";
        check("....$", dotAllUnix, word + "\u0085", lastFour, true);

        check("....$." + tail, dotAllMulti, word + "\n" + tail, word + "\n" + tail, true);
        check(".....$." + tail, dotAllMulti, word + "\n\n" + tail, word + "\n\n" + tail, true);
        check("....$" + tail, dotAllMulti, word + "\n" + tail, "!!!!", false);
        check(".....$" + tail, dotAllMulti, word + "\n" + tail, "!!!!", false);
    }
}
// This test is for 4711773
/**
 * Verifies the offsets at which a MULTILINE {@code $} is found in a two-line
 * input: first just before the LF, then at the end of the input.
 */
@Test
public static void multilineDollarTest() {
    Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
    Matcher matcher = findCR.matcher("first bit\nsecond bit");
    // Assert each find() so a missing match fails here rather than as an
    // IllegalStateException from start().
    assertTrue(matcher.find());
    assertEquals(matcher.start(), 9);
    assertTrue(matcher.find());
    assertEquals(matcher.start(0), 20);

    // Supplementary character test.
    // The expected offsets are exactly twice the BMP ones, so every char that
    // toSupplementaries leaves as a single char must appear twice in the
    // input: each space is doubled and the LF is padded with one space.
    // NOTE(review): assumes toSupplementaries turns each letter into a
    // two-char surrogate pair and leaves other chars alone - confirm.
    matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
    assertTrue(matcher.find());
    assertEquals(matcher.start(0), 9*2);
    assertTrue(matcher.find());
    assertEquals(matcher.start(0), 20*2);
}
/**
 * Exercises reluctant quantifiers: a reluctant bounded repetition of a group
 * containing a reluctant {@code +?}, and a reluctant {@code +?} over a
 * capturing group, the latter also in supplementary-character form.
 */
@Test
public static void reluctantRepetitionTest() {
    final Pattern bounded = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
    final String[] inputs = {
        "1 word word word 2",
        "1 wor wo w 2",
        "1 word word 2",
        "1 word 2",
        "1 wo w w 2",
        "1 wo w 2",
        "1 wor w 2",
    };
    for (String input : inputs) {
        check(bounded, input, true);
    }

    Matcher bmp = Pattern.compile("([a-z])+?c").matcher("ababcdefdec");
    check(bmp, "ababc");

    // Supplementary character test
    Matcher supplementary = Pattern.compile(toSupplementaries("([a-z])+?c"))
            .matcher(toSupplementaries("ababcdefdec"));
    check(supplementary, toSupplementaries("ababc"));
}
/**
 * Round-trips a pattern through Java object serialization using an in-memory
 * byte buffer.
 *
 * @param p the pattern to serialize
 * @return the object read back from the serialized bytes, cast to Pattern
 * @throws Exception if writing or reading the object stream fails
 */
public static Pattern serializedPattern(Pattern p) throws Exception {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // try-with-resources closes (and thereby flushes) the stream even when
    // writeObject throws; the bytes are read only after it has been closed.
    try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
        oos.writeObject(p);
    }
    try (ObjectInputStream ois = new ObjectInputStream(
            new ByteArrayInputStream(baos.toByteArray()))) {
        return (Pattern)ois.readObject();
    }
}
/**
 * Checks that patterns passed through {@code serializedPattern} still match
 * as before: a capturing group keeps its group count, and an inline
 * {@code (?-i)} inside a CASE_INSENSITIVE pattern is still honoured.
 */
@Test
public static void serializeTest() throws Exception {
    // Simple capturing group
    Pattern roundTripped = serializedPattern(Pattern.compile("(b)"));
    Matcher m = roundTripped.matcher("b");
    assertTrue(m.matches());
    assertEquals(m.groupCount(), 1);

    // Compile-time flag combined with an embedded flag switch
    Pattern mixedCase = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
    roundTripped = serializedPattern(mixedCase);
    assertTrue(roundTripped.matcher("Ab").matches());
    assertFalse(roundTripped.matcher("AB").matches());
}
/**
 * Exercises the {@code \G} boundary across successive {@code find()} calls,
 * including a search started with {@code find(1)}.
 */
@Test
public static void gTest() {
    // Three find() calls are made without checking their results; only the
    // fourth is asserted to fail.
    Matcher words = Pattern.compile("\\G\\w").matcher("abc#x#x");
    for (int call = 0; call < 3; call++) {
        words.find();
    }
    assertFalse(words.find());

    // One unchecked find(), then the next must fail.
    Matcher fromStart = Pattern.compile("\\GA*").matcher("1A2AA3");
    fromStart.find();
    assertFalse(fromStart.find());

    // Same pattern and input, but the search begins at index 1.
    Matcher fromOne = Pattern.compile("\\GA*").matcher("1A2AA3");
    assertTrue(fromOne.find(1));
    fromOne.find();
    assertFalse(fromOne.find());
}
/**
 * Exercises {@code \Z} after "foo" with various trailing characters, first
 * with default flags and then with UNIX_LINES. Each input is passed to the
 * {@code check(Pattern, String, boolean)} helper defined elsewhere in this
 * file together with the expected outcome.
 */
@Test
public static void zTest() {
    final Pattern defaultMode = Pattern.compile("foo\\Z");
    final String[] defaultPositives = {
        "foo\u0085", "foo\u2028", "foo\u2029", "foo\n", "foo\r", "foo\r\n",
    };
    final String[] defaultNegatives = { "fooo", "foo\n\r" };
    for (String input : defaultPositives) {
        check(defaultMode, input, true);
    }
    for (String input : defaultNegatives) {
        check(defaultMode, input, false);
    }

    final Pattern unixMode = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
    final String[] unixPositives = { "foo", "foo\n" };
    final String[] unixNegatives = {
        "foo\r", "foo\u0085", "foo\u2028", "foo\u2029",
    };
    for (String input : unixPositives) {
        check(unixMode, input, true);
    }
    for (String input : unixNegatives) {
        check(unixMode, input, false);
    }
}
/**
 * Exercises {@code Matcher.replaceFirst} with literal replacements and with
 * the group references {@code $1} and {@code $2}, first on plain text and
 * then on the {@code toSupplementaries} form of the same patterns and inputs.
 */
@Test
public static void replaceFirstTest() {
    Pattern p = Pattern.compile("(ab)(c*)");
    Matcher m = p.matcher("abccczzzabcczzzabccc");
    assertEquals(m.replaceFirst("test"), "testzzzabcczzzabccc");

    m.reset("zzzabccczzzabcczzzabccczzz");
    assertEquals(m.replaceFirst("test"), "zzztestzzzabcczzzabccczzz");

    m.reset("zzzabccczzzabcczzzabccczzz");
    String actual = m.replaceFirst("$1");
    assertEquals(actual,"zzzabzzzabcczzzabccczzz");

    m.reset("zzzabccczzzabcczzzabccczzz");
    actual = m.replaceFirst("$2");
    assertEquals(actual, "zzzccczzzabcczzzabccczzz");

    p = Pattern.compile("a*");
    m = p.matcher("aaaaaaaaaa");
    assertEquals(m.replaceFirst("test"), "test");

    p = Pattern.compile("a+");
    m = p.matcher("zzzaaaaaaaaaa");
    assertEquals(m.replaceFirst("test"), "zzztest");

    // Supplementary character test
    p = Pattern.compile(toSupplementaries("(ab)(c*)"));
    m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
    actual = m.replaceFirst(toSupplementaries("test"));
    assertEquals(actual, toSupplementaries("testzzzabcczzzabccc"));

    m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
    actual = m.replaceFirst(toSupplementaries("test"));
    assertEquals(actual, toSupplementaries("zzztestzzzabcczzzabccczzz"));

    m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
    actual = m.replaceFirst("$1");
    assertEquals(actual, toSupplementaries("zzzabzzzabcczzzabccczzz"));

    m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
    actual = m.replaceFirst("$2");
    assertEquals(actual, toSupplementaries("zzzccczzzabcczzzabccczzz"));

    p = Pattern.compile(toSupplementaries("a*"));
    m = p.matcher(toSupplementaries("aaaaaaaaaa"));
    actual = m.replaceFirst(toSupplementaries("test"));
    assertEquals(actual,toSupplementaries("test"));

    p = Pattern.compile(toSupplementaries("a+"));
    m = p.matcher(toSupplementaries("zzzaaaaaaaaaa"));
    actual = m.replaceFirst(toSupplementaries("test"));
    assertEquals(actual, toSupplementaries("zzztest"));
}
/**
 * Compares matching with and without UNIX_LINES on inputs containing U+2028:
 * what {@code .*} consumes, and whether {@code [az]$} in MULTILINE mode is
 * found. Repeated with the {@code toSupplementaries} form of the inputs.
 */
@Test
public static void unixLinesTest() {
    final int multiUnix = Pattern.MULTILINE | Pattern.UNIX_LINES;

    // Default mode: the first match of .* is expected to be "aa"
    Matcher m = Pattern.compile(".*").matcher("aa\u2028blah");
    m.find();
    assertEquals(m.group(0), "aa");

    // UNIX_LINES: the first match of .* is expected to be the whole input
    m = Pattern.compile(".*", Pattern.UNIX_LINES).matcher("aa\u2028blah");
    m.find();
    assertEquals(m.group(0), "aa\u2028blah");

    m = Pattern.compile("[az]$", multiUnix).matcher("aa\u2028zz");
    check(m, "a\u2028", false);

    // Supplementary character test
    m = Pattern.compile(".*").matcher(toSupplementaries("aa\u2028blah"));
    m.find();
    assertEquals(m.group(0), toSupplementaries("aa"));

    m = Pattern.compile(".*", Pattern.UNIX_LINES)
            .matcher(toSupplementaries("aa\u2028blah"));
    m.find();
    assertEquals(m.group(0), toSupplementaries("aa\u2028blah"));

    m = Pattern.compile(toSupplementaries("[az]$"), multiUnix)
            .matcher(toSupplementaries("aa\u2028zz"));
    check(m, toSupplementaries("a\u2028"), false);
}
/**
 * Exercises the COMMENTS flag: each {regex, input} pair below must match in
 * full, first as written and then with both strings passed through
 * {@code toSupplementaries}.
 */
@Test
public static void commentsTest() {
    final int flags = Pattern.COMMENTS;
    // { regex compiled with COMMENTS, input that must match entirely }
    final String[][] cases = {
        { "aa \\# aa", "aa#aa" },
        { "aa # blah", "aa" },
        { "aa blah", "aablah" },
        { "aa # blah blech ", "aa" },
        { "aa # blah\n ", "aa" },
        { "aa # blah\nbc # blech", "aabc" },
        { "aa # blah\nbc# blech", "aabc" },
        { "aa # blah\nbc\\# blech", "aabc#blech" },
    };

    for (String[] c : cases) {
        Pattern p = Pattern.compile(c[0], flags);
        assertTrue(p.matcher(c[1]).matches());
    }

    // Supplementary character test
    for (String[] c : cases) {
        Pattern p = Pattern.compile(toSupplementaries(c[0]), flags);
        assertTrue(p.matcher(toSupplementaries(c[1])).matches());
    }
}
/**
 * Exercises CASE_INSENSITIVE and UNICODE_CASE, separately and combined, over
 * single characters, slices, character classes, ranges and back references
 * in ASCII, Latin-1 Supplement and Cyrillic (bug 4504687).
 */
@Test
public static void caseFoldingTest() {
    final int both = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    // Different letters must not match, whatever their case
    assertFalse(Pattern.compile("aa", both).matcher("ab").matches());
    assertFalse(Pattern.compile("aA", both).matcher("ab").matches());
    Pattern lowerPair = Pattern.compile("aa", both);
    assertFalse(lowerPair.matcher("aB").matches());
    assertFalse(lowerPair.matcher("Ab").matches());

    // Each row holds an ASCII, a Latin-1 Supplement and a Cyrillic variant
    final String[] regexes = {
        // single
        "a", "\u00e0", "\u0430",
        // slice
        "ab", "\u00e0\u00e1", "\u0430\u0431",
        // class single
        "[a]", "[\u00e0]", "[\u0430]",
        // class range
        "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
        // back reference
        "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
    };
    final String[] inputs = {
        "A", "\u00c0", "\u0410",
        "AB", "\u00c0\u00c1", "\u0410\u0411",
        "A", "\u00c0", "\u0410",
        "B", "\u00c2", "\u0411",
        "aA", "\u00e0\u00c0", "\u0430\u0410"
    };
    // Expected results with CASE_INSENSITIVE alone: only the ASCII column
    final boolean[] caseInsensitiveOnly = {
        true, false, false,
        true, false, false,
        true, false, false,
        true, false, false,
        true, false, false
    };

    for (int idx = 0; idx < regexes.length; idx++) {
        Matcher m = Pattern.compile(regexes[idx], Pattern.CASE_INSENSITIVE)
                .matcher(inputs[idx]);
        assertEquals(m.matches(), caseInsensitiveOnly[idx], "<1> Failed at " + idx);
    }

    for (int idx = 0; idx < regexes.length; idx++) {
        Matcher m = Pattern.compile(regexes[idx], both).matcher(inputs[idx]);
        assertTrue(m.matches(), "<2> Failed at " + idx);
    }

    // flag unicode_case alone should do nothing
    for (int idx = 0; idx < regexes.length; idx++) {
        Matcher m = Pattern.compile(regexes[idx], Pattern.UNICODE_CASE)
                .matcher(inputs[idx]);
        assertFalse(m.matches(), "<3> Failed at " + idx);
    }

    // Special cases: i, I, U+0131 and U+0130
    Pattern hToJ = Pattern.compile("[h-j]+",
            Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE);
    assertTrue(hToJ.matcher("\u0131\u0130").matches());
}
/**
 * Exercises group references in replacements through {@code replaceAll} and
 * through {@code appendReplacement}/{@code appendTail}, on plain text and on
 * the {@code toSupplementaries} form of the same data.
 */
@Test
public static void appendTest() {
    Matcher m = Pattern.compile("(ab)(cd)").matcher("abcd");
    assertEquals(m.replaceAll("$2$1"), "cdab");

    String swapAll = "Swap all: first = 123, second = 456";
    String swapOne = "Swap one: first = 123, second = 456";
    String replacement = "$3$2$1";
    Pattern assignment = Pattern.compile("([a-z]+)( *= *)([0-9]+)");

    m = assignment.matcher(swapAll);
    assertEquals(m.replaceAll(replacement), "Swap all: 123 = first, 456 = second");

    // Replace only the first match by hand
    m = assignment.matcher(swapOne);
    if (m.find()) {
        StringBuffer out = new StringBuffer();
        m.appendReplacement(out, replacement);
        m.appendTail(out);
        assertEquals(out.toString(), "Swap one: 123 = first, second = 456");
    }

    // Supplementary character test
    m = Pattern.compile(toSupplementaries("(ab)(cd)"))
            .matcher(toSupplementaries("abcd"));
    assertEquals(m.replaceAll("$2$1"), toSupplementaries("cdab"));

    swapAll = toSupplementaries("Swap all: first = 123, second = 456");
    swapOne = toSupplementaries("Swap one: first = 123, second = 456");
    replacement = toSupplementaries("$3$2$1");
    assignment = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));

    m = assignment.matcher(swapAll);
    assertEquals(m.replaceAll(replacement),
            toSupplementaries("Swap all: 123 = first, 456 = second"));

    m = assignment.matcher(swapOne);
    if (m.find()) {
        StringBuffer out = new StringBuffer();
        m.appendReplacement(out, replacement);
        m.appendTail(out);
        assertEquals(out.toString(),
                toSupplementaries("Swap one: 123 = first, second = 456"));
    }
}
/**
 * Exercises {@code Pattern.split}, {@code String.split} and
 * {@code Pattern.splitAsStream}: limited and unlimited splits of String and
 * CharBuffer inputs, every limit from -2 to 2 against a digit string, the
 * no-match and empty-source cases, and a table of inputs covering
 * zero-length input and zero-length matches.
 */
@Test
public static void splitTest() {
    Pattern pattern = Pattern.compile(":");
    String[] result = pattern.split("foo:and:boo", 2);
    assertEquals(result[0], "foo");
    assertEquals(result[1], "and:boo");
    // Supplementary character test
    Pattern patternX = Pattern.compile(toSupplementaries("X"));
    result = patternX.split(toSupplementaries("fooXandXboo"), 2);
    assertEquals(result[0], toSupplementaries("foo"));
    assertEquals(result[1], toSupplementaries("andXboo"));

    // Split a non-String CharSequence
    CharBuffer cb = CharBuffer.allocate(100);
    cb.put("foo:and:boo");
    cb.flip();
    result = pattern.split(cb);
    assertEquals(result[0], "foo");
    assertEquals(result[1], "and");
    assertEquals(result[2], "boo");
    // Supplementary character test
    CharBuffer cbs = CharBuffer.allocate(100);
    cbs.put(toSupplementaries("fooXandXboo"));
    cbs.flip();
    result = patternX.split(cbs);
    assertEquals(result[0], toSupplementaries("foo"));
    assertEquals(result[1], toSupplementaries("and"));
    assertEquals(result[2], toSupplementaries("boo"));

    // Split the digit string on each of its digits, for every limit in [-2, 2]
    String source = "0123456789";
    for (int limit=-2; limit<3; limit++) {
        for (int x=0; x<10; x++) {
            result = source.split(Integer.toString(x), limit);
            int expectedLength = limit < 1 ? 2 : limit;
            if ((limit == 0) && (x == 9)) {
                // expected dropping of ""
                assertEquals(result.length, 1);
                assertEquals(result[0], "012345678");
            } else {
                assertEquals(result.length, expectedLength);
                if (!result[0].equals(source.substring(0,x))) {
                    // Only a limit of 1 may leave the source unsplit
                    assertEquals(limit, 1);
                    assertEquals(result[0], source.substring(0,10));
                }
                if (expectedLength > 1) { // Check segment 2
                    assertEquals(result[1], source.substring(x+1,10));
                }
            }
        }
    }
    // Check the case for no match found
    for (int limit=-2; limit<3; limit++) {
        result = source.split("e", limit);
        assertEquals(result.length, 1);
        assertEquals(result[0], source);
    }
    // Check the case for limit == 0, source = "":
    // split() returns a one-element array holding the empty source itself,
    // see 6559590
    source = "";
    result = source.split("e", 0);
    assertEquals(result.length, 1);
    assertEquals(result[0], source);

    // Check both split() and splitAsStream(), especially for zero-length
    // input and zero-length match cases
    String[][] input = new String[][] {
        { " ", "Abc Efg Hij" }, // normal non-zero-match
        { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
        // Two adjacent separators are needed to produce the empty middle
        // element expected below.
        { " ", "Abc  Efg Hij" }, // non-zero-match in the middle
        { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
        { "(?=\\p{Lu})", "AbcEfg" },
        { "(?=\\p{Lu})", "Abc" },
        { " ", "" }, // zero-length input
        { ".*", "" },
        // some tests from PatternStreamTest.java
        { "4", "awgqwefg1fefw4vssv1vvv1" },
        { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
        { "1", "awgqwefg1fefw4vssv1vvv1" },
        { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
        { "\u56da", "1\u56da23\u56da456\u56da7890" },
        { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
        { "\u56da", "" },
        { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
        { "o", "boo:and:foo" },
        { "o", "booooo:and:fooooo" },
        { "o", "fooooo:" },
    };
    String[][] expected = new String[][] {
        { "Abc", "Efg", "Hij" },
        { "", "Abc", "Efg", "Hij" },
        { "Abc", "", "Efg", "Hij" },
        { "Abc", "Efg", "Hij" },
        { "Abc", "Efg" },
        { "Abc" },
        { "" },
        { "" },
        { "awgqwefg1fefw", "vssv1vvv1" },
        { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
        { "awgqwefg", "fefw4vssv", "vvv" },
        { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
        { "1", "23", "456", "7890" },
        { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
        { "" },
        { "This", "is", "testing", "", "with", "different", "separators" },
        { "b", "", ":and:f" },
        { "b", "", "", "", "", ":and:f" },
        { "f", "", "", "", "", ":" },
    };
    for (int i = 0; i < input.length; i++) {
        pattern = Pattern.compile(input[i][0]);
        assertTrue(Arrays.equals(pattern.split(input[i][1]), expected[i]),
                   "split() mismatch at row " + i);
        // splitAsStream() is deliberately not compared for zero-length input;
        // the original note here says it returns an empty result for such
        // input "for now".
        if (input[i][1].length() > 0) {
            assertTrue(Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
                                     expected[i]),
                       "splitAsStream() mismatch at row " + i);
        }
    }
}
/**
 * Checks that {@code ^} and an escaped {@code [} inside a character class,
 * when {@code ^} is not the first character, are ordinary members of the
 * class, and that an escaped {@code )} matches a literal parenthesis.
 */
@Test
public static void negationTest() {
    final String subject = "@@@@[[[[^^^^";
    final String[] classes = { "[\\[@^]+", "[@\\[^]+", "[@\\[^@]+" };
    for (String regex : classes) {
        Matcher m = Pattern.compile(regex).matcher(subject);
        assertTrue(m.find());
        // The first match must cover the whole subject
        assertEquals(m.group(0), "@@@@[[[[^^^^");
    }

    Matcher paren = Pattern.compile("\\)").matcher("xxx)xxx");
    assertTrue(paren.find());
}
/**
 * Checks that a single {@code &} inside a character class, escaped or not and
 * in either position, is accepted; each pattern is passed to the
 * {@code check(Pattern, String, boolean)} helper defined elsewhere in this
 * file with an expected result of true.
 */
@Test
public static void ampersandTest() {
    final String[] classes = { "[&@]+", "[@&]+", "[@\\&]+" };
    for (String regex : classes) {
        check(Pattern.compile(regex), "@@@@&&&&", true);
    }
}
@Test
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.111 Sekunden
(vorverarbeitet)
¤
|
Laden
Fehler beim Verzeichnis:
in der Quellcodebibliothek suchen
Die farbliche Syntaxdarstellung ist noch experimentell.
|