/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/
/** * @test * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154 * 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241 * 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736 * 4133509 4139572 4141640 4179126 4179686 4244884 4663220 * @library /java/text/testlib * @summary Regression tests for Collation and associated classes * @modules jdk.localedata
*/ /* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved (C) Copyright IBM Corp. 1996 - All Rights Reserved
The original version of this source code and documentation is copyrighted and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are provided under terms of a License Agreement between Taligent and Sun. This technology is protected by multiple US and International patents. This notice and attribution to Taligent may not be removed. Taligent is a registered trademark of Taligent, Inc.
*/
// CollationElementIterator will not work correctly if the associated // Collator object's mode is changed // publicvoid Test4054238() {
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=", "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise "\u00C0", "=", "A\u0300"// Decomp should make these equal
};
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
compareArray(c, decomp);
String[] nodecomp = { "\u00C0", ">", "A\u0300"// A-grave vs. A combining-grave
};
c.setDecomposition(Collator.NO_DECOMPOSITION);
compareArray(c, nodecomp);
}
// Full Decomposition mode not implemented // publicvoid Test4054736() {
// NOTE(review): the declaration "publicvoid Test4054736() {" is trapped in the
// line comment above, and neither a check nor a closing brace follows before
// the next test begins -- this fragment looks truncated; confirm against
// upstream.
// Work on a clone of the shared en_us collator.
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
// Switch the clone to FULL_DECOMPOSITION.
c.setDecomposition(Collator.FULL_DECOMPOSITION);
// Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean // publicvoid Test4058613() { // Creating a default collator doesn't work when Korean is the default // locale
Locale oldDefault = Locale.getDefault();
Locale.setDefault( Locale.KOREAN ); try {
Collator c = Collator.getInstance();
// Since the fix to this bug was to turn of decomposition for Korean collators, // ensure that's what we got if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
errln("Decomposition is not set to NO_DECOMPOSITION");
}
} finally {
Locale.setDefault(oldDefault);
}
}
// RuleBasedCollator.getRules does not return the exact pattern as input // for expanding character sequences // publicvoid Test4059820() {
RuleBasedCollator c = null; try {
c = new RuleBasedCollator("< a < b , c/a < d < z");
} catch (ParseException e) {
errln("Exception building collator: " + e.toString()); return;
} if ( c.getRules().indexOf("c/a") == -1) {
errln("returned rules do not contain 'c/a'");
}
}
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" // publicvoid Test4060154() {
// NOTE(review): the declaration "publicvoid Test4060154() {" is trapped in the
// line comment above, and no comparison or closing brace follows the
// setDecomposition call before the next test begins -- this fragment looks
// truncated; confirm against upstream.
RuleBasedCollator c = null; try {
// Custom rules: g/h/i/j in both cases, then a reset at H that re-orders the
// dotless and dotted i variants.
c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
+ " & H < \u0131, \u0130, i, I" );
} catch (ParseException e) {
// A rule string that fails to parse is reported and ends the test.
errln("Exception building collator: " + e.toString()); return;
}
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
// Secondary/Tertiary comparison incorrect in French Secondary // publicvoid Test4062418() throws ParseException {
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
c.setStrength(Collator.SECONDARY);
String[] tests = { "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
};
compareArray(c, tests);
}
// Collator.compare() method broken if either string contains spaces // publicvoid Test4065540() { if (en_us.compare("abcd e", "abcd f") == 0) {
errln("'abcd e' == 'abcd f'");
}
}
// Unicode characters need to be recursively decomposed to get the // correct result. For example, // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. // publicvoid Test4066189() {
// NOTE(review): the declaration "publicvoid Test4066189() {" is trapped in the
// line comment above, and only the two input strings remain -- no collator,
// comparison or closing brace is visible. This fragment looks truncated;
// confirm against upstream.
// Precomposed form of the letter.
String test1 = "\u1EB1";
// Base letter followed by two combining marks.
String test2 = "a\u0306\u0300";
// French secondary collation checking at the end of compare iteration fails // publicvoid Test4066696() {
// NOTE(review): the declaration "publicvoid Test4066696() {" is trapped in the
// line comment above, and no comparison or closing brace follows before the
// next test begins -- this fragment looks truncated; confirm against
// upstream.
// Locale.FRANCE collator, limited to SECONDARY strength.
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
c.setStrength(Collator.SECONDARY);
// Bad canonicalization of same-class combining characters // publicvoid Test4076676() { // These combining characters are all in the same class, so they should not // be reordered, and they should compare as unequal.
String s1 = "A\u0301\u0302\u0300";
String s2 = "A\u0302\u0300\u0301";
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
if (c.compare(s1,s2) == 0) {
errln("Same-class combining chars were reordered");
}
}
// RuleBasedCollator breaks on "< a < bb" rule // publicvoid Test4078588() throws ParseException {
RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
int result = rbc.compare("a","bb");
if (result != -1) {
errln("Compare(a,bb) returned " + result + "; expected -1");
}
}
// Combining characters in different classes not reordered properly. // publicvoid Test4081866() throws ParseException { // These combining characters are all in different classes, // so they should be reordered and the strings should compare as equal.
String s1 = "A\u0300\u0316\u0327\u0315";
String s2 = "A\u0327\u0316\u0315\u0300";
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
// Now that the default collators are set to NO_DECOMPOSITION // (as a result of fixing bug 4114077), we must set it explicitly // when we're testing reordering behavior. -- lwerner, 5/5/98
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
if (c.compare(s1,s2) != 0) {
errln("Combining chars were not reordered");
}
}
// string comparison errors in Scandinavian collators // publicvoid Test4087241() {
// NOTE(review): the declaration "publicvoid Test4087241() {" is trapped in the
// line comment above, and no comparison or closing brace follows before the
// next test begins -- this fragment looks truncated; confirm against
// upstream.
// Collator for the "da"/"DK" locale, limited to SECONDARY strength.
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
Locale.of("da", "DK"));
c.setStrength(Collator.SECONDARY);
// CollationKey takes ignorable strings into account when it shouldn't // publicvoid Test4087243() {
// NOTE(review): the declaration "publicvoid Test4087243() {" is trapped in the
// line comment above, and no comparison or closing brace follows before the
// next test begins -- this fragment looks truncated; confirm against
// upstream.
// TERTIARY-strength clone of the shared en_us collator.
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
// Mu/micro conflict // Micro symbol and greek lowercase letter Mu should sort identically // publicvoid Test4092260() {
Collator c = Collator.getInstance(Locale.of("el"));
// will only be equal when FULL_DECOMPOSITION is used
c.setDecomposition(Collator.FULL_DECOMPOSITION);
String[] tests = { "\u00B5", "=", "\u03BC",
};
compareArray(c, tests);
}
// Uses the collator for the "el"/"GR" locale at TERTIARY strength with
// FULL_DECOMPOSITION and passes a single "=" entry (U+03D4 vs. U+03AB) to
// compareArray.
//
// Declared public like every other Test* method in this file.
public void Test4095316() {
    Collator c = Collator.getInstance(Locale.of("el", "GR"));
    c.setStrength(Collator.TERTIARY);

    // javadocs for RuleBasedCollator clearly specify that characters containing
    // compatibility chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
    c.setDecomposition(Collator.FULL_DECOMPOSITION);

    String[] tests = {
        "\u03D4", "=", "\u03AB",
    };
    compareArray(c, tests);
}
// Creates a collation element iterator over the empty string, resets it,
// and reports an error unless the first call to next() returns NULLORDER.
// Any exception thrown along the way is reported through errln instead of
// propagating.
public void Test4101940() {
    try {
        RuleBasedCollator c = new RuleBasedCollator("< a < b");
        CollationElementIterator iter = c.getCollationElementIterator("");
        iter.reset();

        // NULLORDER is a static constant, so reference it through the class.
        if (iter.next() != CollationElementIterator.NULLORDER) {
            errln("next did not return NULLORDER");
        }
    } catch (Exception e) {
        errln("Caught " + e );
    }
}
// Collator.compare not handling spaces properly // publicvoid Test4103436() {
// NOTE(review): the declaration "publicvoid Test4103436() {" is trapped in the
// line comment above, and no comparison or closing brace follows before the
// next test begins -- this fragment looks truncated; confirm against
// upstream.
// TERTIARY-strength clone of the shared en_us collator.
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
// Collation not Unicode conformant with Hangul syllables // publicvoid Test4114076() {
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
// // With Canonical decomposition, Hangul syllables should get decomposed // into Jamo, but Jamo characters should not be decomposed into // conjoining Jamo //
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
String[] test1 = { "\ud4db", "=", "\u1111\u1171\u11b6",
};
compareArray(c, test1);
// Full decomposition result should be the same as canonical decomposition // for all hangul.
c.setDecomposition(Collator.FULL_DECOMPOSITION);
compareArray(c, test1);
}
// Collator.getCollationKey was hanging on certain character sequences // publicvoid Test4124632() throws Exception {
Collator coll = Collator.getInstance(Locale.JAPAN);
try {
coll.getCollationKey("A\u0308bc");
} catch (OutOfMemoryError e) {
errln("Ran out of memory -- probably an infinite loop");
}
}
// sort order of french words with multiple accents has errors // publicvoid Test4132736() {
// NOTE(review): the declaration "publicvoid Test4132736() {" is trapped in the
// line comment above, and only the collator lookup remains -- no test data,
// comparison or closing brace is visible. This fragment looks truncated;
// confirm against upstream.
Collator c = Collator.getInstance(Locale.FRANCE);
// The sorting using java.text.CollationKey is not in the exact order // publicvoid Test4133509() {
String[] test1 = { "Exception", "<", "ExceptionInInitializerError", "Graphics", "<", "GraphicsEnvironment", "String", "<", "StringBuffer",
};
compareArray(en_us, test1);
}
// Collation with decomposition off doesn't work for Europe // publicvoid Test4114077() { // Ensure that we get the same results with decomposition off // as we do with it on....
// NOTE(review): the declaration "publicvoid Test4114077() {" is trapped in the
// line comment above, and no comparison or closing brace follows before the
// next test begins -- this fragment looks truncated; confirm against
// upstream.
// TERTIARY-strength clone of the shared en_us collator.
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
c.setStrength(Collator.TERTIARY);
// Support for Swedish gone in 1.1.6 (Can't create Swedish collator) // publicvoid Test4141640() { // // Rather than just creating a Swedish collator, we might as well // try to instantiate one for every locale available on the system // in order to prevent this sort of bug from cropping up in the future //
Locale[] locales = Collator.getAvailableLocales();
for (int i = 0; i < locales.length; i++) { try {
Collator c = Collator.getInstance(locales[i]);
} catch (Exception e) {
errln("Caught " + e + " creating collator for " + locales[i]);
}
}
}
// getCollationKey throws exception for spanish text // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 // publicvoid Test4139572() { // // Code pasted straight from the bug report // // create spanish locale and collator
Locale l = Locale.of("es", "es");
Collator col = Collator.getInstance(l);
// this spanish phrase kills it!
col.getCollationKey("Nombre De Objeto");
}
// RuleBasedCollator doesn't use getCollationElementIterator internally // publicvoid Test4146160() throws ParseException { // // Use a custom collator class whose getCollationElementIterator // methods increment a count.... //
My4146160Collator.count = 0; new My4146160Collator().getCollationKey("1"); if (My4146160Collator.count < 1) {
errln("getCollationElementIterator not called");
}
My4146160Collator.count = 0; new My4146160Collator().compare("1", "2"); if (My4146160Collator.count < 1) {
errln("getCollationElementIterator not called");
}
}
// Create a collator with a few expanding character sequences in it....
// NOTE(review): the enclosing method's declaration is not visible above this
// point, and the loop headers and the "index" declaration below are trapped
// inside line comments -- this fragment looks damaged; confirm against
// upstream.
// The rules are the en_us rules plus ae/oe/ue entries paired with the
// corresponding umlaut letters in both cases.
RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
+ " & ae ; \u00e4 & AE ; \u00c4"
+ " & oe ; \u00f6 & OE ; \u00d6"
+ " & ue ; \u00fc & UE ; \u00dc");
String text = "T\u00f6ne"; // o-umlaut
CollationElementIterator iter = coll.getCollationElementIterator(text);
// NOTE(review): raw Vector plus the deprecated Integer(int) constructor below;
// a List<Integer> with Integer.valueOf would be the modern equivalent.
Vector elements = new Vector(); int elem;
// Iterate forward and collect all of the elements into a Vector while ((elem = iter.next()) != iter.NULLORDER) {
elements.addElement(new Integer(elem));
}
// Now iterate backward and make sure they're the same int index = elements.size() - 1; while ((elem = iter.previous()) != iter.NULLORDER) { int expect = ((Integer)elements.elementAt(index)).intValue();
// Report any element whose backward value differs from the forward one;
// both values are printed in hexadecimal.
if (elem != expect) {
errln("Mismatch at index " + index
+ ": got " + Integer.toString(elem,16)
+ ", expected " + Integer.toString(expect,16));
}
index--;
}
}
// NOTE(review): the enclosing method's declaration and the definitions of
// "coll" and "testStrings" are not visible above this point -- this looks
// like the tail of a truncated test; confirm against upstream.
// Walk adjacent pairs: every string must compare strictly less than its
// successor, otherwise the offending pair is reported.
for (int i = 1; i < testStrings.length; i++) { if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
errln("error: \"" + testStrings[i - 1]
+ "\" is greater than or equal to \"" + testStrings[i]
+ "\".");
}
}
}
// you can position a CollationElementIterator in the middle of
// a contracting character sequence, yielding a bogus collation
// element
//
// Extends the Locale.US rules with the sequences "ch" (four case variants),
// "cat" and "crunchy", then checks over a fixed test string that:
//   1. the primary order read after setOffset(4) and setOffset(5) equals
//      the one read after reset();
//   2. the primary orders read after setOffset(14) .. setOffset(19) are all
//      equal;
//   3. the text pieces delimited by successive next() calls match the
//      pieces obtained by calling setOffset() at each piece boundary.
//
// Throws ParseException if the extended rule string cannot be parsed.
public void Test4179216() throws ParseException {
    RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(Locale.US);
    coll = new RuleBasedCollator(coll.getRules()
            + " & C < ch , cH , Ch , CH < cat < crunchy");
    String testText = "church church catcatcher runcrunchynchy";
    CollationElementIterator iter = coll.getCollationElementIterator(testText);

    // test that the "ch" combination works properly
    int elt4 = primaryOrderAtOffset4179216(iter, 4);

    iter.reset();
    int elt0 = CollationElementIterator.primaryOrder(iter.next());

    int elt5 = primaryOrderAtOffset4179216(iter, 5);

    if (elt4 != elt0 || elt5 != elt0) {
        errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
                + elt4 + "), and 5 (" + elt5 + ") don't match.");
    }

    // test that the "cat" combination works properly
    int elt14 = primaryOrderAtOffset4179216(iter, 14);
    int elt15 = primaryOrderAtOffset4179216(iter, 15);
    int elt16 = primaryOrderAtOffset4179216(iter, 16);
    int elt17 = primaryOrderAtOffset4179216(iter, 17);
    int elt18 = primaryOrderAtOffset4179216(iter, 18);
    int elt19 = primaryOrderAtOffset4179216(iter, 19);

    // Offsets 14-16 and 17-19 each fall inside one "cat" of the test text,
    // so all six reads must produce the same primary order. These values
    // were previously computed but never checked.
    if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
            || elt14 != elt18 || elt14 != elt19) {
        errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
                + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
                + ", elt18 = " + elt18 + ", elt19 = " + elt19);
    }

    // now generate a complete list of the collation elements,
    // first using next() and then using setOffset(), and
    // make sure both interfaces return the same set of elements
    iter.reset();
    int count = 0;
    while (iter.next() != CollationElementIterator.NULLORDER) {
        ++count;
    }

    String[] nextElements = new String[count];
    String[] setOffsetElements = new String[count];
    int lastPos = 0;

    // Pass 1: record the text covered by each next() call.
    iter.reset();
    count = 0;
    while (iter.next() != CollationElementIterator.NULLORDER) {
        nextElements[count++] = testText.substring(lastPos, iter.getOffset());
        lastPos = iter.getOffset();
    }

    // Pass 2: re-position with setOffset() at every boundary and record the
    // text covered by the following next() call.
    count = 0;
    for (int i = 0; i < testText.length(); ) {
        iter.setOffset(i);
        lastPos = iter.getOffset();
        iter.next();
        setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
        i = iter.getOffset();
    }

    for (int i = 0; i < nextElements.length; i++) {
        if (nextElements[i].equals(setOffsetElements[i])) {
            logln(nextElements[i]);
        } else {
            errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
                    + setOffsetElements[i]);
        }
    }
}

// Positions iter at the given offset and returns the primary order of the
// next collation element.
private static int primaryOrderAtOffset4179216(CollationElementIterator iter, int offset) {
    iter.setOffset(offset);
    return CollationElementIterator.primaryOrder(iter.next());
}