/* * Copyright (c) 1994, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/
/** * The {@code String} class represents character strings. All * string literals in Java programs, such as {@code "abc"}, are * implemented as instances of this class. * <p> * Strings are constant; their values cannot be changed after they * are created. String buffers support mutable strings. * Because String objects are immutable they can be shared. For example: * <blockquote><pre> * String str = "abc"; * </pre></blockquote><p> * is equivalent to: * <blockquote><pre> * char data[] = {'a', 'b', 'c'}; * String str = new String(data); * </pre></blockquote><p> * Here are some more examples of how strings can be used: * <blockquote><pre> * System.out.println("abc"); * String cde = "cde"; * System.out.println("abc" + cde); * String c = "abc".substring(2, 3); * String d = cde.substring(1, 2); * </pre></blockquote> * <p> * The class {@code String} includes methods for examining * individual characters of the sequence, for comparing strings, for * searching strings, for extracting substrings, and for creating a * copy of a string with all characters translated to uppercase or to * lowercase. Case mapping is based on the Unicode Standard version * specified by the {@link java.lang.Character Character} class. * <p> * The Java language provides special support for the string * concatenation operator ( + ), and for conversion of * other objects to strings. For additional information on string * concatenation and conversion, see <i>The Java Language Specification</i>. * * <p> Unless otherwise noted, passing a {@code null} argument to a constructor * or method in this class will cause a {@link NullPointerException} to be * thrown. * * <p>A {@code String} represents a string in the UTF-16 format * in which <em>supplementary characters</em> are represented by <em>surrogate * pairs</em> (see the section <a href="Character.html#unicode">Unicode * Character Representations</a> in the {@code Character} class for * more information). 
* Index values refer to {@code char} code units, so a supplementary * character uses two positions in a {@code String}. * <p>The {@code String} class provides methods for dealing with * Unicode code points (i.e., characters), in addition to those for * dealing with Unicode code units (i.e., {@code char} values). * * <p>Unless otherwise noted, methods for comparing Strings do not take locale * into account. The {@link java.text.Collator} class provides methods for * finer-grain, locale-sensitive String comparison. * * @implNote The implementation of the string concatenation operator is left to * the discretion of a Java compiler, as long as the compiler ultimately conforms * to <i>The Java Language Specification</i>. For example, the {@code javac} compiler * may implement the operator with {@code StringBuffer}, {@code StringBuilder}, * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The * implementation of string conversion is typically through the method {@code toString}, * defined by {@code Object} and inherited by all classes in Java. * * @author Lee Boynton * @author Arthur van Hoff * @author Martin Buchholz * @author Ulf Zibis * @see java.lang.Object#toString() * @see java.lang.StringBuffer * @see java.lang.StringBuilder * @see java.nio.charset.Charset * @since 1.0 * @jls 15.18.1 String Concatenation Operator +
*/
/**
 * Backing storage for the characters of this string.
 *
 * @implNote The VM trusts this field and may constant-fold it when the
 * {@code String} instance itself is constant. Overwriting this field after
 * construction will cause problems.
 *
 * <p>The field is additionally annotated with {@link Stable} so that the
 * contents of the array are trusted too. No other facility in the JDK
 * provides this functionality (yet). {@link Stable} is safe here because
 * {@code value} is never null.
 */
@Stable
private final byte[] value;
/**
 * Identifies the encoding that was used to produce the bytes stored in
 * {@code value}. This implementation supports the following values:
 *
 * LATIN1
 * UTF16
 *
 * @implNote The VM trusts this field and may constant-fold it when the
 * {@code String} instance itself is constant. Overwriting this field after
 * construction will cause problems.
 */
private final byte coder;
/** Cached hash code of this string. */
private int hash; // Default to 0
/**
 * Records that the hash has been computed and really is zero, so that the
 * computation does not have to be repeated.
 */
private boolean hashIsZero; // Default to false;
/** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
@java.io.Serial
private static final long serialVersionUID = -6849794470754667710L;
/**
 * Tells whether String compaction is enabled. When it is disabled, the
 * bytes in {@code value} are always encoded in UTF16.
 *
 * <p>For methods with several possible implementation paths, only one code
 * path is taken when String compaction is disabled.
 *
 * <p>The instance field {@code value} is generally opaque to optimizing JIT
 * compilers. In performance-sensitive places the static boolean
 * {@code COMPACT_STRINGS} is therefore checked explicitly before the
 * {@code coder} field, because an optimizing JIT compiler constant-folds
 * the static boolean away. The idioms are as follows.
 *
 * <p>Code such as:
 * <pre>{@code
 *    if (coder == LATIN1) { ... }
 * }</pre>
 * can be written more optimally as
 * <pre>{@code
 *    if (coder() == LATIN1) { ... }
 * }</pre>
 * or:
 * <pre>{@code
 *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
 * }</pre>
 * An optimizing JIT compiler can fold the conditional above as:
 * <pre>{@code
 *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
 *    COMPACT_STRINGS == false => if (false)           { ... }
 * }</pre>
 *
 * @implNote
 * The actual value of this field is injected by the JVM. It is assigned in
 * a static initialization block to communicate that this static final field
 * is not statically foldable, and to avoid any possible circular dependency
 * during VM initialization.
 */
static final boolean COMPACT_STRINGS;

static {
    COMPACT_STRINGS = true;
}
/** * Class String is special cased within the Serialization Stream Protocol. * * A String instance is written into an ObjectOutputStream according to * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements"> * <cite>Java Object Serialization Specification</cite>, Section 6.2, "Stream Elements"</a>
*/
@java.io.Serial privatestaticfinal ObjectStreamField[] serialPersistentFields = new ObjectStreamField[0];
/** * Initializes a newly created {@code String} object so that it represents * an empty character sequence. Note that use of this constructor is * unnecessary since Strings are immutable.
*/ public String() { this.value = "".value; this.coder = "".coder;
}
/**
 * Creates a {@code String} that represents the same sequence of characters
 * as the argument; that is, the new string is a copy of the argument
 * string. Because Strings are immutable, this constructor is only needed
 * when an explicit copy of {@code original} is required.
 *
 * @param  original
 *         A {@code String}
 */
@IntrinsicCandidate
public String(String original) {
    // The character storage is shared with the original, not duplicated.
    value = original.value;
    coder = original.coder;
    // Carry the cached hash state over so it need not be recomputed.
    hash = original.hash;
    hashIsZero = original.hashIsZero;
}
/**
 * Creates a {@code String} holding the characters currently contained in
 * the given character array. The array contents are copied, so later
 * changes to the array do not affect the new string.
 *
 * @param  value
 *         The initial value of the string
 */
public String(char[] value) {
    // The trailing Void argument only selects the non-checking overload.
    this(value, 0, value.length, (Void) null);
}
/**
 * Creates a {@code String} holding the characters of a subarray of the
 * given character array. {@code offset} is the index of the first character
 * of the subarray and {@code count} is its length. The subarray contents
 * are copied, so later changes to the array do not affect the new string.
 *
 * @param  value
 *         Array that is the source of characters
 *
 * @param  offset
 *         The initial offset
 *
 * @param  count
 *         The length
 *
 * @throws  IndexOutOfBoundsException
 *          If {@code offset} is negative, {@code count} is negative, or
 *          {@code offset} is greater than {@code value.length - count}
 */
public String(char[] value, int offset, int count) {
    // rangeCheck runs as an argument so that it validates before delegation.
    this(value,
         offset,
         count,
         rangeCheck(value, offset, count));
}
privatestaticVoid rangeCheck(char[] value, int offset, int count) {
checkBoundsOffCount(offset, count, value.length); returnnull;
}
/** * Allocates a new {@code String} that contains characters from a subarray * of the <a href="Character.html#unicode">Unicode code point</a> array * argument. The {@code offset} argument is the index of the first code * point of the subarray and the {@code count} argument specifies the * length of the subarray. The contents of the subarray are converted to * {@code char}s; subsequent modification of the {@code int} array does not * affect the newly created string. * * @param codePoints * Array that is the source of Unicode code points * * @param offset * The initial offset * * @param count * The length * * @throws IllegalArgumentException * If any invalid Unicode code point is found in {@code * codePoints} * * @throws IndexOutOfBoundsException * If {@code offset} is negative, {@code count} is negative, or * {@code offset} is greater than {@code codePoints.length - count} * * @since 1.5
*/ public String(int[] codePoints, int offset, int count) {
checkBoundsOffCount(offset, count, codePoints.length); if (count == 0) { this.value = "".value; this.coder = "".coder; return;
} if (COMPACT_STRINGS) { byte[] val = StringLatin1.toBytes(codePoints, offset, count); if (val != null) { this.coder = LATIN1; this.value = val; return;
}
} this.coder = UTF16; this.value = StringUTF16.toBytes(codePoints, offset, count);
}
/**
 * Creates a {@code String} from a subarray of an array of 8-bit integer
 * values.
 *
 * <p> {@code offset} is the index of the first byte of the subarray, and
 * {@code count} is the length of the subarray.
 *
 * <p> Each {@code byte} in the subarray is converted to a {@code char} as
 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
 *
 * @deprecated This method does not properly convert bytes into characters.
 * As of JDK&nbsp;1.1, the preferred way to do this is via the
 * {@code String} constructors that take a {@link Charset}, charset name,
 * or that use the {@link Charset#defaultCharset() default charset}.
 *
 * @param  ascii
 *         The bytes to be converted to characters
 *
 * @param  hibyte
 *         The top 8 bits of each 16-bit Unicode code unit
 *
 * @param  offset
 *         The initial offset
 * @param  count
 *         The length
 *
 * @throws  IndexOutOfBoundsException
 *          If {@code offset} is negative, {@code count} is negative, or
 *          {@code offset} is greater than {@code ascii.length - count}
 *
 * @see  #String(byte[], int)
 * @see  #String(byte[], int, int, java.lang.String)
 * @see  #String(byte[], int, int, java.nio.charset.Charset)
 * @see  #String(byte[], int, int)
 * @see  #String(byte[], java.lang.String)
 * @see  #String(byte[], java.nio.charset.Charset)
 * @see  #String(byte[])
 */
@Deprecated(since="1.1")
public String(byte[] ascii, int hibyte, int offset, int count) {
    checkBoundsOffCount(offset, count, ascii.length);
    if (count == 0) {
        final String empty = "";
        this.value = empty.value;
        this.coder = empty.coder;
        return;
    }
    // Only the low 8 bits of hibyte matter; when they are zero every
    // resulting char fits in a single byte and the input can be copied.
    if (COMPACT_STRINGS && (byte)hibyte == 0) {
        this.value = Arrays.copyOfRange(ascii, offset, offset + count);
        this.coder = LATIN1;
        return;
    }
    final int high = hibyte << 8;
    final byte[] units = StringUTF16.newBytesFor(count);
    for (int i = 0, src = offset; i < count; i++, src++) {
        StringUTF16.putChar(units, i, high | (ascii[src] & 0xff));
    }
    this.value = units;
    this.coder = UTF16;
}
/**
 * Creates a {@code String} whose characters are built from an array of
 * 8-bit integer values. Each character <i>c</i> in the resulting string is
 * built from the corresponding component <i>b</i> of the byte array such
 * that:
 *
 * <blockquote><pre>{@code
 *     c == (char)(((hibyte & 0xff) << 8)
 *                         | (b & 0xff))
 * }</pre></blockquote>
 *
 * @deprecated  This method does not properly convert bytes into
 * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 * {@code String} constructors that take a {@link Charset}, charset name,
 * or that use the {@link Charset#defaultCharset() default charset}.
 *
 * @param  ascii
 *         The bytes to be converted to characters
 *
 * @param  hibyte
 *         The top 8 bits of each 16-bit Unicode code unit
 *
 * @see  #String(byte[], int, int, java.lang.String)
 * @see  #String(byte[], int, int, java.nio.charset.Charset)
 * @see  #String(byte[], int, int)
 * @see  #String(byte[], java.lang.String)
 * @see  #String(byte[], java.nio.charset.Charset)
 * @see  #String(byte[])
 */
@Deprecated(since="1.1")
public String(byte[] ascii, int hibyte) {
    // Whole-array form of the (ascii, hibyte, offset, count) constructor.
    this(ascii,
         hibyte,
         0,
         ascii.length);
}
/**
 * Creates a {@code String} by decoding the given subarray of bytes with
 * the named charset. The length of the new {@code String} depends on the
 * charset and may therefore differ from the length of the subarray.
 *
 * <p> The behavior of this constructor when the given bytes are not valid
 * in the given charset is unspecified. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @param  offset
 *         The index of the first byte to decode
 *
 * @param  length
 *         The number of bytes to decode
 *
 * @param  charsetName
 *         The name of a supported {@linkplain java.nio.charset.Charset
 *         charset}
 *
 * @throws  UnsupportedEncodingException
 *          If the named charset is not supported
 *
 * @throws  IndexOutOfBoundsException
 *          If {@code offset} is negative, {@code length} is negative, or
 *          {@code offset} is greater than {@code bytes.length - length}
 *
 * @since  1.1
 */
public String(byte[] bytes, int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    // The charset lookup is evaluated before the bounds check, as written.
    this(lookupCharset(charsetName),
         bytes,
         checkBoundsOffCount(offset, length, bytes.length),
         length);
}
/**
 * Creates a {@code String} by decoding the given subarray of bytes with
 * the given {@linkplain java.nio.charset.Charset charset}. The length of
 * the new {@code String} depends on the charset and may therefore differ
 * from the length of the subarray.
 *
 * <p> This method always replaces malformed-input and unmappable-character
 * sequences with this charset's default replacement string. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @param  offset
 *         The index of the first byte to decode
 *
 * @param  length
 *         The number of bytes to decode
 *
 * @param  charset
 *         The {@linkplain java.nio.charset.Charset charset} to be used to
 *         decode the {@code bytes}
 *
 * @throws  IndexOutOfBoundsException
 *          If {@code offset} is negative, {@code length} is negative, or
 *          {@code offset} is greater than {@code bytes.length - length}
 *
 * @since  1.6
 */
public String(byte[] bytes, int offset, int length, Charset charset) {
    // The null check on charset is evaluated before the bounds check.
    this(Objects.requireNonNull(charset),
         bytes,
         checkBoundsOffCount(offset, length, bytes.length),
         length);
}
/** * This method does not do any precondition checks on its arguments. * <p> * Important: parameter order of this method is deliberately changed in order to * disambiguate it against other similar methods of this class.
*/
@SuppressWarnings("removal") private String(Charset charset, byte[] bytes, int offset, int length) { if (length == 0) { this.value = "".value; this.coder = "".coder;
} elseif (charset == UTF_8.INSTANCE) { if (COMPACT_STRINGS) { int dp = StringCoding.countPositives(bytes, offset, length); if (dp == length) { this.value = Arrays.copyOfRange(bytes, offset, offset + length); this.coder = LATIN1; return;
} int sl = offset + length; byte[] dst = newbyte[length]; if (dp > 0) {
System.arraycopy(bytes, offset, dst, 0, dp);
offset += dp;
} while (offset < sl) { int b1 = bytes[offset++]; if (b1 >= 0) {
dst[dp++] = (byte)b1; continue;
} if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3 int b2 = bytes[offset]; if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
dst[dp++] = (byte)decode2(b1, b2);
offset++; continue;
}
} // anything not a latin1, including the REPL // we have to go with the utf16
offset--; break;
} if (offset == sl) { if (dp != dst.length) {
dst = Arrays.copyOf(dst, dp);
} this.value = dst; this.coder = LATIN1; return;
} byte[] buf = newbyte[length << 1];
StringLatin1.inflate(dst, 0, buf, 0, dp);
dst = buf;
dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true); if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
} this.value = dst; this.coder = UTF16;
} else { // !COMPACT_STRINGS byte[] dst = newbyte[length << 1]; int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true); if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
} this.value = dst; this.coder = UTF16;
}
} elseif (charset == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) { this.value = Arrays.copyOfRange(bytes, offset, offset + length); this.coder = LATIN1;
} else { this.value = StringLatin1.inflate(bytes, offset, length); this.coder = UTF16;
}
} elseif (charset == US_ASCII.INSTANCE) { if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) { this.value = Arrays.copyOfRange(bytes, offset, offset + length); this.coder = LATIN1;
} else { byte[] dst = newbyte[length << 1]; int dp = 0; while (dp < length) { int b = bytes[offset++];
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
} this.value = dst; this.coder = UTF16;
}
} else { // (1)We never cache the "external" cs, the only benefit of creating // an additional StringDe/Encoder object to wrap it is to share the // de/encode() method. These SD/E objects are short-lived, the young-gen // gc should be able to take care of them well. But the best approach // is still not to generate them if not really necessary. // (2)The defensive copy of the input byte/char[] has a big performance // impact, as well as the outgoing result byte/char[]. Need to do the // optimization check of (sm==null && classLoader0==null) for both.
CharsetDecoder cd = charset.newDecoder(); // ArrayDecoder fastpaths if (cd instanceof ArrayDecoder ad) { // ascii if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) { if (COMPACT_STRINGS) { this.value = Arrays.copyOfRange(bytes, offset, offset + length); this.coder = LATIN1; return;
} this.value = StringLatin1.inflate(bytes, offset, length); this.coder = UTF16; return;
}
// fastpath for always Latin1 decodable single byte if (COMPACT_STRINGS && ad.isLatin1Decodable()) { byte[] dst = newbyte[length];
ad.decodeToLatin1(bytes, offset, length, dst); this.value = dst; this.coder = LATIN1; return;
}
int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE); char[] ca = newchar[en]; int clen = ad.decode(bytes, offset, length, ca); if (COMPACT_STRINGS) { byte[] bs = StringUTF16.compress(ca, 0, clen); if (bs != null) {
value = bs;
coder = LATIN1; return;
}
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, clen); return;
}
// decode using CharsetDecoder int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE); char[] ca = newchar[en]; if (charset.getClass().getClassLoader0() != null &&
System.getSecurityManager() != null) {
bytes = Arrays.copyOfRange(bytes, offset, offset + length);
offset = 0;
}
int caLen; try {
caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
} catch (CharacterCodingException x) { // Substitution is enabled, so this shouldn't happen thrownew Error(x);
} if (COMPACT_STRINGS) { byte[] bs = StringUTF16.compress(ca, 0, caLen); if (bs != null) {
value = bs;
coder = LATIN1; return;
}
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, caLen);
}
}
/* * Throws iae, instead of replacing, if malformed or unmappable.
*/ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
checkBoundsOffCount(offset, length, bytes.length); if (length == 0) { return"";
} int dp; byte[] dst; if (COMPACT_STRINGS) {
dp = StringCoding.countPositives(bytes, offset, length); int sl = offset + length; if (dp == length) { returnnew String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
}
dst = newbyte[length];
System.arraycopy(bytes, offset, dst, 0, dp);
offset += dp; while (offset < sl) { int b1 = bytes[offset++]; if (b1 >= 0) {
dst[dp++] = (byte)b1; continue;
} if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3 int b2 = bytes[offset]; if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
dst[dp++] = (byte)decode2(b1, b2);
offset++; continue;
}
} // anything not a latin1, including the REPL // we have to go with the utf16
offset--; break;
} if (offset == sl) { if (dp != dst.length) {
dst = Arrays.copyOf(dst, dp);
} returnnew String(dst, LATIN1);
} if (dp == 0) {
dst = newbyte[length << 1];
} else { byte[] buf = newbyte[length << 1];
StringLatin1.inflate(dst, 0, buf, 0, dp);
dst = buf;
}
dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
} else { // !COMPACT_STRINGS
dst = newbyte[length << 1];
dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
} if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
} returnnew String(dst, UTF16);
}
/*
 * Decodes src with cs through newStringNoRepl1 and rethrows the cause of
 * any IllegalArgumentException it raises as a CharacterCodingException.
 */
static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
    try {
        return newStringNoRepl1(src, cs);
    } catch (IllegalArgumentException iae) {
        //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
        final Throwable cause = iae.getCause();
        if (!(cause instanceof MalformedInputException malformed)) {
            throw (CharacterCodingException)cause;
        }
        throw malformed;
    }
}
@SuppressWarnings("removal") privatestatic String newStringNoRepl1(byte[] src, Charset cs) { int len = src.length; if (len == 0) { return"";
} if (cs == UTF_8.INSTANCE) { return newStringUTF8NoRepl(src, 0, src.length);
} if (cs == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) returnnew String(src, LATIN1); returnnew String(StringLatin1.inflate(src, 0, src.length), UTF16);
} if (cs == US_ASCII.INSTANCE) { if (!StringCoding.hasNegatives(src, 0, src.length)) { if (COMPACT_STRINGS) returnnew String(src, LATIN1); returnnew String(StringLatin1.inflate(src, 0, src.length), UTF16);
} else {
throwMalformed(src);
}
}
CharsetDecoder cd = cs.newDecoder(); // ascii fastpath if (cd instanceof ArrayDecoder ad &&
ad.isASCIICompatible() &&
!StringCoding.hasNegatives(src, 0, src.length)) { returnnew String(src, 0, src.length, ISO_8859_1.INSTANCE);
} int en = scale(len, cd.maxCharsPerByte()); char[] ca = newchar[en]; if (cs.getClass().getClassLoader0() != null &&
System.getSecurityManager() != null) {
src = Arrays.copyOf(src, len);
} int caLen; try {
caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
} catch (CharacterCodingException x) { // throw via IAE thrownew IllegalArgumentException(x);
} if (COMPACT_STRINGS) { byte[] bs = StringUTF16.compress(ca, 0, caLen); if (bs != null) { returnnew String(bs, LATIN1);
}
} returnnew String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
}
// Replacement character (U+FFFD) substituted for bytes that cannot be decoded.
private static final char REPL = '\ufffd';
// Trim the given byte array to the given length. The array itself is
// returned only when it already has that length and it is either trusted
// or no security manager is installed; otherwise a copy is returned.
@SuppressWarnings("removal")
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
    if (len != ba.length) {
        return Arrays.copyOf(ba, len);
    }
    if (isTrusted || System.getSecurityManager() == null) {
        return ba;
    }
    return Arrays.copyOf(ba, len);
}
// Multiplies len by expansionFactor and truncates the product to an int.
private static int scale(int len, float expansionFactor) {
    // We need to perform double, not float, arithmetic; otherwise
    // we lose low order bits when len is larger than 2**24.
    final double scaled = (double)expansionFactor * len;
    return (int)scaled;
}
/**
 * Decodes ASCII from the source byte array into the destination
 * char array. Used via JavaLangAccess from UTF_8 and other charset
 * decoders.
 *
 * @return the number of bytes successfully decoded, at most len
 */
/* package-private */
static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
    int decoded = StringCoding.countPositives(sa, sp, len);
    // Extend the count up to the first negative byte, or to len.
    while (decoded < len && sa[sp + decoded] >= 0) {
        decoded++;
    }
    StringLatin1.inflate(sa, sp, da, dp, decoded);
    return decoded;
}
/**
 * Creates a {@code String} by decoding the given array of bytes with the
 * named {@linkplain java.nio.charset.Charset charset}. The length of the
 * new {@code String} depends on the charset and may therefore differ from
 * the length of the byte array.
 *
 * <p> The behavior of this constructor when the given bytes are not valid
 * in the given charset is unspecified. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @param  charsetName
 *         The name of a supported {@linkplain java.nio.charset.Charset
 *         charset}
 *
 * @throws  UnsupportedEncodingException
 *          If the named charset is not supported
 *
 * @since  1.1
 */
public String(byte[] bytes, String charsetName)
        throws UnsupportedEncodingException {
    // The whole array is decoded.
    this(lookupCharset(charsetName),
         bytes,
         0,
         bytes.length);
}
/**
 * Creates a {@code String} by decoding the given array of bytes with the
 * given {@linkplain java.nio.charset.Charset charset}. The length of the
 * new {@code String} depends on the charset and may therefore differ from
 * the length of the byte array.
 *
 * <p> This method always replaces malformed-input and unmappable-character
 * sequences with this charset's default replacement string. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @param  charset
 *         The {@linkplain java.nio.charset.Charset charset} to be used to
 *         decode the {@code bytes}
 *
 * @since  1.6
 */
public String(byte[] bytes, Charset charset) {
    // The whole array is decoded; a null charset is rejected first.
    this(Objects.requireNonNull(charset),
         bytes,
         0,
         bytes.length);
}
/**
 * Creates a {@code String} by decoding the given subarray of bytes with
 * the {@link Charset#defaultCharset() default charset}. The length of the
 * new {@code String} depends on the charset and may therefore differ from
 * the length of the subarray.
 *
 * <p> The behavior of this constructor when the given bytes are not valid
 * in the default charset is unspecified. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @param  offset
 *         The index of the first byte to decode
 *
 * @param  length
 *         The number of bytes to decode
 *
 * @throws  IndexOutOfBoundsException
 *          If {@code offset} is negative, {@code length} is negative, or
 *          {@code offset} is greater than {@code bytes.length - length}
 *
 * @since  1.1
 */
public String(byte[] bytes, int offset, int length) {
    // The default charset is obtained before the bounds check runs.
    this(Charset.defaultCharset(),
         bytes,
         checkBoundsOffCount(offset, length, bytes.length),
         length);
}
/**
 * Creates a {@code String} by decoding the given array of bytes with the
 * {@link Charset#defaultCharset() default charset}. The length of the new
 * {@code String} depends on the charset and may therefore differ from the
 * length of the byte array.
 *
 * <p> The behavior of this constructor when the given bytes are not valid
 * in the default charset is unspecified. Use the {@link
 * java.nio.charset.CharsetDecoder} class when more control over the
 * decoding process is required.
 *
 * @param  bytes
 *         The bytes to be decoded into characters
 *
 * @since  1.1
 */
public String(byte[] bytes) {
    // The whole array is decoded with the default charset.
    this(Charset.defaultCharset(),
         bytes,
         0,
         bytes.length);
}
/**
 * Creates a string holding the sequence of characters currently contained
 * in the given string buffer. The buffer contents are copied, so later
 * changes to the string buffer do not affect the new string.
 *
 * @param  buffer
 *         A {@code StringBuffer}
 */
public String(StringBuffer buffer) {
    // Delegates to the copy constructor with the buffer's string form.
    this(buffer.toString());
}
/**
 * Creates a string holding the sequence of characters currently contained
 * in the given string builder. The builder contents are copied, so later
 * changes to the string builder do not affect the new string.
 *
 * <p> This constructor is provided to ease migration to {@code
 * StringBuilder}. Obtaining a string from a string builder via the {@code
 * toString} method is likely to run faster and is generally preferred.
 *
 * @param   builder
 *          A {@code StringBuilder}
 *
 * @since  1.5
 */
public String(StringBuilder builder) {
    // The null second argument selects a two-argument overload declared
    // elsewhere in this class.
    this(builder, null);
}
/**
 * Returns the length of this string, which equals the number of
 * <a href="Character.html#unicode">Unicode code units</a> in the string.
 *
 * @return  the length of the sequence of characters represented by this
 *          object.
 */
public int length() {
    // The byte count of the storage is shifted right by the coder value.
    final int shift = coder();
    return value.length >> shift;
}
/**
 * Tells whether this string is empty, that is, whether {@link #length()}
 * is {@code 0}.
 *
 * @return {@code true} if {@link #length()} is {@code 0}, otherwise
 * {@code false}
 *
 * @since 1.6
 */
@Override
public boolean isEmpty() {
    final int storedBytes = value.length;
    return storedBytes == 0;
}
/**
 * Returns the {@code char} value at the specified index. Valid indices
 * range from {@code 0} to {@code length() - 1}: the first {@code char}
 * of the sequence is at index {@code 0}, the next at index {@code 1},
 * and so on, as for array indexing.
 *
 * <p>If the {@code char} value specified by the index is a
 * <a href="Character.html#unicode">surrogate</a>, the surrogate value
 * is returned.
 *
 * @param   index   the index of the {@code char} value.
 * @return  the {@code char} value at the specified index of this string.
 *          The first {@code char} value is at index {@code 0}.
 * @throws  IndexOutOfBoundsException  if the {@code index}
 *          argument is negative or not less than the length of this
 *          string.
 */
public char charAt(int index) {
    // Dispatch to the helper that matches the internal encoding of this string.
    if (isLatin1()) {
        return StringLatin1.charAt(value, index);
    } else {
        return StringUTF16.charAt(value, index);
    }
}
/**
 * Returns the character (Unicode code point) at the specified index.
 * The index refers to {@code char} values (Unicode code units) and
 * ranges from {@code 0} to {@link #length()}{@code  - 1}.
 *
 * <p> If the {@code char} value at the given index is in the
 * high-surrogate range, the following index is less than the length of
 * this {@code String}, and the {@code char} value at the following index
 * is in the low-surrogate range, then the supplementary code point
 * corresponding to this surrogate pair is returned. Otherwise, the
 * {@code char} value at the given index is returned.
 *
 * @param   index the index to the {@code char} values
 * @return  the code point value of the character at the
 *          {@code index}
 * @throws  IndexOutOfBoundsException  if the {@code index}
 *          argument is negative or not less than the length of this
 *          string.
 * @since   1.5
 */
public int codePointAt(int index) {
    if (isLatin1()) {
        // Latin-1: one array element per char; mask to get an unsigned value.
        checkIndex(index, value.length);
        return value[index] & 0xff;
    }
    // UTF-16: the char count is half the backing array length.
    int length = value.length >> 1;
    checkIndex(index, length);
    return StringUTF16.codePointAt(value, index, length);
}
/**
 * Returns the character (Unicode code point) before the specified index.
 * The index refers to {@code char} values (Unicode code units) and
 * ranges from {@code 1} to {@link CharSequence#length() length}.
 *
 * <p> If the {@code char} value at {@code (index - 1)} is in the
 * low-surrogate range, {@code (index - 2)} is not negative, and the
 * {@code char} value at {@code (index - 2)} is in the high-surrogate
 * range, then the supplementary code point value of the surrogate pair
 * is returned. If the {@code char} value at {@code index - 1} is an
 * unpaired low-surrogate or a high-surrogate, the surrogate value is
 * returned.
 *
 * @param   index the index following the code point that should be returned
 * @return  the Unicode code point value before the given index.
 * @throws  IndexOutOfBoundsException  if the {@code index}
 *          argument is less than 1 or greater than the length
 *          of this string.
 * @since   1.5
 */
public int codePointBefore(int index) {
    // Validate the position of the char that precedes index.
    int i = index - 1;
    checkIndex(i, length());
    if (isLatin1()) {
        // Latin-1: mask the array element to get an unsigned value.
        return (value[i] & 0xff);
    }
    return StringUTF16.codePointBefore(value, index);
}
/**
 * Returns the number of Unicode code points in the specified text range
 * of this {@code String}. The text range begins at the specified
 * {@code beginIndex} and extends to the {@code char} at index
 * {@code endIndex - 1}. Thus the length (in {@code char}s) of the text
 * range is {@code endIndex-beginIndex}. Unpaired surrogates within the
 * text range count as one code point each.
 *
 * @param   beginIndex the index to the first {@code char} of
 *          the text range.
 * @param   endIndex the index after the last {@code char} of
 *          the text range.
 * @return  the number of Unicode code points in the specified text
 *          range
 * @throws  IndexOutOfBoundsException  if the
 *          {@code beginIndex} is negative, or {@code endIndex}
 *          is larger than the length of this {@code String}, or
 *          {@code beginIndex} is larger than {@code endIndex}.
 * @since   1.5
 */
public int codePointCount(int beginIndex, int endIndex) {
    Objects.checkFromToIndex(beginIndex, endIndex, length());
    if (isLatin1()) {
        // In the Latin-1 case the count is simply the number of chars in the range.
        return endIndex - beginIndex;
    }
    return StringUTF16.codePointCount(value, beginIndex, endIndex);
}
/**
 * Returns the index within this {@code String} that is offset from the
 * given {@code index} by {@code codePointOffset} code points. Unpaired
 * surrogates within the text range given by {@code index} and
 * {@code codePointOffset} count as one code point each.
 *
 * @param   index the index to be offset
 * @param   codePointOffset the offset in code points
 * @return  the index within this {@code String}
 * @throws  IndexOutOfBoundsException  if {@code index}
 *          is negative or larger than the length of this
 *          {@code String}, or if {@code codePointOffset} is positive
 *          and the substring starting with {@code index} has fewer
 *          than {@code codePointOffset} code points,
 *          or if {@code codePointOffset} is negative and the substring
 *          before {@code index} has fewer than the absolute value
 *          of {@code codePointOffset} code points.
 * @since   1.5
 */
public int offsetByCodePoints(int index, int codePointOffset) {
    // Delegates to the Character utility, passing this string as the sequence.
    return Character.offsetByCodePoints(this, index, codePointOffset);
}
/**
 * Copies characters from this string into the destination character
 * array.
 * <p>
 * The first character to be copied is at index {@code srcBegin};
 * the last character to be copied is at index {@code srcEnd-1}
 * (thus the total number of characters to be copied is
 * {@code srcEnd-srcBegin}). The characters are copied into the
 * subarray of {@code dst} starting at index {@code dstBegin}
 * and ending at index:
 * <blockquote><pre>
 *     dstBegin + (srcEnd-srcBegin) - 1
 * </pre></blockquote>
 *
 * @param      srcBegin   index of the first character in the string
 *                        to copy.
 * @param      srcEnd     index after the last character in the string
 *                        to copy.
 * @param      dst        the destination array.
 * @param      dstBegin   the start offset in the destination array.
 * @throws     IndexOutOfBoundsException If any of the following
 *             is true:
 *             <ul><li>{@code srcBegin} is negative.
 *             <li>{@code srcBegin} is greater than {@code srcEnd}
 *             <li>{@code srcEnd} is greater than the length of this
 *                 string
 *             <li>{@code dstBegin} is negative
 *             <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
 *                 {@code dst.length}</ul>
 */
public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
    // Validate the source range against this string, then the destination window.
    checkBoundsBeginEnd(srcBegin, srcEnd, length());
    checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
    // Dispatch to the copier that matches the internal encoding of this string.
    if (isLatin1()) {
        StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
    } else {
        StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
    }
}
/** * Copies characters from this string into the destination byte array. Each * byte receives the 8 low-order bits of the corresponding character. The * eight high-order bits of each character are not copied and do not * participate in the transfer in any way. * * <p> The first character to be copied is at index {@code srcBegin}; the
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.25 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.