/* * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
//------------------------------------------------------ // Special String Intrinsics. Implementation //------------------------------------------------------
// Intrinsics for CompactStrings
// Compress char[] to byte[]. // Restores: src, dst // Uses: cnt // Kills: tmp, Z_R0, Z_R1. // Early clobber: result. // Note: // cnt is signed int. Do not rely on high word! // counts # characters, not bytes. // // The result indicates success or failure of the operation. // General compress operation (cut off high order byte which must be all zeroes). // = len - all characters have been successfully compressed. // = 0 - compress failed. At least one character was found with a non-zero high order byte. // This is the failure return value which exactly corresponds to the Java implementation. // 0 <= result < len - compress failed. That many characters were compressed successfully // before the first non-compressible character was found. This is the // current, but not fully compatible, implementation. See below. // Encode to ISO or 7-bit ASCII array. // = len - all characters have been encoded successfully. // < len - encode failed. That many characters were encoded successfully. // When used as an index into the character array, the return value addresses the // first non-encodable character. // // If precise is true, the processing stops exactly at the point where a failure is detected. // More characters than indicated by the return value may have been read from the src array. // Exactly the number of characters indicated by the return value have been written to dst. // If precise is false, a few characters more than indicated by the return value may have been // written to the dst array. In any failure case, the result value indexes the first invalid character. unsignedint C2_MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt, Register tmp, bool precise, bool toASCII) {
assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
unsignedshort char_mask = 0xff00; // all selected bits must be '0' for a char to be valid unsignedint mask_ix_l = 0; // leftmost one bit pos in mask unsignedint mask_ix_r = 7; // rightmost one bit pos in mask if (precise) { if (toASCII) {
BLOCK_COMMENT("encode_ascii_array {");
char_mask = 0xff80;
mask_ix_r = 8; // rightmost one bit pos in mask. ASCII only uses codes 0..127
} else {
BLOCK_COMMENT("encode_iso_array {");
}
} else {
BLOCK_COMMENT("string_compress {");
assert(!toASCII, "Can't compress strings to 7-bit ASCII");
} int block_start = offset();
Register Rsrc = src; Register Rdst = dst; Register Rix = tmp; Register Rcnt = cnt; Register Rmask = result; // holds incompatibility check mask until result value is stored.
Label ScalarShortcut, AllDone;
bind(skip8Shortcut);
clear_reg(Z_R0, true, false); // #characters already processed (none). Precond for scalar loop.
z_brl(ScalarShortcut); // Just a few characters
bind(skipShortcut);
} #endif
clear_reg(Z_R0); // make sure register is properly initialized.
if (VM_Version::has_VectorFacility()) { constint min_vcnt = 32; // Minimum #characters required to use vector instructions. // Otherwise just do nothing in vector mode. // Must correspond to # vector registers used by implementation, // and must be a power of 2. constint log_min_vcnt = exact_log2(min_vcnt);
Label VectorLoop, VectorDone, VectorBreak;
assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
z_srak(Rix, Rcnt, log_min_vcnt); // # vector loop iterations
z_brz(VectorDone); // not enough data for vector loop
z_vzero(Vzero); // all zeroes
z_vgmh(Vmask, mask_ix_l, mask_ix_r); // generate 0xff00/0xff80 mask for all 2-byte elements
z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
//---< check for incompatible character >---
z_vo(Vtmp1, Z_V20, Z_V21);
z_vo(Vtmp2, Z_V22, Z_V23);
z_vo(Vtmp1, Vtmp1, Vtmp2);
z_vn(Vtmp1, Vtmp1, Vmask);
z_vceqhs(Vtmp1, Vtmp1, Vzero); // all bits selected by mask must be zero for successful compress.
z_bvnt(VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found. // re-process data from current iteration in break handler.
bind(VectorBreak);
add2reg(Rsrc, -min_vcnt*2); // Fix Rsrc. Rsrc was already updated, but Rdst and Rix are not.
z_sll(Rix, log_min_vcnt); // # chars processed so far in VectorLoop, excl. current iteration.
z_sr(Z_R0, Rix); // correct # chars processed in total.
bind(VectorDone);
}
{ constint min_cnt = 8; // Minimum #characters required to use unrolled loop. // Otherwise just do nothing in unrolled loop. // Must correspond to # registers used by implementation, // and must be a power of 2. constint log_min_cnt = exact_log2(min_cnt);
Label UnrolledLoop, UnrolledDone, UnrolledBreak;
if (VM_Version::has_DistinctOpnds()) {
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
} else {
z_lr(Rix, Rcnt);
z_sr(Rix, Z_R0);
}
z_sra(Rix, log_min_cnt); // unrolled loop count
z_brz(UnrolledDone);
bind(UnrolledLoop);
z_lmg(Z_R0, Z_R1, 0, Rsrc); if (precise) {
z_ogr(Z_R1, Z_R0); // check all 8 chars for incompatibility
z_ngr(Z_R1, Rmask);
z_brnz(UnrolledBreak);
z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop.
z_nilf(Z_R0, ~(min_cnt-1));
z_tmll(Rcnt, min_cnt-1);
z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop. // Rix == 0 in all cases.
z_sllg(Z_R1, Rcnt, 1); // # src bytes already processed. Only lower 32 bits are valid! // Z_R1 contents must be treated as unsigned operand! For huge strings, // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
z_lgfr(result, Rcnt); // all characters processed.
z_slgfr(Rdst, Rcnt); // restore ptr
z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
z_bru(AllDone);
bind(UnrolledBreak);
z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop
z_nilf(Z_R0, ~(min_cnt-1));
z_sll(Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
z_sr(Z_R0, Rix); // fix # chars processed OK so far. if (!precise) { // Because we don't need to be precise, we just return the # of characters which have been written. // The first illegal character is in the index range [result-min_cnt/2, result+min_cnt/2).
z_lgfr(result, Z_R0);
z_sllg(Z_R1, Z_R0, 1); // # src bytes already processed. Only lower 32 bits are valid! // Z_R1 contents must be treated as unsigned operand! For huge strings, // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written // but ptrs were not updated yet.
z_slgfr(Rdst, Z_R0); // restore ptr
z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
z_bru(AllDone);
}
bind(UnrolledDone);
}
#if 0 // Sacrifice shortcuts for code compactness
{ //---< Special treatment for very short strings (one or two characters) >--- // For these strings, we are sure that the above code was skipped. // Thus, no registers were modified, register restore is not required.
Label ScalarDoit, Scalar2Char;
z_chi(Rcnt, 2);
z_brh(ScalarDoit);
z_llh(Z_R1, 0, Z_R0, Rsrc);
z_bre(Scalar2Char);
z_tmll(Z_R1, char_mask);
z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed
z_brnaz(AllDone);
z_stc(Z_R1, 0, Z_R0, Rdst);
z_lghi(result, 1);
z_bru(AllDone);
assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
assert(dst->encoding()%2 == 0, "must be even reg");
assert(cnt->encoding()%2 == 1, "must be odd reg");
assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT)
clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value.
lgr_if_needed(src_addr, src);
z_llgfr(cnt, cnt); // # src characters, must be a positive simm32.
translate_ot(dst, src_addr, /* mask = */ 0x0001);
BLOCK_COMMENT("} string_inflate");
return offset() - block_start;
}
// Inflate byte[] to char[]. // Restores: src, dst // Uses: cnt // Kills: tmp, Z_R0, Z_R1. // Note: // cnt is signed int. Do not rely on high word! // counts # characters, not bytes. unsignedint C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
BLOCK_COMMENT("string_inflate {"); int block_start = offset();
#if 0 // Sacrifice shortcuts for code compactness
{ //---< shortcuts for short strings (very frequent) >---
Label skipShortcut, skip4Shortcut;
z_ltr(Rcnt, Rcnt); // absolutely nothing to do for strings of len == 0.
z_brz(AllDone);
clear_reg(Z_R0); // make sure registers are properly initialized.
clear_reg(Z_R1);
z_chi(Rcnt, 4);
z_brne(skip4Shortcut); // 4 characters are very frequent
z_icm(Z_R0, 5, 0, Rsrc); // Treat exactly 4 characters specially.
z_icm(Z_R1, 5, 2, Rsrc);
z_stm(Z_R0, Z_R1, 0, Rdst);
z_bru(AllDone);
bind(skip4Shortcut);
z_chi(Rcnt, 8);
z_brh(skipShortcut); // There's a lot to do...
z_lgfr(Z_R0, Rcnt); // remaining #characters (<= 8). Precond for scalar loop. // This does not destroy the "register cleared" state of Z_R0.
z_brl(ScalarShortcut); // Just a few characters
z_icmh(Z_R0, 5, 0, Rsrc); // Treat exactly 8 characters specially.
z_icmh(Z_R1, 5, 4, Rsrc);
z_icm(Z_R0, 5, 2, Rsrc);
z_icm(Z_R1, 5, 6, Rsrc);
z_stmg(Z_R0, Z_R1, 0, Rdst);
z_bru(AllDone);
bind(skipShortcut);
} #endif
clear_reg(Z_R0); // make sure register is properly initialized.
if (VM_Version::has_VectorFacility()) { constint min_vcnt = 32; // Minimum #characters required to use vector instructions. // Otherwise just do nothing in vector mode. // Must be multiple of vector register length (16 bytes = 128 bits). constint log_min_vcnt = exact_log2(min_vcnt);
Label VectorLoop, VectorDone;
assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
z_srak(Rix, Rcnt, log_min_vcnt); // calculate # vector loop iterations
z_brz(VectorDone); // skip if none
z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
bind(VectorLoop);
z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte)
add2reg(Rsrc, min_vcnt);
constint min_cnt = 8; // Minimum #characters required to use unrolled scalar loop. // Otherwise just do nothing in unrolled scalar mode. // Must be multiple of 8.
{ constint log_min_cnt = exact_log2(min_cnt);
Label UnrolledLoop, UnrolledDone;
if (VM_Version::has_DistinctOpnds()) {
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to process in unrolled loop
} else {
z_lr(Rix, Rcnt);
z_sr(Rix, Z_R0);
}
z_sra(Rix, log_min_cnt); // unrolled loop count
z_brz(UnrolledDone);
bind(UnrolledDone);
z_lgfr(Z_R0, Rcnt); // # chars left over after unrolled loop.
z_nilf(Z_R0, min_cnt-1);
z_brnz(ScalarShortcut); // if zero, there is nothing left to do for scalar loop. // Rix == 0 in all cases.
z_sgfr(Z_R0, Rcnt); // negative # characters the ptrs have been advanced previously.
z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore.
z_agr(Rdst, Z_R0);
z_agr(Rsrc, Z_R0); // restore ptr.
z_bru(AllDone);
}
{
bind(ScalarShortcut); // Z_R0 must contain remaining # characters as 64-bit signed int here. // register contents is preserved over scalar processing (for register fixup).
Label CodeTable; // Some comments on Rix calculation: // - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions). // - high word of both Rix and Rcnt may contain garbage // - the final lngfr takes care of that garbage, extending the sign to high word
z_sllg(Rix, Z_R0, 2); // calculate 10*Rix = (4*Rix + Rix)*2
z_ar(Rix, Z_R0);
z_larl(Z_R1, CodeTable);
z_sll(Rix, 1);
z_lngfr(Rix, Rix); // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)].
z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);
lgr_if_needed(pos, src); // current position in src array
z_srak(ctr, cnt, log_unroll_factor); // # iterations of unrolled loop
z_brnh(unrolledDone); // array too short for unrolled loop
bind(unrolledLoop);
z_lmg(Z_R0, Z_R1, 0, pos);
z_ogr(Z_R0, Z_R1);
z_ngr(Z_R0, mask);
z_brne(unrolledDone); // There is a negative byte somewhere. // ctr and pos are not updated yet -> // delegate finding correct pos to byteLoop.
add2reg(pos, unroll_factor);
z_brct(ctr, unrolledLoop);
// Once we arrive here, we have to examine at most (unroll_factor - 1) bytes more. // We then either have reached the end of the array or we hit a negative byte.
bind(unrolledDone);
z_sll(ctr, log_unroll_factor); // calculate # bytes not processed by unrolled loop // > 0 only if a negative byte was found
z_lr(Z_R0, cnt); // calculate remainder bytes
z_nilf(Z_R0, unroll_factor - 1);
z_ar(ctr, Z_R0); // remaining bytes
z_brnh(allDone); // shortcut if nothing left to do
bind(byteLoop);
z_cli(0, pos, byte_mask); // unsigned comparison! byte@pos must be smaller than byte_mask
z_brnl(allDone); // negative byte found.
add2reg(pos, 1);
z_brct(ctr, byteLoop);
bind(allDone);
z_srk(ctr, cnt, ctr); // # bytes actually processed (= cnt or index of first negative byte)
z_sgfr(pos, ctr); // restore src
z_lgfr(result, ctr); // unnecessary. Only there to be sure the high word has a defined state.
BLOCK_COMMENT("} count_positives");
return offset() - block_start;
}
// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result unsignedint C2_MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register odd_reg, Register even_reg, Register result, int ae) { int block_start = offset();
// If strings are equal up to min length, return the length difference. constRegister diff = result, // Pre-set result with length difference.
min = cnt1, // min number of bytes
tmp = cnt2;
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a) // we interchange str1 and str2 in the UL case and negate the result. // Like this, str1 is always latin1 encoded, except for the UU case. // In addition, we need 0 (or sign which is 0) extend when using 64 bit register. constbool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);
BLOCK_COMMENT("string_compare {");
if (used_as_LU) {
z_srl(cnt2, 1);
}
// See if the lengths are different, and calculate min in cnt1. // Save diff in case we need it for a tie-breaker.
// diff = cnt1 - cnt2 if (VM_Version::has_DistinctOpnds()) {
z_srk(diff, cnt1, cnt2);
} else {
z_lr(diff, cnt1);
z_sr(diff, cnt2);
} if (str1 != str2) { if (VM_Version::has_LoadStoreConditional()) {
z_locr(min, cnt2, Assembler::bcondHigh);
} else {
Label Lskip;
z_brl(Lskip); // min ok if cnt1 < cnt2
z_lr(min, cnt2); // min = cnt2
bind(Lskip);
}
}
if (ae == StrIntrinsicNode::UU) {
z_sra(diff, 1);
} if (str1 != str2) {
Label Ldone; if (used_as_LU) { // Loop which searches the first difference character by character.
Label Lloop; constRegister ind1 = Z_R1,
ind2 = min; int stride1 = 1, stride2 = 2; // See comment above.
// ind1: index, even_reg: index increment, odd_reg: index limit
z_llilf(ind1, (unsignedint)(-stride1));
z_lhi(even_reg, stride1);
add2reg(odd_reg, -stride1, min);
clear_reg(ind2); // kills min
Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template; int base_offset = 0;
if (ary1 != ary2) { if (is_array_equ) {
base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
// Return true if the same array.
compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);
// Return false if one of them is NULL.
compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
// Load the lengths of arrays.
z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));
// Return false if the two arrays are not equal length.
z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
z_brne(Ldone_false);
// string len in bytes (right operand) if (!is_byte) {
z_chi(odd_reg, 128);
z_sll(odd_reg, 1); // preserves flags
z_brh(Lclcle);
} else {
compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
}
} else {
z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
}
// Use clc instruction for up to 256 bytes.
{ Register str1_reg = ary1,
str2_reg = ary2; if (is_array_equ) {
str1_reg = Z_R1;
str2_reg = even_reg;
add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
}
z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
z_brl(Ldone_true); // Note: We could jump to the template if equal.
assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
z_exrl(odd_reg, CLC_template);
z_bre(Ldone_true); // fall through
if (needle == haystack) {
z_lhi(result, 0);
} else {
// Load first character of needle (R0 used by search_string instructions). if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }
// Compute last haystack addr to use if no match gets found. if (needlecnt != noreg) { // variable needlecnt
z_ahi(needlecnt, -1); // Remaining characters after first one.
z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare. if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
} else { // constant needlecnt
assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate"); // Compute index succeeding last element to compare. if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
}
z_llgfr(haycnt, haycnt); // Clear high half.
z_lgr(result, haystack); // Final result will be computed from needle start pointer. if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).
if (h_csize != n_csize) {
assert(ae == StrIntrinsicNode::UL, "Invalid encoding");
if (needlecnt != noreg || needlecntval != 1) { if (needlecnt != noreg) {
compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);
}
// Main Loop: UL version (now we have at least 2 characters).
Label L_OuterLoop, L_InnerLoop, L_Skip;
bind(L_OuterLoop); // Search for 1st 2 characters.
z_lgr(Z_R1, haycnt);
MacroAssembler::search_string_uni(Z_R1, result);
z_brc(Assembler::bcondNotFound, L_NotFound);
z_lgr(result, Z_R1);
// Main Loop: clc version (now we have at least 2 characters).
Label L_OuterLoop, CLC_template;
bind(L_OuterLoop); // Search for 1st 2 characters.
z_lgr(Z_R1, haycnt); if (h_csize == 1) {
MacroAssembler::search_string(Z_R1, result);
} else {
MacroAssembler::search_string_uni(Z_R1, result);
}
z_brc(Assembler::bcondNotFound, L_NotFound);
z_lgr(result, Z_R1);
if (needlecnt != noreg) {
assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
z_exrl(needlecnt, CLC_template);
} else {
z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
}
z_bre(L_Found);
z_aghi(result, h_csize); // This is the new address we want to use for comparing.
z_bru(L_OuterLoop);
if (needlecnt != noreg || needle_bytes > 256) {
bind(L_clcle);
// Main Loop: clcle version (now we have at least 256 bytes).
Label L_OuterLoop, CLC_template;
bind(L_OuterLoop); // Search for 1st 2 characters.
z_lgr(Z_R1, haycnt); if (h_csize == 1) {
MacroAssembler::search_string(Z_R1, result);
} else {
MacroAssembler::search_string_uni(Z_R1, result);
}
z_brc(Assembler::bcondNotFound, L_NotFound);
if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.
z_aghi(result, h_csize); // This is the new address we want to use for comparing.
z_bru(L_OuterLoop);
}
}
if (needlecnt != noreg || needlecntval == 1) {
bind(L_needle1);
// Single needle character version. if (h_csize == 1) {
MacroAssembler::search_string(haycnt, result);
} else {
MacroAssembler::search_string_uni(haycnt, result);
}
z_lgr(result, haycnt);
z_brc(Assembler::bcondFound, L_Found);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.