/* * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// At top of Java expression stack which may be different than SP. // It isn't for category 1 objects. staticinline Address at_tos() { return Address(Rstack_top, Interpreter::expr_offset_in_bytes(0));
}
// Loads double/long local into R0_tos_lo/R1_tos_hi with two // separate ldr instructions (supports nonadjacent values). // Used for longs in all modes, and for doubles in SOFTFP mode. void TemplateTable::load_category2_local(Register Rlocal_index, Register tmp) { constRegister Rlocal_base = tmp;
assert_different_registers(Rlocal_index, tmp);
// Stores R0_tos_lo/R1_tos_hi to double/long local with two // separate str instructions (supports nonadjacent values). // Used for longs in all modes, and for doubles in SOFTFP mode void TemplateTable::store_category2_local(Register Rlocal_index, Register tmp) { constRegister Rlocal_base = tmp;
assert_different_registers(Rlocal_index, tmp);
switch (bc) { case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: case Bytecodes::_fast_cputfield: case Bytecodes::_fast_dputfield: case Bytecodes::_fast_fputfield: case Bytecodes::_fast_iputfield: case Bytecodes::_fast_lputfield: case Bytecodes::_fast_sputfield:
{ // We skip bytecode quickening for putfield instructions when // the put_code written to the constant pool cache is zero. // This is required so that every execution of this instruction // calls out to InterpreterRuntime::resolve_get_put to do // additional, required work.
assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
assert(load_bc_into_bc_reg, "we use bc_reg as temp");
__ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1, sizeof(u2));
__ mov(bc_reg, bc);
__ cbz(temp_reg, L_patch_done); // test if bytecode is zero
} break; default:
assert(byte_no == -1, "sanity"); // the pair bytecodes have already done the load. if (load_bc_into_bc_reg) {
__ mov(bc_reg, bc);
}
}
if (__ can_post_breakpoint()) {
Label L_fast_patch; // if a breakpoint is present we can't rewrite the stream directly
__ ldrb(temp_reg, at_bcp(0));
__ cmp(temp_reg, Bytecodes::_breakpoint);
__ b(L_fast_patch, ne); if (bc_reg != R3) {
__ mov(R3, bc_reg);
}
__ mov(R1, Rmethod);
__ mov(R2, Rbcp); // Let breakpoint table handling rewrite to quicker bytecode
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R1, R2, R3);
__ b(L_patch_done);
__ bind(L_fast_patch);
}
void TemplateTable::dconst(int value) {
transition(vtos, dtos); constint one_lo = 0; // low part of 1.0 constint one_hi = 0x3ff00000; // high part of 1.0
// get const type
__ add(Rtemp, Rtags, tags_offset);
__ ldrb(RtagType, Address(Rtemp, Rindex));
volatile_barrier(MacroAssembler::LoadLoad, Rtemp);
// unresolved class - get the resolved class
__ cmp(RtagType, JVM_CONSTANT_UnresolvedClass);
// unresolved class in error (resolution failed) - call into runtime // so that the same error from first resolution attempt is thrown.
__ cond_cmp(RtagType, JVM_CONSTANT_UnresolvedClassInError, ne);
// resolved class - need to call vm to get java mirror of the class
__ cond_cmp(RtagType, JVM_CONSTANT_Class, ne);
// first time invocation - must resolve first
__ mov(R1, (int)bytecode());
__ call_VM(R0_tos, entry, R1);
__ bind(resolved);
{ // Check for the null sentinel. // If we just called the VM, that already did the mapping for us, // but it's harmless to retry.
Label notNull; Register result = R0; Register tmp = R1; Register rarg = R2;
// Stash null_sentinel address to get its value later
__ mov_slow(rarg, (uintptr_t)Universe::the_null_sentinel_addr());
__ ldr(tmp, Address(rarg));
__ resolve_oop_handle(tmp);
__ cmp(result, tmp);
__ b(notNull, ne);
__ mov(result, 0); // NULL object reference
__ bind(notNull);
}
__ logical_shift_right(flags, flags, ConstantPoolCacheEntry::tos_state_shift); // Make sure we don't need to mask flags after the above shift
ConstantPoolCacheEntry::verify_tos_state_shift();
switch (bytecode()) { case Bytecodes::_ldc: case Bytecodes::_ldc_w:
{ // tos in (itos, ftos, stos, btos, ctos, ztos)
Label notIntFloat, notShort, notByte, notChar, notBool;
__ cmp(flags, itos);
__ cond_cmp(flags, ftos, ne);
__ b(notIntFloat, ne);
__ ldr(R0_tos, field);
__ push(itos);
__ b(Done);
// get next byte
__ ldrb(next_bytecode, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); // if _iload, wait to rewrite to iload2. We only want to rewrite the // last two iloads in a pair. Comparing against fast_iload means that // the next bytecode is neither an iload or a caload, and therefore // an iload pair.
__ cmp(next_bytecode, Bytecodes::_iload);
__ b(done, eq);
// Get the local value into tos constRegister Rlocal_index = R1_tmp;
locals_index(Rlocal_index);
Address local = load_iaddress(Rlocal_index, Rtemp);
__ ldr_s32(R0_tos, local);
}
// Get the local value into tos
locals_index(Rlocal_index);
Address local = load_faddress(Rlocal_index, Rtemp); #ifdef __SOFTFP__
__ ldr(R0_tos, local); #else
__ ldr_float(S0_tos, local); #endif// __SOFTFP__
}
void TemplateTable::aload_0_internal(RewriteControl rc) {
transition(vtos, atos); // According to bytecode histograms, the pairs: // // _aload_0, _fast_igetfield // _aload_0, _fast_agetfield // _aload_0, _fast_fgetfield // // occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0 // bytecode checks if the next bytecode is either _fast_igetfield, // _fast_agetfield or _fast_fgetfield and then rewrites the // current bytecode into a pair bytecode; otherwise it rewrites the current // bytecode into _fast_aload_0 that doesn't do the pair check anymore. // // Note: If the next bytecode is _getfield, the rewrite must be delayed, // otherwise we may miss an opportunity for a pair. // // Also rewrite frequent pairs // aload_0, aload_1 // aload_0, iload_1 // These bytecodes with a small amount of code are most profitable to rewrite if ((rc == may_rewrite) && __ rewrite_frequent_pairs()) {
Label rewrite, done; constRegister next_bytecode = R1_tmp; constRegister target_bytecode = R2_tmp;
// get next byte
__ ldrb(next_bytecode, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
// if _getfield then wait with rewrite
__ cmp(next_bytecode, Bytecodes::_getfield);
__ b(done, eq);
// if _igetfield then rewrite to _fast_iaccess_0
assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
__ cmp(next_bytecode, Bytecodes::_fast_igetfield);
__ mov(target_bytecode, Bytecodes::_fast_iaccess_0);
__ b(rewrite, eq);
// if _agetfield then rewrite to _fast_aaccess_0
assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
__ cmp(next_bytecode, Bytecodes::_fast_agetfield);
__ mov(target_bytecode, Bytecodes::_fast_aaccess_0);
__ b(rewrite, eq);
// if _fgetfield then rewrite to _fast_faccess_0, else rewrite to _fast_aload_0
assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
// sastore shares its implementation with castore(): after checking that char
// and short arrays use the same element base offset, it simply delegates.
void TemplateTable::sastore() {
  // The delegation below is only valid while both array kinds start their
  // elements at the same offset.
  assert(arrayOopDesc::base_offset_in_bytes(T_CHAR) ==
         arrayOopDesc::base_offset_in_bytes(T_SHORT), "base offsets for char and short should be equal");
  castore();
}
// dup: duplicate the top expression-stack slot.
//   before: ..., a
//   after:  ..., a, a
// Uses R0_tmp as scratch; tos state is vtos on entry and exit.
void TemplateTable::dup() {
  transition(vtos, vtos); // stack: ..., a
  __ load_ptr(0, R0_tmp); // load a (slot 0 is the top slot)
  __ push_ptr(R0_tmp); // stack: ..., a, a
}
// dup_x1: copy the top slot and insert the copy below the second slot.
//   before: ..., a, b
//   after:  ..., b, a, b
// The two existing slots are swapped in place and b is then pushed again.
// Uses R0_tmp and R2_tmp as scratch.
void TemplateTable::dup_x1() {
  transition(vtos, vtos); // stack: ..., a, b
  __ load_ptr(0, R0_tmp); // load b
  __ load_ptr(1, R2_tmp); // load a
  __ store_ptr(1, R0_tmp); // store b
  __ store_ptr(0, R2_tmp); // store a
  __ push_ptr(R0_tmp); // push b // stack: ..., b, a, b
}
// dup_x2: copy the top slot and insert the copy below the third slot.
//   before: ..., a, b, c
//   after:  ..., c, a, b, c
// All three slots are read first, c is pushed to grow the stack by one slot,
// and the slots below the new top are then rewritten (indices are relative to
// the new top after the push).
// Uses R0_tmp, R2_tmp and R4_tmp as scratch.
void TemplateTable::dup_x2() {
  transition(vtos, vtos); // stack: ..., a, b, c
  __ load_ptr(0, R0_tmp); // load c
  __ load_ptr(1, R2_tmp); // load b
  __ load_ptr(2, R4_tmp); // load a
  __ push_ptr(R0_tmp); // push c
  // stack: ..., a, b, c, c
  __ store_ptr(1, R2_tmp); // store b
  __ store_ptr(2, R4_tmp); // store a
  __ store_ptr(3, R0_tmp); // store c // stack: ..., c, a, b, c
}
// dup2: duplicate the top two expression-stack slots.
//   before: ..., a, b
//   after:  ..., a, b, a, b
// Slot index 1 is used for both loads: once a has been pushed, b has moved
// from slot 0 to slot 1.
// Uses R0_tmp as scratch.
void TemplateTable::dup2() {
  transition(vtos, vtos); // stack: ..., a, b
  __ load_ptr(1, R0_tmp); // load a
  __ push_ptr(R0_tmp); // push a
  __ load_ptr(1, R0_tmp); // load b
  __ push_ptr(R0_tmp); // push b // stack: ..., a, b, a, b
}
// stack: ..., a, b, c
__ load_ptr(0, R4_tmp); // load c
__ load_ptr(1, R2_tmp); // load b
__ load_ptr(2, R0_tmp); // load a
__ push_ptr(R2_tmp); // push b
__ push_ptr(R4_tmp); // push c
// stack: ..., a, b, c, b, c
__ store_ptr(2, R0_tmp); // store a
__ store_ptr(3, R4_tmp); // store c
__ store_ptr(4, R2_tmp); // store b
// stack: ..., b, c, a, b, c
}
// dup2_x2: copy the top two slots and insert the copies below the fourth slot.
//   before: ..., a, b, c, d
//   after:  ..., c, d, a, b, c, d
// c and d are pushed first; the four slots underneath are then fixed up with
// two swaps (d <-> b, then c <-> a). Slot indices after the pushes are
// relative to the new top of stack.
// Uses R0_tmp, R2_tmp and R4_tmp as scratch.
void TemplateTable::dup2_x2() {
  transition(vtos, vtos); // stack: ..., a, b, c, d
  __ load_ptr(0, R0_tmp); // load d
  __ load_ptr(1, R2_tmp); // load c
  __ push_ptr(R2_tmp); // push c
  __ push_ptr(R0_tmp); // push d // stack: ..., a, b, c, d, c, d
  __ load_ptr(4, R4_tmp); // load b
  __ store_ptr(4, R0_tmp); // store d in b
  __ store_ptr(2, R4_tmp); // store b in d // stack: ..., a, d, c, b, c, d
  __ load_ptr(5, R4_tmp); // load a
  __ store_ptr(5, R2_tmp); // store c in a
  __ store_ptr(3, R4_tmp); // store a in c // stack: ..., c, d, a, b, c, d
}
// swap: exchange the top two expression-stack slots in place.
//   before: ..., a, b
//   after:  ..., b, a
// Uses R0_tmp and R2_tmp as scratch; the stack depth does not change.
void TemplateTable::swap() {
  transition(vtos, vtos); // stack: ..., a, b
  __ load_ptr(1, R0_tmp); // load a
  __ load_ptr(0, R2_tmp); // load b
  __ store_ptr(0, R0_tmp); // store a in b
  __ store_ptr(1, R2_tmp); // store b in a // stack: ..., b, a
}
// dneg: negate the double held in the tos registers (dtos -> dtos).
//   __SOFTFP__ builds: flip the sign bit in the high word (R1_tos_hi) with
//                      an exclusive-or.
//   other builds:      negate D0_tos with neg_double.
void TemplateTable::dneg() {
  transition(dtos, dtos);
#ifdef __SOFTFP__
  // Invert sign bit in the high part of the double
  const int sign_mask_hi = 0x80000000;
  __ eor(R1_tos_hi, R1_tos_hi, sign_mask_hi);
#else
  __ neg_double(D0_tos, D0_tos);
#endif // __SOFTFP__
}
void TemplateTable::convert() { // Checking #ifdef ASSERT
{ TosState tos_in = ilgl;
TosState tos_out = ilgl; switch (bytecode()) { case Bytecodes::_i2l: // fall through case Bytecodes::_i2f: // fall through case Bytecodes::_i2d: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_in = itos; break; case Bytecodes::_l2i: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_l2d: tos_in = ltos; break; case Bytecodes::_f2i: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_f2d: tos_in = ftos; break; case Bytecodes::_d2i: // fall through case Bytecodes::_d2l: // fall through case Bytecodes::_d2f: tos_in = dtos; break; default : ShouldNotReachHere();
} switch (bytecode()) { case Bytecodes::_l2i: // fall through case Bytecodes::_f2i: // fall through case Bytecodes::_d2i: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_out = itos; break; case Bytecodes::_i2l: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_d2l: tos_out = ltos; break; case Bytecodes::_i2f: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_d2f: tos_out = ftos; break; case Bytecodes::_i2d: // fall through case Bytecodes::_l2d: // fall through case Bytecodes::_f2d: tos_out = dtos; break; default : ShouldNotReachHere();
}
transition(tos_in, tos_out);
} #endif// ASSERT
// comparison result | flag N | flag Z | flag C | flag V // "<" | 1 | 0 | 0 | 0 // "==" | 0 | 1 | 1 | 0 // ">" | 0 | 0 | 1 | 0 // unordered | 0 | 0 | 1 | 1
if (unordered_result < 0) {
__ mov(R0_tos, 1); // result == 1 if greater
__ mvn(R0_tos, 0, lt); // result == -1 if less or unordered (N!=V)
} else {
__ mov(R0_tos, 1); // result == 1 if greater or unordered
__ mvn(R0_tos, 0, mi); // result == -1 if less (N=1)
}
__ mov(R0_tos, 0, eq); // result == 0 if equ (Z=1) #endif// __SOFTFP__
}
// Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the // non-JSR normal-branch stuff occurring below. if (is_jsr) { // compute return address as bci in R1 constRegister Rret_addr = R1_tmp;
assert_different_registers(Rdisp, Rret_addr, Rtemp);
// R0: osr nmethod (osr ok) or NULL (osr not possible) constRegister Rnmethod = R0;
__ ldrb(R3_bytecode, Address(Rbcp)); // reload next bytecode
__ cbz(Rnmethod, dispatch); // test result, no osr if null
// nmethod may have been invalidated (VM may block upon call_VM return)
__ ldrb(R1_tmp, Address(Rnmethod, nmethod::state_offset()));
__ cmp(R1_tmp, nmethod::in_use);
__ b(dispatch, ne);
// We have the address of an on stack replacement routine in Rnmethod, // We need to prepare to execute the OSR method. First we must // migrate the locals and monitors off of the stack.
// Conditional branch comparing the int in R0_tos against zero.
// cc is the condition under which the bytecode branches.
void TemplateTable::if_0cmp(Condition cc) {
  transition(itos, vtos);

  // The taken case is laid out as the fall-through path, on the assumption
  // that branches are taken more often than not (loops use backward branches).
  Label L_skip_branch;
  __ cmp_32(R0_tos, 0);
  // Jump over the branch code when the negated condition holds.
  __ b(L_skip_branch, convNegCond(cc));
  branch(false, false);

  // Not-taken path: only the profile update is emitted here.
  __ bind(L_skip_branch);
  __ profile_not_taken_branch(R0_tmp);
}
// Conditional branch comparing two ints: the value popped from the stack
// (into R1_tmp) is compared with the value in R0_tos.
// cc is the condition under which the bytecode branches.
void TemplateTable::if_icmp(Condition cc) {
  transition(itos, vtos);

  // The taken case is laid out as the fall-through path, on the assumption
  // that branches are taken more often than not (loops use backward branches).
  Label L_skip_branch;
  __ pop_i(R1_tmp);
  __ cmp_32(R1_tmp, R0_tos);
  // Jump over the branch code when the negated condition holds.
  __ b(L_skip_branch, convNegCond(cc));
  branch(false, false);

  // Not-taken path: only the profile update is emitted here.
  __ bind(L_skip_branch);
  __ profile_not_taken_branch(R0_tmp);
}
// assume branch is more often taken than not (loops use backward branches)
Label not_taken; if (cc == equal) {
__ cbnz(R0_tos, not_taken);
} else {
__ cbz(R0_tos, not_taken);
}
branch(false, false);
__ bind(not_taken);
__ profile_not_taken_branch(R0_tmp);
}
// Conditional branch comparing two references: the reference popped from the
// stack (into R1_tmp) is compared with the one in R0_tos using cmpoop.
// cc is the condition under which the bytecode branches.
void TemplateTable::if_acmp(Condition cc) {
  transition(atos, vtos);

  // The taken case is laid out as the fall-through path, on the assumption
  // that branches are taken more often than not (loops use backward branches).
  Label L_skip_branch;
  __ pop_ptr(R1_tmp);
  __ cmpoop(R1_tmp, R0_tos);
  // Jump over the branch code when the negated condition holds.
  __ b(L_skip_branch, convNegCond(cc));
  branch(false, false);

  // Not-taken path: only the profile update is emitted here.
  __ bind(L_skip_branch);
  __ profile_not_taken_branch(R0_tmp);
}
// load lo & hi
__ ldmia(Rabcp, RegisterSet(Rlow) | RegisterSet(Rhigh), writeback);
__ byteswap_u32(Rlow, Rtemp, Rtemp2);
__ byteswap_u32(Rhigh, Rtemp, Rtemp2);
// compare index with high bound
__ cmp_32(Rhigh, Rindex);
// if Rindex <= Rhigh then calculate index in table (Rindex - Rlow)
__ subs(Rindex, Rindex, Rlow, ge);
// if Rindex <= Rhigh and (Rindex - Rlow) >= 0 // ("ge" status accumulated from cmp and subs instructions) then load // offset from table, otherwise load offset for default case
// load the next bytecode to R3_bytecode and advance Rbcp
__ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed));
__ dispatch_only(vtos, true);
}
// lookupswitch is not expected to be executed through this template: per the
// message below it should already have been rewritten, so the generated code
// consists only of a stop() carrying that diagnostic.
void TemplateTable::lookupswitch() {
  transition(itos, itos);
  __ stop("lookupswitch bytecode should have been rewritten");
}
// entry found -> get offset
__ bind(found); // Rabcp is already incremented and points to the next entry
__ ldr_s32(Roffset, Address(Rabcp, -BytesPerInt)); if (ProfileInterpreter) { // Calculate index of the selected case.
assert_different_registers(Roffset, Rcount, Rtemp, R0_tmp, R1_tmp, R2_tmp);
// load the next bytecode to R3_bytecode and advance Rbcp
__ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed));
__ dispatch_only(vtos, true);
}
void TemplateTable::fast_binaryswitch() {
transition(itos, vtos); // Implementation using the following core algorithm: // // int binary_search(int key, LookupswitchPair* array, int n) { // // Binary search according to "Methodik des Programmierens" by // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. // int i = 0; // int j = n; // while (i+1 < j) { // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) // // with Q: for all i: 0 <= i < n: key < a[i] // // where a stands for the array and assuming that the (inexisting) // // element a[n] is infinitely big. // int h = (i + j) >> 1; // // i < h < j // if (key < array[h].fast_match()) { // j = h; // } else { // i = h; // } // } // // R: a[i] <= key < a[i+1] or Q // // (i.e., if key is within array, i is the correct index) // return i; // }
// initialize i & j
__ mov(i, 0); // i = 0;
__ ldr_s32(j, Address(array, -BytesPerInt)); // j = length(array); // Convert j into native byteordering
__ byteswap_u32(j, temp1, temp2);
// and start
Label entry;
__ b(entry);
// binary search loop
{ Label loop;
__ bind(loop); // int h = (i + j) >> 1;
__ add(h, i, j); // h = i + j;
__ logical_shift_right(h, h, 1); // h = (i + j) >> 1; // if (key < array[h].fast_match()) { // j = h; // } else { // i = h; // }
__ ldr_s32(val, Address(array, h, lsl, 1+LogBytesPerInt)); // Convert array[h].match to native byte-ordering before compare
__ byteswap_u32(val, temp1, temp2);
__ cmp_32(key, val);
__ mov(j, h, lt); // j = h if (key < array[h].fast_match())
__ mov(i, h, ge); // i = h if (key >= array[h].fast_match()) // while (i+1 < j)
__ bind(entry);
__ add(temp1, i, 1); // i+1
__ cmp(temp1, j); // i+1 < j
__ b(loop, lt);
}
// end of binary search, result index is i (must check again!)
Label default_case; // Convert array[i].match to native byte-ordering before compare
__ ldr_s32(val, Address(array, i, lsl, 1+LogBytesPerInt));
__ byteswap_u32(val, temp1, temp2);
__ cmp_32(key, val);
__ b(default_case, ne);
// Narrow result if state is itos but result type is smaller. // Need to narrow in the return bytecode rather than in generate_return_entry // since compiled code callers expect the result to already be narrowed. if (state == itos) {
__ narrow(R0_tos);
}
__ remove_activation(state, LR);
// According to interpreter calling conventions, result is returned in R0/R1, // so ftos (S0) and dtos (D0) are moved to R0/R1. // This conversion should be done after remove_activation, as it uses // push(state) & pop(state) to preserve return value.
__ convert_tos_to_retval(state);
__ ret();
__ nop(); // to avoid filling CPU pipeline with invalid instructions
__ nop();
}
// ---------------------------------------------------------------------------- // Volatile variables demand their effects be made known to all CPU's in // order. Store buffers on most chips allow reads & writes to reorder; the // JMM's ReadAfterWrite.java test fails in -Xint mode without some kind of // memory barrier (i.e., it's not sufficient that the interpreter does not // reorder volatile references, the hardware also must not reorder them). // // According to the new Java Memory Model (JMM): // (1) All volatiles are serialized wrt to each other. // ALSO reads & writes act as acquire & release, so: // (2) A read cannot let unrelated NON-volatile memory refs that happen after // the read float up to before the read. It's OK for non-volatile memory refs // that happen before the volatile read to float down below it. // (3) Similar a volatile write cannot let unrelated NON-volatile memory refs // that happen BEFORE the write float down to after the write. It's OK for // non-volatile memory refs that happen after the volatile write to float up // before it. // // We only put in barriers around volatile refs (they are expensive), not // _between_ memory refs (that would require us to track the flavor of the // previous memory refs). Requirements (2) and (3) require some barriers // before volatile stores and after volatile loads. These nearly cover // requirement (1) but miss the volatile-store-volatile-load case. This final // case is placed after volatile-stores although it could just as well go // before volatile-loads. void TemplateTable::volatile_barrier(MacroAssembler::Membar_mask_bits order_constraint, Register tmp, bool preserve_flags, Register load_tgt) {
__ membar(order_constraint, tmp, preserve_flags, load_tgt);
}
assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
__ get_cache_and_index_and_bytecode_at_bcp(Rcache, Rindex, Rtemp, byte_no, 1, index_size);
__ cmp(Rtemp, code); // have we resolved this bytecode?
__ b(resolved, eq);
// resolve first time through
address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
__ mov(R1, code);
__ call_VM(noreg, entry, R1); // Update registers with resolved info
__ get_cache_and_index_at_bcp(Rcache, Rindex, 1, index_size);
__ bind(resolved);
}
// The Rcache and Rindex registers must be set before call void TemplateTable::load_field_cp_cache_entry(Register Rcache, Register Rindex, Register Roffset, Register Rflags, Register Robj, bool is_static = false) {
// The registers cache and index expected to be set before call, and should not be Rtemp. // Blows volatile registers R0-R3, Rtemp, LR, // except cache and index registers which are preserved. void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rindex, bool is_static, bool has_tos) {
assert_different_registers(Rcache, Rindex, Rtemp);
if (__ can_post_field_access()) { // Check to see if a field access watch has been set before we take // the time to call into the VM.
// compute type
__ logical_shift_right(Rflags, Rflags, ConstantPoolCacheEntry::tos_state_shift); // Make sure we don't need to mask flags after the above shift
ConstantPoolCacheEntry::verify_tos_state_shift();
// There are actually two versions of implementation of getfield/getstatic: // // 1) Table switch using add(PC,...) instruction (fast_version) // 2) Table switch using ldr(PC,...) instruction // // First version requires fixed size of code block for each case and // can not be used in RewriteBytecodes and VerifyOops // modes.
// Size of fixed size code block for fast_version constint log_max_block_size = 3; constint max_block_size = 1 << log_max_block_size;
// Decide if fast version is enabled bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops;
// On 32-bit ARM atos and itos cases can be merged only for fast version, because // atos requires additional processing in slow version. bool atos_merged_with_itos = fast_version;
// Sanity check on the TosState count; the message states the same value the
// condition tests.
assert(number_of_states == 10, "number of tos states should be equal to 10");
// ftos
{
assert(ftos == seq++, "ftos has unexpected value");
FixedSizeCodeBlock ftos_block(_masm, max_block_size, fast_version);
__ bind(Lftos); // floats and ints are placed on stack in same way, so // we can use push(itos) to transfer value without using VFP
__ access_load_at(T_INT, IN_HEAP, Address(Robj, Roffset), R0_tos, noreg, noreg, noreg);
__ push(itos); if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_fgetfield, R0_tmp, Rtemp);
}
__ b(Done);
}
// dtos
{
assert(dtos == seq++, "dtos has unexpected value");
FixedSizeCodeBlock dtos_block(_masm, max_block_size, fast_version);
__ bind(Ldtos); // doubles and longs are placed on stack in the same way, so // we can use push(ltos) to transfer value without using VFP
__ access_load_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg);
__ push(ltos); if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_dgetfield, R0_tmp, Rtemp);
}
__ b(Done);
}
// atos
{
assert(atos == seq++, "atos has unexpected value");
// atos case for slow version on 32-bit ARM if(!atos_merged_with_itos) {
__ bind(Latos);
do_oop_load(_masm, R0_tos, Address(Robj, Roffset));
__ push(atos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_agetfield, R0_tmp, Rtemp);
}
__ b(Done);
}
}
assert(vtos == seq++, "vtos has unexpected value");
// itos and atos cases are frequent so it makes sense to move them out of table switch // atos case can be merged with itos case (and thus moved out of table switch) on 32-bit ARM, fast version only
__ bind(Lint);
__ access_load_at(T_INT, IN_HEAP, Address(Robj, Roffset), R0_tos, noreg, noreg, noreg);
__ push(itos); // Rewrite bytecode to be faster if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_igetfield, R0_tmp, Rtemp);
}
__ bind(Done);
// Check for volatile field
Label notVolatile;
__ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
// The registers cache and index expected to be set before call, and should not be R1 or Rtemp. // Blows volatile registers R0-R3, Rtemp, LR, // except cache and index registers which are preserved. void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rindex, bool is_static) {
ByteSize cp_base_offset = ConstantPoolCache::base_offset();
assert_different_registers(Rcache, Rindex, R1, Rtemp);
if (__ can_post_field_modification()) { // Check to see if a field modification watch has been set before we take // the time to call into the VM.
Label Lcontinue;
if (is_static) { // Life is simple. Null out the object pointer.
__ mov(R1, 0);
} else { // Life is harder. The stack holds the value on top, followed by the object. // We don't know the size of the value, though; it could be one or two words // depending on its type. As a result, we must find the type to determine where // the object is.
__ logical_shift_right(Rtemp, Rtemp, ConstantPoolCacheEntry::tos_state_shift); // Make sure we don't need to mask Rtemp after the above shift
ConstantPoolCacheEntry::verify_tos_state_shift();
__ cmp(Rtemp, ltos);
__ cond_cmp(Rtemp, dtos, ne); // two word value (ltos/dtos)
__ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(2)), eq);
// one word value (not ltos, dtos)
__ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(1)), ne);
}
// compute type
__ logical_shift_right(Rflags, Rflags, ConstantPoolCacheEntry::tos_state_shift); // Make sure we don't need to mask flags after the above shift
ConstantPoolCacheEntry::verify_tos_state_shift();
// There are actually two versions of implementation of putfield/putstatic: // // 32-bit ARM: // 1) Table switch using add(PC,...) instruction (fast_version) // 2) Table switch using ldr(PC,...) instruction // // First version requires fixed size of code block for each case and // can not be used in RewriteBytecodes and VerifyOops // modes.
// Size of fixed size code block for fast_version (in instructions) constint log_max_block_size = 3; constint max_block_size = 1 << log_max_block_size;
// Decide if fast version is enabled bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops;
// Sanity check on the TosState count; the message states the same value the
// condition tests.
assert(number_of_states == 10, "number of tos states should be equal to 10");
// itos case is frequent and is moved outside table switch
__ cmp(Rflags, itos);
// table switch by type if (fast_version) {
__ add(PC, PC, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize), ne);
} else {
__ ldr(PC, Address(PC, Rflags, lsl, LogBytesPerWord), ne);
}
// jump to itos case
__ b(Lint);
// table with addresses for slow version if (fast_version) { // nothing to do
} else {
__ bind(Ltable);
__ emit_address(Lbtos);
__ emit_address(Lztos);
__ emit_address(Lctos);
__ emit_address(Lstos);
__ emit_address(Litos);
__ emit_address(Lltos);
__ emit_address(Lftos);
__ emit_address(Ldtos);
__ emit_address(Latos);
}
// ftos
{
assert(ftos == seq++, "ftos has unexpected value");
FixedSizeCodeBlock ftos_block(_masm, max_block_size, fast_version);
__ bind(Lftos); // floats and ints are placed on stack in the same way, so // we can use pop(itos) to transfer value without using VFP
__ pop(itos); if (!is_static) pop_and_check_object(Robj);
__ access_store_at(T_INT, IN_HEAP, Address(Robj, Roffset), R0_tos, noreg, noreg, noreg, false); if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_fputfield, R0_tmp, Rtemp, true, byte_no);
}
__ b(Done);
}
// dtos
{
assert(dtos == seq++, "dtos has unexpected value");
FixedSizeCodeBlock dtos_block(_masm, max_block_size, fast_version);
__ bind(Ldtos); // doubles and longs are placed on stack in the same way, so // we can use pop(ltos) to transfer value without using VFP
__ pop(ltos); if (!is_static) pop_and_check_object(Robj);
__ access_store_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg, false); if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_dputfield, R0_tmp, Rtemp, true, byte_no);
}
__ b(Done);
}
// atos
{
  // seq must have reached atos at this point; the failure message names the
  // state actually being checked.
  assert(atos == seq++, "atos has unexpected value");
  __ bind(Latos);
  __ pop(atos);
  if (!is_static) pop_and_check_object(Robj);
  // Store into the field
  do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R5_tmp, false);
  // Only non-static accesses that are allowed to rewrite get patched to the
  // fast variant.
  if (!is_static && rc == may_rewrite) {
    patch_bytecode(Bytecodes::_fast_aputfield, R0_tmp, Rtemp, true, byte_no);
  }
  __ b(Done);
}
// itos case is frequent and is moved outside table switch
__ bind(Lint);
__ pop(itos); if (!is_static) pop_and_check_object(Robj);
__ access_store_at(T_INT, IN_HEAP, Address(Robj, Roffset), R0_tos, noreg, noreg, noreg, false); if (!is_static && rc == may_rewrite) {
patch_bytecode(Bytecodes::_fast_iputfield, R0_tmp, Rtemp, true, byte_no);
}
__ bind(Done);
Label notVolatile2; if (is_static) { // Just check for volatile. Memory barrier for static final field // is handled by class initialization.
__ tbz(Rflagsav, ConstantPoolCacheEntry::is_volatile_shift, notVolatile2);
volatile_barrier(MacroAssembler::StoreLoad, Rtemp);
__ bind(notVolatile2);
} else { // Check for volatile field and final field
Label skipMembar;
// Argument-less overload of jvmti_post_fast_field_mod. It is deliberately left
// without an implementation here and calls Unimplemented() if reached.
void TemplateTable::jvmti_post_fast_field_mod() { // This version of jvmti_post_fast_field_mod() is not used on ARM
  Unimplemented();
}
// Blows volatile registers R0-R3, Rtemp, LR, // but preserves tosca with the given state. void TemplateTable::jvmti_post_fast_field_mod(TosState state) { if (__ can_post_field_modification()) { // Check to see if a field modification watch has been set before we take // the time to call into the VM.
Label done;
// do the JVMTI work here to avoid disturbing the register state below if (__ can_post_field_access()) { // Check to see if a field access watch has been set before we take // the time to call into the VM.
Label done;
__ ldr_global_s32(R2, (address) JvmtiExport::get_field_access_count_addr());
__ cbz(R2, done); // access constant pool cache entry
__ get_cache_entry_pointer_at_bcp(R2, R1, 1);
__ push_ptr(R0_tos); // save object pointer before call_VM() clobbers it
__ verify_oop(R0_tos);
__ mov(R1, R0_tos); // R1: object pointer copied above // R2: cache entry pointer
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R1, R2);
__ pop_ptr(R0_tos); // restore object pointer
// load flags to test volatile
__ ldr_u32(Rflags, Address(Rtemp, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
// make sure exception is reported in correct bcp range (getfield is next instruction)
__ add(Rbcp, Rbcp, 1);
__ null_check(Robj, Rtemp);
__ sub(Rbcp, Rbcp, 1);
// invokestatic: byte_no must be f1_byte. prepare_invoke() is called with
// Rmethod, the call is profiled using R2_tmp as scratch, and control is then
// transferred through jump_from_interpreted(Rmethod).
void TemplateTable::invokestatic(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");
  prepare_invoke(byte_no, Rmethod); // do the call
  __ profile_call(R2_tmp);
  __ jump_from_interpreted(Rmethod);
}
// fast_invokevfinal: not used on this port (see the stop message). After
// checking that byte_no is f2_byte, the template emits only a stop() with
// that diagnostic.
void TemplateTable::fast_invokevfinal(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f2_byte, "use this argument");
  __ stop("fast_invokevfinal is not used on ARM");
}
// First check for Object case, then private interface method, // then regular interface method.
// Special case of invokeinterface called for virtual method of // java.lang.Object. See cpCache.cpp for details.
Label notObjectMethod;
__ tbz(Rflags, ConstantPoolCacheEntry::is_forced_virtual_shift, notObjectMethod);
invokevirtual_helper(Rmethod, Rrecv, Rflags);
__ bind(notObjectMethod);
// Get receiver klass into Rklass - also a null check
__ load_klass(Rklass, Rrecv);
// Check for private method invocation - indicated by vfinal
Label no_such_interface;
Label subtype;
__ check_klass_subtype(Rklass, Rinterf, R1_tmp, R3_tmp, noreg, subtype); // If we get here the typecheck failed
__ b(no_such_interface);
__ bind(subtype);
// do the call
__ profile_final_call(R0_tmp);
__ jump_from_interpreted(Rmethod);
// profile this call
__ profile_virtual_call(R0_tmp, Rklass);
// Get declaring interface class from method
__ ldr(Rtemp, Address(Rmethod, Method::const_offset()));
__ ldr(Rtemp, Address(Rtemp, ConstMethod::constants_offset()));
__ ldr(Rinterf, Address(Rtemp, ConstantPool::pool_holder_offset_in_bytes()));
// Get itable index from method
__ ldr_s32(Rtemp, Address(Rmethod, Method::itable_index_offset()));
__ add(Rtemp, Rtemp, (-Method::itable_index_max)); // small negative constant is too large for an immediate on arm32
__ neg(Rindex, Rtemp);
// Check for abstract method error // Note: This should be done more efficiently via a throw_abstract_method_error // interpreter entry point and a conditional jump to it in case of a null // method.
{ Label L;
__ cbnz(Rmethod, L); // throw exception // note: must restore interpreter registers to canonical // state for exception handling to work correctly!
__ restore_method();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); // the call_VM checks for exception, so we should never return here.
__ should_not_reach_here();
__ bind(L);
}
// do the call
__ jump_from_interpreted(Rmethod);
// throw exception
__ bind(no_such_interface);
__ restore_method();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError)); // the call_VM checks for exception, so we should never return here.
__ should_not_reach_here();
}
// Make sure the class we're about to instantiate has been resolved. // This is done before loading InstanceKlass to be consistent with the order // how Constant Pool is updated (see ConstantPool::klass_at_put) constint tags_offset = Array<u1>::base_offset_in_bytes();
__ add(Rtemp, Rtags, Rindex);
__ ldrb(Rtemp, Address(Rtemp, tags_offset));
// use Rklass as a scratch
volatile_barrier(MacroAssembler::LoadLoad, Rklass);
// make sure klass is initialized & doesn't have finalizer // make sure klass is fully initialized
__ ldrb(Rtemp, Address(Rklass, InstanceKlass::init_state_offset()));
__ cmp(Rtemp, InstanceKlass::fully_initialized);
__ b(slow_case, ne);
// get instance_size in InstanceKlass (scaled to a count of bytes)
__ ldr_u32(Rsize, Address(Rklass, Klass::layout_helper_offset()));
// test to see if it has a finalizer or is malformed in some way // Klass::_lh_instance_slow_path_bit is really a bit mask, not bit number
__ tbnz(Rsize, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case);
// Allocate the instance: // If TLAB is enabled: // Try to allocate in the TLAB. // If fails, go to the slow path. // Initialize the allocation. // Exit. // // Go to slow path. if (UseTLAB) { constRegister Rtlab_top = R1_tmp; constRegister Rtlab_end = R2_tmp;
assert_different_registers(Robj, Rsize, Rklass, Rtlab_top, Rtlab_end);
__ tlab_allocate(Robj, Rtlab_top, Rtlab_end, Rsize, slow_case); if (ZeroTLAB) { // the fields have been already cleared
__ b(initialize_header);
}
// The object is initialized before the header. If the object size is // zero, go directly to the header initialization.
__ subs(Rsize, Rsize, sizeof(oopDesc));
__ add(Rzero_cur, Robj, sizeof(oopDesc));
__ b(initialize_header, eq);
#ifdef ASSERT // make sure Rsize is a multiple of 8
Label L;
__ tst(Rsize, 0x07);
__ b(L, eq);
__ stop("object size is not multiple of 8 - adjust this code");
__ bind(L); #endif
// Note: Disable DTrace runtime check for now to eliminate overhead on each allocation if (DTraceAllocProbes) { // Trigger dtrace event for fastpath
Label Lcontinue;
__ push(atos);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); // vm_result_2 has metadata result
__ get_vm_result_2(Rsuper, Robj);
__ pop_ptr(Robj);
__ b(resolved);
__ bind(throw_exception); // Come here on failure of subtype check
__ profile_typecheck_failed(R1_tmp);
__ mov(R2_ClassCastException_obj, Robj); // convention with generate_ClassCastException_handler()
__ b(Interpreter::_throw_ClassCastException_entry);
// Get superklass in Rsuper and subklass in Rsub
__ bind(quicked);
__ load_resolved_klass_at_offset(Rcpool, Rindex, Rsuper);
// Collect counts on whether this check-cast sees NULLs a lot or not. if (ProfileInterpreter) {
__ b(done);
__ bind(is_null);
__ profile_null_seen(R1_tmp);
} else {
__ bind(is_null); // same as 'done'
}
__ bind(done);
}
// Emits the interpreter template for the instanceof bytecode.
//
// NOTE(review): in this copy the declarations of the labels (done, is_null,
// not_subtype, quicked, resolved) and of the register aliases (Robj, Rcpool,
// Rtags, Rindex, Rsuper, Rsub, Rsubtype_check_tmp1/2) are not visible, nor is
// the transition() call or the initial null test that would branch to
// is_null -- confirm against the upstream file before relying on this text.
void TemplateTable::instanceof() {
// result = 0: obj == NULL or obj is not an instanceof the specified klass
// result = 1: obj != NULL and obj is an instanceof the specified klass
// Get cpool & tags index
__ get_cpool_and_tags(Rcpool, Rtags);
// The 2-byte constant pool index is read from the bytecode stream at bcp + 1.
__ get_unsigned_2_byte_index_at_bcp(Rindex, 1);
// See if bytecode has already been quicked
// Load the tag byte for this constant pool index and compare it with
// JVM_CONSTANT_Class.
__ add(Rtemp, Rtags, Rindex);
__ ldrb(Rtemp, Address(Rtemp, Array<u1>::base_offset_in_bytes()));
__ cmp(Rtemp, JVM_CONSTANT_Class);
// NOTE(review): no conditional branch to 'quicked' is visible after the
// compare above in this copy -- presumably one belongs here; confirm.
// Slow path: save the object on the expression stack around the VM call,
// fetch the metadata result into Rsuper, then restore the object.
__ push(atos);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); // vm_result_2 has metadata result
__ get_vm_result_2(Rsuper, Robj);
__ pop_ptr(Robj);
__ b(resolved);
// Get superklass in Rsuper and subklass in Rsub
__ bind(quicked);
__ load_resolved_klass_at_offset(Rcpool, Rindex, Rsuper);
__ bind(resolved);
__ load_klass(Rsub, Robj);
// Generate subtype check. Blows both tmps and Rtemp.
__ gen_subtype_check(Rsub, Rsuper, not_subtype, Rsubtype_check_tmp1, Rsubtype_check_tmp2);
// Come here on success
__ mov(R0_tos, 1);
__ b(done);
__ bind(not_subtype); // Come here on failure
__ profile_typecheck_failed(R1_tmp);
__ mov(R0_tos, 0);
// NOTE(review): on the next line the text "if (ProfileInterpreter) {" sits
// inside the line comment, so the "} else {" further down has no visible
// opening "if" in this copy -- confirm against the upstream file.
// Collect counts on whether this test sees NULLs a lot or not. if (ProfileInterpreter) {
__ b(done);
__ bind(is_null);
__ profile_null_seen(R1_tmp);
} else {
__ bind(is_null); // same as 'done'
}
__ bind(done);
}
// initialize entry pointer
__ mov(Rentry, 0); // points to free slot or NULL
// find a free slot in the monitor block (result in Rentry)
{ Label loop, exit; constRegister Rcur = R2_tmp; constRegister Rcur_obj = Rtemp; constRegister Rbottom = R3_tmp;
assert_different_registers(Robj, Rentry, Rcur, Rbottom, Rcur_obj);
__ ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // points to current entry, starting with top-most entry
__ sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize); // points to word before bottom of monitor block
__ cmp(Rcur, Rbottom); // check if there are no monitors
__ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the first iteration
__ b(allocate_monitor, eq); // there are no monitors, skip searching
__ bind(loop);
__ cmp(Rcur_obj, 0); // check if current entry is used
__ mov(Rentry, Rcur, eq); // if not used then remember entry
__ cmp(Rcur_obj, Robj); // check if current entry is for same object
__ b(exit, eq); // if same object then stop searching
__ add(Rcur, Rcur, entry_size); // otherwise advance to next entry
__ cmp(Rcur, Rbottom); // check if bottom reached
__ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the next iteration
__ b(loop, ne); // if not at bottom then check this entry
__ bind(exit);
}
__ cbnz(Rentry, allocated); // check if a slot has been found; if found, continue with that one
__ bind(allocate_monitor);
// allocate one if there's no free slot
// Allocate a new monitor entry when the search found no free slot.
//
// The expression stack words are copied one at a time: each word is loaded
// from (cursor + entry_size) and stored at the cursor, which is then advanced
// by wordSize, until the cursor reaches Rentry.
//
// Registers: R2_tmp is the copy cursor, Rtemp carries the word being moved,
// Rentry is the monitor block top loaded from the frame.
//
// NOTE(review): no instruction that adjusts Rstack_top or Rentry by entry_size
// is visible between the load and the store-back of the monitor block top in
// this copy -- confirm against the upstream file.
{ Label loop;
assert_different_registers(Robj, Rentry, R2_tmp, Rtemp);
// 1. compute new pointers
__ ldr(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // old monitor block top / expression stack bottom
__ mov(R2_tmp, Rstack_top); // set start value for copy loop
__ str(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // set new monitor block top
// 2. move expression stack contents
__ cmp(R2_tmp, Rentry); // check if expression stack is empty
// Conditional (ne) load: only executed when the stack is not empty.
__ ldr(Rtemp, Address(R2_tmp, entry_size), ne); // load expression stack word from old location
__ b(allocated, eq);
__ bind(loop);
// store expression stack word at new location
// and advance to next word
__ str(Rtemp, Address(R2_tmp, wordSize, post_indexed));
__ cmp(R2_tmp, Rentry); // check if bottom reached
// Use the same cursor name (R2_tmp) as the rest of the loop; the base
// register here must be the one that was just post-incremented and compared.
__ ldr(Rtemp, Address(R2_tmp, entry_size), ne); // load expression stack word from old location
__ b(loop, ne); // if not at bottom then copy next word
}
// call run-time routine
// Rentry: points to monitor entry
__ bind(allocated);
// Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly. // The object has already been popped from the stack, so the expression stack looks correct.
__ add(Rbcp, Rbcp, 1);
__ str(Robj, Address(Rentry, BasicObjectLock::obj_offset_in_bytes())); // store object
__ lock_object(Rentry);
// check to make sure this monitor doesn't cause stack overflow after locking
__ save_bcp(); // in case of exception
__ arm_stack_overflow_check(0, Rtemp);
// The bcp has already been incremented. Just need to dispatch to next instruction.
__ dispatch_next(vtos);
}
// Search the frame's monitor block for the entry whose object equals Robj.
// Branches to 'found' on a match (Rcur then points at that entry) and to
// 'throw_exception' when the block is empty; when the cursor reaches Rbottom
// without a match, execution falls out of the loop.
__ ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // points to current entry, starting with top-most entry
__ sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize); // points to word before bottom of monitor block
__ cmp(Rcur, Rbottom); // check if bottom reached
__ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the first iteration
__ b(throw_exception, eq); // throw exception if there are no monitors
__ bind(loop); // check if current entry is for same object
__ cmp(Rcur_obj, Robj);
__ b(found, eq); // if same object then stop searching
__ add(Rcur, Rcur, entry_size); // otherwise advance to next entry
__ cmp(Rcur, Rbottom); // check if bottom reached
__ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the next iteration
__ b (loop, ne); // if not at bottom then check this entry
}
// error handling. Unlocking was not block-structured
__ bind(throw_exception);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
__ should_not_reach_here();
// call run-time routine // Rcur: points to monitor entry
__ bind(found);
__ push_ptr(Robj); // make sure object is on stack (contract with oopMaps)
__ mov(Rmonitor, Rcur);
__ unlock_object(Rmonitor);
__ pop_ptr(Robj); // discard object
}
__ nop(); // to avoid filling CPU pipeline with invalid instructions
__ nop();
__ bind_literal(Ltable);
}
//---------------------------------------------------------------------------------------------------- // Multi arrays
// Emits the interpreter template for the multianewarray bytecode.
//
// The dimension count is read from the bytecode stream, the address of the
// first dimension on the expression stack is passed to the runtime in R1,
// and the dimension words are removed from the expression stack afterwards.
// The call_VM result register is R0.
void TemplateTable::multianewarray() {
// Declare the tos-state transition for this template: vtos on entry, atos on exit.
transition(vtos, atos);
// The count is kept in Rtmp_save0 because it is needed again after call_VM
// (presumably that register survives the call -- confirm).
__ ldrb(Rtmp_save0, at_bcp(3)); // get number of dimensions
// last dim is on top of stack; we want address of first one:
// first_addr = last_addr + ndims * stackElementSize - 1*wordsize
// the latter wordSize to point to the beginning of the array.
__ add(Rtemp, Rstack_top, AsmOperand(Rtmp_save0, lsl, Interpreter::logStackElementSize));
__ sub(R1, Rtemp, wordSize);
call_VM(R0, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), R1);
// Pop the dimension words: Rstack_top += ndims << logStackElementSize.
__ add(Rstack_top, Rstack_top, AsmOperand(Rtmp_save0, lsl, Interpreter::logStackElementSize));
// MacroAssembler::StoreStore useless (included in the runtime exit path)
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.66 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.