/* * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Do an oop store like *(base + index) = val OR *(base + offset) = val // (only one of both variants is possible at the same time). // Index can be noreg. // Kills: // Rbase, Rtmp staticvoid do_oop_store(InterpreterMacroAssembler* _masm, Register base,
RegisterOrConstant offset, Register val, // Noreg means always null. Register tmp1, Register tmp2, Register tmp3,
DecoratorSet decorators) {
assert_different_registers(tmp1, tmp2, tmp3, val, base);
__ store_heap_oop(val, offset, base, tmp1, tmp2, tmp3, MacroAssembler::PRESERVATION_NONE, decorators);
}
// Not used on ppc: reaching this function is treated as an error.
Address TemplateTable::at_bcp(int offset) {
  ShouldNotReachHere();
  // A default-constructed Address is returned only to satisfy the signature.
  return Address();
}
// Patches the current bytecode (ptr to it located in bcp) // in the bytecode stream with a new one. void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, RegisterRtemp, bool load_bc_into_bc_reg /*=true*/, int byte_no) { // With sharing on, may need to test method flag. if (!RewriteBytecodes) return;
Label L_patch_done;
switch (new_bc) { case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: case Bytecodes::_fast_cputfield: case Bytecodes::_fast_dputfield: case Bytecodes::_fast_fputfield: case Bytecodes::_fast_iputfield: case Bytecodes::_fast_lputfield: case Bytecodes::_fast_sputfield:
{ // We skip bytecode quickening for putfield instructions when // the put_code written to the constant pool cache is zero. // This is required so that every execution of this instruction // calls out to InterpreterRuntime::resolve_get_put to do // additional, required work.
assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
assert(load_bc_into_bc_reg, "we use bc_reg as temp");
__ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1); // ((*(cache+indices))>>((1+byte_no)*8))&0xFF: #ifdefined(VM_LITTLE_ENDIAN)
__ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp); #else
__ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp); #endif
__ cmpwi(CCR0, Rnew_bc, 0);
__ li(Rnew_bc, (unsignedint)(unsignedchar)new_bc);
__ beq(CCR0, L_patch_done); // __ isync(); // acquire not needed break;
}
// Resolved class - need to call vm to get java mirror of the class.
__ cmpwi(CCR1, Rscratch2, JVM_CONSTANT_Class);
__ crnor(CCR0, Assembler::equal, CCR1, Assembler::equal); // Neither resolved class nor unresolved case from above?
__ beq(CCR0, notClass);
// VMr = obj = base address to find primitive value to push // VMr2 = flags = (tos, off) using format of CPCE::_flags
__ andi(off, flags, ConstantPoolCacheEntry::field_index_mask);
// What sort of thing are we loading?
__ rldicl(flags, flags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
// Get the locals index located in the bytecode stream at bcp + offset. void TemplateTable::locals_index(Register Rdst, int offset) {
__ lbz(Rdst, offset, R14_bcp);
}
// get next byte
__ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_iload), R14_bcp);
// If the next bytecode is _iload, wait to rewrite to iload2. We only want to
// rewrite the last two iloads in a pair. Comparing against fast_iload means that
// the next bytecode is neither an iload nor a caload, and therefore
// an iload pair.
__ cmpwi(CCR0, Rnext_byte, (unsignedint)(unsignedchar)Bytecodes::_iload);
__ beq(CCR0, Ldone);
// Load a local variable type long from locals area to TOS cache register. // Local index resides in bytecodestream. void TemplateTable::lload() {
transition(vtos, ltos);
void TemplateTable::aload_0_internal(RewriteControl rc) {
transition(vtos, atos); // According to bytecode histograms, the pairs: // // _aload_0, _fast_igetfield // _aload_0, _fast_agetfield // _aload_0, _fast_fgetfield // // occur frequently. If RewriteFrequentPairs is set, the (slow) // _aload_0 bytecode checks if the next bytecode is either // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then // rewrites the current bytecode into a pair bytecode; otherwise it // rewrites the current bytecode into _0 that doesn't do // the pair check anymore. // // Note: If the next bytecode is _getfield, the rewrite must be // delayed, otherwise we may miss an opportunity for a pair. // // Also rewrite frequent pairs // aload_0, aload_1 // aload_0, iload_1 // These bytecodes with a small amount of code are most profitable // to rewrite.
// Get next byte.
__ lbz(Rnext_byte, Bytecodes::length_for(Bytecodes::_aload_0), R14_bcp);
// If _getfield, wait to rewrite. We only want to rewrite the last two bytecodes in a pair.
__ cmpwi(CCR0, Rnext_byte, (unsignedint)(unsignedchar)Bytecodes::_getfield);
__ beq(CCR0, Ldont_rewrite);
__ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp); // Get value to store.
__ lwz(Rindex, Interpreter::expr_offset_in_bytes(1), R15_esp); // Get index.
__ ld(Rarray, Interpreter::expr_offset_in_bytes(2), R15_esp); // Get array.
// Do fast instanceof cache test.
__ ld(Rarray_element_klass, in_bytes(ObjArrayKlass::element_klass_offset()), Rarray_klass);
// Generate a fast subtype check. Branch to store_ok if no failure. Throw if failure.
__ gen_subtype_check(Rvalue_klass /*subklass*/, Rarray_element_klass /*superklass*/, Rscratch, Rscratch2, Rscratch3, Lstore_ok);
// Store is OK.
__ bind(Lstore_ok);
do_oop_store(_masm, Rstore_addr, arrayOopDesc::base_offset_in_bytes(T_OBJECT), R17_tos /* value */,
Rscratch, Rscratch2, Rscratch3, IS_ARRAY | IS_NOT_NULL);
__ bind(Ldone); // Adjust sp (pops array, index and value).
__ addi(R15_esp, R15_esp, 3 * Interpreter::stackElementSize);
}
// Need to check whether array is boolean or byte // since both types share the bastore bytecode.
__ load_klass(Rscratch, Rarray);
__ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch); int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
__ testbitdi(CCR0, R0, Rscratch, diffbit);
Label L_skip;
__ bfalse(CCR0, L_skip);
__ andi(R17_tos, R17_tos, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
__ bind(L_skip);
Register Ra = R11_scratch1,
Rb = R12_scratch2,
Rc = R3_ARG1;
// stack: ..., a, b, c
__ ld(Rc, Interpreter::stackElementSize, R15_esp); // load c
__ ld(Ra, Interpreter::stackElementSize * 3, R15_esp); // load a
__ std(Rc, Interpreter::stackElementSize * 3, R15_esp); // store c in a
__ ld(Rb, Interpreter::stackElementSize * 2, R15_esp); // load b // stack: ..., c, b, c
__ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in b // stack: ..., c, a, c
__ std(Rb, Interpreter::stackElementSize, R15_esp); // store b in c
__ push_ptr(Rc); // push c // stack: ..., c, a, b, c
}
Register Ra = R11_scratch1,
Rb = R12_scratch2,
Rc = R3_ARG1,
Rd = R4_ARG2; // stack: ..., a, b, c, d
__ ld(Rb, Interpreter::stackElementSize * 3, R15_esp);
__ ld(Rd, Interpreter::stackElementSize, R15_esp);
__ std(Rb, Interpreter::stackElementSize, R15_esp); // store b in d
__ std(Rd, Interpreter::stackElementSize * 3, R15_esp); // store d in b
__ ld(Ra, Interpreter::stackElementSize * 4, R15_esp);
__ ld(Rc, Interpreter::stackElementSize * 2, R15_esp);
__ std(Ra, Interpreter::stackElementSize * 2, R15_esp); // store a in c
__ std(Rc, Interpreter::stackElementSize * 4, R15_esp); // store c in a // stack: ..., c, d, a, b
__ push_2ptrs(Rc, Rd); // stack: ..., c, d, a, b, c, d
}
void TemplateTable::swap() {
transition(vtos, vtos); // stack: ..., a, b
Register Ra = R11_scratch1,
Rb = R12_scratch2; // stack: ..., a, b
__ ld(Rb, Interpreter::stackElementSize, R15_esp);
__ ld(Ra, Interpreter::stackElementSize * 2, R15_esp);
__ std(Rb, Interpreter::stackElementSize * 2, R15_esp);
__ std(Ra, Interpreter::stackElementSize, R15_esp); // stack: ..., b, a
}
void TemplateTable::convert() { // %%%%% Factor this first part across platforms #ifdef ASSERT
TosState tos_in = ilgl;
TosState tos_out = ilgl; switch (bytecode()) { case Bytecodes::_i2l: // fall through case Bytecodes::_i2f: // fall through case Bytecodes::_i2d: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_in = itos; break; case Bytecodes::_l2i: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_l2d: tos_in = ltos; break; case Bytecodes::_f2i: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_f2d: tos_in = ftos; break; case Bytecodes::_d2i: // fall through case Bytecodes::_d2l: // fall through case Bytecodes::_d2f: tos_in = dtos; break; default : ShouldNotReachHere();
} switch (bytecode()) { case Bytecodes::_l2i: // fall through case Bytecodes::_f2i: // fall through case Bytecodes::_d2i: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_out = itos; break; case Bytecodes::_i2l: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_d2l: tos_out = ltos; break; case Bytecodes::_i2f: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_d2f: tos_out = ftos; break; case Bytecodes::_i2d: // fall through case Bytecodes::_l2d: // fall through case Bytecodes::_f2d: tos_out = dtos; break; default : ShouldNotReachHere();
}
transition(tos_in, tos_out); #endif
case Bytecodes::_l2i: // Nothing to do, we'll continue to work with the lower bits. break;
case Bytecodes::_i2b:
__ extsb(R17_tos, R17_tos); break;
case Bytecodes::_i2c:
__ rldicl(R17_tos, R17_tos, 0, 64-2*8); break;
case Bytecodes::_i2s:
__ extsh(R17_tos, R17_tos); break;
case Bytecodes::_i2d:
__ extsw(R17_tos, R17_tos); case Bytecodes::_l2d:
__ move_l_to_d();
__ fcfid(F15_ftos, F15_ftos); break;
case Bytecodes::_i2f:
__ extsw(R17_tos, R17_tos);
__ move_l_to_d(); if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only // Comment: alternatively, load with sign extend could be done by lfiwax.
__ fcfids(F15_ftos, F15_ftos);
} else {
__ fcfid(F15_ftos, F15_ftos);
__ frsp(F15_ftos, F15_ftos);
} break;
case Bytecodes::_l2f: if (VM_Version::has_fcfids()) { // fcfids is >= Power7 only
__ move_l_to_d();
__ fcfids(F15_ftos, F15_ftos);
} else { // Avoid rounding problem when result should be 0x3f800001: need fixup code before fcfid+frsp.
__ mr(R3_ARG1, R17_tos);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f));
__ fmr(F15_ftos, F1_RET);
} break;
case Bytecodes::_f2d: // empty break;
case Bytecodes::_d2f:
__ frsp(F15_ftos, F15_ftos); break;
case Bytecodes::_d2i: case Bytecodes::_f2i:
__ fcmpu(CCR0, F15_ftos, F15_ftos);
__ li(R17_tos, 0); // 0 in case of NAN
__ bso(CCR0, done);
__ fctiwz(F15_ftos, F15_ftos);
__ move_d_to_l(); break;
case Bytecodes::_d2l: case Bytecodes::_f2l:
__ fcmpu(CCR0, F15_ftos, F15_ftos);
__ li(R17_tos, 0); // 0 in case of NAN
__ bso(CCR0, done);
__ fctidz(F15_ftos, F15_ftos);
__ move_d_to_l(); break;
default: ShouldNotReachHere();
}
__ bind(done);
}
// Long compare void TemplateTable::lcmp() {
transition(ltos, itos);
constRegister Rscratch = R11_scratch1;
__ pop_l(Rscratch); // first operand, deeper in stack
__ cmpd(CCR0, Rscratch, R17_tos); // compare
__ set_cmp3(R17_tos); // set result as follows: <: -1, =: 0, >: 1
}
// fcmpl/fcmpg and dcmpl/dcmpg bytecodes // unordered_result == -1 => fcmpl or dcmpl // unordered_result == 1 => fcmpg or dcmpg void TemplateTable::float_cmp(bool is_float, int unordered_result) { const FloatRegister Rfirst = F0_SCRATCH,
Rsecond = F15_ftos; constRegister Rscratch = R11_scratch1;
// If no method data exists, go to profile_continue.
__ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
__ cmpdi(CCR0, Rmdo, 0);
__ beq(CCR0, Lno_mdo);
// Helper function for if_cmp* methods below. // Factored out common compare and branch code. void TemplateTable::if_cmp_common(Register Rfirst, Register Rsecond, Register Rscratch1, Register Rscratch2, Condition cc, bool is_jint, bool cmp0) {
Label Lnot_taken; // Note: The condition code we get is the condition under which we // *fall through*! So we have to inverse the CC here.
// Condition is false => Jump!
branch(false, false);
// Condition is not true => Continue.
__ align(32, 12);
__ bind(Lnot_taken);
__ profile_not_taken_branch(Rscratch1, Rscratch2);
}
// Compare integer values with zero and fall through if CC holds, branch away otherwise. void TemplateTable::if_0cmp(Condition cc) {
transition(itos, vtos);
// Table switch using binary search (value/offset pairs are ordered). // Bytecode stream format: // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ... // Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value. void TemplateTable::fast_binaryswitch() {
transition(itos, vtos); // Implementation using the following core algorithm: (copied from Intel) // // int binary_search(int key, LookupswitchPair* array, int n) { // // Binary search according to "Methodik des Programmierens" by // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. // int i = 0; // int j = n; // while (i+1 < j) { // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) // // with Q: for all i: 0 <= i < n: key < a[i] // // where a stands for the array and assuming that the (inexisting) // // element a[n] is infinitely big. // int h = (i + j) >> 1; // // i < h < j // if (key < array[h].fast_match()) { // j = h; // } else { // i = h; // } // } // // R: a[i] <= key < a[i+1] or Q // // (i.e., if key is within array, i is the correct index) // return i; // }
// while (i+1 < j)
__ bind(entry);
__ addi(Rscratch, Ri, 1);
__ cmpw(CCR0, Rscratch, Rj);
__ add(Rh, Ri, Rj); // start h = i + j >> 1;
__ blt(CCR0, loop);
}
// End of binary search, result index is i (must check again!).
Label default_case;
Label continue_execution; if (ProfileInterpreter) {
__ mr(Rh, Ri); // Save index in i for profiling.
} // Ri = value offset
__ sldi(Ri, Ri, log_entry_size);
__ add(Ri, Ri, Rarray);
__ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);
// Check if the klass of the object in local 0 has the JVM_ACC_HAS_FINALIZER flag set and call into the VM to register the finalizer in this case.
assert(state == vtos, "only valid state");
__ ld(R17_tos, 0, R18_locals);
// Load klass of this obj.
__ load_klass(Rklass, R17_tos);
__ lwz(Rklass_flags, in_bytes(Klass::access_flags_offset()), Rklass);
__ testbitdi(CCR0, R0, Rklass_flags, exact_log2(JVM_ACC_HAS_FINALIZER));
__ bfalse(CCR0, Lskip_register_finalizer);
// Move the result value into the correct register and remove memory stack frame.
__ remove_activation(state, /* throw_monitor_exception */ true); // Restoration of lr done by remove_activation. switch (state) { // Narrow result if state is itos but result type is smaller. // Need to narrow in the return bytecode rather than in generate_return_entry // since compiled code callers expect the result to already be narrowed. case itos: __ narrow(R17_tos); /* fall through */ case ltos: case atos: __ mr(R3_RET, R17_tos); break; case ftos: case dtos: __ fmr(F1_RET, F15_ftos); break; case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need // to get visible before the reference to the object gets stored anywhere.
__ membar(Assembler::StoreStore); break; default : ShouldNotReachHere();
}
__ blr();
}
// ============================================================================ // Constant pool cache access // // Memory ordering: // // Like done in C++ interpreter, we load the fields // - _indices // - _f12_oop // acquired, because these are asked if the cache is already resolved. We don't // want to float loads above this check. // See also comments in ConstantPoolCacheEntry::bytecode_1(), // ConstantPoolCacheEntry::bytecode_2() and ConstantPoolCacheEntry::f1();
// Call into the VM if call site is not yet resolved // // Input regs: // - None, all passed regs are outputs. // // Returns: // - Rcache: The const pool cache entry that contains the resolved result. // - Rresult: Either noreg or output for f1/f2. // // Kills: // - Rscratch void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register Rscratch, size_t index_size) {
// Load the constant pool cache entry at field accesses into registers. // The Rcache and Rindex registers must be set before call. // Input: // - Rcache, Rindex // Output: // - Robj, Roffset, Rflags // Kills: // - R11, R12 void TemplateTable::load_field_cp_cache_entry(Register Robj, Register Rcache, Register Rindex /* unused on PPC64 */, Register Roffset, Register Rflags, bool is_static) {
assert_different_registers(Rcache, Rflags, Roffset, R11_scratch1, R12_scratch2);
assert(Rindex == noreg, "parameter not used on PPC64");
ByteSize cp_base_offset = ConstantPoolCache::base_offset();
__ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcache);
__ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcache); if (is_static) {
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
__ resolve_oop_handle(Robj, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE); // Acquire not needed here. Following access has an address dependency on this value.
}
}
// Load the constant pool cache entry at invokes into registers. // Resolve if necessary.
// ============================================================================ // Field access
// Volatile variables demand their effects be made known to all CPU's // in order. Store buffers on most chips allow reads & writes to // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode // without some kind of memory barrier (i.e., it's not sufficient that // the interpreter does not reorder volatile references, the hardware // also must not reorder them). // // According to the new Java Memory Model (JMM): // (1) All volatiles are serialized wrt to each other. ALSO reads & // writes act as acquire & release, so: // (2) A read cannot let unrelated NON-volatile memory refs that // happen after the read float up to before the read. It's OK for // non-volatile memory refs that happen before the volatile read to // float down below it. // (3) Similar a volatile write cannot let unrelated NON-volatile // memory refs that happen BEFORE the write float down to after the // write. It's OK for non-volatile memory refs that happen after the // volatile write to float up before it. // // We only put in barriers around volatile refs (they are expensive), // not _between_ memory refs (that would require us to track the // flavor of the previous memory refs). Requirements (2) and (3) // require some barriers before volatile stores and after volatile // loads. These nearly cover requirement (1) but miss the // volatile-store-volatile-load case. This final case is placed after // volatile-stores although it could just as well go before // volatile-loads.
// The registers cache and index expected to be set before call. // Correct values of the cache and index registers are preserved. // Kills: // Rcache (if has_tos) // Rscratch void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rscratch, bool is_static, bool has_tos) {
assert_different_registers(Rcache, Rscratch);
if (JvmtiExport::can_post_field_access()) {
ByteSize cp_base_offset = ConstantPoolCache::base_offset();
Label Lno_field_access_post;
// Check if post field access in enabled. int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_access_count_addr(), R0, true);
__ lwz(Rscratch, offs, Rscratch);
// Post access enabled - do it!
__ addi(Rcache, Rcache, in_bytes(cp_base_offset)); if (is_static) {
__ li(R17_tos, 0);
} else { if (has_tos) { // The fast bytecode versions have obj ptr in register. // Thus, save object pointer before call_VM() clobbers it // put object on tos where GC wants it.
__ push_ptr(R17_tos);
} else { // Load top of stack (do not pop the value off the stack).
__ ld(R17_tos, Interpreter::expr_offset_in_bytes(0), R15_esp);
}
__ verify_oop(R17_tos);
} // tos: object pointer or NULL if static // cache: cache entry pointer
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), R17_tos, Rcache); if (!is_static && has_tos) { // Restore object pointer.
__ pop_ptr(R17_tos);
__ verify_oop(R17_tos);
} else { // Cache is still needed to get class or obj.
__ get_cache_and_index_at_bcp(Rcache, 1);
}
// The registers cache and index expected to be set before call. // The function may destroy various registers, just not the cache and index registers. void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, bool is_static) {
if (JvmtiExport::can_post_field_modification()) {
Label Lno_field_mod_post;
// Check if post field access in enabled. int offs = __ load_const_optimized(Rscratch, JvmtiExport::get_field_modification_count_addr(), R0, true);
__ lwz(Rscratch, offs, Rscratch);
// Do the post
ByteSize cp_base_offset = ConstantPoolCache::base_offset(); constRegister Robj = Rscratch;
__ addi(Rcache, Rcache, in_bytes(cp_base_offset)); if (is_static) { // Life is simple. Null out the object pointer.
__ li(Robj, 0);
} else { // In case of the fast versions, value lives in registers => put it back on tos. int offs = Interpreter::expr_offset_in_bytes(0); Register base = R15_esp; switch(bytecode()) { case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through case Bytecodes::_fast_zputfield: // Fall through case Bytecodes::_fast_cputfield: // Fall through case Bytecodes::_fast_sputfield: __ push_i(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break; case Bytecodes::_fast_fputfield: __ push_f(); offs+= Interpreter::stackElementSize; break; case Bytecodes::_fast_dputfield: __ push_d(); offs+=2*Interpreter::stackElementSize; break; default: {
offs = 0;
base = Robj; constRegister Rflags = Robj;
Label is_one_slot; // Life is harder. The stack holds the value on top, followed by the // object. We don't know the size of the value, though; it could be // one or two words depending on its type. As a result, we must find // the type to determine where the object is.
__ ld(Rflags, in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcache); // Big Endian
__ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
// In case of the fast versions, value lives in registers => put it back on tos. switch(bytecode()) { case Bytecodes::_fast_aputfield: __ pop_ptr(); break; case Bytecodes::_fast_iputfield: // Fall through case Bytecodes::_fast_bputfield: // Fall through case Bytecodes::_fast_zputfield: // Fall through case Bytecodes::_fast_cputfield: // Fall through case Bytecodes::_fast_sputfield: __ pop_i(); break; case Bytecodes::_fast_lputfield: __ pop_l(); break; case Bytecodes::_fast_fputfield: __ pop_f(); break; case Bytecodes::_fast_dputfield: __ pop_d(); break; default: break; // Nothin' to do.
}
// On PPC64, we have a different jvmti_post_field_mod which does the job. void TemplateTable::jvmti_post_fast_field_mod() {
__ should_not_reach_here();
}
constRegister Rcache = R5_ARG3, // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
Rclass_or_obj = R31, // Needs to survive C call.
Roffset = R22_tmp2, // Needs to survive C call.
Rflags = R3_ARG1,
Rscratch = R11_scratch1, // used by load_field_cp_cache_entry
Rscratch2 = R12_scratch2, // used by load_field_cp_cache_entry
Rscratch3 = R4_ARG2; const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
// Constant pool already resolved => Load flags and offset of field.
__ get_cache_and_index_at_bcp(Rcache, 1);
jvmti_post_field_mod(Rcache, Rscratch, false/* not static */);
load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
// Get the obj and the final store addr.
pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
// Do the store and fencing. switch(bytecode()) { case Bytecodes::_fast_aputfield: // Store into the field.
do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, IN_HEAP); break;
case Bytecodes::_fast_iputfield:
__ stwx(R17_tos, Rclass_or_obj, Roffset); break;
case Bytecodes::_fast_lputfield:
__ stdx(R17_tos, Rclass_or_obj, Roffset); break;
case Bytecodes::_fast_zputfield:
__ andi(R17_tos, R17_tos, 0x1); // boolean is true if LSB is 1 // fall through to bputfield case Bytecodes::_fast_bputfield:
__ stbx(R17_tos, Rclass_or_obj, Roffset); break;
case Bytecodes::_fast_cputfield: case Bytecodes::_fast_sputfield:
__ sthx(R17_tos, Rclass_or_obj, Roffset); break;
case Bytecodes::_fast_fputfield:
__ stfsx(F15_ftos, Rclass_or_obj, Roffset); break;
case Bytecodes::_fast_dputfield:
__ stfdx(F15_ftos, Rclass_or_obj, Roffset); break;
// Maybe push "appendix" to arguments. if (is_invokedynamic || is_invokehandle) {
Label Ldone; Register reference = Rscratch1;
__ rldicl_(R0, Rflags, 64-ConstantPoolCacheEntry::has_appendix_shift, 63);
__ beq(CCR0, Ldone); // Push "appendix" (MethodType, CallSite, etc.). // This must be done before we get the receiver, // since the parameter_size includes it.
__ load_resolved_reference_at_index(reference, Rindex, /* temp */ Rret_addr, Rscratch2);
__ verify_oop(reference);
__ push_ptr(reference);
__ bind(Ldone);
}
// Load receiver if needed (after appendix is pushed so parameter size is correct). if (load_receiver) { Register Rparam_count = Rscratch1;
__ andi(Rparam_count, Rflags, ConstantPoolCacheEntry::parameter_size_mask);
__ load_receiver(Rparam_count, Rrecv);
__ verify_oop(Rrecv);
}
// Get return type. It's coded into the upper 4 bits of the lower half of the 64 bit value.
__ rldicl(Rret_type, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
__ load_dispatch_table(Rtable_addr, (address*)table_addr);
__ sldi(Rret_type, Rret_type, LogBytesPerWord); // Get return address.
__ ldx(Rret_addr, Rtable_addr, Rret_type);
}
}
// Helper for virtual calls. Load target out of vtable and jump off! // Kills all passed registers. void TemplateTable::generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp) {
// Get target method & entry point. constint base = in_bytes(Klass::vtable_start_offset()); // Calc vtable addr scale the vtable index by 8.
__ sldi(Rindex, Rindex, exact_log2(vtableEntry::size_in_bytes())); // Load target.
__ addi(Rrecv_klass, Rrecv_klass, base + vtableEntry::method_offset_in_bytes());
__ ldx(Rtarget_method, Rindex, Rrecv_klass); // Argument and return type profiling.
__ profile_arguments_type(Rtarget_method, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */, true);
__ call_from_interpreter(Rtarget_method, Rret, Rrecv_klass /* scratch1 */, Rtemp /* scratch2 */);
}
// Virtual or final call. Final calls are rewritten on the fly to run through "fast_finalcall" next time. void TemplateTable::invokevirtual(int byte_no) {
transition(vtos, vtos);
Register Rtable_addr = R11_scratch1,
Rret_type = R12_scratch2,
Rret_addr = R5_ARG3,
Rflags = R22_tmp2, // Should survive C call.
Rrecv = R3_ARG1,
Rrecv_klass = Rrecv,
Rvtableindex_or_method = R31, // Should survive C call.
Rnum_params = R4_ARG2,
Rnew_bc = R6_ARG4;
Register Rscratch = Rflags; // Rflags is dead now.
// Final call case.
__ profile_final_call(Rtemp1, Rscratch); // Argument and return type profiling.
__ profile_arguments_type(Rmethod, Rscratch, Rrecv_klass /* scratch */, true); // Do the final call - the index (f2) contains the method.
__ call_from_interpreter(Rmethod, Rret, Rscratch, Rrecv_klass /* scratch */);
// First check for Object case, then private interface method, // then regular interface method.
// Get receiver klass - this is also a null check
__ null_check_throw(Rreceiver, oopDesc::klass_offset_in_bytes(), Rscratch2);
__ load_klass(Rrecv_klass, Rreceiver);
// Check corner case object method. // Special case of invokeinterface called for virtual method of // java.lang.Object. See ConstantPoolCacheEntry::set_method() for details: // The invokeinterface was rewritten to a invokevirtual, hence we have // to handle this corner case.
__ check_klass_subtype(Rrecv_klass, Rinterface_klass, Rscratch1, Rscratch2, L_subtype); // If we get here the typecheck failed
__ b(L_no_such_interface);
__ bind(L_subtype);
// do the call
Register Rscratch = Rflags; // Rflags is dead now.
// Profile this call.
__ profile_call(Rscratch1, Rscratch2);
// Off we go. With the new method handles, we don't jump to a method handle // entry any more. Instead, we pushed an "appendix" in prepare invoke, which happens // to be the callsite object the bootstrap method returned. This is passed to a // "link" method which does the dispatch (Most likely just grabs the MH stored // inside the callsite and does an invokehandle). // Argument and return type profiling.
__ profile_arguments_type(Rmethod, Rscratch1, Rscratch2, false);
__ call_from_interpreter(Rmethod, Rret_addr, Rscratch1 /* scratch1 */, Rscratch2 /* scratch2 */);
}
// -------------------------------------------------------------------------- // Check if fast case is possible.
// Load pointers to const pool and const pool's tags array.
__ get_cpool_and_tags(Rcpool, Rtags); // Load index of constant pool entry.
__ get_2_byte_integer_at_bcp(1, Rindex, InterpreterMacroAssembler::Unsigned);
// Note: compared to other architectures, PPC's implementation always goes // to the slow path if TLAB is used and fails. if (UseTLAB) { // Make sure the class we're about to instantiate has been resolved // This is done before loading instanceKlass to be consistent with the order // how Constant Pool is updated (see ConstantPoolCache::klass_at_put).
__ addi(Rtags, Rtags, Array<u1>::base_offset_in_bytes());
__ lbzx(Rtags, Rindex, Rtags);
// Make sure klass is fully initialized and get instance_size.
__ lbz(Rscratch, in_bytes(InstanceKlass::init_state_offset()), RinstanceKlass);
__ lwz(Rinstance_size, in_bytes(Klass::layout_helper_offset()), RinstanceKlass);
__ cmpdi(CCR1, Rscratch, InstanceKlass::fully_initialized); // Make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class.
__ andi_(R0, Rinstance_size, Klass::_lh_instance_slow_path_bit); // slow path bit equals 0?
__ crnand(CCR0, Assembler::equal, CCR1, Assembler::equal); // slow path bit set or not fully initialized?
__ beq(CCR0, Lslow_case);
// -------------------------------------------------------------------------- // Fast case: // Allocate the instance. // 1) Try to allocate in the TLAB. // 2) If the above fails (or is not applicable), go to a slow case (creates a new TLAB, etc.).
Register RoldTopValue = RallocatedObject; // Object will be allocated here if it fits. Register RnewTopValue = R6_ARG4; Register RendValue = R7_ARG5;
// Check if we can allocate in the TLAB.
__ ld(RoldTopValue, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
__ ld(RendValue, in_bytes(JavaThread::tlab_end_offset()), R16_thread);
// Must prevent reordering of stores for object initialization with stores that publish the new object.
__ membar(Assembler::StoreStore);
}
// Allocate a multi dimensional array void TemplateTable::multianewarray() {
transition(vtos, atos);
Register Rptr = R31; // Needs to survive C call.
// Put ndims * wordSize into frame temp slot
__ lbz(Rptr, 3, R14_bcp);
__ sldi(Rptr, Rptr, Interpreter::logStackElementSize); // Esp points past last_dim, so set to R4 to first_dim address.
__ add(R4, Rptr, R15_esp);
call_VM(R17_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), R4 /* first_size_address */); // Pop all dimensions off the stack.
__ add(R15_esp, Rptr, R15_esp);
// Must prevent reordering of stores for object initialization with stores that publish the new object.
__ membar(Assembler::StoreStore);
}
// Call into the VM to "quicken" checkcast.
__ push_ptr(); // for GC
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
__ get_vm_result_2(RspecifiedKlass);
__ pop_ptr(); // Restore receiver.
__ b(Lresolved);
// Extract target class from constant pool.
__ bind(Lquicked);
__ sldi(Roffset, Roffset, LogBytesPerWord);
__ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
// Do the checkcast.
__ bind(Lresolved); // Get value klass in RobjKlass.
__ load_klass(RobjKlass, R17_tos); // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
__ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
// Not a subtype; so must throw exception // Target class oop is in register R6_ARG4 == RspecifiedKlass by convention.
__ load_dispatch_table(R11_scratch1, (address*)Interpreter::_throw_ClassCastException_entry);
__ mtctr(R11_scratch1);
__ bctr();
// Profile the null case.
__ align(32, 12);
__ bind(Lis_null);
__ profile_null_seen(R11_scratch1, Rtags); // Rtags used as scratch.
__ align(32, 12);
__ bind(Ldone);
}
// Output: // - tos == 0: Obj was null or not an instance of class. // - tos == 1: Obj was an instance of class. void TemplateTable::instanceof() {
transition(atos, itos);
// Call into the VM to "quicken" instanceof.
__ push_ptr(); // for GC
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
__ get_vm_result_2(RspecifiedKlass);
__ pop_ptr(); // Restore receiver.
__ b(Lresolved);
// Extract target class from constant pool.
__ bind(Lquicked);
__ sldi(Roffset, Roffset, LogBytesPerWord);
__ load_resolved_klass_at_offset(Rcpool, Roffset, RspecifiedKlass);
// Do the instanceof check.
__ bind(Lresolved); // Get value klass in RobjKlass.
__ load_klass(RobjKlass, R17_tos); // Generate a fast subtype check. Branch to cast_ok if no failure. Return 0 if failure.
__ li(R17_tos, 1);
__ gen_subtype_check(RobjKlass, RspecifiedKlass, /*3 temp regs*/ Roffset, Rcpool, Rtags, /*target if subtype*/ Ldone);
__ li(R17_tos, 0);
if (ProfileInterpreter) {
__ b(Ldone);
}
// Profile the null case.
__ align(32, 12);
__ bind(Lis_null);
__ profile_null_seen(Rcpool, Rtags); // Rcpool and Rtags used as scratch.
// Exception oop is in tos
__ verify_oop(R17_tos);
__ null_check_throw(R17_tos, -1, R11_scratch1);
// Throw exception interpreter entry expects exception oop to be in R3.
__ mr(R3_RET, R17_tos);
__ load_dispatch_table(R11_scratch1, (address*)Interpreter::throw_exception_entry());
__ mtctr(R11_scratch1);
__ bctr();
}
// ============================================================================= // Synchronization // Searches the basic object lock list on the stack for a free slot // and uses it to lock the object in tos. // // Recursive locking is enabled by exiting the search if the same // object is already found in the list. Thus, a new basic lock obj lock // is allocated "higher up" in the stack and thus is found first // at next monitor exit. void TemplateTable::monitorenter() {
transition(atos, vtos);
// Check if any slot is present => short cut to allocation if not.
__ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit);
__ bgt(reached_limit, Lallocate_new);
// Pre-load topmost slot.
__ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
__ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); // The search loop.
__ bind(Lloop); // Found free slot?
__ cmpdi(found_free_slot, Rcurrent_obj, 0); // Is this entry for same obj? If so, stop the search and take the found // free slot or allocate a new one to enable recursive locking.
__ cmpd(found_same_obj, Rcurrent_obj, Robj_to_lock);
__ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit);
__ beq(found_free_slot, Lexit);
__ beq(found_same_obj, Lallocate_new);
__ bgt(reached_limit, Lallocate_new); // Check if last allocated BasicLockObj reached.
__ ld(Rcurrent_obj, 0, Rcurrent_obj_addr);
__ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); // Next iteration if unchecked BasicObjectLocks exist on the stack.
__ b(Lloop);
}
// ------------------------------------------------------------------------------ // Check if we found a free slot.
__ bind(Lexit);
// ------------------------------------------------------------------------------ // We now have a slot to lock.
__ bind(Lfound);
// Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly. // The object has already been popped from the stack, so the expression stack looks correct.
__ addi(R14_bcp, R14_bcp, 1);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.