/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_ppc.hpp"
#include "oops/klass.inline.hpp"
#include "oops/methodData.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
#ifdef ASSERT
// On RISC, there's no benefit to verifying instruction boundaries.
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
if (Assembler::is_simm(si31, 16)) {
ld(d, si31, a);
if (emit_filler_nop) nop();
} else {
const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31);
addis(d, a, hi);
ld(d, lo, d);
}
}
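// Illustrative only (a sketch, not emitted by this file): for si31 = 0x12345
// the large-offset path above splits the offset into a signed 16-bit low part
// and a compensating high part:
//   addis d, a, 0x0001      // largeoffset_si16_si16_hi(0x12345)
//   ld    d, 0x2345, d      // largeoffset_si16_si16_lo(0x12345)
// If the low part were >= 0x8000, the high part would be incremented so that
// (hi << 16) + lo still equals si31.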
void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
assert_different_registers(d, a);
ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
}
void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base,
size_t size_in_bytes, bool is_signed) {
switch (size_in_bytes) {
case 8: ld(dst, offs, base); break;
case 4: is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base); break;
case 2: is_signed ? lha(dst, offs, base) : lhz(dst, offs, base); break;
case 1: lbz(dst, offs, base); if (is_signed) extsb(dst, dst); break; // lba doesn't exist :(
default: ShouldNotReachHere();
}
}
void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base,
size_t size_in_bytes) {
switch (size_in_bytes) {
case 8: std(dst, offs, base); break;
case 4: stw(dst, offs, base); break;
case 2: sth(dst, offs, base); break;
case 1: stb(dst, offs, base); break;
default: ShouldNotReachHere();
}
}
void MacroAssembler::align(int modulus, int max, int rem) {
int padding = (rem + modulus - (offset() % modulus)) % modulus;
if (padding > max) return;
for (int c = (padding >> 2); c > 0; --c) { nop(); }
}
void MacroAssembler::align_prefix() {
if (is_aligned(offset() + BytesPerInstWord, 64)) { nop(); }
}
// Issue instructions that calculate the given address from the global TOC.
void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
bool add_relocation, bool emit_dummy_addr) {
int offset = -1;
if (emit_dummy_addr) {
offset = -128; // dummy address
} else if (addr != (address)(intptr_t)-1) {
offset = MacroAssembler::offset_to_global_toc(addr);
}
if (hi16) {
addis(dst, R29_TOC, MacroAssembler::largeoffset_si16_si16_hi(offset));
}
if (lo16) {
if (add_relocation) {
// Relocate at the addi to avoid confusion with a load from the method's TOC.
relocate(internal_word_Relocation::spec(addr));
}
addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
}
}
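// Illustrative sequence (a sketch, assuming both hi16 and lo16 are requested):
//   addis dst, R29_TOC, largeoffset_si16_si16_hi(offset)
//   addi  dst, dst,     largeoffset_si16_si16_lo(offset)   // carries the relocation
// The pair can later be re-targeted via patch_calculate_address_from_global_toc_at
// (below), which is handed the address of the addi and a lower search bound.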
address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
const int offset = MacroAssembler::offset_to_global_toc(addr);
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr;
if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
// Stop, found the addis which writes dst.
break;
}
inst1_addr -= BytesPerInstWord;
}
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
return inst1_addr;
}
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the addi,
// and the addi reads and writes the same register dst.
const int dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr;
if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
// stop, found the addis which writes dst
break;
}
inst1_addr -= BytesPerInstWord;
}
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0);
// -1 is a special case
if (offset == -1) {
return (address)(intptr_t)-1;
} else {
return global_toc() + offset;
}
}
#ifdef _LP64
// Patch compressed oops or klass constants.
// Assembler sequence is
// 1) compressed oops:
// lis rx = const.hi
// ori rx = rx | const.lo
// 2) compressed klass:
// lis rx = const.hi
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// A clrldi, if present, is skipped over when searching for the lis.
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
// Now, find the preceding addis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
bool inst1_found = false;
while (inst1_addr >= bound) {
inst1 = *(int *)inst1_addr;
if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
inst1_addr -= BytesPerInstWord;
}
assert(inst1_found, "inst is not lis");
uint32_t data_value = CompressedOops::narrow_oop_value(data);
int xc = (data_value >> 16) & 0xffff;
int xd = (data_value >> 0) & 0xffff;
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (xd)); // unsigned int
return inst1_addr;
}
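// Worked example with an illustrative value: for data_value = 0x12345678 the
// split above yields xc = 0x1234 and xd = 0x5678, so the patched sequence reads
//   lis rx, 0x1234
//   ori rx, rx, 0x5678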
// Get compressed oop constant.
narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
const int inst2 = *(int *)inst2_addr;
// The relocation points to the second instruction, the ori,
// and the ori reads and writes the same register dst.
const int dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
// Now, find the preceding lis which writes to dst.
int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord;
bool inst1_found = false;
while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr;
if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;}
inst1_addr -= BytesPerInstWord;
}
assert(inst1_found, "inst is not lis");
uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
return CompressedOops::narrow_oop_cast(xl | xh);
}
#endif // _LP64
// Returns true if successful.
bool MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
Register toc, bool fixed_size) {
int toc_offset = 0;
// Use RelocationHolder::none for the constant pool entry, otherwise
// we will end up with a failing NativeCall::verify(x) where x is
// the address of the constant pool entry.
// FIXME: We should insert relocation information for oops at the constant
// pool entries instead of inserting it at the loads; patching of a constant
// pool entry should be less expensive.
address const_address = address_constant((address)a.value(), RelocationHolder::none);
if (const_address == NULL) { return false; } // allocation failure
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(const_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, fixed_size);
return true;
}
bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
const address inst1_addr = a;
const int inst1 = *(int *)inst1_addr;
// The relocation points to the ld or the addis.
return (is_ld(inst1)) ||
(is_addis(inst1) && inv_ra_field(inst1) != 0);
}
int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");
const address inst1_addr = a;
const int inst1 = *(int *)inst1_addr;
if (is_ld(inst1)) {
return inv_d1_field(inst1);
} else if (is_addis(inst1)) {
const int dst = inv_rt_field(inst1);
// Now, find the succeeding ld which reads and writes to dst.
address inst2_addr = inst1_addr + BytesPerInstWord;
int inst2 = 0;
while (true) {
inst2 = *(int *) inst2_addr;
if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) {
// Stop, found the ld which reads and writes dst.
break;
}
inst2_addr += BytesPerInstWord;
}
return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
}
ShouldNotReachHere();
return 0;
}
// Get the constant from a `load_const' sequence.
long MacroAssembler::get_const(address a) {
assert(is_load_const_at(a), "not a load of a constant");
const int *p = (const int*) a;
unsigned long x = (((unsigned long) (get_imm(a,0) & 0xffff)) << 48);
if (is_ori(*(p+1))) {
x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 32);
x |= (((unsigned long) (get_imm(a,3) & 0xffff)) << 16);
x |= (((unsigned long) (get_imm(a,4) & 0xffff)));
} else if (is_lis(*(p+1))) {
x |= (((unsigned long) (get_imm(a,2) & 0xffff)) << 32);
x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 16);
x |= (((unsigned long) (get_imm(a,3) & 0xffff)));
} else {
ShouldNotReachHere();
return (long) 0;
}
return (long) x;
}
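// Layout decoded above (illustrative; single-register `load_const' variant):
//   lis    d, x[63:48]      // immediate index 0
//   ori    d, d, x[47:32]   // immediate index 1
//   rldicr d, d, 32, 31     // shift left by 32; no immediate (index 2)
//   oris   d, d, x[31:16]   // immediate index 3
//   ori    d, d, x[15:0]    // immediate index 4
// The lis-at-index-1 case corresponds to the two-register variant, which
// assembles the halves in a different order.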
// Patch the 64-bit constant of a `load_const' sequence. This is a low-level
// procedure. It neither flushes the instruction cache nor is it MT-safe.
void MacroAssembler::patch_const(address a, long x) {
assert(is_load_const_at(a), "not a load of a constant");
int *p = (int*) a;
if (is_ori(*(p+1))) {
set_imm(0 + p, (x >> 48) & 0xffff);
set_imm(1 + p, (x >> 32) & 0xffff);
set_imm(3 + p, (x >> 16) & 0xffff);
set_imm(4 + p, x & 0xffff);
} else if (is_lis(*(p+1))) {
set_imm(0 + p, (x >> 48) & 0xffff);
set_imm(2 + p, (x >> 32) & 0xffff);
set_imm(1 + p, (x >> 16) & 0xffff);
set_imm(3 + p, x & 0xffff);
} else {
ShouldNotReachHere();
}
}
AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
assert(oop_recorder() != NULL, "this assembler needs a Recorder");
int index = oop_recorder()->allocate_metadata_index(obj);
RelocationHolder rspec = metadata_Relocation::spec(index);
return AddressLiteral((address)obj, rspec);
}
AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
assert(oop_recorder() != NULL, "this assembler needs a Recorder");
int index = oop_recorder()->find_index(obj);
RelocationHolder rspec = metadata_Relocation::spec(index);
return AddressLiteral((address)obj, rspec);
}
AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->allocate_oop_index(obj);
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}
AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->find_index(obj);
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}
#ifndef PRODUCT
void MacroAssembler::pd_print_patched_instruction(address branch) {
Unimplemented(); // TODO: PPC port
}
#endif // ndef PRODUCT
// Conditional far branch for destinations encodable in 24+2 bits.
void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {
// If requested by flag optimize, relocate the bc_far as a
// runtime_call and prepare for optimizing it when the code gets
// relocated.
if (optimize == bc_far_optimize_on_relocate) {
relocate(relocInfo::runtime_call_type);
}
// variant 2:
//
// b!cxx SKIP
// bxx DEST
// SKIP:
//
const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
opposite_bcond(inv_boint_bcond(boint)));
// We emit two branches.
// First, a conditional branch which jumps around the far branch.
const address not_taken_pc = pc() + 2 * BytesPerInstWord;
const address bc_pc = pc();
bc(opposite_boint, biint, not_taken_pc);
const int bc_instr = *(int*)bc_pc;
assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition");
assert(opposite_boint == inv_bo_field(bc_instr), "postcondition");
assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))),
opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))),
"postcondition");
assert(biint == inv_bi_field(bc_instr), "postcondition");
// Second, an unconditional far branch which jumps to dest.
// Note: target(dest) remembers the current pc (see CodeSection::target)
// and returns the current pc if the label is not bound yet; when
// the label gets bound, the unconditional far branch will be patched.
const address target_pc = target(dest);
const address b_pc = pc();
b(target_pc);
assert(not_taken_pc == pc(), "postcondition");
assert(dest.is_bound() || target_pc == b_pc, "postcondition");
}
// 1 or 2 instructions
void MacroAssembler::bc_far_optimized(int boint, int biint, Label& dest) {
if (dest.is_bound() && is_within_range_of_bcxx(target(dest), pc())) {
bc(boint, biint, dest);
} else {
bc_far(boint, biint, dest, MacroAssembler::bc_far_optimize_on_relocate);
}
}
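// Illustrative usage (a sketch; the label and call site are assumptions):
//   Label L_slow;
//   bc_far_optimized(Assembler::bcondCRbiIs1,
//                    Assembler::bi0(CCR0, Assembler::equal), L_slow);
// emits a single bcxx if L_slow is bound and in range, and the two-instruction
// far variant otherwise.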
bool MacroAssembler::is_bc_far_at(address instruction_addr) {
return is_bc_far_variant1_at(instruction_addr) ||
is_bc_far_variant2_at(instruction_addr) ||
is_bc_far_variant3_at(instruction_addr);
}
address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) {
if (is_bc_far_variant1_at(instruction_addr)) {
const address instruction_1_addr = instruction_addr;
const int instruction_1 = *(int*)instruction_1_addr;
return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr);
} else if (is_bc_far_variant2_at(instruction_addr)) {
const address instruction_2_addr = instruction_addr + 4;
return bxx_destination(instruction_2_addr);
} else if (is_bc_far_variant3_at(instruction_addr)) {
return instruction_addr + 8;
}
// variant 4 ???
ShouldNotReachHere();
return NULL;
}
void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) {
if (is_bc_far_variant3_at(instruction_addr)) {
// variant 3, far cond branch to the next instruction, already patched to nops:
//
// nop
// endgroup
// SKIP/DEST:
//
return;
}
// first, extract boint and biint from the current branch
int boint = 0;
int biint = 0;
ResourceMark rm;
const int code_size = 2 * BytesPerInstWord;
CodeBuffer buf(instruction_addr, code_size);
MacroAssembler masm(&buf);
if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
// Far branch to next instruction: Optimize it by patching nops (produce variant 3).
masm.nop();
masm.endgroup();
} else {
if (is_bc_far_variant1_at(instruction_addr)) {
// variant 1, the 1st instruction contains the destination address:
//
// bcxx DEST
// nop
//
const int instruction_1 = *(int*)(instruction_addr);
boint = inv_bo_field(instruction_1);
biint = inv_bi_field(instruction_1);
} else if (is_bc_far_variant2_at(instruction_addr)) {
// variant 2, the 2nd instruction contains the destination address:
//
// b!cxx SKIP
// bxx DEST
// SKIP:
//
const int instruction_1 = *(int*)(instruction_addr);
boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
biint = inv_bi_field(instruction_1);
} else {
// variant 4???
ShouldNotReachHere();
}
// second, set the new branch destination and optimize the code
if (dest != instruction_addr + 4 && // the bc_far is still unbound!
masm.is_within_range_of_bcxx(dest, instruction_addr)) {
// variant 1:
//
// bcxx DEST
// nop
//
masm.bc(boint, biint, dest);
masm.nop();
} else {
// variant 2:
//
// b!cxx SKIP
// bxx DEST
// SKIP:
//
const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
opposite_bcond(inv_boint_bcond(boint)));
const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
masm.bc(opposite_boint, biint, not_taken_pc);
masm.b(dest);
}
}
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Emit a NOT MT-safe patchable 64-bit absolute call/jump.
void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
// get current pc
uint64_t start_pc = (uint64_t) pc();
const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last
const address pc_of_b = (address) (start_pc + (0*BytesPerInstWord)); // b is first
// relocate here
if (rt != relocInfo::none) {
relocate(rt);
}
if ( ReoptimizeCallSequences &&
(( link && is_within_range_of_b(dest, pc_of_bl)) ||
(!link && is_within_range_of_b(dest, pc_of_b)))) {
// variant 2:
// Emit an optimized, pc-relative call/jump.
if (link) {
// some padding
nop();
nop();
nop();
nop();
nop();
nop();
// do the call
assert(pc() == pc_of_bl, "just checking");
bl(dest, relocInfo::none);
} else {
// do the jump
assert(pc() == pc_of_b, "just checking");
b(dest, relocInfo::none);
// some padding
nop();
nop();
nop();
nop();
nop();
nop();
}
// Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_variant2_at((address)start_pc, link),
"can't identify emitted call");
} else {
// variant 1:
mr(R0, R11); // spill R11 -> R0.
// Load the destination address into CTR,
// calculate destination relative to global toc.
calculate_address_from_global_toc(R11, dest, true, true, false);
mtctr(R11);
mr(R11, R0); // spill R11 <- R0.
nop();
// do the call/jump
if (link) {
bctrl();
} else{
bctr();
}
// Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_variant1b_at((address)start_pc, link),
"can't identify emitted call");
}
// Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_at((address)start_pc, link),
"can't identify emitted call");
assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest,
"wrong encoding of dest address");
}
// Identify a bxx64_patchable instruction.
bool MacroAssembler::is_bxx64_patchable_at(address instruction_addr, bool link) {
return is_bxx64_patchable_variant1b_at(instruction_addr, link)
//|| is_bxx64_patchable_variant1_at(instruction_addr, link)
|| is_bxx64_patchable_variant2_at(instruction_addr, link);
}
// Does the bxx64_patchable instruction use a pc-relative encoding of
// the call destination?
bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) {
// variant 2 is pc-relative
return is_bxx64_patchable_variant2_at(instruction_addr, link);
}
// Identify variant 1.
bool MacroAssembler::is_bxx64_patchable_variant1_at(address instruction_addr, bool link) {
unsigned int* instr = (unsigned int*) instruction_addr;
return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
&& is_mtctr(instr[5]) // mtctr
&& is_load_const_at(instruction_addr);
}
// Identify variant 1b: load destination relative to global toc.
bool MacroAssembler::is_bxx64_patchable_variant1b_at(address instruction_addr, bool link) {
unsigned int* instr = (unsigned int*) instruction_addr;
return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
&& is_mtctr(instr[3]) // mtctr
&& is_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, instruction_addr);
}
// Identify variant 2.
bool MacroAssembler::is_bxx64_patchable_variant2_at(address instruction_addr, bool link) {
unsigned int* instr = (unsigned int*) instruction_addr;
if (link) {
return is_bl (instr[6]) // bl dest is last
&& is_nop(instr[0]) // nop
&& is_nop(instr[1]) // nop
&& is_nop(instr[2]) // nop
&& is_nop(instr[3]) // nop
&& is_nop(instr[4]) // nop
&& is_nop(instr[5]); // nop
} else {
return is_b (instr[0]) // b dest is first
&& is_nop(instr[1]) // nop
&& is_nop(instr[2]) // nop
&& is_nop(instr[3]) // nop
&& is_nop(instr[4]) // nop
&& is_nop(instr[5]) // nop
&& is_nop(instr[6]); // nop
}
}
// Set dest address of a bxx64_patchable instruction.
void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, address dest, bool link) {
ResourceMark rm;
int code_size = MacroAssembler::bxx64_patchable_size;
CodeBuffer buf(instruction_addr, code_size);
MacroAssembler masm(&buf);
masm.bxx64_patchable(dest, relocInfo::none, link);
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
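// Illustrative patching flow (a sketch; call_site and new_entry are assumed
// caller-side names):
//   if (MacroAssembler::is_bxx64_patchable_at(call_site, /*link=*/true)) {
//     MacroAssembler::set_dest_of_bxx64_patchable_at(call_site, new_entry, /*link=*/true);
//   }
// Re-emitting through a temporary CodeBuffer, as done above, keeps the site
// exactly bxx64_patchable_size bytes long no matter which variant fits.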
// Get dest address of a bxx64_patchable instruction.
address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) {
if (is_bxx64_patchable_variant1_at(instruction_addr, link)) {
return (address) (unsigned long) get_const(instruction_addr);
} else if (is_bxx64_patchable_variant2_at(instruction_addr, link)) {
unsigned int* instr = (unsigned int*) instruction_addr;
if (link) {
const int instr_idx = 6; // bl is last
int branchoffset = branch_destination(instr[instr_idx], 0);
return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
} else {
const int instr_idx = 0; // b is first
int branchoffset = branch_destination(instr[instr_idx], 0);
return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
}
// Load dest relative to global toc.
} else if (is_bxx64_patchable_variant1b_at(instruction_addr, link)) {
return get_address_of_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord,
instruction_addr);
} else {
ShouldNotReachHere();
return NULL;
}
}
void MacroAssembler::clobber_volatile_gprs(Register excluded_register) {
const int magic_number = 0x42;
// Preserve stack pointer register (R1_SP) and system thread id register (R13);
// although they're technically volatile
for (int i = 2; i < 13; i++) {
Register reg = as_Register(i);
if (reg == excluded_register) {
continue;
}
li(reg, magic_number);
}
}
void MacroAssembler::clobber_carg_stack_slots(Register tmp) {
const int magic_number = 0x43;
li(tmp, magic_number);
for (int m = 0; m <= 7; m++) {
std(tmp, frame::abi_minframe_size + m * 8, R1_SP);
}
}
// Uses ordering which corresponds to ABI:
// _savegpr0_14: std r14,-144(r1)
// _savegpr0_15: std r15,-136(r1)
// _savegpr0_16: std r16,-128(r1)
void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) {
std(R14, offset, dst); offset += 8;
std(R15, offset, dst); offset += 8;
std(R16, offset, dst); offset += 8;
std(R17, offset, dst); offset += 8;
std(R18, offset, dst); offset += 8;
std(R19, offset, dst); offset += 8;
std(R20, offset, dst); offset += 8;
std(R21, offset, dst); offset += 8;
std(R22, offset, dst); offset += 8;
std(R23, offset, dst); offset += 8;
std(R24, offset, dst); offset += 8;
std(R25, offset, dst); offset += 8;
std(R26, offset, dst); offset += 8;
std(R27, offset, dst); offset += 8;
std(R28, offset, dst); offset += 8;
std(R29, offset, dst); offset += 8;
std(R30, offset, dst); offset += 8;
std(R31, offset, dst); offset += 8;
stfd(F14, offset, dst); offset += 8;
stfd(F15, offset, dst); offset += 8;
stfd(F16, offset, dst); offset += 8;
stfd(F17, offset, dst); offset += 8;
stfd(F18, offset, dst); offset += 8;
stfd(F19, offset, dst); offset += 8;
stfd(F20, offset, dst); offset += 8;
stfd(F21, offset, dst); offset += 8;
stfd(F22, offset, dst); offset += 8;
stfd(F23, offset, dst); offset += 8;
stfd(F24, offset, dst); offset += 8;
stfd(F25, offset, dst); offset += 8;
stfd(F26, offset, dst); offset += 8;
stfd(F27, offset, dst); offset += 8;
stfd(F28, offset, dst); offset += 8;
stfd(F29, offset, dst); offset += 8;
stfd(F30, offset, dst); offset += 8;
stfd(F31, offset, dst);
}
// Uses ordering which corresponds to ABI:
// _restgpr0_14: ld r14,-144(r1)
// _restgpr0_15: ld r15,-136(r1)
// _restgpr0_16: ld r16,-128(r1)
void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) {
ld(R14, offset, src); offset += 8;
ld(R15, offset, src); offset += 8;
ld(R16, offset, src); offset += 8;
ld(R17, offset, src); offset += 8;
ld(R18, offset, src); offset += 8;
ld(R19, offset, src); offset += 8;
ld(R20, offset, src); offset += 8;
ld(R21, offset, src); offset += 8;
ld(R22, offset, src); offset += 8;
ld(R23, offset, src); offset += 8;
ld(R24, offset, src); offset += 8;
ld(R25, offset, src); offset += 8;
ld(R26, offset, src); offset += 8;
ld(R27, offset, src); offset += 8;
ld(R28, offset, src); offset += 8;
ld(R29, offset, src); offset += 8;
ld(R30, offset, src); offset += 8;
ld(R31, offset, src); offset += 8;
// FP registers
lfd(F14, offset, src); offset += 8;
lfd(F15, offset, src); offset += 8;
lfd(F16, offset, src); offset += 8;
lfd(F17, offset, src); offset += 8;
lfd(F18, offset, src); offset += 8;
lfd(F19, offset, src); offset += 8;
lfd(F20, offset, src); offset += 8;
lfd(F21, offset, src); offset += 8;
lfd(F22, offset, src); offset += 8;
lfd(F23, offset, src); offset += 8;
lfd(F24, offset, src); offset += 8;
lfd(F25, offset, src); offset += 8;
lfd(F26, offset, src); offset += 8;
lfd(F27, offset, src); offset += 8;
lfd(F28, offset, src); offset += 8;
lfd(F29, offset, src); offset += 8;
lfd(F30, offset, src); offset += 8;
lfd(F31, offset, src);
}
// For verify_oops.
void MacroAssembler::save_volatile_gprs(Register dst, int offset, bool include_fp_regs, bool include_R3_RET_reg) {
std(R2, offset, dst); offset += 8;
if (include_R3_RET_reg) {
std(R3, offset, dst); offset += 8;
}
std(R4, offset, dst); offset += 8;
std(R5, offset, dst); offset += 8;
std(R6, offset, dst); offset += 8;
std(R7, offset, dst); offset += 8;
std(R8, offset, dst); offset += 8;
std(R9, offset, dst); offset += 8;
std(R10, offset, dst); offset += 8;
std(R11, offset, dst); offset += 8;
std(R12, offset, dst); offset += 8;
if (include_fp_regs) {
stfd(F0, offset, dst); offset += 8;
stfd(F1, offset, dst); offset += 8;
stfd(F2, offset, dst); offset += 8;
stfd(F3, offset, dst); offset += 8;
stfd(F4, offset, dst); offset += 8;
stfd(F5, offset, dst); offset += 8;
stfd(F6, offset, dst); offset += 8;
stfd(F7, offset, dst); offset += 8;
stfd(F8, offset, dst); offset += 8;
stfd(F9, offset, dst); offset += 8;
stfd(F10, offset, dst); offset += 8;
stfd(F11, offset, dst); offset += 8;
stfd(F12, offset, dst); offset += 8;
stfd(F13, offset, dst);
}
}
// For verify_oops.
void MacroAssembler::restore_volatile_gprs(Register src, int offset, bool include_fp_regs, bool include_R3_RET_reg) {
ld(R2, offset, src); offset += 8;
if (include_R3_RET_reg) {
ld(R3, offset, src); offset += 8;
}
ld(R4, offset, src); offset += 8;
ld(R5, offset, src); offset += 8;
ld(R6, offset, src); offset += 8;
ld(R7, offset, src); offset += 8;
ld(R8, offset, src); offset += 8;
ld(R9, offset, src); offset += 8;
ld(R10, offset, src); offset += 8;
ld(R11, offset, src); offset += 8;
ld(R12, offset, src); offset += 8;
if (include_fp_regs) {
lfd(F0, offset, src); offset += 8;
lfd(F1, offset, src); offset += 8;
lfd(F2, offset, src); offset += 8;
lfd(F3, offset, src); offset += 8;
lfd(F4, offset, src); offset += 8;
lfd(F5, offset, src); offset += 8;
lfd(F6, offset, src); offset += 8;
lfd(F7, offset, src); offset += 8;
lfd(F8, offset, src); offset += 8;
lfd(F9, offset, src); offset += 8;
lfd(F10, offset, src); offset += 8;
lfd(F11, offset, src); offset += 8;
lfd(F12, offset, src); offset += 8;
lfd(F13, offset, src);
}
}
void MacroAssembler::save_LR_CR(Register tmp) {
mfcr(tmp);
std(tmp, _abi0(cr), R1_SP);
mflr(tmp);
std(tmp, _abi0(lr), R1_SP);
// Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad)
}
void MacroAssembler::restore_LR_CR(Register tmp) {
assert(tmp != R1_SP, "must be distinct");
ld(tmp, _abi0(lr), R1_SP);
mtlr(tmp);
ld(tmp, _abi0(cr), R1_SP);
mtcr(tmp);
}
address MacroAssembler::get_PC_trash_LR(Register result) {
Label L;
bl(L);
bind(L);
address lr_pc = pc();
mflr(result);
return lr_pc;
}
void MacroAssembler::resize_frame(Register offset, Register tmp) {
#ifdef ASSERT
assert_different_registers(offset, tmp, R1_SP);
andi_(tmp, offset, frame::alignment_in_bytes-1);
asm_assert_eq("resize_frame: unaligned");
#endif
// tmp <- *(SP)
ld(tmp, _abi0(callers_sp), R1_SP);
// addr <- SP + offset;
// *(addr) <- tmp;
// SP <- addr
stdux(tmp, R1_SP, offset);
}
void MacroAssembler::resize_frame(int offset, Register tmp) {
assert(is_simm(offset, 16), "too big an offset");
assert_different_registers(tmp, R1_SP);
assert((offset & (frame::alignment_in_bytes-1))==0, "resize_frame: unaligned");
// tmp <- *(SP)
ld(tmp, _abi0(callers_sp), R1_SP);
// addr <- SP + offset;
// *(addr) <- tmp;
// SP <- addr
stdu(tmp, offset, R1_SP);
}
void MacroAssembler::resize_frame_absolute(Register addr, Register tmp1, Register tmp2) {
// (addr == tmp1) || (addr == tmp2) is allowed here!
assert(tmp1 != tmp2, "must be distinct");
// compute offset w.r.t. current stack pointer
// tmp_1 <- addr - SP (!)
subf(tmp1, R1_SP, addr);
// atomically update SP keeping back link.
resize_frame(tmp1/* offset */, tmp2/* tmp */);
}
void MacroAssembler::push_frame(Register bytes, Register tmp) {
#ifdef ASSERT
assert(bytes != R0, "r0 not allowed here");
andi_(R0, bytes, frame::alignment_in_bytes-1);
asm_assert_eq("push_frame(Reg, Reg): unaligned");
#endif
neg(tmp, bytes);
stdux(R1_SP, R1_SP, tmp);
}
// Push a frame of size `bytes'.
void MacroAssembler::push_frame(unsigned int bytes, Register tmp) {
long offset = align_addr(bytes, frame::alignment_in_bytes);
if (is_simm(-offset, 16)) {
stdu(R1_SP, -offset, R1_SP);
} else {
load_const_optimized(tmp, -offset);
stdux(R1_SP, R1_SP, tmp);
}
}
// Push a frame of size `bytes' plus abi_reg_args on top.
void MacroAssembler::push_frame_reg_args(unsigned int bytes, Register tmp) {
push_frame(bytes + frame::abi_reg_args_size, tmp);
}
// Set up a new C frame with a spill area for non-volatile GPRs and
// additional space for local variables.
void MacroAssembler::push_frame_reg_args_nonvolatiles(unsigned int bytes,
Register tmp) {
push_frame(bytes + frame::abi_reg_args_size + frame::spill_nonvolatiles_size, tmp);
}
// Pop current C frame.
void MacroAssembler::pop_frame() {
ld(R1_SP, _abi0(callers_sp), R1_SP);
}
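// Typical pairing of the frame helpers above (illustrative sketch only):
//   save_LR_CR(R0);               // spill LR/CR into the caller's ABI slots
//   push_frame_reg_args(0, R0);   // new frame with abi_reg_args on top
//   ...                           // do the work
//   pop_frame();                  // restore SP from the back link
//   restore_LR_CR(R0);            // reload LR/CR from the caller's frame
//   blr();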
#if defined(ABI_ELFv2)
address MacroAssembler::branch_to(Register r_function_entry, bool and_link) {
// TODO(asmundak): make sure the caller uses R12 as function descriptor
// most of the time.
if (R12 != r_function_entry) {
mr(R12, r_function_entry);
}
mtctr(R12);
// Do a call or a branch.
if (and_link) {
bctrl();
} else {
bctr();
}
_last_calls_return_pc = pc();
return _last_calls_return_pc;
}
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address MacroAssembler::call_c(Register r_function_entry) {
return branch_to(r_function_entry, /*and_link=*/true);
}
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address MacroAssembler::call_c_and_return_to_caller(Register r_function_entry) {
return branch_to(r_function_entry, /*and_link=*/false);
}
address MacroAssembler::call_c(address function_entry, relocInfo::relocType rt) {
load_const(R12, function_entry, R0);
return branch_to(R12, /*and_link=*/true);
}
#else
// Generic version of a call to C function via a function descriptor
// with variable support for C calling conventions (TOC, ENV, etc.).
// Updates and returns _last_calls_return_pc.
address MacroAssembler::branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee) {
// we emit standard ptrgl glue code here
assert((function_descriptor != R0), "function_descriptor cannot be R0");
// retrieve necessary entries from the function descriptor
ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor);
mtctr(R0);
if (load_toc_of_callee) {
ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor);
}
if (load_env_of_callee) {
ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor);
} else if (load_toc_of_callee) {
li(R11, 0);
}
// do a call or a branch
if (and_link) {
bctrl();
} else {
bctr();
}
_last_calls_return_pc = pc();
return _last_calls_return_pc;
}
// Call a C function via a function descriptor and use full C calling
// conventions.
// We don't use the TOC in generated code, so there is no need to save
// and restore its value.
address MacroAssembler::call_c(Register fd) {
return branch_to(fd, /*and_link=*/true,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/true);
}
address MacroAssembler::call_c_and_return_to_caller(Register fd) {
return branch_to(fd, /*and_link=*/false,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/true);
}
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
if (rt != relocInfo::none) {
// this call needs to be relocatable
if (!ReoptimizeCallSequences
|| (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
|| fd == NULL // support code-size estimation
|| !fd->is_friend_function()
|| fd->entry() == NULL) {
// it's not a friend function as defined by class FunctionDescriptor,
// so do a full call-c here.
load_const(R11, (address)fd, R0);
bool has_env = (fd != NULL && fd->env() != NULL);
return branch_to(R11, /*and_link=*/true,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/has_env);
} else {
// It's a friend function. Load the entry point and don't care about
// toc and env. Use an optimizable call instruction, but ensure the
// same code-size as in the case of a non-friend function.
nop();
nop();
nop();
bl64_patchable(fd->entry(), rt);
_last_calls_return_pc = pc();
return _last_calls_return_pc;
}
} else {
// This call does not need to be relocatable, do more aggressive
// optimizations.
if (!ReoptimizeCallSequences
|| !fd->is_friend_function()) {
// It's not a friend function as defined by class FunctionDescriptor,
// so do a full call-c here.
load_const(R11, (address)fd, R0);
return branch_to(R11, /*and_link=*/true,
/*save toc=*/false,
/*restore toc=*/false,
/*load toc=*/true,
/*load env=*/true);
} else {
// it's a friend function, load the entry point and don't care about
// toc and env.
address dest = fd->entry();
if (is_within_range_of_b(dest, pc())) {
bl(dest);
} else {
bl64_patchable(dest, rt);
}
_last_calls_return_pc = pc();
return _last_calls_return_pc;
}
}
}
// Call a C function. All constants needed reside in TOC.
//
// Read the address to call from the TOC.
// Read env from TOC, if fd specifies an env.
// Read new TOC from TOC.
address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
relocInfo::relocType rt, Register toc) {
if (!ReoptimizeCallSequences
|| (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
|| !fd->is_friend_function()) {
// It's not a friend function as defined by class FunctionDescriptor,
// so do a full call-c here.
assert(fd->entry() != NULL, "function must be linked");
AddressLiteral fd_entry(fd->entry());
bool success = load_const_from_method_toc(R11, fd_entry, toc, /*fixed_size*/ true);
mtctr(R11);
if (fd->env() == NULL) {
li(R11, 0);
nop();
} else {
AddressLiteral fd_env(fd->env());
success = success && load_const_from_method_toc(R11, fd_env, toc, /*fixed_size*/ true);
}
AddressLiteral fd_toc(fd->toc());
// Set R2_TOC (load from toc)
success = success && load_const_from_method_toc(R2_TOC, fd_toc, toc, /*fixed_size*/ true);
bctrl();
_last_calls_return_pc = pc();
if (!success) { return NULL; }
} else {
// It's a friend function, load the entry point and don't care about
// toc and env. Use an optimizable call instruction, but ensure the
// same code-size as in the case of a non-friend function.
nop();
bl64_patchable(fd->entry(), rt);
_last_calls_return_pc = pc();
}
return _last_calls_return_pc;
}
#endif // ABI_ELFv2
void MacroAssembler::post_call_nop() {
// Make inline again when loom is always enabled.
if (!Continuations::enabled()) {
return;
}
nop();
}
void MacroAssembler::call_VM_base(Register oop_result,
Register last_java_sp,
address entry_point,
bool check_exceptions) {
BLOCK_COMMENT("call_VM {");
// Determine last_java_sp register.
if (!last_java_sp->is_valid()) {
last_java_sp = R1_SP;
}
set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, R11_scratch1);
// ARG1 must hold thread address.
mr(R3_ARG1, R16_thread);
#if defined(ABI_ELFv2)
address return_pc = call_c(entry_point, relocInfo::none);
#else
address return_pc = call_c((FunctionDescriptor*)entry_point, relocInfo::none);
#endif
reset_last_Java_frame();
// Check for pending exceptions.
if (check_exceptions) {
// We don't check for exceptions here.
ShouldNotReachHere();
}
// Get oop result if there is one and reset the value in the thread.
if (oop_result->is_valid()) {
get_vm_result(oop_result);
}
_last_calls_return_pc = return_pc;
BLOCK_COMMENT("} call_VM");
}
void MacroAssembler::call_VM_leaf_base(address entry_point) {
BLOCK_COMMENT("call_VM_leaf {");
#if defined(ABI_ELFv2)
call_c(entry_point, relocInfo::none);
#else
call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::none);
#endif
BLOCK_COMMENT("} call_VM_leaf");
}
void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
call_VM_base(oop_result, noreg, entry_point, check_exceptions);
}
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1,
bool check_exceptions) {
// R3_ARG1 is reserved for the thread.
mr_if_needed(R4_ARG2, arg_1);
call_VM(oop_result, entry_point, check_exceptions);
}
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
bool check_exceptions) {
// R3_ARG1 is reserved for the thread
mr_if_needed(R4_ARG2, arg_1);
assert(arg_2 != R4_ARG2, "smashed argument");
mr_if_needed(R5_ARG3, arg_2);
call_VM(oop_result, entry_point, check_exceptions);
}
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3,
bool check_exceptions) {
// R3_ARG1 is reserved for the thread
mr_if_needed(R4_ARG2, arg_1);
assert(arg_2 != R4_ARG2, "smashed argument");
mr_if_needed(R5_ARG3, arg_2);
mr_if_needed(R6_ARG4, arg_3);
call_VM(oop_result, entry_point, check_exceptions);
}
void MacroAssembler::call_VM_leaf(address entry_point) {
call_VM_leaf_base(entry_point);
}
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
mr_if_needed(R3_ARG1, arg_1);
call_VM_leaf(entry_point);
}
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
mr_if_needed(R3_ARG1, arg_1);
assert(arg_2 != R3_ARG1, "smashed argument");
mr_if_needed(R4_ARG2, arg_2);
call_VM_leaf(entry_point);
}
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
mr_if_needed(R3_ARG1, arg_1);
assert(arg_2 != R3_ARG1, "smashed argument");
mr_if_needed(R4_ARG2, arg_2);
assert(arg_3 != R3_ARG1 && arg_3 != R4_ARG2, "smashed argument");
mr_if_needed(R5_ARG3, arg_3);
call_VM_leaf(entry_point);
}
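// Illustrative call into the VM (a sketch; some_runtime_entry is a placeholder
// for a real VM entry point). check_exceptions must be false, since
// call_VM_base above does not implement the exception check:
//   call_VM(noreg, CAST_FROM_FN_PTR(address, some_runtime_entry), R4_ARG2,
//           /*check_exceptions=*/false);
// R3_ARG1 is filled with R16_thread by call_VM_base, which is why the helpers
// above start the outgoing Java arguments at R4_ARG2.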
// Check whether instruction is a read access to the polling page
// which was emitted by load_from_polling_page(..).
bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext,
address* polling_address_ptr) {
if (!is_ld(instruction))
return false; // It's not a ld. Fail.
int rt = inv_rt_field(instruction);
int ra = inv_ra_field(instruction);
int ds = inv_ds_field(instruction);
if (!(ds == 0 && ra != 0 && rt == 0)) {
return false; // It's not a ld(r0, X, ra). Fail.
}
if (!ucontext) {
// Set polling address.
if (polling_address_ptr != NULL) {
*polling_address_ptr = NULL;
}
return true; // No ucontext given. Can't check value of ra. Assume true.
}
#ifdef LINUX
// Ucontext given. Check that register ra contains the address of
// the safepoint polling page.
ucontext_t* uc = (ucontext_t*) ucontext;
// Set polling address.
address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds;
if (polling_address_ptr != NULL) {
*polling_address_ptr = addr;
}
return SafepointMechanism::is_poll_address(addr);
#else
// Not on Linux, ucontext must be NULL.
ShouldNotReachHere();
return false;
#endif
}
void MacroAssembler::bang_stack_with_offset(int offset) {
// When increasing the stack, the old stack pointer will be written
// to the new top of stack according to the PPC64 ABI.
// Therefore, stack banging is not necessary when increasing
// the stack by <= os::vm_page_size() bytes.
// When increasing the stack by a larger amount, this method is
// called repeatedly to bang the intermediate pages.
// Stack grows down, caller passes positive offset.
assert(offset > 0, "must bang with positive offset");
long stdoffset = -offset;
if (is_simm(stdoffset, 16)) {
// Signed 16 bit offset, a simple std is ok.
if (UseLoadInstructionsForStackBangingPPC64) {
ld(R0, (int)(signed short)stdoffset, R1_SP);
} else {
std(R0,(int)(signed short)stdoffset, R1_SP);
}
} else if (is_simm(stdoffset, 31)) {
const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
Register tmp = R11;
addis(tmp, R1_SP, hi);
if (UseLoadInstructionsForStackBangingPPC64) {
ld(R0, lo, tmp);
} else {
std(R0, lo, tmp);
}
} else {
ShouldNotReachHere();
}
}
// If instruction is a stack bang of the form
// std R0, x(Ry), (see bang_stack_with_offset())
// stdu R1_SP, x(R1_SP), (see push_frame(), resize_frame())
// or stdux R1_SP, Rx, R1_SP (see push_frame(), resize_frame())
// return the banged address. Otherwise, return 0.
address MacroAssembler::get_stack_bang_address(int instruction, void *ucontext) {
#ifdef LINUX
ucontext_t* uc = (ucontext_t*) ucontext;
int rs = inv_rs_field(instruction);
int ra = inv_ra_field(instruction);
if ( (is_ld(instruction) && rs == 0 && UseLoadInstructionsForStackBangingPPC64)
|| (is_std(instruction) && rs == 0 && !UseLoadInstructionsForStackBangingPPC64)
|| (is_stdu(instruction) && rs == 1)) {
int ds = inv_ds_field(instruction);
// return banged address
return ds+(address)uc->uc_mcontext.regs->gpr[ra];
} else if (is_stdux(instruction) && rs == 1) {
int rb = inv_rb_field(instruction);
address sp = (address)uc->uc_mcontext.regs->gpr[1];
long rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
return ra != 1 || rb_val >= 0 ? NULL // not a stack bang
: sp + rb_val; // banged address
}
return NULL; // not a stack bang
#else
// workaround not needed on !LINUX :-)
ShouldNotCallThis();
return NULL;
#endif
}
void MacroAssembler::reserved_stack_check(Register return_pc) {
// Test if reserved zone needs to be enabled.
Label no_reserved_zone_enabling;
ld_ptr(R0, JavaThread::reserved_stack_activation_offset(), R16_thread);
cmpld(CCR0, R1_SP, R0);
blt_predict_taken(CCR0, no_reserved_zone_enabling);
// Enable reserved zone again, throw stack overflow exception.
push_frame_reg_args(0, R0);
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), R16_thread);
pop_frame();
mtlr(return_pc);
load_const_optimized(R0, StubRoutines::throw_delayed_StackOverflowError_entry());
mtctr(R0);
bctr();
should_not_reach_here();
bind(no_reserved_zone_enabling);
}
void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
bool cmpxchgx_hint) {
Label retry;
bind(retry);
ldarx(dest_current_value, addr_base, cmpxchgx_hint);
stdcx_(exchange_value, addr_base);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
} else {
bne( CCR0, retry); // StXcx_ sets CCR0.
}
}
void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base,
Register tmp, bool cmpxchgx_hint) {
Label retry;
bind(retry);
ldarx(dest_current_value, addr_base, cmpxchgx_hint);
add(tmp, dest_current_value, inc_value);
stdcx_(tmp, addr_base);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
} else {
bne( CCR0, retry); // StXcx_ sets CCR0.
}
}
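// Semantics of the two helpers above, in illustrative pseudo code:
//   do { old = *addr_base; } while (!CAS64(addr_base, old, new_value));
//   dest_current_value = old;
// where new_value is exchange_value for getandsetd and old + inc_value
// (computed into tmp) for getandaddd; ldarx/stdcx_ provide the reservation
// instead of an explicit compare-and-swap.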
// Word/sub-word atomic helper functions
// Temps and addr_base are killed if size < 4 and processor does not support respective instructions.
// Only signed types are supported with size < 4.
// Atomic add always kills tmp1.
void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
Register addr_base, Register tmp1, Register tmp2, Register tmp3,
bool cmpxchgx_hint, bool is_add, int size) {
// Sub-word instructions are available since Power 8.
// For older processors, instruction_type != size holds, and we
// emulate the sub-word instructions by constructing a 4-byte value
// that leaves the other bytes unchanged.
const int instruction_type = VM_Version::has_lqarx() ? size : 4;
Label retry;
Register shift_amount = noreg,
val32 = dest_current_value,
modval = is_add ? tmp1 : exchange_value;
if (instruction_type != size) {
assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base);
modval = tmp1;
shift_amount = tmp2;
val32 = tmp3;
// Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
#ifdef VM_LITTLE_ENDIAN
rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
clrrdi(addr_base, addr_base, 2);
#else
xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
clrrdi(addr_base, addr_base, 2);
rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
#endif
}
// atomic emulation loop
bind(retry);
switch (instruction_type) {
case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
default: ShouldNotReachHere();
}
if (instruction_type != size) {
srw(dest_current_value, val32, shift_amount);
}
if (is_add) { add(modval, dest_current_value, exchange_value); }
if (instruction_type != size) {
// Transform exchange value such that the replacement can be done by one xor instruction.
xorr(modval, dest_current_value, is_add ? modval : exchange_value);
clrldi(modval, modval, (size == 1) ? 56 : 48);
slw(modval, modval, shift_amount);
xorr(modval, val32, modval);
}
switch (instruction_type) {
case 4: stwcx_(modval, addr_base); break;
case 2: sthcx_(modval, addr_base); break;
case 1: stbcx_(modval, addr_base); break;
default: ShouldNotReachHere();
}
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
} else {
bne( CCR0, retry); // StXcx_ sets CCR0.
}
// l?arx zero-extends, but Java wants byte/short values sign-extended.
if (size == 1) {
extsb(dest_current_value, dest_current_value);
} else if (size == 2) {
extsh(dest_current_value, dest_current_value);
};
}
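// Worked example of the sub-word emulation above (little-endian, size == 1,
// addr_base pointing at byte offset 2 of its aligned word; illustrative only):
//   shift_amount = (addr & 3) * 8 = 16
//   lwarx loads the whole word, srw moves the old byte into the low bits of
//   dest_current_value, and the xor/clrldi/slw/xor sequence splices the new
//   byte back into the otherwise unchanged 32-bit value before stwcx_.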
// Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions.
// Only signed types are supported with size < 4.
void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
Register compare_value, Register exchange_value,
Register addr_base, Register tmp1, Register tmp2,
Label &retry, Label &failed, bool cmpxchgx_hint, int size) {
// Sub-word instructions are available since Power 8.
// For older processors, instruction_type != size holds, and we
// emulate the sub-word instructions by constructing a 4-byte value
// that leaves the other bytes unchanged.
const int instruction_type = VM_Version::has_lqarx() ? size : 4;
Register shift_amount = noreg,
val32 = dest_current_value,
modval = exchange_value;
if (instruction_type != size) {
assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base);
shift_amount = tmp1;
val32 = tmp2;
modval = tmp2;
// Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
#ifdef VM_LITTLE_ENDIAN
rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
clrrdi(addr_base, addr_base, 2);
#else
xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
clrrdi(addr_base, addr_base, 2);
rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
#endif
// Transform exchange value such that the replacement can be done by one xor instruction.
xorr(exchange_value, compare_value, exchange_value);
clrldi(exchange_value, exchange_value, (size == 1) ? 56 : 48);
slw(exchange_value, exchange_value, shift_amount);
}
// atomic emulation loop
bind(retry);
switch (instruction_type) {
case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
default: ShouldNotReachHere();
}
if (instruction_type != size) {
srw(dest_current_value, val32, shift_amount);
}
if (size == 1) {
extsb(dest_current_value, dest_current_value);
} else if (size == 2) {
extsh(dest_current_value, dest_current_value);
};
cmpw(flag, dest_current_value, compare_value);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(flag, failed);
} else {
bne( flag, failed);
}
// branch to done => (flag == ne), (dest_current_value != compare_value)
// fall through => (flag == eq), (dest_current_value == compare_value)
if (instruction_type != size) {
xorr(modval, val32, exchange_value);
}
switch (instruction_type) {
case 4: stwcx_(modval, addr_base); break;
case 2: sthcx_(modval, addr_base); break;
case 1: stbcx_(modval, addr_base); break;
default: ShouldNotReachHere();
}
}
// CmpxchgX sets condition register to cmpX(current, compare).
void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
Register compare_value, Register exchange_value,
Register addr_base, Register tmp1, Register tmp2,
int semantics, bool cmpxchgx_hint,
Register int_flag_success, bool contention_hint, bool weak, int size) {
Label retry;
Label failed;
Label done;
// Save one branch if result is returned via register and
// result register is different from the other ones.
bool use_result_reg = (int_flag_success != noreg);
bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
int_flag_success != exchange_value && int_flag_success != addr_base &&
int_flag_success != tmp1 && int_flag_success != tmp2);
assert(!weak || flag == CCR0, "weak only supported with CCR0");
assert(size == 1 || size == 2 || size == 4, "unsupported");
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
if (contention_hint) { // Don't try to reserve if cmp fails.
switch (size) {
case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break;
case 2: lha(dest_current_value, 0, addr_base); break;
case 4: lwz(dest_current_value, 0, addr_base); break;
default: ShouldNotReachHere();
}
cmpw(flag, dest_current_value, compare_value);
bne(flag, failed);
}
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
retry, failed, cmpxchgx_hint, size);
if (!weak || use_result_reg) {
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
} else {
bne( CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
}
}
// fall through => (flag == eq), (dest_current_value == compare_value), (swapped)
// Result in register (must do this at the end because int_flag_success can be the
// same register as one above).
if (use_result_reg) {
li(int_flag_success, 1);
}
if (semantics & MemBarFenceAfter) {
fence();
} else if (semantics & MemBarAcq) {
isync();
}
if (use_result_reg && !preset_result_reg) {
b(done);
}
bind(failed);
if (use_result_reg && !preset_result_reg) {
li(int_flag_success, 0);
}
bind(done);
// (flag == ne) => (dest_current_value != compare_value), (!swapped)
// (flag == eq) => (dest_current_value == compare_value), ( swapped)
}
// Performs atomic compare exchange:
// if (compare_value == *addr_base)
// *addr_base = exchange_value
// int_flag_success = 1;
// else
// int_flag_success = 0;
//
// ConditionRegister flag = cmp(compare_value, *addr_base)
// Register dest_current_value = *addr_base
// Register compare_value Used to compare with value in memory
// Register exchange_value Written to memory if compare_value == *addr_base
// Register addr_base The memory location to compareXChange
// Register int_flag_success Set to 1 if exchange_value was written to *addr_base
//
// To avoid the costly compare-exchange, the value is tested beforehand.
// Several special cases exist to avoid generating unnecessary code.
//
void MacroAssembler::cmpxchgd(ConditionRegister flag,
Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
Register addr_base, int semantics, bool cmpxchgx_hint,
Register int_flag_success, Label* failed_ext, bool contention_hint, bool weak) {
Label retry;
Label failed_int;
Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
Label done;
// Save one branch if result is returned via register and result register is different from the other ones.
bool use_result_reg = (int_flag_success!=noreg);
bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(!weak || flag == CCR0, "weak only supported with CCR0");
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
if (contention_hint) { // Don't try to reserve if cmp fails.
ld(dest_current_value, 0, addr_base);
cmpd(flag, compare_value, dest_current_value);
bne(flag, failed);
}
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
// atomic emulation loop
bind(retry);
ldarx(dest_current_value, addr_base, cmpxchgx_hint);
cmpd(flag, compare_value, dest_current_value);
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(flag, failed);
} else {
bne( flag, failed);
}
stdcx_(exchange_value, addr_base);
if (!weak || use_result_reg || failed_ext) {
if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
bne_predict_not_taken(CCR0, weak ? failed : retry); // stXcx_ sets CCR0
} else {
bne( CCR0, weak ? failed : retry); // stXcx_ sets CCR0
}
}
// result in register (must do this at the end because int_flag_success can be the same register as one above)
if (use_result_reg) {
li(int_flag_success, 1);
}
if (semantics & MemBarFenceAfter) {
fence();
} else if (semantics & MemBarAcq) {
isync();
}
if (use_result_reg && !preset_result_reg) {
b(done);
}
bind(failed_int);
if (use_result_reg && !preset_result_reg) {
li(int_flag_success, 0);
}
bind(done);
// (flag == ne) => (dest_current_value != compare_value), (!swapped)
// (flag == eq) => (dest_current_value == compare_value), ( swapped)
}
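// Illustrative use (a sketch; the R* names are placeholders, and the hint and
// semantics arguments depend on the call site):
//   cmpxchgd(CCR0, Rcurrent, Rcompare, Rexchange, Raddr,
//            MacroAssembler::MemBarNone,
//            MacroAssembler::cmpxchgx_hint_atomic_update(),
//            noreg, &L_failed);
// On fall-through CCR0 is "equal" and the store has happened; at L_failed the
// old value is in Rcurrent.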
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
Register intf_klass,
RegisterOrConstant itable_index,
Register method_result,
Register scan_temp,
Register temp2,
Label& L_no_such_interface,
bool return_method) {
assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
// Compute start of first itableOffsetEntry (which is at the end of the vtable).
int vtable_base = in_bytes(Klass::vtable_start_offset());
int itentry_off = itableMethodEntry::method_offset_in_bytes();
int logMEsize = exact_log2(itableMethodEntry::size() * wordSize);
int scan_step = itableOffsetEntry::size() * wordSize;
int log_vte_size= exact_log2(vtableEntry::size_in_bytes());
lwz(scan_temp, in_bytes(Klass::vtable_length_offset()), recv_klass);
// %%% We should store the aligned, prescaled offset in the klassoop.
// Then the next several instructions would fold away.
sldi(scan_temp, scan_temp, log_vte_size);
addi(scan_temp, scan_temp, vtable_base);
add(scan_temp, recv_klass, scan_temp);
// Adjust recv_klass by scaled itable_index, so we can free itable_index.
if (return_method) {
if (itable_index.is_register()) {
Register itable_offset = itable_index.as_register();
sldi(method_result, itable_offset, logMEsize);
if (itentry_off) { addi(method_result, method_result, itentry_off); }
add(method_result, method_result, recv_klass);
} else {