/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// these are no-ops overridden by InterpreterMacroAssembler void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
// Calls to C land // // When entering C land, the fp, & esp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp) {
if (last_java_pc->is_valid()) {
sd(last_java_pc, Address(xthread,
JavaThread::frame_anchor_offset() +
JavaFrameAnchor::last_Java_pc_offset()));
}
// Records the last Java frame, taking the pc from label L.
// If L is not bound yet, a patch site is registered on the label and the
// current pc is passed as a placeholder inside an incompressible region.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register tmp) {
  if (!L.is_bound()) {
    L.add_patch_at(code(), locator());
    IncompressibleRegion ir(this);  // the label address will be patched back.
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
    return;
  }

  // Label already resolved: use its target directly.
  set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
}
void MacroAssembler::reset_last_Java_frame(bool clear_fp) { // we must set sp to zero to clear frame
sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
// must clear fp, so that compiled frames are not confused; it is // possible that we need it only for debugging if (clear_fp) {
sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
}
// Always clear the pc because it could have been set by make_walkable()
sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
}
// debugging support
assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
assert(java_thread == xthread, "unexpected register");
assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
// push java thread (becomes first argument of C function)
mv(c_rarg0, java_thread);
// set last Java frame before call
assert(last_java_sp != fp, "can't use fp");
void MacroAssembler::_verify_oop(Register reg, constchar* s, constchar* file, int line) { if (!VerifyOops) { return; }
// Pass register number to verify_oop_subroutine constchar* b = NULL;
{
ResourceMark rm;
stringStream ss;
ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
b = code_string(ss.as_string());
}
BLOCK_COMMENT("verify_oop {");
push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
mv(c_rarg0, reg); // c_rarg0 : x10 // The length of the instruction sequence emitted should be independent // of the value of the local char buffer address so that the size of mach // nodes for scratch emit and normal emit matches.
movptr(t0, (address)b);
// The length of the instruction sequence emitted should be independent // of the value of the local char buffer address so that the size of mach // nodes for scratch emit and normal emit matches.
movptr(t0, (address)b);
void MacroAssembler::emit_static_call_stub() {
IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size(). // CompiledDirectStaticCall::set_to_interpreted knows the // exact layout of this stub.
mov_metadata(xmethod, (Metadata*)NULL);
// Jump to the entry point of the c2i stub.
int32_t offset = 0;
movptr(t0, 0, offset);
jalr(x0, t0, offset);
}
// Calls entry_point with t0 and xmethod preserved across the call.
//   entry_point:         address to call.
//   number_of_arguments: not used by this body.
//   retaddr:             optional label, bound right after the call instruction.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  RegSet preserved = RegSet::of(t0, xmethod);

  push_reg(preserved, sp);   // save << t0 & xmethod >> on the stack
  call(entry_point);
  if (retaddr != NULL) {
    bind(*retaddr);
  }
  pop_reg(preserved, sp);    // restore << t0 & xmethod >> from the stack
}
// Convenience overload: forwards to call_VM_leaf_base without a return-address label.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
void MacroAssembler::li(Register Rd, int64_t imm) { // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff // li -> c.li if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) {
c_li(Rd, imm); return;
}
int shift = 12;
int64_t upper = imm, lower = imm; // Split imm to a lower 12-bit sign-extended part and the remainder, // because addi will sign-extend the lower imm.
lower = ((int32_t)imm << 20) >> 20;
upper -= lower;
// Expands a 32-bit register bitset into a list of register numbers.
//   bitset: bit i set means register number i is selected.
//   regs:   output array; receives the selected numbers, highest first.
// Returns the number of entries written to regs.
int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  int found = 0;
  // Walk from bit 31 down to bit 0 so the highest register comes first.
  for (int bit = 31; bit >= 0; bit--) {
    if ((bitset >> bit) & 1U) {
      regs[found++] = bit;
    }
  }
  return found;
}
// Push integer registers in the bitset supplied. Don't push sp. // Return the number of words pushed int MacroAssembler::push_reg(unsignedint bitset, Register stack) {
DEBUG_ONLY(int words_pushed = 0;) unsignedchar regs[32]; int count = bitset_to_regs(bitset, regs); // reserve one slot to align for odd count int offset = is_even(count) ? 0 : wordSize;
if (count) {
addi(stack, stack, -count * wordSize - offset);
} for (int i = count - 1; i >= 0; i--) {
sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
DEBUG_ONLY(words_pushed++;)
}
// Push floating-point registers in the bitset supplied. // Return the number of words pushed int MacroAssembler::push_fp(unsignedint bitset, Register stack) {
DEBUG_ONLY(int words_pushed = 0;) unsignedchar regs[32]; int count = bitset_to_regs(bitset, regs); int push_slots = count + (count & 1);
if (count) {
addi(stack, stack, -push_slots * wordSize);
}
for (int i = count - 1; i >= 0; i--) {
fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
DEBUG_ONLY(words_pushed++;)
}
#ifdef COMPILER2
// Push the vector registers selected by the bitset onto the given stack.
// Returns the number of words pushed.
int MacroAssembler::push_v(unsigned int bitset, Register stack) {
  const int bytes_per_vreg = Matcher::scalable_vector_reg_size(T_BYTE);

  // Turn the bitset into a list of register numbers.
  unsigned char selected[32];
  const int num_selected = bitset_to_regs(bitset, selected);

  // One slot of bytes_per_vreg per register, in list order.
  for (int idx = 0; idx < num_selected; idx++) {
    sub(stack, stack, bytes_per_vreg);
    vs1r_v(as_VectorRegister(selected[idx]), stack);
  }

  return num_selected * bytes_per_vreg / wordSize;
}
int MacroAssembler::pop_v(unsignedint bitset, Register stack) { int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
// Scan bitset to accumulate register pairs unsignedchar regs[32]; int count = bitset_to_regs(bitset, regs);
for (int i = count - 1; i >= 0; i--) {
vl1re8_v(as_VectorRegister(regs[i]), stack);
add(stack, stack, vector_size_in_bytes);
}
// Word-sized AND: Rd = Rs1 & Rs2, then normalized to a 32-bit value.
void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
  andr(Rd, Rs1, Rs2);
  // addw with zr clips the result to 32 bits and sign-extends it back into Rd.
  addw(Rd, Rd, zr);
}
// Word-sized OR: Rd = Rs1 | Rs2, then normalized to a 32-bit value.
void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
  orr(Rd, Rs1, Rs2);
  // addw with zr clips the result to 32 bits and sign-extends it back into Rd.
  addw(Rd, Rd, zr);
}
// Word-sized XOR: Rd = Rs1 ^ Rs2, then normalized to a 32-bit value.
void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
  xorr(Rd, Rs1, Rs2);
  // addw with zr clips the result to 32 bits and sign-extends it back into Rd.
  addw(Rd, Rd, zr);
}
// Note: load_unsigned_short used to be called load_unsigned_word. int MacroAssembler::load_unsigned_short(Register dst, Address src) { int off = offset();
lhu(dst, src); return off;
}
// Emits lbu from src into dst and returns the code offset captured just
// before the load was emitted.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  const int load_offset = offset();
  lbu(dst, src);
  return load_offset;
}
// Emits lh from src into dst and returns the code offset captured just
// before the load was emitted.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  const int load_offset = offset();
  lh(dst, src);
  return load_offset;
}
// Emits lb from src into dst and returns the code offset captured just
// before the load was emitted.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  const int load_offset = offset();
  lb(dst, src);
  return load_offset;
}
// Move a metadata address into a register. void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { int oop_index; if (obj == NULL) {
oop_index = oop_recorder()->allocate_metadata_index(obj);
} else {
oop_index = oop_recorder()->find_index(obj);
}
RelocationHolder rspec = metadata_Relocation::spec(oop_index);
mv(dst, Address((address)obj, rspec));
}
// Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tmp. void MacroAssembler::bang_stack_size(Register size, Register tmp) {
assert_different_registers(tmp, size, t0); // Bang stack for total size given plus shadow page size. // Bang one page at a time because large size can bang beyond yellow and // red zones.
mv(t0, os::vm_page_size());
Label loop;
bind(loop);
sub(tmp, sp, t0);
subw(size, size, t0);
sd(size, Address(tmp));
bgtz(size, loop);
// Bang down shadow pages too. // At this point, (tmp-0) is the last address touched, so don't // touch it again. (It was touched as (tmp-pagesize) but then tmp // was post-decremented.) Skip this address by starting at i=1, and // touch a few more pages below. N.B. It is important to touch all // the way down to and including i=StackShadowPages. for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { // this could be any sized move but this is can be a debugging crumb // so the bigger the better.
sub(tmp, tmp, os::vm_page_size());
sd(size, Address(tmp, 0));
}
}
// A null weak handle resolves to null.
beqz(result, resolved);
// Only 64 bit platforms support GCs that require a tmp register // Only IN_HEAP loads require a thread_tmp register // WeakHandle::resolve is an indirection like jweak.
access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
result, Address(result), tmp1, tmp2);
bind(resolved);
}
// Emits an explicit null check of reg when one is needed for the given offset.
void MacroAssembler::null_check(Register reg, int offset) {
  if (!needs_explicit_null_check(offset)) {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
    return;
  }

  // provoke OS NULL exception if reg = NULL by
  // accessing M[reg] w/o changing any registers
  // NOTE: this is plenty to provoke a segv
  ld(zr, Address(reg, 0));
}
// Stores the klass pointer held in src into the klass field of the object at dst.
// With UseCompressedClassPointers, src is encoded in place (using tmp) and
// stored as a 32-bit value; otherwise it is stored as a full word.
void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
  // FIXME: Should this be a store release? concurrent gcs assumes
  // klass length is valid if klass field is not null.
  Address klass_field(dst, oopDesc::klass_offset_in_bytes());

  if (!UseCompressedClassPointers) {
    sd(src, klass_field);
    return;
  }

  encode_klass_not_null(src, tmp);
  sw(src, klass_field);
}
// Stores src (32 bits) into the klass gap of the object at dst.
// Emits nothing unless UseCompressedClassPointers is set.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (!UseCompressedClassPointers) {
    return;
  }
  // Store to klass gap in destination
  sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
}
void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
assert(UseCompressedClassPointers, "should only be used for compressed headers");
void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
assert(UseCompressedClassPointers, "should only be used for compressed headers");
// Decodes the compressed oop in src into dst.
// With a non-zero shift: dst = src << shift, plus xheapbase when the
// compressed-oops base is non-NULL. With a zero shift: plain move.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert(UseCompressedOops, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (CompressedOops::shift() == 0) {
    assert(CompressedOops::base() == NULL, "sanity");
    mv(dst, src);
    return;
  }

  assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  slli(dst, src, LogMinObjAlignmentInBytes);
  if (CompressedOops::base() != NULL) {
    add(dst, xheapbase, dst);
  }
}
// Used for storing NULLs. void MacroAssembler::store_heap_oop_null(Address dst) {
access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
}
// Full implementation of Java idiv and irem.
//
// input : rs1: dividend
//         rs2: divisor
//
// result: remainder (= rs1 irem rs2) when want_remainder is set,
//         quotient  (= rs1 idiv rs2) otherwise
//
// Returns the (pc) offset of the div/rem instruction - may be needed
// for implicit exceptions.
int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
                                    bool want_remainder) {
  const int insn_offset = offset();
  if (want_remainder) {
    remw(result, rs1, rs2);  // result = rs1 % rs2;
  } else {
    divw(result, rs1, rs2);
  }
  return insn_offset;
}
// Full implementation of Java ldiv and lrem.
//
// input : rs1: dividend
//         rs2: divisor
//
// result: remainder (= rs1 irem rs2) when want_remainder is set,
//         quotient  (= rs1 idiv rs2) otherwise
//
// Returns the (pc) offset of the div/rem instruction - may be needed
// for implicit exceptions.
int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
                                    bool want_remainder) {
  const int insn_offset = offset();
  if (want_remainder) {
    rem(result, rs1, rs2);  // result = rs1 % rs2;
  } else {
    div(result, rs1, rs2);
  }
  return insn_offset;
}
// Look up the method for a megamorpic invkkeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass. // On success, the result will be in method_result, and execution falls through. // On failure, execution transfers to the given label. void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass,
RegisterOrConstant itable_index, Register method_result, Register scan_tmp,
Label& L_no_such_interface, bool return_method) {
assert_different_registers(recv_klass, intf_klass, scan_tmp);
assert_different_registers(method_result, intf_klass, scan_tmp);
assert(recv_klass != method_result || !return_method, "recv_klass can be destroyed when mehtid isn't needed");
assert(itable_index.is_constant() || itable_index.as_register() == method_result, "caller must be same register for non-constant itable index as for method");
// Compute start of first itableOffsetEntry (which is at the end of the vtable). int vtable_base = in_bytes(Klass::vtable_start_offset()); int itentry_off = itableMethodEntry::method_offset_in_bytes(); int scan_step = itableOffsetEntry::size() * wordSize; int vte_size = vtableEntry::size_in_bytes();
assert(vte_size == wordSize, "else adjust times_vte_scale");
// %%% Could store the aligned, prescaled offset in the klassoop.
shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
add(scan_tmp, scan_tmp, vtable_base);
if (return_method) { // Adjust recv_klass by scaled itable_index, so we can free itable_index.
assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); if (itable_index.is_register()) {
slli(t0, itable_index.as_register(), 3);
} else {
mv(t0, itable_index.as_constant() << 3);
}
add(recv_klass, recv_klass, t0); if (itentry_off) {
add(recv_klass, recv_klass, itentry_off);
}
}
Label search, found_method;
ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
beq(intf_klass, method_result, found_method);
bind(search); // Check that the previous entry is non-null. A null entry means that // the receiver class doesn't implement the interface, and wasn't the // same as when the caller was compiled.
beqz(method_result, L_no_such_interface, /* is_far */ true);
addi(scan_tmp, scan_tmp, scan_step);
ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
bne(intf_klass, method_result, search);
bind(found_method);
// Got a hit. if (return_method) {
lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
add(method_result, recv_klass, scan_tmp);
ld(method_result, Address(method_result));
}
}
// virtual method calling void MacroAssembler::lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index, Register method_result) { constint base = in_bytes(Klass::vtable_start_offset());
assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
NativeMembar *bar = NativeMembar_at(prev); // We are merging two memory barrier instructions. On RISCV we // can do this simply by ORing them together.
bar->set_kind(bar->get_kind() | order_constraint);
BLOCK_COMMENT("merged membar");
} else {
code()->set_last_insn(pc());
// Form an address from base + offset in Rd.  Rd may or may not
// actually be used: you must use the Address that is returned.
// An offset that fits the 12-bit immediate range yields Address(base, offset)
// directly; otherwise base + offset is materialized into Rd.
Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
  if (!is_offset_in_range(byte_offset, 12)) {  // 12: imm in range 2^12
    // Do it the hard way
    mv(Rd, byte_offset);
    add(Rd, base, Rd);
    return Address(Rd);
  }
  return Address(base, byte_offset);
}
void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
Label &succeed, Label *fail) { // oldv holds comparison value // newv holds value to write in exchange // addr identifies memory word to compare against/update
Label retry_load, nope;
bind(retry_load); // Load reserved from the memory location
lr_d(tmp, addr, Assembler::aqrl); // Fail and exit if it is not what we expect
bne(tmp, oldv, nope); // If the store conditional succeeds, tmp will be zero
sc_d(tmp, newv, addr, Assembler::rl);
beqz(tmp, succeed); // Retry only when the store conditional failed
j(retry_load);
// Jumps (no link) to entry.
// With far branches enabled, a relocated la_patchable + jalr pair via tmp is
// emitted; otherwise a plain j. The whole body is an incompressible region.
void MacroAssembler::far_jump(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
        || entry.rspec().type() == relocInfo::runtime_call_type
        || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  IncompressibleRegion ir(this);  // Fixed length: see MacroAssembler::far_branch_size()

  if (!far_branches()) {
    j(entry);
    return;
  }

  // We can use auipc + jalr here because we know that the total size of
  // the code cache cannot exceed 2Gb.
  relocate(entry.rspec(), [&] {
    int32_t low_part;
    la_patchable(tmp, entry, low_part);
    jalr(x0, tmp, low_part);
  });
}
// Calls entry, linking through x1.
// With far branches enabled, a relocated la_patchable + jalr pair via tmp is
// emitted; otherwise a plain jal. The whole body is an incompressible region.
void MacroAssembler::far_call(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
        || entry.rspec().type() == relocInfo::runtime_call_type
        || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  IncompressibleRegion ir(this);  // Fixed length: see MacroAssembler::far_branch_size()

  if (!far_branches()) {
    jal(entry);  // link
    return;
  }

  // We can use auipc + jalr here because we know that the total size of
  // the code cache cannot exceed 2Gb.
  relocate(entry.rspec(), [&] {
    int32_t low_part;
    la_patchable(tmp, entry, low_part);
    jalr(x1, tmp, low_part);  // link
  });
}
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Register super_klass, Register tmp_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path, Register super_check_offset) {
assert_different_registers(sub_klass, super_klass, tmp_reg); bool must_load_sco = (super_check_offset == noreg); if (must_load_sco) {
assert(tmp_reg != noreg, "supply either a temp or a register offset");
} else {
assert_different_registers(sub_klass, super_klass, super_check_offset);
}
Label L_fallthrough; int label_nulls = 0; if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in batch");
int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); int sco_offset = in_bytes(Klass::super_check_offset_offset());
Address super_check_offset_addr(super_klass, sco_offset);
// Hacked jmp, which may only be used just before L_fallthrough. #define final_jmp(label) \ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ else j(label) /*omit semi*/
// If the pointers are equal, we are done (e.g., String[] elements). // This self-check enables sharing of secondary supertype arrays among // non-primary types such as array-of-interface. Otherwise, each such // type would need its own customized SSA. // We move this check to the front of the fast path because many // type checks are in fact trivially successful in this manner, // so we get a nicely predicted branch right at the start of the check.
beq(sub_klass, super_klass, *L_success);
// This check has worked decisively for primary supers. // Secondary supers are sought in the super_cache ('super_cache_addr'). // (Secondary supers are interfaces and very deeply nested subtypes.) // This works in the same check above because of a tricky aliasing // between the super_Cache and the primary super display elements. // (The 'super_check_addr' can address either, as the case requires.) // Note that the cache is updated below if it does not help us find // what we need immediately. // So if it was a primary super, we can just fail immediately. // Otherwise, it's the slow path for us (no success at this point).
Label L_fallthrough; int label_nulls = 0; if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// A couple of useful fields in sub_klass: int ss_offset = in_bytes(Klass::secondary_supers_offset()); int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
BLOCK_COMMENT("check_klass_subtype_slow_path");
// Do a linear scan of the secondary super-klass chain. // This code is rarely used, so simplicity is a virtue here. // The repne_scan instruction uses fixed registers, which we must spill. // Don't worry too much about pre-existing connections with the input regs.
assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
RegSet pushed_registers; if (!IS_A_TEMP(x12)) {
pushed_registers += x12;
} if (!IS_A_TEMP(x15)) {
pushed_registers += x15;
}
if (super_klass != x10) { if (!IS_A_TEMP(x10)) {
pushed_registers += x10;
}
}
push_reg(pushed_registers, sp);
// Get super_klass value into x10 (even if it was in x15 or x12)
mv(x10, super_klass);
// We will consult the secondary-super array.
ld(x15, secondary_supers_addr); // Load the array length.
lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes())); // Skip to start of data.
add(x15, x15, Array<Klass*>::base_offset_in_bytes());
// Set t0 to an obvious invalid value, falling through by default
mv(t0, -1); // Scan X12 words at [X15] for an occurrence of X10.
repne_scan(x15, x10, x12, t0);
// pop will restore x10, so we should use a temp register to keep its value
mv(t1, x10);
// Unspill the temp registers:
pop_reg(pushed_registers, sp);
bne(t1, t0, *L_failure);
// Success. Cache the super we found and proceed in triumph.
sd(super_klass, super_cache_addr);
if (L_success != &L_fallthrough) {
j(*L_success);
}
// get_thread() can be called anywhere inside generated code so we // need to save whatever non-callee save context might get clobbered // by the call to Thread::current() or, indeed, the call setup code. void MacroAssembler::get_thread(Register thread) { // save all call-clobbered regs except thread
RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
RegSet::range(x28, x31) + ra - thread;
push_reg(saved_regs, sp);
assert(is_valid_riscv64_address(dest.target()), "bad address");
assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
// RISC-V doesn't compute a page-aligned address, in order to partially // compensate for the use of *signed* offsets in its base+disp12 // addressing mode (RISC-V's PC-relative reach remains asymmetric // [-(2G + 2K), 2G - 2K). if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
int64_t distance = dest.target() - pc();
auipc(reg1, (int32_t)distance + 0x800);
offset = ((int32_t)distance << 20) >> 20;
} else {
movptr(reg1, dest.target(), offset);
}
}
// Allocates a frame of framesize bytes below sp and saves fp and ra in its
// two topmost word slots. With PreserveFramePointer, fp is then set to the
// sp value from before the allocation.
void MacroAssembler::build_frame(int framesize) {
  assert(framesize >= 2, "framesize must include space for FP/RA");
  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");

  const int saved_fp_offset = framesize - 2 * wordSize;
  const int saved_ra_offset = framesize - wordSize;

  sub(sp, sp, framesize);
  sd(fp, Address(sp, saved_fp_offset));
  sd(ra, Address(sp, saved_ra_offset));
  if (PreserveFramePointer) {
    add(fp, sp, framesize);
  }
}
// Reloads fp and ra from the two topmost word slots of a framesize-byte
// frame and then releases the frame by adding framesize to sp.
void MacroAssembler::remove_frame(int framesize) {
  assert(framesize >= 2, "framesize must include space for FP/RA");
  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");

  const int saved_fp_offset = framesize - 2 * wordSize;
  const int saved_ra_offset = framesize - wordSize;

  ld(fp, Address(sp, saved_fp_offset));
  ld(ra, Address(sp, saved_ra_offset));
  add(sp, sp, framesize);
}
void MacroAssembler::reserved_stack_check() { // testing if reserved zone needs to be enabled
Label no_reserved_zone_enabling;
enter(); // RA and FP are live.
mv(c_rarg0, xthread);
RuntimeAddress target(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
relocate(target.rspec(), [&] {
int32_t offset;
la_patchable(t0, target, offset);
jalr(x1, t0, offset);
});
leave();
// We have already removed our own frame. // throw_delayed_StackOverflowError will think that it's been // called by our caller.
target = RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry());
relocate(target.rspec(), [&] {
int32_t offset;
la_patchable(t0, target, offset);
jalr(x0, t0, offset);
});
should_not_reach_here();
bind(no_reserved_zone_enabling);
}
// Move the address of the polling page into dest. void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
ld(dest, Address(xthread, JavaThread::polling_page_offset()));
}
// Read the polling page. The address of the polling page must // already be in r. void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
relocate(rtype, [&] {
lwu(zr, Address(r, offset));
});
}
// Emits a 32-bit immediate load into dst under an oop relocation for obj,
// then zero-extends dst from 32 bits.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
#ifdef ASSERT
  {
    ThreadInVMfromUnknown tiv;
    assert (UseCompressedOops, "should only be used for compressed oops");
    assert (Universe::heap() != NULL, "java heap should be initialized");
    assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
    assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
  }
#endif
  const int recorder_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(recorder_index);
  relocate(rspec, [&] {
    // NOTE(review): 0xDEADBEEF looks like a placeholder that the relocation
    // replaces with the real narrow oop later - confirm.
    li32(dst, 0xDEADBEEF);
  });
  zero_extend(dst, dst, 32);
}
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k);
assert(!Universe::heap()->is_in(k), "should not be an oop");
// Maybe emit a call via a trampoline. If the code cache is small // trampolines won't be emitted.
address MacroAssembler::trampoline_call(Address entry) {
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
entry.rspec().type() == relocInfo::static_call_type ||
entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
address target = entry.target();
// We need a trampoline if branches are far. if (far_branches()) { if (!in_scratch_emit_size()) { if (entry.rspec().type() == relocInfo::runtime_call_type) {
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
code()->share_trampoline_for(entry.target(), offset());
} else {
address stub = emit_trampoline_stub(offset(), target); if (stub == NULL) {
postcond(pc() == badAddress); return NULL; // CodeCache is full
}
}
}
target = pc();
}
// Emit a trampoline stub for a call to a target which is too far away. // // code sequences: // // call-site: // branch-and-link to <destination> or <trampoline stub> // // Related trampoline stub for this call site in the stub section: // load the call target from the constant pool // branch (RA still points to the call site above)
// We are always 4-byte aligned here.
assert_alignment(pc());
// Create a trampoline stub relocation which relates this trampoline stub // with the call instruction at insts_call_instruction_offset in the // instructions code-section.
// Make sure the address of the destination is 8-byte aligned after 3 instructions.
align(wordSize, NativeCallTrampolineStub::data_offset);
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
end_a_stub(); return stub_start_addr;
}
// Turns dst into an Address that can be used directly, using tmp when a
// register is needed to hold the computed address.
Address MacroAssembler::add_memory_helper(const Address dst, Register tmp) {
  // base_plus_offset is the expected mode, although all the other
  // forms are allowed and handled below.
  if (dst.getMode() == Address::base_plus_offset) {
    return form_address(tmp, dst.base(), dst.offset());
  }

  la(tmp, dst);
  return Address(tmp);
}
// Next infrequent code is moved outside loops.
bind(L_last_x);
lwu(product_hi, Address(x, 0));
j(L_third_loop_prologue);
bind(L_done);
} #endif
// Count bits of trailing zero chars from lsb to msb until first non-zero element. // For LL case, one byte for one element, so shift 8 bits once, and for other case, // shift 16 bits once. void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) { if (UseZbb) {
assert_different_registers(Rd, Rs, tmp1); int step = isLL ? 8 : 16;
ctz(Rd, Rs);
andi(tmp1, Rd, step - 1);
sub(Rd, Rd, tmp1); return;
}
// This instruction reads adjacent 4 bytes from the lower half of source register, // inflate into a register, for example: // Rs: A7A6A5A4A3A2A1A0 // Rd: 00A300A200A100A0 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
assert_different_registers(Rd, Rs, tmp1, tmp2);
mv(tmp1, 0xFF);
mv(Rd, zr); for (int i = 0; i <= 3; i++) {
andr(tmp2, Rs, tmp1); if (i) {
slli(tmp2, tmp2, i * 8);
}
orr(Rd, Rd, tmp2); if (i != 3) {
slli(tmp1, tmp1, 8);
}
}
}
// This instruction reads adjacent 4 bytes from the upper half of source register, // inflate into a register, for example: // Rs: A7A6A5A4A3A2A1A0 // Rd: 00A700A600A500A4 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) {
assert_different_registers(Rd, Rs, tmp1, tmp2);
mv(tmp1, 0xFF00000000);
mv(Rd, zr); for (int i = 0; i <= 3; i++) {
andr(tmp2, Rs, tmp1);
orr(Rd, Rd, tmp2);
srli(Rd, Rd, 8); if (i != 3) {
slli(tmp1, tmp1, 8);
}
}
}
// The size of the blocks erased by the zero_blocks stub. We must // handle anything smaller than this ourselves in zero_words(). constint MacroAssembler::zero_words_block_size = 8;
// zero_words() is used by C2 ClearArray patterns. It is as small as // possible, handling small word counts locally and delegating // anything larger to the zero_blocks stub. It is expanded many times // in compiled code, so it is important to keep it short.
// ptr: Address of a buffer to be zeroed. // cnt: Count in HeapWords. // // ptr, cnt, and t0 are clobbered.
// Emits code that zeroes cnt words starting at ptr. Returns NULL when the trampoline
// call to the zero_blocks stub cannot be emitted.
// NOTE(review): the end of this definition (normal return value and closing brace) is
// not visible in this chunk.
address MacroAssembler::zero_words(Register ptr, Register cnt) {
assert(is_power_of_2(zero_words_block_size), "adjust this");
assert(ptr == x28 && cnt == x29, "mismatch in register usage");
assert_different_registers(cnt, t0);
BLOCK_COMMENT("zero_words {");
mv(t0, zero_words_block_size);
Label around, done, done16;
// Counts below the block size skip the stub call and go straight to the tail code.
bltu(cnt, t0, around);
{
// Reach the zero_blocks stub through a trampoline once StubRoutines::riscv::complete()
// reports true, otherwise with a direct jal.
// presumably the stub leaves ptr advanced and a residual count below the block size
// in cnt — confirm against the zero_blocks stub.
RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); if (StubRoutines::riscv::complete()) {
address tpc = trampoline_call(zero_blocks); if (tpc == NULL) {
DEBUG_ONLY(reset_labels(around));
postcond(pc() == badAddress); return NULL;
}
} else {
jal(zero_blocks);
}
}
// Tail: for each power of two i from block_size / 2 down to 2, test that bit of cnt;
// when set, store i zero words and advance ptr past them.
bind(around); for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
Label l;
andi(t0, cnt, i);
beqz(t0, l); for (int j = 0; j < i; j++) {
sd(zr, Address(ptr, j * wordSize));
}
addi(ptr, ptr, i * wordSize);
bind(l);
}
// Last single word, stored only when bit 0 of cnt is set (ptr is not advanced here).
{
Label l;
andi(t0, cnt, 1);
beqz(t0, l);
sd(zr, Address(ptr, 0));
bind(l);
}
// NOTE(review): this overload is cut off after the remainder loop; the unrolled main
// loop and the closing braces are not visible in this chunk.
// NOTE(review): `constint` below looks like a fused `const int`, and the declaration of
// `remainder` has been swallowed by the preceding line comment — repair once the full
// definition is available.
// base: Address of a buffer to be zeroed, 8 bytes aligned. // cnt: Immediate count in HeapWords. void MacroAssembler::zero_words(Register base, uint64_t cnt) {
assert_different_registers(base, t0, t1);
BLOCK_COMMENT("zero_words {");
// Small constant counts: emit one sd per word, unrolled at code-generation time.
if (cnt <= SmallArraySize / BytesPerLong) { for (int i = 0; i < (int)cnt; i++) {
sd(zr, Address(base, i * wordSize));
}
// Larger counts: first store the leading (cnt % unroll) words individually.
} else { constint unroll = 8; // Number of sd(zr, adr), instructions we'll unroll int remainder = cnt % unroll; for (int i = 0; i < remainder; i++) {
sd(zr, Address(base, i * wordSize));
}
// base: Address of a buffer to be filled, 8 bytes aligned. // cnt: Count in 8-byte unit. // value: Value to be filled with. // base will point to the end of the buffer after filling. void MacroAssembler::fill_words(Register base, Register cnt, Register value) { // Algorithm: // // t0 = cnt & 7 // cnt -= t0 // p += t0 // switch (t0): // switch start: // do while cnt // cnt -= 8 // p[-8] = value // case 7: // p[-7] = value // case 6: // p[-6] = value // // ... // case 1: // p[-1] = value // case 0: // p += 8 // do-while end // switch end
Label fini, skip, entry, loop; constint unroll = 8; // Number of sd instructions we'll unroll
beqz(cnt, fini);
andi(t0, cnt, unroll - 1);
sub(cnt, cnt, t0); // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
shadd(base, t0, base, t1, 3);
la(t1, entry);
slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
sub(t1, t1, t0);
jr(t1);
bind(loop);
add(base, base, unroll * 8); for (int i = -unroll; i < 0; i++) {
sd(value, Address(base, i * 8));
}
bind(entry);
sub(cnt, cnt, unroll);
bgez(cnt, loop);
bind(fini);
}
// NOTE(review): only the alignment prologue of zero_dcache_blocks is visible here; the
// CBO.ZERO loop and the end of the function are not shown in this chunk.
// Zero blocks of memory by using CBO.ZERO. // // Aligns the base address first sufficiently for CBO.ZERO, then uses // CBO.ZERO repeatedly for every full block. cnt is the size to be // zeroed in HeapWords. Returns the count of words left to be zeroed // in cnt. // // NOTE: This is intended to be used in the zero_blocks() stub. If // you want to use it elsewhere, note that cnt must be >= CacheLineSize. void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
Label initial_table_end, loop;
// Align base with cache line size.
// tmp1 = (-base) & (CacheLineSize - 1): distance in bytes up to the next cache-line boundary.
neg(tmp1, base);
andi(tmp1, tmp1, CacheLineSize - 1);
// tmp1: the number of bytes to be filled to align the base with cache line size.
add(base, base, tmp1);
// Bytes -> words (>> 3); those words are zeroed by the store table below, so drop
// them from cnt.
srai(tmp2, tmp1, 3);
sub(cnt, cnt, tmp2);
// Bytes / 2 == words * 4: the code distance to back up from initial_table_end so that
// exactly the needed number of sd instructions run.
// assumes every sd in the table is a 4-byte encoding — TODO confirm.
srli(tmp2, tmp1, 1);
la(tmp1, initial_table_end);
sub(tmp2, tmp1, tmp2);
// Store table: one sd per word at negative offsets from the already-aligned base.
jr(tmp2); for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
sd(zr, Address(base, i));
}
bind(initial_table_end);
#define FCMP(FLOATTYPE, FLOATSIG) \ void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
FloatRegister Rs2, int unordered_result) { \
Label Ldone; \ if (unordered_result < 0) { \ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ /* installs 1 if gt else 0 */ \
flt_##FLOATSIG(result, Rs2, Rs1); \ /* Rs1 > Rs2, install 1 */ \
bgtz(result, Ldone); \
feq_##FLOATSIG(result, Rs1, Rs2); \
addi(result, result, -1); \ /* Rs1 = Rs2, install 0 */ \ /* NaN or Rs1 < Rs2, install -1 */ \
bind(Ldone); \
} else { \ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ /* installs 1 if gt or unordered else 0 */ \
flt_##FLOATSIG(result, Rs1, Rs2); \ /* Rs1 < Rs2, install -1 */ \
bgtz(result, Ldone); \
feq_##FLOATSIG(result, Rs1, Rs2); \
addi(result, result, -1); \ /* Rs1 = Rs2, install 0 */ \ /* NaN or Rs1 > Rs2, install 1 */ \
bind(Ldone); \
neg(result, result); \
} \
}
FCMP(float, s);
FCMP(double, d);
#undef FCMP
// NOTE(review): only the argument checks of zero_memory are visible here; the zeroing
// code itself and the end of the function are not shown in this chunk.
// Zero words; len is in bytes // Destroys all registers except addr // len must be a nonzero multiple of wordSize void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
assert_different_registers(addr, len, tmp, t0, t1);
#ifdef ASSERT
{
// Debug builds only: stop at run time if len has any bits set below BytesPerWord.
Label L;
andi(t0, len, BytesPerWord - 1);
beqz(t0, L);
stop("len is not a multiple of BytesPerWord");
bind(L);
} #endif// ASSERT
// The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by // the following value. staticint reg2offset_in(VMReg r) { // Account for saved fp and ra // This should really be in_preserve_stack_slots return r->reg2stack() * VMRegImpl::stack_slot_size;
}
// On 64 bit we will store integer like items to the stack as // 64 bits items (riscv64 abi) even though java would only store // 32bits for a parameter. On 32bit it will simply be 32 bits // So this routine will do 32->32 on 32bit and 32->64 on 64bit void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack
ld(tmp, Address(fp, reg2offset_in(src.first())));
sd(tmp, Address(sp, reg2offset_out(dst.first())));
} else { // stack to reg
lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
}
} elseif (dst.first()->is_stack()) { // reg to stack
sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
} else { if (dst.first() != src.first()) { // 32bits extend sign
addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
}
}
}
// NOTE(review): the middle of object_move is missing from this chunk — the branch that
// handles an oop arriving in a register (where rOop, offset and oop_slot would be
// declared) is not visible, and several declarations and `if` headers below have been
// swallowed by line comments. Restore the full definition before changing this code.
// An oop arg. Must pass a handle not the oop itself void MacroAssembler::object_move(OopMap* map, int oop_handle_offset, int framesize_in_slots,
VMRegPair src,
VMRegPair dst, bool is_receiver, int* receiver_offset) {
assert_cond(map != NULL && receiver_offset != NULL);
// must pass a handle. First figure out the location we use as a handle Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
// See if oop is NULL if it is we need no handle
if (src.first()->is_stack()) { // Oop is already on the stack as an argument int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
// Record the caller-frame slot holding the oop in the oop map; for the receiver,
// also report that slot's byte offset through receiver_offset.
map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); if (is_receiver) {
*receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
}
map->set_oop(VMRegImpl::stack2reg(oop_slot)); // Store oop in handle area, may be NULL
sd(rOop, Address(sp, offset)); if (is_receiver) {
*receiver_offset = offset;
}
//rOop maybe the same as rHandle if (rOop == rHandle) {
// Same register: overwrite it with the handle address only when the oop is non-zero,
// so a NULL oop yields a NULL handle.
Label isZero;
beqz(rOop, isZero);
la(rHandle, Address(sp, offset));
bind(isZero);
} else {
// Distinct registers: compute the handle address first, then replace it with zero
// when the oop is NULL.
Label notZero2;
la(rHandle, Address(sp, offset));
bnez(rOop, notZero2);
mv(rHandle, zr);
bind(notZero2);
}
}
// If arg is on the stack then place it otherwise it is already in correct reg. if (dst.first()->is_stack()) {
sd(rHandle, Address(sp, reg2offset_out(dst.first())));
}
}
// A float arg may have to do float reg int reg conversion void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
assert(src.first()->is_stack() && dst.first()->is_stack() ||
src.first()->is_reg() && dst.first()->is_reg() ||
src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); if (src.first()->is_stack()) { if (dst.first()->is_stack()) {
lwu(tmp, Address(fp, reg2offset_in(src.first())));
sw(tmp, Address(sp, reg2offset_out(dst.first())));
} elseif (dst.first()->is_Register()) {
lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
} else {
ShouldNotReachHere();
}
} elseif (src.first() != dst.first()) { if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
} else {
ShouldNotReachHere();
}
}
}
// A long move void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack
ld(tmp, Address(fp, reg2offset_in(src.first())));
sd(tmp, Address(sp, reg2offset_out(dst.first())));
} else { // stack to reg
ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
}
} elseif (dst.first()->is_stack()) { // reg to stack
sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
} else { if (dst.first() != src.first()) {
mv(dst.first()->as_Register(), src.first()->as_Register());
}
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.