/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// If insn1 and insn2 use the same register to form an address, either // by an offsetted LDR or a simple ADD, return the offset. If the // second instruction is an LDR, the offset may be scaled. staticbool offset_for(uint32_t insn1, uint32_t insn2, ptrdiff_t &byte_offset) { if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
Instruction_aarch64::extract(insn1, 4, 0) ==
Instruction_aarch64::extract(insn2, 9, 5)) { // Load/store register (unsigned immediate)
byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
uint32_t size = Instruction_aarch64::extract(insn2, 31, 30);
byte_offset <<= size; returntrue;
} elseif (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
Instruction_aarch64::extract(insn1, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) { // add (immediate)
byte_offset = Instruction_aarch64::extract(insn2, 21, 10); returntrue;
} returnfalse;
}
// Patch any kind of instruction; there may be several instructions. // Return the total length (in bytes) of the instructions. int MacroAssembler::pd_patch_instruction_size(address insn_addr, address target) {
Patcher patcher(insn_addr); return patcher.run(insn_addr, target);
}
// Patch the 16-bit immediate fields (bits 20..5) of the move sequence at
// insn_addr so that it encodes the oop o.  Returns the size in bytes of
// the sequence that was patched (two or three instructions).
int MacroAssembler::patch_oop(address insn_addr, address o) {
  const unsigned first_insn = *(unsigned*)insn_addr;
  assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");

  // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
  if (Instruction_aarch64::extract(first_insn, 31, 21) == 0b11010010101) {
    // Move narrow OOP: upper half goes into the first instruction,
    // lower half into the second.
    const uint32_t narrow = CompressedOops::narrow_oop_value(cast_to_oop(o));
    Instruction_aarch64::patch(insn_addr, 20, 5, narrow >> 16);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, narrow & 0xffff);
    return 2 * NativeInstruction::instruction_size;
  }

  // Move wide OOP: three 16-bit slices, lowest first.
  assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
  const uintptr_t wide = (uintptr_t)o;
  Instruction_aarch64::patch(insn_addr, 20, 5, wide & 0xffff);
  Instruction_aarch64::patch(insn_addr+4, 20, 5, (wide >> 16) & 0xffff);
  Instruction_aarch64::patch(insn_addr+8, 20, 5, (wide >> 32) & 0xffff);
  return 3 * NativeInstruction::instruction_size;
}
// Patch the two-instruction move sequence at insn_addr so that it encodes
// the narrow klass value n.  Returns the size in bytes of the sequence.
int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
  // Metadata pointers are either narrow (32 bits) or wide (48 bits).
  // We encode narrow ones by setting the upper 16 bits in the first
  // instruction.
  const address second_insn_addr = insn_addr + 4;
  NativeInstruction *first_insn = nativeInstruction_at(insn_addr);
  assert(Instruction_aarch64::extract(first_insn->encoding(), 31, 21) == 0b11010010101 &&
         nativeInstruction_at(second_insn_addr)->is_movk(), "wrong insns in patch");

  // Upper half into the first instruction, lower half into the second.
  Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
  Instruction_aarch64::patch(second_insn_addr, 20, 5, n & 0xffff);
  return 2 * NativeInstruction::instruction_size;
}
void MacroAssembler::reset_last_Java_frame(bool clear_fp) { // we must set sp to zero to clear frame
str(zr, Address(rthread, JavaThread::last_Java_sp_offset()));
// must clear fp, so that compiled frames are not confused; it is // possible that we need it only for debugging if (clear_fp) {
str(zr, Address(rthread, JavaThread::last_Java_fp_offset()));
}
// Always clear the pc because it could have been set by make_walkable()
str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
}
// Calls to C land // // When entering C land, the rfp, & resp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register scratch) {
if (last_java_pc->is_valid()) {
str(last_java_pc, Address(rthread,
JavaThread::frame_anchor_offset()
+ JavaFrameAnchor::last_Java_pc_offset()));
}
// Emit a call to entry.  A plain BL is used unless
// target_needs_far_branch() reports that the target needs a far branch,
// in which case the address is formed in tmp (ADRP + ADD) and called
// with BLR.
void MacroAssembler::far_call(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  if (!target_needs_far_branch(entry.target())) {
    bl(entry);
    return;
  }

  // We can use ADRP here because we know that the total size of
  // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
  uint64_t adrp_offset;
  adrp(tmp, entry, adrp_offset);
  add(tmp, tmp, adrp_offset);
  blr(tmp);
}
// Emit a jump to entry.  A plain B is used unless
// target_needs_far_branch() reports that the target needs a far branch,
// in which case the address is formed in tmp (ADRP + ADD) and reached
// with BR.  Returns the number of bytes emitted.
int MacroAssembler::far_jump(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  // Remember where emission started so the emitted length can be returned.
  address start = pc();

  if (!target_needs_far_branch(entry.target())) {
    b(entry);
    return pc() - start;
  }

  // We can use ADRP here because we know that the total size of
  // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
  uint64_t adrp_offset;
  adrp(tmp, entry, adrp_offset);
  add(tmp, tmp, adrp_offset);
  br(tmp);
  return pc() - start;
}
void MacroAssembler::reserved_stack_check() { // testing if reserved zone needs to be enabled
Label no_reserved_zone_enabling;
enter(); // LR and FP are live.
lea(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
mov(c_rarg0, rthread);
blr(rscratch1);
leave();
// We have already removed our own frame. // throw_delayed_StackOverflowError will think that it's been // called by our caller.
lea(rscratch1, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()));
br(rscratch1);
should_not_reach_here();
// debugging support
assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
assert(java_thread == rthread, "unexpected register"); #ifdef ASSERT // TraceBytecodes does not use r12 but saves it over the call, so don't verify // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?"); #endif// ASSERT
assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
// push java thread (becomes first argument of C function)
mov(c_rarg0, java_thread);
// set last Java frame before call
assert(last_java_sp != rfp, "can't use rfp");
// do the call, remove parameters
MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
// lr could be poisoned with PAC signature during throw_pending_exception // if it was tail-call optimized by compiler, since lr is not callee-saved // reload it with proper value
adr(lr, l);
// reset last Java frame // Only interpreter should have to clear fp
reset_last_Java_frame(true);
// C++ interp handles this in the interpreter
check_and_handle_popframe(java_thread);
check_and_handle_earlyret(java_thread);
if (check_exceptions) { // check for pending exceptions (java_thread is set upon return)
ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
Label ok;
cbz(rscratch1, ok);
lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
br(rscratch1);
bind(ok);
}
// get oop result if there is one and reset the value in the thread if (oop_result->is_valid()) {
get_vm_result(oop_result, java_thread);
}
}
// Check the entry target is always reachable from any branch. staticbool is_always_within_branch_range(Address entry) { const address target = entry.target();
if (!CodeCache::contains(target)) { // We always use trampolines for callees outside CodeCache.
assert(entry.rspec().type() == relocInfo::runtime_call_type, "non-runtime call of an external target"); returnfalse;
}
if (!MacroAssembler::far_branches()) { returntrue;
}
if (entry.rspec().type() == relocInfo::runtime_call_type) { // Runtime calls are calls of a non-compiled method (stubs, adapters). // Non-compiled methods stay forever in CodeCache. // We check whether the longest possible branch is within the branch range.
assert(CodeCache::find_blob(target) != NULL &&
!CodeCache::find_blob(target)->is_compiled(), "runtime call of compiled method"); const address right_longest_branch_start = CodeCache::high_bound() - NativeInstruction::instruction_size; const address left_longest_branch_start = CodeCache::low_bound(); constbool is_reachable = Assembler::reachable_from_branch_at(left_longest_branch_start, target) &&
Assembler::reachable_from_branch_at(right_longest_branch_start, target); return is_reachable;
}
returnfalse;
}
// Maybe emit a call via a trampoline. If the code cache is small // trampolines won't be emitted.
address MacroAssembler::trampoline_call(Address entry) {
assert(entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::opt_virtual_call_type
|| entry.rspec().type() == relocInfo::static_call_type
|| entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
address target = entry.target();
if (!is_always_within_branch_range(entry)) { if (!in_scratch_emit_size()) { // We don't want to emit a trampoline if C2 is generating dummy // code during its branch shortening phase. if (entry.rspec().type() == relocInfo::runtime_call_type) {
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
code()->share_trampoline_for(entry.target(), offset());
} else {
address stub = emit_trampoline_stub(offset(), target); if (stub == NULL) {
postcond(pc() == badAddress); return NULL; // CodeCache is full
}
}
}
target = pc();
}
// Emit a trampoline stub for a call to a target which is too far away. // // code sequences: // // call-site: // branch-and-link to <destination> or <trampoline stub> // // Related trampoline stub for this call site in the stub section: // load the call target from the constant pool // branch (LR still points to the call site above)
// Create a trampoline stub relocation which relates this trampoline stub // with the call instruction at insts_call_instruction_offset in the // instructions code-section.
align(wordSize);
relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+ insts_call_instruction_offset)); constint stub_start_offset = offset();
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
end_a_stub(); return stub_start_addr;
}
void MacroAssembler::emit_static_call_stub() { // CompiledDirectStaticCall::set_to_interpreted knows the // exact layout of this stub.
isb();
mov_metadata(rmethod, (Metadata*)NULL);
// Jump to the entry point of the c2i stub.
movptr(rscratch1, 0);
br(rscratch1);
}
// Normalise a C-style boolean held in x to 0 or 1 (x == 0 ? 0 : 1).
void MacroAssembler::c2bool(Register x) {
  // Only the least-significant byte of x may be examined, because
  // C-style booleans are stored in one byte only (this was once a bug).
  tst(x, 0xff);
  // x = 1 if any of those low bits were set, otherwise 0.
  cset(x, Assembler::NE);
}
// Look up the method for a megamorphic invokeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass. // On success, the result will be in method_result, and execution falls through. // On failure, execution transfers to the given label. void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass,
RegisterOrConstant itable_index, Register method_result, Register scan_temp,
Label& L_no_such_interface, bool return_method) {
assert_different_registers(recv_klass, intf_klass, scan_temp);
assert_different_registers(method_result, intf_klass, scan_temp);
assert(recv_klass != method_result || !return_method, "recv_klass can be destroyed when method isn't needed");
assert(itable_index.is_constant() || itable_index.as_register() == method_result, "caller must use same register for non-constant itable index as for method");
// Compute start of first itableOffsetEntry (which is at the end of the vtable) int vtable_base = in_bytes(Klass::vtable_start_offset()); int itentry_off = itableMethodEntry::method_offset_in_bytes(); int scan_step = itableOffsetEntry::size() * wordSize; int vte_size = vtableEntry::size_in_bytes();
assert(vte_size == wordSize, "else adjust times_vte_scale");
// %%% Could store the aligned, prescaled offset in the klassoop. // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
add(scan_temp, scan_temp, vtable_base);
if (return_method) { // Adjust recv_klass by scaled itable_index, so we can free itable_index.
assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3))); if (itentry_off)
add(recv_klass, recv_klass, itentry_off);
}
ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
cmp(intf_klass, method_result);
br(Assembler::EQ, found_method);
bind(search); // Check that the previous entry is non-null. A null entry means that // the receiver class doesn't implement the interface, and wasn't the // same as when the caller was compiled.
cbz(method_result, L_no_such_interface); if (itableOffsetEntry::interface_offset_in_bytes() != 0) {
add(scan_temp, scan_temp, scan_step);
ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
} else {
ldr(method_result, Address(pre(scan_temp, scan_step)));
}
cmp(intf_klass, method_result);
br(Assembler::NE, search);
bind(found_method);
// Got a hit. if (return_method) {
ldrw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
ldr(method_result, Address(recv_klass, scan_temp, Address::uxtw(0)));
}
}
// virtual method calling void MacroAssembler::lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index, Register method_result) { constint base = in_bytes(Klass::vtable_start_offset());
assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
// Emit the fast path of a subtype check of sub_klass against super_klass.
//
// L_success, L_failure and L_slow_path are the branch targets for the
// three possible outcomes.  At most one of them may be NULL; a NULL label
// means "fall through" for that outcome.  super_check_offset is either a
// register, a known constant, or the constant -1; in the -1 case a
// temp_reg must be supplied.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // Redirect every NULL label to the common fall-through point.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            b(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  br(Assembler::EQ, *L_success);

  // NOTE(review): in the code visible here nothing after the self-check
  // updates the condition flags before the branches below, and
  // must_load_sco / super_check_offset_addr are computed but not consumed.
  // The load-and-compare of the displayed supertype appears to be missing
  // at this point - confirm against the complete file.

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    br(Assembler::EQ, *L_success);
    subs(zr, super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_slow_path);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef final_jmp
}
// These two are taken from x86, but they look generally useful
Label L_fallthrough; int label_nulls = 0; if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass: int ss_offset = in_bytes(Klass::secondary_supers_offset()); int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
BLOCK_COMMENT("check_klass_subtype_slow_path");
// Do a linear scan of the secondary super-klass chain. // This code is rarely used, so simplicity is a virtue here. // The repne_scan instruction uses fixed registers, which we must spill. // Don't worry too much about pre-existing connections with the input regs.
assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super)
assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter)
RegSet pushed_registers; if (!IS_A_TEMP(r2)) pushed_registers += r2; if (!IS_A_TEMP(r5)) pushed_registers += r5;
if (super_klass != r0) { if (!IS_A_TEMP(r0)) pushed_registers += r0;
}
push(pushed_registers, sp);
// Get super_klass value into r0 (even if it was in r5 or r2). if (super_klass != r0) {
mov(r0, super_klass);
}
// We will consult the secondary-super array.
ldr(r5, secondary_supers_addr); // Load the array length.
ldrw(r2, Address(r5, Array<Klass*>::length_offset_in_bytes())); // Skip to start of data.
add(r5, r5, Array<Klass*>::base_offset_in_bytes());
cmp(sp, zr); // Clear Z flag; SP is never zero // Scan R2 words at [R5] for an occurrence of R0. // Set NZ/Z based on last compare.
repne_scan(r5, r0, r2, rscratch1);
// Unspill the temp. registers:
pop(pushed_registers, sp);
br(Assembler::NE, *L_failure);
// Success. Cache the super we found and proceed in triumph.
str(super_klass, super_cache_addr);
if (L_success != &L_fallthrough) {
b(*L_success);
}
#undef IS_A_TEMP
bind(L_fallthrough);
}
void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
assert_different_registers(klass, rthread, scratch);
Label L_fallthrough, L_tmp; if (L_fast_path == NULL) {
L_fast_path = &L_fallthrough;
} elseif (L_slow_path == NULL) {
L_slow_path = &L_fallthrough;
} // Fast path check: class is fully initialized
ldrb(scratch, Address(klass, InstanceKlass::init_state_offset()));
subs(zr, scratch, InstanceKlass::fully_initialized);
br(Assembler::EQ, *L_fast_path);
// Fast path check: current thread is initializer thread
ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
cmp(rthread, scratch);
strip_return_address(); // This might happen within a stack frame.
protect_return_address();
stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
// addr may contain sp so we will have to adjust it based on the // pushes that we just did. if (addr.uses(sp)) {
lea(r0, addr);
ldr(r0, Address(r0, 4 * wordSize));
} else {
ldr(r0, addr);
}
movptr(rscratch1, (uintptr_t)(address)b);
// call indirectly to solve generation ordering problem
lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
ldr(rscratch2, Address(rscratch2));
blr(rscratch2);
// Emit an explicit null check of reg when needs_explicit_null_check(offset)
// asks for one; otherwise emit nothing.
void MacroAssembler::null_check(Register reg, int offset) {
  if (!needs_explicit_null_check(offset)) {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
    return;
  }

  // provoke OS NULL exception if reg = NULL by
  // accessing M[reg] w/o changing any registers
  // NOTE: this is plenty to provoke a segv
  ldr(zr, Address(reg));
}
// MacroAssembler protected routines needed to implement
// public methods
// Move a constant pointer into r. In AArch64 mode the virtual // address space is 48 bits in size, so we only need three // instructions to create a patchable instruction sequence that can // reach anywhere. void MacroAssembler::movptr(Register r, uintptr_t imm64) { #ifndef PRODUCT
{ char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, (uint64_t)imm64);
block_comment(buffer);
} #endif
assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
movz(r, imm64 & 0xffff);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 16);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 32);
}
// Macro to mov replicated immediate to vector register. // imm64: only the lower 8/16/32 bits are considered for B/H/S type. That is, // the upper 56/48/32 bits must be zeros for B/H/S type. // Vd will get the following values for different arrangements in T // imm64 == hex 000000gh T8B: Vd = ghghghghghghghgh // imm64 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh // imm64 == hex 0000efgh T4H: Vd = efghefghefghefgh // imm64 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh // imm64 == hex abcdefgh T2S: Vd = abcdefghabcdefgh // imm64 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh // imm64 == hex abcdefgh T1D: Vd = 00000000abcdefgh // imm64 == hex abcdefgh T2D: Vd = 00000000abcdefgh00000000abcdefgh // Clobbers rscratch1 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
assert(T != T1Q, "unsupported"); if (T == T1D || T == T2D) { int imm = operand_valid_for_movi_immediate(imm64, T); if (-1 != imm) {
movi(Vd, T, imm);
} else {
mov(rscratch1, imm64);
dup(Vd, T, rscratch1);
} return;
}
// Load the 64-bit constant imm64 into dst.
//
// A single ORR is used when imm64 is a valid logical immediate.
// Otherwise the constant is split into four 16-bit halfwords and built
// with one MOVZ or MOVN followed by as many MOVKs as needed.
void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
{
#ifndef PRODUCT
  {
    // Annotate the generated code with the constant being loaded.
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
    block_comment(buffer);
  }
#endif
  if (operand_valid_for_logical_immediate(false, imm64)) {
    orr(dst, zr, imm64);
    return;
  }

  // we can use a combination of MOVZ or MOVN with
  // MOVK to build up the constant
  uint64_t imm_h[4];
  int zero_count = 0;
  int neg_count = 0;
  for (int i = 0; i < 4; i++) {
    imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
    if (imm_h[i] == 0) {
      zero_count++;
    } else if (imm_h[i] == 0xffffL) {
      neg_count++;
    }
  }

  if (zero_count == 4) {
    // one MOVZ will do
    movz(dst, 0);
    return;
  }
  if (neg_count == 4) {
    // one MOVN will do
    movn(dst, 0);
    return;
  }

  // The base instruction pre-fills every halfword it does not set: MOVZ
  // with 0x0000, MOVN with 0xffff.  Pick whichever makes more halfwords
  // come for free; ties go to MOVZ.  This is the same choice the
  // zero_count == 3, neg_count == 3, ... , neg_count == 1 cascade makes,
  // and when neither count is non-zero it yields MOVZ of halfword 0 plus
  // three MOVKs.
  const bool use_movn = neg_count > zero_count;
  const uint64_t free_halfword = use_movn ? 0xffffL : 0L;

  bool base_emitted = false;
  for (int i = 0; i < 4; i++) {
    if (imm_h[i] == free_halfword) {
      continue;  // already supplied by the base instruction
    }
    if (base_emitted) {
      movk(dst, (uint32_t)imm_h[i], (i << 4));
    } else if (use_movn) {
      // MOVN writes the complement of its immediate.
      movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
      base_emitted = true;
    } else {
      movz(dst, (uint32_t)imm_h[i], (i << 4));
      base_emitted = true;
    }
  }
}
// Load the 32-bit constant imm32 into dst using 32-bit (W) forms.
//
// A single ORRW is used when imm32 is a valid logical immediate.
// Otherwise one MOVZW or MOVNW suffices when either halfword is 0x0000 or
// 0xffff, and MOVZW + MOVKW is used in the general case.
void MacroAssembler::mov_immediate32(Register dst, uint32_t imm32)
{
#ifndef PRODUCT
  {
    // Annotate the generated code with the constant being loaded.
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIX32, imm32);
    block_comment(buffer);
  }
#endif
  if (operand_valid_for_logical_immediate(true, imm32)) {
    orrw(dst, zr, imm32);
  } else {
    // we can use MOVZ, MOVN or two calls to MOVK to build up the
    // constant
    uint32_t imm_h[2];
    imm_h[0] = imm32 & 0xffff;
    imm_h[1] = ((imm32 >> 16) & 0xffff);
    if (imm_h[0] == 0) {
      // Low half is zero: MOVZ of the high half fills it in.
      movzw(dst, imm_h[1], 16);
    } else if (imm_h[0] == 0xffff) {
      // Low half is all ones: MOVN of the complemented high half.
      movnw(dst, imm_h[1] ^ 0xffff, 16);
    } else if (imm_h[1] == 0) {
      movzw(dst, imm_h[0], 0);
    } else if (imm_h[1] == 0xffff) {
      movnw(dst, imm_h[0] ^ 0xffff, 0);
    } else {
      // use a MOVZ and MOVK (makes it easier to debug)
      movzw(dst, imm_h[0], 0);
      movkw(dst, imm_h[1], 16);
    }
  }
}
// Form an address from base + offset in Rd.  Rd may or may
// not actually be used: you must use the Address that is returned.
// It is up to you to ensure that the shift provided matches the size
// of your data.
Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_offset, int shift) {
  // It fits; no need for any heroics
  if (Address::offset_ok_for_immed(byte_offset, shift)) {
    return Address(base, byte_offset);
  }

  // Don't do anything clever with negative or misaligned offsets
  const unsigned misalign_mask = (1 << shift) - 1;
  const bool negative_or_misaligned = byte_offset < 0 || (byte_offset & misalign_mask);

  if (!negative_or_misaligned) {
    // See if we can do this with two 12-bit offsets: the upper part is
    // added into Rd, the remainder becomes the immediate of the address.
    const uint64_t word_offset = byte_offset >> shift;
    const uint64_t masked_offset = word_offset & 0xfff000;
    if (Address::offset_ok_for_immed(word_offset - masked_offset, 0)
        && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
      add(Rd, base, masked_offset << shift);
      return Address(Rd, (word_offset - masked_offset) << shift);
    }
  }

  // Do it the hard way
  mov(Rd, byte_offset);
  add(Rd, base, Rd);
  return Address(Rd);
}
int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb, bool want_remainder, Register scratch)
{ // Full implementation of Java idiv and irem. The function // returns the (pc) offset of the div instruction - may be needed // for implicit exceptions. // // constraint : ra/rb =/= scratch // normal case // // input : ra: dividend // rb: divisor // // result: either // quotient (= ra idiv rb) // remainder (= ra irem rb)
int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb, bool want_remainder, Register scratch)
{ // Full implementation of Java ldiv and lrem. The function // returns the (pc) offset of the div instruction - may be needed // for implicit exceptions. // // constraint : ra/rb =/= scratch // normal case // // input : ra: dividend // rb: divisor // // result: either // quotient (= ra idiv rb) // remainder (= ra irem rb)
// 64-bit load.  try_merge_ldst() gets the first chance to handle it, so
// that two adjacent loads can become one ldp; a plain LDR is emitted
// only when it declines.
void MacroAssembler::ldr(Register Rx, const Address &adr) {
  if (try_merge_ldst(Rx, adr, 8, false)) {
    return;
  }
  Assembler::ldr(Rx, adr);
}
// 32-bit load.  try_merge_ldst() gets the first chance to handle it, so
// that two adjacent loads can become one ldp; a plain LDRW is emitted
// only when it declines.
void MacroAssembler::ldrw(Register Rw, const Address &adr) {
  if (try_merge_ldst(Rw, adr, 4, false)) {
    return;
  }
  Assembler::ldrw(Rw, adr);
}
// 64-bit store.  try_merge_ldst() gets the first chance to handle it, so
// that two adjacent stores can become one stp; a plain STR is emitted
// only when it declines.
void MacroAssembler::str(Register Rx, const Address &adr) {
  if (try_merge_ldst(Rx, adr, 8, true)) {
    return;
  }
  Assembler::str(Rx, adr);
}
// 32-bit store.  try_merge_ldst() gets the first chance to handle it, so
// that two adjacent stores can become one stp; a plain STRW is emitted
// only when it declines.
void MacroAssembler::strw(Register Rw, const Address &adr) {
  if (try_merge_ldst(Rw, adr, 4, true)) {
    return;
  }
  Assembler::strw(Rw, adr);
}
// MacroAssembler routines found actually to be needed
// Note: load_unsigned_short used to be called load_unsigned_word. int MacroAssembler::load_unsigned_short(Register dst, Address src) { int off = offset();
ldrh(dst, src); return off;
}
// Emits LDRB and returns the code offset at which it was emitted.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  const int insn_offset = offset();  // sampled before the load is emitted
  ldrb(dst, src);
  return insn_offset;
}
// Emits LDRSH and returns the code offset at which it was emitted.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  const int insn_offset = offset();  // sampled before the load is emitted
  ldrsh(dst, src);
  return insn_offset;
}
// Emits LDRSB and returns the code offset at which it was emitted.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  const int insn_offset = offset();  // sampled before the load is emitted
  ldrsb(dst, src);
  return insn_offset;
}
// Emits LDRSHW and returns the code offset at which it was emitted.
int MacroAssembler::load_signed_short32(Register dst, Address src) {
  const int insn_offset = offset();  // sampled before the load is emitted
  ldrshw(dst, src);
  return insn_offset;
}
// Emits LDRSBW and returns the code offset at which it was emitted.
int MacroAssembler::load_signed_byte32(Register dst, Address src) {
  const int insn_offset = offset();  // sampled before the load is emitted
  ldrsbw(dst, src);
  return insn_offset;
}
// Add value to the 32-bit word in memory at dst.
// Uses rscratch1 for the value, and rscratch2 as well when dst is a
// literal address.
void MacroAssembler::incrementw(Address dst, int value)
{
  assert(!dst.uses(rscratch1), "invalid dst for address increment");

  const bool dst_is_literal = (dst.getMode() == Address::literal);
  if (dst_is_literal) {
    assert(abs(value) < (1 << 12), "invalid value and address mode combination");
    // Form the literal address in rscratch2 and go through that register.
    lea(rscratch2, dst);
    dst = Address(rscratch2);
  }

  // Load, adjust and store back through rscratch1.
  ldrw(rscratch1, dst);
  incrementw(rscratch1, value);
  strw(rscratch1, dst);
}
// Add value to the 64-bit word in memory at dst.
// Uses rscratch1 for the value, and rscratch2 as well when dst is a
// literal address.
void MacroAssembler::increment(Address dst, int value)
{
  assert(!dst.uses(rscratch1), "invalid dst for address increment");

  const bool dst_is_literal = (dst.getMode() == Address::literal);
  if (dst_is_literal) {
    assert(abs(value) < (1 << 12), "invalid value and address mode combination");
    // Form the literal address in rscratch2 and go through that register.
    lea(rscratch2, dst);
    dst = Address(rscratch2);
  }

  // Load, adjust and store back through rscratch1.
  ldr(rscratch1, dst);
  increment(rscratch1, value);
  str(rscratch1, dst);
}
// Push lots of registers in the bit set supplied. Don't push sp. // Return the number of words pushed int MacroAssembler::push(unsignedint bitset, Register stack) { int words_pushed = 0;
// Scan bitset to accumulate register pairs unsignedchar regs[32]; int count = 0; for (int reg = 0; reg <= 30; reg++) { if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
regs[count++] = zr->raw_encoding();
count &= ~1; // Only push an even number of regs
if (count) {
stp(as_Register(regs[0]), as_Register(regs[1]),
Address(pre(stack, -count * wordSize)));
words_pushed += 2;
} for (int i = 2; i < count; i += 2) {
stp(as_Register(regs[i]), as_Register(regs[i+1]),
Address(stack, i * wordSize));
words_pushed += 2;
}
// Push lots of registers in the bit set supplied. Don't push sp. // Return the number of dwords pushed int MacroAssembler::push_fp(unsignedint bitset, Register stack) { int words_pushed = 0; bool use_sve = false; int sve_vector_size_in_bytes = 0;
// Return the number of dwords popped int MacroAssembler::pop_fp(unsignedint bitset, Register stack) { int words_pushed = 0; bool use_sve = false; int sve_vector_size_in_bytes = 0;
// Return the number of dwords pushed int MacroAssembler::push_p(unsignedint bitset, Register stack) { bool use_sve = false; int sve_predicate_size_in_slots = 0;
unsignedchar regs[PRegister::number_of_registers]; int count = 0; for (int reg = 0; reg < PRegister::number_of_registers; reg++) { if (1 & bitset)
regs[count++] = reg;
bitset >>= 1;
}
if (count == 0) { return 0;
}
int total_push_bytes = align_up(sve_predicate_size_in_slots *
VMRegImpl::stack_slot_size * count, 16);
sub(stack, stack, total_push_bytes); for (int i = 0; i < count; i++) {
sve_str(as_PRegister(regs[i]), Address(stack, i));
} return total_push_bytes / 8;
}
// Return the number of dwords popped int MacroAssembler::pop_p(unsignedint bitset, Register stack) { bool use_sve = false; int sve_predicate_size_in_slots = 0;
// this simulates the behaviour of the x86 cmpxchg instruction using a // load linked/store conditional pair. we use the acquire/release // versions of these instructions so that we flush pending writes as // per Java semantics.
// n.b the x86 version assumes the old value to be compared against is // in rax and updates rax with the value located in memory if the // cmpxchg fails. we supply a register for the old value explicitly
// the aarch64 load linked/store conditional instructions do not // accept an offset. so, unlike x86, we must provide a plain register // to identify the memory word to be compared/exchanged rather than a // register+offset Address.
// Emit a 64-bit compare-and-exchange on the word addressed by 'addr'.
//
//   oldv    - value the memory word is compared against; on a failed
//             compare it receives the value that was found in memory
//   newv    - value to store when the compare succeeds
//   addr    - plain register holding the address of the memory word
//   tmp     - scratch register, clobbered
//   succeed - branched to when the exchange took place
//   fail    - if non-null, branched to when the exchange did not happen;
//             if null, the failure path falls through
void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                                Label &succeed, Label *fail) {
  if (!UseLSE) {
    Label reload, mismatch;
    prfm(Address(addr), PSTL1STRM);
    bind(reload);
    // Load-acquire exclusive and bail out if the word is not what we expect.
    ldaxr(tmp, addr);
    cmp(tmp, oldv);
    br(Assembler::NE, mismatch);
    // Store-release exclusive; tmp is zero if nothing intervened.
    stlxr(tmp, newv, addr);
    cbzw(tmp, succeed);
    // The store lost its reservation: go round again so that we only
    // report failure after a load that really failed to compare, and
    // never hand back a stale value after a failed write.
    b(reload);
    // The memory word differs: hand it back in oldv and signal failure.
    bind(mismatch);
    membar(AnyAny);
    mov(oldv, tmp);
  } else {
    // Single-instruction CAS; keep a copy of the expected value in tmp
    // so the outcome can be tested afterwards.
    mov(tmp, oldv);
    casal(Assembler::xword, oldv, newv, addr);
    cmp(tmp, oldv);
    br(Assembler::EQ, succeed);
    membar(AnyAny);
  }
  if (fail != nullptr) {
    b(*fail);
  }
}
// Emit a 32-bit compare-and-exchange on the word addressed by 'addr'.
//
//   oldv    - value the memory word is compared against; on a failed
//             compare it receives the value that was found in memory
//   newv    - value to store when the compare succeeds
//   addr    - plain register holding the address of the memory word
//   tmp     - scratch register, clobbered (on the exclusive-access path
//             it ends up holding the store status or the loaded word)
//   succeed - branched to when the exchange took place
//   fail    - if non-null, branched to when the exchange did not happen;
//             if null, the failure path falls through
void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                              Label &succeed, Label *fail) {
  if (!UseLSE) {
    Label reload, mismatch;
    prfm(Address(addr), PSTL1STRM);
    bind(reload);
    // Load-acquire exclusive and bail out if the word is not what we expect.
    ldaxrw(tmp, addr);
    cmp(tmp, oldv);
    br(Assembler::NE, mismatch);
    // Store-release exclusive; tmp is zero if nothing intervened.
    stlxrw(tmp, newv, addr);
    cbzw(tmp, succeed);
    // The store lost its reservation: go round again so that we only
    // report failure after a load that really failed to compare, and
    // never hand back a stale value after a failed write.
    b(reload);
    // The memory word differs: hand it back in oldv and signal failure.
    bind(mismatch);
    membar(AnyAny);
    mov(oldv, tmp);
  } else {
    // Single-instruction CAS; keep a copy of the expected value in tmp
    // so the outcome can be tested afterwards.
    mov(tmp, oldv);
    casal(Assembler::word, oldv, newv, addr);
    cmp(tmp, oldv);
    br(Assembler::EQ, succeed);
    membar(AnyAny);
  }
  if (fail != nullptr) {
    b(*fail);
  }
}
// A generic CAS; success or failure is in the EQ flag. A weak CAS // doesn't retry and may fail spuriously. If the oldval is wanted, // Pass a register for the result, otherwise pass noreg.
staticbool different(Register a, RegisterOrConstant b, Register c) { if (b.is_constant()) return a != c; else return a != b.as_register() && a != c && b.as_register() != c;
}
// Push the call-clobbered general purpose registers (minus 'exclude')
// followed by the low 64 bits (T1D) of v0-v7 and v16-v31 onto the stack.
// The vector registers are stored four at a time, from the highest group
// down to v0-v3. rscratch1 is clobbered (it holds the negative group
// size used as the post-index step).
void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  const int group_bytes = 4 * wordSize;

  push(call_clobbered_gp_registers() - exclude, sp);

  sub(sp, sp, group_bytes);
  mov(rscratch1, -group_bytes);
  // Push v0-v7, v16-v31.
  for (int last = 31; last >= 4; last -= 4) {
    const bool in_saved_range = last <= v7->encoding() || last >= v16->encoding();
    if (!in_saved_range) {
      continue;   // v8-v15 are not saved here
    }
    st1(as_FloatRegister(last-3), as_FloatRegister(last-2), as_FloatRegister(last-1),
        as_FloatRegister(last), T1D, Address(post(sp, rscratch1)));
  }
  // Final group goes at the current sp without further adjustment.
  st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
      as_FloatRegister(3), T1D, Address(sp));
}
void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { for (int i = 0; i < 32; i += 4) { if (i <= v7->encoding() || i >= v16->encoding())
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
}
void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, int sve_vector_size_in_bytes, int total_predicate_in_bytes) {
push(RegSet::range(r0, r29), sp); // integer registers except lr & sp if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
sub(sp, sp, sve_vector_size_in_bytes * FloatRegister::number_of_registers); for (int i = 0; i < FloatRegister::number_of_registers; i++) {
sve_str(as_FloatRegister(i), Address(sp, i));
}
} else { int step = (save_vectors ? 8 : 4) * wordSize;
mov(rscratch1, -step);
sub(sp, sp, step); for (int i = 28; i >= 4; i -= 4) {
st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
}
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
} if (save_vectors && use_sve && total_predicate_in_bytes > 0) {
sub(sp, sp, total_predicate_in_bytes); for (int i = 0; i < PRegister::number_of_registers; i++) {
sve_str(as_PRegister(i), Address(sp, i));
}
}
}
void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, int sve_vector_size_in_bytes, int total_predicate_in_bytes) { if (restore_vectors && use_sve && total_predicate_in_bytes > 0) { for (int i = PRegister::number_of_registers - 1; i >= 0; i--) {
sve_ldr(as_PRegister(i), Address(sp, i));
}
add(sp, sp, total_predicate_in_bytes);
} if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) { for (int i = FloatRegister::number_of_registers - 1; i >= 0; i--) {
sve_ldr(as_FloatRegister(i), Address(sp, i));
}
add(sp, sp, sve_vector_size_in_bytes * FloatRegister::number_of_registers);
} else { int step = (restore_vectors ? 8 : 4) * wordSize; for (int i = 0; i <= 28; i += 4)
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
}
// We may use predicate registers and rely on ptrue with SVE, // regardless of wide vector (> 8 bytes) used or not. if (use_sve) {
reinitialize_ptrue();
}
// Produce an Address for (r + r1 extend) + offset, where "size" is the
// size of the operand being accessed.
//
// When there is no offset and the extend's shift is a multiple of
// "size", the register-offset form is returned directly. Otherwise the
// register part is computed into rscratch2 with lea and the result is
// rscratch2 + offset, so rscratch2 may be clobbered.
Address MacroAssembler::offsetted_address(Register r, Register r1,
                                          Address::extend ext, int offset, int size) {
  const bool usable_as_is = (offset == 0) && (ext.shift() % size == 0);
  if (usable_as_is) {
    return Address(r, r1, ext);
  }
  lea(rscratch2, Address(r, r1, ext));
  return Address(rscratch2, offset);
}
// Build an sp-relative Address for a spill slot of 'size' bytes at the
// non-negative byte 'offset'.
//
// Reachability rules applied here:
//   - not aligned to 'size': 9 bits signed offset
//   - aligned to 'size':     12 bits unsigned offset, scaled by 'size'
// Whatever part of the offset does not fit is added into 'tmp' first,
// and 'tmp' then becomes the base of the returned Address.
Address MacroAssembler::spill_address(int size, int offset, Register tmp)
{
  assert(offset >= 0, "spill to negative address?");

  Register base = sp;

  const bool misaligned = (offset & (size-1)) != 0;
  if (misaligned && offset >= (1<<8)) {
    // Fold the low 12 bits into tmp; keep only the upper part as offset.
    add(tmp, base, offset & ((1<<12)-1));
    base = tmp;
    offset &= -1u<<12;
  }

  if (offset >= (1<<12) * size) {
    // Fold bits 12..23 into tmp; what remains is directly encodable.
    add(tmp, base, offset & (((1<<12)-1)<<12));
    base = tmp;
    offset &= ~(((1<<12)-1)<<12);
  }

  return Address(base, offset);
}
// Build an sp-relative Address for an SVE vector or predicate spill slot
// at the non-negative byte 'offset'.
//
// The immediate form takes an index in the range 0 to 255 that is
// multiplied by the current vector or predicate register size in bytes,
// so it is used only when 'offset' is a whole multiple of
// 'sve_reg_size_in_bytes' and the resulting index is below 256. In every
// other case sp + offset is computed into 'tmp' and used as the base.
Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
  assert(offset >= 0, "spill to negative address?");

  Register base = sp;

  const bool whole_slots = (offset % sve_reg_size_in_bytes == 0);
  if (whole_slots && offset < ((1<<8)*sve_reg_size_in_bytes)) {
    return Address(base, offset / sve_reg_size_in_bytes);
  }

  add(tmp, base, offset);
  return Address(tmp);
}
// Checks whether offset is aligned. // Returns true if it is, else false. bool MacroAssembler::merge_alignment_check(Register base,
size_t size,
int64_t cur_offset,
int64_t prev_offset) const { if (AvoidUnalignedAccesses) { if (base == sp) { // Checks whether low offset if aligned to pair of registers.
int64_t pair_mask = size * 2 - 1;
int64_t offset = prev_offset > cur_offset ? cur_offset : prev_offset; return (offset & pair_mask) == 0;
} else { // If base is not sp, we can't guarantee the access is aligned. returnfalse;
}
} else {
int64_t mask = size - 1; // Load/store pair instruction only supports element size aligned offset. return (cur_offset & mask) == 0 && (prev_offset & mask) == 0;
}
}
// Checks whether current and previous loads/stores can be merged. // Returns true if it can be merged, else false. bool MacroAssembler::ldst_can_merge(Register rt, const Address &adr,
size_t cur_size_in_bytes, bool is_store) const {
address prev = pc() - NativeInstruction::instruction_size;
address last = code()->last_insn();
if (last == NULL || !nativeInstruction_at(last)->is_Imm_LdSt()) { returnfalse;
}
// Following cases can not be merged: // ldr x2, [x2, #8] // ldr x3, [x2, #16] // or: // ldr x2, [x3, #8] // ldr x2, [x3, #16] // If t1 and t2 is the same in "ldp t1, t2, [xn, #imm]", we'll get SIGILL. if (!is_store && (adr.base() == prev_ldst->target() || rt == prev_ldst->target())) { returnfalse;
}
int64_t low_offset = prev_offset > cur_offset ? cur_offset : prev_offset; // Offset range must be in ldp/stp instruction's range. if (low_offset > max_offset || low_offset < min_offset) { returnfalse;
}
if (merge_alignment_check(adr.base(), prev_size_in_bytes, cur_offset, prev_offset)) { returntrue;
}
returnfalse;
}
// Merge current load/store with previous load/store into ldp/stp. void MacroAssembler::merge_ldst(Register rt, const Address &adr,
size_t cur_size_in_bytes, bool is_store) {
assert(ldst_can_merge(rt, adr, cur_size_in_bytes, is_store) == true, "cur and prev must be able to be merged.");
// AArch64 has a multiply-accumulate instruction that we can't use // here because it has no way to process carries, so we have to use // separate add and adc instructions. Bah.
umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product
mul(product, x_xstart, y_idx);
adds(product, product, carry);
adc(carry, rscratch1, zr); // x_xstart * y_idx + carry -> carry:product
subw(kdx, kdx, 2);
ror(product, product, 32); // back to big-endian
str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong));
// A null weak handle resolves to null.
cbz(result, resolved);
// Only 64 bit platforms support GCs that require a tmp register // WeakHandle::resolve is an indirection like jweak.
access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
result, Address(result), tmp1, tmp2);
bind(resolved);
}
// Store the klass pointer held in 'src' into the klass field of the
// object addressed by 'dst'. With compressed class pointers 'src' is
// encoded in place first (so it is modified) and a 32-bit store is used.
//
// FIXME: Should this be a store release?  concurrent gcs assumes
// klass length is valid if klass field is not null.
void MacroAssembler::store_klass(Register dst, Register src) {
  if (!UseCompressedClassPointers) {
    str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
    return;
  }
  encode_klass_not_null(src);
  strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
}
// Store the 32-bit value in 'src' to the klass gap of the object
// addressed by 'dst'. Emits nothing unless compressed class pointers
// are in use.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (!UseCompressedClassPointers) {
    return;
  }
  // Store to klass gap in destination
  strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
}
/* Old algorithm: is this any worse? Label nonnull; cbnz(r, nonnull); sub(r, r, rheapbase); bind(nonnull); lsr(r, r, LogMinObjAlignmentInBytes);
*/
}
}
// Compress the oop in 'r' in place. No null check is generated for the
// encoding itself; debug builds verify the heap base and, with
// CheckCompressedOops, stop if 'r' is null.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label non_null;
    cbnz(r, non_null);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(non_null);
  }
#endif
  verify_oop_msg(r, "broken oop in encode_heap_oop_not_null");

  const bool has_base  = CompressedOops::base() != NULL;
  const bool has_shift = CompressedOops::shift() != 0;

  if (has_base) {
    // Make the oop relative to the heap base.
    sub(r, r, rheapbase);
  }
  if (has_shift) {
    assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
    lsr(r, r, LogMinObjAlignmentInBytes);
  }
}
// Compress the oop in 'src' and leave the result in 'dst'. No null check
// is generated for the encoding itself; debug builds verify the heap
// base and, with CheckCompressedOops, stop if 'src' is null.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label non_null;
    cbnz(src, non_null);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(non_null);
  }
#endif
  verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2");

  // Tracks which register currently holds the partially encoded value.
  Register current = src;

  if (CompressedOops::base() != NULL) {
    sub(dst, src, rheapbase);
    current = dst;
  }

  if (CompressedOops::shift() != 0) {
    assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
    lsr(dst, current, LogMinObjAlignmentInBytes);
    current = dst;
  }

  // If the tracked register still compares equal to src, emit a plain move.
  if (current == src) {
    mov(dst, src);
  }
}
// Decompress the (possibly null) narrow oop in 's' into 'd'.
//
// With a non-null heap base a null narrow oop must stay null, so the
// base is added only when 's' is non-zero. Without a base, a shift (or a
// plain copy when d != s) is all that is needed.
void MacroAssembler::decode_heap_oop(Register d, Register s) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (CompressedOops::base() != NULL) {
    Label is_null;
    if (d != s) {
      mov(d, s);
    }
    cbz(s, is_null);
    add(d, rheapbase, s, Assembler::LSL, LogMinObjAlignmentInBytes);
    bind(is_null);
  } else if (CompressedOops::shift() != 0 || d != s) {
    lsl(d, s, CompressedOops::shift());
  }
  verify_oop_msg(d, "broken oop in decode_heap_oop");
}
// Decompress the narrow oop in 'r' in place, without a null check.
//
// Cannot assert here: the unverified entry point counts instructions
// (see .ad file), and vtableStubs also counts instructions in
// pd_code_size_limit. Also do not verify_oop, as this is called by
// verify_oop.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  if (CompressedOops::shift() == 0) {
    // Unshifted narrow oops: nothing to emit.
    assert (CompressedOops::base() == NULL, "sanity");
    return;
  }

  assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  if (CompressedOops::base() != NULL) {
    add(r, rheapbase, r, Assembler::LSL, LogMinObjAlignmentInBytes);
  } else {
    add(r, zr, r, Assembler::LSL, LogMinObjAlignmentInBytes);
  }
}
// Decompress the narrow oop in 'src' into 'dst', without a null check.
//
// Cannot assert here: the unverified entry point counts instructions
// (see .ad file), and vtableStubs also counts instructions in
// pd_code_size_limit. Also do not verify_oop, as this is called by
// verify_oop.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  if (CompressedOops::shift() == 0) {
    // Unshifted narrow oops: at most a register copy is required.
    assert (CompressedOops::base() == NULL, "sanity");
    if (dst != src) {
      mov(dst, src);
    }
    return;
  }

  assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
  if (CompressedOops::base() != NULL) {
    add(dst, rheapbase, src, Assembler::LSL, LogMinObjAlignmentInBytes);
  } else {
    add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes);
  }
}
MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
assert(UseCompressedClassPointers, "not using compressed class pointers");
assert(Metaspace::initialized(), "metaspace not initialized yet");
if (_klass_decode_mode != KlassDecodeNone) { return _klass_decode_mode;
}
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
switch (klass_decode_mode()) { case KlassDecodeZero: if (CompressedKlassPointers::shift() != 0) {
lsl(dst, src, LogKlassAlignmentInBytes);
} else { if (dst != src) mov(dst, src);
} break;
// Emit a movz/movk pair that loads a 32-bit placeholder into 'dst' and
// attach an oop relocation for 'obj' to it. The immediates 0xDEAD/0xBEEF
// are dummies; presumably the relocation machinery patches in the real
// narrow oop later (not visible here).
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
#ifdef ASSERT
  {
    ThreadInVMfromUnknown tiv;
    assert (UseCompressedOops, "should only be used for compressed oops");
    assert (Universe::heap() != NULL, "java heap should be initialized");
    assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
    assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
  }
#endif
  const int recorded_index = oop_recorder()->find_index(obj);

  InstructionMark im(this);
  RelocationHolder oop_reloc = oop_Relocation::spec(recorded_index);
  code_section()->relocate(inst_mark(), oop_reloc);

  movz(dst, 0xDEAD, 16);
  movk(dst, 0xBEEF);
}
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k);
assert(! Universe::heap()->is_in(k), "should not be an oop");
// Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tmp. void MacroAssembler::bang_stack_size(Register size, Register tmp) {
assert_different_registers(tmp, size, rscratch1);
mov(tmp, sp); // Bang stack for total size given plus shadow page size. // Bang one page at a time because large size can bang beyond yellow and // red zones.
Label loop;
mov(rscratch1, os::vm_page_size());
bind(loop);
lea(tmp, Address(tmp, -os::vm_page_size()));
subsw(size, size, rscratch1);
str(size, Address(tmp));
br(Assembler::GT, loop);
// Bang down shadow pages too. // At this point, (tmp-0) is the last address touched, so don't // touch it again. (It was touched as (tmp-pagesize) but then tmp // was post-decremented.) Skip this address by starting at i=1, and // touch a few more pages below. N.B. It is important to touch all // the way down to and including i=StackShadowPages. for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { // this could be any sized move but this is can be a debugging crumb // so the bigger the better.
lea(tmp, Address(tmp, -os::vm_page_size()));
str(size, Address(tmp));
}
}
// Move the address of the polling page into dest. void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
ldr(dest, Address(rthread, JavaThread::polling_page_offset()));
}
// Read the polling page. The address of the polling page must already
// be in 'r'. A 32-bit load into zr is emitted with a relocation of type
// 'rtype' attached; the instruction mark recorded for that load is
// returned.
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
  address poll_mark;
  {
    // Keep the instruction mark alive only for the duration of the load.
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), rtype);
    ldrw(zr, Address(r, 0));
    poll_mark = inst_mark();
  }
  verify_cross_modify_fence_not_required();
  return poll_mark;
}
assert(is_valid_AArch64_address(dest.target()), "bad address");
assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address");
InstructionMark im(this);
code_section()->relocate(inst_mark(), dest.rspec()); // 8143067: Ensure that the adrp can reach the dest from anywhere within // the code cache so that if it is relocated we know it will still reach if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
_adrp(reg1, dest.target());
} else {
uint64_t target = (uint64_t)dest.target();
uint64_t adrp_target
= (target & 0xffffffffULL) | ((uint64_t)pc() & 0xffff00000000ULL);
// Strictly speaking the byte_map_base isn't an address at all, and it might // even be negative. It is thus materialised as a constant.
mov(reg, (uint64_t)byte_map_base);
}
// This method counts leading positive bytes (highest bit not set) in provided byte array
address MacroAssembler::count_positives(Register ary1, Register len, Register result) { // Simple and most common case of aligned small array which is not at the // end of memory page is placed here. All other cases are in stub.
Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE; const uint64_t UPPER_BIT_MASK=0x8080808080808080;
assert_different_registers(ary1, len, result);
mov(result, len);
cmpw(len, 0);
br(LE, DONE);
cmpw(len, 4 * wordSize);
br(GE, STUB_LONG); // size > 32 then go to stub
int shift = 64 - exact_log2(os::vm_page_size());
lsl(rscratch1, ary1, shift);
mov(rscratch2, (size_t)(4 * wordSize) << shift);
adds(rscratch2, rscratch1, rscratch2); // At end of page?
br(CS, STUB); // at the end of page then go to stub
subs(len, len, wordSize);
br(LT, END);
// if (a1 == a2) // return true;
cmpoop(a1, a2); // May have read barriers for a1 and a2.
br(EQ, SAME);
if (UseSimpleArrayEquals) {
Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL; // if (a1 == null || a2 == null) // return false; // a1 & a2 == 0 means (some-pointer is null) or // (very-rare-or-even-probably-impossible-pointer-values) // so, we can save one branch in most cases
tst(a1, a2);
mov(result, false);
br(EQ, A_MIGHT_BE_NULL); // if (a1.length != a2.length) // return false;
bind(A_IS_NOT_NULL);
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
eorw(tmp5, cnt1, cnt2);
cbnzw(tmp5, DONE);
lea(a1, Address(a1, base_offset));
lea(a2, Address(a2, base_offset)); // Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT); // Main 8 byte comparison loop.
bind(NEXT_WORD); {
ldr(tmp1, Address(post(a1, wordSize)));
ldr(tmp2, Address(post(a2, wordSize)));
subs(cnt1, cnt1, elem_per_word);
eor(tmp5, tmp1, tmp2);
cbnz(tmp5, DONE);
} br(GT, NEXT_WORD); // Last longword. In the case where length == 4 we compare the // same longword twice, but that's still faster than another // conditional branch. // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when // length == 4. if (log_elem_size > 0)
lsl(cnt1, cnt1, log_elem_size);
ldr(tmp3, Address(a1, cnt1));
ldr(tmp4, Address(a2, cnt1));
eor(tmp5, tmp3, tmp4);
cbnz(tmp5, DONE);
b(SAME);
bind(A_MIGHT_BE_NULL); // in case both a1 and a2 are not-null, proceed with loads
cbz(a1, DONE);
cbz(a2, DONE);
b(A_IS_NOT_NULL);
bind(SHORT);
tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
{
ldrw(tmp1, Address(post(a1, 4)));
ldrw(tmp2, Address(post(a2, 4)));
eorw(tmp5, tmp1, tmp2);
cbnzw(tmp5, DONE);
}
bind(TAIL03);
tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
{
ldrh(tmp3, Address(post(a1, 2)));
ldrh(tmp4, Address(post(a2, 2)));
eorw(tmp5, tmp3, tmp4);
cbnzw(tmp5, DONE);
}
bind(TAIL01); if (elem_size == 1) { // Only needed when comparing byte arrays.
tbz(cnt1, 0, SAME); // 0-1 bytes left.
{
ldrb(tmp1, a1);
ldrb(tmp2, a2);
eorw(tmp5, tmp1, tmp2);
cbnzw(tmp5, DONE);
}
}
} else {
Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
CSET_EQ, LAST_CHECK;
mov(result, false);
cbz(a1, DONE);
ldrw(cnt1, Address(a1, length_offset));
cbz(a2, DONE);
ldrw(cnt2, Address(a2, length_offset)); // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's // faster to perform another branch before comparing a1 and a2
cmp(cnt1, (u1)elem_per_word);
br(LE, SHORT); // short or same
ldr(tmp3, Address(pre(a1, base_offset)));
subs(zr, cnt1, stubBytesThreshold);
br(GE, STUB);
ldr(tmp4, Address(pre(a2, base_offset)));
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
cmp(cnt2, cnt1);
br(NE, DONE);
// For Strings we're passed the address of the first characters in a1 // and a2 and the length in cnt1. // elem_size is the element size in bytes: either 1 or 2. // There are two implementations. For arrays >= 8 bytes, all // comparisons (including the final one, which may overlap) are // performed 8 bytes at a time. For strings < 8 bytes, we compare a // halfword, then a short, and then a byte.
void MacroAssembler::string_equals(Register a1, Register a2, Register result, Register cnt1, int elem_size)
{
Label SAME, DONE, SHORT, NEXT_WORD; Register tmp1 = rscratch1; Register tmp2 = rscratch2; Register cnt2 = tmp2; // cnt2 only used in array length compare
assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, wordSize);
br(Assembler::LT, SHORT); // Main 8 byte comparison loop.
bind(NEXT_WORD); {
ldr(tmp1, Address(post(a1, wordSize)));
ldr(tmp2, Address(post(a2, wordSize)));
subs(cnt1, cnt1, wordSize);
eor(tmp1, tmp1, tmp2);
cbnz(tmp1, DONE);
} br(GT, NEXT_WORD); // Last longword. In the case where length == 4 we compare the // same longword twice, but that's still faster than another // conditional branch. // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when // length == 4.
ldr(tmp1, Address(a1, cnt1));
ldr(tmp2, Address(a2, cnt1));
eor(tmp2, tmp1, tmp2);
cbnz(tmp2, DONE);
b(SAME);
// That's it.
bind(DONE);
BLOCK_COMMENT("} string_equals");
}
// The size of the blocks erased by the zero_blocks stub. We must // handle anything smaller than this ourselves in zero_words(). constint MacroAssembler::zero_words_block_size = 8;
// zero_words() is used by C2 ClearArray patterns and by // C1_MacroAssembler. It is as small as possible, handling small word // counts locally and delegating anything larger to the zero_blocks // stub. It is expanded many times in compiled code, so it is // important to keep it short.
// ptr: Address of a buffer to be zeroed. // cnt: Count in HeapWords. // // ptr, cnt, rscratch1, and rscratch2 are clobbered.
// Zero 'cnt' HeapWords starting at 'ptr'.
//
//   ptr - address of the buffer to be zeroed; must be r10
//   cnt - count in HeapWords; must be r11
//
// Counts of at least zero_words_block_size are handed to the zero_blocks
// stub (through a trampoline call in C2 compilations once the stubs are
// complete, otherwise through a far call); the remaining words are
// cleared inline. ptr, cnt, rscratch1, and rscratch2 are clobbered.
//
// Returns pc() after the emitted sequence, or NULL if the trampoline
// call could not be emitted.
address MacroAssembler::zero_words(Register ptr, Register cnt)
{
  assert(is_power_of_2(zero_words_block_size), "adjust this");

  BLOCK_COMMENT("zero_words {");
  assert(ptr == r10 && cnt == r11, "mismatch in register usage");
  // Looked up once; the nested scope below used to re-declare an
  // identical, shadowing copy of this local.
  RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
  assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");

  // Fewer than one block's worth of words: skip the stub call entirely.
  subs(rscratch1, cnt, zero_words_block_size);
  Label around;
  br(LO, around);
  {
    // Make sure this is a C2 compilation. C1 allocates space only for
    // trampoline stubs generated by Call LIR ops, and in any case it
    // makes sense for a C1 compilation task to proceed as quickly as
    // possible.
    CompileTask* task;
    if (StubRoutines::aarch64::complete()
        && Thread::current()->is_Compiler_thread()
        && (task = ciEnv::current()->task())
        && is_c2_compile(task->comp_level())) {
      address tpc = trampoline_call(zero_blocks);
      if (tpc == NULL) {
        // Bailing out with 'around' referenced but unbound: reset it in
        // debug builds before returning.
        DEBUG_ONLY(reset_labels(around));
        return NULL;
      }
    } else {
      far_call(zero_blocks);
    }
  }
  bind(around);

  // We have a few words left to do. zero_blocks has adjusted r10 and r11
  // for us.
  for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
    Label l;
    // Bit log2(i) of cnt says whether a run of i words remains.
    tbz(cnt, exact_log2(i), l);
    for (int j = 0; j < i; j += 2) {
      stp(zr, zr, post(ptr, 2 * BytesPerWord));
    }
    bind(l);
  }
  {
    // Final odd word, if any.
    Label l;
    tbz(cnt, 0, l);
    str(zr, Address(ptr));
    bind(l);
  }

  BLOCK_COMMENT("} zero_words");
  return pc();
}
// Zero an immediate number of HeapWords.
//
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt:  Immediate count in HeapWords.
//
// Counts above BlockZeroingLowLimit / BytesPerWord are delegated to the
// register-based zero_words() via r10/r11; smaller counts are expanded
// inline as a 16-word store loop plus a tail. 'base' is advanced by the
// loop when it runs.
//
// r10, r11, rscratch1, and rscratch2 are clobbered.
// Returns pc() after the inline sequence, or whatever the register-based
// zero_words() returned.
address MacroAssembler::zero_words(Register base, uint64_t cnt)
{
  assert(wordSize <= BlockZeroingLowLimit, "increase BlockZeroingLowLimit");

  if (cnt > (uint64_t)BlockZeroingLowLimit / BytesPerWord) {
    // Too large to expand inline: hand over to the generic version.
    mov(r10, base);
    mov(r11, cnt);
    return zero_words(r10, r11);
  }

#ifndef PRODUCT
  {
    char buf[64];
    snprintf(buf, sizeof buf, "zero_words (count = %" PRIu64 ") {", cnt);
    BLOCK_COMMENT(buf);
  }
#endif

  if (cnt >= 16) {
    // Clear 16 words per iteration; a loop counter is only needed when
    // more than one iteration is required.
    const uint64_t iterations = cnt/16;
    if (iterations > 1) {
      mov(rscratch2, iterations - 1);
    }
    {
      Label next_chunk;
      bind(next_chunk);
      for (int word = 0; word < 16; word += 2) {
        stp(zr, zr, Address(base, word * BytesPerWord));
      }
      add(base, base, 16 * BytesPerWord);
      if (iterations > 1) {
        subs(rscratch2, rscratch2, 1);
        br(GE, next_chunk);
      }
    }
  }

  // Tail: fewer than 16 words remain.
  cnt %= 16;
  int word = cnt & 1;             // store any odd word to start
  if (word) {
    str(zr, Address(base));
  }
  for (; word < (int)cnt; word += 2) {
    stp(zr, zr, Address(base, word * wordSize));
  }

  BLOCK_COMMENT("} zero_words");
  return pc();
}
// Zero blocks of memory by using DC ZVA. // // Aligns the base address first sufficiently for DC ZVA, then uses // DC ZVA repeatedly for every full block. cnt is the size to be // zeroed in HeapWords. Returns the count of words left to be zeroed // in cnt. // // NOTE: This is intended to be used in the zero_blocks() stub. If // you want to use it elsewhere, note that cnt must be >= 2*zva_length. void MacroAssembler::zero_dcache_blocks(Register base, Register cnt) { Register tmp = rscratch1; Register tmp2 = rscratch2; int zva_length = VM_Version::zva_length();
Label initial_table_end, loop_zva;
Label fini;
// Base must be 16 byte aligned. If not just return and let caller handle it
tst(base, 0x0f);
br(Assembler::NE, fini); // Align base with ZVA length.
neg(tmp, base);
andr(tmp, tmp, zva_length - 1);
// tmp: the number of bytes to be filled to align the base with ZVA length.
add(base, base, tmp);
sub(cnt, cnt, tmp, Assembler::ASR, 3);
adr(tmp2, initial_table_end);
sub(tmp2, tmp2, tmp, Assembler::LSR, 2);
br(tmp2);
for (int i = -zva_length + 16; i < 0; i += 16)
stp(zr, zr, Address(base, i));
bind(initial_table_end);
// Intrinsic for // // - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray // return the number of characters copied. // - java/lang/StringUTF16.compress // return zero (0) if copy fails, otherwise 'len'. // // This version always returns the number of characters copied, and does not // clobber the 'len' register. A successful copy will complete with the post- // condition: 'res' == 'len', while an unsuccessful copy will exit with the // post-condition: 0 <= 'res' < 'len'. // // NOTE: Attempts to use 'ld2' (and 'umaxv' in the ISO part) has proven to // degrade performance (on Ampere Altra - Neoverse N1), to an extent // beyond the acceptable, even though the footprint would be smaller. // Using 'umaxv' in the ASCII-case comes with a small penalty but does // avoid additional bloat. // void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, Register res, bool ascii,
FloatRegister vtmp0, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3)
{ Register cnt = res; Register max = rscratch1; Register chk = rscratch2;
prfm(Address(src), PLDL1STRM);
movw(cnt, len);
#define ASCII(insn) do { if (ascii) { insn; } } while (0)
// java.math.round(double a) // Returns the closest long to the argument, with ties rounding to // positive infinity. This requires some fiddling for corner // cases. We take care to avoid double rounding in e.g. (jlong)(a + 0.5). void MacroAssembler::java_round_double(Register dst, FloatRegister src,
FloatRegister ftmp) {
Label DONE;
BLOCK_COMMENT("java_round_double: { ");
fmovd(rscratch1, src); // Use RoundToNearestTiesAway unless src small and -ve.
fcvtasd(dst, src); // Test if src >= 0 || abs(src) >= 0x1.0p52
eor(rscratch1, rscratch1, UCONST64(1) << 63); // flip sign bit
mov(rscratch2, julong_cast(0x1.0p52));
cmp(rscratch1, rscratch2);
br(HS, DONE); { // src < 0 && abs(src) < 0x1.0p52 // src may have a fractional part, so add 0.5
fmovd(ftmp, 0.5);
faddd(ftmp, src, ftmp); // Convert double to jlong, use RoundTowardsNegative
fcvtmsd(dst, ftmp);
}
bind(DONE);
BLOCK_COMMENT("} java_round_double");
}
// Float counterpart of java_round_double: rounds 'src' to the closest
// jint in 'dst', with ties rounding to positive infinity.
//
//   dst  - receives the rounded jint
//   src  - the float to round
//   ftmp - floating-point scratch register, clobbered on the slow path
// rscratch1 and rscratch2 are clobbered.
void MacroAssembler::java_round_float(Register dst, FloatRegister src,
                                      FloatRegister ftmp) {
  Label finished;
  BLOCK_COMMENT("java_round_float: { ");
  fmovs(rscratch1, src);
  // Use RoundToNearestTiesAway unless src small and -ve.
  fcvtassw(dst, src);
  // Test if src >= 0 || abs(src) >= 0x1.0p23: with the sign bit flipped,
  // a single unsigned compare against the bits of 0x1.0p23f covers both.
  eor(rscratch1, rscratch1, 0x80000000);  // flip sign bit
  mov(rscratch2, jint_cast(0x1.0p23f));
  cmp(rscratch1, rscratch2);
  br(HS, finished);
  {
    // src < 0 && |src| < 0x1.0p23
    // src may have a fractional part, so add 0.5
    fmovs(ftmp, 0.5f);
    fadds(ftmp, src, ftmp);
    // Convert float to jint, use RoundTowardsNegative
    fcvtmssw(dst, ftmp);
  }
  bind(finished);
  BLOCK_COMMENT("} java_round_float");
}
// get_thread() can be called anywhere inside generated code so we // need to save whatever non-callee save context might get clobbered // by the call to JavaThread::aarch64_get_thread_helper() or, indeed, // the call setup code. // // On Linux, aarch64_get_thread_helper() clobbers only r0, r1, and flags. // On other systems, the helper is a usual C function. // void MacroAssembler::get_thread(Register dst) {
RegSet saved_regs =
LINUX_ONLY(RegSet::range(r0, r1) + lr - dst)
NOT_LINUX (RegSet::range(r0, r17) + lr - dst);
// Emit a cache line writeback for the line addressed by 'line'.
//
// 'line' must be a plain base-register address: base_plus_offset mode,
// no index register and a zero offset. Nothing is emitted when the CPU
// does not report DC CVAP support.
void MacroAssembler::cache_wb(Address line) {
  assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset");
  assert(line.index() == noreg, "index should be noreg");
  assert(line.offset() == 0, "offset should be 0");
  // would like to assert this
  // assert(line._ext.shift == 0, "shift should be zero");

  if (!VM_Version::supports_dcpop()) {
    // no need to generate anything as Unsafe.writebackMemory should
    // never invoke this stub
    return;
  }

  // writeback using clear virtual address to point of persistence
  dc(Assembler::CVAP, line.base());
}
// Emit the synchronisation that brackets a series of cache writebacks.
//
//   is_pre - true for the sync before the writebacks, false for the one
//            after them. Only the post sync needs a barrier, so the pre
//            sync emits nothing.
void MacroAssembler::cache_wbsync(bool is_pre) {
  if (is_pre) {
    return;
  }
  membar(Assembler::AnyAny);
}
// Make sure that native code does not change the SVE vector length.
//
// Emits nothing unless UseSVE is set. Otherwise 'tmp' is overwritten, the
// flags are changed, and the generated code stops the VM when the check
// fails.
void MacroAssembler::verify_sve_vector_length(Register tmp) {
  if (!UseSVE) {
    return;
  }

  Label L_length_unchanged;

  // Start from zero and let sve_inc add the byte element count, then
  // compare the result with the length recorded at JVM startup.
  movw(tmp, zr);
  sve_inc(tmp, B);
  subsw(zr, tmp, VM_Version::get_initial_sve_vector_length());
  br(EQ, L_length_unchanged);

  stop("Error: SVE vector length has changed since jvm startup");

  bind(L_length_unchanged);
}
void MacroAssembler::verify_ptrue() {
Label verify_ok; if (!UseSVE) { return;
}
sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
sve_dec(rscratch1, B);
cbz(rscratch1, verify_ok);
stop("Error: the preserved predicate register (p7) elements are not all true");
bind(verify_ok);
}
void MacroAssembler::safepoint_isb() {
isb(); #ifndef PRODUCT if (VerifyCrossModifyFence) { // Clear the thread state.
strb(zr, Address(rthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
} #endif
}
#ifndef PRODUCT void MacroAssembler::verify_cross_modify_fence_not_required() { if (VerifyCrossModifyFence) { // Check if thread needs a cross modify fence.
ldrb(rscratch1, Address(rthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
Label fence_not_required;
cbz(rscratch1, fence_not_required); // If it does then fail.
lea(rscratch1, CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure));
mov(c_rarg0, rthread);
blr(rscratch1);
bind(fence_not_required);
}
} #endif
// Emit the spin-wait sequence described by VM_Version::spin_wait_desc():
// inst_count() copies of the selected instruction (NOP, ISB or YIELD).
// Any other instruction kind is a VM error, but only if at least one
// instruction is requested.
void MacroAssembler::spin_wait() {
  const auto& desc = VM_Version::spin_wait_desc();

  for (int emitted = 0; emitted < desc.inst_count(); ++emitted) {
    switch (desc.inst()) {
      case SpinWait::NOP:
        nop();
        break;
      case SpinWait::ISB:
        isb();
        break;
      case SpinWait::YIELD:
        yield();
        break;
      default:
        ShouldNotReachHere();
    }
  }
}
// Stack frame creation/removal
// Create a stack frame: sign the return address, push the {rfp, lr} pair
// and make rfp point at the saved pair.
//
//   strip_ret_addr - strip the return address before it is signed.
//                    Addresses can only be signed once, so this is needed
//                    when multiple nested frames are being created in the
//                    same function.
void MacroAssembler::enter(bool strip_ret_addr) {
  if (strip_ret_addr) {
    strip_return_address();
  }
  protect_return_address();

  // Two words, stored with a pre-indexed sp update.
  const int frame_link_bytes = 2 * wordSize;
  stp(rfp, lr, Address(pre(sp, -frame_link_bytes)));
  mov(rfp, sp);
}
// ROP Protection // Use the AArch64 PAC feature to add ROP protection for generated code. Use whenever creating/ // destroying stack frames or whenever directly loading/storing the LR to memory. // If ROP protection is not set then these functions are no-ops. // For more details on PAC see pauth_aarch64.hpp.
// Sign the LR. Use during construction of a stack frame, before storing the LR to memory. // Uses the FP as the modifier. // void MacroAssembler::protect_return_address() { if (VM_Version::use_rop_protection()) {
check_return_address(); // The standard convention for C code is to use paciasp, which uses SP as the modifier. This // works because in C code, FP and SP match on function entry. In the JDK, SP and FP may not // match, so instead explicitly use the FP.
pacia(lr, rfp);
}
}
// Sign the return value in the given register. Use before updating the LR in the existing stack // frame for the current function. // Uses the FP from the start of the function as the modifier - which is stored at the address of // the current FP. // void MacroAssembler::protect_return_address(Register return_reg, Register temp_reg) { if (VM_Version::use_rop_protection()) {
assert(PreserveFramePointer, "PreserveFramePointer must be set for ROP protection");
check_return_address(return_reg);
ldr(temp_reg, Address(rfp));
pacia(return_reg, temp_reg);
}
}
// Authenticate the LR. Use before function return, after restoring FP and loading LR from memory. // void MacroAssembler::authenticate_return_address(Register return_reg) { if (VM_Version::use_rop_protection()) {
autia(return_reg, rfp);
check_return_address(return_reg);
}
}
// Authenticate the return value in the given register. Use before updating the LR in the existing // stack frame for the current function. // Uses the FP from the start of the function as the modifier - which is stored at the address of // the current FP. // void MacroAssembler::authenticate_return_address(Register return_reg, Register temp_reg) { if (VM_Version::use_rop_protection()) {
assert(PreserveFramePointer, "PreserveFramePointer must be set for ROP protection");
ldr(temp_reg, Address(rfp));
autia(return_reg, temp_reg);
check_return_address(return_reg);
}
}
// Strip any PAC data from LR without performing any authentication. Use with caution - only if // there is no guaranteed way of authenticating the LR. // void MacroAssembler::strip_return_address() { if (VM_Version::use_rop_protection()) {
xpaclri();
}
}
#ifndef PRODUCT // PAC failures can be difficult to debug. After an authentication failure, a segfault will only // occur when the pointer is used - ie when the program returns to the invalid LR. At this point // it is difficult to debug back to the callee function. // This function simply loads from the address in the given register. // Use directly after authentication to catch authentication failures. // Also use before signing to check that the pointer is valid and hasn't already been signed. // void MacroAssembler::check_return_address(Register return_reg) { if (VM_Version::use_rop_protection()) {
ldr(zr, Address(return_reg));
}
} #endif
// The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by // the following value. staticint reg2offset_in(VMReg r) { // Account for saved rfp and lr // This should really be in_preserve_stack_slots return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
}
// On 64bit we will store integer like items to the stack as // 64bits items (AArch64 ABI) even though java would only store // 32bits for a parameter. On 32bit it will simply be 32bits // So this routine will do 32->32 on 32bit and 32->64 on 64bit void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack
ldr(tmp, Address(rfp, reg2offset_in(src.first())));
str(tmp, Address(sp, reg2offset_out(dst.first())));
} else { // stack to reg
ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
}
} elseif (dst.first()->is_stack()) { // reg to stack
str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
} else { if (dst.first() != src.first()) {
sxtw(dst.first()->as_Register(), src.first()->as_Register());
}
}
}
// NOTE(review): the visible body below uses rHandle, rOop, oop_slot and
// offset without any visible declaration, never reads oop_handle_offset,
// and records the oop location / receiver offset twice in a row. Part of
// this definition appears to be missing from this copy of the file --
// confirm against the original source before relying on it.
//
// What the visible statements do:
//  - record an oop location in 'map' via set_oop, first for the incoming
//    stack slot of 'src' shifted by framesize_in_slots, then for oop_slot;
//  - when is_receiver is set, write the matching byte offset to
//    *receiver_offset;
//  - store rOop at sp + offset;
//  - when 'dst' is a stack location, store rHandle into its outgoing slot.
// An oop arg. Must pass a handle not the oop itself void MacroAssembler::object_move(
OopMap* map, int oop_handle_offset, int framesize_in_slots,
VMRegPair src,
VMRegPair dst, bool is_receiver, int* receiver_offset) {
// must pass a handle. First figure out the location we use as a handle
// Oop is already on the stack as an argument int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); if (is_receiver) {
*receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
}
// NOTE(review): oop_slot, rOop and offset are not declared in the visible text.
map->set_oop(VMRegImpl::stack2reg(oop_slot)); // Store oop in handle area, may be NULL
str(rOop, Address(sp, offset)); if (is_receiver) {
*receiver_offset = offset;
}
// NOTE(review): rHandle is not declared in the visible text.
// If arg is on the stack then place it otherwise it is already in correct reg. if (dst.first()->is_stack()) {
str(rHandle, Address(sp, reg2offset_out(dst.first())));
}
}
// A float arg may have to do float reg int reg conversion void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) {
ldrw(tmp, Address(rfp, reg2offset_in(src.first())));
strw(tmp, Address(sp, reg2offset_out(dst.first())));
} else {
ldrs(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
}
} elseif (src.first() != dst.first()) { if (src.is_single_phys_reg() && dst.is_single_phys_reg())
fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); else
strs(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
}
}
// A long move void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack
ldr(tmp, Address(rfp, reg2offset_in(src.first())));
str(tmp, Address(sp, reg2offset_out(dst.first())));
} else { // stack to reg
ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
}
} elseif (dst.first()->is_stack()) { // reg to stack // Do we really have to sign extend??? // __ movslq(src.first()->as_Register(), src.first()->as_Register());
str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
} else { if (dst.first() != src.first()) {
mov(dst.first()->as_Register(), src.first()->as_Register());
}
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.