/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// If insn1 and insn2 use the same register to form an address, either // by an offsetted LDR or a simple ADD, return the offset. If the // second instruction is an LDR, the offset may be scaled. staticbool offset_for(uint32_t insn1, uint32_t insn2, ptrdiff_t &byte_offset) { if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
Instruction_aarch64::extract(insn1, 4, 0) ==
Instruction_aarch64::extract(insn2, 9, 5)) { // Load/store register (unsigned immediate)
byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
uint32_t size = Instruction_aarch64::extract(insn2, 31, 30);
byte_offset <<= size; returntrue;
} elseif (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
Instruction_aarch64::extract(insn1, 4, 0) ==
Instruction_aarch64::extract(insn2, 4, 0)) { // add (immediate)
byte_offset = Instruction_aarch64::extract(insn2, 21, 10); returntrue;
} returnfalse;
}
// Patch any kind of instruction; there may be several instructions. // Return the total length (in bytes) of the instructions. int MacroAssembler::pd_patch_instruction_size(address insn_addr, address target) {
Patcher patcher(insn_addr); return patcher.run(insn_addr, target);
}
// Patch an oop-materializing mov sequence in place at insn_addr so it
// produces the oop `o`.  Returns the number of bytes patched (2 or 3
// instructions' worth, depending on narrow vs wide encoding).
int MacroAssembler::patch_oop(address insn_addr, address o) { int instructions; unsigned insn = *(unsigned*)insn_addr;
// The second instruction is a MOVK in both encodings.
assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
// OOPs are either narrow (32 bits) or wide (48 bits). We encode // narrow OOPs by setting the upper 16 bits in the first // instruction. if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) { // Move narrow OOP
uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
// Narrow oop: high halfword in the first insn's imm16 field (bits 20..5),
// low halfword in the following MOVK.
Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
instructions = 2;
} else { // Move wide OOP
// Wide oop: three 16-bit chunks of a 48-bit address, low chunk first.
assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
uintptr_t dest = (uintptr_t)o;
// Note: dest is shifted destructively between patches (>>= 16).
Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
instructions = 3;
} return instructions * NativeInstruction::instruction_size;
}
// Patch a two-instruction sequence at insn_addr to materialize the
// narrow klass value `n`.  Returns the number of bytes patched.
int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) { // Metadata pointers are either narrow (32 bits) or wide (48 bits). // We encode narrow ones by setting the upper 16 bits in the first // instruction.
NativeInstruction *insn = nativeInstruction_at(insn_addr);
// First insn must match the expected narrow-encoding opcode pattern and
// must be followed by a MOVK for the low halfword.
assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
// High halfword into the first insn's imm16 field, low halfword into the MOVK.
Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); return 2 * NativeInstruction::instruction_size;
}
// Clear the last-Java-frame anchor fields in the current JavaThread.
// Zeroes last_Java_sp, optionally last_Java_fp, and always last_Java_pc.
// NOTE(review): the store order (sp first) may matter to concurrent stack
// walkers observing the anchor — preserve it if modifying.
void MacroAssembler::reset_last_Java_frame(bool clear_fp) { // we must set sp to zero to clear frame
str(zr, Address(rthread, JavaThread::last_Java_sp_offset()));
// must clear fp, so that compiled frames are not confused; it is // possible that we need it only for debugging if (clear_fp) {
str(zr, Address(rthread, JavaThread::last_Java_fp_offset()));
}
// Always clear the pc because it could have been set by make_walkable()
str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
}
// Calls to C land // // When entering C land, the rfp, & resp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register scratch) {
if (last_java_pc->is_valid()) {
str(last_java_pc, Address(rthread,
JavaThread::frame_anchor_offset()
+ JavaFrameAnchor::last_Java_pc_offset()));
}
// Emit a call to `entry`, which must lie inside the code cache.  Uses a
// direct BL when the target is in branch range; otherwise materializes
// the target into `tmp` with ADRP+ADD and does an indirect BLR.
void MacroAssembler::far_call(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  if (!target_needs_far_branch(entry.target())) {
    // Close enough for a single branch-and-link.
    bl(entry);
    return;
  }

  // We can use ADRP here because we know that the total size of
  // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
  uint64_t adrp_offset;
  adrp(tmp, entry, adrp_offset);
  add(tmp, tmp, adrp_offset);
  blr(tmp);
}
// Emit a jump to `entry`, which must lie inside the code cache.  Uses a
// direct B when in branch range, otherwise ADRP+ADD into `tmp` followed
// by an indirect BR.  Returns the number of bytes emitted.
int MacroAssembler::far_jump(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");

  const address start = pc();
  if (target_needs_far_branch(entry.target())) {
    // We can use ADRP here because we know that the total size of
    // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
    uint64_t adrp_offset;
    adrp(tmp, entry, adrp_offset);
    add(tmp, tmp, adrp_offset);
    br(tmp);
  } else {
    // Close enough for a single direct branch.
    b(entry);
  }
  return pc() - start;
}
void MacroAssembler::reserved_stack_check() { // testing if reserved zone needs to be enabled
Label no_reserved_zone_enabling;
enter(); // LR and FP are live.
lea(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
mov(c_rarg0, rthread);
blr(rscratch1);
leave();
// We have already removed our own frame. // throw_delayed_StackOverflowError will think that it's been // called by our caller.
lea(rscratch1, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()));
br(rscratch1);
should_not_reach_here();
// debugging support
assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
assert(java_thread == rthread, "unexpected register"); #ifdef ASSERT // TraceBytecodes does not use r12 but saves it over the call, so don't verify // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?"); #endif// ASSERT
assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"an>);
assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"span>);
// push java thread (becomes first argument of C function)
mov(c_rarg0, java_thread);
// set last Java frame before call
assert(last_java_sp != rfp, "can't use rfp");
// do the call, remove parameters
MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
// lr could be poisoned with PAC signature during throw_pending_exception // if it was tail-call optimized by compiler, since lr is not callee-saved // reload it with proper value
adr(lr, l);
// reset last Java frame // Only interpreter should have to clear fp
reset_last_Java_frame(true);
// C++ interp handles this in the interpreter
check_and_handle_popframe(java_thread);
check_and_handle_earlyret(java_thread);
if (check_exceptions) { // check for pending exceptions (java_thread is set upon return)
ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
Label ok;
cbz(rscratch1, ok);
lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
br(rscratch1);
bind(ok);
}
// get oop result if there is one and reset the value in the thread if (oop_result->is_valid()) {
get_vm_result(oop_result, java_thread);
}
}
// Check the entry target is always reachable from any branch. staticbool is_always_within_branch_range(Address entry) { const address target = entry.target();
if (!CodeCache::contains(target)) { // We always use trampolines for callees outside CodeCache.
assert(entry.rspec().type() == relocInfo::runtime_call_type, "non-runtime call of an external target"); returnfalse;
}
if (!MacroAssembler::far_branches()) { returntrue;
}
if (entry.rspec().type() == relocInfo::runtime_call_type) { // Runtime calls are calls of a non-compiled method (stubs, adapters). // Non-compiled methods stay forever in CodeCache. // We check whether the longest possible branch is within the branch range.
assert(CodeCache::find_blob(target) != NULL &&
!CodeCache::find_blob(target)->is_compiled(), "runtime call of compiled method"); const address right_longest_branch_start = CodeCache::high_bound() - NativeInstruction::instruction_size; const address left_longest_branch_start = CodeCache::low_bound(); constbool is_reachable = Assembler::reachable_from_branch_at(left_longest_branch_start, target) &&
Assembler::reachable_from_branch_at(right_longest_branch_start, target); return is_reachable;
}
returnfalse;
}
// Maybe emit a call via a trampoline. If the code cache is small // trampolines won't be emitted.
address MacroAssembler::trampoline_call(Address entry) {
assert(entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::opt_virtual_call_type
|| entry.rspec().type() == relocInfo::static_call_type
|| entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
address target = entry.target();
if (!is_always_within_branch_range(entry)) { if (!in_scratch_emit_size()) { // We don't want to emit a trampoline if C2 is generating dummy // code during its branch shortening phase. if (entry.rspec().type() == relocInfo::runtime_call_type) {
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
code()->share_trampoline_for(entry.target(), offset());
} else {
address stub = emit_trampoline_stub(offset(), target); if (stub == NULL) {
postcond(pc() == badAddress); return NULL; // CodeCache is full
}
}
}
target = pc();
}
// Emit a trampoline stub for a call to a target which is too far away. // // code sequences: // // call-site: // branch-and-link to <destination> or <trampoline stub> // // Related trampoline stub for this call site in the stub section: // load the call target from the constant pool // branch (LR still points to the call site above)
// Create a trampoline stub relocation which relates this trampoline stub // with the call instruction at insts_call_instruction_offset in the // instructions code-section.
align(wordSize);
relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+ insts_call_instruction_offset)); constint stub_start_offset = offset();
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
end_a_stub(); return stub_start_addr;
}
// Emit the static call stub: isb, a patchable metadata move into rmethod,
// then a patchable jump through rscratch1.  The instruction layout here is
// a contract — CompiledDirectStaticCall::set_to_interpreted patches it by
// exact offsets, so do not reorder or insert instructions.
void MacroAssembler::emit_static_call_stub() { // CompiledDirectStaticCall::set_to_interpreted knows the // exact layout of this stub.
isb();
// Placeholder Method*; patched later to the real callee.
mov_metadata(rmethod, (Metadata*)NULL);
// Jump to the entry point of the c2i stub.
movptr(rscratch1, 0);
br(rscratch1);
}
// Normalize a C-style boolean in x to exactly 0 or 1.
// Tests only the least-significant byte (C booleans occupy one byte;
// the upper bytes may be garbage — looking at them all was a past bug),
// then sets x = 1 if that byte is non-zero, else 0.
void MacroAssembler::c2bool(Register x) { // implements x == 0 ? 0 : 1 // note: must only look at least-significant byte of x // since C-style booleans are stored in one byte // only! (was bug)
tst(x, 0xff);
cset(x, Assembler::NE);
}
// Look up the method for a megamorphic invokeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass. // On success, the result will be in method_result, and execution falls through. // On failure, execution transfers to the given label. void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass,
RegisterOrConstant itable_index, Register method_result, Register scan_temp,
Label& L_no_such_interface, bool return_method) {
assert_different_registers(recv_klass, intf_klass, scan_temp);
assert_different_registers(method_result, intf_klass, scan_temp);
assert(recv_klass != method_result || !return_method, "recv_klass can be destroyed when method isn't needed");
assert(itable_index.is_constant() || itable_index.as_register() == method_result, "caller must use same register for non-constant itable index as for method");
// Compute start of first itableOffsetEntry (which is at the end of the vtable) int vtable_base = in_bytes(Klass::vtable_start_offset()); int itentry_off = itableMethodEntry::method_offset_in_bytes(); int scan_step = itableOffsetEntry::size() * wordSize; int vte_size = vtableEntry::size_in_bytes();
assert(vte_size == wordSize, "else adjust times_vte_scale");
// %%% Could store the aligned, prescaled offset in the klassoop. // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
add(scan_temp, scan_temp, vtable_base);
if (return_method) { // Adjust recv_klass by scaled itable_index, so we can free itable_index.
assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3))); if (itentry_off)
add(recv_klass, recv_klass, itentry_off);
}
ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
cmp(intf_klass, method_result);
br(Assembler::EQ, found_method);
bind(search); // Check that the previous entry is non-null. A null entry means that // the receiver class doesn't implement the interface, and wasn't the // same as when the caller was compiled.
cbz(method_result, L_no_such_interface); if (itableOffsetEntry::interface_offset_in_bytes() != 0) {
add(scan_temp, scan_temp, scan_step);
ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
} else {
ldr(method_result, Address(pre(scan_temp, scan_step)));
}
cmp(intf_klass, method_result);
br(Assembler::NE, search);
bind(found_method);
// Got a hit. if (return_method) {
ldrw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
ldr(method_result, Address(recv_klass, scan_temp, Address::uxtw(0)));
}
}
// virtual method calling void MacroAssembler::lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index, Register method_result) { constint base = in_bytes(Klass::vtable_start_offset());
assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
// Fast-path subtype check: compares sub_klass against super_klass via the
// supertype display / super cache.  Branches to L_success, L_failure, or
// L_slow_path; any one of the three may be NULL, meaning "fall through".
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // Replace NULL labels by L_fallthrough; at most one may be NULL.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            b(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  br(Assembler::EQ, *L_success);

  // Check the supertype display:
  // NOTE(review): restored — the display load/compare below was missing
  // from this text; without it the EQ/NE branches that follow would test
  // a stale condition (matches upstream OpenJDK).
  if (must_load_sco) {
    ldrw(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset);
  ldr(rscratch1, super_check_addr);
  cmp(super_klass, rscratch1); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).
  if (super_check_offset.is_register()) {
    br(Assembler::EQ, *L_success);
    subs(zr, super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_slow_path);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef final_jmp
}
// These two are taken from x86, but they look generally useful
Label L_fallthrough; int label_nulls = 0; if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass: int ss_offset = in_bytes(Klass::secondary_supers_offset()); int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
BLOCK_COMMENT("check_klass_subtype_slow_path");
// Do a linear scan of the secondary super-klass chain. // This code is rarely used, so simplicity is a virtue here. // The repne_scan instruction uses fixed registers, which we must spill. // Don't worry too much about pre-existing connections with the input regs.
assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super)
assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter)
RegSet pushed_registers; if (!IS_A_TEMP(r2)) pushed_registers += r2; if (!IS_A_TEMP(r5)) pushed_registers += r5;
if (super_klass != r0) { if (!IS_A_TEMP(r0)) pushed_registers += r0;
}
push(pushed_registers, sp);
// Get super_klass value into r0 (even if it was in r5 or r2). if (super_klass != r0) {
mov(r0, super_klass);
}
// We will consult the secondary-super array.
ldr(r5, secondary_supers_addr); // Load the array length.
ldrw(r2, Address(r5, Array<Klass*>::length_offset_in_bytes())); // Skip to start of data.
add(r5, r5, Array<Klass*>::base_offset_in_bytes());
cmp(sp, zr); // Clear Z flag; SP is never zero // Scan R2 words at [R5] for an occurrence of R0. // Set NZ/Z based on last compare.
repne_scan(r5, r0, r2, rscratch1);
// Unspill the temp. registers:
pop(pushed_registers, sp);
br(Assembler::NE, *L_failure);
// Success. Cache the super we found and proceed in triumph.
str(super_klass, super_cache_addr);
if (L_success != &L_fallthrough) {
b(*L_success);
}
#undef IS_A_TEMP
bind(L_fallthrough);
}
// Class-initialization barrier: branch to *L_fast_path if `klass` is fully
// initialized or is being initialized by the current thread; otherwise
// branch to *L_slow_path.  Exactly one of the labels may be NULL, meaning
// "fall through".  Clobbers `scratch` and the condition flags.
//
// NOTE(review): the tail of this function (the final branch dispatch and
// bind of L_fallthrough) was missing from this text; reconstructed to
// match upstream OpenJDK — verify against the original file.
void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
  assert_different_registers(klass, rthread, scratch);

  Label L_fallthrough, L_tmp;
  if (L_fast_path == NULL) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == NULL) {
    L_slow_path = &L_fallthrough;
  }
  // Fast path check: class is fully initialized
  ldrb(scratch, Address(klass, InstanceKlass::init_state_offset()));
  subs(zr, scratch, InstanceKlass::fully_initialized);
  br(Assembler::EQ, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
  cmp(rthread, scratch);

  if (L_slow_path == &L_fallthrough) {
    br(Assembler::EQ, *L_fast_path);
    bind(L_fallthrough);
  } else if (L_fast_path == &L_fallthrough) {
    br(Assembler::NE, *L_slow_path);
    bind(L_fallthrough);
  } else {
    Unimplemented();
  }
}
strip_return_address(); // This might happen within a stack frame.
protect_return_address();
stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));
// addr may contain sp so we will have to adjust it based on the // pushes that we just did. if (addr.uses(sp)) {
lea(r0, addr);
ldr(r0, Address(r0, 4 * wordSize));
} else {
ldr(r0, addr);
}
movptr(rscratch1, (uintptr_t)(address)b);
// call indirectly to solve generation ordering problem
lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
ldr(rscratch2, Address(rscratch2));
blr(rscratch2);
// Null-check `reg`.  When `offset` is small enough for the implicit
// null-check machinery, the later access of M[reg + offset] will itself
// fault on NULL, so nothing is emitted.  Otherwise touch M[reg] here to
// provoke the OS NULL exception without changing any registers.
void MacroAssembler::null_check(Register reg, int offset) {
  if (!needs_explicit_null_check(offset)) {
    // Nothing to do: the (later) access will provoke the OS NULL
    // exception if reg == NULL.
    return;
  }
  // Loading into zr discards the value — this is plenty to provoke a segv.
  ldr(zr, Address(reg));
}
// MacroAssembler protected routines needed to implement // public methods
// Move a constant pointer into r. In AArch64 mode the virtual // address space is 48 bits in size, so we only need three // instructions to create a patchable instruction sequence that can // reach anywhere. void MacroAssembler::movptr(Register r, uintptr_t imm64) { #ifndef PRODUCT
{ char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, (uint64_t)imm64);
block_comment(buffer);
} #endif
assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
movz(r, imm64 & 0xffff);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 16);
imm64 >>= 16;
movk(r, imm64 & 0xffff, 32);
}
// Macro to mov replicated immediate to vector register. // imm64: only the lower 8/16/32 bits are considered for B/H/S type. That is, // the upper 56/48/32 bits must be zeros for B/H/S type. // Vd will get the following values for different arrangements in T // imm64 == hex 000000gh T8B: Vd = ghghghghghghghgh // imm64 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh // imm64 == hex 0000efgh T4H: Vd = efghefghefghefgh // imm64 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh // imm64 == hex abcdefgh T2S: Vd = abcdefghabcdefgh // imm64 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh // imm64 == hex abcdefgh T1D: Vd = 00000000abcdefgh // imm64 == hex abcdefgh T2D: Vd = 00000000abcdefgh00000000abcdefgh // Clobbers rscratch1 void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
assert(T != T1Q, "unsupported"); if (T == T1D || T == T2D) { int imm = operand_valid_for_movi_immediate(imm64, T); if (-1 != imm) {
movi(Vd, T, imm);
} else {
mov(rscratch1, imm64);
dup(Vd, T, rscratch1);
} return;
}
void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
{ #ifndef PRODUCT
{ char buffer[64];
snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
block_comment(buffer);
} #endif if (operand_valid_for_logical_immediate(false, imm64)) {
orr(dst, zr, imm64);
} else { // we can use a combination of MOVZ or MOVN with // MOVK to build up the constant
uint64_t imm_h[4]; int zero_count = 0; int neg_count = 0; int i; for (i = 0; i < 4; i++) {
imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL); if (imm_h[i] == 0) {
zero_count++;
} elseif (imm_h[i] == 0xffffL) {
neg_count++;
}
} if (zero_count == 4) { // one MOVZ will do
movz(dst, 0);
} elseif (neg_count == 4) { // one MOVN will do
movn(dst, 0);
} elseif (zero_count == 3) { for (i = 0; i < 4; i++) { if (imm_h[i] != 0L) {
movz(dst, (uint32_t)imm_h[i], (i << 4)); break;
}
}
} elseif (neg_count == 3) { // one MOVN will do for (int i = 0; i < 4; i++) { if (imm_h[i] != 0xffffL) {
movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4)); break;
}
}
} elseif (zero_count == 2) { // one MOVZ and one MOVK will do for (i = 0; i < 3; i++) { if (imm_h[i] != 0L) {
movz(dst, (uint32_t)imm_h[i], (i << 4));
i++; break;
}
} for (;i < 4; i++) { if (imm_h[i] != 0L) {
movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} elseif (neg_count == 2) { // one MOVN and one MOVK will do for (i = 0; i < 4; i++) { if (imm_h[i] != 0xffffL) {
movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
i++; break;
}
} for (;i < 4; i++) { if (imm_h[i] != 0xffffL) {
movk(dst, (uint32_t)imm_h[i], (i << 4));
}
}
} elseif (zero_count == 1) { // one MOVZ and two MOVKs will do for (i = 0; i < 4; i++) { if (imm_h[i] != 0L) {
movz(dst, (uint32_t)imm_h[i], (i << 4));
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.