/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// MacroAssembler extends Assembler by frequently used macros. // // Instructions for which a 'better' code sequence exists depending // on arguments should also go in here.
class MacroAssembler: public Assembler {
  // LIR_Assembler (C1 back end) needs access to protected emit helpers.
  friend class LIR_Assembler;

 public:
  using Assembler::mov;
  using Assembler::movi;

 protected:
// Support for VM calls // // This is the base routine called by the different versions of call_VM_leaf. The interpreter // may customize this version by overriding it for its purposes (e.g., to save/restore // additional registers when doing a VM call). virtualvoid call_VM_leaf_base(
address entry_point, // the entry point int number_of_arguments, // the number of arguments to pop after the call
Label *retaddr = NULL
);
virtualvoid call_VM_leaf_base(
address entry_point, // the entry point int number_of_arguments, // the number of arguments to pop after the call
Label &retaddr) {
call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
}
// This is the base routine called by the different versions of call_VM. The interpreter // may customize this version by overriding it for its purposes (e.g., to save/restore // additional registers when doing a VM call). // // If no java_thread register is specified (noreg) than rthread will be used instead. call_VM_base // returns the register which contains the thread upon return. If a thread register has been // specified, the return value will correspond to that register. If no last_java_sp is specified // (noreg) than rsp will be used instead. virtualvoid call_VM_base( // returns the register containing the thread upon return Register oop_result, // where an oop-result ends up if any; use noreg otherwise Register java_thread, // the thread if computed before ; use noreg otherwise Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
address entry_point, // the entry point int number_of_arguments, // the number of arguments (w/o thread) to pop after the call bool check_exceptions // whether to check for pending exceptions after return
);
// These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. // The implementation is only non-empty for the InterpreterMacroAssembler, // as only the interpreter handles PopFrame and ForceEarlyReturn requests. virtualvoid check_and_handle_popframe(Register java_thread); virtualvoid check_and_handle_earlyret(Register java_thread);
// Sometimes we get misaligned loads and stores, usually from Unsafe
// accesses, and these can exceed the offset range of an immediate-offset
// ldr/str. Rewrite such an address through a scratch register so it is
// always encodable; otherwise return the address unchanged.
Address legitimize_address(const Address &a, int size, Register scratch) {
  // Only base+offset addresses can be out of immediate range.
  if (a.getMode() != Address::base_plus_offset) {
    return a;
  }
  // If the offset already fits the scaled immediate encoding, keep it.
  if (Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
    return a;
  }
  // Materialize the full effective address into the scratch register.
  block_comment("legitimize_address {");
  lea(scratch, a);
  block_comment("} legitimize_address");
  return Address(scratch);
}
// Microsoft's MSVC team thinks that the __FUNCSIG__ is approximately (sympathy for calling conventions) equivalent to __PRETTY_FUNCTION__ // Also, from Clang patch: "It is very similar to GCC's PRETTY_FUNCTION, except it prints the calling convention." // https://reviews.llvm.org/D3311
// Push and pop everything that might be clobbered by a native // runtime call except rscratch1 and rscratch2. (They are always // scratch, so we don't have to protect them.) Only save the lower // 64 bits of each vector register. Additional registers can be excluded // in a passed RegSet. void push_call_clobbered_registers_except(RegSet exclude); void pop_call_clobbered_registers_except(RegSet exclude);
// Generalized Test Bit And Branch, including a "far" variety which // spans more than 32KiB. void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool isfar = false) {
assert(cond == EQ || cond == NE, "must be");
if (isfar)
cond = ~cond;
void (Assembler::* branch)(Register Rt, int bitpos, Label &L); if (cond == Assembler::EQ)
branch = &Assembler::tbz; else
branch = &Assembler::tbnz;
// idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register ra, Register rb, bool want_remainder, Register tmp = rscratch1); int corrected_idivq(Register result, Register ra, Register rb, bool want_remainder, Register tmp = rscratch1);
// Support for NULL-checks // // Generates code that causes a NULL OS exception if the content of reg is NULL. // If the accessed location is M[reg + offset] and the offset is known, provide the // offset. No explicit code generation is needed if the offset is within a certain // range (0 <= offset <= page_size).
// Return whether code is emitted to a scratch blob. virtualbool in_scratch_emit_size() { returnfalse;
}
// Emit the trampoline stub for the call whose BL sits at
// insts_call_instruction_offset; returns the stub's address.
address emit_trampoline_stub(int insts_call_instruction_offset, address target);
void emit_static_call_stub();

// The following 4 methods return the offset of the appropriate move instruction

// Support for fast byte/short loading with zero extension (depending on particular CPU)
int load_unsigned_byte(Register dst, Address src);
int load_unsigned_short(Register dst, Address src);

// Support for fast byte/short loading with sign extension (depending on particular CPU)
int load_signed_byte(Register dst, Address src);
int load_signed_short(Register dst, Address src);

// 32-bit-register variants of the sign-extending loads above.
int load_signed_byte32(Register dst, Address src);
int load_signed_short32(Register dst, Address src);

// Support for sign-extension (hi:lo = extend_sign(lo))
void extend_sign(Register hi, Register lo);

// Load and store values by size and signed-ness
void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
void store_sized_value(Address dst, Register src, size_t size_in_bytes);

// Support for inc/dec with optimal instruction selection depending on value

// x86_64 aliases an unqualified register/address increment and
// decrement to call incrementq and decrementq but also supports
// explicitly sized calls to incrementq/decrementq or
// incrementl/decrementl

// for aarch64 the proper convention would be to use
// increment/decrement for 64 bit operations and
// incrementw/decrementw for 32 bit operations. so when porting
// x86_64 code we can leave calls to increment/decrement as is,
// replace incrementq/decrementq with increment/decrement and
// replace incrementl/decrementl with incrementw/decrementw.

// n.b. increment/decrement calls with an Address destination will
// need to use a scratch register to load the value to be
// incremented. increment/decrement calls which add or subtract a
// constant value greater than 2^12 will need to use a 2nd scratch
// register to hold the constant. so, a register increment/decrement
// may trash rscratch2 and an address increment/decrement trash
// rscratch and rscratch2

void decrementw(Address dst, int value = 1);
void decrementw(Register reg, int value = 1);

void decrement(Register reg, int value = 1);
void decrement(Address dst, int value = 1);

void incrementw(Address dst, int value = 1);
void incrementw(Register reg, int value = 1);

void increment(Register reg, int value = 1);
void increment(Address dst, int value = 1);

// Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
// The pointer will be loaded into the thread register.
void get_thread(Register thread);
// Support for VM calls
//
// It is imperative that all calls into the VM are handled via the call_VM macros.
// They make sure that the stack linkage is setup correctly. call_VM's correspond
// to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.

// thread in the default location (rthread)
void reset_last_Java_frame(bool clear_fp);

// Stores
void store_check(Register obj);                // store check for obj - register is destroyed afterwards
void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

// currently unimplemented
// Used for storing NULL. All other oop constants should be
// stored using routines that take a jobject.
void store_heap_oop_null(Address dst);

void store_klass_gap(Register dst, Register src);

// This dummy is to prevent a call to store_heap_oop from
// converting a zero (like NULL) into a Register by giving
// the compiler two choices it can't resolve

// allocation
void tlab_allocate(
  Register obj,               // result: pointer to object after successful allocation
  Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes, // object size in bytes if known at compile time
  Register t1,                // temp register
  Register t2,                // temp register
  Label&   slow_case          // continuation point if fast allocation fails
);
void verify_tlab();
// virtual method calling
// n.b. x86 allows RegisterOrConstant for vtable_index
void lookup_virtual_method(Register recv_klass,
                           RegisterOrConstant vtable_index,
                           Register method_result);

// Test sub_klass against super_klass, with fast and slow paths.

// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be NULL, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except temp_reg.
void check_klass_subtype_fast_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp_reg,
                                   Label* L_success,
                                   Label* L_failure,
                                   Label* L_slow_path,
                                   RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg and temp2_reg can be noreg, if no temps are available.
// Updates the sub's secondary super cache as necessary.
// If set_cond_codes, condition codes will be Z on success, NZ on failure.
void check_klass_subtype_slow_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp_reg,
                                   Register temp2_reg,
                                   Label* L_success,
                                   Label* L_failure,
                                   bool set_cond_codes = false);

// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
                         Register super_klass,
                         Register temp_reg,
                         Label& L_success);
#ifdef ASSERT
// Template short-hand support to clean-up after a failed call to trampoline
// call generation (see trampoline_call() below), when a set of Labels must
// be reset (before returning).

// Base case: reset a single label.
template<typename Label>
void reset_labels(Label &label) {
  label.reset();
}

// Recursive case: reset the first label, then the rest of the pack.
// (Member overloads are visible regardless of declaration order.)
template<typename Label, typename... More>
void reset_labels(Label &label, More&... rest) {
  label.reset();
  reset_labels(rest...);
}
#endif
public:

// AArch64 OpenJDK uses four different types of calls:
//   - direct call: bl pc_relative_offset
//     This is the shortest and the fastest, but the offset has the range:
//     +/-128MB for the release build, +/-2MB for the debug build.
//
//   - far call: adrp reg, pc_relative_offset; add; bl reg
//     This is longer than a direct call. The offset has
//     the range +/-4GB. As the code cache size is limited to 4GB,
//     far calls can reach anywhere in the code cache. If a jump is
//     needed rather than a call, a far jump 'b reg' can be used instead.
//     All instructions are embedded at a call site.
//
//   - trampoline call:
//     This is only available in C1/C2-generated code (nmethod). It is a combination
//     of a direct call, which is used if the destination of a call is in range,
//     and a register-indirect call. It has the advantages of reaching anywhere in
//     the AArch64 address space and being patchable at runtime when the generated
//     code is being executed by other threads.
//
//     [Main code section]
//       bl trampoline
//     [Stub code section]
//     trampoline:
//       ldr reg, pc + 8
//       br reg
//       <64-bit destination address>
//
//     If the destination is in range when the generated code is moved to the code
//     cache, 'bl trampoline' is replaced with 'bl destination' and the trampoline
//     is not used.
//     The optimization does not remove the trampoline from the stub section.
//     This is necessary because the trampoline may well be redirected later when
//     code is patched, and the new destination may not be reachable by a simple BR
//     instruction.
//
//   - indirect call: move reg, address; blr reg
//     This too can reach anywhere in the address space, but it cannot be
//     patched while code is running, so it must only be modified at a safepoint.
//     This form of call is most suitable for targets at fixed addresses, which
//     will never be patched.
//
// The patching we do conforms to the "Concurrent modification and
// execution of instructions" section of the Arm Architectural
// Reference Manual, which only allows B, BL, BRK, HVC, ISB, NOP, SMC,
// or SVC instructions to be modified while another thread is
// executing them.
//
// To patch a trampoline call when the BL can't reach, we first modify
// the 64-bit destination address in the trampoline, then modify the
// BL to point to the trampoline, then flush the instruction cache to
// broadcast the change to all executing threads. See
// NativeCall::set_destination_mt_safe for the details.
//
// There is a benign race in that the other thread might observe the
// modified BL before it observes the modified 64-bit destination
// address. That does not matter because the destination method has been
// invalidated, so there will be a trap at its start.
// For this to work, the destination address in the trampoline is
// always updated, even if we're not using the trampoline.

// Emit a direct call if the entry address will always be in range,
// otherwise a trampoline call.
// Supported entry.rspec():
// - relocInfo::runtime_call_type
// - relocInfo::opt_virtual_call_type
// - relocInfo::static_call_type
// - relocInfo::virtual_call_type
//
// Return: the call PC or NULL if CodeCache is full.
address trampoline_call(Address entry);
// Check if branches to the non nmethod section require a far jump staticbool codestub_branch_needs_far_jump() { return CodeCache::max_distance_to_non_nmethod() > branch_range;
}
// Emit a direct call/jump if the entry address will always be in range,
// otherwise a far call/jump.
// The address must be inside the code cache.
// Supported entry.rspec():
// - relocInfo::external_word_type
// - relocInfo::runtime_call_type
// - relocInfo::none
// In the case of a far call/jump, the entry address is put in the tmp register.
// The tmp register is invalidated.
//
// Far_jump returns the amount of the emitted code.
void far_call(Address entry, Register tmp = rscratch1);
int far_jump(Address entry, Register tmp = rscratch1);

// If a constant does not fit in an immediate field, generate some
// number of MOV instructions and then perform the operation
void wrap_add_sub_imm_insn(Register Rd, Register Rn, uint64_t imm,
                           add_sub_imm_insn insn1,
                           add_sub_reg_insn insn2, bool is32);
// Separate vsn which sets the flags
void wrap_adds_subs_imm_insn(Register Rd, Register Rn, uint64_t imm,
                             add_sub_imm_insn insn1,
                             add_sub_reg_insn insn2, bool is32);

// Form an address from base + offset in Rd. Rd may or may not
// actually be used: you must use the Address that is returned. It
// is up to you to ensure that the shift provided matches the size
// of your data.
Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);
// Return true iff an address is within the 48-bit AArch64 address // space. bool is_valid_AArch64_address(address a) { return ((uint64_t)a >> 48) == 0;
}
// Load the base of the cardtable byte map into reg.
void load_byte_map_base(Register reg);

// Prolog generator routines to support switch between x86 code and
// generated ARM code

// routine to generate an x86 prolog for a stub function which
// bootstraps into the generated ARM code which directly follows the
// stub
//

// Place an ISB after code may have been modified due to a safepoint.
void safepoint_isb();

private:
// Return the effective address r + (r1 << ext) + offset.
// Uses rscratch2.
Address offsetted_address(Register r, Register r1, Address::extend ext,
                          int offset, int size);

// NOTE(review): redundant access specifier — this section is already private.
private:
// Returns an address on the stack which is reachable with a ldr/str of size
// Uses rscratch2 if the address is not directly reachable
Address spill_address(int size, int offset, Register tmp=rscratch2);
// SVE variant: offset is scaled by the SVE register size in bytes.
Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 *
 * NOTE(review): the remainder of this class (constructor/destructor and the
 * closing brace) is not visible in this chunk — the text is truncated here.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;  // assembler used to emit the skip branch
  Label _label;           // skip target; presumably bound when the scope ends — rest of class not visible here
// NOTE(review): the original text at this point contained unrelated German
// website boilerplate ("Die Informationen auf dieser Webseite wurden ...",
// a content disclaimer plus a note that the colored syntax display is
// experimental) — clearly residue from a web-page capture, not source code.
// The remainder of class SkipIfEqual, the rest of class MacroAssembler, and
// the closing braces are missing and must be restored from the upstream file.