/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.
// Stack frame creation/removal.
// Note that SP must be updated to the right place before saving/restoring
// RA and FP because signal based thread suspend/resume could happen
// asynchronously.
void enter() {
  addi(sp, sp, - 2 * wordSize);  // reserve two slots first (see note above)
  sd(ra, Address(sp, wordSize)); // save return address
  sd(fp, Address(sp));           // save caller's frame pointer
  addi(fp, sp, 2 * wordSize);    // fp = value sp had on entry
}
// Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) // The pointer will be loaded into the thread register. void get_thread(Register thread);
// Support for VM calls // // It is imperative that all calls into the VM are handled via the call_VM macros. // They make sure that the stack linkage is setup correctly. call_VM's correspond // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
// thread in the default location (xthread) void reset_last_Java_frame(bool clear_fp);
virtualvoid call_VM_leaf_base(
address entry_point, // the entry point int number_of_arguments, // the number of arguments to pop after the call
Label* retaddr = NULL
);
virtualvoid call_VM_leaf_base(
address entry_point, // the entry point int number_of_arguments, // the number of arguments to pop after the call
Label& retaddr) {
call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
}
virtualvoid call_VM_base( // returns the register containing the thread upon return Register oop_result, // where an oop-result ends up if any; use noreg otherwise Register java_thread, // the thread if computed before ; use noreg otherwise Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
address entry_point, // the entry point int number_of_arguments, // the number of arguments (w/o thread) to pop after the call bool check_exceptions // whether to check for pending exceptions after return
);
// Shared helper used by the call_VM macros (see call_VM_base above).
void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
// currently unimplemented
// Used for storing NULL. All other oop constants should be
// stored using routines that take a jobject.
void store_heap_oop_null(Address dst);

// This dummy is to prevent a call to store_heap_oop from
// converting a zero (linked NULL) into a Register by giving
// the compiler two choices it can't resolve.
void store_heap_oop(Address dst, void* dummy);

// Support for NULL-checks
//
// Generates code that causes a NULL OS exception if the content of reg is NULL.
// If the accessed location is M[reg + offset] and the offset is known, provide
// the offset. No explicit code generation is needed if the offset is within a
// certain range (0 <= offset <= page_size).
// idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register rs1, Register rs2, bool want_remainder); int corrected_idivq(Register result, Register rs1, Register rs2, bool want_remainder);
// virtual method calling
// n.b. x86 allows RegisterOrConstant for vtable_index
void lookup_virtual_method(Register recv_klass,
                           RegisterOrConstant vtable_index,
                           Register method_result);
// Form an address from base + offset in Rd. Rd may or may not
// actually be used: you must use the Address that is returned. It
// is up to you to ensure that the shift provided matches the size
// of your data.
Address form_address(Register Rd, Register base, long byte_offset);
// allocation
void tlab_allocate(
  Register obj,               // result: pointer to object after successful allocation
  Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
  int con_size_in_bytes,      // object size in bytes if known at compile time
  Register tmp1,              // temp register
  Register tmp2,              // temp register
  Label& slow_case,           // continuation point if fast allocation fails
  bool is_far = false
);
// Test sub_klass against super_klass, with fast and slow paths.
//
// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be NULL, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except tmp_reg.
void check_klass_subtype_fast_path(Register sub_klass,
                                   Register super_klass,
                                   Register tmp_reg,
                                   Label* L_success,
                                   Label* L_failure,
                                   Label* L_slow_path,
                                   Register super_check_offset = noreg);
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
// Updates the sub's secondary super cache as necessary.
void check_klass_subtype_slow_path(Register sub_klass,
                                   Register super_klass,
                                   Register tmp1_reg,
                                   Register tmp2_reg,
                                   Label* L_success,
                                   Label* L_failure);
// A more convenient access to fence for our purposes.
// We use four bits to indicate the read and write bits in the
// predecessors and successors, and extend i to r and o to w if
// UseConservativeFence is enabled.
enum Membar_mask_bits {
  StoreStore = 0b0101,               // (pred = ow   + succ = ow)
  LoadStore  = 0b1001,               // (pred = ir   + succ = ow)
  StoreLoad  = 0b0110,               // (pred = ow   + succ = ir)
  LoadLoad   = 0b1010,               // (pred = ir   + succ = ir)
  AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
};
// The following 4 methods return the offset of the appropriate move instruction
// Support for fast byte/short loading with zero extension (depending on particular CPU) int load_unsigned_byte(Register dst, Address src); int load_unsigned_short(Register dst, Address src);
// Support for fast byte/short loading with sign extension (depending on particular CPU) int load_signed_byte(Register dst, Address src); int load_signed_short(Register dst, Address src);
// Load and store values by size and signed-ness void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); void store_sized_value(Address dst, Register src, size_t size_in_bytes);
// Push and pop everything that might be clobbered by a native // runtime call except t0 and t1. (They are always // temporary registers, so we don't have to protect them.) // Additional registers can be excluded in a passed RegSet. void push_call_clobbered_registers_except(RegSet exclude); void pop_call_clobbered_registers_except(RegSet exclude);
// Emit a direct call/jump if the entry address will always be in range, // otherwise a far call/jump. // The address must be inside the code cache. // Supported entry.rspec(): // - relocInfo::external_word_type // - relocInfo::runtime_call_type // - relocInfo::none // In the case of a far call/jump, the entry address is put in the tmp register. // The tmp register is invalidated. void far_call(Address entry, Register tmp = t0); void far_jump(Address entry, Register tmp = t0);
// RISCV64 OpenJDK uses four different types of calls:
//   - direct call: jal pc_relative_offset
//     This is the shortest and the fastest, but the offset has the range:
//     +/-1MB.
//
//   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
//     This is longer than a direct call. The offset has
//     the range [-(2G + 2K), 2G - 2K). Addresses out of the range in the
//     code cache require an indirect call.
//     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
//     can be used instead.
//     All instructions are embedded at a call site.
//
//   - trampoline call:
//     This is only available in C1/C2-generated code (nmethod). It is a
//     combination of a direct call, which is used if the destination of a
//     call is in range, and a register-indirect call. It has the advantages
//     of reaching anywhere in the RISCV address space and being patchable at
//     runtime when the generated code is being executed by other threads.
//
//     [Main code section]
//       jal trampoline
//     [Stub code section]
//     trampoline:
//       ld reg, pc + 8 (auipc + ld)
//       jr reg
//       <64-bit destination address>
//
//     If the destination is in range when the generated code is moved to the
//     code cache, 'jal trampoline' is replaced with 'jal destination' and
//     the trampoline is not used.
//     The optimization does not remove the trampoline from the stub section.
//     This is necessary because the trampoline may well be redirected later
//     when code is patched, and the new destination may not be reachable by
//     a simple JAL instruction.
//
//   - indirect call: movptr + jalr
//     This too can reach anywhere in the address space, but it cannot be
//     patched while code is running, so it must only be modified at a
//     safepoint. This form of call is most suitable for targets at fixed
//     addresses, which will never be patched.
//
// To patch a trampoline call when the JAL can't reach, we first modify
// the 64-bit destination address in the trampoline, then modify the
// JAL to point to the trampoline, then flush the instruction cache to
// broadcast the change to all executing threads. See
// NativeCall::set_destination_mt_safe for the details.
//
// There is a benign race in that the other thread might observe the
// modified JAL before it observes the modified 64-bit destination
// address. That does not matter because the destination method has been
// invalidated, so there will be a trap at its start.
// For this to work, the destination address in the trampoline is
// always updated, even if we're not using the trampoline.
// Emit a direct call if the entry address will always be in range,
// otherwise a trampoline call.
// Supported entry.rspec():
// - relocInfo::runtime_call_type
// - relocInfo::opt_virtual_call_type
// - relocInfo::static_call_type
// - relocInfo::virtual_call_type
//
// Return: the call PC or NULL if CodeCache is full.
address trampoline_call(Address entry);

// Emit an inline-cache (IC) call to entry; method_index is presumably
// recorded in the call's relocation — confirm against the definition.
address ic_call(address entry, jint method_index = 0);
// Support for memory inc/dec // n.b. increment/decrement calls with an Address destination will // need to use a scratch register to load the value to be // incremented. increment/decrement calls which add or subtract a // constant value other than sign-extended 12-bit immediate will need // to use a 2nd scratch register to hold the constant. so, an address // increment/decrement may trash both t0 and t1.
// shift left by shamt and add void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
// Here the float instructions with safe deal with some exceptions. // e.g. convert from NaN, +Inf, -Inf to int, float, double // will trigger exception, we need to deal with these situations // to get correct results. void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
// vector load/store unit-stride instructions void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { switch (sew) { case Assembler::e64:
vle64_v(vd, base, vm); break; case Assembler::e32:
vle32_v(vd, base, vm); break; case Assembler::e16:
vle16_v(vd, base, vm); break; case Assembler::e8: // fall through default:
vle8_v(vd, base, vm); break;
}
}
// Unit-stride vector store: picks the vse{8,16,32,64}.v form matching the
// requested element width; e8 (and any unexpected value) uses the byte
// form, mirroring the original switch default.
void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
  if (sew == Assembler::e64) {
    vse64_v(store_data, base, vm);
  } else if (sew == Assembler::e32) {
    vse32_v(store_data, base, vm);
  } else if (sew == Assembler::e16) {
    vse16_v(store_data, base, vm);
  } else {
    // e8 and anything unexpected: byte stores
    vse8_v(store_data, base, vm);
  }
}
// Canonicalize the value in Rt for the given Java primitive type:
// booleans are normalized to 0/1, char is zero-extended from 16 bits,
// byte/short are sign-extended, int is sign-extended from 32 bits;
// long/void/float/double need no register fix-up here.
void cast_primitive_type(BasicType type, Register Rt) {
  if (type == T_BOOLEAN) {
    sltu(Rt, zr, Rt);        // any non-zero value becomes 1
  } else if (type == T_CHAR) {
    zero_extend(Rt, Rt, 16); // Java char is unsigned 16-bit
  } else if (type == T_BYTE) {
    sign_extend(Rt, Rt, 8);
  } else if (type == T_SHORT) {
    sign_extend(Rt, Rt, 16);
  } else if (type == T_INT) {
    addw(Rt, Rt, zr);        // sign-extend the low 32 bits
  } else if (type == T_LONG || type == T_VOID ||
             type == T_FLOAT || type == T_DOUBLE) {
    // nothing to do
  } else {
    ShouldNotReachHere();
  }
}
// float cmp with unordered_result void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
// Zero/Sign-extend void zero_extend(Register dst, Register src, int bits); void sign_extend(Register dst, Register src, int bits);
// compare src1 and src2 and get -1/0/1 in dst. // if [src1 > src2], dst = 1; // if [src1 == src2], dst = 0; // if [src1 < src2], dst = -1; void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
#ifdef ASSERT
// Template short-hand support to clean-up after a failed call to trampoline
// call generation (see trampoline_call() below), when a set of Labels must
// be reset (before returning).
// NOTE(review): recursive variadic helper; the zero-argument base case is
// expected to be declared elsewhere in this class — confirm it exists.
template<typename Label, typename... More>
void reset_labels(Label& lbl, More&... more) {
  lbl.reset();
  reset_labels(more...);
}
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.18 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.