//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
// X86 Architecture Description File
//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
// ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
// can be used without saving upon entry to the method, &
// that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
// can be used without saving upon entry to the method,
// but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, but they do not need to be saved at call
// sites.
//
// AS = Always-Save: The register allocator assumes that these registers
// must be saved before using them upon entry to the
// method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
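//
// For example (illustrative reading of an entry below, not a new definition):
// reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()) declares EBX as
// save-on-call under the C convention, save-on-entry for compiled Java code,
// spilled as an integer (Op_RegI), with hardware encoding 3.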
// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering forces the
// instruction encoding to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
FPR6L, FPR6H, FPR7L, FPR7H,
FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();
// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );
// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
FPR7L,FPR7H );
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
%}
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}
source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand
#define __ _masm.
// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)
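// For example (illustrative): HIGH_FROM_LOW(rax) maps encoding 0 to encoding 2,
// i.e. EAX to EDX, matching the EDX:EAX long pair listed with the allocation
// classes above; likewise ECX (1) maps to EBX (3) and EBP (5) to EDI (7).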
// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.
void reg_mask_init() {}
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
// Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
// of 128-bit operands for SSE instructions.
jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
// Store the value to a 128-bit operand.
operand[0] = lo;
operand[1] = hi;
return operand;
}
// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
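// Illustrative note: each call above passes an address 16 bytes further into
// fp_signmask_pool, and (adr & ~0xF) rounds it down by at most 15 bytes, so
// the leading 128 bits of slack keep every returned 16-byte slot inside the
// pool and the four masks do not overlap.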
// Offset hacking within calls.
static int pre_call_resets_size() {
int size = 0;
Compile* C = Compile::current();
if (C->in_24_bit_fp_mode()) {
size += 6; // fldcw
}
if (VM_Version::supports_vzeroupper()) {
size += 3; // vzeroupper
}
return size;
}
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}
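// Worked example (illustrative): in 24-bit FP mode on a CPU supporting
// vzeroupper, the node emits a 6-byte fldcw and a 3-byte vzeroupper ahead of
// the 5-byte call, so ret_addr_offset() returns 6 + 3 + 5 = 14.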
int MachCallDynamicJavaNode::ret_addr_offset() {
return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}
static int sizeof_FFree_Float_Stack_All = -1;
int MachCallRuntimeNode::ret_addr_offset() {
assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}
//
// Compute padding required for nodes which need alignment
//
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
current_offset += pre_call_resets_size(); // skip fldcw, if any
current_offset += 1; // skip call opcode byte
return align_up(current_offset, alignment_required()) - current_offset;
}
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
current_offset += pre_call_resets_size(); // skip fldcw, if any
current_offset += 5; // skip MOV instruction
current_offset += 1; // skip call opcode byte
return align_up(current_offset, alignment_required()) - current_offset;
}
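// Worked example (illustrative, assuming alignment_required() == 4 as the
// comment above states): a dynamic call starting at offset 9 with no pending
// resets advances 9 + 5 + 1 = 15; align_up(15, 4) = 16, so one byte of
// padding is inserted.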
// EMIT_RM()
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
cbuf.insts()->emit_int8(c);
}
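// Worked example (illustrative): emit_rm(cbuf, 0x3, 0x0, ESP_enc) packs
// (0x3 << 6) | (0x0 << 3) | 0x4 = 0xC4, i.e. mod=11 (register direct),
// reg/opcode field 0, r/m field 4.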
// EMIT_CC()
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
unsigned char c = (unsigned char)( f1 | f2 );
cbuf.insts()->emit_int8(c);
}
// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
cbuf.insts()->emit_int8((unsigned char) code);
}
// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
cbuf.relocate(cbuf.insts_mark() + offset, reloc);
emit_opcode(cbuf, code);
}
// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
cbuf.insts()->emit_int8((unsigned char) d8);
}
// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
cbuf.insts()->emit_int16(d16);
}
// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
cbuf.insts()->emit_int32(d32);
}
// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
int format) {
cbuf.relocate(cbuf.insts_mark(), reloc, format);
cbuf.insts()->emit_int32(d32);
}
// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
int format) {
#ifdef ASSERT
if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
}
#endif
cbuf.relocate(cbuf.insts_mark(), rspec, format);
cbuf.insts()->emit_int32(d32);
}
// Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
if( -128 <= disp && disp <= 127 ) {
emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
emit_d8 (cbuf, disp); // Displacement // R/M byte
} else {
emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
emit_d32(cbuf, disp); // Displacement // R/M byte
}
}
// rRegI ereg, memory mem) %{ // emit_reg_mem
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
// There is no index & no scale, use form without SIB byte
if ((index == 0x4) &&
(scale == 0) && (base != ESP_enc)) {
// If no displacement, mode is 0x0; unless base is [EBP]
if ( (displace == 0) && (base != EBP_enc) ) {
emit_rm(cbuf, 0x0, reg_encoding, base);
}
else { // If 8-bit displacement, mode 0x1
if ((displace >= -128) && (displace <= 127)
&& (disp_reloc == relocInfo::none) ) {
emit_rm(cbuf, 0x1, reg_encoding, base);
emit_d8(cbuf, displace);
}
else { // If 32-bit displacement
if (base == -1) { // Special flag for absolute address
emit_rm(cbuf, 0x0, reg_encoding, 0x5);
// (manual lies; no SIB needed here)
if ( disp_reloc != relocInfo::none ) {
emit_d32_reloc(cbuf, displace, disp_reloc, 1);
} else {
emit_d32 (cbuf, displace);
}
}
else { // Normal base + offset
emit_rm(cbuf, 0x2, reg_encoding, base);
if ( disp_reloc != relocInfo::none ) {
emit_d32_reloc(cbuf, displace, disp_reloc, 1);
} else {
emit_d32 (cbuf, displace);
}
}
}
}
}
else { // Else, encode with the SIB byte
// If no displacement, mode is 0x0; unless base is [EBP]
if (displace == 0 && (base != EBP_enc)) { // If no displacement
emit_rm(cbuf, 0x0, reg_encoding, 0x4);
emit_rm(cbuf, scale, index, base);
}
else { // If 8-bit displacement, mode 0x1
if ((displace >= -128) && (displace <= 127)
&& (disp_reloc == relocInfo::none) ) {
emit_rm(cbuf, 0x1, reg_encoding, 0x4);
emit_rm(cbuf, scale, index, base);
emit_d8(cbuf, displace);
}
else { // If 32-bit displacement
if (base == 0x04 ) {
emit_rm(cbuf, 0x2, reg_encoding, 0x4);
emit_rm(cbuf, scale, index, 0x04);
} else {
emit_rm(cbuf, 0x2, reg_encoding, 0x4);
emit_rm(cbuf, scale, index, base);
}
if ( disp_reloc != relocInfo::none ) {
emit_d32_reloc(cbuf, displace, disp_reloc, 1);
} else {
emit_d32 (cbuf, displace);
}
}
}
}
}
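// Worked example (illustrative): encode_RegMem(cbuf, reg, EBP_enc, 0x4, 0, 0,
// relocInfo::none) cannot use the mod=00 form (with an EBP base that pattern
// means disp32-only), so it falls into the 8-bit-displacement case and emits
// mod=01 with a zero d8.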
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
if( dst_encoding == src_encoding ) {
// reg-reg copy, use an empty encoding
} else {
emit_opcode( cbuf, 0x8B );
emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
}
}
void emit_cmpfp_fixup(MacroAssembler& _masm) {
Label exit;
__ jccb(Assembler::noParity, exit);
__ pushf();
//
// comiss/ucomiss instructions set ZF,PF,CF flags and
// zero OF,AF,SF for NaN values.
// Fixup flags by zeroing ZF,PF so that compare of NaN
// values returns 'less than' result (CF is set).
// Leave the rest of flags unchanged.
//
// 7 6 5 4 3 2 1 0
// |S|Z|r|A|r|P|r|C| (r - reserved bit)
// 0 0 1 0 1 0 1 1 (0x2B)
//
__ andl(Address(rsp, 0), 0xffffff2b);
__ popf();
__ bind(exit);
}
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
Label done;
__ movl(dst, -1);
__ jcc(Assembler::parity, done);
__ jcc(Assembler::below, done);
__ setb(Assembler::notEqual, dst);
__ movzbl(dst, dst);
__ bind(done);
}
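// Illustrative summary: after a ucomiss/ucomisd, emit_cmpfp3 leaves dst == -1
// for 'less than' or unordered (parity or below taken), 0 for 'equal', and 1
// for 'greater than' (setb notEqual followed by zero-extension).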
//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
int ConstantTable::calculate_table_base_offset() const {
return 0; // absolute addressing, no offset
}
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
ShouldNotReachHere();
}
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
// Empty encoding
}
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
return 0;
}
#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
Compile* C = ra_->C;
int framesize = C->output()->frame_size_in_bytes();
int bangsize = C->output()->bang_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove wordSize for return addr which is already pushed.
framesize -= wordSize;
if (C->output()->need_stack_bang(bangsize)) {
framesize -= wordSize;
st->print("# stack bang (%d bytes)", bangsize);
st->print("\n\t");
st->print("PUSH EBP\t# Save EBP");
if (PreserveFramePointer) {
st->print("\n\t");
st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
}
if (framesize) {
st->print("\n\t");
st->print("SUB ESP, #%d\t# Create frame",framesize);
}
} else {
st->print("SUB ESP, #%d\t# Create frame",framesize);
st->print("\n\t");
framesize -= wordSize;
st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
if (PreserveFramePointer) {
st->print("\n\t");
st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
if (framesize > 0) {
st->print("\n\t");
st->print("ADD EBP, #%d", framesize);
}
}
}
if (VerifyStackAtCalls) {
st->print("\n\t");
framesize -= wordSize;
st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
}
if( C->in_24_bit_fp_mode() ) {
st->print("\n\t");
st->print("FLDCW \t# load 24 bit fpu control word");
}
if (UseSSE >= 2 && VerifyFPU) {
st->print("\n\t");
st->print("# verify FPU stack (must be clean on entry)");
}
#ifdef ASSERT
if (VerifyStackAtCalls) {
st->print("\n\t");
st->print("# stack alignment check");
}
#endif
st->cr();
}
#endif
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile* C = ra_->C;
C2_MacroAssembler _masm(&cbuf);
int framesize = C->output()->frame_size_in_bytes();
int bangsize = C->output()->bang_size_in_bytes();
__ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
C->output()->set_frame_complete(cbuf.insts_size());
if (C->has_mach_constant_base_node()) {
// NOTE: We set the table base offset here because users might be
// emitted before MachConstantBaseNode.
ConstantTable& constant_table = C->output()->constant_table();
constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
}
}
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
return MachNode::size(ra_); // too many variables; just compute it the hard way
}
int MachPrologNode::reloc() const {
return 0; // a large enough number
}
//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
Compile *C = ra_->C;
int framesize = C->output()->frame_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove two words for return addr and rbp,
framesize -= 2*wordSize;
if (C->max_vector_size() > 16) {
st->print("VZEROUPPER");
st->cr(); st->print("\t");
}
if (C->in_24_bit_fp_mode()) {
st->print("FLDCW standard control word");
st->cr(); st->print("\t");
}
if (framesize) {
st->print("ADD ESP,%d\t# Destroy frame",framesize);
st->cr(); st->print("\t");
}
st->print_cr("POPL EBP"); st->print("\t");
if (do_polling() && C->is_method_compilation()) {
st->print("CMPL rsp, poll_offset[thread] \n\t"
"JA #safepoint_stub\t"
"# Safepoint: poll for GC");
}
}
#endif
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
Compile *C = ra_->C;
MacroAssembler _masm(&cbuf);
if (C->max_vector_size() > 16) {
// Clear upper bits of YMM registers when current compiled code uses
// wide vectors to avoid AVX <-> SSE transition penalty during call.
_masm.vzeroupper();
}
// If method set FPU control word, restore to standard control word
if (C->in_24_bit_fp_mode()) {
_masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
}
int framesize = C->output()->frame_size_in_bytes();
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove two words for return addr and rbp,
framesize -= 2*wordSize;
// Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
if (framesize >= 128) {
emit_opcode(cbuf, 0x81); // add SP, #framesize
emit_rm(cbuf, 0x3, 0x00, ESP_enc);
emit_d32(cbuf, framesize);
} else if (framesize) {
emit_opcode(cbuf, 0x83); // add SP, #framesize
emit_rm(cbuf, 0x3, 0x00, ESP_enc);
emit_d8(cbuf, framesize);
}
emit_opcode(cbuf, 0x58 | EBP_enc);
if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
__ reserved_stack_check();
}
if (do_polling() && C->is_method_compilation()) {
Register thread = as_Register(EBX_enc);
MacroAssembler masm(&cbuf);
__ get_thread(thread);
Label dummy_label;
Label* code_stub = &dummy_label;
if (!C->output()->in_scratch_emit_size()) {
code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
}
__ relocate(relocInfo::poll_return_type);
__ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
}
}
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
return MachNode::size(ra_); // too many variables; just compute it
// the hard way
}
int MachEpilogNode::reloc() const {
return 0; // a large enough number
}
const Pipeline * MachEpilogNode::pipeline() const {
return MachNode::pipeline_class();
}
//=============================================================================
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {
if( !OptoReg::is_valid(reg) ) return rc_bad;
if (OptoReg::is_stack(reg)) return rc_stack;
VMReg r = OptoReg::as_VMReg(reg);
if (r->is_Register()) return rc_int;
if (r->is_FloatRegister()) {
assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
return rc_float;
}
if (r->is_KRegister()) return rc_kreg;
assert(r->is_XMMRegister(), "must be");
return rc_xmm;
}
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
int opcode, const char *op_str, int size, outputStream* st ) {
if( cbuf ) {
emit_opcode (*cbuf, opcode );
encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
if( opcode == 0x8B || opcode == 0x89 ) { // MOV
if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
} else { // FLD, FST, PUSH, POP
st->print("%s [ESP + #%d]",op_str,offset);
}
#endif
}
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
return size+3+offset_size;
}
// Helper for XMM registers. Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
int in_size_in_bits = Assembler::EVEX_32bit;
int evex_encoding = 0;
if (reg_lo+1 == reg_hi) {
in_size_in_bits = Assembler::EVEX_64bit;
evex_encoding = Assembler::VEX_W;
}
if (cbuf) {
MacroAssembler _masm(cbuf);
// EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
// as it maps more cases to a single-byte displacement
_masm.set_managed();
if (reg_lo+1 == reg_hi) { // double move?
if (is_load) {
__ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
} else {
__ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
}
} else {
if (is_load) {
__ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
} else {
__ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
}
}
#ifndef PRODUCT
} else if (!do_size) {
if (size != 0) st->print("\n\t");
if (reg_lo+1 == reg_hi) { // double move?
if (is_load) st->print("%s %s,[ESP + #%d]",
UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
Matcher::regName[reg_lo], offset);
else st->print("MOVSD [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]);
} else {
if (is_load) st->print("MOVSS %s,[ESP + #%d]",
Matcher::regName[reg_lo], offset);
else st->print("MOVSS [ESP + #%d],%s",
offset, Matcher::regName[reg_lo]);
}
#endif
}
bool is_single_byte = false;
if ((UseAVX > 2) && (offset != 0)) {
is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
}
int offset_size = 0;
if (UseAVX > 2 ) {
offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
} else {
offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
}
size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return size+5+offset_size;
}
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) {
if (cbuf) {
MacroAssembler _masm(cbuf);
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
_masm.set_managed();
if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
__ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
as_XMMRegister(Matcher::_regEncode[src_lo]));
} else {
__ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
as_XMMRegister(Matcher::_regEncode[src_lo]));
}
#ifndef PRODUCT
} else if (!do_size) {
if (size != 0) st->print("\n\t");
if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
} else {
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
} else {
st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
}
}
#endif
}
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
// Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
int sz = (UseAVX > 2) ? 6 : 4;
if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
return size + sz;
}
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit
if (cbuf) {
MacroAssembler _masm(cbuf);
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
_masm.set_managed();
__ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
}
return (UseAVX> 2) ? 6 : 4;
}
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, int size, outputStream* st ) {
// 32-bit
if (cbuf) {
MacroAssembler _masm(cbuf);
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
_masm.set_managed();
__ movdl(as_Register(Matcher::_regEncode[dst_lo]),
as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
}
return (UseAVX> 2) ? 6 : 4;
}
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
if( cbuf ) {
emit_opcode(*cbuf, 0x8B );
emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
}
return size+2;
}
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
int offset, int size, outputStream* st ) {
if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
if( cbuf ) {
emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
st->print("FLD %s",Matcher::regName[src_lo]);
#endif
}
size += 2;
}
int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
const char *op_str;
int op;
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
op = 0xDD;
} else { // 32-bit store
op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
op = 0xD9;
assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
}
return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
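// Illustrative note: st_op is only used as the ModRM reg/opcode field inside
// impl_helper; EBX's hardware encoding (3) selects the FSTP (/3) form and
// EDX's encoding (2) the FST (/2) form of the 0xD9/0xDD store opcodes.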
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
int dst_offset, uint ireg, outputStream* st) {
if (cbuf) {
MacroAssembler _masm(cbuf);
switch (ireg) {
case Op_VecS:
__ pushl(Address(rsp, src_offset));
__ popl (Address(rsp, dst_offset));
break;
case Op_VecD:
__ pushl(Address(rsp, src_offset));
__ popl (Address(rsp, dst_offset));
__ pushl(Address(rsp, src_offset+4));
__ popl (Address(rsp, dst_offset+4));
break;
case Op_VecX:
__ movdqu(Address(rsp, -16), xmm0);
__ movdqu(xmm0, Address(rsp, src_offset));
__ movdqu(Address(rsp, dst_offset), xmm0);
__ movdqu(xmm0, Address(rsp, -16));
break;
case Op_VecY:
__ vmovdqu(Address(rsp, -32), xmm0);
__ vmovdqu(xmm0, Address(rsp, src_offset));
__ vmovdqu(Address(rsp, dst_offset), xmm0);
__ vmovdqu(xmm0, Address(rsp, -32));
break;
case Op_VecZ:
__ evmovdquq(Address(rsp, -64), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
__ evmovdquq(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();
}
#ifndef PRODUCT
} else {
switch (ireg) {
case Op_VecS:
st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
"popl [rsp + #%d]",
src_offset, dst_offset);
break;
case Op_VecD:
st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
"popq [rsp + #%d]\n\t"
"pushl [rsp + #%d]\n\t"
"popq [rsp + #%d]",
src_offset, dst_offset, src_offset+4, dst_offset+4);
break;
case Op_VecX:
st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
"movdqu xmm0, [rsp + #%d]\n\t"
"movdqu [rsp + #%d], xmm0\n\t"
"movdqu xmm0, [rsp - #16]",
src_offset, dst_offset);
break;
case Op_VecY:
st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
"vmovdqu xmm0, [rsp + #%d]\n\t"
"vmovdqu [rsp + #%d], xmm0\n\t"
"vmovdqu xmm0, [rsp - #32]",
src_offset, dst_offset);
break;
case Op_VecZ:
st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
"vmovdqu xmm0, [rsp + #%d]\n\t"
"vmovdqu [rsp + #%d], xmm0\n\t"
"vmovdqu xmm0, [rsp - #64]",
src_offset, dst_offset);
break;
default:
ShouldNotReachHere();
}
#endif
}
}
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
// Get registers to move
OptoReg::Name src_second = ra_->get_reg_second(in(1));
OptoReg::Name src_first = ra_->get_reg_first(in(1));
OptoReg::Name dst_second = ra_->get_reg_second(this );
OptoReg::Name dst_first = ra_->get_reg_first(this );
enum RC src_second_rc = rc_class(src_second);
enum RC src_first_rc = rc_class(src_first);
enum RC dst_second_rc = rc_class(dst_second);
enum RC dst_first_rc = rc_class(dst_first);
assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
// Generate spill code!
int size = 0;
if( src_first == dst_first && src_second == dst_second )
return size; // Self copy, no move
if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
uint ireg = ideal_reg();
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
// mem -> mem
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
int stack_offset = ra_->reg2offset(src_first);
vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}
return 0;
}
// --------------------------------------
// Check for mem-mem move. push/pop to move.
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
if( src_second == dst_first ) { // overlapping stack copy ranges
assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
}
// move low bits
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
}
return size;
}
// --------------------------------------
// Check for integer reg-reg copy
if( src_first_rc == rc_int && dst_first_rc == rc_int )
size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
// Check for integer store
if( src_first_rc == rc_int && dst_first_rc == rc_stack )
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
// Check for integer load
if( src_first_rc == rc_stack && dst_first_rc == rc_int )
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
// Check for integer reg-xmm reg copy
if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
"no 64 bit integer-float reg moves" );
return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
}
// --------------------------------------
// Check for float reg-reg copy
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
(src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
if( cbuf ) {
// Note the mucking with the register encode to compensate for the 0/1
// indexing issue mentioned in a comment in the reg_def sections
// for FPR registers many lines above here.
if( src_first != FPR1L_num ) {
emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
} else {
emit_opcode (*cbuf, 0xDD ); // FST ST(i)
emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
}
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
}
return size + ((src_first != FPR1L_num) ? 2+2 : 2);
}
// Check for float store
if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
}
// Check for float load
if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
int offset = ra_->reg2offset(src_first);
const char *op_str;
int op;
if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
op_str = "FLD_D";
op = 0xDD;
} else { // 32-bit load
op_str = "FLD_S";
op = 0xD9;
assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
}
if( cbuf ) {
emit_opcode (*cbuf, op );
encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
}
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
return size + 3+offset_size+2;
}
// Check for xmm reg-reg copy
if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
(src_first+1 == src_second && dst_first+1 == dst_second),
"no non-adjacent float-moves" );
return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
}
// Check for xmm reg-integer reg copy
if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
"no 64 bit float-integer reg moves" );
return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
}
// Check for xmm store
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
}
// Check for float xmm load
if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
}
// Copy from float reg to xmm reg
if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
// copy to the top of stack from floating point reg
// and use LEA to preserve flags
if( cbuf ) {
emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
st->print("LEA ESP,[ESP-8]");
#endif
}
size += 4;
size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
// Copy from the temp memory to the xmm reg.
size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
if( cbuf ) {
emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
emit_d8(*cbuf,0x08);
#ifndef PRODUCT
} else if( !do_size ) {
if( size != 0 ) st->print("\n\t");
st->print("LEA ESP,[ESP+8]");
#endif
}
size += 4;
return size;
}
// AVX-512 opmask specific spilling.
if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
int offset = ra_->reg2offset(src_first);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
int offset = ra_->reg2offset(dst_first);
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
Unimplemented();
return 0;
}
if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
Unimplemented();
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
return 0;
}
assert( size > 0, "missed a case" );
// --------------------------------------------------------------------
// Check for second bits still needing moving.
if( src_second == dst_second )
return size; // Self copy; no move
assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
// Check for second word int-int move
if( src_second_rc == rc_int && dst_second_rc == rc_int )
return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
// Check for second word integer store
if( src_second_rc == rc_int && dst_second_rc == rc_stack )
return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
// Check for second word integer load
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
Unimplemented();
return 0; // Mute compiler
}
#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
implementation( NULL, ra_, false, st );
}
#endif
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
implementation( &cbuf, ra_, false, NULL );
}
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
return MachNode::size(ra_);
}
//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_reg_first(this);
st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
int reg = ra_->get_encode(this);
if( offset >= 128 ) {
emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
emit_rm(cbuf, 0x2, reg, 0x04);
emit_rm(cbuf, 0x0, 0x04, ESP_enc);
emit_d32(cbuf, offset);
}
else {
emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
emit_rm(cbuf, 0x1, reg, 0x04);
emit_rm(cbuf, 0x0, 0x04, ESP_enc);
emit_d8(cbuf, offset);
}
}
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
if( offset >= 128 ) {
return 7;
}
else {
return 4;
}
}
//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
st->print_cr("\tNOP");
st->print_cr("\tNOP");
if( !OptoBreakpoint )
st->print_cr("\tNOP");
}
#endif
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler masm(&cbuf);
#ifdef ASSERT
uint insts_size = cbuf.insts_size();
#endif
masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
masm.jump_cc(Assembler::notEqual,
RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
/* WARNING these NOPs are critical so that verified entry point is properly
aligned for patching by NativeJump::patch_verified_entry() */
int nops_cnt = 2;
if( !OptoBreakpoint ) // Leave space for int3
nops_cnt += 1;
masm.nop(nops_cnt);
assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
return OptoBreakpoint ? 11 : 12;
}
//=============================================================================
// Vector calling convention not supported.
const bool Matcher::supports_vector_calling_convention() {
return false;
}
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
Unimplemented();
return OptoRegPair(0, 0);
}
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
// The passed offset is relative to address of the branch.
// On x86 a branch displacement is calculated relative to the address
// of the next instruction.
offset -= br_size;
// the short version of jmpConUCF2 contains multiple branches,
// making the reach slightly less
if (rule == jmpConUCF2_rule)
return (-126 <= offset && offset <= 125);
return (-128 <= offset && offset <= 127);
}
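// Worked example (illustrative): a 2-byte short jcc whose target lies 130
// bytes past the branch address is passed offset == 130; subtracting br_size
// leaves a CPU-relative displacement of 128, which does not fit in a signed
// byte, so the short form is rejected.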
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
if( reg == ECX_num || reg == EDX_num ) return true;
if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
return false;
}
bool Matcher::is_spillable_arg( int reg ) {
return can_be_java_arg(reg);
}
uint Matcher::int_pressure_limit()
{
return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}
uint Matcher::float_pressure_limit()
{
return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
// Use the hardware integer DIV instruction when
// it is faster than code which uses a multiply.
// Only when the constant divisor fits into 32 bits
// (min_jint is excluded to get only correct
// positive 32-bit values from negative ones).
return VM_Version::has_fast_idiv() &&
(divisor == (int)divisor && divisor != min_jint);
}
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
return EAX_REG_mask();
}
// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
return EDX_REG_mask();
}
// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
}
// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
}
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return NO_REG_mask();
}
// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
int opc = n->Opcode();
if (opc == Op_AndL) {
Node* o2 = n->in(2);
if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
return true;
}
}
if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
return true;
}
return false;
}
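// Illustrative example: for (AndL x (ConL 0xFF)) with the constant as the
// second input, the constant's high 32 bits are zero, so the function returns
// true; a plain ConL with value 0x100000000 returns false.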
%}
//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently four supported interfaces:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
// Build emit functions for each basic byte or larger field in the intel
// encoding scheme (opcode, rm, sib, immediate), and call them from C++
// code in the enc_class source block. Emit functions will live in the
// main source block for now. In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically
// Emit primary opcode
enc_class OpcP %{
emit_opcode(cbuf, $primary);
%}
// Emit secondary opcode
enc_class OpcS %{
emit_opcode(cbuf, $secondary);
%}
// Emit opcode directly
enc_class Opcode(immI d8) %{
emit_opcode(cbuf, $d8$$constant);
%}
enc_class SizePrefix %{
emit_opcode(cbuf,0x66);
%}
enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
emit_opcode(cbuf,$opcode$$constant);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
enc_class mov_r32_imm0( rRegI dst ) %{
emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
emit_d32 ( cbuf, 0x0 ); // imm32==0x0
%}
enc_class cdq_enc %{
// Full implementation of Java idiv and irem; checks for
// special case as described in JVM spec., p.243 & p.271.
//
// normal case special case
//
// input : rax,: dividend min_int
// reg: divisor -1
//
// output: rax,: quotient (= rax, idiv reg) min_int
// rdx: remainder (= rax, irem reg) 0
//
// Code sequence:
//
// 81 F8 00 00 00 80 cmp rax,80000000h
// 0F 85 0B 00 00 00 jne normal_case
// 33 D2 xor rdx,edx
// 83 F9 FF cmp rcx,0FFh
// 0F 84 03 00 00 00 je done
// normal_case:
// 99 cdq
// F7 F9 idiv rax,ecx
// done:
//
emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
// normal_case:
emit_opcode(cbuf,0x99); // cdq
// idiv (note: must be emitted by the user of this rule)
// normal:
%}
// Dense encoding for older common ops
enc_class Opc_plus(immI opcode, rRegI reg) %{
emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
%}
// Opcode enc_class for 8/32-bit immediate instructions with sign-extension
enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
// Check for 8-bit immediate, and set sign extend bit in opcode
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
emit_opcode(cbuf, $primary | 0x02);
}
else { // If 32-bit immediate
emit_opcode(cbuf, $primary);
}
%}
enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
// Emit primary opcode and set sign-extend bit
// Check for 8-bit immediate, and set sign extend bit in opcode
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
emit_opcode(cbuf, $primary | 0x02); }
else { // If 32-bit immediate
emit_opcode(cbuf, $primary);
}
// Emit r/m byte with secondary opcode, after primary opcode.
emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
%}
enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
// Check for 8-bit immediate, and set sign extend bit in opcode
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
$$$emit8$imm$$constant;
}
else { // If 32-bit immediate
// Output immediate
$$$emit32$imm$$constant;
}
%}
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
// Emit primary opcode and set sign-extend bit
// Check for 8-bit immediate, and set sign extend bit in opcode
int con = (int)$imm$$constant; // Throw away top bits
emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
// Emit r/m byte with secondary opcode, after primary opcode.
emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
else emit_d32(cbuf,con);
%}
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
// Emit primary opcode and set sign-extend bit
// Check for 8-bit immediate, and set sign extend bit in opcode
int con = (int)($imm$$constant >> 32); // Throw away bottom bits
emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
// Emit r/m byte with tertiary opcode, after primary opcode.
emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
else emit_d32(cbuf,con);
%}
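// Worked example (illustrative): for imm == 0x0000000500000080 the low half
// is 0x80 (128, outside [-128,127]), so Long_OpcSErm_Lo emits the 32-bit
// immediate form, while the high half (5) fits in a byte and Long_OpcSErm_Hi
// emits the sign-extended 8-bit form.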
enc_class OpcSReg (rRegI dst) %{ // BSWAP
emit_cc(cbuf, $secondary, $dst$$reg );
%}
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
int destlo = $dst$$reg;
int desthi = HIGH_FROM_LOW_ENC(destlo);
// bswap lo
emit_opcode(cbuf, 0x0F);
emit_cc(cbuf, 0xC8, destlo);
// bswap hi
emit_opcode(cbuf, 0x0F);
emit_cc(cbuf, 0xC8, desthi);
// xchg lo and hi
emit_opcode(cbuf, 0x87);
emit_rm(cbuf, 0x3, destlo, desthi);
%}
enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
emit_rm(cbuf, 0x3, $secondary, $div$$reg );
%}
enc_class enc_cmov(cmpOp cop ) %{ // CMOV
$$$emit8$primary;
emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}
enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
emit_d8(cbuf, op >> 8 );
emit_d8(cbuf, op & 255);
%}
// emulate a CMOV with a conditional branch around a MOV
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
// Invert sense of branch from sense of CMOV
emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
emit_d8( cbuf, $brOffs$$constant );
%}
enc_class enc_PartialSubtypeCheck( ) %{
Register Redi = as_Register(EDI_enc); // result register
Register Reax = as_Register(EAX_enc); // super class
Register Recx = as_Register(ECX_enc); // killed
Register Resi = as_Register(ESI_enc); // sub class
Label miss;
MacroAssembler _masm(&cbuf);
__ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
NULL, &miss,