Quellcode-Bibliothek

^© Kompilation durch diese Firma

[Weder Korrektheit noch Funktionsfähigkeit der Software werden zugesichert.]

Datei: ciNullObject.cpp Sprache: C

/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef CPU_X86_ASSEMBLER_X86_HPP
#define CPU_X86_ASSEMBLER_X86_HPP

#include "asm/register.hpp"
#include "utilities/powerOfTwo.hpp"

// Contains all the definitions needed for x86 assembly code generation.

// Calling convention
// Counts of registers used for passing arguments and returning values.
// The "_c" constants describe the C calling convention (see the c_rarg*/c_farg*
// names), the "_j" constants the Java calling convention (j_rarg*/j_farg*).
class Argument {
public:
  enum {
#ifdef _LP64
#ifdef _WIN64
    // 64-bit Windows
    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
    n_int_register_returns_c = 1, // rax
    n_float_register_returns_c = 1, // xmm0
#else
    // 64-bit, non-Windows
    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
    n_int_register_returns_c = 2, // rax, rdx
    n_float_register_returns_c = 2, // xmm0, xmm1
#endif // _WIN64
    // Java convention counts are the same on every 64-bit platform.
    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
#else
    // 32-bit: no register arguments at all. Note that the C-convention
    // constant has a different name here (n_register_parameters) than the
    // 64-bit n_*_register_parameters_c constants.
    n_register_parameters = 0,   // 0 registers used to pass arguments
    n_int_register_parameters_j   = 0,
    n_float_register_parameters_j = 0
#endif // _LP64
  };
};

#ifdef _LP64
// Symbolically name the register arguments used by the c calling convention.
// Windows is different from linux/solaris. So much for standards...

#ifdef _WIN64

// 64-bit Windows: four integer argument registers ...
constexpr Register c_rarg0 = rcx;
constexpr Register c_rarg1 = rdx;
constexpr Register c_rarg2 =  r8;
constexpr Register c_rarg3 =  r9;

// ... and four floating-point argument registers.
constexpr XMMRegister c_farg0 = xmm0;
constexpr XMMRegister c_farg1 = xmm1;
constexpr XMMRegister c_farg2 = xmm2;
constexpr XMMRegister c_farg3 = xmm3;

#else

// Other 64-bit platforms: six integer argument registers ...
constexpr Register c_rarg0 = rdi;
constexpr Register c_rarg1 = rsi;
constexpr Register c_rarg2 = rdx;
constexpr Register c_rarg3 = rcx;
constexpr Register c_rarg4 =  r8;
constexpr Register c_rarg5 =  r9;

// ... and eight floating-point argument registers.
constexpr XMMRegister c_farg0 = xmm0;
constexpr XMMRegister c_farg1 = xmm1;
constexpr XMMRegister c_farg2 = xmm2;
constexpr XMMRegister c_farg3 = xmm3;
constexpr XMMRegister c_farg4 = xmm4;
constexpr XMMRegister c_farg5 = xmm5;
constexpr XMMRegister c_farg6 = xmm6;
constexpr XMMRegister c_farg7 = xmm7;

#endif // _WIN64

// Symbolically name the register arguments used by the Java calling convention.
// We have control over the convention for java so we can do what we please.
// What pleases us is to offset the java calling convention so that when
// we call a suitable jni method the arguments are lined up and we don't
// have to do little shuffling. A suitable jni method is non-static and a
// small number of arguments (two fewer args on windows)
//
//        |-------------------------------------------------------|
//        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
//        |-------------------------------------------------------|
//        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
//        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
//        |-------------------------------------------------------|
//        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
//        |-------------------------------------------------------|

// Java integer argument registers: the C argument registers shifted by one,
// so that j_rarg0 lands in c_rarg1 and the last Java register wraps around
// to c_rarg0.
constexpr Register j_rarg0 = c_rarg1;
constexpr Register j_rarg1 = c_rarg2;
constexpr Register j_rarg2 = c_rarg3;
// Windows runs out of register args here
#ifdef _WIN64
// No c_rarg4/c_rarg5 exist on Windows, so rdi/rsi are named directly.
constexpr Register j_rarg3 = rdi;
constexpr Register j_rarg4 = rsi;
#else
constexpr Register j_rarg3 = c_rarg4;
constexpr Register j_rarg4 = c_rarg5;
#endif /* _WIN64 */
constexpr Register j_rarg5 = c_rarg0;

// Java floating-point argument registers: xmm0 - xmm7 on every 64-bit platform.
constexpr XMMRegister j_farg0 = xmm0;
constexpr XMMRegister j_farg1 = xmm1;
constexpr XMMRegister j_farg2 = xmm2;
constexpr XMMRegister j_farg3 = xmm3;
constexpr XMMRegister j_farg4 = xmm4;
constexpr XMMRegister j_farg5 = xmm5;
constexpr XMMRegister j_farg6 = xmm6;
constexpr XMMRegister j_farg7 = xmm7;

// Scratch registers (64-bit only).
constexpr Register rscratch1 = r10;  // volatile
constexpr Register rscratch2 = r11;  // volatile

// Registers given symbolic names that pair the register with a role
// (heap base, thread).
constexpr Register r12_heapbase = r12; // callee-saved
constexpr Register r15_thread   = r15; // callee-saved

#else
// 32-bit builds: rscratch1/rscratch2 still appear in code that is dead on
// 32-bit but must nevertheless compile. Mapping them to noreg means that if
// such code is mistakenly live and executed, it triggers an assertion failure
// instead of silently using a real register.
#define rscratch1 noreg
#define rscratch2 noreg

#endif // _LP64

// JSR 292
// On x86, the SP does not have to be saved when invoking method handle intrinsics
// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
constexpr Register rbp_mh_SP_save = noreg;

// Address is an abstraction used to represent a memory location
// using any of the amd64 addressing modes with one object.
//
// Note: A register location is represented via a Register, not
//       via an address for efficiency & simplicity reasons.

class ArrayAddress;

class Address {
public:
  enum ScaleFactor {
    no_scale = -1,
    times_1  =  0,
    times_2  =  1,
    times_4  =  2,
    times_8  =  3,
    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
  };
  static ScaleFactor times(int size) {
    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
    if (size == 8)  return times_8;
    if (size == 4)  return times_4;
    if (size == 2)  return times_2;
    return times_1;
  }
  static int scale_size(ScaleFactor scale) {
    assert(scale != no_scale, "");
    assert(((1 << (int)times_1) == 1 &&
            (1 << (int)times_2) == 2 &&
            (1 << (int)times_4) == 4 &&
            (1 << (int)times_8) == 8), "");
    return (1 << (int)scale);
  }

private:
  Register         _base;
  Register         _index;
  XMMRegister      _xmmindex;
  ScaleFactor      _scale;
  int              _disp;
  bool             _isxmmindex;
  RelocationHolder _rspec;

  // Easily misused constructors make them private
  // %%% can we make these go away?
  NOT_LP64(Address(address loc, RelocationHolder spec);)
  Address(int disp, address loc, relocInfo::relocType rtype);
  Address(int disp, address loc, RelocationHolder spec);

public:

int disp() { return _disp; }
  // creation
  Address()
    : _base(noreg),
      _index(noreg),
      _xmmindex(xnoreg),
      _scale(no_scale),
      _disp(0),
      _isxmmindex(false){
  }

  // No default displacement otherwise Register can be implicitly
  // converted to 0(Register) which is quite a different animal.

  Address(Register base, int disp)
    : _base(base),
      _index(noreg),
      _xmmindex(xnoreg),
      _scale(no_scale),
      _disp(disp),
      _isxmmindex(false){
  }

  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
    : _base (base),
      _index(index),
      _xmmindex(xnoreg),
      _scale(scale),
      _disp (disp),
      _isxmmindex(false) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
    : _base (base),
      _index(index.register_or_noreg()),
      _xmmindex(xnoreg),
      _scale(scale),
      _disp (disp + (index.constant_or_zero() * scale_size(scale))),
      _isxmmindex(false){
    if (!index.is_register())  scale = Address::no_scale;
    assert(!_index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
    : _base (base),
      _index(noreg),
      _xmmindex(index),
      _scale(scale),
      _disp(disp),
      _isxmmindex(true) {
      assert(!index->is_valid() == (scale == Address::no_scale),
             "inconsistent address");
  }

  // The following overloads are used in connection with the
  // ByteSize type (see sizes.hpp).  They simplify the use of
  // ByteSize'd arguments in assembly code.

  Address(Register base, ByteSize disp)
    : Address(base, in_bytes(disp)) {}

  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
    : Address(base, index, scale, in_bytes(disp)) {}

  Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
    : Address(base, index, scale, in_bytes(disp)) {}

  Address plus_disp(int disp) const {
    Address a = (*this);
    a._disp += disp;
    return a;
  }
  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
    Address a = (*this);
    a._disp += disp.constant_or_zero() * scale_size(scale);
    if (disp.is_register()) {
      assert(!a.index()->is_valid(), "competing indexes");
      a._index = disp.as_register();
      a._scale = scale;
    }
    return a;
  }
  bool is_same_address(Address a) const {
    // disregard _rspec
    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
  }

  // accessors
  bool        uses(Register reg) const { return _base == reg || _index == reg; }
  Register    base()             const { return _base;  }
  Register    index()            const { return _index; }
  XMMRegister xmmindex()         const { return _xmmindex; }
  ScaleFactor scale()            const { return _scale; }
  int         disp()             const { return _disp;  }
  bool        isxmmindex()       const { return _isxmmindex; }

  // Convert the raw encoding form into the form expected by the constructor for
  // Address.  An index of 4 (rsp) corresponds to having no index, so convert
  // that to noreg for the Address constructor.
  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);

  static Address make_array(ArrayAddress);

private:
  bool base_needs_rex() const {
    return _base->is_valid() && _base->encoding() >= 8;
  }

  bool index_needs_rex() const {
    return _index->is_valid() &&_index->encoding() >= 8;
  }

  bool xmmindex_needs_rex() const {
    return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
  }

  relocInfo::relocType reloc() const { return _rspec.type(); }

  friend class Assembler;
  friend class MacroAssembler;
  friend class LIR_Assembler; // base/index/scale/disp
};

//
// AddressLiteral has been split out from Address because operands of this type
// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
// the few instructions that need to deal with address literals are unique and the
// MacroAssembler does not have to implement every instruction in the Assembler
// in order to search for address literals that may need special handling depending
// on the instruction and the platform. A small step on the way to merging i486/amd64
// directories.
//
// A literal target address together with its relocation information and a
// flag saying whether the value at the address (rval) or the address itself
// (lval) is wanted.
class AddressLiteral {
  friend class ArrayAddress;
  RelocationHolder _rspec;
  // Typically when we use AddressLiterals we want their rval.
  // However in some situations we want the lval (effective address) of the item.
  // We provide a special factory for making those lvals.
  bool _is_lval;

  // If the target is far we'll need to load the ea of this to
  // a register to reach it. Otherwise if near we can do rip
  // relative addressing.

  address          _target;

protected:
  // creation

  // Empty literal: null target, rval semantics.
  AddressLiteral()
    : _is_lval(false),
      _target(NULL)
  {}

  public:

  AddressLiteral(address target, relocInfo::relocType rtype);

  AddressLiteral(address target, RelocationHolder const& rspec)
    : _rspec(rspec),
      _is_lval(false),
      _target(target)
  {}

  // Factory for the lval form: returns a copy of this literal with the lval
  // flag set. The receiver is left unchanged, hence const.
  AddressLiteral addr() const {
    AddressLiteral ret = *this;
    ret._is_lval = true;
    return ret;
  }

private:

  // Accessors, reachable only through the friend classes listed below.
  address target() const { return _target; }
  bool is_lval() const { return _is_lval; }

  relocInfo::relocType reloc() const { return _rspec.type(); }
  const RelocationHolder& rspec() const { return _rspec; }

  friend class Assembler;
  friend class MacroAssembler;
  friend class Address;
  friend class LIR_Assembler;
};

// Convenience classes
// AddressLiteral whose relocation type is relocInfo::runtime_call_type.
class RuntimeAddress: public AddressLiteral {
 public:
  RuntimeAddress(address target)
    : AddressLiteral(target, relocInfo::runtime_call_type) {
  }
};

// AddressLiteral for an external word; the relocation type is chosen per target.
class ExternalAddress: public AddressLiteral {
 private:
  // ExternalAddress is sometimes handed values that are not really addresses
  // (the card table base, for example). external_word_type cannot describe
  // values in the first page, so such targets get no relocation at all.
  static relocInfo::relocType reloc_for_target(address target) {
    if (external_word_Relocation::can_be_relocated(target)) {
      return relocInfo::external_word_type;
    }
    return relocInfo::none;
  }

 public:
  ExternalAddress(address target)
    : AddressLiteral(target, reloc_for_target(target)) {
  }
};

// AddressLiteral whose relocation type is relocInfo::internal_word_type.
class InternalAddress: public AddressLiteral {
 public:
  InternalAddress(address target)
    : AddressLiteral(target, relocInfo::internal_word_type) {
  }
};

// x86 can do array addressing as a single operation since disp can be an absolute
// address; amd64 can't. We create a class that expresses the concept but does extra
// magic on amd64 to get the final result.

// Pairs an AddressLiteral (the array base) with an Address (the index part).
class ArrayAddress {
  private:

  AddressLiteral _base;
  Address        _index;

  public:

  // Empty array address: both parts default-constructed.
  ArrayAddress() {}
  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}

  // Accessors return copies and do not modify the object, hence const.
  AddressLiteral base() const { return _base; }
  Address index() const { return _index; }

};

class InstructionAttr;

// On 64-bit this reflects the fxsave size, which is 512 bytes, plus the new xsave area
// on EVEX, which is another 2176 bytes (2688 bytes in total).
// See the fxsave and xsave (EVEX enabled) documentation for the layout.
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);

// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
// is what you get. The Assembler is generating code into a CodeBuffer.

class Assembler : public AbstractAssembler  {
  friend class AbstractAssembler; // for the non-virtual hack
  friend class LIR_Assembler; // as_Address()
  friend class StubGenerator;

public:
  enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
    // Several names share one encoding: zero/equal, notZero/notEqual,
    // below/carrySet and aboveEqual/carryClear.
    zero          = 0x4,
    notZero       = 0x5,
    equal         = 0x4,               // same encoding as zero
    notEqual      = 0x5,               // same encoding as notZero
    less          = 0xc,
    lessEqual     = 0xe,
    greater       = 0xf,
    greaterEqual  = 0xd,
    below         = 0x2,
    belowEqual    = 0x6,
    above         = 0x7,
    aboveEqual    = 0x3,
    overflow      = 0x0,
    noOverflow    = 0x1,
    carrySet      = 0x2,               // same encoding as below
    carryClear    = 0x3,               // same encoding as aboveEqual
    negative      = 0x8,
    positive      = 0x9,
    parity        = 0xa,
    noParity      = 0xb
  };

  enum Prefix {
    // segment overrides
    CS_segment = 0x2e,
    SS_segment = 0x36,
    DS_segment = 0x3e,
    ES_segment = 0x26,
    FS_segment = 0x64,
    GS_segment = 0x65,

    REX        = 0x40,

    REX_B      = 0x41,
    REX_X      = 0x42,
    REX_XB     = 0x43,
    REX_R      = 0x44,
    REX_RB     = 0x45,
    REX_RX     = 0x46,
    REX_RXB    = 0x47,

    REX_W      = 0x48,

    REX_WB     = 0x49,
    REX_WX     = 0x4A,
    REX_WXB    = 0x4B,
    REX_WR     = 0x4C,
    REX_WRB    = 0x4D,
    REX_WRX    = 0x4E,
    REX_WRXB   = 0x4F,

    VEX_3bytes = 0xC4,
    VEX_2bytes = 0xC5,
    EVEX_4bytes = 0x62,
    Prefix_EMPTY = 0x0
  };

  enum VexPrefix {
    VEX_B = 0x20,
    VEX_X = 0x40,
    VEX_R = 0x80,
    VEX_W = 0x80
  };

  // Bit masks named after EVEX prefix fields.
  // NOTE(review): the enum name looks like a typo for "EvexPrefix" (every
  // enumerator is EVEX_*); it is left as is because renaming it would change
  // the class's interface.
  enum ExexPrefix {
    EVEX_F  = 0x04,
    EVEX_V  = 0x08,
    EVEX_Rb = 0x10,
    EVEX_X  = 0x40,
    EVEX_Z  = 0x80
  };

  enum VexSimdPrefix {
    VEX_SIMD_NONE = 0x0,
    VEX_SIMD_66   = 0x1,
    VEX_SIMD_F3   = 0x2,
    VEX_SIMD_F2   = 0x3
  };

  enum VexOpcode {
    VEX_OPCODE_NONE  = 0x0,
    VEX_OPCODE_0F    = 0x1,
    VEX_OPCODE_0F_38 = 0x2,
    VEX_OPCODE_0F_3A = 0x3,
    VEX_OPCODE_MASK  = 0x1F
  };

  enum AvxVectorLen {
    AVX_128bit = 0x0,
    AVX_256bit = 0x1,
    AVX_512bit = 0x2,
    AVX_NoVec  = 0x4
  };

  enum EvexTupleType {
    EVEX_FV   = 0,
    EVEX_HV   = 4,
    EVEX_FVM  = 6,
    EVEX_T1S  = 7,
    EVEX_T1F  = 11,
    EVEX_T2   = 13,
    EVEX_T4   = 15,
    EVEX_T8   = 17,
    EVEX_HVM  = 18,
    EVEX_QVM  = 19,
    EVEX_OVM  = 20,
    EVEX_M128 = 21,
    EVEX_DUP  = 22,
    EVEX_ETUP = 23
  };

  enum EvexInputSizeInBits {
    EVEX_8bit  = 0,
    EVEX_16bit = 1,
    EVEX_32bit = 2,
    EVEX_64bit = 3,
    EVEX_NObit = 4
  };

  enum WhichOperand {
    // input to locate_operand, and format code for relocations
    imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
    disp32_operand = 1,          // embedded 32-bit displacement or address
    call32_operand = 2,          // embedded 32-bit self-relative displacement
#ifndef _LP64
    _WhichOperand_limit = 3
#else
     narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
    _WhichOperand_limit = 4
#endif
  };

  // Comparison predicates for integral types & FP types when using SSE
  enum ComparisonPredicate {
    eq = 0,
    lt = 1,
    le = 2,
    _false = 3,
    neq = 4,
    nlt = 5,
    nle = 6,
    _true = 7
  };

  // Comparison predicates for FP types when using AVX
  // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
  // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
  enum ComparisonPredicateFP {
    EQ_OQ = 0,
    LT_OS = 1,
    LE_OS = 2,
    UNORD_Q = 3,
    NEQ_UQ = 4,
    NLT_US = 5,
    NLE_US = 6,
    ORD_Q = 7,
    EQ_UQ = 8,
    NGE_US = 9,
    NGT_US = 0xA,
    FALSE_OQ = 0XB,
    NEQ_OQ = 0xC,
    GE_OS = 0xD,
    GT_OS = 0xE,
    TRUE_UQ = 0xF,
    EQ_OS = 0x10,
    LT_OQ = 0x11,
    LE_OQ = 0x12,
    UNORD_S = 0x13,
    NEQ_US = 0x14,
    NLT_UQ = 0x15,
    NLE_UQ = 0x16,
    ORD_S = 0x17,
    EQ_US = 0x18,
    NGE_UQ = 0x19,
    NGT_UQ = 0x1A,
    FALSE_OS = 0x1B,
    NEQ_OS = 0x1C,
    GE_OQ = 0x1D,
    GT_OQ = 0x1E,
    TRUE_US =0x1F
  };

  enum Width {
    B = 0,
    W = 1,
    D = 2,
    Q = 3
  };

  //---<  calculate length of instruction  >---
  // As instruction size can't be found out easily on x86/x64,
  // we just use '4' for len and maxlen.
  // instruction must start at passed address
  static unsigned int instr_len(unsigned char *instr) {
    // The bytes at 'instr' are not decoded; a fixed guess is returned.
    const unsigned int guessed_len = 4;
    return guessed_len;
  }

  //---<  longest instructions  >---
  // Max instruction length is not specified in architecture documentation.
  // We could use a "safe enough" estimate (15), but just default to
  // instruction length guess from above.
  static unsigned int instr_maxlen() {
    // Fixed guess for the longest instruction length.
    const unsigned int guessed_maxlen = 4;
    return guessed_maxlen;
  }

  // NOTE: The general philosophy of the declarations here is that 64bit versions
  // of instructions are freely declared without the need for wrapping them in an ifdef.
  // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
  // In the .cpp file the implementations are wrapped so that they are dropped out
  // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
  // to the size it was prior to merging up the 32bit and 64bit assemblers.
  //
  // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
  // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.

private:

  bool _legacy_mode_bw;
  bool _legacy_mode_dq;
  bool _legacy_mode_vl;
  bool _legacy_mode_vlbw;
  NOT_LP64(bool _is_managed;)

  class InstructionAttr *_attributes;

  // 64bit prefixes
  void prefix(Register reg);
  void prefix(Register dst, Register src, Prefix p);
  void prefix(Register dst, Address adr, Prefix p);

  void prefix(Address adr);
  void prefix(Address adr, Register reg,  bool byteinst = false);
  void prefix(Address adr, XMMRegister reg);

  int prefix_and_encode(int reg_enc, bool byteinst = false);
  // Two-operand convenience form: forwards to the four-argument overload with
  // both byte flags passed as false.
  int prefix_and_encode(int dst_enc, int src_enc) {
    const bool dst_is_byte = false;
    const bool src_is_byte = false;
    return prefix_and_encode(dst_enc, dst_is_byte, src_enc, src_is_byte);
  }
  int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);

  // Some prefixq variants always emit exactly one prefix byte, so besides a
  // prefix-emitting method we provide a method to get the prefix byte to emit,
  // which can then be folded into a byte stream.
  int8_t get_prefixq(Address adr);
  int8_t get_prefixq(Address adr, Register reg);

  void prefixq(Address adr);
  void prefixq(Address adr, Register reg);
  void prefixq(Address adr, XMMRegister reg);

  int prefixq_and_encode(int reg_enc);
  int prefixq_and_encode(int dst_enc, int src_enc);

  void rex_prefix(Address adr, XMMRegister xreg,
                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
  int  rex_prefix_and_encode(int dst_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);

  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);

  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);

  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                  VexSimdPrefix pre, VexOpcode opc,
                  InstructionAttr *attributes);

  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc,
                             InstructionAttr *attributes);

  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                   VexOpcode opc, InstructionAttr *attributes);

  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
                             VexOpcode opc, InstructionAttr *attributes);

  // Helper functions for groups of instructions
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
  // Force generation of a 4 byte immediate value even if it fits into 8bit
  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
  void emit_arith(int op1, int op2, Register dst, Register src);

  bool emit_compressed_disp_byte(int &disp);

  void emit_modrm(int mod, int dst_enc, int src_enc);
  void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
                        int disp);
  void emit_modrm_sib(int mod, int dst_enc, int src_enc,
                      Address::ScaleFactor scale, int index_enc, int base_enc);
  void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
                            Address::ScaleFactor scale, int index_enc, int base_enc,
                            int disp);

  void emit_operand_helper(int reg_enc,
                           int base_enc, int index_enc, Address::ScaleFactor scale,
                           int disp,
                           RelocationHolder const& rspec,
                           int post_addr_length);

  void emit_operand(Register reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int post_addr_length);

  void emit_operand(Register reg,
                    Register base, XMMRegister index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int post_addr_length);

  void emit_operand(XMMRegister xreg,
                    Register base, XMMRegister xindex, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int post_addr_length);

  void emit_operand(Register reg, Address adr,
                    int post_addr_length);

  void emit_operand(XMMRegister reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int post_addr_length);

  void emit_operand_helper(KRegister kreg,
                           int base_enc, int index_enc, Address::ScaleFactor scale,
                           int disp,
                           RelocationHolder const& rspec,
                           int post_addr_length);

  void emit_operand(KRegister kreg, Address adr,
                    int post_addr_length);

  void emit_operand(KRegister kreg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int post_addr_length);

  void emit_operand(XMMRegister reg, Address adr, int post_addr_length);

  // Immediate-to-memory forms
  void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
  void emit_arith_operand_imm32(int op1, Register rm, Address adr, int32_t imm32);

protected:
#ifdef ASSERT
  void check_relocation(RelocationHolder const& rspec, int format);
#endif

  void emit_data(jint data, relocInfo::relocType    rtype, int format);
  void emit_data(jint data, RelocationHolder const& rspec, int format);
  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);

  bool always_reachable(AddressLiteral adr) NOT_LP64( { return true; } );
  bool        reachable(AddressLiteral adr) NOT_LP64( { return true; } );

  // These are all easily abused and hence protected

  // 32BIT ONLY SECTION
#ifndef _LP64
  // Make these disappear in 64bit mode since they would never be correct
  void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
  void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY

  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
  void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY

  void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
#else
  // 64BIT ONLY SECTION
  void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY

  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);

  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
#endif // _LP64

  // These are unique in that we are ensured by the caller that the 32bit
  // relative in these instructions will always be able to reach the potentially
  // 64bit address described by entry. Since they can take a 64bit address they
  // don't have the 32 suffix like the other instructions in this class.

  void call_literal(address entry, RelocationHolder const& rspec);
  void jmp_literal(address entry, RelocationHolder const& rspec);

  // Avoid using directly section
  // Instructions in this section are actually usable by anyone without danger
  // of failure but have performance issues that are addressed by enhanced
  // instructions which will do the proper thing based on the particular cpu.
  // We protect them because we don't trust you...

  // Don't use next inc() and dec() methods directly. INC & DEC instructions
  // could cause a partial flag stall since they don't set CF flag.
  // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
  // which call inc() & dec() or add() & sub() in accordance with
  // the product flag UseIncDec value.

  void decl(Register dst);
  void decl(Address dst);
  void decq(Address dst);

  void incl(Register dst);
  void incl(Address dst);
  void incq(Register dst);
  void incq(Address dst);

  // New cpus require use of movsd and movss to avoid partial register stall
  // when loading from memory. But for old Opteron use movlpd instead of movsd.
  // The selection is done in MacroAssembler::movdbl() and movflt().

  // Move Scalar Single-Precision Floating-Point Values
  void movss(XMMRegister dst, Address src);
  void movss(XMMRegister dst, XMMRegister src);
  void movss(Address dst, XMMRegister src);

  // Move Scalar Double-Precision Floating-Point Values
  void movsd(XMMRegister dst, Address src);
  void movsd(XMMRegister dst, XMMRegister src);
  void movsd(Address dst, XMMRegister src);
  void movlpd(XMMRegister dst, Address src);

  // New cpus require use of movaps and movapd to avoid partial register stall
  // when moving between registers.
  void movaps(XMMRegister dst, XMMRegister src);
  void movapd(XMMRegister dst, XMMRegister src);

  // End avoid using directly

  // Instruction prefixes
  void prefix(Prefix p);

  public:

  // Creation
  // Hands the CodeBuffer to AbstractAssembler, then calls init_attributes().
  Assembler(CodeBuffer* code)
    : AbstractAssembler(code)
  {
    init_attributes();
  }

  // Decoding
  static address locate_operand(address inst, WhichOperand which);
  static address locate_next_instruction(address inst);

  // Utilities
  static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                         int cur_tuple_type, int in_size_in_bits, int cur_encoding);

  // Generic instructions
  // Does 32bit or 64bit as needed for the platform. In some sense these
  // belong in macro assembler but there is no need for both varieties to exist

  void init_attributes(void);

  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
  void clear_attributes(void) { _attributes = NULL; }

  void set_managed(void) { NOT_LP64(_is_managed = true;) }
  void clear_managed(void) { NOT_LP64(_is_managed = false;) }
  // Returns the _is_managed flag on 32-bit builds; always false on 64-bit
  // builds, where the flag does not exist.
  bool is_managed(void) {
    LP64_ONLY(return false;)
    NOT_LP64(return _is_managed;)
  }

  void lea(Register dst, Address src);

  void mov(Register dst, Register src);

#ifdef _LP64
  // support caching the result of some routines

  // must be called before pusha(), popa(), vzeroupper() - checked with asserts
  static void precompute_instructions();

  void pusha_uncached();
  void popa_uncached();
#endif
  void vzeroupper_uncached();
  void decq(Register dst);

  void pusha();
  void popa();

  void pushf();
  void popf();

  void push(int32_t imm32);

  void push(Register src);

  void pop(Register dst);

  // These do register sized moves/scans
  void rep_mov();
  void rep_stos();
  void rep_stosb();
  void repne_scan();
#ifdef _LP64
  void repne_scanl();
#endif

  // Vanilla instructions in lexical order

  void adcl(Address dst, int32_t imm32);
  void adcl(Address dst, Register src);
  void adcl(Register dst, int32_t imm32);
  void adcl(Register dst, Address src);
  void adcl(Register dst, Register src);

  void adcq(Register dst, int32_t imm32);
  void adcq(Register dst, Address src);
  void adcq(Register dst, Register src);

  void addb(Address dst, int imm8);
  void addw(Register dst, Register src);
  void addw(Address dst, int imm16);

  void addl(Address dst, int32_t imm32);
  void addl(Address dst, Register src);
  void addl(Register dst, int32_t imm32);
  void addl(Register dst, Address src);
  void addl(Register dst, Register src);

  void addq(Address dst, int32_t imm32);
  void addq(Address dst, Register src);
  void addq(Register dst, int32_t imm32);
  void addq(Register dst, Address src);
  void addq(Register dst, Register src);

#ifdef _LP64
  // Add Unsigned Integers with Carry Flag (declared on _LP64 builds only)
  void adcxq(Register dst, Register src);

  // Add Unsigned Integers with Overflow Flag (declared on _LP64 builds only)
  void adoxq(Register dst, Register src);
#endif

  void addr_nop_4();
  void addr_nop_5();
  void addr_nop_7();
  void addr_nop_8();

  // Add Scalar Double-Precision Floating-Point Values
  void addsd(XMMRegister dst, Address src);
  void addsd(XMMRegister dst, XMMRegister src);

  // Add Scalar Single-Precision Floating-Point Values
  void addss(XMMRegister dst, Address src);
  void addss(XMMRegister dst, XMMRegister src);

  // AES instructions
  void aesdec(XMMRegister dst, Address src);
  void aesdec(XMMRegister dst, XMMRegister src);
  void aesdeclast(XMMRegister dst, Address src);
  void aesdeclast(XMMRegister dst, XMMRegister src);
  void aesenc(XMMRegister dst, Address src);
  void aesenc(XMMRegister dst, XMMRegister src);
  void aesenclast(XMMRegister dst, Address src);
  void aesenclast(XMMRegister dst, XMMRegister src);
  // Vector AES instructions
  void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void andw(Register dst, Register src);
  void andb(Address dst, Register src);

  void andl(Address  dst, int32_t imm32);
  void andl(Register dst, int32_t imm32);
  void andl(Register dst, Address src);
  void andl(Register dst, Register src);
  void andl(Address dst, Register src);

  void andq(Address  dst, int32_t imm32);
  void andq(Register dst, int32_t imm32);
  void andq(Register dst, Address src);
  void andq(Register dst, Register src);
  void andq(Address dst, Register src);

  // BMI instructions
  void andnl(Register dst, Register src1, Register src2);
  void andnl(Register dst, Register src1, Address src2);
  void andnq(Register dst, Register src1, Register src2);
  void andnq(Register dst, Register src1, Address src2);

  void blsil(Register dst, Register src);
  void blsil(Register dst, Address src);
  void blsiq(Register dst, Register src);
  void blsiq(Register dst, Address src);

  void blsmskl(Register dst, Register src);
  void blsmskl(Register dst, Address src);
  void blsmskq(Register dst, Register src);
  void blsmskq(Register dst, Address src);

  void blsrl(Register dst, Register src);
  void blsrl(Register dst, Address src);
  void blsrq(Register dst, Register src);
  void blsrq(Register dst, Address src);

  void bsfl(Register dst, Register src);
  void bsrl(Register dst, Register src);

#ifdef _LP64
  void bsfq(Register dst, Register src);
  void bsrq(Register dst, Register src);
#endif

  void bswapl(Register reg);

  void bswapq(Register reg);

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register reg);  // push pc; pc <- reg
  void call(Address adr);   // push pc; pc <- adr

  void cdql();

  void cdqq();

  void cld();

  void clflush(Address adr);
  void clflushopt(Address adr);
  void clwb(Address adr);

  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Address src);

  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Address src);

  void cmpb(Address dst, int imm8);

  void cmpl(Address dst, int32_t imm32);
  void cmpl(Register dst, int32_t imm32);
  void cmpl(Register dst, Register src);
  void cmpl(Register dst, Address src);
  void cmpl_imm32(Address dst, int32_t imm32);

  void cmpq(Address dst, int32_t imm32);
  void cmpq(Address dst, Register src);
  void cmpq(Register dst, int32_t imm32);
  void cmpq(Register dst, Register src);
  void cmpq(Register dst, Address src);

  void cmpw(Address dst, int imm16);

  void cmpxchg8 (Address adr);

  void cmpxchgb(Register reg, Address adr);
  void cmpxchgl(Register reg, Address adr);

  void cmpxchgq(Register reg, Address adr);
  void cmpxchgw(Register reg, Address adr);

  // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
  void comisd(XMMRegister dst, Address src);
  void comisd(XMMRegister dst, XMMRegister src);

  // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
  void comiss(XMMRegister dst, Address src);
  void comiss(XMMRegister dst, XMMRegister src);

  // Identify processor type and features
  void cpuid();

  // CRC32C
  void crc32(Register crc, Register v, int8_t sizeInBytes);
  void crc32(Register crc, Address adr, int8_t sizeInBytes);

  // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, Address src);

  // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
  void cvtsi2sdl(XMMRegister dst, Register src);
  void cvtsi2sdl(XMMRegister dst, Address src);
  void cvtsi2sdq(XMMRegister dst, Register src);
  void cvtsi2sdq(XMMRegister dst, Address src);

  // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
  void cvtsi2ssl(XMMRegister dst, Register src);
  void cvtsi2ssl(XMMRegister dst, Address src);
  void cvtsi2ssq(XMMRegister dst, Register src);
  void cvtsi2ssq(XMMRegister dst, Address src);

  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert between Half-Precision and Single-Precision Floating-Point values
  void vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len);
  void vcvtps2ph(Address dst, XMMRegister src, int imm8, int vector_len);
  void vcvtph2ps(XMMRegister dst, Address src, int vector_len);

  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, Address src);

  // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
  // NOTE(review): cvtsd2siq is spelled without the extra 't' of the cvtt*
  // declarations below, so it is presumably the non-truncating form despite
  // this heading - confirm against its definition.
  void cvtsd2siq(Register dst, XMMRegister src);
  void cvttsd2sil(Register dst, Address src);
  void cvttsd2sil(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, Address src);
  void cvttsd2siq(Register dst, XMMRegister src);

  // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
  void cvttss2sil(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  // NOTE(review): cvtss2sil is spelled without the extra 't' of the cvtt*
  // declarations above, so it is presumably the non-truncating form despite
  // the heading of this group - confirm against its definition.
  void cvtss2sil(Register dst, XMMRegister src);

  // Convert vector double to int
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  // Convert vector float and double
  void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
  void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector float to int/long
  void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
  void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector long to vector FP
  void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector double to long
  void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector double to int
  void vcvttpd2dq(XMMRegister dst, XMMRegister src, int vector_len);

  // Evex casts with truncation
  void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);

  // Evex casts with signed saturation
  void evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len);

  // Absolute value of packed integer values
  void pabsb(XMMRegister dst, XMMRegister src);
  void pabsw(XMMRegister dst, XMMRegister src);
  void pabsd(XMMRegister dst, XMMRegister src);
  void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
  void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);

  // Divide Scalar Double-Precision Floating-Point Values
  void divsd(XMMRegister dst, Address src);
  void divsd(XMMRegister dst, XMMRegister src);

  // Divide Scalar Single-Precision Floating-Point Values
  void divss(XMMRegister dst, Address src);
  void divss(XMMRegister dst, XMMRegister src);

#ifndef _LP64
private:

  void emit_farith(int b1, int b2, int i);

public:
  void emms();

  void fabs();

  void fadd(int i);

  void fadd_d(Address src);
  void fadd_s(Address src);

  // "Alternate" versions of x87 instructions place result down in FPU
  // stack instead of on TOS

  void fadda(int i); // "alternate" fadd
  void faddp(int i = 1);

  void fchs();

  void fcom(int i);

  void fcomp(int i = 1);
  void fcomp_d(Address src);
  void fcomp_s(Address src);

  void fcompp();

  void fcos();

  void fdecstp();

  // NOTE(review): fdivr_s is listed in this (fdiv) group while fdiv_s is
  // listed in the fdivr group below. This only affects the ordering of the
  // declarations; each name is declared exactly once in this section.
  void fdiv(int i);
  void fdiv_d(Address src);
  void fdivr_s(Address src);
  void fdiva(int i);  // "alternate" fdiv
  void fdivp(int i = 1);

  void fdivr(int i);
  void fdivr_d(Address src);
  void fdiv_s(Address src);

  void fdivra(int i); // "alternate" reversed fdiv

  void fdivrp(int i = 1);

  void ffree(int i = 0);

  void fild_d(Address adr);
  void fild_s(Address adr);

  void fincstp();

  void finit();

  void fist_s (Address adr);
  void fistp_d(Address adr);
  void fistp_s(Address adr);

  void fld1();

  void fld_d(Address adr);
  void fld_s(Address adr);
  void fld_s(int index);

  void fldcw(Address src);

  void fldenv(Address src);

  void fldlg2();

  void fldln2();

  void fldz();

  void flog();
  void flog10();

  void fmul(int i);

  void fmul_d(Address src);
  void fmul_s(Address src);

  void fmula(int i);  // "alternate" fmul

  void fmulp(int i = 1);

  void fnsave(Address dst);

  void fnstcw(Address src);

  void fnstsw_ax();

  void fprem();
  void fprem1();

  void frstor(Address src);

  void fsin();

  void fsqrt();

  void fst_d(Address adr);
  void fst_s(Address adr);

  void fstp_d(Address adr);
  void fstp_d(int index);
  void fstp_s(Address adr);

  void fsub(int i);
  void fsub_d(Address src);
  void fsub_s(Address src);

  void fsuba(int i);  // "alternate" fsub

  void fsubp(int i = 1);

  void fsubr(int i);
  void fsubr_d(Address src);
  void fsubr_s(Address src);

  void fsubra(int i); // "alternate" reversed fsub

  void fsubrp(int i = 1);

  void ftan();

  void ftst();

  void fucomi(int i = 1);
  void fucomip(int i = 1);

  void fwait();

  void fxch(int i = 1);

  void fyl2x();
  void frndint();
  void f2xm1();
  void fldl2e();
#endif // !_LP64

  // operands that only take the original 32bit registers
  void emit_operand32(Register reg, Address adr, int post_addr_length);

  void fld_x(Address adr);  // extended-precision (80-bit) format
  void fstp_x(Address adr); // extended-precision (80-bit) format
  void fxrstor(Address src);
  void xrstor(Address src);

  void fxsave(Address dst);
  void xsave(Address dst);

  void hlt();

  void idivl(Register src);
  void divl(Register src); // Unsigned division

#ifdef _LP64
  void idivq(Register src);
  void divq(Register src); // Unsigned division
#endif

  void imull(Register src);
  void imull(Register dst, Register src);
  void imull(Register dst, Register src, int value);
  void imull(Register dst, Address src, int value);
  void imull(Register dst, Address src);

#ifdef _LP64
  void imulq(Register dst, Register src);
  void imulq(Register dst, Register src, int value);
  void imulq(Register dst, Address src, int value);
  void imulq(Register dst, Address src);
  void imulq(Register dst);
#endif

  // jcc is the generic conditional branch generator. This overload is
  // used for branches to labels (it takes a Label&, not the address of
  // a run-time routine). jcc takes a condition (cc) and a label (L)
  // and generates either a backward branch or a forward branch and
  // links it to the label fixup chain. Usage:
  //
  // Label L;      // unbound label
  // jcc(cc, L);   // forward branch to unbound label
  // bind(L);      // bind label to the current pc
  // jcc(cc, L);   // backward branch to bound label
  // bind(L);      // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.
  //
  // maybe_short defaults to true; presumably it permits the short (8-bit
  // displacement) encoding when the target is in range - confirm in the .cpp.

  void jcc(Condition cc, Label& L, bool maybe_short = true);

  // Conditional jump to an 8-bit offset to L.
  // WARNING: be very careful using this for forward jumps.  If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.

  // Use macro to record file and line number: jccb(cc, L) expands to a
  // jccb_0 call that also passes the call site's __FILE__ and __LINE__.
  // Being a preprocessor macro, jccb is not scoped to this class.
  #define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)

  // Function behind the jccb macro; file/line identify the call site.
  void jccb_0(Condition cc, Label& L, const char* file, int line);

  void jmp(Address entry);    // pc <- entry

  // Label operations & relative jumps (PPUM Appendix D)
  void jmp(Label& L, bool maybe_short = true);   // unconditional jump to L

  void jmp(Register entry); // pc <- entry

  // Unconditional 8-bit offset jump to L.
  // WARNING: be very careful using this for forward jumps.  If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.

  // Use macro to record file and line number: jmpb(L) expands to a
  // jmpb_0 call that also passes the call site's __FILE__ and __LINE__.
  // Being a preprocessor macro, jmpb is not scoped to this class.
  #define jmpb(L) jmpb_0(L, __FILE__, __LINE__)

  // Function behind the jmpb macro; file/line identify the call site.
  void jmpb_0(Label& L, const char* file, int line);

  void ldmxcsr( Address src );

  void leal(Register dst, Address src);

  void leaq(Register dst, Address src);

  void lfence();

  void lock();
  void size_prefix();

  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, Address src);

#ifdef _LP64
  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, Address src);
#endif

  // Bit flags describing the ordering constraint passed to membar().
  // Each enumerator occupies its own bit (bits 0..3), listed here from the
  // lowest bit upwards.
  enum Membar_mask_bits {
    LoadLoad   = 1 << 0,
    StoreLoad  = 1 << 1,
    LoadStore  = 1 << 2,
    StoreStore = 1 << 3
  };

  // Serializes memory and blows flags
  void membar(Membar_mask_bits order_constraint);

  void mfence();
  void sfence();

  // Moves

  void mov64(Register dst, int64_t imm64);
  void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);

  void movb(Address dst, Register src);
  void movb(Address dst, int imm8);
  void movb(Register dst, Address src);

  void movddup(XMMRegister dst, XMMRegister src);
  void movddup(XMMRegister dst, Address src);
  void vmovddup(XMMRegister dst, Address src, int vector_len);

  void kandbl(KRegister dst, KRegister src1, KRegister src2);
  void kandwl(KRegister dst, KRegister src1, KRegister src2);
  void kanddl(KRegister dst, KRegister src1, KRegister src2);
  void kandql(KRegister dst, KRegister src1, KRegister src2);

  void korbl(KRegister dst, KRegister src1, KRegister src2);
  void korwl(KRegister dst, KRegister src1, KRegister src2);
  void kordl(KRegister dst, KRegister src1, KRegister src2);
  void korql(KRegister dst, KRegister src1, KRegister src2);

  void kxorbl(KRegister dst, KRegister src1, KRegister src2);
  void kxorwl(KRegister dst, KRegister src1, KRegister src2);
  void kxordl(KRegister dst, KRegister src1, KRegister src2);
  void kxorql(KRegister dst, KRegister src1, KRegister src2);
  void kmovbl(KRegister dst, Register src);
  void kmovbl(Register dst, KRegister src);
  void kmovbl(KRegister dst, KRegister src);
  void kmovwl(KRegister dst, Register src);
  void kmovwl(KRegister dst, Address src);
  void kmovwl(Register dst, KRegister src);
  void kmovwl(Address dst, KRegister src);
  void kmovwl(KRegister dst, KRegister src);
  void kmovdl(KRegister dst, Register src);
  void kmovdl(Register dst, KRegister src);
  void kmovql(KRegister dst, KRegister src);
  void kmovql(Address dst, KRegister src);
  void kmovql(KRegister dst, Address src);
  void kmovql(KRegister dst, Register src);
  void kmovql(Register dst, KRegister src);

  void knotbl(KRegister dst, KRegister src);
  void knotwl(KRegister dst, KRegister src);
  void knotdl(KRegister dst, KRegister src);
  void knotql(KRegister dst, KRegister src);

  void kortestbl(KRegister dst, KRegister src);
  void kortestwl(KRegister dst, KRegister src);
  void kortestdl(KRegister dst, KRegister src);
  void kortestql(KRegister dst, KRegister src);

  void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
  void kshiftlbl(KRegister dst, KRegister src, int imm8);
  void kshiftlql(KRegister dst, KRegister src, int imm8);
  void kshiftrbl(KRegister dst, KRegister src, int imm8);
  void kshiftrwl(KRegister dst, KRegister src, int imm8);
  void kshiftrdl(KRegister dst, KRegister src, int imm8);
  void kshiftrql(KRegister dst, KRegister src, int imm8);
  void ktestq(KRegister src1, KRegister src2);
  void ktestd(KRegister src1, KRegister src2);
  void kunpckdql(KRegister dst, KRegister src1, KRegister src2);

  void ktestql(KRegister dst, KRegister src);
  void ktestdl(KRegister dst, KRegister src);
  void ktestwl(KRegister dst, KRegister src);
  void ktestbl(KRegister dst, KRegister src);

  void movdl(XMMRegister dst, Register src);
  void movdl(Register dst, XMMRegister src);
  void movdl(XMMRegister dst, Address src);
  void movdl(Address dst, XMMRegister src);

  // Move Double Quadword
  void movdq(XMMRegister dst, Register src);
  void movdq(Register dst, XMMRegister src);

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqa(XMMRegister dst, Address src);

  // Move Unaligned Double Quadword
  void movdqu(Address     dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 256bit Vector
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 512bit Vector
  void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqub(XMMRegister dst, Address src, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  void evmovdquw(XMMRegister dst, Address src, int vector_len);
  void evmovdquw(Address dst, XMMRegister src, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqul(XMMRegister dst, Address src, int vector_len);
  void evmovdqul(Address dst, XMMRegister src, int vector_len);

  void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  void evmovdquq(Address dst, XMMRegister src, int vector_len);
  void evmovdquq(XMMRegister dst, Address src, int vector_len);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);

  void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  // Move lower 64bit to high 64bit in 128bit register
  void movlhps(XMMRegister dst, XMMRegister src);

  void movl(Register dst, int32_t imm32);
  void movl(Address dst, int32_t imm32);
  void movl(Register dst, Register src);
  void movl(Register dst, Address src);
  void movl(Address dst, Register src);

#ifdef _LP64
  void movq(Register dst, Register src);
  void movq(Register dst, Address src);
  void movq(Address  dst, Register src);
  void movq(Address  dst, int32_t imm32);
  void movq(Register  dst, int32_t imm32);
#endif

  // Move Quadword
  void movq(Address     dst, XMMRegister src);
  void movq(XMMRegister dst, Address src);
  void movq(XMMRegister dst, XMMRegister src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);

  void movsbl(Register dst, Address src);
  void movsbl(Register dst, Register src);

#ifdef _LP64
  void movsbq(Register dst, Address src);
  void movsbq(Register dst, Register src);

  // Move signed 32bit immediate to 64bit extending sign
  void movslq(Address  dst, int32_t imm64);
  void movslq(Register dst, int32_t imm64);

  void movslq(Register dst, Address src);
  void movslq(Register dst, Register src);
#endif

  void movswl(Register dst, Address src);
  void movswl(Register dst, Register src);

#ifdef _LP64
  void movswq(Register dst, Address src);
  void movswq(Register dst, Register src);
#endif

  void movups(XMMRegister dst, Address src);
  void vmovups(XMMRegister dst, Address src, int vector_len);
  void movups(Address dst, XMMRegister src);
  void vmovups(Address dst, XMMRegister src, int vector_len);

  void movw(Address dst, int imm16);
  void movw(Register dst, Address src);
  void movw(Address dst, Register src);

  void movzbl(Register dst, Address src);
  void movzbl(Register dst, Register src);

#ifdef _LP64
  void movzbq(Register dst, Address src);
  void movzbq(Register dst, Register src);
#endif

  void movzwl(Register dst, Address src);
  void movzwl(Register dst, Register src);

#ifdef _LP64
  void movzwq(Register dst, Address src);
  void movzwq(Register dst, Register src);
#endif

  // Unsigned multiply with RAX destination register
  void mull(Address src);
  void mull(Register src);

#ifdef _LP64
  void mulq(Address src);
  void mulq(Register src);
  void mulxq(Register dst1, Register dst2, Register src);
#endif

  // Multiply Scalar Double-Precision Floating-Point Values
  void mulsd(XMMRegister dst, Address src);
  void mulsd(XMMRegister dst, XMMRegister src);

  // Multiply Scalar Single-Precision Floating-Point Values
  void mulss(XMMRegister dst, Address src);
  void mulss(XMMRegister dst, XMMRegister src);

  void negl(Register dst);
  void negl(Address dst);

#ifdef _LP64
  void negq(Register dst);
  void negq(Address dst);
#endif

  void nop(int i = 1);

  void notl(Register dst);

#ifdef _LP64
  void notq(Register dst);

  void btsq(Address dst, int imm8);
  void btrq(Address dst, int imm8);
#endif

  void orw(Register dst, Register src);

  void orl(Address dst, int32_t imm32);
  void orl(Register dst, int32_t imm32);
  void orl(Register dst, Address src);
  void orl(Register dst, Register src);
  void orl(Address dst, Register src);

  void orb(Address dst, int imm8);
  void orb(Address dst, Register src);

  void orq(Address dst, int32_t imm32);
  void orq(Address dst, Register src);
  void orq(Register dst, int32_t imm32);
  void orq(Register dst, Address src);
  void orq(Register dst, Register src);

  // Pack with signed saturation
  void packsswb(XMMRegister dst, XMMRegister src);
  void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void packssdw(XMMRegister dst, XMMRegister src);
  void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Pack with unsigned saturation
  void packuswb(XMMRegister dst, XMMRegister src);
  void packuswb(XMMRegister dst, Address src);
  void packusdw(XMMRegister dst, XMMRegister src);
  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Permutations
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermq(XMMRegister dst, XMMRegister src, int imm8);
  void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpermw(XMMRegister dst,  XMMRegister nds, XMMRegister src, int vector_len);
  void vpermd(XMMRegister dst,  XMMRegister nds, Address src, int vector_len);
  void vpermd(XMMRegister dst,  XMMRegister nds, XMMRegister src, int vector_len);
  void vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8);
  void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
  void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);

  void pause();

  // Undefined Instruction
  void ud2();

  // SSE4.2 string instructions
  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);

  void pcmpeqb(XMMRegister dst, XMMRegister src);
  void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);

  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
  void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

  void pcmpeqw(XMMRegister dst, XMMRegister src);
  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void pcmpeqd(XMMRegister dst, XMMRegister src);
  void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void pcmpeqq(XMMRegister dst, XMMRegister src);
  void evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
  void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
  void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void pcmpgtq(XMMRegister dst, XMMRegister src);
  void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void pmovmskb(Register dst, XMMRegister src);
  void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
  void vmovmskps(Register dst, XMMRegister src, int vec_enc);
  void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
  void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpmaskmovq(XMMRegister dst, XMMRegister mask, Address src, int vector_len);

  void vmaskmovps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vmaskmovpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
  void vmaskmovps(Address dst, XMMRegister src, XMMRegister mask, int vector_len);
  void vmaskmovpd(Address dst, XMMRegister src, XMMRegister mask, int vector_len);

  // SSE 4.1 extract
  void pextrd(Register dst, XMMRegister src, int imm8);
  void pextrq(Register dst, XMMRegister src, int imm8);
  void pextrd(Address dst, XMMRegister src, int imm8);
  void pextrq(Address dst, XMMRegister src, int imm8);
  void pextrb(Register dst, XMMRegister src, int imm8);
  void pextrb(Address dst, XMMRegister src, int imm8);
  // SSE 2 extract
  void pextrw(Register dst, XMMRegister src, int imm8);
  void pextrw(Address dst, XMMRegister src, int imm8);

  // SSE 4.1 insert
  void pinsrd(XMMRegister dst, Register src, int imm8);
  void pinsrq(XMMRegister dst, Register src, int imm8);
  void pinsrb(XMMRegister dst, Register src, int imm8);
  void pinsrd(XMMRegister dst, Address src, int imm8);
  void pinsrq(XMMRegister dst, Address src, int imm8);
  void pinsrb(XMMRegister dst, Address src, int imm8);
  void insertps(XMMRegister dst, XMMRegister src, int imm8);
  // SSE 2 insert
  void pinsrw(XMMRegister dst, Register src, int imm8);
  void pinsrw(XMMRegister dst, Address src, int imm8);

  // AVX insert
  void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
  void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);

  // Zero extend moves
  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);
  void pmovzxbd(XMMRegister dst, XMMRegister src);
  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovzxwq(XMMRegister dst, XMMRegister src, int vector_len);
  void pmovzxdq(XMMRegister dst, XMMRegister src);
  void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);

  // Sign extend moves
  void pmovsxbd(XMMRegister dst, XMMRegister src);
  void pmovsxbq(XMMRegister dst, XMMRegister src);
  void pmovsxbw(XMMRegister dst, XMMRegister src);
  void pmovsxwd(XMMRegister dst, XMMRegister src);
  void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
  void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);

  void evpmovwb(Address dst, XMMRegister src, int vector_len);
  void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evpmovdb(Address dst, XMMRegister src, int vector_len);

  // Multiply add
  void pmaddwd(XMMRegister dst, XMMRegister src);
  void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
  void evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
  void evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

  // Multiply add accumulate
  void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

#ifndef _LP64 // no 32bit push/pop on amd64
  void popl(Address dst);
#endif

#ifdef _LP64
  void popq(Address dst);
  void popq(Register dst);
#endif

  void popcntl(Register dst, Address src);
  void popcntl(Register dst, Register src);

  void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
--> --------------------

--> maximum size reached

--> --------------------

¤ Dauer der Verarbeitung: 0.60 Sekunden (vorverarbeitet) ¤

Download des Quellennavigators
Download des sprechenden Kalenders
Eigene Datei ansehen

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.