/* * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Address is an abstraction used to represent a memory location // as used in assembler instructions. // PPC instructions grok either baseReg + indexReg or baseReg + disp. class Address { private: Register _base; // Base register. Register _index; // Index register.
intptr_t _disp; // Displacement.
public:
// Construct an Address from a base register `b`, an index register `i` and an
// optional displacement `d` (stored in _disp as an intptr_t).
// An index register other than noreg and a non-zero displacement must not be
// combined; this is checked by the assertion below.
Address(Register b, Register i, address d = 0)
: _base(b), _index(i), _disp((intptr_t)d) {
// Either there is no index register, or there is no displacement.
assert(i == noreg || d == 0, "can't have both");
}
// Argument is an abstraction used to represent an outgoing // actual argument or an incoming formal parameter, whether // it resides in memory or in a register, in a manner consistent // with the PPC Application Binary Interface, or ABI. This is // often referred to as the native or C calling convention.
class Argument { private: int _number; // The number of the argument. public: enum { // Only 8 registers may contain integer parameters.
n_register_parameters = 8, // Can have up to 8 floating registers.
n_float_register_parameters = 8,
// PPC C calling conventions. // The first eight arguments are passed in int regs if they are int.
n_int_register_parameters_c = 8, // The first thirteen float arguments are passed in float regs.
n_float_register_parameters_c = 13, // Only the first 8 parameters are not placed on the stack. Aix disassembly // shows that xlC places all float args after argument 8 on the stack AND // in a register. This is not documented, but we follow this convention, too.
n_regs_not_on_stack_c = 8,
// The PPC Assembler: Pure assembler doing NO optimizations on the // instruction level; i.e., what you write is what you get. The // Assembler is generating code into a CodeBuffer.
class Assembler : public AbstractAssembler { protected: // Displacement routines staticint patched_branch(int dest_pos, int inst, int inst_pos); staticint branch_destination(int inst, int pos);
friendclass AbstractAssembler;
// Code patchers need various routines like inv_wdisp() friendclass NativeInstruction; friendclass NativeGeneralJump; friendclass Relocation;
public:
// Shift counts used to position fields inside an instruction word
// (and, for the PRE_* entries, inside the prefix word).
enum shifts {
  // XO_* shift counts.
  XO_21_29_SHIFT = 2,
  XO_21_30_SHIFT = 1,
  XO_27_29_SHIFT = 2,
  XO_30_31_SHIFT = 0,

  // Shift counts in the instruction word.
  SPR_5_9_SHIFT  = 11u,  // SPR_5_9 field occupies bits 11 -- 15
  SPR_0_4_SHIFT  = 16u,  // SPR_0_4 field occupies bits 16 -- 20
  RS_SHIFT       = 21u,  // RS field occupies bits 21 -- 25
  OPCODE_SHIFT   = 26u,  // opcode occupies bits 26 -- 31

  // Shift counts in the prefix word.
  PRE_TYPE_SHIFT = 24u,  // prefix type occupies bits 24 -- 25
  PRE_ST1_SHIFT  = 23u,  // ST1 field occupies bit 23
  PRE_R_SHIFT    = 20u,  // R-bit occupies bit 20
  PRE_ST4_SHIFT  = 20u,  // ST4 field occupies bits 20 -- 23
};
//---< calculate length of instruction >--- // With PPC64 being a RISC architecture, this always is BytesPerInstWord // instruction must start at passed address staticunsignedint instr_len(unsignedchar *instr) { return BytesPerInstWord; }
// Test if x is within signed immediate range for nbits. staticbool is_simm(int x, unsignedint nbits) {
assert(0 < nbits && nbits < 32, "out of bounds"); constint min = -(((int)1) << nbits-1); constint maxplus1 = (((int)1) << nbits-1); return min <= x && x < maxplus1;
}
staticbool is_simm(jlong x, unsignedint nbits) {
assert(0 < nbits && nbits < 64, "out of bounds"); const jlong min = -(((jlong)1) << nbits-1); const jlong maxplus1 = (((jlong)1) << nbits-1); return min <= x && x < maxplus1;
}
// Test if x is within unsigned immediate range for nbits. staticbool is_uimm(int x, unsignedint nbits) {
assert(0 < nbits && nbits < 32, "out of bounds"); constunsignedint maxplus1 = (((unsignedint)1) << nbits); return (unsignedint)x < maxplus1;
}
// X is supposed to fit in a field "nbits" wide // and be sign-extended. Check the range. staticvoid assert_signed_range(intptr_t x, int nbits) {
assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), "value out of range");
}
staticvoid assert_signed_word_disp_range(intptr_t x, int nbits) {
assert((x & 3) == 0, "not word aligned");
assert_signed_range(x, nbits + 2);
}
staticvoid assert_unsigned_const(int x, int nbits) {
assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
}
// inverse of u_field staticint inv_u_field(int x, int hi_bit, int lo_bit) {
juint r = juint(x) >> lo_bit;
r &= fmask(hi_bit, lo_bit); returnint(r);
}
// Signed version: extract the field [hi_bit..lo_bit] from x and sign-extend it.
// Bit hi_bit of x becomes the sign bit of the result.
static int inv_s_field_ppc(int x, int hi_bit, int lo_bit) {
  // Move the field's top bit into bit 31. Do the left shift on the unsigned
  // representation so that shifting into the sign bit is well defined.
  const unsigned int shifted_up = static_cast<unsigned int>(x) << (31 - hi_bit);
  // The arithmetic right shift brings the field down to bit 0 and replicates
  // its sign bit.
  return static_cast<int>(shifted_up) >> (31 - hi_bit + lo_bit);
}
staticint u_field(int x, int hi_bit, int lo_bit) {
assert((x & ~fmask(hi_bit, lo_bit)) == 0, "value out of range"); int r = x << lo_bit;
assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); return r;
}
// Same as u_field for signed values staticint s_field(int x, int hi_bit, int lo_bit) { int nbits = hi_bit - lo_bit + 1;
assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), "value out of range");
x &= fmask(hi_bit, lo_bit); int r = x << lo_bit; return r;
}
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes //static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes //static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes //static int xo4( int x) { return opp_u_field(x, 30, 26); }// is contained in our opcodes //static int xo5( int x) { return opp_u_field(x, 29, 27); }// is contained in our opcodes //static int xo6( int x) { return opp_u_field(x, 30, 27); }// is contained in our opcodes //static int xo7( int x) { return opp_u_field(x, 31, 30); }// is contained in our opcodes
protected:
// Compute relative address for branch: the byte distance from off to x,
// shifted right by 2 (presumably a count of 4-byte instruction words).
static intptr_t disp(intptr_t x, intptr_t off) {
  // Keep the difference at full pointer width. Narrowing it to int first
  // could wrap a far-out-of-range distance into a plausible small value,
  // which later range checks would then no longer detect.
  const intptr_t byte_distance = x - off;
  return byte_distance >> 2;
}
public:
// Signed immediate, in low bits, nbits long.
// x is range-checked by assert_signed_range, then truncated to its low nbits.
static int simm(int x, int nbits) {
  assert_signed_range(x, nbits);
  return x & ((1 << nbits) - 1);
}
// unsigned immediate, in low bits, nbits long staticint uimm(int x, int nbits) {
assert_unsigned_const(x, nbits); return x & ((1 << nbits) - 1);
}
// Replace the low 16 bits of the instruction word at instr with s; the
// upper 16 bits are left unchanged.
static void set_imm(int* instr, short s) {
  // imm is always in the lower 16 bits of the instruction,
  // so this is endian-neutral. Same for the get_imm below.
  const uint32_t w = *(uint32_t *)instr;
  *instr = (int)((w & ~0x0000FFFF) | (s & 0x0000FFFF));
}
staticint get_imm(address a, int instruction_number) { return (short)((int *)a)[instruction_number];
}
// Upper 16 bits of x, interpreted as a signed 16 bit value.
static inline int hi16_signed(int x) { return (int)(int16_t)(x >> 16); }
// Lower 16 bits of x, zero-extended.
static inline int lo16_unsigned(int x) { return x & 0xffff; }
protected:
// Extract the top 32 bits in a 64 bit word.
// The shift is done on the unsigned representation, so no sign bits are
// shifted in before the result is narrowed to int32_t.
static int32_t hi32(int64_t x) {
  int32_t r = int32_t((uint64_t)x >> 32);
  return r;
}
public:
// Round addr up to the next multiple of a.
// The mask arithmetic is only correct when a is a power of two.
static inline unsigned int align_addr(unsigned int addr, unsigned int a) {
  return ((addr + (a - 1)) & ~(a - 1));
}
#if !defined(ABI_ELFv2)
// Emit a function descriptor with the specified entry point, TOC,
// and ENV. If the entry point is NULL, the descriptor will point
// just past the descriptor.
// Use values from friend functions as defaults.
inline address emit_fd(address entry = NULL,
                       address toc = (address) FunctionDescriptor::friend_toc,
                       address env = (address) FunctionDescriptor::friend_env);
#endif
// Memory instructions use r0 as hard coded 0, e.g. to simulate loading // immediates. The normal instruction encoders enforce that r0 is not // passed to them. Use either extended mnemonics encoders or the special ra0 // versions.
// Issue an illegal instruction. inlinevoid illtrap(); staticinlinebool is_illtrap(address instr_addr);
// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions inlinevoid addi( Register d, Register a, int si16); inlinevoid addis(Register d, Register a, int si16);
// Prefixed add immediate, introduced by POWER10 inlinevoid paddi(Register d, Register a, long si34, bool r); inlinevoid pli( Register d, long si34);
private: inlinevoid addi_r0ok( Register d, Register a, int si16); inlinevoid addis_r0ok(Register d, Register a, int si16); inlinevoid paddi_r0ok(Register d, Register a, long si34, bool r); public: inlinevoid addic_( Register d, Register a, int si16); inlinevoid subfic( Register d, Register a, int si16); inlinevoid add( Register d, Register a, Register b); inlinevoid add_( Register d, Register a, Register b); inlinevoid subf( Register d, Register a, Register b); // d = b - a "Sub_from", as in ppc spec. inlinevoid sub( Register d, Register a, Register b); // d = a - b Swap operands of subf for readability. inlinevoid subf_( Register d, Register a, Register b); inlinevoid addc( Register d, Register a, Register b); inlinevoid addc_( Register d, Register a, Register b); inlinevoid subfc( Register d, Register a, Register b); inlinevoid subfc_( Register d, Register a, Register b); inlinevoid adde( Register d, Register a, Register b); inlinevoid adde_( Register d, Register a, Register b); inlinevoid subfe( Register d, Register a, Register b); inlinevoid subfe_( Register d, Register a, Register b); inlinevoid addme( Register d, Register a); inlinevoid addme_( Register d, Register a); inlinevoid subfme( Register d, Register a); inlinevoid subfme_(Register d, Register a); inlinevoid addze( Register d, Register a); inlinevoid addze_( Register d, Register a); inlinevoid subfze( Register d, Register a); inlinevoid subfze_(Register d, Register a); inlinevoid neg( Register d, Register a); inlinevoid neg_( Register d, Register a); inlinevoid mulli( Register d, Register a, int si16); inlinevoid mulld( Register d, Register a, Register b); inlinevoid mulld_( Register d, Register a, Register b); inlinevoid mullw( Register d, Register a, Register b); inlinevoid mullw_( Register d, Register a, Register b); inlinevoid mulhw( Register d, Register a, Register b); inlinevoid mulhw_( Register d, Register a, Register b); inlinevoid mulhwu( Register d, Register a, Register b); inlinevoid 
mulhwu_(Register d, Register a, Register b); inlinevoid mulhd( Register d, Register a, Register b); inlinevoid mulhd_( Register d, Register a, Register b); inlinevoid mulhdu( Register d, Register a, Register b); inlinevoid mulhdu_(Register d, Register a, Register b); inlinevoid divd( Register d, Register a, Register b); inlinevoid divd_( Register d, Register a, Register b); inlinevoid divw( Register d, Register a, Register b); inlinevoid divw_( Register d, Register a, Register b); inlinevoid divdu( Register d, Register a, Register b); inlinevoid divdu_( Register d, Register a, Register b); inlinevoid divwu( Register d, Register a, Register b); inlinevoid divwu_( Register d, Register a, Register b);
private: // PPC 1, section 3.3.9, Fixed-Point Compare Instructions inlinevoid cmpi( ConditionRegister bf, int l, Register a, int si16); inlinevoid cmp( ConditionRegister bf, int l, Register a, Register b); inlinevoid cmpli(ConditionRegister bf, int l, Register a, int ui16); inlinevoid cmpl( ConditionRegister bf, int l, Register a, Register b);
public: // extended mnemonics of Compare Instructions inlinevoid cmpwi( ConditionRegister crx, Register a, int si16); inlinevoid cmpdi( ConditionRegister crx, Register a, int si16); inlinevoid cmpw( ConditionRegister crx, Register a, Register b); inlinevoid cmpd( ConditionRegister crx, Register a, Register b); inlinevoid cmplwi(ConditionRegister crx, Register a, int ui16); inlinevoid cmpldi(ConditionRegister crx, Register a, int ui16); inlinevoid cmplw( ConditionRegister crx, Register a, Register b); inlinevoid cmpld( ConditionRegister crx, Register a, Register b);
// >= Power9 inlinevoid cmprb( ConditionRegister bf, int l, Register a, Register b); inlinevoid cmpeqb(ConditionRegister bf, Register a, Register b);
inlinevoid isel( Register d, Register a, Register b, int bc); // Convenient version which takes: Condition register, Condition code and invert flag. Omit b to keep old value. inlinevoid isel( Register d, ConditionRegister cr, Condition cc, bool inv, Register a, Register b = noreg); // Set d = 0 if (cr.cc) equals 1, otherwise b. inlinevoid isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions void andi( Register a, Register s, long ui16); // optimized version inlinevoid andi_( Register a, Register s, int ui16); inlinevoid andis_( Register a, Register s, int ui16); inlinevoid ori( Register a, Register s, int ui16); inlinevoid oris( Register a, Register s, int ui16); inlinevoid xori( Register a, Register s, int ui16); inlinevoid xoris( Register a, Register s, int ui16); inlinevoid andr( Register a, Register s, Register b); // suffixed by 'r' as 'and' is C++ keyword inlinevoid and_( Register a, Register s, Register b); // Turn or0(rx,rx,rx) into a nop and avoid that we accidentally emit a // SMT-priority change instruction (see SMT instructions below). inlinevoid or_unchecked(Register a, Register s, Register b); inlinevoid orr( Register a, Register s, Register b); // suffixed by 'r' as 'or' is C++ keyword inlinevoid or_( Register a, Register s, Register b); inlinevoid xorr( Register a, Register s, Register b); // suffixed by 'r' as 'xor' is C++ keyword inlinevoid xor_( Register a, Register s, Register b); inlinevoid nand( Register a, Register s, Register b); inlinevoid nand_( Register a, Register s, Register b); inlinevoid nor( Register a, Register s, Register b); inlinevoid nor_( Register a, Register s, Register b); inlinevoid andc( Register a, Register s, Register b); inlinevoid andc_( Register a, Register s, Register b); inlinevoid orc( Register a, Register s, Register b); inlinevoid orc_( Register a, Register s, Register b); inlinevoid extsb( Register a, Register s); inlinevoid extsb_( Register a, Register s); inlinevoid extsh( Register a, Register s); inlinevoid extsh_( Register a, Register s); inlinevoid extsw( Register a, Register s); inlinevoid extsw_( Register a, Register s);
// extended mnemonics inlinevoid nop(); // NOP for FP and BR units (different versions to allow them to be in one group) inlinevoid fpnop0(); inlinevoid fpnop1(); inlinevoid brnop0(); inlinevoid brnop1(); inlinevoid brnop2();
inlinevoid mr( Register d, Register s); inlinevoid ori_opt( Register d, int ui16); inlinevoid oris_opt(Register d, int ui16);
// endgroup opcode for Power6 inlinevoid endgroup();
// count instructions inlinevoid cntlzw( Register a, Register s); inlinevoid cntlzw_( Register a, Register s); inlinevoid cntlzd( Register a, Register s); inlinevoid cntlzd_( Register a, Register s); inlinevoid cnttzw( Register a, Register s); inlinevoid cnttzw_( Register a, Register s); inlinevoid cnttzd( Register a, Register s); inlinevoid cnttzd_( Register a, Register s);
// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions inlinevoid sld( Register a, Register s, Register b); inlinevoid sld_( Register a, Register s, Register b); inlinevoid slw( Register a, Register s, Register b); inlinevoid slw_( Register a, Register s, Register b); inlinevoid srd( Register a, Register s, Register b); inlinevoid srd_( Register a, Register s, Register b); inlinevoid srw( Register a, Register s, Register b); inlinevoid srw_( Register a, Register s, Register b); inlinevoid srad( Register a, Register s, Register b); inlinevoid srad_( Register a, Register s, Register b); inlinevoid sraw( Register a, Register s, Register b); inlinevoid sraw_( Register a, Register s, Register b); inlinevoid sradi( Register a, Register s, int sh6); inlinevoid sradi_( Register a, Register s, int sh6); inlinevoid srawi( Register a, Register s, int sh5); inlinevoid srawi_( Register a, Register s, int sh5);
// extended mnemonics for Shift Instructions inlinevoid sldi( Register a, Register s, int sh6); inlinevoid sldi_( Register a, Register s, int sh6); inlinevoid slwi( Register a, Register s, int sh5); inlinevoid slwi_( Register a, Register s, int sh5); inlinevoid srdi( Register a, Register s, int sh6); inlinevoid srdi_( Register a, Register s, int sh6); inlinevoid srwi( Register a, Register s, int sh5); inlinevoid srwi_( Register a, Register s, int sh5);
inlinevoid clrrdi( Register a, Register s, int ui6); inlinevoid clrrdi_( Register a, Register s, int ui6); inlinevoid clrldi( Register a, Register s, int ui6); inlinevoid clrldi_( Register a, Register s, int ui6); inlinevoid clrlsldi(Register a, Register s, int clrl6, int shl6); inlinevoid clrlsldi_(Register a, Register s, int clrl6, int shl6); inlinevoid extrdi( Register a, Register s, int n, int b); // testbit with condition register inlinevoid testbitdi(ConditionRegister cr, Register a, Register s, int ui6);
// Byte reverse instructions (introduced with Power10) inlinevoid brh( Register a, Register s); inlinevoid brw( Register a, Register s); inlinevoid brd( Register a, Register s);
// rotate instructions inlinevoid rotldi( Register a, Register s, int n); inlinevoid rotrdi( Register a, Register s, int n); inlinevoid rotlwi( Register a, Register s, int n); inlinevoid rotrwi( Register a, Register s, int n);
// Rotate Instructions inlinevoid rldic( Register a, Register s, int sh6, int mb6); inlinevoid rldic_( Register a, Register s, int sh6, int mb6); inlinevoid rldicr( Register a, Register s, int sh6, int mb6); inlinevoid rldicr_( Register a, Register s, int sh6, int mb6); inlinevoid rldicl( Register a, Register s, int sh6, int mb6); inlinevoid rldicl_( Register a, Register s, int sh6, int mb6); inlinevoid rlwinm( Register a, Register s, int sh5, int mb5, int me5); inlinevoid rlwinm_( Register a, Register s, int sh5, int mb5, int me5); inlinevoid rldimi( Register a, Register s, int sh6, int mb6); inlinevoid rldimi_( Register a, Register s, int sh6, int mb6); inlinevoid rlwimi( Register a, Register s, int sh5, int mb5, int me5); inlinevoid insrdi( Register a, Register s, int n, int b); inlinevoid insrwi( Register a, Register s, int n, int b);
// For convenience. Load pointer into d from b+s1. inlinevoid ld_ptr(Register d, int b, Register s1); inlinevoid ld_ptr(Register d, ByteSize b, Register s1);
// Special purpose registers // Exception Register inlinevoid mtxer(Register s1); inlinevoid mfxer(Register d); // Vector Register Save Register inlinevoid mtvrsave(Register s1); inlinevoid mfvrsave(Register d); // Timebase inlinevoid mftb(Register d); // Introduced with Power 8: // Data Stream Control Register inlinevoid mtdscr(Register s1); inlinevoid mfdscr(Register d ); // Transactional Memory Registers inlinevoid mftfhar(Register d); inlinevoid mftfiar(Register d); inlinevoid mftexasr(Register d); inlinevoid mftexasru(Register d);
// TEXASR bit description.
// Each enumerator is the bit number of the corresponding TEXASR field.
enum transaction_failure_reason {
  // Upper half (TEXASRU):
  tm_failure_code       =  0, // The Failure Code is copied from tabort or treclaim operand.
  tm_failure_persistent =  7, // The failure is likely to recur on each execution.
  tm_disallowed         =  8, // The instruction is not permitted.
  tm_nesting_of         =  9, // The maximum transaction level was exceeded.
  tm_footprint_of       = 10, // The tracking limit for transactional storage accesses was exceeded.
  tm_self_induced_cf    = 11, // A self-induced conflict occurred in Suspended state.
  tm_non_trans_cf       = 12, // A conflict occurred with a non-transactional access by another processor.
  tm_trans_cf           = 13, // A conflict occurred with another transaction.
  tm_translation_cf     = 14, // A conflict occurred with a TLB invalidation.
  tm_inst_fetch_cf      = 16, // An instruction fetch was performed from a block that was previously written transactionally.
  tm_tabort             = 31, // Termination was caused by the execution of an abort instruction.
  // Lower half:
  tm_suspended          = 32, // Failure was recorded in Suspended state.
  tm_failure_summary    = 36, // Failure has been detected and recorded.
  tm_tfiar_exact        = 37, // Value in the TFIAR is exact.
  tm_rot                = 38, // Rollback-only transaction.
  tm_transaction_level  = 52, // Transaction level (nesting depth + 1).
};
// PPC 1, section 2.4.1 Branch Instructions inlinevoid b( address a, relocInfo::relocType rt = relocInfo::none); inlinevoid b( Label& L); inlinevoid bl( address a, relocInfo::relocType rt = relocInfo::none); inlinevoid bl( Label& L); inlinevoid bc( int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); inlinevoid bc( int boint, int biint, Label& L); inlinevoid bcl(int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); inlinevoid bcl(int boint, int biint, Label& L);
inlinevoid bclr( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); inlinevoid bclrl( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); inlinevoid bcctr( int boint, int biint, int bhint = bhintbhBCCTRisNotReturnButSame,
relocInfo::relocType rt = relocInfo::none); inlinevoid bcctrl(int boint, int biint, int bhint = bhintbhBCLRisReturn,
relocInfo::relocType rt = relocInfo::none);
// helper function for b, bcxx inlinebool is_within_range_of_b(address a, address pc); inlinebool is_within_range_of_bcxx(address a, address pc);
// get the destination of a bxx branch (b, bl, ba, bla) staticinline address bxx_destination(address baddr); staticinline address bxx_destination(int instr, address pc); staticinline intptr_t bxx_destination_offset(int instr, intptr_t bxx_pos);
// extended mnemonics for Branch Instructions via LR // We use `blr' for returns. inlinevoid blr(relocInfo::relocType rt = relocInfo::none);
// extended mnemonics for Branch Instructions with CTR // bdnz means `decrement CTR and jump to L if CTR is not zero' inlinevoid bdnz(Label& L); // Decrement and branch if result is zero. inlinevoid bdz(Label& L); // we use `bctr[l]' for jumps/calls in function descriptor glue // code, e.g. calls to runtime functions inlinevoid bctr( relocInfo::relocType rt = relocInfo::none); inlinevoid bctrl(relocInfo::relocType rt = relocInfo::none); // conditional jumps/branches via CTR inlinevoid beqctr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); inlinevoid beqctrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); inlinevoid bnectr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); inlinevoid bnectrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
// condition register logic instructions // NOTE: There's a preferred form: d and s2 should point into the same condition register. inlinevoid crand( int d, int s1, int s2); inlinevoid crnand(int d, int s1, int s2); inlinevoid cror( int d, int s1, int s2); inlinevoid crxor( int d, int s1, int s2); inlinevoid crnor( int d, int s1, int s2); inlinevoid creqv( int d, int s1, int s2); inlinevoid crandc(int d, int s1, int s2); inlinevoid crorc( int d, int s1, int s2);
// machine barrier instructions: // // - sync two-way memory barrier, aka fence // - lwsync orders Store|Store, // Load|Store, // Load|Load, // but not Store|Load // - eieio orders memory accesses for device memory (only) // - isync invalidates speculatively executed instructions // From the Power ISA 2.06 documentation: // "[...] an isync instruction prevents the execution of // instructions following the isync until instructions // preceding the isync have completed, [...]" // From IBM's AIX assembler reference: // "The isync [...] instructions causes the processor to // refetch any instructions that might have been fetched // prior to the isync instruction. The instruction isync // causes the processor to wait for all previous instructions // to complete. Then any instructions already fetched are // discarded and instruction processing continues in the // environment established by the previous instructions." // // semantic barrier instructions: // (as defined in orderAccess.hpp) // // - release orders Store|Store, (maps to lwsync) // Load|Store // - acquire orders Load|Store, (maps to lwsync) // Load|Load // - fence orders Store|Store, (maps to sync) // Load|Store, // Load|Load, // Store|Load // private: inlinevoid sync(int l); public: inlinevoid sync(); inlinevoid lwsync(); inlinevoid ptesync(); inlinevoid eieio(); inlinevoid isync(); inlinevoid elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
// Wait instructions for polling. Attention: May result in SIGILL. inlinevoid wait(); inlinevoid waitrsv(); // >=Power7
// atomics inlinevoid lbarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 inlinevoid lharx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 inlinevoid lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inlinevoid ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inlinevoid lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 inlinebool lxarx_hint_exclusive_access(); inlinevoid lbarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inlinevoid lharx( Register d, Register a, Register b, bool hint_exclusive_access = false); inlinevoid lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inlinevoid ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inlinevoid lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inlinevoid stbcx_( Register s, Register a, Register b); inlinevoid sthcx_( Register s, Register a, Register b); inlinevoid stwcx_( Register s, Register a, Register b); inlinevoid stdcx_( Register s, Register a, Register b); inlinevoid stqcx_( Register s, Register a, Register b);
// Instructions for adjusting thread priority for simultaneous // multithreading (SMT) on Power5. private: inlinevoid smt_prio_very_low(); inlinevoid smt_prio_medium_high(); inlinevoid smt_prio_high();
// trap instructions inlinevoid twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur) // NOT FOR DIRECT USE!! protected: inlinevoid tdi_unchecked(int tobits, Register a, int si16); inlinevoid twi_unchecked(int tobits, Register a, int si16); inlinevoid tdi( int tobits, Register a, int si16); // asserts UseSIGTRAP inlinevoid twi( int tobits, Register a, int si16); // asserts UseSIGTRAP inlinevoid td( int tobits, Register a, Register b); // asserts UseSIGTRAP inlinevoid tw( int tobits, Register a, Register b); // asserts UseSIGTRAP
public:
// Test whether instruction word x matches TDI_OPCODE (under TDI_OPCODE_MASK)
// with the given TO bits, RA register number and si16 immediate.
// Passing ra == -1 accepts any RA register.
static bool is_tdi(int x, int tobits, int ra, int si16) {
  return (TDI_OPCODE == (x & TDI_OPCODE_MASK))
      && (tobits == inv_to_field(x))
      && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
      && (si16 == inv_si_field(x));
}
staticint tdi_get_si16(int x, int tobits, int ra) { if (TDI_OPCODE == (x & TDI_OPCODE_MASK)
&& (tobits == inv_to_field(x))
&& (ra == -1/*any reg*/ || ra == inv_ra_field(x))) { return inv_si_field(x);
} return -1; // No valid tdi instruction.
}
staticbool is_twi(int x, int tobits, int ra, int si16) { return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
&& (tobits == inv_to_field(x))
&& (ra == -1/*any reg*/ || ra == inv_ra_field(x))
&& (si16 == inv_si_field(x));
}
staticbool is_twi(int x, int tobits, int ra) { return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
&& (tobits == inv_to_field(x))
&& (ra == -1/*any reg*/ || ra == inv_ra_field(x));
}
staticbool is_td(int x, int tobits, int ra, int rb) { return (TD_OPCODE == (x & TD_OPCODE_MASK))
&& (tobits == inv_to_field(x))
&& (ra == -1/*any reg*/ || ra == inv_ra_field(x))
&& (rb == -1/*any reg*/ || rb == inv_rb_field(x));
}
staticbool is_tw(int x, int tobits, int ra, int rb) { return (TW_OPCODE == (x & TW_OPCODE_MASK))
&& (tobits == inv_to_field(x))
&& (ra == -1/*any reg*/ || ra == inv_ra_field(x))
&& (rb == -1/*any reg*/ || rb == inv_rb_field(x));
}
// PPC floating point instructions // PPC 1, section 4.6.2 Floating-Point Load Instructions inlinevoid lfs( FloatRegister d, int si16, Register a); inlinevoid lfsu( FloatRegister d, int si16, Register a); inlinevoid lfsx( FloatRegister d, Register a, Register b); inlinevoid lfd( FloatRegister d, int si16, Register a); inlinevoid lfdu( FloatRegister d, int si16, Register a); inlinevoid lfdx( FloatRegister d, Register a, Register b);
// PPC 1, section 4.6.3 Floating-Point Store Instructions inlinevoid stfs( FloatRegister s, int si16, Register a); inlinevoid stfsu( FloatRegister s, int si16, Register a); inlinevoid stfsx( FloatRegister s, Register a, Register b); inlinevoid stfd( FloatRegister s, int si16, Register a); inlinevoid stfdu( FloatRegister s, int si16, Register a); inlinevoid stfdx( FloatRegister s, Register a, Register b);
// Deliver A Random Number (introduced with POWER9) inlinevoid darn( Register d, int l = 1 /*L=CRN*/);
// AES (introduced with Power 8) inlinevoid vcipher( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vcipherlast( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vncipher( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vncipherlast(VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vsbox( VectorRegister d, VectorRegister a);
// SHA (introduced with Power 8) inlinevoid vshasigmad(VectorRegister d, VectorRegister a, bool st, int six); inlinevoid vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
// Vector Binary Polynomial Multiplication (introduced with Power 8) inlinevoid vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vpmsumd( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vpmsumh( VectorRegister d, VectorRegister a, VectorRegister b); inlinevoid vpmsumw( VectorRegister d, VectorRegister a, VectorRegister b);
// Vector Permute and Xor (introduced with Power 8) inlinevoid vpermxor( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
// Transactional Memory instructions (introduced with Power 8) inlinevoid tbegin_(); // R=0 inlinevoid tbeginrot_(); // R=1 Rollback-Only Transaction inlinevoid tend_(); // A=0 inlinevoid tendall_(); // A=1 inlinevoid tabort_(); inlinevoid tabort_(Register a); inlinevoid tabortwc_(int t, Register a, Register b); inlinevoid tabortwci_(int t, Register a, int si); inlinevoid tabortdc_(int t, Register a, Register b); inlinevoid tabortdci_(int t, Register a, int si); inlinevoid tsuspend_(); // tsr with L=0 inlinevoid tresume_(); // tsr with L=1 inlinevoid tcheck(int f);
// RegisterOrConstant versions. // These emitters choose between the versions using two registers and // those with register and immediate, depending on the content of roc. // If the constant is not encodable as immediate, instructions to // load the constant are emitted beforehand. Store instructions need a // tmp reg if the constant is not encodable as immediate. // Size unpredictable. void ld( Register d, RegisterOrConstant roc, Register s1 = noreg); void lwa( Register d, RegisterOrConstant roc, Register s1 = noreg); void lwz( Register d, RegisterOrConstant roc, Register s1 = noreg); void lha( Register d, RegisterOrConstant roc, Register s1 = noreg); void lhz( Register d, RegisterOrConstant roc, Register s1 = noreg); void lbz( Register d, RegisterOrConstant roc, Register s1 = noreg); void std( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); void stw( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); void sth( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); void stb( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); void add( Register d, RegisterOrConstant roc, Register s1); void subf(Register d, RegisterOrConstant roc, Register s1); void cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1); // Load pointer d from s1+roc. void ld_ptr(Register d, RegisterOrConstant roc, Register s1 = noreg) { ld(d, roc, s1); }
// Emit several instructions to load a 64 bit constant. This issues a fixed // instruction pattern so that the constant can be patched later on. enum {
load_const_size = 5 * BytesPerInstWord
}; void load_const(Register d, long a, Register tmp = noreg); inlinevoid load_const(Register d, void* a, Register tmp = noreg); inlinevoid load_const(Register d, Label& L, Register tmp = noreg); inlinevoid load_const(Register d, AddressLiteral& a, Register tmp = noreg); inlinevoid load_const32(Register d, int i); // load signed int (patchable)
// Load a 64 bit constant, optimized, not identifiable. // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a // 16 bit immediate offset. This is useful if the offset can be encoded in // a succeeding instruction. int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false); inlineint load_const_optimized(Register d, void* a, Register tmp = noreg, bool return_simm16_rest = false) { return load_const_optimized(d, (long)(unsignedlong)a, tmp, return_simm16_rest);
}
// If return_simm16_rest, the return value needs to get added afterwards. int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false); inlineint add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { return add_const_optimized(d, s, (long)(unsignedlong)a, tmp, return_simm16_rest);
}
// If return_simm16_rest, the return value needs to get added afterwards. inlineint sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) { return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
} inlineint sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { return sub_const_optimized(d, s, (long)(unsignedlong)a, tmp, return_simm16_rest);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.