SSL macroAssembler_s390.cpp Sprache: C

/*
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2022 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "asm/codeBuffer.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

#include <ucontext.h>

#define BLOCK_COMMENT(str) block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

// Move 32-bit register if destination and source are different.
void MacroAssembler::lr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lr(rd, rs); }
}

// Move register if destination and source are different.
void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_lgr(rd, rs); }
}

// Zero-extend 32-bit register into 64-bit register if destination and source are different.
void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
  if (rs != rd) { z_llgfr(rd, rs); }
}

// Move float register if destination and source are different.
void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
  if (rs != rd) { z_ldr(rd, rs); }
}

// Move integer register if destination and source are different.
// It is assumed that shorter-than-int types are already
// appropriately sign-extended.
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
                                        BasicType src_type) {
  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");

  if (dst_type == src_type) {
    lgr_if_needed(dst, src); // Just move all 64 bits.
    return;
  }

  switch (dst_type) {
    // Do not support these types for now.
    //  case T_BOOLEAN:
    case T_BYTE:  // signed byte
      switch (src_type) {
        case T_INT:
          z_lgbr(dst, src);
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_CHAR:
    case T_SHORT:
      switch (src_type) {
        case T_INT:
          if (dst_type == T_CHAR) {
            z_llghr(dst, src);
          } else {
            z_lghr(dst, src);
          }
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_INT:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lr_if_needed(dst, src);
          // llgfr_if_needed(dst, src);  // zero-extend (in case we need to find a bug).
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
    case T_LONG:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
          z_lgfr(dst, src); // sign extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    case T_OBJECT:
    case T_ARRAY:
    case T_VOID:
    case T_ADDRESS:
      switch (src_type) {
        // These types don't make sense to be converted to pointers:
        //      case T_BOOLEAN:
        //      case T_BYTE:
        //      case T_CHAR:
        //      case T_SHORT:

        case T_INT:
          z_llgfr(dst, src); // zero extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    default:
      assert(false, "non-integer dst type");
      return;
  }
}

// Move float register if destination and source are different.
void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
                                         FloatRegister src, BasicType src_type) {
  assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
  assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
  if (dst_type == src_type) {
    ldr_if_needed(dst, src); // Just move all 64 bits.
  } else {
    switch (dst_type) {
      case T_FLOAT:
        assert(src_type == T_DOUBLE, "invalid float type combination");
        z_ledbr(dst, src);
        return;
      case T_DOUBLE:
        assert(src_type == T_FLOAT, "invalid float type combination");
        z_ldebr(dst, src);
        return;
      default:
        assert(false, "non-float dst type");
        return;
    }
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::freg2mem_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch);      // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
  } else {
    freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
  }
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::mem2freg_opt(FloatRegister reg,
                                  int64_t       disp,
                                  Register      index,
                                  Register      base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register      scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch);      // Restore base.
        }
      }
    }
  }
}

void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
  if (is_double) {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
  } else {
    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
  }
}

// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register
// (Z_R0 by default)
// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::reg2mem_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
                                 Register scratch) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) {   // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base);
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp);  // Restore base.
            }
          }
        } else {   // scratch == Z_R0
          if ((scratch == reg) || (scratch == base) || (reg == base)) {
            (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
          } else {
            z_lgr(scratch, base);
            add2reg(base, disp);
            (this->*classic)(reg, 0, index, base);
            z_lgr(base, scratch);    // Restore base.
          }
        }
      }
    }
  }
}

int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
  int store_offset = offset();
  if (is_double) {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
  } else {
    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
  }
  return store_offset;
}

// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) will be used as work register where possible.
void MacroAssembler::mem2reg_opt(Register reg,
                                 int64_t  disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
  index = (index == noreg) ? Z_R0 : index;
  if (Displacement::is_shortDisp(disp)) {
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      (this->*modern)(reg, disp, index, base);
    } else {
      if ((reg == index) && (reg == base)) {
        z_sllg(reg, reg, 1);
        add2reg(reg, disp);
        (this->*classic)(reg, 0, noreg, reg);
      } else if ((reg == index) && (reg != Z_R0)) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, reg, base);
      } else if (reg == base) {
        add2reg(reg, disp);
        (this->*classic)(reg, 0, index, reg);
      } else if (reg != Z_R0) {
        add2reg(reg, disp, base);
        (this->*classic)(reg, 0, index, reg);
      } else { // reg == Z_R0 && reg != base here
        add2reg(base, disp);
        (this->*classic)(reg, 0, index, base);
        add2reg(base, -disp);
      }
    }
  }
}

void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
  if (is_double) {
    z_lg(reg, a);
  } else {
    mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
  }
}

void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
}

void MacroAssembler::and_imm(Register r, long mask,
                             Register tmp /* = Z_R0 */,
                             bool wide    /* = false */) {
  assert(wide || Immediate::is_simm32(mask), "mask value too large");

  if (!wide) {
    z_nilf(r, mask);
    return;
  }

  assert(r != tmp, " need a different temporary register !");
  load_const_optimized(tmp, mask);
  z_ngr(r, tmp);
}

// Calculate the 1's complement.
// Note: The condition code is neither preserved nor correctly set by this code!!!
// Note: (wide == false) does not protect the high order half of the target register
//       from alteration. It only serves as optimization hint for 32-bit results.
void MacroAssembler::not_(Register r1, Register r2, bool wide) {

  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
    z_xilf(r1, -1);
    if (wide) {
      z_xihf(r1, -1);
    }
  } else { // Distinct src and dst registers.
    load_const_optimized(r1, -1);
    z_xgr(r1, r2);
  }
}

unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
  assert(lBitPos >=  0,      "zero is leftmost bit position");
  assert(rBitPos <= 63,      "63 is rightmost bit position");
  assert(lBitPos <= rBitPos, "inverted selection interval");
  return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
}

// Helper function for the "Rotate_then_<logicalOP>" emitters.
// Rotate src, then mask register contents such that only bits in range survive.
// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
// For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
// The caller must ensure that the selected range only contains bits with defined value.
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);     // Substitute SRL(G) for RLL(G).
  //  Pre-determine which parts of dst will be zero after shift/rotate.
  bool llZero  =  sll4rll && (nRotate >= 16);
  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero  = llZero && lhZero;
  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
  bool hfZero  = hlZero && hhZero;

  // rotate then mask src operand.
  // if oneBits == true,  all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s
    if (((~range_mask_l) != 0) &&              !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}

// Rotate src, then insert selected range from rotated src into dst.
// Clear dst before, if requested.
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
                                        int nRotate, bool clear_dst) {
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
}

// Rotate src, then and selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src,  int  lBitPos,  int  rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src,  int  lBitPos,  int  rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
    // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}

void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
  if (inc.is_register()) {
    z_agr(r1, inc.as_register());
  } else { // constant
    intptr_t imm = inc.as_constant();
    add2reg(r1, imm);
  }
}
// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval;

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;
  if (bit1 == cval) {
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1);
    if ((bit1+bit2) == cval) {
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}

// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
    return;
  }

  if (!PreferLAoverADD || (r2 == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();

    // Can we encode imm in 16 bits signed?
    if (Immediate::is_simm16(imm)) {
      if (r1 == r2) {
        z_aghi(r1, imm);
        return;
      }
      if (distinctOpnds) {
        z_aghik(r1, r2, imm);
        return;
      }
      z_lgr(r1, r2);
      z_aghi(r1, imm);
      return;
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(imm)) {
      z_la(r1, imm, r2);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(imm)) {
      // Always use LAY instruction, so we don't need the tmp register.
      z_lay(r1, imm, r2);
      return;
    }

  }

  // Can handle it (all possible values) with long immediates.
  lgr_if_needed(r1, r2);
  z_agfi(r1, imm);
}

// Generic operation r := b + x + d
//
// Addition of several operands with address generation semantics - sort of:
//  - no restriction on the registers. Any register will do for any operand.
//  - x == noreg: operand will be disregarded.
//  - b == noreg: will use (contents of) result reg as operand (r := r + d).
//  - x == Z_R0:  just disregard
//  - b == Z_R0:  use as operand. This is not address generation semantics!!!
//
// The same restrictions as on add2reg() are valid!!!
void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");

  if (x == noreg) { x = Z_R0; }
  if (b == noreg) { b = r; }

  // Handle special case x == R0.
  if (x == Z_R0) {
    // Can simply add the immediate value to the base register.
    add2reg(r, d, b);
    return;
  }

  if (!PreferLAoverADD || (b == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();
    // Handle special case d == 0.
    if (d == 0) {
      if (b == x)        { z_sllg(r, b, 1); return; }
      if (r == x)        { z_agr(r, b);     return; }
      if (r == b)        { z_agr(r, x);     return; }
      if (distinctOpnds) { z_agrk(r, x, b); return; }
      z_lgr(r, b);
      z_agr(r, x);
    } else {
      if (x == b)             { z_sllg(r, x, 1); }
      else if (r == x)        { z_agr(r, b); }
      else if (r == b)        { z_agr(r, x); }
      else if (distinctOpnds) { z_agrk(r, x, b); }
      else {
        z_lgr(r, b);
        z_agr(r, x);
      }
      add2reg(r, d);
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(d)) {
      z_la(r, d, x, b);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(d)) {
      z_lay(r, d, x, b);
      return;
    }
    z_la(r, 0, x, b);
    add2reg(r, d);
  }
}

// Generic emitter (32bit) for direct memory increment.
// For optimal code, do not specify Z_R0 as temp register.
void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
    z_asi(a, imm);
  } else {
    z_lgf(tmp, a);
    add2reg(tmp, imm);
    z_st(tmp, a);
  }
}

void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
    z_agsi(a, imm);
  } else {
    z_lg(tmp, a);
    add2reg(tmp, imm);
    z_stg(tmp, a);
  }
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case  8: z_lg(dst, src); break;
    case  4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
    case  2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
    case  1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case  8: z_stg(src, dst); break;
    case  4: z_st(src, dst); break;
    case  2: z_sth(src, dst); break;
    case  1: z_stc(src, dst); break;
    default: ShouldNotReachHere();
  }
}

// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
// a high-order summand in register tmp.
//
// return value: <  0: No split required, si20 actually has property uimm12.
//               >= 0: Split performed. Use return value as uimm12 displacement and
//                     tmp as index register.
int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
  assert(Immediate::is_simm20(si20_offset), "sanity");
  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
  assert((lg_off+ll_off) == si20_offset, "offset splitup error");

  Register work = accumulate? Z_R0 : tmp;

  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
    z_lghi(work, ll_off>>12);   // Implicit sign extension.
    z_slag(work, work, 12);
  } else {                      // Len of code = 0..10.
    if (ll_off == 0) { return -1; }
    // ll_off has 8 significant bits (at most) plus sign.
    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
      z_llilh(work, ll_off >> 16);
      if (ll_off < 0) {                  // Sign-extension required.
        z_lgfr(work, work);
      }
    } else {
      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
        z_llill(work, ll_off);
      } else {                           // Non-zero bits in both halfbytes.
        z_lghi(work, ll_off>>12);        // Implicit sign extension.
        z_slag(work, work, 12);
      }
    }
  }
  if (accumulate) { z_algr(tmp, work); } // len of code += 4
  return lg_off;
}

void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ley(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);
    } else {
      if (accumulate) {
        z_le(t, disp12, work);
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}

void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}

// PCrelative TOC access.
// Returns distance (in bytes) from current position to start of consts section.
// Returns 0 (zero) if no consts section exists or if it has size zero.
long MacroAssembler::toc_distance() {
  CodeSection* cs = code()->consts();
  return (long)((cs != NULL) ? cs->start()-pc() : 0);
}

// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty
// In both cases, we use the const section's start address to compute the "TOC",
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relatice variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp));  // Offset is in halfwords.
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr)
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
  address          pc             = this->pc();
  ptrdiff_t        total_distance = dataLocation - pc;
  RelocationHolder rspec          = internal_word_Relocation::spec(dataLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
  assert(total_distance != 0, "sanity");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}

// PCrelative TOC access.
// Load from anywhere pcrelative (with relocation of load instr)
// loaded addr has to be relocated when added to constant pool.
void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
  address          pc             = this->pc();
  ptrdiff_t        total_distance = addrLocation - pc;
  RelocationHolder rspec          = internal_word_Relocation::spec(addrLocation);

  assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");

  // Some extra safety net.
  if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
    guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
  }

  (this)->relocate(rspec, relocInfo::pcrel_addr_format);
  z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
}

// Generic operation: load a value from memory and test.
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
  z_lb(dst, a);
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
  int64_t disp = a.disp20();
  if (Displacement::is_shortDisp(disp)) {
    z_lh(dst, a);
  } else if (Displacement::is_longDisp(disp)) {
    z_lhy(dst, a);
  } else {
    guarantee(false, "displacement out of range");
  }
  z_ltr(dst, dst);
}

void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
  z_lt(dst, a);
}

void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
  z_ltgf(dst, a);
}

void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
  z_ltg(dst, a);
}

// Test a bit in memory.
void MacroAssembler::testbit(const Address &a, unsigned int bit) {
  assert(a.index() == noreg, "no index reg allowed in testbit");
  if (bit <= 7) {
    z_tm(a.disp() + 3, a.base(), 1 << bit);
  } else if (bit <= 15) {
    z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
  } else if (bit <= 23) {
    z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
  } else if (bit <= 31) {
    z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
  } else {
    ShouldNotReachHere();
  }
}

// Test a bit in a register. Result is reflected in CC.
void MacroAssembler::testbit(Register r, unsigned int bitPos) {
  if (bitPos < 16) {
    z_tmll(r, 1U<<bitPos);
  } else if (bitPos < 32) {
    z_tmlh(r, 1U<<(bitPos-16));
  } else if (bitPos < 48) {
    z_tmhl(r, 1U<<(bitPos-32));
  } else if (bitPos < 64) {
    z_tmhh(r, 1U<<(bitPos-48));
  } else {
    ShouldNotReachHere();
  }
}

void MacroAssembler::prefetch_read(Address a) {
  z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
}
void MacroAssembler::prefetch_update(Address a) {
  z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
}

// Clear a register, i.e. load const zero into reg.
// Return len (in bytes) of generated instruction(s).
// whole_reg: Clear 64 bits if true, 32 bits otherwise.
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
  } else {  // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);
  }
  return offset() - start_off;
}

#ifdef ASSERT
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff)  | ((pattern & 0x000000ff)<<8);
    case 2:
      pattern = (pattern & 0x0000ffff)  | ((pattern & 0x0000ffff)<<16);
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      break;
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif

// addr: Address descriptor of memory to clear index register will not be used !
// size: Number of bytes to clear.
//    !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
//    !!! Use store_const() instead                  !!!
void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
  guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");

  if (size == 1) {
    z_mvi(addr, 0);
    return;
  }

  switch (size) {
    case 2: z_mvhhi(addr, 0);
      return;
    case 4: z_mvhi(addr, 0);
      return;
    case 8: z_mvghi(addr, 0);
      return;
    default: ; // Fallthru to xc.
  }

  z_xc(addr, size, addr);
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) z_nop();
}

// Special version for non-relocateable code if required alignment
// is larger than CodeEntryAlignment.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
  int stackElementSize = Interpreter::stackElementSize;
  int64_t offset = extra_slot_offset * stackElementSize;
  const Register argbase = Z_esp;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
    return Address(argbase, offset);
  }
  // else
  assert(temp_reg != noreg, "must specify");
  assert(temp_reg != Z_ARG1, "base and index are conflicting");
  z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
  return Address(argbase, temp_reg, offset);
}

//===================================================================
//===   START   C O N S T A N T S   I N   C O D E   S T R E A M   ===
//===================================================================
//===            P A T CH A B L E   C O N S T A N T S             ===
//===================================================================

//---------------------------------------------------
//  Load (patchable) constant into register
//---------------------------------------------------

// Load absolute address (and try to optimize).
//   Note: This method is usable only for position-fixed code,
//         referring to a position-fixed target location.
//         If not so, relocations and patching must be used.
void MacroAssembler::load_absolute_address(Register d, address addr) {
  assert(addr != NULL, "should not happen");
  BLOCK_COMMENT("load_absolute_address:");
  if (addr == NULL) {
    z_larl(d, pc()); // Dummy emit for size calc.
    return;
  }

  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
    z_larl(d, addr);
    return;
  }

  load_const_optimized(d, (long)addr);
}

// Load a 64bit constant.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const(Register t, long x) {
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  Assembler::z_iihf(t, (long)((unsigned long)x >> 32));
  Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL));
}

// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
  if (sign_extend) { Assembler::z_lgfi(t, x); }
  else             { Assembler::z_llilf(t, x); }
}

// Load narrow oop constant, no decompression.
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
  assert(UseCompressedOops, "must be on to call this method");
  load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/);
}

// Load narrow klass constant, compression required.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = CompressedKlassPointers::encode(k);
  load_const_32to64(t, encoded_k, false /*sign_extend*/);
}

//------------------------------------------------------
//  Compare (patchable) constant with register.
//------------------------------------------------------

// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
  assert(UseCompressedOops, "must be on to call this method");

  Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2));
}

// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = CompressedKlassPointers::encode(klass2);

  Assembler::z_clfi(klass1, encoded_k);
}

//----------------------------------------------------------
//  Check which kind of load_constant we have here.
//----------------------------------------------------------

// Detection of CPU version dependent load_const sequence.
// The detection is valid only for code sequences generated by load_const,
// not load_const_optimized.
bool MacroAssembler::is_load_const(address a) {
  unsigned long inst1, inst2;
  unsigned int  len1,  len2;

  len1 = get_instruction(a, &inst1);
  len2 = get_instruction(a + len1, &inst2);

  return is_z_iihf(inst1) && is_z_iilf(inst2);
}

// Detection of CPU version dependent load_const_32to64 sequence.
// Mostly used for narrow oops and narrow Klass pointers.
// The detection is valid only for code sequences generated by load_const_32to64.
bool MacroAssembler::is_load_const_32to64(address pos) {
  unsigned long inst1, inst2;
  unsigned int len1;

  len1 = get_instruction(pos, &inst1);
  return is_z_llilf(inst1);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate32(address pos) {
  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
}

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
  return is_compare_immediate32(pos);
  }

// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
  return is_compare_immediate32(pos);
}

//-----------------------------------
//  patch the load_constant
//-----------------------------------

// CPU-version dependent patching of load_const.
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const(a), "not a load of a constant");
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  set_imm32((address)a, (long)((unsigned long)x >> 32));
  set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL));
}

// Patching the value of CPU version dependent load_const_32to64 sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");

  set_imm32(pos, np);
  return 6;
}

// Patching the value of CPU version dependent compare_immediate_narrow sequence.
// The passed ptr MUST be in compressed format!
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
  assert(is_compare_immediate32(pos), "not a compressed ptr compare");

  set_imm32(pos, np);
  return 6;
}

// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_load_const_32to64(pos, nk);
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");
  return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o));
}

// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = CompressedKlassPointers::encode(k);
  return patch_compare_immediate_32(pos, nk);
}

//------------------------------------------------------------------------
//  Extract the constant from a load_constant instruction stream.
//------------------------------------------------------------------------

// Get constant from a load_const sequence.
long MacroAssembler::get_const(address a) {
  assert(is_load_const(a), "not a load of a constant");
  unsigned long x;
  x =  (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
  x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
  return (long) x;
}

//--------------------------------------
//  Store a constant in memory.
//--------------------------------------

// General emitter to move a constant to memory.
// The store is atomic.
//  o Address must be given in RS format (no index register)
//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot must be at least as wide as constant, will assert otherwise.
//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
int MacroAssembler::store_const(const Address &dest, long imm,
                                unsigned int lm, unsigned int lc,
                                Register scratch) {
  int64_t  disp = dest.disp();
  Register base = dest.base();
  assert(!dest.has_index(), "not supported");
  assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported");
  assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
  assert(lm>=lc, "memory slot too small");
  assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
  assert(Displacement::is_validDisp(disp), "displacement out of range");

  bool is_shortDisp = Displacement::is_shortDisp(disp);
  int store_offset = -1;

  // For target len == 1 it's easy.
  if (lm == 1) {
    store_offset = offset();
    if (is_shortDisp) {
      z_mvi(disp, base, imm);
      return store_offset;
    } else {
      z_mviy(disp, base, imm);
      return store_offset;
    }
  }

  // All the "good stuff" takes an unsigned displacement.
  if (is_shortDisp) {
    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.

    store_offset = offset();
    switch (lm) {
      case 2:  // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
        z_mvhhi(disp, base, imm);
        return store_offset;
      case 4:
        if (Immediate::is_simm16(imm)) {
          z_mvhi(disp, base, imm);
          return store_offset;
        }
        break;
      case 8:
        if (Immediate::is_simm16(imm)) {
          z_mvghi(disp, base, imm);
          return store_offset;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  }

  //  Can't optimize, so load value and store it.
  guarantee(scratch != noreg, " need a scratch register here !");
  if (imm != 0) {
    load_const_optimized(scratch, imm);  // Preserves CC anyway.
  } else {
    // Leave CC alone!!
    (void) clear_reg(scratch, true, false); // Indicate unused result.
  }

  store_offset = offset();
  if (is_shortDisp) {
    switch (lm) {
      case 2:
        z_sth(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_st(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  } else {
    switch (lm) {
      case 2:
        z_sthy(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_sty(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  }
  return -1; // should not reach here
}

//===================================================================
//===       N O T   P A T CH A B L E   C O N S T A N T S          ===
//===================================================================

// Load constant x into register t with a fast instruction sequence
// depending on the bits in x. Preserves CC under all circumstances.
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
  if (x == 0) {
    int len;
    if (emit) {
      len = clear_reg(t, true, false);
    } else {
      len = 4;
    }
    return len;
  }

  if (Immediate::is_simm16(x)) {
    if (emit) { z_lghi(t, x); }
    return 4;
  }

  // 64 bit value: | part1 | part2 | part3 | part4 |
  // At least one part is not zero!
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff;
  int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff;
  int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff;
  int part4 = (int)x & 0x0000ffff;
  int part12 = (int)((unsigned long)x >> 32);
  int part34 = (int)x;

  // Lower word only (unsigned).
  if (part12 == 0) {
    if (part3 == 0) {
      if (emit) z_llill(t, part4);
      return 4;
    }
    if (part4 == 0) {
      if (emit) z_llilh(t, part3);
      return 4;
    }
    if (emit) z_llilf(t, part34);
    return 6;
  }

  // Upper word only.
  if (part34 == 0) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      return 4;
    }
    if (part2 == 0) {
      if (emit) z_llihh(t, part1);
      return 4;
    }
    if (emit) z_llihf(t, part12);
    return 6;
  }

  // Lower word only (signed).
  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
    if (emit) z_lgfi(t, part34);
    return 6;
  }

  int len = 0;

  if ((part1 == 0) || (part2 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      len += 4;
    } else {
      if (emit) z_llihh(t, part1);
      len += 4;
    }
  } else {
    if (emit) z_llihf(t, part12);
    len += 6;
  }

  if ((part3 == 0) || (part4 == 0)) {
    if (part3 == 0) {
      if (emit) z_iill(t, part4);
      len += 4;
    } else {
      if (emit) z_iilh(t, part3);
      len += 4;
    }
  } else {
    if (emit) z_iilf(t, part34);
    len += 6;
  }
  return len;
}

//=====================================================================
//===     H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
//=====================================================================

// Note: In the worst case, one of the scratch registers is destroyed!!!
void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
  // Right operand is constant.
  if (x2.is_constant()) {
    jlong value = x2.as_constant();
    compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true);
    return;
  }

  // Right operand is in register.
  compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
}

// Note: In the worst case, one of the scratch registers is destroyed!!!
void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
  // Right operand is constant.
  if (x2.is_constant()) {
    jlong value = x2.as_constant();
    compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false);
    return;
  }

  // Right operand is in register.
  compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
}

// Note: In the worst case, one of the scratch registers is destroyed!!!
void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
  // Right operand is constant.
  if (x2.is_constant()) {
    jlong value = x2.as_constant();
    compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true);
    return;
  }

  // Right operand is in register.
  compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
}

void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
  // Right operand is constant.
  if (x2.is_constant()) {
    jlong value = x2.as_constant();
    compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false);
    return;
  }

  // Right operand is in register.
  compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
}

// Generate an optimal branch to the branch target.
// Optimal means that a relative branch (brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Used registers:
//   Z_R1 - work reg. Holds branch target address.
//          Used in fallback case only.
//
// This version of branch_optimized is good for cases where the target address is known
// and constant, i.e. is never changed (no relocation, no patching).
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
  address branch_origin = pc();

  if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
    z_brc(cond, branch_addr);
  } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
    z_brcl(cond, branch_addr);
  } else {
    load_const_optimized(Z_R1, branch_addr);  // CC must not get killed by load_const_optimized.
    z_bcr(cond, Z_R1);
  }
}

// This version of branch_optimized is good for cases where the target address
// is potentially not yet known at the time the code is emitted.
//
// One very common case is a branch to an unbound label which is handled here.
// The caller might know (or hope) that the branch distance is short enough
// to be encoded in a 16bit relative address. In this case he will pass a
// NearLabel branch_target.
// Care must be taken with unbound labels. Each call to target(label) creates
// an entry in the patch queue for that label to patch all references of the label
// once it gets bound. Those recorded patch locations must be patchable. Otherwise,
// an assertion fires at patch time.
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
  if (branch_target.is_bound()) {
    address branch_addr = target(branch_target);
    branch_optimized(cond, branch_addr);
  } else if (branch_target.is_near()) {
    z_brc(cond, branch_target);  // Caller assures that the target will be in range for z_brc.
  } else {
    z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
  }
}

// Generate an optimal compare and branch to the branch target.
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Input:
//   r1 - left compare operand
//   r2 - right compare operand
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  address  branch_addr,
                                                  bool     len64,
                                                  bool     has_sign) {
  unsigned int casenum = (len64?2:0)+(has_sign?0:1);

  address branch_origin = pc();
  if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
    switch (casenum) {
      case 0: z_crj( r1, r2, cond, branch_addr); break;
      case 1: z_clrj (r1, r2, cond, branch_addr); break;
      case 2: z_cgrj(r1, r2, cond, branch_addr); break;
      case 3: z_clgrj(r1, r2, cond, branch_addr); break;
      default: ShouldNotReachHere(); break;
    }
  } else {
    switch (casenum) {
      case 0: z_cr( r1, r2); break;
      case 1: z_clr(r1, r2); break;
      case 2: z_cgr(r1, r2); break;
      case 3: z_clgr(r1, r2); break;
      default: ShouldNotReachHere(); break;
    }
    branch_optimized(cond, branch_addr);
  }
}

// Generate an optimal compare and branch to the branch target.
// Optimal means that a relative branch (clgij, brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Input:
//   r1 - left compare operand (in register)
//   x2 - right compare operand (immediate)
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  jlong    x2,
                                                  Assembler::branch_condition cond,
                                                  Label&   branch_target,
                                                  bool     len64,
                                                  bool     has_sign) {
  address      branch_origin = pc();
  bool         x2_imm8       = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
  bool         is_RelAddr16  = branch_target.is_near() ||
                               (branch_target.is_bound() &&
                                RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
  unsigned int casenum       = (len64?2:0)+(has_sign?0:1);

  if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
    switch (casenum) {
      case 0: z_cij( r1, x2, cond, branch_target); break;
      case 1: z_clij(r1, x2, cond, branch_target); break;
      case 2: z_cgij(r1, x2, cond, branch_target); break;
      case 3: z_clgij(r1, x2, cond, branch_target); break;
      default: ShouldNotReachHere(); break;
    }
    return;
  }

  if (x2 == 0) {
    switch (casenum) {
      case 0: z_ltr(r1, r1); break;
      case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
      case 2: z_ltgr(r1, r1); break;
      case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
      default: ShouldNotReachHere(); break;
    }
  } else {
    if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
      switch (casenum) {
        case 0: z_chi(r1, x2); break;
        case 1: z_chi(r1, x2); break; // positive immediate < 2**15
        case 2: z_cghi(r1, x2); break;
        case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
        default: break;
      }
    } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
      switch (casenum) {
        case 0: z_cfi( r1, x2); break;
        case 1: z_clfi(r1, x2); break;
        case 2: z_cgfi(r1, x2); break;
        case 3: z_clgfi(r1, x2); break;
        default: ShouldNotReachHere(); break;
      }
    } else {
      // No instruction with immediate operand possible, so load into register.
      Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
      load_const_optimized(scratch, x2);
      switch (casenum) {
        case 0: z_cr( r1, scratch); break;
        case 1: z_clr(r1, scratch); break;
        case 2: z_cgr(r1, scratch); break;
        case 3: z_clgr(r1, scratch); break;
        default: ShouldNotReachHere(); break;
      }
    }
  }
  branch_optimized(cond, branch_target);
}

// Generate an optimal compare and branch to the branch target.
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Input:
//   r1 - left compare operand
//   r2 - right compare operand
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  Label&   branch_target,
                                                  bool     len64,
                                                  bool     has_sign) {
  unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);

  if (branch_target.is_bound()) {
    address branch_addr = target(branch_target);
    compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
  } else {
    if (VM_Version::has_CompareBranch() && branch_target.is_near()) {
      switch (casenum) {
        case 0: z_crj(  r1, r2, cond, branch_target); break;
        case 1: z_clrj( r1, r2, cond, branch_target); break;
        case 2: z_cgrj( r1, r2, cond, branch_target); break;
        case 3: z_clgrj(r1, r2, cond, branch_target); break;
        default: ShouldNotReachHere(); break;
      }
    } else {
      switch (casenum) {
        case 0: z_cr( r1, r2); break;
        case 1: z_clr(r1, r2); break;
        case 2: z_cgr(r1, r2); break;
--> --------------------

--> maximum size reached

--> --------------------

quality100%

¤ Dauer der Verarbeitung: 0.66 Sekunden (vorverarbeitet) ¤

zum Verzeichnis wechseln

in der Quellcodebibliothek suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.