Quelle macroAssembler_aarch64.cpp Sprache: C

/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include <sys/types.h>

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "ci/ciEnv.hpp"
#include "compiler/compileTask.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "jvm.h"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "nativeInst_aarch64.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "runtime/continuation.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#endif
#ifdef COMPILER2
#include "oops/oop.hpp"
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define STOP(str) stop(str);
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

#ifdef ASSERT
extern "C" void disnm(intptr_t p);
#endif
// Target-dependent relocation processing
//
// Instruction sequences whose target may need to be retrieved or
// patched are distinguished by their leading instruction, sorting
// them into three main instruction groups and related subgroups.
//
// 1) Branch, Exception and System (insn count = 1)
//    1a) Unconditional branch (immediate):
//      b/bl imm19
//    1b) Compare & branch (immediate):
//      cbz/cbnz Rt imm19
//    1c) Test & branch (immediate):
//      tbz/tbnz Rt imm14
//    1d) Conditional branch (immediate):
//      b.cond imm19
//
// 2) Loads and Stores (insn count = 1)
//    2a) Load register literal:
//      ldr Rt imm19
//
// 3) Data Processing Immediate (insn count = 2 or 3)
//    3a) PC-rel. addressing
//      adr/adrp Rx imm21; ldr/str Ry Rx  #imm12
//      adr/adrp Rx imm21; add Ry Rx  #imm12
//      adr/adrp Rx imm21; movk Rx #imm16<<32; ldr/str Ry, [Rx, #offset_in_page]
//      adr/adrp Rx imm21
//      adr/adrp Rx imm21; movk Rx #imm16<<32
//      adr/adrp Rx imm21; movk Rx #imm16<<32; add Ry, Rx, #offset_in_page
//      The latter form can only happen when the target is an
//      ExternalAddress, and (by definition) ExternalAddresses don't
//      move. Because of that property, there is never any need to
//      patch the last of the three instructions. However,
//      MacroAssembler::target_addr_for_insn takes all three
//      instructions into account and returns the correct address.
//    3b) Move wide (immediate)
//      movz Rx #imm16; movk Rx #imm16 << 16; movk Rx #imm16 << 32;
//
// A switch on a subset of the instruction's bits provides an
// efficient dispatch to these subcases.
//
// insn[28:26] -> main group ('x' == don't care)
//   00x -> UNALLOCATED
//   100 -> Data Processing Immediate
//   101 -> Branch, Exception and System
//   x1x -> Loads and Stores
//
// insn[30:25] -> subgroup ('_' == group, 'x' == don't care).
// n.b. in some cases extra bits need to be checked to verify the
// instruction is as expected
//
// 1) ... xx101x Branch, Exception and System
//   1a)  00___x Unconditional branch (immediate)
//   1b)  01___0 Compare & branch (immediate)
//   1c)  01___1 Test & branch (immediate)
//   1d)  10___0 Conditional branch (immediate)
//        other  Should not happen
//
// 2) ... xxx1x0 Loads and Stores
//   2a)  xx1__00 Load/Store register (insn[28] == 1 && insn[24] == 0)
//   2aa) x01__00 Load register literal (i.e. requires insn[29] == 0)
//                strictly should be 64 bit non-FP/SIMD i.e.
//       0101_000 (i.e. requires insn[31:24] == 01011000)
//
// 3) ... xx100x Data Processing Immediate
//   3a)  xx___00 PC-rel. addressing (n.b. requires insn[24] == 0)
//   3b)  xx___101 Move wide (immediate) (n.b. requires insn[24:23] == 01)
//                 strictly should be 64 bit movz #imm16<<0
//       110___10100 (i.e. requires insn[31:21] == 11010010100)
//
class RelocActions {
protected:
  typedef int (*reloc_insn)(address insn_addr, address &target);

  virtual reloc_insn adrpMem() = 0;
  virtual reloc_insn adrpAdd() = 0;
  virtual reloc_insn adrpMovk() = 0;

  const address _insn_addr;
  const uint32_t _insn;

  static uint32_t insn_at(address insn_addr, int n) {
    return ((uint32_t*)insn_addr)[n];
  }
  uint32_t insn_at(int n) const {
    return insn_at(_insn_addr, n);
  }

public:

  RelocActions(address insn_addr) : _insn_addr(insn_addr), _insn(insn_at(insn_addr, 0)) {}
  RelocActions(address insn_addr, uint32_t insn)
    :  _insn_addr(insn_addr), _insn(insn) {}

  virtual int unconditionalBranch(address insn_addr, address &target) = 0;
  virtual int conditionalBranch(address insn_addr, address &target) = 0;
  virtual int testAndBranch(address insn_addr, address &target) = 0;
  virtual int loadStore(address insn_addr, address &target) = 0;
  virtual int adr(address insn_addr, address &target) = 0;
  virtual int adrp(address insn_addr, address &target, reloc_insn inner) = 0;
  virtual int immediate(address insn_addr, address &target) = 0;
  virtual void verify(address insn_addr, address &target) = 0;

  int ALWAYSINLINE run(address insn_addr, address &target) {
    int instructions = 1;

    uint32_t dispatch = Instruction_aarch64::extract(_insn, 30, 25);
    switch(dispatch) {
      case 0b001010:
      case 0b001011: {
        instructions = unconditionalBranch(insn_addr, target);
        break;
      }
      case 0b101010:   // Conditional branch (immediate)
      case 0b011010: { // Compare & branch (immediate)
        instructions = conditionalBranch(insn_addr, target);
          break;
      }
      case 0b011011: {
        instructions = testAndBranch(insn_addr, target);
        break;
      }
      case 0b001100:
      case 0b001110:
      case 0b011100:
      case 0b011110:
      case 0b101100:
      case 0b101110:
      case 0b111100:
      case 0b111110: {
        // load/store
        if ((Instruction_aarch64::extract(_insn, 29, 24) & 0b111011) == 0b011000) {
          // Load register (literal)
          instructions = loadStore(insn_addr, target);
          break;
        } else {
          // nothing to do
          assert(target == 0, "did not expect to relocate target for polling page load");
        }
        break;
      }
      case 0b001000:
      case 0b011000:
      case 0b101000:
      case 0b111000: {
        // adr/adrp
        assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
        int shift = Instruction_aarch64::extract(_insn, 31, 31);
        if (shift) {
          uint32_t insn2 = insn_at(1);
          if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
              Instruction_aarch64::extract(_insn, 4, 0) ==
              Instruction_aarch64::extract(insn2, 9, 5)) {
            instructions = adrp(insn_addr, target, adrpMem());
          } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
                     Instruction_aarch64::extract(_insn, 4, 0) ==
                     Instruction_aarch64::extract(insn2, 4, 0)) {
            instructions = adrp(insn_addr, target, adrpAdd());
          } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
                     Instruction_aarch64::extract(_insn, 4, 0) ==
                     Instruction_aarch64::extract(insn2, 4, 0)) {
            instructions = adrp(insn_addr, target, adrpMovk());
          } else {
            ShouldNotReachHere();
          }
        } else {
          instructions = adr(insn_addr, target);
        }
        break;
      }
      case 0b001001:
      case 0b011001:
      case 0b101001:
      case 0b111001: {
        instructions = immediate(insn_addr, target);
        break;
      }
      default: {
        ShouldNotReachHere();
      }
    }

    verify(insn_addr, target);
    return instructions * NativeInstruction::instruction_size;
  }
};

class Patcher : public RelocActions {
  virtual reloc_insn adrpMem() { return &Patcher::adrpMem_impl; }
  virtual reloc_insn adrpAdd() { return &Patcher::adrpAdd_impl; }
  virtual reloc_insn adrpMovk() { return &Patcher::adrpMovk_impl; }

public:
  Patcher(address insn_addr) : RelocActions(insn_addr) {}

  virtual int unconditionalBranch(address insn_addr, address &target) {
    intptr_t offset = (target - insn_addr) >> 2;
    Instruction_aarch64::spatch(insn_addr, 25, 0, offset);
    return 1;
  }
  virtual int conditionalBranch(address insn_addr, address &target) {
    intptr_t offset = (target - insn_addr) >> 2;
    Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
    return 1;
  }
  virtual int testAndBranch(address insn_addr, address &target) {
    intptr_t offset = (target - insn_addr) >> 2;
    Instruction_aarch64::spatch(insn_addr, 18, 5, offset);
    return 1;
  }
  virtual int loadStore(address insn_addr, address &target) {
    intptr_t offset = (target - insn_addr) >> 2;
    Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
    return 1;
  }
  virtual int adr(address insn_addr, address &target) {
#ifdef ASSERT
    assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
#endif
    // PC-rel. addressing
    ptrdiff_t offset = target - insn_addr;
    int offset_lo = offset & 3;
    offset >>= 2;
    Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
    Instruction_aarch64::patch(insn_addr, 30, 29, offset_lo);
    return 1;
  }
  virtual int adrp(address insn_addr, address &target, reloc_insn inner) {
    int instructions = 1;
#ifdef ASSERT
    assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
#endif
    ptrdiff_t offset = target - insn_addr;
    instructions = 2;
    precond(inner != nullptr);
    // Give the inner reloc a chance to modify the target.
    address adjusted_target = target;
    instructions = (*inner)(insn_addr, adjusted_target);
    uintptr_t pc_page = (uintptr_t)insn_addr >> 12;
    uintptr_t adr_page = (uintptr_t)adjusted_target >> 12;
    offset = adr_page - pc_page;
    int offset_lo = offset & 3;
    offset >>= 2;
    Instruction_aarch64::spatch(insn_addr, 23, 5, offset);
    Instruction_aarch64::patch(insn_addr, 30, 29, offset_lo);
    return instructions;
  }
  static int adrpMem_impl(address insn_addr, address &target) {
    uintptr_t dest = (uintptr_t)target;
    int offset_lo = dest & 0xfff;
    uint32_t insn2 = insn_at(insn_addr, 1);
    uint32_t size = Instruction_aarch64::extract(insn2, 31, 30);
    Instruction_aarch64::patch(insn_addr + sizeof (uint32_t), 21, 10, offset_lo >> size);
    guarantee(((dest >> size) << size) == dest, "misaligned target");
    return 2;
  }
  static int adrpAdd_impl(address insn_addr, address &target) {
    uintptr_t dest = (uintptr_t)target;
    int offset_lo = dest & 0xfff;
    Instruction_aarch64::patch(insn_addr + sizeof (uint32_t), 21, 10, offset_lo);
    return 2;
  }
  static int adrpMovk_impl(address insn_addr, address &target) {
    uintptr_t dest = uintptr_t(target);
    Instruction_aarch64::patch(insn_addr + sizeof (uint32_t), 20, 5, (uintptr_t)target >> 32);
    dest = (dest & 0xffffffffULL) | (uintptr_t(insn_addr) & 0xffff00000000ULL);
    target = address(dest);
    return 2;
  }
  virtual int immediate(address insn_addr, address &target) {
    assert(Instruction_aarch64::extract(_insn, 31, 21) == 0b11010010100, "must be");
    uint64_t dest = (uint64_t)target;
    // Move wide constant
    assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
    assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
    Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
    Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
    return 3;
  }
  virtual void verify(address insn_addr, address &target) {
#ifdef ASSERT
    address address_is = MacroAssembler::target_addr_for_insn(insn_addr);
    if (!(address_is == target)) {
      tty->print_cr("%p at %p should be %p", address_is, insn_addr, target);
      disnm((intptr_t)insn_addr);
      assert(address_is == target, "should be");
    }
#endif
  }
};

// If insn1 and insn2 use the same register to form an address, either
// by an offsetted LDR or a simple ADD, return the offset. If the
// second instruction is an LDR, the offset may be scaled.
static bool offset_for(uint32_t insn1, uint32_t insn2, ptrdiff_t &byte_offset) {
  if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
      Instruction_aarch64::extract(insn1, 4, 0) ==
      Instruction_aarch64::extract(insn2, 9, 5)) {
    // Load/store register (unsigned immediate)
    byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
    uint32_t size = Instruction_aarch64::extract(insn2, 31, 30);
    byte_offset <<= size;
    return true;
  } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
             Instruction_aarch64::extract(insn1, 4, 0) ==
             Instruction_aarch64::extract(insn2, 4, 0)) {
    // add (immediate)
    byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
    return true;
  }
  return false;
}

class Decoder : public RelocActions {
  virtual reloc_insn adrpMem() { return &Decoder::adrpMem_impl; }
  virtual reloc_insn adrpAdd() { return &Decoder::adrpAdd_impl; }
  virtual reloc_insn adrpMovk() { return &Decoder::adrpMovk_impl; }

public:
  Decoder(address insn_addr, uint32_t insn) : RelocActions(insn_addr, insn) {}

  virtual int loadStore(address insn_addr, address &target) {
    intptr_t offset = Instruction_aarch64::sextract(_insn, 23, 5);
    target = insn_addr + (offset << 2);
    return 1;
  }
  virtual int unconditionalBranch(address insn_addr, address &target) {
    intptr_t offset = Instruction_aarch64::sextract(_insn, 25, 0);
    target = insn_addr + (offset << 2);
    return 1;
  }
  virtual int conditionalBranch(address insn_addr, address &target) {
    intptr_t offset = Instruction_aarch64::sextract(_insn, 23, 5);
    target = address(((uint64_t)insn_addr + (offset << 2)));
    return 1;
  }
  virtual int testAndBranch(address insn_addr, address &target) {
    intptr_t offset = Instruction_aarch64::sextract(_insn, 18, 5);
    target = address(((uint64_t)insn_addr + (offset << 2)));
    return 1;
  }
  virtual int adr(address insn_addr, address &target) {
    // PC-rel. addressing
    intptr_t offset = Instruction_aarch64::extract(_insn, 30, 29);
    offset |= Instruction_aarch64::sextract(_insn, 23, 5) << 2;
    target = address((uint64_t)insn_addr + offset);
    return 1;
  }
  virtual int adrp(address insn_addr, address &target, reloc_insn inner) {
    assert(Instruction_aarch64::extract(_insn, 28, 24) == 0b10000, "must be");
    intptr_t offset = Instruction_aarch64::extract(_insn, 30, 29);
    offset |= Instruction_aarch64::sextract(_insn, 23, 5) << 2;
    int shift = 12;
    offset <<= shift;
    uint64_t target_page = ((uint64_t)insn_addr) + offset;
    target_page &= ((uint64_t)-1) << shift;
    uint32_t insn2 = insn_at(1);
    target = address(target_page);
    precond(inner != nullptr);
    (*inner)(insn_addr, target);
    return 2;
  }
  static int adrpMem_impl(address insn_addr, address &target) {
    uint32_t insn2 = insn_at(insn_addr, 1);
    // Load/store register (unsigned immediate)
    ptrdiff_t byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
    uint32_t size = Instruction_aarch64::extract(insn2, 31, 30);
    byte_offset <<= size;
    target += byte_offset;
    return 2;
  }
  static int adrpAdd_impl(address insn_addr, address &target) {
    uint32_t insn2 = insn_at(insn_addr, 1);
    // add (immediate)
    ptrdiff_t byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
    target += byte_offset;
    return 2;
  }
  static int adrpMovk_impl(address insn_addr, address &target) {
    uint32_t insn2 = insn_at(insn_addr, 1);
    uint64_t dest = uint64_t(target);
    dest = (dest & 0xffff0000ffffffff) |
      ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
    target = address(dest);

    // We know the destination 4k page. Maybe we have a third
    // instruction.
    uint32_t insn = insn_at(insn_addr, 0);
    uint32_t insn3 = insn_at(insn_addr, 2);
    ptrdiff_t byte_offset;
    if (offset_for(insn, insn3, byte_offset)) {
      target += byte_offset;
      return 3;
    } else {
      return 2;
    }
  }
  virtual int immediate(address insn_addr, address &target) {
    uint32_t *insns = (uint32_t *)insn_addr;
    assert(Instruction_aarch64::extract(_insn, 31, 21) == 0b11010010100, "must be");
    // Move wide constant: movz, movk, movk.  See movptr().
    assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
    assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
    target = address(uint64_t(Instruction_aarch64::extract(_insn, 20, 5))
                 + (uint64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
                 + (uint64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
    assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
    assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
    return 3;
  }
  virtual void verify(address insn_addr, address &target) {
  }
};

address MacroAssembler::target_addr_for_insn(address insn_addr, uint32_t insn) {
  Decoder decoder(insn_addr, insn);
  address target;
  decoder.run(insn_addr, target);
  return target;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address insn_addr, address target) {
  Patcher patcher(insn_addr);
  return patcher.run(insn_addr, target);
}

int MacroAssembler::patch_oop(address insn_addr, address o) {
  int instructions;
  unsigned insn = *(unsigned*)insn_addr;
  assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");

  // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
  if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
    // Move narrow OOP
    uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
    Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
    instructions = 2;
  } else {
    // Move wide OOP
    assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
    uintptr_t dest = (uintptr_t)o;
    Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
    Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
    instructions = 3;
  }
  return instructions * NativeInstruction::instruction_size;
}

int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) {
  // Metadata pointers are either narrow (32 bits) or wide (48 bits).
  // We encode narrow ones by setting the upper 16 bits in the first
  // instruction.
  NativeInstruction *insn = nativeInstruction_at(insn_addr);
  assert(Instruction_aarch64::extract(insn->encoding(), 31, 21) == 0b11010010101 &&
         nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");

  Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
  Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
  return 2 * NativeInstruction::instruction_size;
}

address MacroAssembler::target_addr_for_insn_or_null(address insn_addr, unsigned insn) {
  if (NativeInstruction::is_ldrw_to_zr(address(&insn))) {
    return nullptr;
  }
  return MacroAssembler::target_addr_for_insn(insn_addr, insn);
}

void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp) {
  if (acquire) {
    lea(tmp, Address(rthread, JavaThread::polling_word_offset()));
    ldar(tmp, tmp);
  } else {
    ldr(tmp, Address(rthread, JavaThread::polling_word_offset()));
  }
  if (at_return) {
    // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
    // we may safely use the sp instead to perform the stack watermark check.
    cmp(in_nmethod ? sp : rfp, tmp);
    br(Assembler::HI, slow_path);
  } else {
    tbnz(tmp, log2i_exact(SafepointMechanism::poll_bit()), slow_path);
  }
}

void MacroAssembler::rt_call(address dest, Register tmp) {
  CodeBlob *cb = CodeCache::find_blob(dest);
  if (cb) {
    far_call(RuntimeAddress(dest));
  } else {
    lea(tmp, RuntimeAddress(dest));
    blr(tmp);
  }
}

void MacroAssembler::push_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ldr(rscratch1, Address(java_thread, JavaThread::cont_fastpath_offset()));
  cmp(sp, rscratch1);
  br(Assembler::LS, done);
  mov(rscratch1, sp); // we can't use sp as the source in str
  str(rscratch1, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}

void MacroAssembler::pop_cont_fastpath(Register java_thread) {
  if (!Continuations::enabled()) return;
  Label done;
  ldr(rscratch1, Address(java_thread, JavaThread::cont_fastpath_offset()));
  cmp(sp, rscratch1);
  br(Assembler::LO, done);
  str(zr, Address(java_thread, JavaThread::cont_fastpath_offset()));
  bind(done);
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  // we must set sp to zero to clear frame
  str(zr, Address(rthread, JavaThread::last_Java_sp_offset()));

  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    str(zr, Address(rthread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
}

// Calls to C land
//
// When entering C land, the rfp, & resp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc,
                                         Register scratch) {

  if (last_java_pc->is_valid()) {
      str(last_java_pc, Address(rthread,
                                JavaThread::frame_anchor_offset()
                                + JavaFrameAnchor::last_Java_pc_offset()));
    }

  // determine last_java_sp register
  if (last_java_sp == sp) {
    mov(scratch, sp);
    last_java_sp = scratch;
  } else if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  str(last_java_sp, Address(rthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    str(last_java_fp, Address(rthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc,
                                         Register scratch) {
  assert(last_java_pc != NULL, "must provide a valid PC");

  adr(scratch, last_java_pc);
  str(scratch, Address(rthread,
                       JavaThread::frame_anchor_offset()
                       + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg, scratch);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register scratch) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, scratch);
  }
}

static inline bool target_needs_far_branch(address addr) {
  // codecache size <= 128M
  if (!MacroAssembler::far_branches()) {
    return false;
  }
  // codecache size > 240M
  if (MacroAssembler::codestub_branch_needs_far_jump()) {
    return true;
  }
  // codecache size: 128M..240M
  return !CodeCache::is_non_nmethod(addr);
}

void MacroAssembler::far_call(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
  if (target_needs_far_branch(entry.target())) {
    uint64_t offset;
    // We can use ADRP here because we know that the total size of
    // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
    adrp(tmp, entry, offset);
    add(tmp, tmp, offset);
    blr(tmp);
  } else {
    bl(entry);
  }
}

int MacroAssembler::far_jump(Address entry, Register tmp) {
  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
  assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far call not found in code cache");
  assert(entry.rspec().type() == relocInfo::external_word_type
         || entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
  address start = pc();
  if (target_needs_far_branch(entry.target())) {
    uint64_t offset;
    // We can use ADRP here because we know that the total size of
    // the code cache cannot exceed 2Gb (ADRP limit is 4GB).
    adrp(tmp, entry, offset);
    add(tmp, tmp, offset);
    br(tmp);
  } else {
    b(entry);
  }
  return pc() - start;
}

void MacroAssembler::reserved_stack_check() {
    // testing if reserved zone needs to be enabled
    Label no_reserved_zone_enabling;

    ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
    cmp(sp, rscratch1);
    br(Assembler::LO, no_reserved_zone_enabling);

    enter();   // LR and FP are live.
    lea(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone));
    mov(c_rarg0, rthread);
    blr(rscratch1);
    leave();

    // We have already removed our own frame.
    // throw_delayed_StackOverflowError will think that it's been
    // called by our caller.
    lea(rscratch1, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()));
    br(rscratch1);
    should_not_reach_here();

    bind(no_reserved_zone_enabling);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
   // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rthread;
  }

  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(java_thread == rthread, "unexpected register");
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result"an>);
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"span>);

  // push java thread (becomes first argument of C function)

  mov(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != rfp, "can't use rfp");

  Label l;
  set_last_Java_frame(last_java_sp, rfp, l, rscratch1);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // lr could be poisoned with PAC signature during throw_pending_exception
  // if it was tail-call optimized by compiler, since lr is not callee-saved
  // reload it with proper value
  adr(lr, l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true);

   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    cbz(rscratch1, ok);
    lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
    br(rscratch1);
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

// Check the entry target is always reachable from any branch.
static bool is_always_within_branch_range(Address entry) {
  const address target = entry.target();

  if (!CodeCache::contains(target)) {
    // We always use trampolines for callees outside CodeCache.
    assert(entry.rspec().type() == relocInfo::runtime_call_type, "non-runtime call of an external target");
    return false;
  }

  if (!MacroAssembler::far_branches()) {
    return true;
  }

  if (entry.rspec().type() == relocInfo::runtime_call_type) {
    // Runtime calls are calls of a non-compiled method (stubs, adapters).
    // Non-compiled methods stay forever in CodeCache.
    // We check whether the longest possible branch is within the branch range.
    assert(CodeCache::find_blob(target) != NULL &&
          !CodeCache::find_blob(target)->is_compiled(),
          "runtime call of compiled method");
    const address right_longest_branch_start = CodeCache::high_bound() - NativeInstruction::instruction_size;
    const address left_longest_branch_start = CodeCache::low_bound();
    const bool is_reachable = Assembler::reachable_from_branch_at(left_longest_branch_start, target) &&
                              Assembler::reachable_from_branch_at(right_longest_branch_start, target);
    return is_reachable;
  }

  return false;
}

// Maybe emit a call via a trampoline. If the code cache is small
// trampolines won't be emitted.
address MacroAssembler::trampoline_call(Address entry) {
  assert(entry.rspec().type() == relocInfo::runtime_call_type
         || entry.rspec().type() == relocInfo::opt_virtual_call_type
         || entry.rspec().type() == relocInfo::static_call_type
         || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");

  address target = entry.target();

  if (!is_always_within_branch_range(entry)) {
    if (!in_scratch_emit_size()) {
      // We don't want to emit a trampoline if C2 is generating dummy
      // code during its branch shortening phase.
      if (entry.rspec().type() == relocInfo::runtime_call_type) {
        assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
        code()->share_trampoline_for(entry.target(), offset());
      } else {
        address stub = emit_trampoline_stub(offset(), target);
        if (stub == NULL) {
          postcond(pc() == badAddress);
          return NULL; // CodeCache is full
        }
      }
    }
    target = pc();
  }

  address call_pc = pc();
  relocate(entry.rspec());
  bl(target);

  postcond(pc() != badAddress);
  return call_pc;
}

// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
//
// call-site:
//   branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call site in the stub section:
//   load the call target from the constant pool
//   branch (LR still points to the call site above)

address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
                                             address dest) {
  // Max stub size: alignment nop, TrampolineStub.
  address stub = start_a_stub(NativeInstruction::instruction_size
                   + NativeCallTrampolineStub::instruction_size);
  if (stub == NULL) {
    return NULL;  // CodeBuffer::expand failed
  }

  // Create a trampoline stub relocation which relates this trampoline stub
  // with the call instruction at insts_call_instruction_offset in the
  // instructions code-section.
  align(wordSize);
  relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
                                            + insts_call_instruction_offset));
  const int stub_start_offset = offset();

  // Now, create the trampoline stub's code:
  // - load the call
  // - call
  Label target;
  ldr(rscratch1, target);
  br(rscratch1);
  bind(target);
  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
         "should be");
  emit_int64((int64_t)dest);

  const address stub_start_addr = addr_at(stub_start_offset);

  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");

  end_a_stub();
  return stub_start_addr;
}

void MacroAssembler::emit_static_call_stub() {
  // CompiledDirectStaticCall::set_to_interpreted knows the
  // exact layout of this stub.

  isb();
  mov_metadata(rmethod, (Metadata*)NULL);

  // Jump to the entry point of the c2i stub.
  movptr(rscratch1, 0);
  br(rscratch1);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  tst(x, 0xff);
  cset(x, Assembler::NE);
}

address MacroAssembler::ic_call(address entry, jint method_index) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
  // address const_ptr = long_constant((jlong)Universe::non_oop_word());
  // uintptr_t offset;
  // ldr_constant(rscratch2, const_ptr);
  movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
  return trampoline_call(Address(entry, rh));
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, rthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ldr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  str(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop_msg(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ldr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  str(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}

void MacroAssembler::post_call_nop() {
  if (!Continuations::enabled()) {
    return;
  }
  InstructionMark im(this);
  relocate(post_call_nop_Relocation::spec());
  nop();
  movk(zr, 0);
  movk(zr, 0);
}

// these are no-ops overridden by InterpreterMacroAssembler

void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }

void MacroAssembler::check_and_handle_popframe(Register java_thread) { }

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface,
                         bool return_method) {
  assert_different_registers(recv_klass, intf_klass, scan_temp);
  assert_different_registers(method_result, intf_klass, scan_temp);
  assert(recv_klass != method_result || !return_method,
     "recv_klass can be destroyed when method isn't needed");
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = in_bytes(Klass::vtable_start_offset());
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size_in_bytes();
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  ldrw(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
  add(scan_temp, scan_temp, vtable_base);

  if (return_method) {
    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
    // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
    lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3)));
    if (itentry_off)
      add(recv_klass, recv_klass, itentry_off);
  }

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  cmp(intf_klass, method_result);
  br(Assembler::EQ, found_method);
  bind(search);
  // Check that the previous entry is non-null.  A null entry means that
  // the receiver class doesn't implement the interface, and wasn't the
  // same as when the caller was compiled.
  cbz(method_result, L_no_such_interface);
  if (itableOffsetEntry::interface_offset_in_bytes() != 0) {
    add(scan_temp, scan_temp, scan_step);
    ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
  } else {
    ldr(method_result, Address(pre(scan_temp, scan_step)));
  }
  cmp(intf_klass, method_result);
  br(Assembler::NE, search);

  bind(found_method);

  // Got a hit.
  if (return_method) {
    ldrw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
    ldr(method_result, Address(recv_klass, scan_temp, Address::uxtw(0)));
  }
}

// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const int base = in_bytes(Klass::vtable_start_offset());
  assert(vtableEntry::size() * wordSize == 8,
         "adjust the scaling in the code below");
  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();

  if (vtable_index.is_register()) {
    lea(method_result, Address(recv_klass,
                               vtable_index.as_register(),
                               Address::lsl(LogBytesPerWord)));
    ldr(method_result, Address(method_result, vtable_offset_in_bytes));
  } else {
    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
    ldr(method_result,
        form_address(rscratch1, recv_klass, vtable_offset_in_bytes, 0));
  }
}

void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            b(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  br(Assembler::EQ, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    ldrw(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset);
  ldr(rscratch1, super_check_addr);
  cmp(super_klass, rscratch1); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    br(Assembler::EQ, *L_success);
    subs(zr, super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_slow_path);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef final_jmp
}

// These two are taken from x86, but they look generally useful

// scans count pointer sized words at [addr] for occurrence of value,
// generic
void MacroAssembler::repne_scan(Register addr, Register value, Register count,
                                Register scratch) {
  Label Lloop, Lexit;
  cbz(count, Lexit);
  bind(Lloop);
  ldr(scratch, post(addr, wordSize));
  cmp(value, scratch);
  br(EQ, Lexit);
  sub(count, count, 1);
  cbnz(count, Lloop);
  bind(Lexit);
}

// scans count 4 byte words at [addr] for occurrence of value,
// generic
void MacroAssembler::repne_scanw(Register addr, Register value, Register count,
                                Register scratch) {
  Label Lloop, Lexit;
  cbz(count, Lexit);
  bind(Lloop);
  ldrw(scratch, post(addr, wordSize));
  cmpw(value, scratch);
  br(EQ, Lexit);
  sub(count, count, 1);
  cbnz(count, Lloop);
  bind(Lexit);
}

void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  BLOCK_COMMENT("check_klass_subtype_slow_path");

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super)
  assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter)

  RegSet pushed_registers;
  if (!IS_A_TEMP(r2))    pushed_registers += r2;
  if (!IS_A_TEMP(r5))    pushed_registers += r5;

  if (super_klass != r0) {
    if (!IS_A_TEMP(r0))   pushed_registers += r0;
  }

  push(pushed_registers, sp);

  // Get super_klass value into r0 (even if it was in r5 or r2).
  if (super_klass != r0) {
    mov(r0, super_klass);
  }

#ifndef PRODUCT
  mov(rscratch2, (address)&SharedRuntime::_partial_subtype_ctr);
  Address pst_counter_addr(rscratch2);
  ldr(rscratch1, pst_counter_addr);
  add(rscratch1, rscratch1, 1);
  str(rscratch1, pst_counter_addr);
#endif //PRODUCT

  // We will consult the secondary-super array.
  ldr(r5, secondary_supers_addr);
  // Load the array length.
  ldrw(r2, Address(r5, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  add(r5, r5, Array<Klass*>::base_offset_in_bytes());

  cmp(sp, zr); // Clear Z flag; SP is never zero
  // Scan R2 words at [R5] for an occurrence of R0.
  // Set NZ/Z based on last compare.
  repne_scan(r5, r0, r2, rscratch1);

  // Unspill the temp. registers:
  pop(pushed_registers, sp);

  br(Assembler::NE, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  str(super_klass, super_cache_addr);

  if (L_success != &L_fallthrough) {
    b(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}

void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
  assert_different_registers(klass, rthread, scratch);

  Label L_fallthrough, L_tmp;
  if (L_fast_path == NULL) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == NULL) {
    L_slow_path = &L_fallthrough;
  }
  // Fast path check: class is fully initialized
  ldrb(scratch, Address(klass, InstanceKlass::init_state_offset()));
  subs(zr, scratch, InstanceKlass::fully_initialized);
  br(Assembler::EQ, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
  cmp(rthread, scratch);

  if (L_slow_path == &L_fallthrough) {
    br(Assembler::EQ, *L_fast_path);
    bind(*L_slow_path);
  } else if (L_fast_path == &L_fallthrough) {
    br(Assembler::NE, *L_slow_path);
    bind(*L_fast_path);
  } else {
    Unimplemented();
  }
}

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  strip_return_address(); // This might happen within a stack frame.
  protect_return_address();
  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));

  mov(r0, reg);
  movptr(rscratch1, (uintptr_t)(address)b);

  // call indirectly to solve generation ordering problem
  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  ldr(rscratch2, Address(rscratch2));
  blr(rscratch2);

  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
  authenticate_return_address();

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  strip_return_address(); // This might happen within a stack frame.
  protect_return_address();
  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));

  // addr may contain sp so we will have to adjust it based on the
  // pushes that we just did.
  if (addr.uses(sp)) {
    lea(r0, addr);
    ldr(r0, Address(r0, 4 * wordSize));
  } else {
    ldr(r0, addr);
  }
  movptr(rscratch1, (uintptr_t)(address)b);

  // call indirectly to solve generation ordering problem
  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  ldr(rscratch2, Address(rscratch2));
  blr(rscratch2);

  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));
  authenticate_return_address();

  BLOCK_COMMENT("} verify_oop_addr");
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize
                   + offset);
  } else {
    add(rscratch1, esp, arg_slot.as_register(),
        ext::uxtx, exact_log2(stackElementSize));
    return Address(rscratch1, offset);
  }
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  Label E, L;

  stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize)));

  mov(rscratch1, entry_point);
  blr(rscratch1);
  if (retaddr)
    bind(*retaddr);

  ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize)));
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert(arg_0 != c_rarg3, "smashed arg");
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any registers
    // NOTE: this is plenty to provoke a segv
    ldr(zr, Address(reg));
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

// MacroAssembler protected routines needed to implement
// public methods

void MacroAssembler::mov(Register r, Address dest) {
  code_section()->relocate(pc(), dest.rspec());
  uint64_t imm64 = (uint64_t)dest.target();
  movptr(r, imm64);
}

// Move a constant pointer into r.  In AArch64 mode the virtual
// address space is 48 bits in size, so we only need three
// instructions to create a patchable instruction sequence that can
// reach anywhere.
void MacroAssembler::movptr(Register r, uintptr_t imm64) {
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIX64, (uint64_t)imm64);
    block_comment(buffer);
  }
#endif
  assert(imm64 < (1ull << 48), "48-bit overflow in address constant");
  movz(r, imm64 & 0xffff);
  imm64 >>= 16;
  movk(r, imm64 & 0xffff, 16);
  imm64 >>= 16;
  movk(r, imm64 & 0xffff, 32);
}

// Macro to mov replicated immediate to vector register.
// imm64: only the lower 8/16/32 bits are considered for B/H/S type. That is,
//        the upper 56/48/32 bits must be zeros for B/H/S type.
// Vd will get the following values for different arrangements in T
//   imm64 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
//   imm64 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
//   imm64 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
//   imm64 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
//   imm64 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
//   imm64 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
//   imm64 == hex abcdefgh  T1D:  Vd = 00000000abcdefgh
//   imm64 == hex abcdefgh  T2D:  Vd = 00000000abcdefgh00000000abcdefgh
// Clobbers rscratch1
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
  assert(T != T1Q, "unsupported");
  if (T == T1D || T == T2D) {
    int imm = operand_valid_for_movi_immediate(imm64, T);
    if (-1 != imm) {
      movi(Vd, T, imm);
    } else {
      mov(rscratch1, imm64);
      dup(Vd, T, rscratch1);
    }
    return;
  }

#ifdef ASSERT
  if (T == T8B || T == T16B) assert((imm64 & ~0xff) == 0, "extraneous bits (T8B/T16B)");
  if (T == T4H || T == T8H) assert((imm64  & ~0xffff) == 0, "extraneous bits (T4H/T8H)");
  if (T == T2S || T == T4S) assert((imm64  & ~0xffffffff) == 0, "extraneous bits (T2S/T4S)");
#endif
  int shift = operand_valid_for_movi_immediate(imm64, T);
  uint32_t imm32 = imm64 & 0xffffffffULL;
  if (shift >= 0) {
    movi(Vd, T, (imm32 >> shift) & 0xff, shift);
  } else {
    movw(rscratch1, imm32);
    dup(Vd, T, rscratch1);
  }
}

void MacroAssembler::mov_immediate64(Register dst, uint64_t imm64)
{
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%" PRIX64, imm64);
    block_comment(buffer);
  }
#endif
  if (operand_valid_for_logical_immediate(false, imm64)) {
    orr(dst, zr, imm64);
  } else {
    // we can use a combination of MOVZ or MOVN with
    // MOVK to build up the constant
    uint64_t imm_h[4];
    int zero_count = 0;
    int neg_count = 0;
    int i;
    for (i = 0; i < 4; i++) {
      imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
      if (imm_h[i] == 0) {
        zero_count++;
      } else if (imm_h[i] == 0xffffL) {
        neg_count++;
      }
    }
    if (zero_count == 4) {
      // one MOVZ will do
      movz(dst, 0);
    } else if (neg_count == 4) {
      // one MOVN will do
      movn(dst, 0);
    } else if (zero_count == 3) {
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (uint32_t)imm_h[i], (i << 4));
          break;
        }
      }
    } else if (neg_count == 3) {
      // one MOVN will do
      for (int i = 0; i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
          break;
        }
      }
    } else if (zero_count == 2) {
      // one MOVZ and one MOVK will do
      for (i = 0; i < 3; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (uint32_t)imm_h[i], (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0L) {
          movk(dst, (uint32_t)imm_h[i], (i << 4));
        }
      }
    } else if (neg_count == 2) {
      // one MOVN and one MOVK will do
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movn(dst, (uint32_t)imm_h[i] ^ 0xffffL, (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movk(dst, (uint32_t)imm_h[i], (i << 4));
        }
      }
    } else if (zero_count == 1) {
      // one MOVZ and two MOVKs will do
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (uint32_t)imm_h[i], (i << 4));
--> --------------------

--> maximum size reached

--> --------------------

quality96%

¤ Dauer der Verarbeitung: 0.24 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.