Quellcode-Bibliothek

^© Kompilation durch diese Firma

[Weder Korrektheit noch Funktionsfähigkeit der Software werden zugesichert.]

Datei: assembler_riscv.cpp Sprache: JAVA

/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/interp_masm.hpp"
#include "interpreter/templateTable.hpp"
#include "memory/universe.hpp"
#include "oops/methodData.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/synchronizer.hpp"
#include "utilities/macros.hpp"

#define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->

// Global Register Names
static const Register rbcp     = LP64_ONLY(r13) NOT_LP64(rsi);
static const Register rlocals  = LP64_ONLY(r14) NOT_LP64(rdi);

// Address Computation: local variables
static inline Address iaddress(int n) {
  return Address(rlocals, Interpreter::local_offset_in_bytes(n));
}

static inline Address laddress(int n) {
  return iaddress(n + 1);
}

#ifndef _LP64
static inline Address haddress(int n) {
  return iaddress(n + 0);
}
#endif

static inline Address faddress(int n) {
  return iaddress(n);
}

static inline Address daddress(int n) {
  return laddress(n);
}

static inline Address aaddress(int n) {
  return iaddress(n);
}

static inline Address iaddress(Register r) {
  return Address(rlocals, r, Address::times_ptr);
}

static inline Address laddress(Register r) {
  return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1));
}

#ifndef _LP64
static inline Address haddress(Register r)       {
  return Address(rlocals, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
}
#endif

static inline Address faddress(Register r) {
  return iaddress(r);
}

static inline Address daddress(Register r) {
  return laddress(r);
}

static inline Address aaddress(Register r) {
  return iaddress(r);
}

// expression stack
// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
// data beyond the rsp which is potentially unsafe in an MT environment;
// an interrupt may overwrite that data.)
static inline Address at_rsp   () {
  return Address(rsp, 0);
}

// At top of Java expression stack which may be different than esp().  It
// isn't for category 1 objects.
static inline Address at_tos   () {
  return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
}

static inline Address at_tos_p1() {
  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
}

static inline Address at_tos_p2() {
  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
}

// Condition conversion
static Assembler::Condition j_not(TemplateTable::Condition cc) {
  switch (cc) {
  case TemplateTable::equal        : return Assembler::notEqual;
  case TemplateTable::not_equal    : return Assembler::equal;
  case TemplateTable::less         : return Assembler::greaterEqual;
  case TemplateTable::less_equal   : return Assembler::greater;
  case TemplateTable::greater      : return Assembler::lessEqual;
  case TemplateTable::greater_equal: return Assembler::less;
  }
  ShouldNotReachHere();
  return Assembler::zero;
}

// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL

static void do_oop_store(InterpreterMacroAssembler* _masm,
                         Address dst,
                         Register val,
                         DecoratorSet decorators = 0) {
  assert(val == noreg || val == rax, "parameter is just for looks");
  __ store_heap_oop(dst, val, rdx, rbx, LP64_ONLY(r8) NOT_LP64(rsi), decorators);
}

static void do_oop_load(InterpreterMacroAssembler* _masm,
                        Address src,
                        Register dst,
                        DecoratorSet decorators = 0) {
  __ load_heap_oop(dst, src, rdx, rbx, decorators);
}

Address TemplateTable::at_bcp(int offset) {
  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
  return Address(rbcp, offset);
}

void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
                                   int byte_no) {
  if (!RewriteBytecodes)  return;
  Label L_patch_done;

  switch (bc) {
  case Bytecodes::_fast_aputfield:
  case Bytecodes::_fast_bputfield:
  case Bytecodes::_fast_zputfield:
  case Bytecodes::_fast_cputfield:
  case Bytecodes::_fast_dputfield:
  case Bytecodes::_fast_fputfield:
  case Bytecodes::_fast_iputfield:
  case Bytecodes::_fast_lputfield:
  case Bytecodes::_fast_sputfield:
    {
      // We skip bytecode quickening for putfield instructions when
      // the put_code written to the constant pool cache is zero.
      // This is required so that every execution of this instruction
      // calls out to InterpreterRuntime::resolve_get_put to do
      // additional, required work.
      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
      __ movl(bc_reg, bc);
      __ cmpl(temp_reg, (int) 0);
      __ jcc(Assembler::zero, L_patch_done);  // don't patch
    }
    break;
  default:
    assert(byte_no == -1, "sanity");
    // the pair bytecodes have already done the load.
    if (load_bc_into_bc_reg) {
      __ movl(bc_reg, bc);
    }
  }

  if (JvmtiExport::can_post_breakpoint()) {
    Label L_fast_patch;
    // if a breakpoint is present we can't rewrite the stream directly
    __ movzbl(temp_reg, at_bcp(0));
    __ cmpl(temp_reg, Bytecodes::_breakpoint);
    __ jcc(Assembler::notEqual, L_fast_patch);
    __ get_method(temp_reg);
    // Let breakpoint table handling rewrite to quicker bytecode
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg);
#ifndef ASSERT
    __ jmpb(L_patch_done);
#else
    __ jmp(L_patch_done);
#endif
    __ bind(L_fast_patch);
  }

#ifdef ASSERT
  Label L_okay;
  __ load_unsigned_byte(temp_reg, at_bcp(0));
  __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
  __ jcc(Assembler::equal, L_okay);
  __ cmpl(temp_reg, bc_reg);
  __ jcc(Assembler::equal, L_okay);
  __ stop("patching the wrong bytecode");
  __ bind(L_okay);
#endif

  // patch bytecode
  __ movb(at_bcp(0), bc_reg);
  __ bind(L_patch_done);
}
// Individual instructions

void TemplateTable::nop() {
  transition(vtos, vtos);
  // nothing to do
}

void TemplateTable::shouldnotreachhere() {
  transition(vtos, vtos);
  __ stop("shouldnotreachhere bytecode");
}

void TemplateTable::aconst_null() {
  transition(vtos, atos);
  __ xorl(rax, rax);
}

void TemplateTable::iconst(int value) {
  transition(vtos, itos);
  if (value == 0) {
    __ xorl(rax, rax);
  } else {
    __ movl(rax, value);
  }
}

void TemplateTable::lconst(int value) {
  transition(vtos, ltos);
  if (value == 0) {
    __ xorl(rax, rax);
  } else {
    __ movl(rax, value);
  }
#ifndef _LP64
  assert(value >= 0, "check this code");
  __ xorptr(rdx, rdx);
#endif
}

void TemplateTable::fconst(int value) {
  transition(vtos, ftos);
  if (UseSSE >= 1) {
    static float one = 1.0f, two = 2.0f;
    switch (value) {
    case 0:
      __ xorps(xmm0, xmm0);
      break;
    case 1:
      __ movflt(xmm0, ExternalAddress((address) &one), rscratch1);
      break;
    case 2:
      __ movflt(xmm0, ExternalAddress((address) &two), rscratch1);
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else
           if (value == 0) { __ fldz();
    } else if (value == 1) { __ fld1();
    } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
    } else                 { ShouldNotReachHere();
    }
#endif // _LP64
  }
}

void TemplateTable::dconst(int value) {
  transition(vtos, dtos);
  if (UseSSE >= 2) {
    static double one = 1.0;
    switch (value) {
    case 0:
      __ xorpd(xmm0, xmm0);
      break;
    case 1:
      __ movdbl(xmm0, ExternalAddress((address) &one), rscratch1);
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else
           if (value == 0) { __ fldz();
    } else if (value == 1) { __ fld1();
    } else                 { ShouldNotReachHere();
    }
#endif
  }
}

void TemplateTable::bipush() {
  transition(vtos, itos);
  __ load_signed_byte(rax, at_bcp(1));
}

void TemplateTable::sipush() {
  transition(vtos, itos);
  __ load_unsigned_short(rax, at_bcp(1));
  __ bswapl(rax);
  __ sarl(rax, 16);
}

void TemplateTable::ldc(LdcType type) {
  transition(vtos, vtos);
  Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
  Label call_ldc, notFloat, notClass, notInt, Done;

  if (is_ldc_wide(type)) {
    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
  } else {
    __ load_unsigned_byte(rbx, at_bcp(1));
  }

  __ get_cpool_and_tags(rcx, rax);
  const int base_offset = ConstantPool::header_size() * wordSize;
  const int tags_offset = Array<u1>::base_offset_in_bytes();

  // get type
  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));

  // unresolved class - get the resolved class
  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
  __ jccb(Assembler::equal, call_ldc);

  // unresolved class in error state - call into runtime to throw the error
  // from the first resolution attempt
  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
  __ jccb(Assembler::equal, call_ldc);

  // resolved class - need to call vm to get java mirror of the class
  __ cmpl(rdx, JVM_CONSTANT_Class);
  __ jcc(Assembler::notEqual, notClass);

  __ bind(call_ldc);

  __ movl(rarg, is_ldc_wide(type) ? 1 : 0);
  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg);

  __ push(atos);
  __ jmp(Done);

  __ bind(notClass);
  __ cmpl(rdx, JVM_CONSTANT_Float);
  __ jccb(Assembler::notEqual, notFloat);

  // ftos
  __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
  __ push(ftos);
  __ jmp(Done);

  __ bind(notFloat);
  __ cmpl(rdx, JVM_CONSTANT_Integer);
  __ jccb(Assembler::notEqual, notInt);

  // itos
  __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
  __ push(itos);
  __ jmp(Done);

  // assume the tag is for condy; if not, the VM runtime will tell us
  __ bind(notInt);
  condy_helper(Done);

  __ bind(Done);
}

// Fast path for caching oop constants.
void TemplateTable::fast_aldc(LdcType type) {
  transition(vtos, atos);

  Register result = rax;
  Register tmp = rdx;
  Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
  int index_size = is_ldc_wide(type) ? sizeof(u2) : sizeof(u1);

  Label resolved;

  // We are resolved if the resolved reference cache entry contains a
  // non-null object (String, MethodType, etc.)
  assert_different_registers(result, tmp);
  __ get_cache_index_at_bcp(tmp, 1, index_size);
  __ load_resolved_reference_at_index(result, tmp);
  __ testptr(result, result);
  __ jcc(Assembler::notZero, resolved);

  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);

  // first time invocation - must resolve first
  __ movl(rarg, (int)bytecode());
  __ call_VM(result, entry, rarg);
  __ bind(resolved);

  { // Check for the null sentinel.
    // If we just called the VM, it already did the mapping for us,
    // but it's harmless to retry.
    Label notNull;
    ExternalAddress null_sentinel((address)Universe::the_null_sentinel_addr());
    __ movptr(tmp, null_sentinel);
    __ resolve_oop_handle(tmp, rscratch2);
    __ cmpoop(tmp, result);
    __ jccb(Assembler::notEqual, notNull);
    __ xorptr(result, result);  // NULL object reference
    __ bind(notNull);
  }

  if (VerifyOops) {
    __ verify_oop(result);
  }
}

void TemplateTable::ldc2_w() {
  transition(vtos, vtos);
  Label notDouble, notLong, Done;
  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);

  __ get_cpool_and_tags(rcx, rax);
  const int base_offset = ConstantPool::header_size() * wordSize;
  const int tags_offset = Array<u1>::base_offset_in_bytes();

  // get type
  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
  __ cmpl(rdx, JVM_CONSTANT_Double);
  __ jccb(Assembler::notEqual, notDouble);

  // dtos
  __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
  __ push(dtos);

  __ jmp(Done);
  __ bind(notDouble);
  __ cmpl(rdx, JVM_CONSTANT_Long);
  __ jccb(Assembler::notEqual, notLong);

  // ltos
  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
  NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
  __ push(ltos);
  __ jmp(Done);

  __ bind(notLong);
  condy_helper(Done);

  __ bind(Done);
}

void TemplateTable::condy_helper(Label& Done) {
  const Register obj = rax;
  const Register off = rbx;
  const Register flags = rcx;
  const Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
  __ movl(rarg, (int)bytecode());
  call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg);
#ifndef _LP64
  // borrow rdi from locals
  __ get_thread(rdi);
  __ get_vm_result_2(flags, rdi);
  __ restore_locals();
#else
  __ get_vm_result_2(flags, r15_thread);
#endif
  // VMr = obj = base address to find primitive value to push
  // VMr2 = flags = (tos, off) using format of CPCE::_flags
  __ movl(off, flags);
  __ andl(off, ConstantPoolCacheEntry::field_index_mask);
  const Address field(obj, off, Address::times_1, 0*wordSize);

  // What sort of thing are we loading?
  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);

  switch (bytecode()) {
  case Bytecodes::_ldc:
  case Bytecodes::_ldc_w:
    {
      // tos in (itos, ftos, stos, btos, ctos, ztos)
      Label notInt, notFloat, notShort, notByte, notChar, notBool;
      __ cmpl(flags, itos);
      __ jccb(Assembler::notEqual, notInt);
      // itos
      __ movl(rax, field);
      __ push(itos);
      __ jmp(Done);

      __ bind(notInt);
      __ cmpl(flags, ftos);
      __ jccb(Assembler::notEqual, notFloat);
      // ftos
      __ load_float(field);
      __ push(ftos);
      __ jmp(Done);

      __ bind(notFloat);
      __ cmpl(flags, stos);
      __ jccb(Assembler::notEqual, notShort);
      // stos
      __ load_signed_short(rax, field);
      __ push(stos);
      __ jmp(Done);

      __ bind(notShort);
      __ cmpl(flags, btos);
      __ jccb(Assembler::notEqual, notByte);
      // btos
      __ load_signed_byte(rax, field);
      __ push(btos);
      __ jmp(Done);

      __ bind(notByte);
      __ cmpl(flags, ctos);
      __ jccb(Assembler::notEqual, notChar);
      // ctos
      __ load_unsigned_short(rax, field);
      __ push(ctos);
      __ jmp(Done);

      __ bind(notChar);
      __ cmpl(flags, ztos);
      __ jccb(Assembler::notEqual, notBool);
      // ztos
      __ load_signed_byte(rax, field);
      __ push(ztos);
      __ jmp(Done);

      __ bind(notBool);
      break;
    }

  case Bytecodes::_ldc2_w:
    {
      Label notLong, notDouble;
      __ cmpl(flags, ltos);
      __ jccb(Assembler::notEqual, notLong);
      // ltos
      // Loading high word first because movptr clobbers rax
      NOT_LP64(__ movptr(rdx, field.plus_disp(4)));
      __ movptr(rax, field);
      __ push(ltos);
      __ jmp(Done);

      __ bind(notLong);
      __ cmpl(flags, dtos);
      __ jccb(Assembler::notEqual, notDouble);
      // dtos
      __ load_double(field);
      __ push(dtos);
      __ jmp(Done);

      __ bind(notDouble);
      break;
    }

  default:
    ShouldNotReachHere();
  }

  __ stop("bad ldc/condy");
}

void TemplateTable::locals_index(Register reg, int offset) {
  __ load_unsigned_byte(reg, at_bcp(offset));
  __ negptr(reg);
}

void TemplateTable::iload() {
  iload_internal();
}

void TemplateTable::nofast_iload() {
  iload_internal(may_not_rewrite);
}

void TemplateTable::iload_internal(RewriteControl rc) {
  transition(vtos, itos);
  if (RewriteFrequentPairs && rc == may_rewrite) {
    Label rewrite, done;
    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
    LP64_ONLY(assert(rbx != bc, "register damaged"));

    // get next byte
    __ load_unsigned_byte(rbx,
                          at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    // if _iload, wait to rewrite to iload2.  We only want to rewrite the
    // last two iloads in a pair.  Comparing against fast_iload means that
    // the next bytecode is neither an iload or a caload, and therefore
    // an iload pair.
    __ cmpl(rbx, Bytecodes::_iload);
    __ jcc(Assembler::equal, done);

    __ cmpl(rbx, Bytecodes::_fast_iload);
    __ movl(bc, Bytecodes::_fast_iload2);

    __ jccb(Assembler::equal, rewrite);

    // if _caload, rewrite to fast_icaload
    __ cmpl(rbx, Bytecodes::_caload);
    __ movl(bc, Bytecodes::_fast_icaload);
    __ jccb(Assembler::equal, rewrite);

    // rewrite so iload doesn't check again.
    __ movl(bc, Bytecodes::_fast_iload);

    // rewrite
    // bc: fast bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_iload, bc, rbx, false);
    __ bind(done);
  }

  // Get the local value into tos
  locals_index(rbx);
  __ movl(rax, iaddress(rbx));
}

void TemplateTable::fast_iload2() {
  transition(vtos, itos);
  locals_index(rbx);
  __ movl(rax, iaddress(rbx));
  __ push(itos);
  locals_index(rbx, 3);
  __ movl(rax, iaddress(rbx));
}

void TemplateTable::fast_iload() {
  transition(vtos, itos);
  locals_index(rbx);
  __ movl(rax, iaddress(rbx));
}

void TemplateTable::lload() {
  transition(vtos, ltos);
  locals_index(rbx);
  __ movptr(rax, laddress(rbx));
  NOT_LP64(__ movl(rdx, haddress(rbx)));
}

void TemplateTable::fload() {
  transition(vtos, ftos);
  locals_index(rbx);
  __ load_float(faddress(rbx));
}

void TemplateTable::dload() {
  transition(vtos, dtos);
  locals_index(rbx);
  __ load_double(daddress(rbx));
}

void TemplateTable::aload() {
  transition(vtos, atos);
  locals_index(rbx);
  __ movptr(rax, aaddress(rbx));
}

void TemplateTable::locals_index_wide(Register reg) {
  __ load_unsigned_short(reg, at_bcp(2));
  __ bswapl(reg);
  __ shrl(reg, 16);
  __ negptr(reg);
}

void TemplateTable::wide_iload() {
  transition(vtos, itos);
  locals_index_wide(rbx);
  __ movl(rax, iaddress(rbx));
}

void TemplateTable::wide_lload() {
  transition(vtos, ltos);
  locals_index_wide(rbx);
  __ movptr(rax, laddress(rbx));
  NOT_LP64(__ movl(rdx, haddress(rbx)));
}

void TemplateTable::wide_fload() {
  transition(vtos, ftos);
  locals_index_wide(rbx);
  __ load_float(faddress(rbx));
}

void TemplateTable::wide_dload() {
  transition(vtos, dtos);
  locals_index_wide(rbx);
  __ load_double(daddress(rbx));
}

void TemplateTable::wide_aload() {
  transition(vtos, atos);
  locals_index_wide(rbx);
  __ movptr(rax, aaddress(rbx));
}

void TemplateTable::index_check(Register array, Register index) {
  // Pop ptr into array
  __ pop_ptr(array);
  index_check_without_pop(array, index);
}

void TemplateTable::index_check_without_pop(Register array, Register index) {
  // destroys rbx
  // check array
  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
  // sign extend index for use by indexed load
  __ movl2ptr(index, index);
  // check index
  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
  if (index != rbx) {
    // ??? convention: move aberrant index into rbx for exception message
    assert(rbx != array, "different registers");
    __ movl(rbx, index);
  }
  Label skip;
  __ jccb(Assembler::below, skip);
  // Pass array to create more detailed exceptions.
  __ mov(NOT_LP64(rax) LP64_ONLY(c_rarg1), array);
  __ jump(ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
  __ bind(skip);
}

void TemplateTable::iaload() {
  transition(itos, itos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, rax,
                    Address(rdx, rax, Address::times_4,
                            arrayOopDesc::base_offset_in_bytes(T_INT)),
                    noreg, noreg);
}

void TemplateTable::laload() {
  transition(itos, ltos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  NOT_LP64(__ mov(rbx, rax));
  // rbx,: index
  __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, noreg /* ltos */,
                    Address(rdx, rbx, Address::times_8,
                            arrayOopDesc::base_offset_in_bytes(T_LONG)),
                    noreg, noreg);
}

void TemplateTable::faload() {
  transition(itos, ftos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg /* ftos */,
                    Address(rdx, rax,
                            Address::times_4,
                            arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
                    noreg, noreg);
}

void TemplateTable::daload() {
  transition(itos, dtos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg /* dtos */,
                    Address(rdx, rax,
                            Address::times_8,
                            arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
                    noreg, noreg);
}

void TemplateTable::aaload() {
  transition(itos, atos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  do_oop_load(_masm,
              Address(rdx, rax,
                      UseCompressedOops ? Address::times_4 : Address::times_ptr,
                      arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
              rax,
              IS_ARRAY);
}

void TemplateTable::baload() {
  transition(itos, itos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, rax,
                    Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)),
                    noreg, noreg);
}

void TemplateTable::caload() {
  transition(itos, itos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, rax,
                    Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)),
                    noreg, noreg);
}

// iload followed by caload frequent pair
void TemplateTable::fast_icaload() {
  transition(vtos, itos);
  // load index out of locals
  locals_index(rbx);
  __ movl(rax, iaddress(rbx));

  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, rax,
                    Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)),
                    noreg, noreg);
}

void TemplateTable::saload() {
  transition(itos, itos);
  // rax: index
  // rdx: array
  index_check(rdx, rax); // kills rbx
  __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, rax,
                    Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)),
                    noreg, noreg);
}

void TemplateTable::iload(int n) {
  transition(vtos, itos);
  __ movl(rax, iaddress(n));
}

void TemplateTable::lload(int n) {
  transition(vtos, ltos);
  __ movptr(rax, laddress(n));
  NOT_LP64(__ movptr(rdx, haddress(n)));
}

void TemplateTable::fload(int n) {
  transition(vtos, ftos);
  __ load_float(faddress(n));
}

void TemplateTable::dload(int n) {
  transition(vtos, dtos);
  __ load_double(daddress(n));
}

void TemplateTable::aload(int n) {
  transition(vtos, atos);
  __ movptr(rax, aaddress(n));
}

void TemplateTable::aload_0() {
  aload_0_internal();
}

void TemplateTable::nofast_aload_0() {
  aload_0_internal(may_not_rewrite);
}

void TemplateTable::aload_0_internal(RewriteControl rc) {
  transition(vtos, atos);
  // According to bytecode histograms, the pairs:
  //
  // _aload_0, _fast_igetfield
  // _aload_0, _fast_agetfield
  // _aload_0, _fast_fgetfield
  //
  // occur frequently. If RewriteFrequentPairs is set, the (slow)
  // _aload_0 bytecode checks if the next bytecode is either
  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
  // rewrites the current bytecode into a pair bytecode; otherwise it
  // rewrites the current bytecode into _fast_aload_0 that doesn't do
  // the pair check anymore.
  //
  // Note: If the next bytecode is _getfield, the rewrite must be
  //       delayed, otherwise we may miss an opportunity for a pair.
  //
  // Also rewrite frequent pairs
  //   aload_0, aload_1
  //   aload_0, iload_1
  // These bytecodes with a small amount of code are most profitable
  // to rewrite
  if (RewriteFrequentPairs && rc == may_rewrite) {
    Label rewrite, done;

    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
    LP64_ONLY(assert(rbx != bc, "register damaged"));

    // get next byte
    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));

    // if _getfield then wait with rewrite
    __ cmpl(rbx, Bytecodes::_getfield);
    __ jcc(Assembler::equal, done);

    // if _igetfield then rewrite to _fast_iaccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_igetfield);
    __ movl(bc, Bytecodes::_fast_iaccess_0);
    __ jccb(Assembler::equal, rewrite);

    // if _agetfield then rewrite to _fast_aaccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_agetfield);
    __ movl(bc, Bytecodes::_fast_aaccess_0);
    __ jccb(Assembler::equal, rewrite);

    // if _fgetfield then rewrite to _fast_faccess_0
    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
    __ movl(bc, Bytecodes::_fast_faccess_0);
    __ jccb(Assembler::equal, rewrite);

    // else rewrite to _fast_aload0
    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
    __ movl(bc, Bytecodes::_fast_aload_0);

    // rewrite
    // bc: fast bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);

    __ bind(done);
  }

  // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
  aload(0);
}

void TemplateTable::istore() {
  transition(itos, vtos);
  locals_index(rbx);
  __ movl(iaddress(rbx), rax);
}

void TemplateTable::lstore() {
  transition(ltos, vtos);
  locals_index(rbx);
  __ movptr(laddress(rbx), rax);
  NOT_LP64(__ movptr(haddress(rbx), rdx));
}

void TemplateTable::fstore() {
  transition(ftos, vtos);
  locals_index(rbx);
  __ store_float(faddress(rbx));
}

void TemplateTable::dstore() {
  transition(dtos, vtos);
  locals_index(rbx);
  __ store_double(daddress(rbx));
}

void TemplateTable::astore() {
  transition(vtos, vtos);
  __ pop_ptr(rax);
  locals_index(rbx);
  __ movptr(aaddress(rbx), rax);
}

void TemplateTable::wide_istore() {
  transition(vtos, vtos);
  __ pop_i();
  locals_index_wide(rbx);
  __ movl(iaddress(rbx), rax);
}

void TemplateTable::wide_lstore() {
  transition(vtos, vtos);
  NOT_LP64(__ pop_l(rax, rdx));
  LP64_ONLY(__ pop_l());
  locals_index_wide(rbx);
  __ movptr(laddress(rbx), rax);
  NOT_LP64(__ movl(haddress(rbx), rdx));
}

void TemplateTable::wide_fstore() {
#ifdef _LP64
  transition(vtos, vtos);
  __ pop_f(xmm0);
  locals_index_wide(rbx);
  __ movflt(faddress(rbx), xmm0);
#else
  wide_istore();
#endif
}

void TemplateTable::wide_dstore() {
#ifdef _LP64
  transition(vtos, vtos);
  __ pop_d(xmm0);
  locals_index_wide(rbx);
  __ movdbl(daddress(rbx), xmm0);
#else
  wide_lstore();
#endif
}

void TemplateTable::wide_astore() {
  transition(vtos, vtos);
  __ pop_ptr(rax);
  locals_index_wide(rbx);
  __ movptr(aaddress(rbx), rax);
}

void TemplateTable::iastore() {
  transition(itos, vtos);
  __ pop_i(rbx);
  // rax: value
  // rbx: index
  // rdx: array
  index_check(rdx, rbx); // prefer index in rbx
  __ access_store_at(T_INT, IN_HEAP | IS_ARRAY,
                     Address(rdx, rbx, Address::times_4,
                             arrayOopDesc::base_offset_in_bytes(T_INT)),
                     rax, noreg, noreg, noreg);
}

void TemplateTable::lastore() {
  transition(ltos, vtos);
  __ pop_i(rbx);
  // rax,: low(value)
  // rcx: array
  // rdx: high(value)
  index_check(rcx, rbx);  // prefer index in rbx,
  // rbx,: index
  __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY,
                     Address(rcx, rbx, Address::times_8,
                             arrayOopDesc::base_offset_in_bytes(T_LONG)),
                     noreg /* ltos */, noreg, noreg, noreg);
}

void TemplateTable::fastore() {
  transition(ftos, vtos);
  __ pop_i(rbx);
  // value is in UseSSE >= 1 ? xmm0 : ST(0)
  // rbx:  index
  // rdx:  array
  index_check(rdx, rbx); // prefer index in rbx
  __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY,
                     Address(rdx, rbx, Address::times_4,
                             arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
                     noreg /* ftos */, noreg, noreg, noreg);
}

void TemplateTable::dastore() {
  transition(dtos, vtos);
  __ pop_i(rbx);
  // value is in UseSSE >= 2 ? xmm0 : ST(0)
  // rbx:  index
  // rdx:  array
  index_check(rdx, rbx); // prefer index in rbx
  __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY,
                     Address(rdx, rbx, Address::times_8,
                             arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
                     noreg /* dtos */, noreg, noreg, noreg);
}

void TemplateTable::aastore() {
  Label is_null, ok_is_subtype, done;
  transition(vtos, vtos);
  // stack: ..., array, index, value
  __ movptr(rax, at_tos());    // value
  __ movl(rcx, at_tos_p1()); // index
  __ movptr(rdx, at_tos_p2()); // array

  Address element_address(rdx, rcx,
                          UseCompressedOops? Address::times_4 : Address::times_ptr,
                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  index_check_without_pop(rdx, rcx);     // kills rbx
  __ testptr(rax, rax);
  __ jcc(Assembler::zero, is_null);

  // Move subklass into rbx
  __ load_klass(rbx, rax, rscratch1);
  // Move superklass into rax
  __ load_klass(rax, rdx, rscratch1);
  __ movptr(rax, Address(rax,
                         ObjArrayKlass::element_klass_offset()));

  // Generate subtype check.  Blows rcx, rdi
  // Superklass in rax.  Subklass in rbx.
  __ gen_subtype_check(rbx, ok_is_subtype);

  // Come here on failure
  // object is at TOS
  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));

  // Come here on success
  __ bind(ok_is_subtype);

  // Get the value we will store
  __ movptr(rax, at_tos());
  __ movl(rcx, at_tos_p1()); // index
  // Now store using the appropriate barrier
  do_oop_store(_masm, element_address, rax, IS_ARRAY);
  __ jmp(done);

  // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
  __ bind(is_null);
  __ profile_null_seen(rbx);

  // Store a NULL
  do_oop_store(_masm, element_address, noreg, IS_ARRAY);

  // Pop stack arguments
  __ bind(done);
  __ addptr(rsp, 3 * Interpreter::stackElementSize);
}

void TemplateTable::bastore() {
  transition(itos, vtos);
  __ pop_i(rbx);
  // rax: value
  // rbx: index
  // rdx: array
  index_check(rdx, rbx); // prefer index in rbx
  // Need to check whether array is boolean or byte
  // since both types share the bastore bytecode.
  __ load_klass(rcx, rdx, rscratch1);
  __ movl(rcx, Address(rcx, Klass::layout_helper_offset()));
  int diffbit = Klass::layout_helper_boolean_diffbit();
  __ testl(rcx, diffbit);
  Label L_skip;
  __ jccb(Assembler::zero, L_skip);
  __ andl(rax, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
  __ bind(L_skip);
  __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY,
                     Address(rdx, rbx,Address::times_1,
                             arrayOopDesc::base_offset_in_bytes(T_BYTE)),
                     rax, noreg, noreg, noreg);
}

void TemplateTable::castore() {
  transition(itos, vtos);
  __ pop_i(rbx);
  // rax: value
  // rbx: index
  // rdx: array
  index_check(rdx, rbx);  // prefer index in rbx
  __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY,
                     Address(rdx, rbx, Address::times_2,
                             arrayOopDesc::base_offset_in_bytes(T_CHAR)),
                     rax, noreg, noreg, noreg);
}

void TemplateTable::sastore() {
  castore();
}

void TemplateTable::istore(int n) {
  transition(itos, vtos);
  __ movl(iaddress(n), rax);
}

void TemplateTable::lstore(int n) {
  transition(ltos, vtos);
  __ movptr(laddress(n), rax);
  NOT_LP64(__ movptr(haddress(n), rdx));
}

void TemplateTable::fstore(int n) {
  transition(ftos, vtos);
  __ store_float(faddress(n));
}

void TemplateTable::dstore(int n) {
  transition(dtos, vtos);
  __ store_double(daddress(n));
}

void TemplateTable::astore(int n) {
  transition(vtos, vtos);
  __ pop_ptr(rax);
  __ movptr(aaddress(n), rax);
}

void TemplateTable::pop() {
  transition(vtos, vtos);
  __ addptr(rsp, Interpreter::stackElementSize);
}

void TemplateTable::pop2() {
  transition(vtos, vtos);
  __ addptr(rsp, 2 * Interpreter::stackElementSize);
}

void TemplateTable::dup() {
  transition(vtos, vtos);
  __ load_ptr(0, rax);
  __ push_ptr(rax);
  // stack: ..., a, a
}

void TemplateTable::dup_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr( 0, rax);  // load b
  __ load_ptr( 1, rcx);  // load a
  __ store_ptr(1, rax);  // store b
  __ store_ptr(0, rcx);  // store a
  __ push_ptr(rax);      // push b
  // stack: ..., b, a, b
}

void TemplateTable::dup_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr( 0, rax);  // load c
  __ load_ptr( 2, rcx);  // load a
  __ store_ptr(2, rax);  // store c in a
  __ push_ptr(rax);      // push c
  // stack: ..., c, b, c, c
  __ load_ptr( 2, rax);  // load b
  __ store_ptr(2, rcx);  // store a in b
  // stack: ..., c, a, c, c
  __ store_ptr(1, rax);  // store b in c
  // stack: ..., c, a, b, c
}

void TemplateTable::dup2() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr(1, rax);  // load a
  __ push_ptr(rax);     // push a
  __ load_ptr(1, rax);  // load b
  __ push_ptr(rax);     // push b
  // stack: ..., a, b, a, b
}

void TemplateTable::dup2_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr( 0, rcx);  // load c
  __ load_ptr( 1, rax);  // load b
  __ push_ptr(rax);      // push b
  __ push_ptr(rcx);      // push c
  // stack: ..., a, b, c, b, c
  __ store_ptr(3, rcx);  // store c in b
  // stack: ..., a, c, c, b, c
  __ load_ptr( 4, rcx);  // load a
  __ store_ptr(2, rcx);  // store a in 2nd c
  // stack: ..., a, c, a, b, c
  __ store_ptr(4, rax);  // store b in a
  // stack: ..., b, c, a, b, c
}

void TemplateTable::dup2_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c, d
  __ load_ptr( 0, rcx);  // load d
  __ load_ptr( 1, rax);  // load c
  __ push_ptr(rax);      // push c
  __ push_ptr(rcx);      // push d
  // stack: ..., a, b, c, d, c, d
  __ load_ptr( 4, rax);  // load b
  __ store_ptr(2, rax);  // store b in d
  __ store_ptr(4, rcx);  // store d in b
  // stack: ..., a, d, c, b, c, d
  __ load_ptr( 5, rcx);  // load a
  __ load_ptr( 3, rax);  // load c
  __ store_ptr(3, rcx);  // store a in c
  __ store_ptr(5, rax);  // store c in a
  // stack: ..., c, d, a, b, c, d
}

void TemplateTable::swap() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr( 1, rcx);  // load a
  __ load_ptr( 0, rax);  // load b
  __ store_ptr(0, rcx);  // store a in b
  __ store_ptr(1, rax);  // store b in a
  // stack: ..., b, a
}

void TemplateTable::iop2(Operation op) {
  transition(itos, itos);
  switch (op) {
  case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
  case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
  case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
  case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
  case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
  case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
  case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
  case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
  case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
  default   : ShouldNotReachHere();
  }
}

void TemplateTable::lop2(Operation op) {
  transition(ltos, ltos);
#ifdef _LP64
  switch (op) {
  case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
  case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
  case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
  case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
  case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
  default   : ShouldNotReachHere();
  }
#else
  __ pop_l(rbx, rcx);
  switch (op) {
    case add  : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
    case sub  : __ subl(rbx, rax); __ sbbl(rcx, rdx);
                __ mov (rax, rbx); __ mov (rdx, rcx); break;
    case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break;
    case _or  : __ orl (rax, rbx); __ orl (rdx, rcx); break;
    case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
    default   : ShouldNotReachHere();
  }
#endif
}

void TemplateTable::idiv() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
}

void TemplateTable::irem() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
  __ movl(rax, rdx);
}

void TemplateTable::lmul() {
  transition(ltos, ltos);
#ifdef _LP64
  __ pop_l(rdx);
  __ imulq(rax, rdx);
#else
  __ pop_l(rbx, rcx);
  __ push(rcx); __ push(rbx);
  __ push(rdx); __ push(rax);
  __ lmul(2 * wordSize, 0);
  __ addptr(rsp, 4 * wordSize);  // take off temporaries
#endif
}

void TemplateTable::ldiv() {
  transition(ltos, ltos);
#ifdef _LP64
  __ mov(rcx, rax);
  __ pop_l(rax);
  // generate explicit div0 check
  __ testq(rcx, rcx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivq(rcx); // kills rbx
#else
  __ pop_l(rbx, rcx);
  __ push(rcx); __ push(rbx);
  __ push(rdx); __ push(rax);
  // check if y = 0
  __ orl(rax, rdx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
  __ addptr(rsp, 4 * wordSize);  // take off temporaries
#endif
}

void TemplateTable::lrem() {
  transition(ltos, ltos);
#ifdef _LP64
  __ mov(rcx, rax);
  __ pop_l(rax);
  __ testq(rcx, rcx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivq(rcx); // kills rbx
  __ mov(rax, rdx);
#else
  __ pop_l(rbx, rcx);
  __ push(rcx); __ push(rbx);
  __ push(rdx); __ push(rax);
  // check if y = 0
  __ orl(rax, rdx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
  __ addptr(rsp, 4 * wordSize);
#endif
}

void TemplateTable::lshl() {
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
  #ifdef _LP64
  __ pop_l(rax);                                 // get shift value
  __ shlq(rax);
#else
  __ pop_l(rax, rdx);                            // get shift value
  __ lshl(rdx, rax);
#endif
}

void TemplateTable::lshr() {
#ifdef _LP64
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax);                                 // get shift value
  __ sarq(rax);
#else
  transition(itos, ltos);
  __ mov(rcx, rax);                              // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax, true);
#endif
}

void TemplateTable::lushr() {
  transition(itos, ltos);
#ifdef _LP64
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax);                                 // get shift value
  __ shrq(rax);
#else
  __ mov(rcx, rax);                              // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax);
#endif
}

void TemplateTable::fop2(Operation op) {
  transition(ftos, ftos);

  if (UseSSE >= 1) {
    switch (op) {
    case add:
      __ addss(xmm0, at_rsp());
      __ addptr(rsp, Interpreter::stackElementSize);
      break;
    case sub:
      __ movflt(xmm1, xmm0);
      __ pop_f(xmm0);
      __ subss(xmm0, xmm1);
      break;
    case mul:
      __ mulss(xmm0, at_rsp());
      __ addptr(rsp, Interpreter::stackElementSize);
      break;
    case div:
      __ movflt(xmm1, xmm0);
      __ pop_f(xmm0);
      __ divss(xmm0, xmm1);
      break;
    case rem:
      // On x86_64 platforms the SharedRuntime::frem method is called to perform the
      // modulo operation. The frem method calls the function
      // double fmod(double x, double y) in math.h. The documentation of fmod states:
      // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
      // (signalling or quiet) is returned.
      //
      // On x86_32 platforms the FPU is used to perform the modulo operation. The
      // reason is that on 32-bit Windows the sign of modulo operations diverges from
      // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f).
      // The fprem instruction used on x86_32 is functionally equivalent to
      // SharedRuntime::frem in that it returns a NaN.
#ifdef _LP64
      __ movflt(xmm1, xmm0);
      __ pop_f(xmm0);
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
#else // !_LP64
      __ push_f(xmm0);
      __ pop_f();
      __ fld_s(at_rsp());
      __ fremr(rax);
      __ f2ieee();
      __ pop(rax);  // pop second operand off the stack
      __ push_f();
      __ pop_f(xmm0);
#endif // _LP64
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else // !_LP64
    switch (op) {
    case add: __ fadd_s (at_rsp());                break;
    case sub: __ fsubr_s(at_rsp());                break;
    case mul: __ fmul_s (at_rsp());                break;
    case div: __ fdivr_s(at_rsp());                break;
    case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
    default : ShouldNotReachHere();
    }
    __ f2ieee();
    __ pop(rax);  // pop second operand off the stack
#endif // _LP64
  }
}

void TemplateTable::dop2(Operation op) {
  transition(dtos, dtos);
  if (UseSSE >= 2) {
    switch (op) {
    case add:
      __ addsd(xmm0, at_rsp());
      __ addptr(rsp, 2 * Interpreter::stackElementSize);
      break;
    case sub:
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ subsd(xmm0, xmm1);
      break;
    case mul:
      __ mulsd(xmm0, at_rsp());
      __ addptr(rsp, 2 * Interpreter::stackElementSize);
      break;
    case div:
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ divsd(xmm0, xmm1);
      break;
    case rem:
      // Similar to fop2(), the modulo operation is performed using the
      // SharedRuntime::drem method (on x86_64 platforms) or using the
      // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
#ifdef _LP64
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
#else // !_LP64
      __ push_d(xmm0);
      __ pop_d();
      __ fld_d(at_rsp());
      __ fremr(rax);
      __ d2ieee();
      __ pop(rax);
      __ pop(rdx);
      __ push_d();
      __ pop_d(xmm0);
#endif // _LP64
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else // !_LP64
    switch (op) {
    case add: __ fadd_d (at_rsp());                break;
    case sub: __ fsubr_d(at_rsp());                break;
    case mul: {
      // strict semantics
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1()));
      __ fmulp();
      __ fmul_d (at_rsp());
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2()));
      __ fmulp();
      break;
    }
    case div: {
      // strict semantics
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1()));
      __ fmul_d (at_rsp());
      __ fdivrp();
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2()));
      __ fmulp();
      break;
    }
    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
    default : ShouldNotReachHere();
    }
    __ d2ieee();
    // Pop double precision number from rsp.
    __ pop(rax);
    __ pop(rdx);
#endif // _LP64
  }
}

void TemplateTable::ineg() {
  transition(itos, itos);
  __ negl(rax);
}

void TemplateTable::lneg() {
  transition(ltos, ltos);
  LP64_ONLY(__ negq(rax));
  NOT_LP64(__ lneg(rdx, rax));
}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong float_signflip_pool[2*2];
static jlong double_signflip_pool[2*2];

void TemplateTable::fneg() {
  transition(ftos, ftos);
  if (UseSSE >= 1) {
    static jlong *float_signflip  = double_quadword(&float_signflip_pool[1],  CONST64(0x8000000080000000),  CONST64(0x8000000080000000));
    __ xorps(xmm0, ExternalAddress((address) float_signflip), rscratch1);
  } else {
    LP64_ONLY(ShouldNotReachHere());
    NOT_LP64(__ fchs());
  }
}

void TemplateTable::dneg() {
  transition(dtos, dtos);
  if (UseSSE >= 2) {
    static jlong *double_signflip =
      double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
    __ xorpd(xmm0, ExternalAddress((address) double_signflip), rscratch1);
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else
    __ fchs();
#endif
  }
}

void TemplateTable::iinc() {
  transition(vtos, vtos);
  __ load_signed_byte(rdx, at_bcp(2)); // get constant
  locals_index(rbx);
  __ addl(iaddress(rbx), rdx);
}

void TemplateTable::wide_iinc() {
  transition(vtos, vtos);
  __ movl(rdx, at_bcp(4)); // get constant
  locals_index_wide(rbx);
  __ bswapl(rdx); // swap bytes & sign-extend constant
  __ sarl(rdx, 16);
  __ addl(iaddress(rbx), rdx);
  // Note: should probably use only one movl to get both
  //       the index and the constant -> fix this
}

void TemplateTable::convert() {
#ifdef _LP64
  // Checking
#ifdef ASSERT
  {
    TosState tos_in  = ilgl;
    TosState tos_out = ilgl;
    switch (bytecode()) {
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_in = itos; break;
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_l2d: tos_in = ltos; break;
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_f2d: tos_in = ftos; break;
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_d2l: // fall through
    case Bytecodes::_d2f: tos_in = dtos; break;
    default             : ShouldNotReachHere();
    }
    switch (bytecode()) {
    case Bytecodes::_l2i: // fall through
    case Bytecodes::_f2i: // fall through
    case Bytecodes::_d2i: // fall through
    case Bytecodes::_i2b: // fall through
    case Bytecodes::_i2c: // fall through
    case Bytecodes::_i2s: tos_out = itos; break;
    case Bytecodes::_i2l: // fall through
    case Bytecodes::_f2l: // fall through
    case Bytecodes::_d2l: tos_out = ltos; break;
    case Bytecodes::_i2f: // fall through
    case Bytecodes::_l2f: // fall through
    case Bytecodes::_d2f: tos_out = ftos; break;
    case Bytecodes::_i2d: // fall through
    case Bytecodes::_l2d: // fall through
    case Bytecodes::_f2d: tos_out = dtos; break;
    default             : ShouldNotReachHere();
    }
    transition(tos_in, tos_out);
  }
#endif // ASSERT

  static const int64_t is_nan = 0x8000000000000000L;

  // Conversion
  switch (bytecode()) {
  case Bytecodes::_i2l:
    __ movslq(rax, rax);
    break;
  case Bytecodes::_i2f:
    __ cvtsi2ssl(xmm0, rax);
    break;
  case Bytecodes::_i2d:
    __ cvtsi2sdl(xmm0, rax);
    break;
  case Bytecodes::_i2b:
    __ movsbl(rax, rax);
    break;
  case Bytecodes::_i2c:
    __ movzwl(rax, rax);
    break;
  case Bytecodes::_i2s:
    __ movswl(rax, rax);
    break;
  case Bytecodes::_l2i:
    __ movl(rax, rax);
    break;
  case Bytecodes::_l2f:
    __ cvtsi2ssq(xmm0, rax);
    break;
  case Bytecodes::_l2d:
    __ cvtsi2sdq(xmm0, rax);
    break;
  case Bytecodes::_f2i:
  {
    Label L;
    __ cvttss2sil(rax, xmm0);
    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
    __ jcc(Assembler::notEqual, L);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
    __ bind(L);
  }
    break;
  case Bytecodes::_f2l:
  {
    Label L;
    __ cvttss2siq(rax, xmm0);
    // NaN or overflow/underflow?
    __ cmp64(rax, ExternalAddress((address) &is_nan), rscratch1);
    __ jcc(Assembler::notEqual, L);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
    __ bind(L);
  }
    break;
  case Bytecodes::_f2d:
    __ cvtss2sd(xmm0, xmm0);
    break;
  case Bytecodes::_d2i:
  {
    Label L;
    __ cvttsd2sil(rax, xmm0);
    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
    __ jcc(Assembler::notEqual, L);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
    __ bind(L);
  }
    break;
  case Bytecodes::_d2l:
  {
    Label L;
    __ cvttsd2siq(rax, xmm0);
    // NaN or overflow/underflow?
    __ cmp64(rax, ExternalAddress((address) &is_nan), rscratch1);
    __ jcc(Assembler::notEqual, L);
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
    __ bind(L);
  }
    break;
  case Bytecodes::_d2f:
    __ cvtsd2ss(xmm0, xmm0);
    break;
  default:
    ShouldNotReachHere();
  }
#else // !_LP64
  // Checking
#ifdef ASSERT
  { TosState tos_in  = ilgl;
    TosState tos_out = ilgl;
    switch (bytecode()) {
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_in = itos; break;
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_l2d: tos_in = ltos; break;
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_f2d: tos_in = ftos; break;
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_d2l: // fall through
      case Bytecodes::_d2f: tos_in = dtos; break;
      default             : ShouldNotReachHere();
    }
    switch (bytecode()) {
      case Bytecodes::_l2i: // fall through
      case Bytecodes::_f2i: // fall through
      case Bytecodes::_d2i: // fall through
      case Bytecodes::_i2b: // fall through
      case Bytecodes::_i2c: // fall through
      case Bytecodes::_i2s: tos_out = itos; break;
      case Bytecodes::_i2l: // fall through
      case Bytecodes::_f2l: // fall through
      case Bytecodes::_d2l: tos_out = ltos; break;
      case Bytecodes::_i2f: // fall through
      case Bytecodes::_l2f: // fall through
      case Bytecodes::_d2f: tos_out = ftos; break;
      case Bytecodes::_i2d: // fall through
      case Bytecodes::_l2d: // fall through
      case Bytecodes::_f2d: tos_out = dtos; break;
      default             : ShouldNotReachHere();
    }
    transition(tos_in, tos_out);
  }
#endif // ASSERT

  // Conversion
  // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
  switch (bytecode()) {
    case Bytecodes::_i2l:
      __ extend_sign(rdx, rax);
      break;
    case Bytecodes::_i2f:
      if (UseSSE >= 1) {
        __ cvtsi2ssl(xmm0, rax);
      } else {
        __ push(rax);          // store int on tos
        __ fild_s(at_rsp());   // load int to ST0
        __ f2ieee();           // truncate to float size
        __ pop(rcx);           // adjust rsp
      }
      break;
    case Bytecodes::_i2d:
      if (UseSSE >= 2) {
        __ cvtsi2sdl(xmm0, rax);
      } else {
      __ push(rax);          // add one slot for d2ieee()
      __ push(rax);          // store int on tos
      __ fild_s(at_rsp());   // load int to ST0
      __ d2ieee();           // truncate to double size
      __ pop(rcx);           // adjust rsp
      __ pop(rcx);
      }
      break;
    case Bytecodes::_i2b:
      __ shll(rax, 24);      // truncate upper 24 bits
      __ sarl(rax, 24);      // and sign-extend byte
      LP64_ONLY(__ movsbl(rax, rax));
      break;
    case Bytecodes::_i2c:
      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
      LP64_ONLY(__ movzwl(rax, rax));
      break;
    case Bytecodes::_i2s:
      __ shll(rax, 16);      // truncate upper 16 bits
      __ sarl(rax, 16);      // and sign-extend short
      LP64_ONLY(__ movswl(rax, rax));
      break;
    case Bytecodes::_l2i:
      /* nothing to do */
      break;
    case Bytecodes::_l2f:
      // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
      // 64-bit long values to floats. On 32-bit platforms it is not possible
      // to use that instruction with 64-bit operands, therefore the FPU is
      // used to perform the conversion.
      __ push(rdx);          // store long on tos
      __ push(rax);
      __ fild_d(at_rsp());   // load long to ST0
      __ f2ieee();           // truncate to float size
      __ pop(rcx);           // adjust rsp
      __ pop(rcx);
      if (UseSSE >= 1) {
        __ push_f();
        __ pop_f(xmm0);
      }
      break;
    case Bytecodes::_l2d:
      // On 32-bit platforms the FPU is used for conversion because on
      // 32-bit platforms it is not not possible to use the cvtsi2sdq
      // instruction with 64-bit operands.
      __ push(rdx);          // store long on tos
      __ push(rax);
      __ fild_d(at_rsp());   // load long to ST0
      __ d2ieee();           // truncate to double size
      __ pop(rcx);           // adjust rsp
      __ pop(rcx);
      if (UseSSE >= 2) {
        __ push_d();
        __ pop_d(xmm0);
      }
      break;
    case Bytecodes::_f2i:
      // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
      // as it returns 0 for any NaN.
      if (UseSSE >= 1) {
        __ push_f(xmm0);
      } else {
        __ push(rcx);          // reserve space for argument
        __ fstp_s(at_rsp());   // pass float argument on stack
      }
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
      break;
    case Bytecodes::_f2l:
      // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
      // as it returns 0 for any NaN.
      if (UseSSE >= 1) {
       __ push_f(xmm0);
      } else {
        __ push(rcx);          // reserve space for argument
        __ fstp_s(at_rsp());   // pass float argument on stack
      }
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
      break;
    case Bytecodes::_f2d:
      if (UseSSE < 1) {
        /* nothing to do */
      } else if (UseSSE == 1) {
        __ push_f(xmm0);
        __ pop_f();
      } else { // UseSSE >= 2
        __ cvtss2sd(xmm0, xmm0);
      }
      break;
    case Bytecodes::_d2i:
      if (UseSSE >= 2) {
        __ push_d(xmm0);
      } else {
        __ push(rcx);          // reserve space for argument
        __ push(rcx);
        __ fstp_d(at_rsp());   // pass double argument on stack
      }
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
      break;
    case Bytecodes::_d2l:
      if (UseSSE >= 2) {
        __ push_d(xmm0);
      } else {
        __ push(rcx);          // reserve space for argument
        __ push(rcx);
        __ fstp_d(at_rsp());   // pass double argument on stack
      }
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
      break;
    case Bytecodes::_d2f:
      if (UseSSE <= 1) {
        __ push(rcx);          // reserve space for f2ieee()
        __ f2ieee();           // truncate to float size
        __ pop(rcx);           // adjust rsp
        if (UseSSE == 1) {
          // The cvtsd2ss instruction is not available if UseSSE==1, therefore
          // the conversion is performed using the FPU in this case.
          __ push_f();
          __ pop_f(xmm0);
        }
      } else { // UseSSE >= 2
        __ cvtsd2ss(xmm0, xmm0);
      }
      break;
    default             :
      ShouldNotReachHere();
  }
#endif // _LP64
}

void TemplateTable::lcmp() {
  transition(ltos, itos);
#ifdef _LP64
  Label done;
  __ pop_l(rdx);
  __ cmpq(rdx, rax);
  __ movl(rax, -1);
  __ jccb(Assembler::less, done);
  __ setb(Assembler::notEqual, rax);
  __ movzbl(rax, rax);
  __ bind(done);
#else

  // y = rdx:rax
  __ pop_l(rbx, rcx);             // get x = rcx:rbx
  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
  __ mov(rax, rcx);
#endif
}

void TemplateTable::float_cmp(bool is_float, int unordered_result) {
  if ((is_float && UseSSE >= 1) ||
      (!is_float && UseSSE >= 2)) {
    Label done;
    if (is_float) {
      // XXX get rid of pop here, use ... reg, mem32
      __ pop_f(xmm1);
      __ ucomiss(xmm1, xmm0);
    } else {
      // XXX get rid of pop here, use ... reg, mem64
      __ pop_d(xmm1);
      __ ucomisd(xmm1, xmm0);
    }
    if (unordered_result < 0) {
      __ movl(rax, -1);
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::below, done);
      __ setb(Assembler::notEqual, rdx);
      __ movzbl(rax, rdx);
    } else {
      __ movl(rax, 1);
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::above, done);
      __ movl(rax, 0);
      __ jccb(Assembler::equal, done);
      __ decrementl(rax);
    }
    __ bind(done);
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else // !_LP64
    if (is_float) {
      __ fld_s(at_rsp());
    } else {
      __ fld_d(at_rsp());
      __ pop(rdx);
    }
    __ pop(rcx);
    __ fcmp2int(rax, unordered_result < 0);
#endif // _LP64
  }
}

void TemplateTable::branch(bool is_jsr, bool is_wide) {
  __ get_method(rcx); // rcx holds method
  __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
                                     // holds bumped taken count

  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
                             InvocationCounter::counter_offset();
  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
                              InvocationCounter::counter_offset();

  // Load up edx with the branch displacement
  if (is_wide) {
    __ movl(rdx, at_bcp(1));
  } else {
    __ load_signed_short(rdx, at_bcp(1));
  }
  __ bswapl(rdx);

  if (!is_wide) {
    __ sarl(rdx, 16);
  }
  LP64_ONLY(__ movl2ptr(rdx, rdx));

  // Handle all the JSR stuff here, then exit.
  // It's much shorter and cleaner than intermingling with the non-JSR
  // normal-branch stuff occurring below.
  if (is_jsr) {
    // Pre-load the next target bytecode into rbx
    __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1, 0));

    // compute return address as bci in rax
    __ lea(rax, at_bcp((is_wide ? 5 : 3) -
                        in_bytes(ConstMethod::codes_offset())));
    __ subptr(rax, Address(rcx, Method::const_offset()));
    // Adjust the bcp in r13 by the displacement in rdx
    __ addptr(rbcp, rdx);
    // jsr returns atos that is not an oop
    __ push_i(rax);
    __ dispatch_only(vtos, true);
    return;
  }

  // Normal (non-jsr) branch handling

  // Adjust the bcp in r13 by the displacement in rdx
  __ addptr(rbcp, rdx);

  assert(UseLoopCounter || !UseOnStackReplacement,
         "on-stack-replacement requires loop counters");
  Label backedge_counter_overflow;
  Label dispatch;
  if (UseLoopCounter) {
    // increment backedge counter for backward branches
    // rax: MDO
    // rbx: MDO bumped taken-count
    // rcx: method
    // rdx: target offset
    // r13: target bcp
    // r14: locals pointer
    __ testl(rdx, rdx);             // check if forward or backward branch
    __ jcc(Assembler::positive, dispatch); // count only if backward branch

    // check if MethodCounters exists
    Label has_counters;
    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
    __ testptr(rax, rax);
    __ jcc(Assembler::notZero, has_counters);
    __ push(rdx);
    __ push(rcx);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
               rcx);
    __ pop(rcx);
    __ pop(rdx);
    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, dispatch);
    __ bind(has_counters);

    Label no_mdo;
    if (ProfileInterpreter) {
      // Are we profiling?
      __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
      __ testptr(rbx, rbx);
      __ jccb(Assembler::zero, no_mdo);
      // Increment the MDO backedge counter
      const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
          in_bytes(InvocationCounter::counter_offset()));
      const Address mask(rbx, in_bytes(MethodData::backedge_mask_offset()));
      __ increment_mask_and_jump(mdo_backedge_counter, mask, rax,
          UseOnStackReplacement ? &backedge_counter_overflow : NULL);
      __ jmp(dispatch);
    }
    __ bind(no_mdo);
    // Increment backedge counter in MethodCounters*
    __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
    const Address mask(rcx, in_bytes(MethodCounters::backedge_mask_offset()));
    __ increment_mask_and_jump(Address(rcx, be_offset), mask, rax,
        UseOnStackReplacement ? &backedge_counter_overflow : NULL);
    __ bind(dispatch);
  }

  // Pre-load the next target bytecode into rbx
  __ load_unsigned_byte(rbx, Address(rbcp, 0));

  // continue with the bytecode @ target
  // rax: return bci for jsr's, unused otherwise
  // rbx: target bytecode
  // r13: target bcp
  __ dispatch_only(vtos, true);

  if (UseLoopCounter) {
    if (UseOnStackReplacement) {
      Label set_mdp;
      // invocation counter overflow
      __ bind(backedge_counter_overflow);
      __ negptr(rdx);
      __ addptr(rdx, rbcp); // branch bcp
--> --------------------

--> maximum size reached

--> --------------------

¤ Dauer der Verarbeitung: 0.129 Sekunden (vorverarbeitet) ¤

Druckansicht unsichere Verbindung
Druckansicht sprechenden Kalenders
Eigene Datei ansehen

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.