Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/sources/formale Sprachen/Java/Openjdk/src/hotspot/cpu/arm/ (Sun/Oracle ^©) Datei vom 13.11.2022 mit Größe 346 kB

Quelle arm.ad Sprache: unbekannt

//
// Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//

// ARM Architecture Description File

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
// The default cost (of an ALU instruction).
  int_def DEFAULT_COST      (    100,     100);
  int_def HUGE_COST         (1000000, 1000000);

// Memory refs are twice as expensive as run-of-the-mill.
  int_def MEMORY_REF_COST   (    200, DEFAULT_COST * 2);

// Branches are even more expensive.
  int_def BRANCH_COST       (    300, DEFAULT_COST * 3);
  int_def CALL_COST         (    300, DEFAULT_COST * 3);
%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

// Does destination need to be loaded in a register then passed to a
// branch instruction?
extern bool maybe_far_call(const CallNode *n);
extern bool maybe_far_call(const MachCallNode *n);
static inline bool cache_reachable() {
  return MacroAssembler::_cache_fully_reachable();
}

#define ldr_32 ldr
#define str_32 str
#define tst_32 tst
#define teq_32 teq
#if 1
extern bool PrintOptoAssembly;
#endif

class c2 {
public:
  static OptoRegPair return_value(int ideal_reg);
};

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::Shorten_branches  >---
  //--------------------------------------------------------------

public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    return ( 3 * 4 );
  }

  static uint size_deopt_handler() {
    return ( 9 * 4 );
  }

};

class Node::PD {
public:
  enum NodeFlags {
    _last_flag = Node::_last_flag
  };
};

// Assert that the given node is not a var shift.
bool assert_not_var_shift(const Node *n);
%}

source %{

// Assert that the given node is not a var shift.
bool assert_not_var_shift(const Node *n) {
  assert(!n->as_ShiftV()->is_var_shift(), "illegal var shift");
  return true;
}

#define __ _masm.

static FloatRegister reg_to_FloatRegister_object(int register_encoding);
static Register reg_to_register_object(int register_encoding);

void PhaseOutput::pd_perform_mach_node_analysis() {
}

int MachNode::pd_alignment_required() const {
  return 1;
}

int MachNode::compute_padding(int current_offset) const {
  return 0;
}

// ****************************************************************************

// REQUIRED FUNCTIONALITY

// emit an interrupt that is caught by the debugger (for debugging compiler)
void emit_break(CodeBuffer &cbuf) {
  C2_MacroAssembler _masm(&cbuf);
  __ breakpoint();
}

#ifndef PRODUCT
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
  st->print("TA");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

void emit_nop(CodeBuffer &cbuf) {
  C2_MacroAssembler _masm(&cbuf);
  __ nop();
}

void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
  int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
  int call_site_offset = cbuf.insts()->mark_off();
  C2_MacroAssembler _masm(&cbuf);
  __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
  address target = (address)m->method();
  assert(n->as_MachCall()->entry_point() == target, "sanity");
  assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity");
  assert(cache_reachable() == __ cache_fully_reachable(), "sanity");

  assert(target != NULL, "need real address");

  int ret_addr_offset = -1;
  if (rspec.type() == relocInfo::runtime_call_type) {
    __ call(target, rspec);
    ret_addr_offset = __ offset();
  } else {
    // scratches Rtemp
    ret_addr_offset = __ patchable_call(target, rspec, true);
  }
  assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()");
}

//=============================================================================
// REQUIRED FUNCTIONALITY for encoding
void emit_lo(CodeBuffer &cbuf, int val) {  }
void emit_hi(CodeBuffer &cbuf, int val) {  }

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();

int ConstantTable::calculate_table_base_offset() const {
  int offset = -(size() / 2);
  // flds, fldd: 8-bit  offset multiplied by 4: +/- 1024
  // ldr, ldrb : 12-bit offset:                 +/- 4096
  if (!Assembler::is_simm10(offset)) {
    offset = Assembler::min_simm10;
  }
  return offset;
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  Compile* C = ra_->C;
  ConstantTable& constant_table = C->output()->constant_table();
  C2_MacroAssembler _masm(&cbuf);

  Register r = as_Register(ra_->get_encode(this));
  CodeSection* consts_section = __ code()->consts();
  CodeSection* insts_section = __ code()->insts();
  // constants section size is aligned according to the align_at_start settings of the next section
  int consts_size = insts_section->align_at_start(consts_section->size());
  assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);

  // Materialize the constant table base.
  address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
  RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
  __ mov_address(r, baseaddr, rspec);
}

uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
  return 8;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  char reg[128];
  ra_->dump_register(this, reg);
  st->print("MOV_SLOW    &constanttable,%s\t! constant table base", reg);
}
#endif

#ifndef PRODUCT
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  for (int i = 0; i < OptoPrologueNops; i++) {
    st->print_cr("NOP"); st->print("\t");
  }

  size_t framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  int bangsize = C->output()->bang_size_in_bytes();
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;
  bangsize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->output()->need_stack_bang(bangsize)) {
    st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t");
  }
  st->print_cr("PUSH   R_FP|R_LR_LR"); st->print("\t");
  if (framesize != 0) {
    st->print   ("SUB    R_SP, R_SP, " SIZE_FORMAT,framesize);
  }
}
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  for (int i = 0; i < OptoPrologueNops; i++) {
    __ nop();
  }

  size_t framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  int bangsize = C->output()->bang_size_in_bytes();
  // Remove two words for return addr and fp,
  framesize -= 2*wordSize;
  bangsize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->output()->need_stack_bang(bangsize)) {
    __ arm_stack_overflow_check(bangsize, Rtemp);
  }

  __ raw_push(FP, LR);
  if (framesize != 0) {
    __ sub_slow(SP, SP, framesize);
  }

  // offset from scratch buffer is not valid
  if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) {
    C->output()->set_frame_complete( __ offset() );
  }

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

int MachPrologNode::reloc() const {
  return 10; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  size_t framesize = C->output()->frame_size_in_bytes();
  framesize -= 2*wordSize;

  if (framesize != 0) {
    st->print("ADD    R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize);
  }
  st->print("POP    R_FP|R_LR_LR");

  if (do_polling() && ra_->C->is_method_compilation()) {
    st->print("\n\t");
    st->print("MOV    Rtemp, #PollAddr\t! Load Polling address\n\t");
    st->print("LDR    Rtemp,[Rtemp]\t!Poll for Safepointing");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;

  size_t framesize = C->output()->frame_size_in_bytes();
  framesize -= 2*wordSize;
  if (framesize != 0) {
    __ add_slow(SP, SP, framesize);
  }
  __ raw_pop(FP, LR);

  // If this does safepoint polling, then do it here
  if (do_polling() && ra_->C->is_method_compilation()) {
    __ read_polling_page(Rtemp, relocInfo::poll_return_type);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  return 16; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Figure out which register class each belongs in: rc_int, rc_float, rc_stack
enum RC { rc_bad, rc_int, rc_float, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {
  if (!OptoReg::is_valid(reg)) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;
  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  assert(r->is_FloatRegister(), "must be");
  return rc_float;
}

static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) {
  int rlo = Matcher::_regEncode[src_first];
  int rhi = Matcher::_regEncode[src_second];
  if (!((rlo&1)==0 && (rlo+1 == rhi))) {
    tty->print_cr("CAUGHT BAD LDRD/STRD");
  }
  return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset);
}

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
                                        PhaseRegAlloc *ra_,
                                        bool do_size,
                                        outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if (src_first == dst_first && src_second == dst_second)
    return size;            // Self copy, no move

#ifdef TODO
  if (bottom_type()->isa_vect() != NULL) {
  }
#endif

  // Shared code does not expect instruction set capability based bailouts here.
  // Handle offset unreachable bailout with minimal change in shared code.
  // Bailout only for real instruction emit.
  // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case )

  C2_MacroAssembler _masm(cbuf);

  // --------------------------------------
  // Check for mem-mem move.  Load into unused float registers and fall into
  // the float-store case.
  if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        src_first     = OptoReg::Name(R_mem_copy_lo_num);
        src_second    = OptoReg::Name(R_mem_copy_hi_num);
        src_first_rc  = rc_float;
        src_second_rc = rc_float;
        if (cbuf) {
          __ ldr_double(Rmemcopy, Address(SP, offset));
        } else if (!do_size) {
          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
        }
      } else {
        src_first     = OptoReg::Name(R_mem_copy_lo_num);
        src_first_rc  = rc_float;
        if (cbuf) {
          __ ldr_float(Rmemcopy, Address(SP, offset));
        } else if (!do_size) {
          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
        }
      }
      size += 4;
    }
  }

  if (src_second_rc == rc_stack && dst_second_rc == rc_stack) {
    Unimplemented();
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if (src_first_rc == rc_int && dst_first_rc == rc_int) {
    // Else normal reg-reg copy
    assert( src_second != dst_first, "smashed second before evacuating it" );
    if (cbuf) {
      __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else if (!do_size) {
      st->print("MOV    R_%s, R_%s\t# spill",
                Matcher::regName[dst_first],
                Matcher::regName[src_first]);
#endif
    }
    size += 4;
  }

  // Check for integer store
  if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_first);
    if (cbuf && !is_memoryI(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) {
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        if (cbuf) {
          __ str_64(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ str_32(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for integer load
  if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryI(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) {
        assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous");
        if (cbuf) {
          __ ldr_64(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_64 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ ldr_32(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_32 "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for float reg-reg copy
  if (src_first_rc == rc_float && dst_first_rc == rc_float) {
    if (src_second_rc != rc_bad) {
      assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      if (cbuf) {
      __ mov_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        st->print(MOV_DOUBLE "    R_%s, R_%s\t# spill",
                  Matcher::regName[dst_first],
                  Matcher::regName[src_first]);
#endif
      }
      return 4;
    }
    if (cbuf) {
      __ mov_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else if (!do_size) {
      st->print(MOV_FLOAT "    R_%s, R_%s\t# spill",
                Matcher::regName[dst_first],
                Matcher::regName[src_first]);
#endif
    }
    size = 4;
  }

  // Check for float store
  if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      // Further check for aligned-adjacent pair, so we can use a double store
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
        if (cbuf) {
          __ str_double(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ str_float(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(STR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset);
#endif
        }
      }
    }
    size += 4;
  }

  // Check for float load
  if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
    int offset = ra_->reg2offset(src_first);
    if (cbuf && !is_memoryfp(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      // Further check for aligned-adjacent pair, so we can use a double store
      if (src_second_rc != rc_bad) {
        assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous");
        if (cbuf) {
          __ ldr_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_DOUBLE "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
#endif
        }
        return size + 4;
      } else {
        if (cbuf) {
          __ ldr_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset));
#ifndef PRODUCT
        } else if (!do_size) {
          if (size != 0) st->print("\n\t");
          st->print(LDR_FLOAT "   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset);
#endif
        }
      }
    }
    size += 4;
  }

  // check for int reg -> float reg move
  if (src_first_rc == rc_int && dst_first_rc == rc_float) {
    // Further check for aligned-adjacent pair, so we can use a single instruction
    if (src_second_rc != rc_bad) {
      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
      assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported");
      if (cbuf) {
        __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("FMDRR   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second));
#endif
      }
      return size + 4;
    } else {
      if (cbuf) {
        __ fmsr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print(FMSR "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
#endif
      }
      size += 4;
    }
  }

  // check for float reg -> int reg move
  if (src_first_rc == rc_float && dst_first_rc == rc_int) {
    // Further check for aligned-adjacent pair, so we can use a single instruction
    if (src_second_rc != rc_bad) {
      assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous");
      assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous");
      assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported");
      if (cbuf) {
        __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("FMRRD   R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first));
#endif
      }
      return size + 4;
    } else {
      if (cbuf) {
        __ fmrs(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print(FMRS "   R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first));
#endif
      }
      size += 4;
    }
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving.  Only happens for misaligned
  // arguments to native calls.
  if (src_second == dst_second)
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for integer reg-reg copy.  Hi bits are stuck up in the top
  // 32-bits of a 64-bit register, but are needed in low bits of another
  // register (else it's a hi-bits-to-hi-bits copy which should have
  // happened already as part of a 64-bit move)
  if (src_second_rc == rc_int && dst_second_rc == rc_int) {
    if (cbuf) {
      __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second]));
#ifndef PRODUCT
    } else if (!do_size) {
      if (size != 0) st->print("\n\t");
      st->print("MOV    R_%s, R_%s\t# spill high",
                Matcher::regName[dst_second],
                Matcher::regName[src_second]);
#endif
    }
    return size+4;
  }

  // Check for high word integer store
  if (src_second_rc == rc_int && dst_second_rc == rc_stack) {
    int offset = ra_->reg2offset(dst_second);

    if (cbuf && !is_memoryP(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (cbuf) {
        __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(SP, offset));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("STR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset);
#endif
      }
    }
    return size + 4;
  }

  // Check for high word integer load
  if (dst_second_rc == rc_int && src_second_rc == rc_stack) {
    int offset = ra_->reg2offset(src_second);
    if (cbuf && !is_memoryP(offset)) {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
      return 0;
    } else {
      if (cbuf) {
        __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(SP, offset));
#ifndef PRODUCT
      } else if (!do_size) {
        if (size != 0) st->print("\n\t");
        st->print("LDR   R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_second), offset);
#endif
      }
    }
    return size + 4;
  }

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}

//=============================================================================
#ifndef PRODUCT
void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  C2_MacroAssembler _masm(&cbuf);
  for(int i = 0; i < _count; i += 1) {
    __ nop();
  }
}

uint MachNopNode::size(PhaseRegAlloc *ra_) const {
  return 4 * _count;
}

//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("ADD    %s,R_SP+#%d",Matcher::regName[reg], offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  Register dst = reg_to_register_object(reg);

  if (is_aimm(offset)) {
    __ add(dst, SP, offset);
  } else {
    __ mov_slow(dst, offset);
    __ add(dst, SP, dst);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
  assert(ra_ == ra_->C->regalloc(), "sanity");
  return ra_->C->output()->scratch_emit_size(this);
}

//=============================================================================
#ifndef PRODUCT
#define R_RTEMP "R_R12"
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  st->print_cr("\nUEP:");
  if (UseCompressedClassPointers) {
    st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
    st->print_cr("\tdecode_klass " R_RTEMP);
  } else {
    st->print_cr("\tLDR   " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check");
  }
  st->print_cr("\tCMP   " R_RTEMP ",R_R8" );
  st->print   ("\tB.NE  SharedRuntime::handle_ic_miss_stub");
}
#endif

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  Register iCache  = reg_to_register_object(Matcher::inline_cache_reg_encode());
  assert(iCache == Ricklass, "should be");
  Register receiver = R0;

  __ load_klass(Rtemp, receiver);
  __ cmp(Rtemp, iCache);
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

// Emit exception handler code.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }

  int offset = __ offset();

  // OK to trash LR, because exception blob will kill it
  __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, LR_tmp);

  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");

  __ end_a_stub();

  return offset;
}

int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
  // Can't use any of the current frame's registers as we may have deopted
  // at a poll and everything can be live.
  C2_MacroAssembler _masm(&cbuf);

  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }

  int offset = __ offset();
  address deopt_pc = __ pc();

  __ sub(SP, SP, wordSize); // make room for saved PC
  __ push(LR); // save LR that may be live when we get here
  __ mov_relative_address(LR, deopt_pc);
  __ str(LR, Address(SP, wordSize)); // save deopt PC
  __ pop(LR); // restore LR
  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");

  __ end_a_stub();
  return offset;
}

const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
  case Op_PopCountI:
  case Op_PopCountL:
    if (!UsePopCountInstruction)
      return false;
    break;
  case Op_LShiftCntV:
  case Op_RShiftCntV:
  case Op_AddVB:
  case Op_AddVS:
  case Op_AddVI:
  case Op_AddVL:
  case Op_SubVB:
  case Op_SubVS:
  case Op_SubVI:
  case Op_SubVL:
  case Op_MulVS:
  case Op_MulVI:
  case Op_LShiftVB:
  case Op_LShiftVS:
  case Op_LShiftVI:
  case Op_LShiftVL:
  case Op_RShiftVB:
  case Op_RShiftVS:
  case Op_RShiftVI:
  case Op_RShiftVL:
  case Op_URShiftVB:
  case Op_URShiftVS:
  case Op_URShiftVI:
  case Op_URShiftVL:
  case Op_AndV:
  case Op_OrV:
  case Op_XorV:
    return VM_Version::has_simd();
  case Op_LoadVector:
  case Op_StoreVector:
  case Op_AddVF:
  case Op_SubVF:
  case Op_MulVF:
    return VM_Version::has_vfp() || VM_Version::has_simd();
  case Op_AddVD:
  case Op_SubVD:
  case Op_MulVD:
  case Op_DivVF:
  case Op_DivVD:
    return VM_Version::has_vfp();
  }

  return true;  // Per default match rules are supported.
}

const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {

  // TODO
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode) && vector_size_supported(bt, vlen);
  // Add rules here.

  return ret_value;  // Per default match rules are supported.
}

const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  return false;
}

const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}

const RegMask* Matcher::predicate_reg_mask(void) {
  return NULL;
}

const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return NULL;
}

// Vector calling convention not yet implemented.
const bool Matcher::supports_vector_calling_convention(void) {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Vector width in bytes
const int Matcher::vector_width_in_bytes(BasicType bt) {
  return MaxVectorSize;
}

const int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

const int Matcher::min_vector_size(const BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  return 8/type2aelembytes(bt);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On ARM a branch displacement is calculated relative to address
  // of the branch + 8.
  //
  // offset -= 8;
  // return (Assembler::is_simm24(offset));
  return false;
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
  ShouldNotReachHere(); // generic vector operands not supported
  return NULL;
}

bool Matcher::is_reg2reg_move(MachNode* m) {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}

bool Matcher::is_generic_vector(MachOper* opnd)  {
  ShouldNotReachHere();  // generic vector operands not supported
  return false;
}

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit);           // m = ShiftCntV
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  return clone_base_plus_offset_address(m, mstack, address_visited);
}

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if (reg == R_R0_num ||
      reg == R_R1_num ||
      reg == R_R2_num ||
      reg == R_R3_num) return true;

  if (reg >= R_S0_num &&
      reg <= R_S13_num) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 12 : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 30 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  return false;
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REGP_mask();
}

bool maybe_far_call(const CallNode *n) {
  return !MacroAssembler::_reachable_from_cache(n->as_Call()->entry_point());
}

bool maybe_far_call(const MachCallNode *n) {
  return !MacroAssembler::_reachable_from_cache(n->as_MachCall()->entry_point());
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes are parameterized macros used by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.   CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
//
// Instructions specify two basic values for encoding.  Again, a function
// is available to check if the constant displacement is an oop. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  enc_class call_epilog %{
    // nothing
  %}

  enc_class Java_To_Runtime (method meth) %{
    // CALL directly to the runtime
    emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
  %}

  enc_class Java_Static_Call (method meth) %{
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.

    if ( !_method) {
      emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);

      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class save_last_PC %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
    C2_MacroAssembler _masm(&cbuf);
    int ret_addr_offset = as_MachCall()->ret_addr_offset();
    __ adr(LR, mark + ret_addr_offset);
    __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");
    // restore mark
    cbuf.insts()->set_mark(mark);
  %}

  enc_class preserve_SP %{
    // preserve mark
    address mark = cbuf.insts()->mark();
    debug_only(int off0 = cbuf.insts_size());
    C2_MacroAssembler _masm(&cbuf);
    // FP is preserved across all calls, even compiled calls.
    // Use it to preserve SP in places where the callee might change the SP.
    __ mov(Rmh_SP_save, SP);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == 4, "correct size prediction");
    // restore mark
    cbuf.insts()->set_mark(mark);
  %}

  enc_class restore_SP %{
    C2_MacroAssembler _masm(&cbuf);
    __ mov(SP, Rmh_SP_save);
  %}

  enc_class Java_Dynamic_Call (method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
    assert(R8_ic_reg == Ricklass, "should be");
    __ set_inst_mark();
    __ movw(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff);
    __ movt(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16);
    address  virtual_call_oop_addr = __ inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    int method_index = resolved_method_index(cbuf);
    __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
    emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none);
  %}

  enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
    // FIXME: load from constant table?
    // Load a constant replicated "count" times with width "width"
    int count = $cnt$$constant;
    int width = $wth$$constant;
    assert(count*width == 4, "sanity");
    int val = $src$$constant;
    if (width < 4) {
      int bit_width = width * 8;
      val &= (((int)1) << bit_width) - 1; // mask off sign bits
      for (int i = 0; i < count - 1; i++) {
        val |= (val << bit_width);
      }
    }
    C2_MacroAssembler _masm(&cbuf);

    if (val == -1) {
      __ mvn($tmp$$Register, 0);
    } else if (val == 0) {
      __ mov($tmp$$Register, 0);
    } else {
      __ movw($tmp$$Register, val & 0xffff);
      __ movt($tmp$$Register, (unsigned int)val >> 16);
    }
    __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
  %}

  enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{
    // Replicate float con 2 times and pack into vector (8 bytes) in regD.
    float fval = $src$$constant;
    int val = *((int*)&fval);
    C2_MacroAssembler _masm(&cbuf);

    if (val == -1) {
      __ mvn($tmp$$Register, 0);
    } else if (val == 0) {
      __ mov($tmp$$Register, 0);
    } else {
      __ movw($tmp$$Register, val & 0xffff);
      __ movt($tmp$$Register, (unsigned int)val >> 16);
    }
    __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
  %}

  enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
    Label Ldone, Lloop;
    C2_MacroAssembler _masm(&cbuf);

    Register   str1_reg = $str1$$Register;
    Register   str2_reg = $str2$$Register;
    Register   cnt1_reg = $cnt1$$Register; // int
    Register   cnt2_reg = $cnt2$$Register; // int
    Register   tmp1_reg = $tmp1$$Register;
    Register   tmp2_reg = $tmp2$$Register;
    Register result_reg = $result$$Register;

    assert_different_registers(str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp1_reg, tmp2_reg);

    // Compute the minimum of the string lengths(str1_reg) and the
    // difference of the string lengths (stack)

    // See if the lengths are different, and calculate min in str1_reg.
    // Stash diff in tmp2 in case we need it for a tie-breaker.
    __ subs_32(tmp2_reg, cnt1_reg, cnt2_reg);
    __ mov(cnt1_reg, AsmOperand(cnt1_reg, lsl, exact_log2(sizeof(jchar)))); // scale the limit
    __ mov(cnt1_reg, AsmOperand(cnt2_reg, lsl, exact_log2(sizeof(jchar))), pl); // scale the limit

    // reallocate cnt1_reg, cnt2_reg, result_reg
    // Note:  limit_reg holds the string length pre-scaled by 2
    Register limit_reg = cnt1_reg;
    Register  chr2_reg = cnt2_reg;
    Register  chr1_reg = tmp1_reg;
    // str{12} are the base pointers

    // Is the minimum length zero?
    __ cmp_32(limit_reg, 0);
    if (result_reg != tmp2_reg) {
      __ mov(result_reg, tmp2_reg, eq);
    }
    __ b(Ldone, eq);

    // Load first characters
    __ ldrh(chr1_reg, Address(str1_reg, 0));
    __ ldrh(chr2_reg, Address(str2_reg, 0));

    // Compare first characters
    __ subs(chr1_reg, chr1_reg, chr2_reg);
    if (result_reg != chr1_reg) {
      __ mov(result_reg, chr1_reg, ne);
    }
    __ b(Ldone, ne);

    {
      // Check after comparing first character to see if strings are equivalent
      // Check if the strings start at same location
      __ cmp(str1_reg, str2_reg);
      // Check if the length difference is zero
      __ cond_cmp(tmp2_reg, 0, eq);
      __ mov(result_reg, 0, eq); // result is zero
      __ b(Ldone, eq);
      // Strings might not be equal
    }

    __ subs(chr1_reg, limit_reg, 1 * sizeof(jchar));
    if (result_reg != tmp2_reg) {
      __ mov(result_reg, tmp2_reg, eq);
    }
    __ b(Ldone, eq);

    // Shift str1_reg and str2_reg to the end of the arrays, negate limit
    __ add(str1_reg, str1_reg, limit_reg);
    __ add(str2_reg, str2_reg, limit_reg);
    __ neg(limit_reg, chr1_reg);  // limit = -(limit-2)

    // Compare the rest of the characters
    __ bind(Lloop);
    __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
    __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
    __ subs(chr1_reg, chr1_reg, chr2_reg);
    if (result_reg != chr1_reg) {
      __ mov(result_reg, chr1_reg, ne);
    }
    __ b(Ldone, ne);

    __ adds(limit_reg, limit_reg, sizeof(jchar));
    __ b(Lloop, ne);

    // If strings are equal up to min length, return the length difference.
    if (result_reg != tmp2_reg) {
      __ mov(result_reg, tmp2_reg);
    }

    // Otherwise, return the difference between the first mismatched chars.
    __ bind(Ldone);
  %}

  enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
    Label Lchar, Lchar_loop, Ldone, Lequal;
    C2_MacroAssembler _masm(&cbuf);

    Register   str1_reg = $str1$$Register;
    Register   str2_reg = $str2$$Register;
    Register    cnt_reg = $cnt$$Register; // int
    Register   tmp1_reg = $tmp1$$Register;
    Register   tmp2_reg = $tmp2$$Register;
    Register result_reg = $result$$Register;

    assert_different_registers(str1_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, result_reg);

    __ cmp(str1_reg, str2_reg); //same char[] ?
    __ b(Lequal, eq);

    __ cbz_32(cnt_reg, Lequal); // count == 0

    //rename registers
    Register limit_reg = cnt_reg;
    Register  chr1_reg = tmp1_reg;
    Register  chr2_reg = tmp2_reg;

    __ logical_shift_left(limit_reg, limit_reg, exact_log2(sizeof(jchar)));

    //check for alignment and position the pointers to the ends
    __ orr(chr1_reg, str1_reg, str2_reg);
    __ tst(chr1_reg, 0x3);

    // notZero means at least one not 4-byte aligned.
    // We could optimize the case when both arrays are not aligned
    // but it is not frequent case and it requires additional checks.
    __ b(Lchar, ne);

    // Compare char[] arrays aligned to 4 bytes.
    __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
                          chr1_reg, chr2_reg, Ldone);

    __ b(Lequal); // equal

    // char by char compare
    __ bind(Lchar);
    __ mov(result_reg, 0);
    __ add(str1_reg, limit_reg, str1_reg);
    __ add(str2_reg, limit_reg, str2_reg);
    __ neg(limit_reg, limit_reg); //negate count

    // Lchar_loop
    __ bind(Lchar_loop);
    __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
    __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
    __ cmp(chr1_reg, chr2_reg);
    __ b(Ldone, ne);
    __ adds(limit_reg, limit_reg, sizeof(jchar));
    __ b(Lchar_loop, ne);

    __ bind(Lequal);
    __ mov(result_reg, 1);  //equal

    __ bind(Ldone);
  %}

  enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
    Label Ldone, Lloop, Lequal;
    C2_MacroAssembler _masm(&cbuf);

    Register   ary1_reg = $ary1$$Register;
    Register   ary2_reg = $ary2$$Register;
    Register   tmp1_reg = $tmp1$$Register;
    Register   tmp2_reg = $tmp2$$Register;
    Register   tmp3_reg = $tmp3$$Register;
    Register result_reg = $result$$Register;

    assert_different_registers(ary1_reg, ary2_reg, tmp1_reg, tmp2_reg, tmp3_reg, result_reg);

    int length_offset  = arrayOopDesc::length_offset_in_bytes();
    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

    // return true if the same array
    __ teq(ary1_reg, ary2_reg);
    __ mov(result_reg, 1, eq);
    __ b(Ldone, eq); // equal

    __ tst(ary1_reg, ary1_reg);
    __ mov(result_reg, 0, eq);
    __ b(Ldone, eq);    // not equal

    __ tst(ary2_reg, ary2_reg);
    __ mov(result_reg, 0, eq);
    __ b(Ldone, eq);    // not equal

    //load the lengths of arrays
    __ ldr_s32(tmp1_reg, Address(ary1_reg, length_offset)); // int
    __ ldr_s32(tmp2_reg, Address(ary2_reg, length_offset)); // int

    // return false if the two arrays are not equal length
    __ teq_32(tmp1_reg, tmp2_reg);
    __ mov(result_reg, 0, ne);
    __ b(Ldone, ne);    // not equal

    __ tst(tmp1_reg, tmp1_reg);
    __ mov(result_reg, 1, eq);
    __ b(Ldone, eq);    // zero-length arrays are equal

    // load array addresses
    __ add(ary1_reg, ary1_reg, base_offset);
    __ add(ary2_reg, ary2_reg, base_offset);

    // renaming registers
    Register chr1_reg  =  tmp3_reg;   // for characters in ary1
    Register chr2_reg  =  tmp2_reg;   // for characters in ary2
    Register limit_reg =  tmp1_reg;   // length

    // set byte count
    __ logical_shift_left_32(limit_reg, limit_reg, exact_log2(sizeof(jchar)));

    // Compare char[] arrays aligned to 4 bytes.
    __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
                          chr1_reg, chr2_reg, Ldone);
    __ bind(Lequal);
    __ mov(result_reg, 1);  //equal

    __ bind(Ldone);
    %}
%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add VMRegImpl::stack0)
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These two registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(R_Ricklass);          // Inline Cache Register or Method* for I2C

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset);

  // Number of stack slots consumed by a Monitor enter
  sync_stack_slots(1 * VMRegImpl::slots_per_word);

  // Compiled code's Frame Pointer
  frame_pointer(R_R13);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes);
  //  LP64: Alignment size in bytes (128-bit -> 16 bytes)
  // !LP64: Alignment size in bytes (64-bit  ->  8 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  // ADLC doesn't support parsing expressions, so I folded the math by hand.
  varargs_C_out_slots_killed( 0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1*VMRegImpl::slots_per_word +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C
  return_value %{
    return c2::return_value(ideal_reg);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
ins_attrib ins_size(32);           // Required size attribute (in bits)
ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
                                   // non-matching short branch variant of some
                                                            // long branch?

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: 32-bit
operand immI() %{
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit unsigned - for VMOV
operand immU8() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 255));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit
operand immI16() %{
  predicate((n->get_int() >> 16) == 0 && VM_Version::supports_movw());
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: offset for half and double word loads and stores
operand immIHD() %{
  predicate(is_memoryHD(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: offset for fp loads and stores
operand immIFP() %{
  predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes and shifts
operand immU5() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 31));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 6-bit
operand immU6Big() %{
  predicate(n->get_int() >= 32 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 0-bit
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 1
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 2
operand immI_2() %{
  predicate(n->get_int() == 2);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 3
operand immI_3() %{
  predicate(n->get_int() == 3);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 4
operand immI_4() %{
  predicate(n->get_int() == 4);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 8
operand immI_8() %{
  predicate(n->get_int() == 8);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the values 32-63
operand immI_32_63() %{
  predicate(n->get_int() >= 32 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Integer Immediate: the value 16
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 24
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 255
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 65535
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediates for arithmetic instructions

operand aimmI() %{
  predicate(is_aimm(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand aimmIneg() %{
  predicate(is_aimm(-n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand aimmU31() %{
  predicate((0 <= n->get_int()) && is_aimm(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediates for logical instructions

operand limmI() %{
  predicate(is_limmI(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand limmIlow8() %{
  predicate(is_limmI_low(n->get_int(), 8));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand limmU31() %{
  predicate(0 <= n->get_int() && is_limmI(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand limmIn() %{
  predicate(is_limmI(~n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value FF
operand immL_FF() %{
  predicate( n->get_long() == 0xFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value FFFF
operand immL_FFFF() %{
  predicate( n->get_long() == 0xFFFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 32 or 64-bit
operand immP() %{
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

operand immP0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

operand immNKlass()
%{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL() %{
  match(ConL);
  op_cost(40);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

operand immL0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 16-bit
operand immL16() %{
  predicate(n->get_long() >= 0 && n->get_long() < (1<<16)  && VM_Version::supports_movw());
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD() %{
  match(ConD);

  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d.
operand immD0() %{
  predicate(jlong_cast(n->getd()) == 0);

  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand imm8D() %{
  predicate(Assembler::double_num(n->getd()).can_be_imm8());
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  match(ConF);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f
operand immF0() %{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: encoded as 8 bits
operand imm8F() %{
  predicate(Assembler::float_num(n->getf()).can_be_imm8());
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Register Operands
// Integer Register
operand iRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(R0RegI);
  match(R1RegI);
  match(R2RegI);
  match(R3RegI);
  match(R12RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register
operand iRegP() %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(R0RegP);
  match(R1RegP);
  match(R2RegP);
  match(RExceptionRegP);
  match(R8RegP);
  match(R9RegP);
--> --------------------

--> maximum size reached

--> --------------------