Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Thinking Intellekt

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/JAVA/openjdk-20-36_src/src/hotspot/cpu/aarch64/

Quellcode-Bibliothek

^© Kompilation durch diese Firma

[Weder Korrektheit noch Funktionsfähigkeit der Software werden zugesichert.]

Datei: Sprache: Unknown

Columbo aufrufen.ad zum Wurzelverzeichnis wechselnSML {SML[185] C[216] BAT[486]}Datei anzeigen

//
// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2021, Red Hat, Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// AArch64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// We must define the 64 bit int registers in two 32 bit halves, the
// real lower register and a virtual upper half register. upper halves
// are used by the register allocator but are not actually supplied as
// operands to memory ops.
//
// follow the C1 compiler in making registers
//
//   r0-r7,r10-r26 volatile (caller save)
//   r27-r32 system (no save, no allocate)
//   r8-r9 non-allocatable (so we can use them as scratch regs)
//
// as regards Java usage. we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values)
//

// General Registers

reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R8      ( NS,  SOC, Op_RegI,  8, r8->as_VMReg()         ); // rscratch1, non-allocatable
reg_def R8_H    ( NS,  SOC, Op_RegI,  8, r8->as_VMReg()->next() );
reg_def R9      ( NS,  SOC, Op_RegI,  9, r9->as_VMReg()         ); // rscratch2, non-allocatable
reg_def R9_H    ( NS,  SOC, Op_RegI,  9, r9->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18_tls->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18_tls->as_VMReg()->next());
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
reg_def R27     ( SOC, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   ( SOC, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());

// ----------------------------
// Float/Double/Vector Registers
// ----------------------------

// Double Registers

// The rules of ADL require that double registers be defined in pairs.
// Each pair must be two 32-bit values, but not necessarily a pair of
// single float registers. In each pair, ADLC-assigned register numbers
// must be adjacent, with the lower number even. Finally, when the
// CPU stores such a register pair to memory, the word associated with
// the lower ADLC-assigned number must be stored to the lower address.

// AArch64 has 32 floating-point registers. Each can store a vector of
// single or double precision floating-point values up to 8 * 32
// floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
// use the first float or double element of the vector.

// for Java use float registers v0-v15 are always save on call whereas
// the platform ABI treats v8-v15 as callee save). float registers
// v16-v31 are SOC as per the platform spec

// For SVE vector registers, we simply extend vector register size to 8
// 'logical' slots. This is nominally 256 bits but it actually covers
// all possible 'physical' SVE vector register lengths from 128 ~ 2048
// bits. The 'physical' SVE vector register length is detected during
// startup, so the register allocator is able to identify the correct
// number of bytes needed for an SVE spill/unspill.
// Note that a vector register with 4 slots denotes a 128-bit NEON
// register allowing it to be distinguished from the corresponding SVE
// vector register when the SVE vector length is 128 bits.

  reg_def V0   ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );

  reg_def V8   ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );

  reg_def V10   ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()          );
  reg_def V10_H ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next()  );
  reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) );
  reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) );

  reg_def V11   ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()          );
  reg_def V11_H ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next()  );
  reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) );
  reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) );

  reg_def V12   ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()          );
  reg_def V12_H ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next()  );
  reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) );
  reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) );

  reg_def V13   ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()          );
  reg_def V13_H ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next()  );
  reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) );
  reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) );

  reg_def V14   ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()          );
  reg_def V14_H ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next()  );
  reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) );
  reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) );

  reg_def V15   ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()          );
  reg_def V15_H ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next()  );
  reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) );
  reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) );

  reg_def V16   ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()          );
  reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next()  );
  reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) );
  reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) );

  reg_def V17   ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()          );
  reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next()  );
  reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) );
  reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) );

  reg_def V18   ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()          );
  reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next()  );
  reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) );
  reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) );

  reg_def V19   ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()          );
  reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next()  );
  reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) );
  reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) );

  reg_def V20   ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()          );
  reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next()  );
  reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) );
  reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) );

  reg_def V21   ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()          );
  reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next()  );
  reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) );
  reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) );

  reg_def V22   ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()          );
  reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next()  );
  reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) );
  reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) );

  reg_def V23   ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()          );
  reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next()  );
  reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) );
  reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) );

  reg_def V24   ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()          );
  reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next()  );
  reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) );
  reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) );

  reg_def V25   ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()          );
  reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next()  );
  reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) );
  reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) );

  reg_def V26   ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()          );
  reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next()  );
  reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) );
  reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) );

  reg_def V27   ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()          );
  reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next()  );
  reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) );
  reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) );

  reg_def V28   ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()          );
  reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next()  );
  reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) );
  reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) );

  reg_def V29   ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()          );
  reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next()  );
  reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) );
  reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) );

  reg_def V30   ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()          );
  reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next()  );
  reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) );
  reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) );

  reg_def V31   ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()          );
  reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next()  );
  reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) );
  reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) );

// ----------------------------
// SVE Predicate Registers
// ----------------------------
  reg_def P0 (SOC, SOC, Op_RegVectMask, 0, p0->as_VMReg());
  reg_def P1 (SOC, SOC, Op_RegVectMask, 1, p1->as_VMReg());
  reg_def P2 (SOC, SOC, Op_RegVectMask, 2, p2->as_VMReg());
  reg_def P3 (SOC, SOC, Op_RegVectMask, 3, p3->as_VMReg());
  reg_def P4 (SOC, SOC, Op_RegVectMask, 4, p4->as_VMReg());
  reg_def P5 (SOC, SOC, Op_RegVectMask, 5, p5->as_VMReg());
  reg_def P6 (SOC, SOC, Op_RegVectMask, 6, p6->as_VMReg());
  reg_def P7 (SOC, SOC, Op_RegVectMask, 7, p7->as_VMReg());
  reg_def P8 (SOC, SOC, Op_RegVectMask, 8, p8->as_VMReg());
  reg_def P9 (SOC, SOC, Op_RegVectMask, 9, p9->as_VMReg());
  reg_def P10 (SOC, SOC, Op_RegVectMask, 10, p10->as_VMReg());
  reg_def P11 (SOC, SOC, Op_RegVectMask, 11, p11->as_VMReg());
  reg_def P12 (SOC, SOC, Op_RegVectMask, 12, p12->as_VMReg());
  reg_def P13 (SOC, SOC, Op_RegVectMask, 13, p13->as_VMReg());
  reg_def P14 (SOC, SOC, Op_RegVectMask, 14, p14->as_VMReg());
  reg_def P15 (SOC, SOC, Op_RegVectMask, 15, p15->as_VMReg());

// ----------------------------
// Special Registers
// ----------------------------

// the AArch64 CSPR status flag register is not directly accessible as
// instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FSPR occurs as an
// immediate value in the instruction).

reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
    R8, R8_H,   // rscratch1
    R9, R9_H,   // rscratch2
);

alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

alloc_class chunk2 (
    // Governing predicates for load/store and arithmetic
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,

    // Extra predicates
    P8,
    P9,
    P10,
    P11,
    P12,
    P13,
    P14,
    P15,

    // Preserved for all-true predicate
    P7,
);

alloc_class chunk3(RFLAGS);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Class for all 32 bit general purpose registers
reg_class all_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30,
    R31
);

// Class for all 32 bit integer registers (excluding SP which
// will never be used as an integer register)
reg_class any_reg32 %{
  return _ANY_REG32_mask;
%}

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Singleton class for R31 int register
reg_class int_r31_reg(R31);

// Class for all 64 bit general purpose registers
reg_class all_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all long integer registers (including SP)
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for non-allocatable 32 bit registers
reg_class non_allocatable_reg32(
#ifdef R18_RESERVED
    // See comment in register_aarch64.hpp
    R18,                        // tls on Windows
#endif
    R28,                        // thread
    R30,                        // lr
    R31                         // sp
);

// Class for non-allocatable 64 bit registers
reg_class non_allocatable_reg(
#ifdef R18_RESERVED
    // See comment in register_aarch64.hpp
    R18, R18_H,                 // tls on Windows, platform register on macOS
#endif
    R28, R28_H,                 // thread
    R30, R30_H,                 // lr
    R31, R31_H                  // sp
);

// Class for all non-special integer registers
reg_class no_special_reg32 %{
  return _NO_SPECIAL_REG32_mask;
%}

// Class for all non-special long integer registers
reg_class no_special_reg %{
  return _NO_SPECIAL_REG_mask;
%}

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);

// Class for all pointer registers
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all non_special pointer registers
reg_class no_special_ptr_reg %{
  return _NO_SPECIAL_PTR_REG_mask;
%}

// Class for all float registers
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all SVE vector registers.
reg_class vectora_reg (
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,
);

// Class for all 64bit vector registers
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);

// Class for all 128bit vector registers
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);

// Class for 128 bit register v0
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Class for 128 bit register v4
reg_class v4_reg(
    V4, V4_H
);

// Class for 128 bit register v5
reg_class v5_reg(
    V5, V5_H
);

// Class for 128 bit register v6
reg_class v6_reg(
    V6, V6_H
);

// Class for 128 bit register v7
reg_class v7_reg(
    V7, V7_H
);

// Class for 128 bit register v8
reg_class v8_reg(
    V8, V8_H
);

// Class for 128 bit register v9
reg_class v9_reg(
    V9, V9_H
);

// Class for 128 bit register v10
reg_class v10_reg(
    V10, V10_H
);

// Class for 128 bit register v11
reg_class v11_reg(
    V11, V11_H
);

// Class for 128 bit register v12
reg_class v12_reg(
    V12, V12_H
);

// Class for 128 bit register v13
reg_class v13_reg(
    V13, V13_H
);

// Class for 128 bit register v14
reg_class v14_reg(
    V14, V14_H
);

// Class for 128 bit register v15
reg_class v15_reg(
    V15, V15_H
);

// Class for 128 bit register v16
reg_class v16_reg(
    V16, V16_H
);

// Class for 128 bit register v17
reg_class v17_reg(
    V17, V17_H
);

// Class for 128 bit register v18
reg_class v18_reg(
    V18, V18_H
);

// Class for 128 bit register v19
reg_class v19_reg(
    V19, V19_H
);

// Class for 128 bit register v20
reg_class v20_reg(
    V20, V20_H
);

// Class for 128 bit register v21
reg_class v21_reg(
    V21, V21_H
);

// Class for 128 bit register v22
reg_class v22_reg(
    V22, V22_H
);

// Class for 128 bit register v23
reg_class v23_reg(
    V23, V23_H
);

// Class for 128 bit register v24
reg_class v24_reg(
    V24, V24_H
);

// Class for 128 bit register v25
reg_class v25_reg(
    V25, V25_H
);

// Class for 128 bit register v26
reg_class v26_reg(
    V26, V26_H
);

// Class for 128 bit register v27
reg_class v27_reg(
    V27, V27_H
);

// Class for 128 bit register v28
reg_class v28_reg(
    V28, V28_H
);

// Class for 128 bit register v29
reg_class v29_reg(
    V29, V29_H
);

// Class for 128 bit register v30
reg_class v30_reg(
    V30, V30_H
);

// Class for 128 bit register v31
reg_class v31_reg(
    V31, V31_H
);

// Class for all SVE predicate registers.
reg_class pr_reg (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
    P8,
    P9,
    P10,
    P11,
    P12,
    P13,
    P14,
    P15
);

// Class for SVE governing predicate registers, which are used
// to determine the active elements of a predicated instruction.
reg_class gov_pr (
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    // P7, non-allocatable, preserved with all elements preset to TRUE.
);

reg_class p0_reg(P0);
reg_class p1_reg(P1);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

%}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//

// we follow the ppc-aix port in using a simple cost model which ranks
// register operations as cheap, memory ops as more expensive and
// branches as most expensive. the first two have a low as well as a
// normal cost. huge cost appears to be a way of saying don't do
// something

definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "opto/addnode.hpp"
#include "opto/convertnode.hpp"
#include "runtime/objectMonitor.hpp"

extern RegMask _ANY_REG32_mask;
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _NO_SPECIAL_REG32_mask;
extern RegMask _NO_SPECIAL_REG_mask;
extern RegMask _NO_SPECIAL_PTR_REG_mask;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    return MacroAssembler::far_codestub_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return NativeInstruction::instruction_size + MacroAssembler::far_codestub_branch_size();
  }
};

class Node::PD {
public:
  enum NodeFlags {
    _last_flag = Node::_last_flag
  };
};

  bool is_CAS(int opcode, bool maybe_volatile);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}

source %{

  // Derived RegMask with conditionally allocatable registers

  void PhaseOutput::pd_perform_mach_node_analysis() {
  }

  int MachNode::pd_alignment_required() const {
    return 1;
  }

  int MachNode::compute_padding(int current_offset) const {
    return 0;
  }

  RegMask _ANY_REG32_mask;
  RegMask _ANY_REG_mask;
  RegMask _PTR_REG_mask;
  RegMask _NO_SPECIAL_REG32_mask;
  RegMask _NO_SPECIAL_REG_mask;
  RegMask _NO_SPECIAL_PTR_REG_mask;

  void reg_mask_init() {
    // We derive below RegMask(s) from the ones which are auto-generated from
    // adlc register classes to make AArch64 rheapbase (r27) and rfp (r29)
    // registers conditionally reserved.

    _ANY_REG32_mask = _ALL_REG32_mask;
    _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));

    _ANY_REG_mask = _ALL_REG_mask;

    _PTR_REG_mask = _ALL_REG_mask;

    _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
    _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);

    _NO_SPECIAL_REG_mask = _ALL_REG_mask;
    _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);

    _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);

    // r27 is not allocatable when compressed oops is on and heapbase is not
    // zero, compressed klass pointers doesn't use r27 after JDK-8234794
    if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) {
      _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
      _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
      _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
    }

    // r29 is not allocatable when PreserveFramePointer is on
    if (PreserveFramePointer) {
      _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
      _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
      _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
    }
  }

  // Optimizaton of volatile gets and puts
  // -------------------------------------
  //
  // AArch64 has ldar<x> and stlr<x> instructions which we can safely
  // use to implement volatile reads and writes. For a volatile read
  // we simply need
  //
  //   ldar<x>
  //
  // and for a volatile write we need
  //
  //   stlr<x>
  //
  // Alternatively, we can implement them by pairing a normal
  // load/store with a memory barrier. For a volatile read we need
  //
  //   ldr<x>
  //   dmb ishld
  //
  // for a volatile write
  //
  //   dmb ish
  //   str<x>
  //   dmb ish
  //
  // We can also use ldaxr and stlxr to implement compare and swap CAS
  // sequences. These are normally translated to an instruction
  // sequence like the following
  //
  //   dmb      ish
  // retry:
  //   ldxr<x>   rval raddr
  //   cmp       rval rold
  //   b.ne done
  //   stlxr<x>  rval, rnew, rold
  //   cbnz      rval retry
  // done:
  //   cset      r0, eq
  //   dmb ishld
  //
  // Note that the exclusive store is already using an stlxr
  // instruction. That is required to ensure visibility to other
  // threads of the exclusive write (assuming it succeeds) before that
  // of any subsequent writes.
  //
  // The following instruction sequence is an improvement on the above
  //
  // retry:
  //   ldaxr<x>  rval raddr
  //   cmp       rval rold
  //   b.ne done
  //   stlxr<x>  rval, rnew, rold
  //   cbnz      rval retry
  // done:
  //   cset      r0, eq
  //
  // We don't need the leading dmb ish since the stlxr guarantees
  // visibility of prior writes in the case that the swap is
  // successful. Crucially we don't have to worry about the case where
  // the swap is not successful since no valid program should be
  // relying on visibility of prior changes by the attempting thread
  // in the case where the CAS fails.
  //
  // Similarly, we don't need the trailing dmb ishld if we substitute
  // an ldaxr instruction since that will provide all the guarantees we
  // require regarding observation of changes made by other threads
  // before any change to the CAS address observed by the load.
  //
  // In order to generate the desired instruction sequence we need to
  // be able to identify specific 'signature' ideal graph node
  // sequences which i) occur as a translation of a volatile reads or
  // writes or CAS operations and ii) do not occur through any other
  // translation or graph transformation. We can then provide
  // alternative aldc matching rules which translate these node
  // sequences to the desired machine code sequences. Selection of the
  // alternative rules can be implemented by predicates which identify
  // the relevant node sequences.
  //
  // The ideal graph generator translates a volatile read to the node
  // sequence
  //
  //   LoadX[mo_acquire]
  //   MemBarAcquire
  //
  // As a special case when using the compressed oops optimization we
  // may also see this variant
  //
  //   LoadN[mo_acquire]
  //   DecodeN
  //   MemBarAcquire
  //
  // A volatile write is translated to the node sequence
  //
  //   MemBarRelease
  //   StoreX[mo_release] {CardMark}-optional
  //   MemBarVolatile
  //
  // n.b. the above node patterns are generated with a strict
  // 'signature' configuration of input and output dependencies (see
  // the predicates below for exact details). The card mark may be as
  // simple as a few extra nodes or, in a few GC configurations, may
  // include more complex control flow between the leading and
  // trailing memory barriers. However, whatever the card mark
  // configuration these signatures are unique to translated volatile
  // reads/stores -- they will not appear as a result of any other
  // bytecode translation or inlining nor as a consequence of
  // optimizing transforms.
  //
  // We also want to catch inlined unsafe volatile gets and puts and
  // be able to implement them using either ldar<x>/stlr<x> or some
  // combination of ldr<x>/stlr<x> and dmb instructions.
  //
  // Inlined unsafe volatiles puts manifest as a minor variant of the
  // normal volatile put node sequence containing an extra cpuorder
  // membar
  //
  //   MemBarRelease
  //   MemBarCPUOrder
  //   StoreX[mo_release] {CardMark}-optional
  //   MemBarCPUOrder
  //   MemBarVolatile
  //
  // n.b. as an aside, a cpuorder membar is not itself subject to
  // matching and translation by adlc rules.  However, the rule
  // predicates need to detect its presence in order to correctly
  // select the desired adlc rules.
  //
  // Inlined unsafe volatile gets manifest as a slightly different
  // node sequence to a normal volatile get because of the
  // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, but the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
  // present
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarCPUOrder LoadX[mo_acquire]
  //        ||            |
  //        ||       {DecodeN} optional
  //        ||       /
  //     MemBarAcquire
  //
  // In this case the acquire membar does not directly depend on the
  // load. However, we can be sure that the load is generated from an
  // inlined unsafe volatile get if we see it dependent on this unique
  // sequence of membar nodes. Similarly, given an acquire membar we
  // can know that it was added because of an inlined unsafe volatile
  // get if it is fed and feeds a cpuorder membar and if its feed
  // membar also feeds an acquiring load.
  //
  // Finally an inlined (Unsafe) CAS operation is translated to the
  // following ideal graph
  //
  //   MemBarRelease
  //   MemBarCPUOrder
  //   CompareAndSwapX {CardMark}-optional
  //   MemBarCPUOrder
  //   MemBarAcquire
  //
  // So, where we can identify these volatile read and write
  // signatures we can choose to plant either of the above two code
  // sequences. For a volatile read we can simply plant a normal
  // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
  // also choose to inhibit translation of the MemBarAcquire and
  // inhibit planting of the ldr<x>, instead planting an ldar<x>.
  //
  // When we recognise a volatile store signature we can choose to
  // plant at a dmb ish as a translation for the MemBarRelease, a
  // normal str<x> and then a dmb ish for the MemBarVolatile.
  // Alternatively, we can inhibit translation of the MemBarRelease
  // and MemBarVolatile and instead plant a simple stlr<x>
  // instruction.
  //
  // when we recognise a CAS signature we can choose to plant a dmb
  // ish as a translation for the MemBarRelease, the conventional
  // macro-instruction sequence for the CompareAndSwap node (which
  // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
  // Alternatively, we can elide generation of the dmb instructions
  // and plant the alternative CompareAndSwap macro-instruction
  // sequence (which uses ldaxr<x>).
  //
  // Of course, the above only applies when we see these signature
  // configurations. We still want to plant dmb instructions in any
  // other cases where we may see a MemBarAcquire, MemBarRelease or
  // MemBarVolatile. For example, at the end of a constructor which
  // writes final/volatile fields we will see a MemBarRelease
  // instruction and this needs a 'dmb ish' lest we risk the
  // constructed object being visible without making the
  // final/volatile field writes visible.
  //
  // n.b. the translation rules below which rely on detection of the
  // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
  // If we see anything other than the signature configurations we
  // always just translate the loads and stores to ldr<x> and str<x>
  // and translate acquire, release and volatile membars to the
  // relevant dmb instructions.
  //

  // is_CAS(int opcode, bool maybe_volatile)
  //
  // return true if opcode is one of the possible CompareAndSwapX
  // values otherwise false.

  bool is_CAS(int opcode, bool maybe_volatile)
  {
    switch(opcode) {
      // We handle these
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_ShenandoahCompareAndSwapP:
    case Op_ShenandoahCompareAndSwapN:
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
      return true;
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeN:
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
    case Op_ShenandoahWeakCompareAndSwapP:
    case Op_ShenandoahWeakCompareAndSwapN:
    case Op_ShenandoahCompareAndExchangeP:
    case Op_ShenandoahCompareAndExchangeN:
      return maybe_volatile;
    default:
      return false;
    }
  }

  // helper to determine the maximum number of Phi nodes we may need to
  // traverse when searching from a card mark membar for the merge mem
  // feeding a trailing membar or vice versa

// predicates controlling emit of ldr<x>/ldar<x>

bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  MemBarNode* mb = barrier->as_MemBar();

  if (mb->trailing_load()) {
    return true;
  }

  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode(), true);
  }

  return false;
}

bool needs_acquiring_load(const Node *n)
{
  assert(n->is_Load(), "expecting a load");
  LoadNode *ld = n->as_Load();
  return ld->is_acquire();
}

bool unnecessary_release(const Node *n)
{
  assert((n->is_MemBar() &&
          n->Opcode() == Op_MemBarRelease),
         "expecting a release membar");

  MemBarNode *barrier = n->as_MemBar();
  if (!barrier->leading()) {
    return false;
  } else {
    Node* trailing = barrier->trailing_membar();
    MemBarNode* trailing_mb = trailing->as_MemBar();
    assert(trailing_mb->trailing(), "Not a trailing membar?");
    assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");

    Node* mem = trailing_mb->in(MemBarNode::Precedent);
    if (mem->is_Store()) {
      assert(mem->as_Store()->is_release(), "");
      assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
      return true;
    } else {
      assert(mem->is_LoadStore(), "");
      assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
      return is_CAS(mem->Opcode(), true);
    }
  }
  return false;
}

bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  MemBarNode *mbvol = n->as_MemBar();

  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  if (release) {
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}

// predicates controlling emit of str<x>/stlr<x>

bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  StoreNode *st = n->as_Store();
  return st->trailing_membar() != NULL;
}

// predicate controlling translation of CAS
//
// returns true if CAS needs to use an acquiring load otherwise false

bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
  LoadStoreNode* ldst = n->as_LoadStore();
  if (is_CAS(n->Opcode(), false)) {
    assert(ldst->trailing_membar() != NULL, "expected trailing membar");
  } else {
    return ldst->trailing_membar() != NULL;
  }

  // so we can just return true here
  return true;
}

#define __ _masm.

// advance declarations for helper functions to convert register
// indices to register objects

// the ad file has to provide implementations of certain methods
// expected by the generic code
//
// REQUIRED FUNCTIONALITY

//=============================================================================

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

int MachCallStaticJavaNode::ret_addr_offset()
{
  // call should be a simple bl
  int off = 4;
  return off;
}

int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}

int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   bl(addr)
  // or with far branches
  //   bl(trampoline_stub)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blr(rscratch1)
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return 1 * NativeInstruction::instruction_size;
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}

//=============================================================================

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ brk(0);
}

uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    C2_MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif

#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  if (C->output()->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (VM_Version::use_rop_protection()) {
    st->print("ldr zr, [lr]\n\t");
    st->print("pacia lr, rfp\n\t");
  }
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub sp, sp, #%d\n\t", framesize);
    st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
    st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub sp, sp, rscratch1");
  }
  if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
    st->print("\n\t");
    st->print("ldr rscratch1, [guard]\n\t");
    st->print("dmb ishld\n\t");
    st->print("ldr rscratch2, [rthread, #thread_disarmed_offset]\n\t");
    st->print("cmp rscratch1, rscratch2\n\t");
    st->print("b.eq skip");
    st->print("\n\t");
    st->print("blr #nmethod_entry_barrier_stub\n\t");
    st->print("b skip\n\t");
    st->print("guard: int\n\t");
    st->print("\n\t");
    st->print("skip:\n\t");
  }
}
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const int framesize = C->output()->frame_size_in_bytes();

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  if (C->max_vector_size() > 0) {
    __ reinitialize_ptrue();
  }

  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize))
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (C->stub_function() == NULL) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
      // Dummy labels for just measuring the code size
      Label dummy_slow_path;
      Label dummy_continuation;
      Label dummy_guard;
      Label* slow_path = &dummy_slow_path;
      Label* continuation = &dummy_continuation;
      Label* guard = &dummy_guard;
      if (!Compile::current()->output()->in_scratch_emit_size()) {
        // Use real labels from actual stub when not emitting code for the purpose of measuring its size
        C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
        slow_path = &stub->slow_path();
        continuation = &stub->continuation();
        guard = &stub->guard();
      }
      // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
    }
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  return 0;
}

//=============================================================================

#ifndef PRODUCT
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, rscratch1\n\t");
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }
  if (VM_Version::use_rop_protection()) {
    st->print("autia lr, rfp\n\t");
    st->print("ldr zr, [lr]\n\t");
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# test polling word\n\t");
    st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
    st->print("cmp sp, rscratch1\n\t");
    st->print("bhi #slow_path");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };

static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 32 int registers * 2 halves
  int slots_of_int_registers = Register::number_of_registers * Register::max_slots_per_register;

  if (reg < slots_of_int_registers) {
    return rc_int;
  }

  // we have 32 float register * 8 halves
  int slots_of_float_registers = FloatRegister::number_of_registers * FloatRegister::max_slots_per_register;
  if (reg < slots_of_int_registers + slots_of_float_registers) {
    return rc_float;
  }

  int slots_of_predicate_registers = PRegister::number_of_registers * PRegister::max_slots_per_register;
  if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
    return rc_predicate;
  }

  // Between predicate regs & stack is the flags.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}

uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
  Compile* C = ra_->C;

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(this);
  OptoReg::Name dst_lo = ra_->get_reg_first(this);

  enum RC src_hi_rc = rc_class(src_hi);
  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_hi_rc = rc_class(dst_hi);
  enum RC dst_lo_rc = rc_class(dst_lo);

  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");

  if (src_hi != OptoReg::Bad && !bottom_type()->isa_vectmask()) {
    assert((src_lo&1)==0 && src_lo+1==src_hi &&
           (dst_lo&1)==0 && dst_lo+1==dst_hi,
           "expected aligned-adjacent pairs");
  }

  if (src_lo == dst_lo && src_hi == dst_hi) {
    return 0;            // Self copy, no move.
  }

  bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
              (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
  int src_offset = ra_->reg2offset(src_lo);
  int dst_offset = ra_->reg2offset(dst_lo);

  if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
    uint ireg = ideal_reg();
    if (ireg == Op_VecA && cbuf) {
      C2_MacroAssembler _masm(cbuf);
      int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
        // stack->stack
        __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
                                                sve_vector_reg_size_in_bytes);
      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
        __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
                            sve_vector_reg_size_in_bytes);
      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
        __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
                              sve_vector_reg_size_in_bytes);
      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
        __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                   as_FloatRegister(Matcher::_regEncode[src_lo]),
                   as_FloatRegister(Matcher::_regEncode[src_lo]));
      } else {
        ShouldNotReachHere();
      }
    } else if (cbuf) {
      assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
      C2_MacroAssembler _masm(cbuf);
      assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
        // stack->stack
        assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
        if (ireg == Op_VecD) {
          __ unspill(rscratch1, true, src_offset);
          __ spill(rscratch1, true, dst_offset);
        } else {
          __ spill_copy128(src_offset, dst_offset);
        }
      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
        __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
               ireg == Op_VecD ? __ T8B : __ T16B,
               as_FloatRegister(Matcher::_regEncode[src_lo]));
      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
        __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
                 ireg == Op_VecD ? __ D : __ Q,
                 ra_->reg2offset(dst_lo));
      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
        __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                   ireg == Op_VecD ? __ D : __ Q,
                   ra_->reg2offset(src_lo));
      } else {
        ShouldNotReachHere();
      }
    }
  } else if (cbuf) {
    C2_MacroAssembler _masm(cbuf);
    switch (src_lo_rc) {
    case rc_int:
      if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
        if (is64) {
            __ mov(as_Register(Matcher::_regEncode[dst_lo]),
                   as_Register(Matcher::_regEncode[src_lo]));
        } else {
            C2_MacroAssembler _masm(cbuf);
            __ movw(as_Register(Matcher::_regEncode[dst_lo]),
                    as_Register(Matcher::_regEncode[src_lo]));
        }
      } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
        if (is64) {
            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     as_Register(Matcher::_regEncode[src_lo]));
        } else {
            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
--> --------------------

--> maximum size reached

--> --------------------