Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/Java/Openjdk/src/hotspot/cpu/riscv/ (Sun/Oracle ^©) Datei vom 13.11.2022 mit Größe 292 kB

Quelle riscv.ad Sprache: unbekannt

Spracherkennung für: .ad vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

//
// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// RISCV Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VMReg expression );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// We must define the 64 bit int registers in two 32 bit halves: the
// real lower register and a virtual upper half register. The upper
// halves are used by the register allocator but are not actually
// supplied as operands to memory ops.
//
// follow the C1 compiler in making registers
//
//   x7, x9-x17, x27-x31 volatile (caller save)
//   x0-x4, x8, x23 system (no save, no allocate)
//   x5-x6 non-allocatable (so we can use them as temporary regs)

//
// As regards Java usage, we don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see comment
// in x86 implementation of Deoptimization::unwind_callee_save_values).
//

// General Registers

// Every integer register xN listed here is described by a pair of reg_defs
// that share the encoding N:
//   RN   - the slot given by xN->as_VMReg()          (the real lower half)
//   RN_H - the following slot, xN->as_VMReg()->next() (the virtual upper half)
// x5 and x6 get no reg_def in this table.
reg_def R0      ( NS,  NS,  Op_RegI, 0,  x0->as_VMReg()         ); // zr
reg_def R0_H    ( NS,  NS,  Op_RegI, 0,  x0->as_VMReg()->next() );
reg_def R1      ( NS,  SOC, Op_RegI, 1,  x1->as_VMReg()         ); // ra
reg_def R1_H    ( NS,  SOC, Op_RegI, 1,  x1->as_VMReg()->next() );
reg_def R2      ( NS,  SOE, Op_RegI, 2,  x2->as_VMReg()         ); // sp
reg_def R2_H    ( NS,  SOE, Op_RegI, 2,  x2->as_VMReg()->next() );
reg_def R3      ( NS,  NS,  Op_RegI, 3,  x3->as_VMReg()         ); // gp
reg_def R3_H    ( NS,  NS,  Op_RegI, 3,  x3->as_VMReg()->next() );
reg_def R4      ( NS,  NS,  Op_RegI, 4,  x4->as_VMReg()         ); // tp
reg_def R4_H    ( NS,  NS,  Op_RegI, 4,  x4->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI, 7,  x7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI, 7,  x7->as_VMReg()->next() );
reg_def R8      ( NS,  SOE, Op_RegI, 8,  x8->as_VMReg()         ); // fp
reg_def R8_H    ( NS,  SOE, Op_RegI, 8,  x8->as_VMReg()->next() );
reg_def R9      ( SOC, SOE, Op_RegI, 9,  x9->as_VMReg()         );
reg_def R9_H    ( SOC, SOE, Op_RegI, 9,  x9->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next());
reg_def R18     ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()        );
reg_def R18_H   ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next());
reg_def R19     ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next());
reg_def R23     ( NS,  SOE, Op_RegI, 23, x23->as_VMReg()        ); // java thread
reg_def R23_H   ( NS,  SOE, Op_RegI, 23, x23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next());
reg_def R27     ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()        ); // heapbase
reg_def R27_H   ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next());
reg_def R28     ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()        );
reg_def R28_H   ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next());
reg_def R29     ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()        );
reg_def R29_H   ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next());
reg_def R30     ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()        );
reg_def R30_H   ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next());
reg_def R31     ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()        );
reg_def R31_H   ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next());

// ----------------------------
// Float/Double Registers
// ----------------------------

// Double Registers

// The rules of ADL require that double registers be defined in pairs.
// Each pair must be two 32-bit values, but not necessarily a pair of
// single float registers. In each pair, ADLC-assigned register numbers
// must be adjacent, with the lower number even. Finally, when the
// CPU stores such a register pair to memory, the word associated with
// the lower ADLC-assigned number must be stored to the lower address.

// RISCV has 32 floating-point registers. Each can store a single
// or double precision floating-point value.

// For Java use, float registers f0-f31 are always save on call (whereas
// the platform ABI treats f8-f9 and f18-f27 as callee save). Other
// float registers are SOC as per the platform spec.

// Each fN is described by FN (fN->as_VMReg()) and FN_H (the next slot),
// both with encoding N. Every entry is SOC in the register save type
// column; F8-F9 and F18-F27 are SOE in the C convention column, the
// rest are SOC there as well.
reg_def F0    ( SOC, SOC, Op_RegF,  0,  f0->as_VMReg()          );
reg_def F0_H  ( SOC, SOC, Op_RegF,  0,  f0->as_VMReg()->next()  );
reg_def F1    ( SOC, SOC, Op_RegF,  1,  f1->as_VMReg()          );
reg_def F1_H  ( SOC, SOC, Op_RegF,  1,  f1->as_VMReg()->next()  );
reg_def F2    ( SOC, SOC, Op_RegF,  2,  f2->as_VMReg()          );
reg_def F2_H  ( SOC, SOC, Op_RegF,  2,  f2->as_VMReg()->next()  );
reg_def F3    ( SOC, SOC, Op_RegF,  3,  f3->as_VMReg()          );
reg_def F3_H  ( SOC, SOC, Op_RegF,  3,  f3->as_VMReg()->next()  );
reg_def F4    ( SOC, SOC, Op_RegF,  4,  f4->as_VMReg()          );
reg_def F4_H  ( SOC, SOC, Op_RegF,  4,  f4->as_VMReg()->next()  );
reg_def F5    ( SOC, SOC, Op_RegF,  5,  f5->as_VMReg()          );
reg_def F5_H  ( SOC, SOC, Op_RegF,  5,  f5->as_VMReg()->next()  );
reg_def F6    ( SOC, SOC, Op_RegF,  6,  f6->as_VMReg()          );
reg_def F6_H  ( SOC, SOC, Op_RegF,  6,  f6->as_VMReg()->next()  );
reg_def F7    ( SOC, SOC, Op_RegF,  7,  f7->as_VMReg()          );
reg_def F7_H  ( SOC, SOC, Op_RegF,  7,  f7->as_VMReg()->next()  );
reg_def F8    ( SOC, SOE, Op_RegF,  8,  f8->as_VMReg()          );
reg_def F8_H  ( SOC, SOE, Op_RegF,  8,  f8->as_VMReg()->next()  );
reg_def F9    ( SOC, SOE, Op_RegF,  9,  f9->as_VMReg()          );
reg_def F9_H  ( SOC, SOE, Op_RegF,  9,  f9->as_VMReg()->next()  );
reg_def F10   ( SOC, SOC, Op_RegF,  10, f10->as_VMReg()         );
reg_def F10_H ( SOC, SOC, Op_RegF,  10, f10->as_VMReg()->next() );
reg_def F11   ( SOC, SOC, Op_RegF,  11, f11->as_VMReg()         );
reg_def F11_H ( SOC, SOC, Op_RegF,  11, f11->as_VMReg()->next() );
reg_def F12   ( SOC, SOC, Op_RegF,  12, f12->as_VMReg()         );
reg_def F12_H ( SOC, SOC, Op_RegF,  12, f12->as_VMReg()->next() );
reg_def F13   ( SOC, SOC, Op_RegF,  13, f13->as_VMReg()         );
reg_def F13_H ( SOC, SOC, Op_RegF,  13, f13->as_VMReg()->next() );
reg_def F14   ( SOC, SOC, Op_RegF,  14, f14->as_VMReg()         );
reg_def F14_H ( SOC, SOC, Op_RegF,  14, f14->as_VMReg()->next() );
reg_def F15   ( SOC, SOC, Op_RegF,  15, f15->as_VMReg()         );
reg_def F15_H ( SOC, SOC, Op_RegF,  15, f15->as_VMReg()->next() );
reg_def F16   ( SOC, SOC, Op_RegF,  16, f16->as_VMReg()         );
reg_def F16_H ( SOC, SOC, Op_RegF,  16, f16->as_VMReg()->next() );
reg_def F17   ( SOC, SOC, Op_RegF,  17, f17->as_VMReg()         );
reg_def F17_H ( SOC, SOC, Op_RegF,  17, f17->as_VMReg()->next() );
reg_def F18   ( SOC, SOE, Op_RegF,  18, f18->as_VMReg()         );
reg_def F18_H ( SOC, SOE, Op_RegF,  18, f18->as_VMReg()->next() );
reg_def F19   ( SOC, SOE, Op_RegF,  19, f19->as_VMReg()         );
reg_def F19_H ( SOC, SOE, Op_RegF,  19, f19->as_VMReg()->next() );
reg_def F20   ( SOC, SOE, Op_RegF,  20, f20->as_VMReg()         );
reg_def F20_H ( SOC, SOE, Op_RegF,  20, f20->as_VMReg()->next() );
reg_def F21   ( SOC, SOE, Op_RegF,  21, f21->as_VMReg()         );
reg_def F21_H ( SOC, SOE, Op_RegF,  21, f21->as_VMReg()->next() );
reg_def F22   ( SOC, SOE, Op_RegF,  22, f22->as_VMReg()         );
reg_def F22_H ( SOC, SOE, Op_RegF,  22, f22->as_VMReg()->next() );
reg_def F23   ( SOC, SOE, Op_RegF,  23, f23->as_VMReg()         );
reg_def F23_H ( SOC, SOE, Op_RegF,  23, f23->as_VMReg()->next() );
reg_def F24   ( SOC, SOE, Op_RegF,  24, f24->as_VMReg()         );
reg_def F24_H ( SOC, SOE, Op_RegF,  24, f24->as_VMReg()->next() );
reg_def F25   ( SOC, SOE, Op_RegF,  25, f25->as_VMReg()         );
reg_def F25_H ( SOC, SOE, Op_RegF,  25, f25->as_VMReg()->next() );
reg_def F26   ( SOC, SOE, Op_RegF,  26, f26->as_VMReg()         );
reg_def F26_H ( SOC, SOE, Op_RegF,  26, f26->as_VMReg()->next() );
reg_def F27   ( SOC, SOE, Op_RegF,  27, f27->as_VMReg()         );
reg_def F27_H ( SOC, SOE, Op_RegF,  27, f27->as_VMReg()->next() );
reg_def F28   ( SOC, SOC, Op_RegF,  28, f28->as_VMReg()         );
reg_def F28_H ( SOC, SOC, Op_RegF,  28, f28->as_VMReg()->next() );
reg_def F29   ( SOC, SOC, Op_RegF,  29, f29->as_VMReg()         );
reg_def F29_H ( SOC, SOC, Op_RegF,  29, f29->as_VMReg()->next() );
reg_def F30   ( SOC, SOC, Op_RegF,  30, f30->as_VMReg()         );
reg_def F30_H ( SOC, SOC, Op_RegF,  30, f30->as_VMReg()->next() );
reg_def F31   ( SOC, SOC, Op_RegF,  31, f31->as_VMReg()         );
reg_def F31_H ( SOC, SOC, Op_RegF,  31, f31->as_VMReg()->next() );

// ----------------------------
// Vector Registers
// ----------------------------

// For RVV vector registers, we simply extend vector register size to 4
// 'logical' slots. This is nominally 128 bits but it actually covers
// all possible 'physical' RVV vector register lengths from 128 ~ 1024
// bits. The 'physical' RVV vector register length is detected during
// startup, so the register allocator is able to identify the correct
// number of bytes needed for an RVV spill/unspill.

// Each vN is described by four reg_defs sharing the encoding N, one per
// 'logical' slot: VN (vN->as_VMReg()), VN_H (->next()), VN_J (->next(2))
// and VN_K (->next(3)). All of them are SOC in both save-type columns.
reg_def V0    ( SOC, SOC, Op_VecA, 0,  v0->as_VMReg()           );
reg_def V0_H  ( SOC, SOC, Op_VecA, 0,  v0->as_VMReg()->next()   );
reg_def V0_J  ( SOC, SOC, Op_VecA, 0,  v0->as_VMReg()->next(2)  );
reg_def V0_K  ( SOC, SOC, Op_VecA, 0,  v0->as_VMReg()->next(3)  );

reg_def V1    ( SOC, SOC, Op_VecA, 1,  v1->as_VMReg()          );
reg_def V1_H  ( SOC, SOC, Op_VecA, 1,  v1->as_VMReg()->next()   );
reg_def V1_J  ( SOC, SOC, Op_VecA, 1,  v1->as_VMReg()->next(2)  );
reg_def V1_K  ( SOC, SOC, Op_VecA, 1,  v1->as_VMReg()->next(3)  );

reg_def V2    ( SOC, SOC, Op_VecA, 2,  v2->as_VMReg()           );
reg_def V2_H  ( SOC, SOC, Op_VecA, 2,  v2->as_VMReg()->next()   );
reg_def V2_J  ( SOC, SOC, Op_VecA, 2,  v2->as_VMReg()->next(2)  );
reg_def V2_K  ( SOC, SOC, Op_VecA, 2,  v2->as_VMReg()->next(3)  );

reg_def V3    ( SOC, SOC, Op_VecA, 3,  v3->as_VMReg()           );
reg_def V3_H  ( SOC, SOC, Op_VecA, 3,  v3->as_VMReg()->next()   );
reg_def V3_J  ( SOC, SOC, Op_VecA, 3,  v3->as_VMReg()->next(2)  );
reg_def V3_K  ( SOC, SOC, Op_VecA, 3,  v3->as_VMReg()->next(3)  );

reg_def V4    ( SOC, SOC, Op_VecA, 4,  v4->as_VMReg()           );
reg_def V4_H  ( SOC, SOC, Op_VecA, 4,  v4->as_VMReg()->next()   );
reg_def V4_J  ( SOC, SOC, Op_VecA, 4,  v4->as_VMReg()->next(2)  );
reg_def V4_K  ( SOC, SOC, Op_VecA, 4,  v4->as_VMReg()->next(3)  );

reg_def V5    ( SOC, SOC, Op_VecA, 5,  v5->as_VMReg()          );
reg_def V5_H  ( SOC, SOC, Op_VecA, 5,  v5->as_VMReg()->next()   );
reg_def V5_J  ( SOC, SOC, Op_VecA, 5,  v5->as_VMReg()->next(2)  );
reg_def V5_K  ( SOC, SOC, Op_VecA, 5,  v5->as_VMReg()->next(3)  );

reg_def V6    ( SOC, SOC, Op_VecA, 6,  v6->as_VMReg()           );
reg_def V6_H  ( SOC, SOC, Op_VecA, 6,  v6->as_VMReg()->next()   );
reg_def V6_J  ( SOC, SOC, Op_VecA, 6,  v6->as_VMReg()->next(2)  );
reg_def V6_K  ( SOC, SOC, Op_VecA, 6,  v6->as_VMReg()->next(3)  );

reg_def V7    ( SOC, SOC, Op_VecA, 7,  v7->as_VMReg()          );
reg_def V7_H  ( SOC, SOC, Op_VecA, 7,  v7->as_VMReg()->next()   );
reg_def V7_J  ( SOC, SOC, Op_VecA, 7,  v7->as_VMReg()->next(2)  );
reg_def V7_K  ( SOC, SOC, Op_VecA, 7,  v7->as_VMReg()->next(3)  );

reg_def V8    ( SOC, SOC, Op_VecA, 8,  v8->as_VMReg()           );
reg_def V8_H  ( SOC, SOC, Op_VecA, 8,  v8->as_VMReg()->next()   );
reg_def V8_J  ( SOC, SOC, Op_VecA, 8,  v8->as_VMReg()->next(2)  );
reg_def V8_K  ( SOC, SOC, Op_VecA, 8,  v8->as_VMReg()->next(3)  );

reg_def V9    ( SOC, SOC, Op_VecA, 9,  v9->as_VMReg()           );
reg_def V9_H  ( SOC, SOC, Op_VecA, 9,  v9->as_VMReg()->next()   );
reg_def V9_J  ( SOC, SOC, Op_VecA, 9,  v9->as_VMReg()->next(2)  );
reg_def V9_K  ( SOC, SOC, Op_VecA, 9,  v9->as_VMReg()->next(3)  );

reg_def V10   ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()          );
reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next()  );
reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) );
reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) );

reg_def V11   ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()          );
reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next()  );
reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) );
reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) );

reg_def V12   ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()          );
reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next()  );
reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) );
reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) );

reg_def V13   ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()          );
reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next()  );
reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) );
reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) );

reg_def V14   ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()          );
reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next()  );
reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) );
reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) );

reg_def V15   ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()          );
reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next()  );
reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) );
reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) );

reg_def V16   ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()          );
reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next()  );
reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) );
reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) );

reg_def V17   ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()          );
reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next()  );
reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) );
reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) );

reg_def V18   ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()          );
reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next()  );
reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) );
reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) );

reg_def V19   ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()          );
reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next()  );
reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) );
reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) );

reg_def V20   ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()          );
reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next()  );
reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) );
reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) );

reg_def V21   ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()          );
reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next()  );
reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) );
reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) );

reg_def V22   ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()          );
reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next()  );
reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) );
reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) );

reg_def V23   ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()          );
reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next()  );
reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) );
reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) );

reg_def V24   ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()          );
reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next()  );
reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) );
reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) );

reg_def V25   ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()          );
reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next()  );
reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) );
reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) );

reg_def V26   ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()          );
reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next()  );
reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) );
reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) );

reg_def V27   ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()          );
reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next()  );
reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) );
reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) );

reg_def V28   ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()          );
reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next()  );
reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) );
reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) );

reg_def V29   ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()          );
reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next()  );
reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) );
reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) );

reg_def V30   ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()          );
reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next()  );
reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) );
reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) );

reg_def V31   ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()          );
reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next()  );
reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) );
reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) );

// ----------------------------
// Special Registers
// ----------------------------

// On riscv, the physical flag register is missing, so we use t1 instead,
// to bridge the RegFlag semantics in share/opto.
// RFLAGS is the only reg_def in this register block that is backed by x6
// (encoding 6); unlike the integer registers it has no companion _H slot.

reg_def RFLAGS   (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg()        );

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

// Integer registers. Per the note above, entries listed first have the
// highest selection priority; every register appears with its _H slot.
alloc_class chunk0(
    // volatiles: SOC in both save-type columns of their reg_defs
    R7,  R7_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H,

    // arg registers (R10-R17): also SOC in both save-type columns
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,

    // non-volatiles: SOC as register save type, SOE as C convention save type
    R9,  R9_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers (placed last, i.e. lowest priority)
    R23, R23_H, // java thread
    R27, R27_H, // heapbase
    R4,  R4_H,  // thread (tp)
    R8,  R8_H,  // fp
    R0,  R0_H,  // zero
    R1,  R1_H,  // ra
    R2,  R2_H,  // sp
    R3,  R3_H,  // gp
);

// Float/double registers, highest selection priority first; every
// register appears with its _H slot.
alloc_class chunk1(

    // volatiles: SOC in both save-type columns of their reg_defs
    // (none of these are declared NS)
    F0,  F0_H,
    F1,  F1_H,
    F2,  F2_H,
    F3,  F3_H,
    F4,  F4_H,
    F5,  F5_H,
    F6,  F6_H,
    F7,  F7_H,
    F28, F28_H,
    F29, F29_H,
    F30, F30_H,
    F31, F31_H,

    // arg registers (F10-F17): also SOC in both save-type columns
    F10, F10_H,
    F11, F11_H,
    F12, F12_H,
    F13, F13_H,
    F14, F14_H,
    F15, F15_H,
    F16, F16_H,
    F17, F17_H,

    // non-volatiles: SOC as register save type, SOE as C convention save type
    F8,  F8_H,
    F9,  F9_H,
    F18, F18_H,
    F19, F19_H,
    F20, F20_H,
    F21, F21_H,
    F22, F22_H,
    F23, F23_H,
    F24, F24_H,
    F25, F25_H,
    F26, F26_H,
    F27, F27_H,
);

// Vector registers V0-V31 in numeric order, each with all four of its
// slots (VN, VN_H, VN_J, VN_K).
alloc_class chunk2(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,
);

// The flags pseudo-register RFLAGS gets an allocation chunk of its own.
alloc_class chunk3(RFLAGS);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// 32 bit view of the general purpose registers: only the low slot (RN) of
// every integer register that has a reg_def is a member (no _H slots).
reg_class all_reg32(
    R0,  R1,  R2,  R3,  R4,  R7,  R8,  R9,
    R10, R11, R12, R13, R14, R15, R16, R17,
    R18, R19, R20, R21, R22, R23, R24, R25,
    R26, R27, R28, R29, R30, R31
);

// Class for any 32 bit integer registers (excluding zr)
// The members are not listed here: the class body returns the mask
// _ANY_REG32_mask, which is not defined inside this register block.
reg_class any_reg32 %{
  return _ANY_REG32_mask;
%}

// The four singleton classes below each contain only the low 32 bit slot
// of one register (no _H slot).

// Singleton class for R10 int register
reg_class int_r10_reg(R10);

// Singleton class for R12 int register
reg_class int_r12_reg(R12);

// Singleton class for R13 int register
reg_class int_r13_reg(R13);

// Singleton class for R14 int register
reg_class int_r14_reg(R14);

// 64 bit view of the general purpose registers: every integer register
// that has a reg_def contributes both its low slot (RN) and its RN_H slot.
reg_class all_reg(
    R0,  R0_H,  R1,  R1_H,  R2,  R2_H,
    R3,  R3_H,  R4,  R4_H,  R7,  R7_H,
    R8,  R8_H,  R9,  R9_H,  R10, R10_H,
    R11, R11_H, R12, R12_H, R13, R13_H,
    R14, R14_H, R15, R15_H, R16, R16_H,
    R17, R17_H, R18, R18_H, R19, R19_H,
    R20, R20_H, R21, R21_H, R22, R22_H,
    R23, R23_H, R24, R24_H, R25, R25_H,
    R26, R26_H, R27, R27_H, R28, R28_H,
    R29, R29_H, R30, R30_H, R31, R31_H
);

// Class for all long integer registers (excluding zr)
// The members are not listed here: the class body returns the mask
// _ANY_REG_mask, which is not defined inside this register block.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for non-allocatable 32 bit registers
// Only the low slots are listed. R8 (fp) and R27 (heapbase) are not members,
// although alloc_class chunk0 groups them under "non-allocatable registers".
// NOTE(review): presumably they are handled separately when the masks are
// built - confirm against the code that defines the *_mask variables.
reg_class non_allocatable_reg32(
    R0,                       // zr
    R1,                       // ra
    R2,                       // sp
    R3,                       // gp
    R4,                       // tp
    R23                       // java thread
);

// Class for non-allocatable 64 bit registers
// Same six registers as non_allocatable_reg32, each with its _H slot.
// R8 (fp) and R27 (heapbase) are not members here either.
reg_class non_allocatable_reg(
    R0,  R0_H,                // zr
    R1,  R1_H,                // ra
    R2,  R2_H,                // sp
    R3,  R3_H,                // gp
    R4,  R4_H,                // tp
    R23, R23_H                // java thread
);

// The four classes below do not list their members; each body returns a
// mask variable that is not defined inside this register block.
// NOTE(review): judging by the names these are the 32 bit, 64 bit and
// pointer register sets with the special registers removed - confirm
// against the code that initialises the masks.

// Returns _NO_SPECIAL_REG32_mask
reg_class no_special_reg32 %{
  return _NO_SPECIAL_REG32_mask;
%}

// Returns _NO_SPECIAL_REG_mask
reg_class no_special_reg %{
  return _NO_SPECIAL_REG_mask;
%}

// Returns _PTR_REG_mask
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Returns _NO_SPECIAL_PTR_REG_mask
reg_class no_special_ptr_reg %{
  return _NO_SPECIAL_PTR_REG_mask;
%}

// Fixed-register classes. Each class below holds exactly one 64 bit integer
// register, i.e. its low slot RN together with its RN_H slot.

// 64 bit registers r10 - r16
reg_class r10_reg(R10, R10_H);
reg_class r11_reg(R11, R11_H);
reg_class r12_reg(R12, R12_H);
reg_class r13_reg(R13, R13_H);
reg_class r14_reg(R14, R14_H);
reg_class r15_reg(R15, R15_H);
reg_class r16_reg(R16, R16_H);

// Method register (same members as r31_reg below)
reg_class method_reg(R31, R31_H);

// Heapbase register
reg_class heapbase_reg(R27, R27_H);

// Java thread register
reg_class java_thread_reg(R23, R23_H);

// 64 bit registers r28 - r31
reg_class r28_reg(R28, R28_H);
reg_class r29_reg(R29, R29_H);
reg_class r30_reg(R30, R30_H);
reg_class r31_reg(R31, R31_H);

// Zero register
reg_class zr_reg(R0, R0_H);

// Thread register
reg_class thread_reg(R4, R4_H);

// Frame pointer register
reg_class fp_reg(R8, R8_H);

// Link register
reg_class ra_reg(R1, R1_H);

// Long sp register
reg_class sp_reg(R2, R2_H);

// Single precision view of the float registers: only the low slot (FN) of
// F0-F31 is a member.
reg_class float_reg(
    F0,  F1,  F2,  F3,  F4,  F5,  F6,  F7,
    F8,  F9,  F10, F11, F12, F13, F14, F15,
    F16, F17, F18, F19, F20, F21, F22, F23,
    F24, F25, F26, F27, F28, F29, F30, F31
);

// Double precision view of the float registers. The allocator needs the
// virtual `high halves', so every FN is listed together with its FN_H slot.
reg_class double_reg(
    F0,  F0_H,  F1,  F1_H,  F2,  F2_H,  F3,  F3_H,
    F4,  F4_H,  F5,  F5_H,  F6,  F6_H,  F7,  F7_H,
    F8,  F8_H,  F9,  F9_H,  F10, F10_H, F11, F11_H,
    F12, F12_H, F13, F13_H, F14, F14_H, F15, F15_H,
    F16, F16_H, F17, F17_H, F18, F18_H, F19, F19_H,
    F20, F20_H, F21, F21_H, F22, F22_H, F23, F23_H,
    F24, F24_H, F25, F25_H, F26, F26_H, F27, F27_H,
    F28, F28_H, F29, F29_H, F30, F30_H, F31, F31_H
);

// Class for RVV vector registers V1-V31, each with all four of its slots.
// V0 has reg_defs and is part of alloc_class chunk2, but it is NOT a member
// of this class.
// NOTE(review): presumably because v0 serves as the RVV mask register -
// confirm before adding it here.
reg_class vectora_reg(
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);

// Fixed-register classes for 64 bit float registers f0 - f3: each holds the
// low slot FN together with its FN_H slot.
reg_class f0_reg(F0, F0_H);
reg_class f1_reg(F1, F1_H);
reg_class f2_reg(F2, F2_H);
reg_class f3_reg(F3, F3_H);

// Fixed-register classes for vector registers v1 - v5: each holds all four
// slots of one vector register.
reg_class v1_reg(V1, V1_H, V1_J, V1_K);
reg_class v2_reg(V2, V2_H, V2_J, V2_K);
reg_class v3_reg(V3, V3_H, V3_J, V3_K);
reg_class v4_reg(V4, V4_H, V4_J, V4_K);
reg_class v5_reg(V5, V5_H, V5_J, V5_K);

// Class for condition codes; its only member is RFLAGS
reg_class reg_flags(RFLAGS);
%}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//

// we follow the ppc-aix port in using a simple cost model which ranks
// register operations as cheap, memory ops as more expensive and
// branches as most expensive. the first two have a low as well as a
// normal cost. huge cost appears to be a way of saying don't do
// something

definitions %{
  // Each int_def pairs the literal value with the expression that must
  // evaluate to it; all costs below are multiples of DEFAULT_COST.
  // The default cost (of a register move instruction).
  int_def DEFAULT_COST         (  100,               100);
  int_def ALU_COST             (  100,  1 * DEFAULT_COST);          // unknown, const, arith, shift, slt,
                                                                    // multi, auipc, nop, logical, move
  int_def LOAD_COST            (  300,  3 * DEFAULT_COST);          // load, fpload
  int_def STORE_COST           (  100,  1 * DEFAULT_COST);          // store, fpstore
  int_def XFER_COST            (  300,  3 * DEFAULT_COST);          // mfc, mtc, fcvt, fmove, fcmp
  int_def BRANCH_COST          (  100,  1 * DEFAULT_COST);          // branch, jmp, call
  int_def IMUL_COST            ( 1000, 10 * DEFAULT_COST);          // imul
  int_def IDIVSI_COST          ( 3400, 34 * DEFAULT_COST);          // idivsi
  int_def IDIVDI_COST          ( 6600, 66 * DEFAULT_COST);          // idivdi
  int_def FMUL_SINGLE_COST     (  500,  5 * DEFAULT_COST);          // fadd, fmul, fmadd
  int_def FMUL_DOUBLE_COST     (  700,  7 * DEFAULT_COST);          // fadd, fmul, fmadd
  int_def FDIV_COST            ( 2000, 20 * DEFAULT_COST);          // fdiv
  int_def FSQRT_COST           ( 2500, 25 * DEFAULT_COST);          // fsqrt
  int_def VOLATILE_REF_COST    ( 1000, 10 * DEFAULT_COST);
%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "opto/addnode.hpp"
#include "opto/convertnode.hpp"
#include "runtime/objectMonitor.hpp"

// Derived register masks. They are defined in the source block of this file
// and assigned their contents by reg_mask_init().
extern RegMask _ANY_REG32_mask;
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _NO_SPECIAL_REG32_mask;
extern RegMask _NO_SPECIAL_REG_mask;
extern RegMask _NO_SPECIAL_PTR_REG_mask;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

public:
  // Byte size of a call trampoline stub.
  // This platform has no call trampolines, so the size is zero.
  static uint size_call_trampoline() {
    const uint trampoline_bytes = 0;
    return trampoline_bytes;
  }

  // Number of relocations a call trampoline stub needs.
  // This platform has no call trampolines, so the count is zero.
  static uint reloc_call_trampoline() {
    const uint trampoline_relocs = 0;
    return trampoline_relocs;
  }
};

class HandlerImpl {

public:

  // Emitters for the exception and deopt handler stubs (defined in the
  // source block of this file).
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // Space reserved for the exception handler: one far branch.
  static uint size_exception_handler() {
    const uint far_branch_bytes = MacroAssembler::far_branch_size();
    return far_branch_bytes;
  }

  // Space reserved for the deopt handler: an auipc followed by a far branch.
  static uint size_deopt_handler() {
    const uint auipc_bytes = NativeInstruction::instruction_size;
    const uint far_branch_bytes = MacroAssembler::far_branch_size();
    return auipc_bytes + far_branch_bytes;
  }
};

// Definition of the nested class Node::PD. Its NodeFlags enum has a single
// enumerator, _last_flag, set equal to Node::_last_flag, so no further flag
// values are introduced here.
class Node::PD {
public:
  enum NodeFlags {
    _last_flag = Node::_last_flag
  };
};

// classifies an opcode as one of the CompareAndSwapX-style operations;
// maybe_volatile selects the result for the exchange/weak variants
// (see the definition in the source block)
bool is_CAS(int opcode, bool maybe_volatile);

// predicate controlling translation of CompareAndSwapX
bool needs_acquiring_load_reserved(const Node *load);

// predicate controlling addressing modes
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}

source %{

// Derived RegMask with conditionally allocatable registers.
// These start out default-constructed; reg_mask_init() below assigns their
// contents.

RegMask _ANY_REG32_mask;
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _NO_SPECIAL_REG32_mask;
RegMask _NO_SPECIAL_REG_mask;
RegMask _NO_SPECIAL_PTR_REG_mask;

// Populate the derived register masks. Each mask starts as a copy of the
// matching _ALL_* mask and then has registers taken out of it; the
// "no special" masks additionally lose x27 when UseCompressedOops is set
// and x8 when PreserveFramePointer is set.
void reg_mask_init() {

  // 32-bit "any" mask: everything except x0.
  _ANY_REG32_mask = _ALL_REG32_mask;
  _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg()));

  // 64-bit "any" and pointer masks: everything except the zero register.
  _ANY_REG_mask = _ALL_REG_mask;
  _ANY_REG_mask.SUBTRACT(_ZR_REG_mask);
  _PTR_REG_mask = _ALL_REG_mask;
  _PTR_REG_mask.SUBTRACT(_ZR_REG_mask);

  // 32-bit "no special" mask.
  _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
  _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
  if (UseCompressedOops) {
    // x27 is not allocatable when compressed oops is on
    _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
  }
  if (PreserveFramePointer) {
    // x8 is not allocatable when PreserveFramePointer is on
    _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
  }

  // 64-bit "no special" mask.
  _NO_SPECIAL_REG_mask = _ALL_REG_mask;
  _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
  if (UseCompressedOops) {
    _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
  }
  if (PreserveFramePointer) {
    _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
  }

  // Pointer "no special" mask.
  _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
  _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
  if (UseCompressedOops) {
    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
  }
  if (PreserveFramePointer) {
    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
  }
}

// Intentionally empty: no work is done in this hook here.
void PhaseOutput::pd_perform_mach_node_analysis() {
}

// Alignment requirement reported for a generic MachNode: always 1.
int MachNode::pd_alignment_required() const {
  return 1;
}

// Padding reported for a generic MachNode: always 0, regardless of
// current_offset.
int MachNode::compute_padding(int current_offset) const {
  return 0;
}

// is_CAS(int opcode, bool maybe_volatile)
//
// Classify opcode:
//   - strong CompareAndSwapX, GetAndSetX and GetAndAddX opcodes -> true
//   - CompareAndExchangeX and weak CompareAndSwapX opcodes      -> maybe_volatile
//   - anything else                                             -> false
bool is_CAS(int opcode, bool maybe_volatile)
{
  bool result = false;

  switch (opcode) {
    // Always treated as CAS.
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_ShenandoahCompareAndSwapP:
    case Op_ShenandoahCompareAndSwapN:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
      result = true;
      break;

    // Treated as CAS only when the caller passes maybe_volatile.
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_CompareAndExchangeN:
    case Op_ShenandoahCompareAndExchangeP:
    case Op_ShenandoahCompareAndExchangeN:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
    case Op_ShenandoahWeakCompareAndSwapP:
    case Op_ShenandoahWeakCompareAndSwapN:
      result = maybe_volatile;
      break;

    default:
      break;
  }

  return result;
}

// predicate controlling translation of CAS
//
// Returns true if the CAS needs to use an acquiring load, otherwise false.
// For opcodes that is_CAS() accepts unconditionally the answer is always
// true (a trailing membar is asserted); for the remaining opcodes the answer
// is whether the node has a trailing membar.
bool needs_acquiring_load_reserved(const Node *n)
{
  assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap");

  LoadStoreNode* ldst = n->as_LoadStore();
  const bool always_cas = (n != NULL) && is_CAS(n->Opcode(), false);

  if (!always_cas) {
    return ldst != NULL && ldst->trailing_membar() != NULL;
  }

  assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar");
  // so we can just return true here
  return true;
}
// Shorthand used by the emitters below: "__ foo()" expands to "_masm.foo()",
// so a local named _masm must be in scope wherever it is used.
#define __ _masm.

// advance declarations for helper functions to convert register
// indices to register objects

// the ad file has to provide implementations of certain methods
// expected by the generic code
//
// REQUIRED FUNCTIONALITY

//=============================================================================

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.

int MachCallStaticJavaNode::ret_addr_offset()
{
  // The call is a single jal, so the return address is one instruction in.
  const int call_instructions = 1;
  return call_instructions * NativeInstruction::instruction_size;
}

int MachCallDynamicJavaNode::ret_addr_offset()
{
  // movptr followed by jal: seven instructions in total.
  const int call_instructions = 7;
  return call_instructions * NativeInstruction::instruction_size;
}

int MachCallRuntimeNode::ret_addr_offset() {
  // When the entry point lies inside a code blob (generated stubs) the call is
  //   jal(addr)
  // or with far branches
  //   jal(trampoline_stub)
  // i.e. one instruction.
  // Real runtime callouts take 11 instructions, see riscv_enc_java_to_runtime:
  //   la(t1, retaddr)                ->  auipc + addi
  //   la(t0, RuntimeAddress(addr))   ->  lui + addi + slli + addi + slli + addi
  //   addi(sp, sp, -2 * wordSize)    ->  addi
  //   sd(t1, Address(sp, wordSize))  ->  sd
  //   jalr(t0)                       ->  jalr
  const bool entry_in_code_blob = (CodeCache::find_blob(_entry_point) != NULL);
  const int call_instructions = entry_in_code_blob ? 1 : 11;
  return call_instructions * NativeInstruction::instruction_size;
}

//
// Compute padding required for nodes which need alignment
//

// With RVC enabled, preceding code may leave the call at a 2-byte boundary.
// The call instruction must start at a 4-byte aligned address so that it
// does not span a cache line and can therefore be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  // Padding is the distance from the current offset to the next offset
  // that satisfies this node's alignment (keeps the jal 4-byte aligned).
  const int aligned_offset = align_up(current_offset, alignment_required());
  return aligned_offset - current_offset;
}

// With RVC enabled, preceding code may leave the call at a 2-byte boundary.
// The call instruction must start at a 4-byte aligned address so that it
// does not span a cache line and can therefore be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
  // The jal is preceded by the movptr emitted in MacroAssembler::ic_call():
  //   lui + addi + slli + addi + slli + addi
  // movptr() is already 4-byte aligned with or without RVC, but its size is
  // accounted for explicitly so later changes cannot shift the jal silently.
  const int movptr_size = 6 * NativeInstruction::instruction_size;
  const int jal_offset = current_offset + movptr_size;
  // Padding needed so that the jal itself lands on an aligned address.
  const int aligned_jal_offset = align_up(jal_offset, alignment_required());
  return aligned_jal_offset - jal_offset;
}

//=============================================================================

#ifndef PRODUCT
// Prints the textual form of the breakpoint node ("BREAKPOINT") to st.
// Compiled only when PRODUCT is not defined; st must not be NULL.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  assert_cond(st != NULL);
  st->print("BREAKPOINT");
}
#endif

// Emits a single ebreak into cbuf. ra_ is not used.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ ebreak();
}

// Size is not hard-coded; it is delegated to MachNode::size().
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

#ifndef PRODUCT
  // Prints a one-line description of the padding nops, with _count as the
  // number shown. Compiled only when PRODUCT is not defined.
  // NOTE(review): the text labels _count as "bytes" while emit() uses it as
  // the number of nop instructions -- confirm which is intended.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emits _count nop instructions into cbuf.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    C2_MacroAssembler _masm(&cbuf);
    // Permit compression: under RVC each nop is 2 bytes, which is what
    // alignment padding needs.
    Assembler::CompressibleRegion cr(&_masm);
    int emitted = 0;
    while (emitted < _count) {
      __ nop();
      emitted++;
    }
  }

  // Total byte size of the nops: _count times the per-nop size, which is
  // the compressed instruction size when UseRVC is set.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    if (UseRVC) {
      return _count * NativeInstruction::compressed_instruction_size;
    }
    return _count * NativeInstruction::instruction_size;
  }

//=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// The constant table is addressed absolutely, so the base offset is always 0.
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// No post-allocation expansion is requested for this node, so
// postalloc_expand() is not expected to be called and traps if it is.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// Emits nothing; this matches size() returning 0 for this node.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// Zero bytes, because emit() produces no code.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// Prints a placeholder line noting that this node has an empty encoding.
// Compiled only when PRODUCT is not defined; st must not be NULL.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  assert_cond(st != NULL);
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif

#ifndef PRODUCT
// Prints a textual sketch of the method prolog to st.
// Compiled only when PRODUCT is not defined; ra_ and st must not be NULL.
// The text is written independently of emit(), so it is a description, not
// a disassembly of the emitted code.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  assert_cond(st != NULL && ra_ != NULL);
  Compile* C = ra_->C;

  // Frame size shown in the listing: frame slots scaled by LogBytesPerInt.
  int framesize = C->output()->frame_slots() << LogBytesPerInt;

  if (C->output()->need_stack_bang(framesize)) {
    st->print("# stack bang size=%d\n\t", framesize);
  }

  // fp is shown at -2 * wordSize and ra at -wordSize relative to the
  // incoming sp, followed by the sp adjustment.
  st->print("sd  fp, [sp, #%d]\n\t", - 2 * wordSize);
  st->print("sd  ra, [sp, #%d]\n\t", - wordSize);
  if (PreserveFramePointer) { st->print("sub  fp, sp, #%d\n\t", 2 * wordSize); }
  st->print("sub sp, sp, #%d\n\t", framesize);

  // The nmethod entry barrier is listed only for non-stub compilations when
  // the barrier set provides a BarrierSetNMethod.
  if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
    st->print("ld  t0, [guard]\n\t");
    st->print("membar LoadLoad\n\t");
    st->print("ld  t1, [xthread, #thread_disarmed_offset]\n\t");
    st->print("beq t0, t1, skip\n\t");
    st->print("jalr #nmethod_entry_barrier_stub\n\t");
    st->print("j skip\n\t");
    st->print("guard: int\n\t");
    st->print("skip:\n\t");
  }
}
#endif

// Emits the method prolog into cbuf, in this order:
//   1. a 4-byte nop that can later be patched with a branch,
//   2. the class-initialization barrier, if requested for this compilation,
//   3. the stack overflow check (stack bang), if needed,
//   4. the frame build,
//   5. the nmethod entry barrier for non-stub compilations,
// then records the frame-complete offset and, if a constant base node
// exists, the constant table base offset.
// ra_ must not be NULL and must carry a non-NULL Compile object.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  assert_cond(ra_ != NULL);
  Compile* C = ra_->C;
  // Validate C before its first dereference below.
  assert_cond(C != NULL);
  C2_MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and fp
  const int framesize = C->output()->frame_size_in_bytes();

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  {
    Assembler::IncompressibleRegion ir(&_masm);  // keep the nop as 4 bytes for patching.
    MacroAssembler::assert_alignment(__ pc());
    __ nop();  // 4 bytes
  }

  if (C->clinit_barrier_on_entry()) {
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;

    // Pass the barrier check -> continue at L_skip_barrier; otherwise jump
    // to the handle-wrong-method stub.
    __ mov_metadata(t1, C->method()->holder()->constant_encoding());
    __ clinit_barrier(t1, t0, &L_skip_barrier);
    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
    __ bind(L_skip_barrier);
  }

  int bangsize = C->output()->bang_size_in_bytes();
  if (C->output()->need_stack_bang(bangsize)) {
    __ generate_stack_overflow_check(bangsize);
  }

  __ build_frame(framesize);

  if (C->stub_function() == NULL) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
      // Dummy labels for just measuring the code size
      Label dummy_slow_path;
      Label dummy_continuation;
      Label dummy_guard;
      Label* slow_path = &dummy_slow_path;
      Label* continuation = &dummy_continuation;
      Label* guard = &dummy_guard;
      if (!Compile::current()->output()->in_scratch_emit_size()) {
        // Use real labels from actual stub when not emitting code for purpose of measuring its size
        C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
        slow_path = &stub->slow_path();
        continuation = &stub->continuation();
        guard = &stub->guard();
      }
      // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
    }
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

// The prolog size depends on too many factors to hard-code, so it is
// delegated to MachNode::size(). ra_ must not be NULL.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  assert_cond(ra_ != NULL);
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// Relocation count reported for the prolog: always 0.
int MachPrologNode::reloc() const
{
  return 0;
}

//=============================================================================

#ifndef PRODUCT
// Prints a textual sketch of the method epilog to st.
// Compiled only when PRODUCT is not defined; ra_, its Compile object and st
// must not be NULL.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  assert_cond(st != NULL && ra_ != NULL);
  Compile* C = ra_->C;
  assert_cond(C != NULL);
  int framesize = C->output()->frame_size_in_bytes();

  st->print("# pop frame %d\n\t", framesize);

  if (framesize == 0) {
    // NOTE(review): these offsets do not mirror the prolog listing; the
    // branch looks unreachable if the frame always holds ra and fp -- confirm.
    st->print("ld  ra, [sp,#%d]\n\t", (2 * wordSize));
    st->print("ld  fp, [sp,#%d]\n\t", (3 * wordSize));
    st->print("add sp, sp, #%d\n\t", (2 * wordSize));
  } else {
    // After sp is restored, the saved registers sit where the prolog listing
    // stores them: ra at -wordSize and fp at -2 * wordSize.
    st->print("add  sp, sp, #%d\n\t", framesize);
    st->print("ld  ra, [sp,#%d]\n\t", - wordSize);
    st->print("ld  fp, [sp,#%d]\n\t", - 2 * wordSize);
  }

  // The return poll is listed only when polling is enabled for this node and
  // a method (not a stub) is being compiled.
  if (do_polling() && C->is_method_compilation()) {
    st->print("# test polling word\n\t");
    st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset()));
    st->print("bgtu sp, t0, #slow_path");
  }
}
#endif

// Emits the method epilog into cbuf: frame removal, the reserved-stack check
// when applicable, and the return safepoint poll when polling is enabled for
// a method compilation. ra_ and its Compile object must not be NULL.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  assert_cond(ra_ != NULL);
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);
  assert_cond(C != NULL);
  int framesize = C->output()->frame_size_in_bytes();

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // A dummy label stands in while code is emitted only to measure its size;
    // otherwise a real entry is added to the safepoint poll table.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
  }
}

// The epilog has no fixed size; it is delegated to MachNode::size().
// ra_ must not be NULL.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  assert_cond(ra_ != NULL);
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

// Relocation count reported for the epilog: always 1.
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
// The epilog uses the pipeline class returned by MachNode::pipeline_class().
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register classes used when classifying the source and destination of a
// spill copy: rc_bad (no register), rc_int, rc_float, rc_vector or rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack };

// Map an OptoReg name to its register class. The slot numbers are laid out
// as integer registers, then float registers, then vector registers; any
// name beyond those must be a stack slot.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // Exclusive upper bound of the slot range covered so far.
  // Integer registers: 30 registers * 2 slots (t0 and t1 are omitted).
  int limit = Register::max_slots_per_register * (Register::number_of_registers - 2);
  if (reg < limit) {
    return rc_int;
  }

  // Float registers: 32 registers * 2 slots.
  limit += FloatRegister::max_slots_per_register * FloatRegister::number_of_registers;
  if (reg < limit) {
    return rc_float;
  }

  // Vector registers: 32 registers * 4 slots.
  limit += VectorRegister::max_slots_per_register * VectorRegister::number_of_registers;
  if (reg < limit) {
    return rc_vector;
  }

  // Between vector regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}

// Shared worker behind MachSpillCopyNode::emit() and ::format().
//   cbuf    - when non-NULL, the move is emitted into this buffer
//   ra_     - register allocator; must not be NULL
//   do_size - not read anywhere in this function
//   st      - when non-NULL, a textual description of the move is printed
// Source and destination are classified with rc_class() and the matching
// register/stack move is selected. Returns 0 on every path.
uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
  assert_cond(ra_ != NULL);
  // NOTE(review): C is not referenced again in this function.
  Compile* C = ra_->C;

  // Get registers to move.
  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
  OptoReg::Name dst_hi = ra_->get_reg_second(this);
  OptoReg::Name dst_lo = ra_->get_reg_first(this);

  enum RC src_hi_rc = rc_class(src_hi);
  enum RC src_lo_rc = rc_class(src_lo);
  enum RC dst_hi_rc = rc_class(dst_hi);
  enum RC dst_lo_rc = rc_class(dst_lo);

  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");

  if (src_hi != OptoReg::Bad) {
    assert((src_lo & 1) == 0 && src_lo + 1 == src_hi &&
           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi,
           "expected aligned-adjacent pairs");
  }

  if (src_lo == dst_lo && src_hi == dst_hi) {
    return 0;            // Self copy, no move.
  }

  // A move is 64 bit when both source and destination are aligned-adjacent
  // lo/hi pairs.
  bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
              (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
  int src_offset = ra_->reg2offset(src_lo);
  int dst_offset = ra_->reg2offset(dst_lo);

  if (bottom_type()->isa_vect() != NULL) {
    // Vector moves: only Op_VecA is emitted, and only when a buffer is given.
    uint ireg = ideal_reg();
    if (ireg == Op_VecA && cbuf) {
      C2_MacroAssembler _masm(cbuf);
      int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
        // stack to stack
        __ spill_copy_vector_stack_to_stack(src_offset, dst_offset,
                                            vector_reg_size_in_bytes);
      } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) {
        // vpr to stack
        __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo));
      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) {
        // stack to vpr
        __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo));
      } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) {
        // vpr to vpr
        __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo]));
      } else {
        ShouldNotReachHere();
      }
    }
  } else if (cbuf != NULL) {
    // Scalar moves, dispatched on the source class and then the destination class.
    C2_MacroAssembler _masm(cbuf);
    switch (src_lo_rc) {
      case rc_int:
        if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
          if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass
            __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32);
          } else {
            __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]));
          }
        } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
          if (is64) {
            __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                       as_Register(Matcher::_regEncode[src_lo]));
          } else {
            __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                       as_Register(Matcher::_regEncode[src_lo]));
          }
        } else {                    // gpr --> stack spill
          assert(dst_lo_rc == rc_stack, "spill to bad register class");
          __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
        }
        break;
      case rc_float:
        if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
          if (is64) {
            __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]),
                       as_FloatRegister(Matcher::_regEncode[src_lo]));
          } else {
            __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]),
                       as_FloatRegister(Matcher::_regEncode[src_lo]));
          }
        } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
          if (is64) {
            __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     as_FloatRegister(Matcher::_regEncode[src_lo]));
          } else {
            __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     as_FloatRegister(Matcher::_regEncode[src_lo]));
          }
        } else {                    // fpr --> stack spill
          assert(dst_lo_rc == rc_stack, "spill to bad register class");
          __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
                   is64, dst_offset);
        }
        break;
      case rc_stack:
        if (dst_lo_rc == rc_int) {  // stack --> gpr load
          if (this->ideal_reg() == Op_RegI) {
            __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
          } else { // zero extended for narrow oop or klass
            __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
          }
        } else if (dst_lo_rc == rc_float) { // stack --> fpr load
          __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                     is64, src_offset);
        } else {                    // stack --> stack copy
          assert(dst_lo_rc == rc_stack, "spill to bad register class");
          // The value is staged through t0.
          if (this->ideal_reg() == Op_RegI) {
            __ unspill(t0, is64, src_offset);
          } else { // zero extended for narrow oop or klass
            __ unspillu(t0, is64, src_offset);
          }
          __ spill(t0, is64, dst_offset);
        }
        break;
      default:
        ShouldNotReachHere();
    }
  }

  if (st != NULL) {
    // Textual form: "spill <src> -> <dst>" followed by the size in bits.
    st->print("spill ");
    if (src_lo_rc == rc_stack) {
      st->print("[sp, #%d] -> ", src_offset);
    } else {
      st->print("%s -> ", Matcher::regName[src_lo]);
    }
    if (dst_lo_rc == rc_stack) {
      st->print("[sp, #%d]", dst_offset);
    } else {
      st->print("%s", Matcher::regName[dst_lo]);
    }
    if (bottom_type()->isa_vect() != NULL) {
      int vsize = 0;
      if (ideal_reg() == Op_VecA) {
        vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
      } else {
        ShouldNotReachHere();
      }
      st->print("\t# vector spill size = %d", vsize);
    } else {
      st->print("\t# spill size = %d", is64 ? 64 : 32);
    }
  }

  return 0;
}

#ifndef PRODUCT
// Prints the spill copy. Compiled only when PRODUCT is not defined.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  // With register allocation available, implementation() describes the move.
  if (ra_ != NULL) {
    implementation(NULL, ra_, false, st);
    return;
  }
  // Without it, only the node indices can be shown.
  st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
}
#endif

// Emits the spill copy into cbuf via implementation(), with no text output.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size is not computed here; it is delegated to MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================

#ifndef PRODUCT
// Prints the box lock as "add <reg>, sp, #<offset>", where the offset comes
// from the first element of input register mask 0.
// Compiled only when PRODUCT is not defined; ra_ and st must not be NULL.
void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  assert_cond(ra_ != NULL && st != NULL);
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("add %s, sp, #%d\t# box lock",
            Matcher::regName[reg], offset);
}
#endif

// Emits code that sets the node's register to sp + offset, where the offset
// comes from the first element of input register mask 0.
// The instruction count must stay in step with BoxLockNode::size(), which is
// why compression is disabled here.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see BoxLockNode::size()

  assert_cond(ra_ != NULL);
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (is_imm_in_range(offset, 12, 0)) {
    // Offset fits the addi immediate: one instruction.
    __ addi(as_Register(reg), sp, offset);
  } else if (is_imm_in_range(offset, 32, 0)) {
    // Larger offset: materialize it in t0, then add.
    __ li32(t0, offset);
    __ add(as_Register(reg), sp, t0);
  } else {
    ShouldNotReachHere();
  }
}

// Byte size of the code produced by BoxLockNode::emit(): one instruction
// when the stack offset fits a 12-bit immediate, otherwise three
// (lui + addiw + add).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  const int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  const int instruction_count = is_imm_in_range(offset, 12, 0) ? 1 : 3;
  return instruction_count * NativeInstruction::instruction_size;
}

//=============================================================================

#ifndef PRODUCT
// Prints a textual sketch of the unverified entry point (inline cache check).
// Compiled only when PRODUCT is not defined; st must not be NULL.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  assert_cond(st != NULL);
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    // 32-bit load of the narrow klass, decoded when a shift is in use.
    st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (CompressedKlassPointers::shift() != 0) {
      st->print_cr("\tdecode_klass_not_null t0, t0");
    }
  } else {
    // Full-width load: the klass is not compressed on this path.
    st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# klass");
  }
  st->print_cr("\tbeq t0, t1, ic_hit");
  st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check");
  st->print_cr("\tic_hit:");
}
#endif

// Emits the unverified entry point: a klass comparison that branches to
// `skip` on success, otherwise a far jump to the inline-cache miss stub,
// followed by alignment padding. ra_ is not used.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  C2_MacroAssembler _masm(&cbuf);

  Label skip;
  __ cmp_klass(j_rarg0, t1, t0, t2 /* call-clobbered t2 as a tmp */, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);

  // These NOPs are critical so that verified entry point is properly
  // 4 bytes aligned for patching by NativeJump::patch_verified_entry()
  __ align(NativeInstruction::instruction_size);
}

// Size is not hard-coded; it is delegated to MachNode::size().
// ra_ must not be NULL.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  assert_cond(ra_ != NULL);
  return MachNode::size(ra_);
}

// REQUIRED EMIT CODE

//=============================================================================

// Emit exception handler code.
// The handler is a far jump to the exception blob's entry point, placed in a
// stub of size_exception_handler() bytes.
// Returns the offset at which the handler starts, or 0 after recording a
// compilation failure when the stub could not be started.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // la_patchable t0, #exception_blob_entry_point
  // jr (offset)t0
  // or
  // j #exception_blob_entry_point
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The emitted code must fit in the space reserved above.
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// The handler is an auipc into ra followed by a far jump to the deopt blob's
// unpack entry, placed in a stub of size_deopt_handler() bytes.
// Returns the offset at which the handler starts, or 0 after recording a
// compilation failure when the stub could not be started.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // auipc with a zero immediate puts the address of this instruction in ra.
  __ auipc(ra, 0);
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  // The emitted code must fit in the space reserved above.
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;

}
// REQUIRED MATCHER CODE

//=============================================================================

// Reports whether a match rule for opcode can be used. An opcode without any
// match rule is never supported; the opcodes listed below additionally
// depend on a CPU feature or VM flag; everything else is supported.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false;
  }

  bool supported = true; // Per default match rules are supported.

  switch (opcode) {
    // Cache write-back needs data cache line flush support.
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      supported = VM_Version::supports_data_cache_line_flush();
      break;

    // Vector-based string intrinsics.
    case Op_StrCompressedCopy:
    case Op_StrInflatedCopy:
    case Op_CountPositives:
      supported = UseRVV;
      break;

    case Op_EncodeISOArray:
      supported = UseRVV && SpecialEncodeISOArray;
      break;

    case Op_PopCountI:
    case Op_PopCountL:
      supported = UsePopCountInstruction;
      break;

    // Rotates and leading/trailing zero counts are gated on UseZbb.
    case Op_RotateRight:
    case Op_RotateLeft:
    case Op_CountLeadingZerosI:
    case Op_CountLeadingZerosL:
    case Op_CountTrailingZerosI:
    case Op_CountTrailingZerosL:
      supported = UseZbb;
      break;

    default:
      break;
  }

  return supported;
}

// Superword support is answered by match_rule_supported_vector() with the
// same arguments.
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}

// Vector variant of match_rule_supported(): the opcode must be supported in
// general, the vector length (vlen) must be valid for the element type (bt),
// and the opcode must be accepted by op_vec_supported(). The checks are made
// in that order and stop at the first failure.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  return match_rule_supported(opcode) &&
         vector_size_supported(bt, vlen) &&
         op_vec_supported(opcode);
}

// Masked vector rules: this implementation answers false for every
// opcode / vlen / bt combination.
const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  return false;
}

// Always answers false: neither 'node' nor 'vt' is inspected.
const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}

// Returns NULL unconditionally.
// NOTE(review): presumably because this port exposes no predicate register
// class to the matcher - confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
  return NULL;
}

// Returns NULL unconditionally; 'elemTy' and 'length' are ignored.
const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return NULL;
}

// Vector calling convention not yet implemented: always answers false.
const bool Matcher::supports_vector_calling_convention(void) {
  return false;
}

// Not implemented on this platform: calls Unimplemented().  The return
// statement after it only supplies a value for the function's return type.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
// |---label(L1)-----|
// |-----------------|
// |-----------------|----------eq: float-------------------
// |-----------------| // far_cmpD_branch   |   cmpD_branch
// |------- ---------|    feq;              |      feq;
// |-far_cmpD_branch-|    beqz done;        |      bnez L;
// |-----------------|    j L;              |
// |-----------------|    bind(done);       |
// |-----------------|--------------------------------------
// |-----------------| // so shortBrSize = br_size - 4;
// |-----------------| // so offs = offset - shortBrSize + 4;
// |---label(L2)-----|
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // 'offset' is measured from the address of the (far) branch.  The short
  // variant is 4 bytes smaller than the far one, so re-base the offset onto
  // the short form before testing it.
  const int short_size = br_size - 4;
  const int adjusted   = offset - short_size + 4;

  // Accept the half-open interval [-4096, 4096).
  if (adjusted < -4096) {
    return false;
  }
  return adjusted < 4096;
}

// Vector width in bytes for element type 'bt' (the type is not consulted).
// Yields 0 when UseRVV is off, MaxVectorSize otherwise.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  if (!UseRVV) {
    return 0;
  }
  // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV.
  // MaxVectorSize == VM_Version::_initial_vector_length
  return MaxVectorSize;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt) / type2aelembytes(bt);
}
// Lower limit on the number of elements of type 'bt' in one vector.
// It is defined to be the same value as max_vector_size(bt).
const int Matcher::min_vector_size(const BasicType bt) {
  return max_vector_size(bt);
}

// Ideal register kind used for a vector of 'len' (asserted to be no larger
// than MaxVectorSize).  With UseRVV the answer is Op_VecA; calling this
// without UseRVV is a fatal error.
const uint Matcher::vector_ideal_reg(int len) {
  assert(MaxVectorSize >= len, "");
  if (!UseRVV) {
    ShouldNotReachHere();
    return 0;
  }
  return Op_VecA;
}

// Size of a scalable vector register for element type 'bt'; simply
// forwards to max_vector_size(bt), i.e. it is expressed in elements.
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return Matcher::max_vector_size(bt);
}

// Must never be called here: reaching it is a fatal error.  The NULL
// return only supplies a value for the function's return type.
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
  ShouldNotReachHere(); // generic vector operands not supported
  return NULL;
}

// Must never be called here: reaching it is a fatal error.
bool Matcher::is_reg2reg_move(MachNode* m) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;
}

// Must never be called here: reaching it is a fatal error.
bool Matcher::is_generic_vector(MachOper* opnd) {
  ShouldNotReachHere(); // generic vector operands not supported
  return false;
}

// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
//
// The accepted set is R10..R17 and F10..F17, each with its high half.
bool Matcher::can_be_java_arg(int reg)
{
  switch (reg) {
    // integer registers
    case R10_num: case R10_H_num:
    case R11_num: case R11_H_num:
    case R12_num: case R12_H_num:
    case R13_num: case R13_H_num:
    case R14_num: case R14_H_num:
    case R15_num: case R15_H_num:
    case R16_num: case R16_H_num:
    case R17_num: case R17_H_num:
    // floating-point registers
    case F10_num: case F10_H_num:
    case F11_num: case F11_H_num:
    case F12_num: case F12_H_num:
    case F13_num: case F13_H_num:
    case F14_num: case F14_H_num:
    case F15_num: case F15_H_num:
    case F16_num: case F16_H_num:
    case F17_num: case F17_H_num:
      return true;
    default:
      return false;
  }
}

// A register is a spillable argument exactly when can_be_java_arg()
// accepts it.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Integer register pressure threshold.  An explicit INTPRESSURE setting
// (anything other than -1) is returned as is; otherwise a default is
// derived from the size of _NO_SPECIAL_REG32_mask.
uint Matcher::int_pressure_limit()
{
  if (INTPRESSURE != -1) {
    return INTPRESSURE;
  }

  // A derived pointer is live at CallNode and then is flagged by RA
  // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip
  // derived pointers and lastly fail to spill after reaching maximum
  // number of iterations. Lowering the default pressure threshold to
  // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become
  // a high register pressure area of the code so that split_DEF can
  // generate DefinitionSpillCopy for the derived pointer.
  uint threshold = _NO_SPECIAL_REG32_mask.Size() - 1;

  // When PreserveFramePointer is off, frame pointer is allocatable,
  // but different from other SOC registers, it is excluded from
  // fatproj's mask because its save type is No-Save. Decrease 1 to
  // ensure high pressure at fatproj when PreserveFramePointer is off.
  // See check_pressure_at_fatproj().
  if (!PreserveFramePointer) {
    threshold--;
  }
  return threshold;
}

// Floating-point register pressure threshold.  An explicit FLOATPRESSURE
// setting (anything other than -1) wins; otherwise the size of the float
// register mask is used.
uint Matcher::float_pressure_limit()
{
  if (FLOATPRESSURE != -1) {
    return FLOATPRESSURE;
  }
  // _FLOAT_REG_mask is generated by adlc from the float_reg register class.
  return _FLOAT_REG_mask.Size();
}

// Always answers false, whatever the divisor.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// Must never be called here: reaching it is a fatal error.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
// Must never be called here: reaching it is a fatal error.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
// Must never be called here: reaching it is a fatal error.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
// Must never be called here: reaching it is a fatal error.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register mask used to save SP across a method handle invoke:
// the mask returned by FP_REG_mask().
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}

// Checks the memory users of an address expression.
//
// Walks the fast outputs of 'addp' and returns false as soon as one of them
// is a memory node whose access size differs from (1 << shift) bytes.
// Outputs that are not memory nodes are ignored.  Returns true when no
// mismatching memory user is found (including when there is none at all).
bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
  assert_cond(addp != NULL);
  for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
    Node* u = addp->fast_out(i);
    if (u != NULL && u->is_Mem()) {
      // Query the size once and use that value for both the sanity check
      // and the comparison, so the local is not assert-only.
      int opsize = u->as_Mem()->memory_size();
      assert(opsize > 0, "unexpected memory operand size");
      if (opsize != (1 << shift)) {
        return false;
      }
    }
  }
  return true;
}

// Should the Matcher clone input 'm' of node 'n'?
// Only the vector-shift-by-constant pattern is cloned; in that case 'm'
// is also pushed on the matcher stack in the Visit state.
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  assert_cond(m != NULL);
  if (!is_vshift_con_pattern(n, m)) {
    return false;
  }
  // Pattern: ShiftV src (ShiftCntV con), where m is the ShiftCntV.
  mstack.push(m, Visit);
  return true;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// The decision is delegated entirely to clone_base_plus_offset_address().
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  return clone_base_plus_offset_address(m, mstack, address_visited);
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to
// output byte streams.  Encoding classes are parameterized macros
// used by Machine Instruction Nodes in order to generate the bit
// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
// which returns its register number when queried.  CONST_INTER causes
// an operand to generate a function which returns the value of the
// constant when queried.  MEMORY_INTER causes an operand to generate
// four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate six functions which return
// the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional
// instruction.
//
// Instructions specify two basic values for encoding.  Again, a
// function is available to check if the constant displacement is an
// oop. They use the ins_encode keyword to specify their encoding
// classes (which must be a sequence of enc_class names, and their
// parameters, specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular
// instruction needs for encoding need to be specified.
encode %{
  // BEGIN Non-volatile memory access

  // Load the integer/long immediate src, widened to 64 bits, into dst.
  enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{
    C2_MacroAssembler _masm(&cbuf);
    __ mv(as_Register($dst$$reg), (int64_t)$src$$constant);
  %}

  // Load a pointer constant into dst.  The constants NULL and 1 must not
  // reach this encoding.  How the constant is materialized depends on its
  // relocation type: oop, metadata, or none (plain value).
  enc_class riscv_enc_mov_p(iRegP dst, immP src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register target = as_Register($dst$$reg);
    address value = (address)$src$$constant;
    if (value != NULL && value != (address)1) {
      relocInfo::relocType rtype = $src->constant_reloc();
      switch (rtype) {
        case relocInfo::oop_type:
          __ movoop(target, (jobject)value);
          break;
        case relocInfo::metadata_type:
          __ mov_metadata(target, (Metadata*)value);
          break;
        default:
          assert(rtype == relocInfo::none, "unexpected reloc type");
          __ mv(target, $src$$constant);
          break;
      }
    } else {
      ShouldNotReachHere();
    }
  %}

  // Set the pointer register dst to the constant 1.
  enc_class riscv_enc_mov_p1(iRegP dst) %{
    C2_MacroAssembler _masm(&cbuf);
    __ mv(as_Register($dst$$reg), 1);
  %}

  // Load the byte map base into dst via MacroAssembler::load_byte_map_base.
  enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{
    C2_MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Load a narrow oop constant into dst.  A NULL constant must not reach
  // this encoding, and the constant is expected to carry an oop relocation.
  enc_class riscv_enc_mov_n(iRegN dst, immN src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register target = as_Register($dst$$reg);
    address value = (address)$src$$constant;
    if (value != NULL) {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(target, (jobject)value);
    } else {
      ShouldNotReachHere();
    }
  %}

  // Clear dst by copying the zero register into it.
  enc_class riscv_enc_mov_zero(iRegNorP dst) %{
    C2_MacroAssembler _masm(&cbuf);
    __ mv(as_Register($dst$$reg), zr);
  %}

  // Load a narrow klass constant into dst.  A NULL constant must not reach
  // this encoding, and the constant is expected to carry a metadata
  // relocation.
  enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{
    C2_MacroAssembler _masm(&cbuf);
    Register target = as_Register($dst$$reg);
    address value = (address)$src$$constant;
    if (value != NULL) {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(target, (Klass *)value);
    } else {
      ShouldNotReachHere();
    }
  %}

  // Compare-and-exchange with operand size Assembler::int32 on the location
  // addressed by the base register of mem.  Ordering arguments: relaxed for
  // acquire, rl for release.  res receives the outcome as a boolean.
  enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::int32,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // Compare-and-exchange with operand size Assembler::uint32 on the location
  // addressed by the base register of mem.  Ordering arguments: relaxed for
  // acquire, rl for release.  res receives the outcome as a boolean.
  enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::uint32,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // Compare-and-exchange with operand size Assembler::int64 on the location
  // addressed by the base register of mem.  Ordering arguments: relaxed for
  // acquire, rl for release.  res receives the outcome as a boolean.
  enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::int64,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // Acquiring variant of the Assembler::int32 compare-and-exchange: the
  // ordering arguments are aq for acquire and rl for release.  res receives
  // the outcome as a boolean.
  enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::int32,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // Acquiring variant of the Assembler::uint32 compare-and-exchange: the
  // ordering arguments are aq for acquire and rl for release.  res receives
  // the outcome as a boolean.
  enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::uint32,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // Acquiring variant of the Assembler::int64 compare-and-exchange: the
  // ordering arguments are aq for acquire and rl for release.  res receives
  // the outcome as a boolean.
  enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
    C2_MacroAssembler _masm(&cbuf);
    Register addr     = as_Register($mem$$base);
    Register expected = $oldval$$Register;
    Register desired  = $newval$$Register;
    Register outcome  = $res$$Register;
    __ cmpxchg(addr, expected, desired, Assembler::int64,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl,
               outcome, /*result as bool*/ true);
  %}

  // compare and branch instruction encodings

  // Unconditional jump to the label operand.
  enc_class riscv_enc_j(label lbl) %{
    C2_MacroAssembler _masm(&cbuf);
    Label* target = $lbl$$label;
    __ j(*target);
  %}

  // Branch for a compare whose condition code is either ge or lt:
  //   ge -> emit an unconditional jump to the label
  //   lt -> emit nothing
  // Any other condition code is unimplemented.  op1 is not used by the
  // emitted code.
  enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{
    C2_MacroAssembler _masm(&cbuf);
    Label* target = $lbl$$label;
    const int cond = $cmp$$cmpcode;
    if (cond == BoolTest::ge) {
      __ j(*target);
    } else if (cond != BoolTest::lt) {
      Unimplemented();
    }
  %}

  // call instruction encodings

  // Emits the slow-path subtype check.  check_klass_subtype_slow_path is
  // given no success label (NULL) and the local miss label for failure, so
  // the code right after the call runs on fall-through.
  //
  // With a non-zero primary opcode the fall-through path zeroes the result
  // register.  Otherwise the outcome is left in t1: 0 on the fall-through
  // path, 1 on the miss path.
  enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{
    C2_MacroAssembler _masm(&cbuf);
    Register sub_klass   = as_Register($sub$$reg);
    Register super_klass = as_Register($super$$reg);
    Register scratch     = as_Register($temp$$reg);
    Register res         = as_Register($result$$reg);
    Register cr          = t1;

    Label L_miss;
    Label L_done;
    __ check_klass_subtype_slow_path(sub_klass, super_klass, scratch, res,
                                     NULL, &L_miss);

    // Fall-through path.
    if ($primary) {
      __ mv(res, zr);
    } else {
      __ mv(cr, zr);
      __ j(L_done);
    }

    // Miss path.
    __ bind(L_miss);
    if (!$primary) {
      __ mv(cr, 1);
    }

    __ bind(L_done);
  %}

  // Emits a static (or optimized virtual) Java call, or a call to a runtime
  // wrapper when the node has no _method.  Every emission step that can
  // fail (trampoline call, interpreter stub) records "CodeCache is full" on
  // the current ciEnv and returns early.  On success a post-call nop is
  // emitted after the call.
  enc_class riscv_enc_java_static_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset

    address addr = (address)$meth$$method;
    address call = NULL;
    assert_cond(addr != NULL);
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // A real Java target: pick the relocation that matches the call kind.
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }

      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin());
      } else {
        // Emit stub for static call
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
        if (stub == NULL) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }

    __ post_call_nop();
  %}

  // Emits an inline-cache call to the method operand.  If ic_call fails to
  // emit the call, "CodeCache is full" is recorded on the current ciEnv and
  // emission stops; otherwise a post-call nop follows the call.
  enc_class riscv_enc_java_dynamic_call(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset
    address call_pc = __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    if (call_pc != NULL) {
      __ post_call_nop();
    } else {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Call epilog.  Emits nothing unless VerifyStackAtCalls is set; the
  // verification itself is not implemented, so in that case only a
  // call_Unimplemented() is emitted.
  enc_class riscv_enc_call_epilog() %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      __ call_Unimplemented();
    }
  %}

  // Emits a call from compiled Java code to a runtime entry point.
  enc_class riscv_enc_java_to_runtime(method meth) %{
    C2_MacroAssembler _masm(&cbuf);
    Assembler::IncompressibleRegion ir(&_masm);  // Fixed length: see ret_addr_offset

    // Some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls.  If the entry point belongs to a blob in
    // the code cache it is called through trampoline_call; otherwise the
    // absolute address is loaded into t0 and called with jalr.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb != NULL) {
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
      __ post_call_nop();
    } else {
      Label retaddr;
      // t1 = return address of the call below, t0 = call target.
      __ la(t1, retaddr);
      __ la(t0, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      // (two words are reserved on the stack; the return address goes
      // into the upper one).
      __ addi(sp, sp, -2 * wordSize);
      __ sd(t1, Address(sp, wordSize));
      __ jalr(t0);
      __ bind(retaddr);
      __ post_call_nop();
      // Release the two words reserved above.
      __ addi(sp, sp, 2 * wordSize);
    }
  %}

  // Fast-path monitor enter.
  //
  // Using the cr register (t1, named 'flag' below) as the bool result:
  // 0 for success; others failed.
  //
  // Registers: object = the oop being locked, box = the on-stack lock box,
  // tmp1 = displaced header scratch, tmp2 = general scratch.  t0 is used as
  // an additional scratch register.
  //
  // On exit, when flag is zero the held monitor count of the current
  // thread has been incremented.
  enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{
    C2_MacroAssembler _masm(&cbuf);
    Register flag = t1;
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp1$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label no_count;

    assert_different_registers(oop, box, tmp, disp_hdr, t0);

    // Load markWord from object into displaced_header.
    __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    if (DiagnoseSyncOnValueBasedClasses != 0) {
      // Leave with a non-zero flag when the class of the object has the
      // JVM_ACC_IS_VALUE_BASED_CLASS access flag set.
      __ load_klass(flag, oop);
      __ lwu(flag, Address(flag, Klass::access_flags_offset()));
      __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */);
      __ bnez(flag, cont, true /* is_far */);
    }

    // Check for existing monitor
    __ andi(t0, disp_hdr, markWord::monitor_value);
    __ bnez(t0, object_has_monitor);

    if (!UseHeavyMonitors) {
      // Set tmp to be (markWord of object | UNLOCK_VALUE).
      __ ori(tmp, disp_hdr, markWord::unlocked_value);

      // Initialize the box. (Must happen before we update the object mark!)
      __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // Compare object markWord with an unlocked value (tmp) and if
      // equal exchange the stack address of our box with object markWord.
      // On failure disp_hdr contains the possibly locked markWord.
      __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq,
                 Assembler::rl, /*result*/disp_hdr);
      __ mv(flag, zr);
      __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas

      assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

      // If the compare-and-exchange succeeded, then we found an unlocked
      // object, will have now locked it will continue at label cont
      // We did not see an unlocked object so try the fast recursive case.

      // Check if the owner is self by comparing the value in the
      // markWord of object (disp_hdr) with the stack pointer.
      __ sub(disp_hdr, disp_hdr, sp);
      __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place));
      // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont,
      // hence we can store 0 as the displaced header in the box, which indicates that it is a
      // recursive lock.
      __ andr(tmp/*==0?*/, disp_hdr, tmp);
      __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
      __ mv(flag, tmp); // we can use the value of tmp as the result here
    } else {
      __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path
    }

    __ j(cont);

    // Handle existing monitor.
    __ bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    // (disp_hdr still holds the mark word, i.e. the monitor address with
    // markWord::monitor_value added, hence the subtraction in the offset.)
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value));
    __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq,
             Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected)

    // Store a non-null value into the box to avoid looking like a re-entrant
    // lock. The fast-path monitor unlock code checks for
    // markWord::monitor_value so use markWord::unused_mark which has the
    // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
    __ mv(tmp, (address)markWord::unused_mark().value());
    __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    __ beqz(flag, cont); // CAS success means locking succeeded

    __ bne(flag, xthread, cont); // Check for recursive locking

    // Recursive lock case
    __ mv(flag, zr);
    __ increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1, t0, tmp);

    __ bind(cont);

    // Only a successful lock (flag == 0) is counted.
    __ bnez(flag, no_count);

    __ increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp);

    __ bind(no_count);
  %}

  // Fast-path monitor exit.
  //
  // Using cr flag (t1, named 'flag' below) to indicate the fast_unlock
  // result: 0 for success; others failed.
  //
  // Registers: object = the oop being unlocked, box = the on-stack lock box,
  // tmp1 = displaced header scratch, tmp2 = general scratch.  t0 is used as
  // an additional scratch register.
  //
  // On exit, when flag is zero the held monitor count of the current
  // thread has been decremented.
  enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{
    C2_MacroAssembler _masm(&cbuf);
    Register flag = t1;
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp1$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label no_count;

    assert_different_registers(oop, box, tmp, disp_hdr, flag);

    if (!UseHeavyMonitors) {
      // Find the lock address and load the displaced header from the stack.
      __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // If the displaced header is 0, we have a recursive unlock.
      // (flag is a copy of the header, so it is zero on that path.)
      __ mv(flag, disp_hdr);
      __ beqz(disp_hdr, cont);
    }

    // Handle existing monitor.
    __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ andi(t0, tmp, markWord::monitor_value);
    __ bnez(t0, object_has_monitor);

    if (!UseHeavyMonitors) {
      // Check if it is still a light weight lock, this is true if we
      // see the stack address of the basicLock in the markWord of the
      // object.

      __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed,
                 Assembler::rl, /*result*/tmp);
      __ xorr(flag, box, tmp); // box == tmp if cas succeeds
    } else {
      __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path
    }
    __ j(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    __ bind(object_has_monitor);
    STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
    __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor
    __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));

    Label notRecursive;
    __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive.

    // Recursive lock: decrement the recursion count and report success.
    __ addi(disp_hdr, disp_hdr, -1);
    __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ mv(flag, zr);
    __ j(cont);

    __ bind(notRecursive);
    // Leave with a non-zero flag when either the EntryList or the cxq
    // field of the monitor is non-zero.
    __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0.
    __ bnez(flag, cont);
    // need a release store here
    __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
    __ sd(zr, Address(tmp)); // set unowned

    __ bind(cont);

    // Only a successful unlock (flag == 0) is counted.
    __ bnez(flag, no_count);

    __ decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp);

    __ bind(no_count);
  %}

  // arithmetic encodings

  // dst = quotient of src1 and src2, via corrected_idivl with the
  // want-remainder argument set to false.
  enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
    __ corrected_idivl(as_Register($dst$$reg),
                       as_Register($src1$$reg),
                       as_Register($src2$$reg),
                       false);
  %}

  // dst = quotient of src1 and src2, via corrected_idivq with the
  // want-remainder argument set to false.
  enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
    __ corrected_idivq(as_Register($dst$$reg),
                       as_Register($src1$$reg),
                       as_Register($src2$$reg),
                       false);
  %}

  // dst = remainder of src1 and src2, via corrected_idivl with the
  // want-remainder argument set to true.
  enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
    __ corrected_idivl(as_Register($dst$$reg),
                       as_Register($src1$$reg),
                       as_Register($src2$$reg),
                       true);
  %}

  // dst = remainder of src1 and src2, via corrected_idivq with the
  // want-remainder argument set to true.
  enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    C2_MacroAssembler _masm(&cbuf);
    __ corrected_idivq(as_Register($dst$$reg),
                       as_Register($src1$$reg),
                       as_Register($src2$$reg),
                       true);
  %}

  // Tail call: jump through the register holding the target address.
  enc_class riscv_enc_tail_call(iRegP jump_target) %{
    C2_MacroAssembler _masm(&cbuf);
    __ jr(as_Register($jump_target$$reg));
  %}

  // Tail jump used on the exception path.  Register protocol:
  //   x10 - exception oop (expected to be there already)
  //   ra  - return address, already popped
  //   x13 - where the callee expects the return address
  enc_class riscv_enc_tail_jmp(iRegP jump_target) %{
    C2_MacroAssembler _masm(&cbuf);
    // Hand the return address over in x13, then jump.
    __ mv(x13, ra);
    __ jr(as_Register($jump_target$$reg));
  %}

  // Rethrow: far jump to the stub returned by OptoRuntime::rethrow_stub().
  enc_class riscv_enc_rethrow() %{
    C2_MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Emit a return instruction.
  enc_class riscv_enc_ret() %{
    C2_MacroAssembler _masm(&cbuf);
    __ ret();
  %}

%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//           (the latter is true on Intel but is it false on RISCV?)
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.
// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
//         alignment.  Region 11, pad1, may be dynamically extended so that
//         SP meets the minimum alignment.

// Frame description: registers with a fixed role, stack alignment, the
// location of the return address, and where compiled Java code returns
// its values.
frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R31);

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset);

  // Number of stack slots consumed by locking an object
  // generate Compile::sync_stack_slots
  // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
  sync_stack_slots(1 * VMRegImpl::slots_per_word);

  // Compiled code's Frame Pointer
  frame_pointer(R2);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R8);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  // The two tables below are indexed by the ideal register opcode and give
  // the low and high halves of the returned OptoRegPair: R10 for the
  // integer and pointer kinds, F10 for float and double.  Kinds without a
  // high half use OptoReg::Bad.
  // NOTE(review): the tables carry an Op_RegN entry, but the assert only
  // admits Op_RegI..Op_RegL - confirm whether Op_RegN is meant to be
  // accepted here.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R10_num,                           // Op_RegN
      R10_num,                           // Op_RegI
      R10_num,                           // Op_RegP
      F10_num,                           // Op_RegF
      F10_num,                           // Op_RegD
      R10_num                            // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                      // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R10_H_num,                         // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      F10_H_num,                         // Op_RegD
      R10_H_num                          // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute; default operand cost is 1

//----------Instruction Attributes---------------------------------------------
// Each attribute is declared with the default value used when an
// instruction does not override it.
ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2). Specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction.

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------

// Integer operands 32 bit
// 32 bit immediate: matches any ConI node, at zero cost.
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero: matches a ConI node whose value is 0.
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment: matches a ConI node whose value is 1.
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement: matches a ConI node whose value is -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32.
// Matches a ConI node whose value lies in the range [32, 63].
operand uimmI6_ge32() %{
  predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate less than or equal to 4. There is no lower bound in the
// predicate, so zero and negative constants match as well.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate with the exact value 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate with the exact value 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate with the exact value 31.
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate with the exact value 63.
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add immediate: the constant, widened to 64 bits,
// must be accepted by Assembler::operand_valid_for_add_immediate().
operand immIAdd()
%{
  predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for sub immediate: the negated constant must be a
// valid add immediate. The value is widened to 64 bits before negation.
operand immISub()
%{
  predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 5 bit signed value: a ConI node in the range [-16, 15].
operand immI5()
%{
  predicate(n->get_int() <= 15 && n->get_int() >= -16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 5 bit signed value (simm5): a ConL node in the range [-16, 15].
operand immL5()
%{
  predicate(n->get_long() <= 15 && n->get_long() >= -16);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate: matches any ConL node, at zero cost.
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero: matches a ConL node whose value is 0.
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer operands
// Pointer Immediate: matches any ConP node, at zero cost.
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate: matches a ConP node whose pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One: matches a ConP node whose pointer value is 1.
// This is used in object initialization (initial object header).
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base: matches a ConP node only when the active
// barrier set is a CardTableBarrierSet and the constant equals that card
// table's byte_map_base().
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (CardTable::CardValue*)n->get_ptr() ==
             ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: low 16-bit mask, i.e. a ConI node equal to 0xFFFF.
operand immI_16bits()
%{
  predicate(n->get_int() == 0xFFFF);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: power of two. The value is reinterpreted as unsigned
// (juint) before the is_power_of_2() test.
operand immIpowerOf2() %{
  predicate(is_power_of_2((juint)(n->get_int())));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask, i.e. a ConL node equal to 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement: matches a ConL node whose value is -1.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset of the pc slot in the thread anchor: a long constant (ConL) equal
// to JavaThread::frame_anchor_offset() plus
// JavaFrameAnchor::last_Java_pc_offset(), both taken in bytes.

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add immediate, as decided by
// Assembler::operand_valid_for_add_immediate().
operand immLAdd()
%{
  predicate(Assembler::operand_valid_for_add_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for sub immediate: the negated constant must be a
// valid add immediate.
operand immLSub()
%{
  predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long())));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate: matches any ConN node, at zero cost.
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate: matches a ConN node whose narrow constant
// is 0.
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate: matches any ConNKlass node, at zero cost.
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float and Double operands
// Double Immediate: matches any ConD node, at zero cost.
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d. The test is on the raw bit pattern
// (jlong_cast(...) == 0), so -0.0d does not match.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: matches any ConF node, at zero cost.
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f. The test is on the raw bit pattern
// (jint_cast(...) == 0), so -0.0f does not match.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit immediate usable as a memory offset: accepted when
// is_imm_in_range(value, 12, 0) holds.
// NOTE(review): presumably a 12-bit signed displacement -- confirm against
// the definition of is_imm_in_range().
operand immIOffset()
%{
  predicate(is_imm_in_range(n->get_int(), 12, 0));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit immediate usable as a memory offset: accepted when
// is_imm_in_range(value, 12, 0) holds.
// NOTE(review): presumably a 12-bit signed displacement -- confirm against
// the definition of is_imm_in_range().
operand immLOffset()
%{
  predicate(is_imm_in_range(n->get_long(), 12, 0));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values: a ConI node in the range [1, 3].
operand immIScale()
%{
  predicate(1 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer 32 bit Register Operands
// Any 32 bit integer register (class any_reg32). Also accepts the more
// restricted iRegINoSp operand.
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special: allocated from no_special_reg32.
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R10 only (class int_r10_reg).
operand iRegI_R10()
%{
  constraint(ALLOC_IN_RC(int_r10_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R12 only (class int_r12_reg).
operand iRegI_R12()
%{
  constraint(ALLOC_IN_RC(int_r12_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R13 only (class int_r13_reg).
operand iRegI_R13()
%{
  constraint(ALLOC_IN_RC(int_r13_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R14 only (class int_r14_reg).
operand iRegI_R14()
%{
  constraint(ALLOC_IN_RC(int_r14_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Any 64 bit integer register (class any_reg). Also accepts the more
// restricted iRegLNoSp operand.
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special: allocated from no_special_reg.
// Also accepts iRegL_R10.
// NOTE(review): unlike the sibling register operands this one declares no
// op_cost(0), so the op_attrib default cost applies -- confirm this is
// intended.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R10);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R28 only (class r28_reg).
operand iRegL_R28()
%{
  constraint(ALLOC_IN_RC(r28_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R29 only (class r29_reg).
operand iRegL_R29()
%{
  constraint(ALLOC_IN_RC(r29_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R30 only (class r30_reg).
operand iRegL_R30()
%{
  constraint(ALLOC_IN_RC(r30_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register: any register of class ptr_reg. Also accepts the
// iRegPNoSp, iRegP_R10, iRegP_R15 and javaThread_RegP operands.
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R10);
  match(iRegP_R15);
  match(javaThread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special: allocated from no_special_ptr_reg.
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only (class r10_reg).
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R11 only (class r11_reg).
operand iRegP_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R12 only (class r12_reg).
operand iRegP_R12()
%{
  constraint(ALLOC_IN_RC(r12_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R13 only (class r13_reg).
operand iRegP_R13()
%{
  constraint(ALLOC_IN_RC(r13_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R14 only (class r14_reg).
operand iRegP_R14()
%{
  constraint(ALLOC_IN_RC(r14_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R15 only (class r15_reg).
operand iRegP_R15()
%{
  constraint(ALLOC_IN_RC(r15_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R16 only (class r16_reg).
operand iRegP_R16()
%{
  constraint(ALLOC_IN_RC(r16_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R28 only (class r28_reg).
operand iRegP_R28()
%{
  constraint(ALLOC_IN_RC(r28_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R30 only (class r30_reg).
operand iRegP_R30()
%{
  constraint(ALLOC_IN_RC(r30_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R31 only (class r31_reg).
operand iRegP_R31()
%{
  constraint(ALLOC_IN_RC(r31_reg));
  match(RegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register Operands
// Narrow Pointer Register: any register of class any_reg32. Also accepts
// the more restricted iRegNNoSp operand.
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special: matches RegN and is allocated from
// no_special_reg32.
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Heap base register -- used for encoding immN0. Matches RegI and is
// allocated from heapbase_reg.
operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R10 only (class r10_reg).
operand iRegL_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Float Register
// Float register operand: matches RegF, allocated from float_reg.
operand fRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operand: matches RegD, allocated from double_reg.
operand fRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Generic vector class. This will be used for all vector operands.
// Matches VecA and is allocated from vectora_reg.
operand vReg()
%{
  constraint(ALLOC_IN_RC(vectora_reg));
  match(VecA);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register V1 only (class v1_reg).
operand vReg_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register V2 only (class v2_reg).
operand vReg_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register V3 only (class v3_reg).
operand vReg_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register V4 only (class v4_reg).
operand vReg_V4()
%{
  constraint(ALLOC_IN_RC(v4_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Vector register V5 only (class v5_reg).
operand vReg_V5()
%{
  constraint(ALLOC_IN_RC(v5_reg));
  match(VecA);
  match(vReg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Java Thread Register: an iRegP constrained to the java_thread_reg class.
operand javaThread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// RISCV has only base_plus_offset and literal address mode, so no need to use
// index and scale. Here set index as 0xffffffff and scale as 0x0.

// Register-indirect address: base register only, zero displacement.
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Base register plus 32 bit immediate offset (AddP reg off), where the
// offset must satisfy immIOffset.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Base register plus 64 bit immediate offset (AddP reg off), where the
// offset must satisfy immLOffset.
operand indOffL(iRegP reg, immLOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Register-indirect address through a narrow oop (DecodeN reg). Only
// applies when CompressedOops::shift() is 0; the narrow register is then
// used directly as the base.
operand indirectN(iRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// Narrow oop base plus 32 bit immediate offset (AddP (DecodeN reg) off).
// Only applies when CompressedOops::shift() is 0.
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// Narrow oop base plus 64 bit immediate offset (AddP (DecodeN reg) off).
// Only applies when CompressedOops::shift() is 0.
operand indOffLN(iRegN reg, immLOffset off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// RISCV opto stubs need to write to the pc slot in the thread anchor.
// Address formed from the Java thread register plus the immL_pc_off
// constant.
operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.

// Stack slot holding an int (sRegI).
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x02);  // x2 (sp), the RISC-V stack pointer
    index(0xffffffff);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float (sRegF).
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x02);  // x2 (sp), the RISC-V stack pointer
    index(0xffffffff);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double (sRegD).
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x02);  // x2 (sp), the RISC-V stack pointer
    index(0xffffffff);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long (sRegL).
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x02);  // x2 (sp), the RISC-V stack pointer
    index(0xffffffff);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Special operand allowing long args to int ops to be truncated for free.
// Matches (ConvL2I reg) at zero cost, so a rule that accepts this operand
// absorbs the conversion instead of needing a separate instruction.

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// Comparison Operands
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Used for signed integral comparisons and fp comparisons.
// Matches any Bool node.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}

  // The encodings in this interface derive from struct BoolTest::mask;
  // the strings are the names used when the condition is printed.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    greater(0x1, "gt");
    overflow(0x2, "overflow");
    less(0x3, "lt");
    not_equal(0x4, "ne");
    less_equal(0x5, "le");
    no_overflow(0x6, "no_overflow");
    greater_equal(0x7, "ge");
  %}
%}

// Used for unsigned integral comparisons.
// Matches any Bool node; same encodings as cmpOp, but the ordering
// conditions print with a "u" suffix.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // The encodings in this interface derive from struct BoolTest::mask.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    greater(0x1, "gtu");
    overflow(0x2, "overflow");
    less(0x3, "ltu");
    not_equal(0x4, "ne");
    less_equal(0x5, "leu");
    no_overflow(0x6, "no_overflow");
    greater_equal(0x7, "geu");
  %}
%}

// Used for certain integral comparisons which can be converted to bxx
// instructions. Matches a Bool node only when its test is eq or ne.
operand cmpOpEqNe()
%{
  match(Bool);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  // Same encodings as cmpOp.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    greater(0x1, "gt");
    overflow(0x2, "overflow");
    less(0x3, "lt");
    not_equal(0x4, "ne");
    less_equal(0x5, "le");
    no_overflow(0x6, "no_overflow");
    greater_equal(0x7, "ge");
  %}
%}

// Matches a Bool node only when its test is lt or ge.
// NOTE(review): the name suggests use with unsigned comparisons, yet the
// printed condition names below are the signed ones -- confirm against the
// instructions that take this operand.
operand cmpOpULtGe()
%{
  match(Bool);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  // Same encodings as cmpOp.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    greater(0x1, "gt");
    overflow(0x2, "overflow");
    less(0x3, "lt");
    not_equal(0x4, "ne");
    less_equal(0x5, "le");
    no_overflow(0x6, "no_overflow");
    greater_equal(0x7, "ge");
  %}
%}

// Matches a Bool node only when its test is eq, ne, le or gt.
// NOTE(review): the name suggests use with unsigned comparisons, yet the
// printed condition names below are the signed ones -- confirm against the
// instructions that take this operand.
operand cmpOpUEqNeLeGt()
%{
  match(Bool);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);

  format %{ "" %}
  // Same encodings as cmpOp.
  interface(COND_INTER) %{
    equal(0x0, "eq");
    greater(0x1, "gt");
    overflow(0x2, "overflow");
    less(0x3, "lt");
    not_equal(0x4, "ne");
    less_equal(0x5, "le");
    no_overflow(0x6, "no_overflow");
    greater_equal(0x7, "ge");
  %}
%}

// Flags register, used as output of compare logic. Matches RegFlags and is
// allocated from reg_flags.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(reg_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register: an iRegP constrained to the method_reg class. Also
// accepts iRegPNoSp.
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define the read/write location for load/store
// instruction definitions. It groups every addressing form declared as a
// MEMORY_INTER operand above, so a memory operand can be turned into an
// Address.

opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. It allows the src to be either an iRegI or a (ConvL2I
// iRegL). In the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// N.B. this does not elide all L2I conversions. If the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes, so an l2i gets planted
// (actually a mvw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// mvw is actually redundant, but it's not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
// The remaining classes group register or immediate operands of several
// kinds so that one rule can accept any of them.
opclass iRegIorL(iRegI, iRegL);
opclass iRegNorP(iRegN, iRegP);
opclass iRegILNP(iRegI, iRegL, iRegN, iRegP);
opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp);
opclass immIorL(immI, immL);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, e.g. generic RISC-V, define the stages of that
// pipeline. The names below alias the first four stages declared by
// pipe_desc (S0..S3): instruction decode, execute, memory and write-back.
#define ID   S0
#define EX   S1
#define MEM  S2
#define WR   S3

// Pipeline definition
pipeline %{

// Global pipeline attributes.
attributes %{
  // RISC-V instructions are of fixed length
  fixed_size_instructions;           // Fixed size instructions (TODO: verify)
  max_instructions_per_bundle = 2;   // Generic RISC-V 1, Sifive Series 7 2
  // RISC-V instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}

// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic RISC-V pipeline
// 1 decoder
// 1 instruction decoded per cycle
// 1 load/store op per cycle, 1 branch, 1 FPU
// 1 mul, 1 div
//
// Each name below is a functional unit that pipe classes can claim in a
// given stage.

resources ( DECODE,
            ALU,
            MUL,
            DIV,
            BRANCH,
            LDST,
            FPU);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline. The first four stages
// are also reachable through the ID/EX/MEM/WR aliases.
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Single-precision FP operation with two register sources.
// Sources are read in S1 and S2; the result is written in S5 on the FPU.
pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Double-precision FP operation with two register sources.
// Sources are read in S1 and S2; the result is written in S5 on the FPU.
// Declared single_instruction like its single-precision twin
// fp_dop_reg_reg_s and every other FP pipe class, so that the scheduler
// counts it as one instruction.
pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Single-precision FP operation with one register source.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_uop_s(fRegF dst, fRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Double-precision FP operation with one register source.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_uop_d(fRegD dst, fRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a double register to a float register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_d2f(fRegF dst, fRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a float register to a double register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_f2d(fRegD dst, fRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a float register to a 32 bit integer register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_f2i(iRegINoSp dst, fRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a float register to a 64 bit integer register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_f2l(iRegLNoSp dst, fRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a 32 bit integer source to a float register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_i2f(fRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a 64 bit integer register to a float register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_l2f(fRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a double register to a 32 bit integer register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_d2i(iRegINoSp dst, fRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a double register to a 64 bit integer register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_d2l(iRegLNoSp dst, fRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a 32 bit integer source to a double register.
// Source read in S1; result written in S5 on the FPU.
pipe_class fp_i2d(fRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Conversion from a 64 bit integer register to a double register.
// Source read in S1; result written in S5 on the FPU.
// The source is declared as iRegL, matching the twin class fp_l2f: a long
// source is a 64 bit register, not the 32 bit iRegIorL2I class.
pipe_class fp_l2d(fRegD dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Single-precision FP divide: two register sources read in S1 and S2,
// result written in S5 on the FPU.
pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Double-precision FP divide: two register sources read in S1 and S2,
// result written in S5 on the FPU.
pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Single-precision FP square root: result written in S5 on the FPU.
// NOTE(review): two sources are declared although a square root takes one
// input -- confirm against the instructions that use this class.
pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Double-precision FP square root: result written in S5 on the FPU.
// NOTE(review): two sources are declared although a square root takes one
// input -- confirm against the instructions that use this class.
pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Load of a float constant: no register sources, result written in S5 on
// the FPU.
pipe_class fp_load_constant_s(fRegF dst)
%{
  single_instruction;
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Load of a double constant: no register sources, result written in S5 on
// the FPU.
pipe_class fp_load_constant_d(fRegD dst)
%{
  single_instruction;
  dst    : S5(write);
  DECODE : ID;
  FPU    : S5;
%}

// Float load from memory: address read in S1, result written in S5, using
// the load/store unit in the MEM stage.
pipe_class fp_load_mem_s(fRegF dst, memory mem)
%{
  single_instruction;
  mem    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  LDST   : MEM;
%}

// Double load from memory: address read in S1, result written in S5, using
// the load/store unit in the MEM stage.
pipe_class fp_load_mem_d(fRegD dst, memory mem)
%{
  single_instruction;
  mem    : S1(read);
  dst    : S5(write);
  DECODE : ID;
  LDST   : MEM;
%}

// Float store to memory: source register read in S1, memory written in S5,
// using the load/store unit in the MEM stage.
pipe_class fp_store_reg_s(fRegF src, memory mem)
%{
  single_instruction;
  src    : S1(read);
  mem    : S5(write);
  DECODE : ID;
  LDST   : MEM;
%}

// Double store to memory: source register read in S1, memory written in
// S5, using the load/store unit in the MEM stage.
pipe_class fp_store_reg_d(fRegD src, memory mem)
%{
  single_instruction;
  src    : S1(read);
  mem    : S5(write);
  DECODE : ID;
  LDST   : MEM;
%}

//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in ID, result generated in EX
// E.g.  ADD   Rd, Rs1, Rs2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  ALU    : EX;
%}

// Integer ALU reg operation with constant shift
// Source needed in ID, result generated in EX
// E.g. SLLI    Rd, Rs1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX(write);
  src1   : ID(read);
  DECODE : ID;
  ALU    : EX;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in ID; result generated in EX
// E.g. SLL   Rd, Rs1, Rs2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  ALU    : EX;
%}

// Integer ALU reg operation
// Source needed in ID, result generated in EX
// E.g. NEG   Rd, Rs2
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX(write);
  src    : ID(read);
  DECODE : ID;
  ALU    : EX;
%}

// Integer ALU reg immediate operation
// Source needed in ID, result generated in EX
// E.g. ADDI   Rd, Rs1, #imm
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX(write);
  src1   : ID(read);
  DECODE : ID;
  ALU    : EX;
%}

// Integer ALU immediate operation (no source operands)
// Result generated in EX
// E.g. LI    Rd, #imm
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX(write);
  DECODE : ID;
  ALU    : EX;
%}

//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Sources needed in ID; result written in WR on the multiply unit
// E.g. MULW   Rd, Rs1, Rs2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  MUL    : WR;
%}

// 64 bit multiply reg-reg, with a fixed latency of 3
// E.g. MUL   Rd, Rs1, Rs2
// NOTE(review): the operands are declared as iRegI although this class
// models the 64 bit multiply -- confirm whether iRegL was intended.
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  MUL    : WR;
%}

//------- Divide pipeline operations --------------------

// 32 bit divide reg-reg, with a fixed latency of 8
// E.g. DIVW   Rd, Rs1, Rs2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  DIV    : WR;
%}

// 64 bit divide reg-reg, with a fixed latency of 16
// E.g. DIV   Rd, Rs1, Rs2
// NOTE(review): the operands are declared as iRegI although this class
// models the 64 bit divide -- confirm whether iRegL was intended.
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  DIV    : WR;
%}

//------- Load pipeline operations ------------------------

// Load - prefetch
// Address needed in ID; uses the load/store unit in MEM
// E.g. PREFETCH_W  mem
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ID(read);
  DECODE : ID;
  LDST   : MEM;
%}

// Load - reg, mem
// Address needed in ID; result written in WR; uses the load/store unit in MEM
// E.g. LA    Rd, mem
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ID(read);
  DECODE : ID;
  LDST   : MEM;
%}

// Load - reg, reg
// E.g. LD    Rd, Rs
// The address register src is read in ID and dst is written in WR. DECODE is
// used in ID and LDST in MEM.
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ID(read);
  DECODE : ID;
  LDST   : MEM;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// E.g. SD    zr, mem
// No register source operand is listed: only mem is read, in ID. DECODE is
// used in ID and LDST in MEM.
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ID(read);
  DECODE : ID;
  LDST   : MEM;
%}

// Store - reg, mem
// E.g. SD    Rs, mem
// mem is read in ID, while the value to store (src) is read later, in EX.
// DECODE is used in ID and LDST in MEM.
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ID(read);
  src    : EX(read);
  DECODE : ID;
  LDST   : MEM;
%}

// Store - reg, reg
// E.g. SD    Rs2, Rs1
// Both operands are reads: dst is read in ID and src is read in EX.
// DECODE is used in ID and LDST in MEM.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ID(read);
  src    : EX(read);
  DECODE : ID;
  LDST   : MEM;
%}

//------- Branch pipeline operations ----------------------

// Branch
// Branch with no register operands: DECODE is used in ID and BRANCH in EX.
pipe_class pipe_branch()
%{
  single_instruction;
  DECODE : ID;
  BRANCH : EX;
%}

// Branch
// Branch with one register operand: src is read in ID. DECODE is used in ID
// and BRANCH in EX.
pipe_class pipe_branch_reg(iRegI src)
%{
  single_instruction;
  src    : ID(read);
  DECODE : ID;
  BRANCH : EX;
%}

// Compare & Branch
// E.g. BEQ   Rs1, Rs2, L
// Both compared registers are read in ID. DECODE is used in ID and BRANCH
// in EX.
pipe_class pipe_cmp_branch(iRegI src1, iRegI src2)
%{
  single_instruction;
  src1   : ID(read);
  src2   : ID(read);
  DECODE : ID;
  BRANCH : EX;
%}

// Compare against zero & Branch
// E.g. BEQZ Rs, L
// The single compared register is read in ID. DECODE is used in ID and
// BRANCH in EX.
pipe_class pipe_cmpz_branch(iRegI src)
%{
  single_instruction;
  src    : ID(read);
  DECODE : ID;
  BRANCH : EX;
%}

//------- Synchronisation operations ----------------------
// Any operation requiring serialization
// E.g. FENCE/Atomic Ops/Load Acquire/Store Release
// Declared as a single instruction with force_serialization and a fixed
// latency of 16. DECODE is used in ID and LDST in MEM.
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  DECODE : ID;
  LDST   : MEM;
%}

// Class for rules that expand to a multi-instruction sequence. Unlike
// pipe_serial it is not single_instruction: it declares an instruction count
// of 10 spanning multiple bundles, with force_serialization and a fixed
// latency of 16. DECODE is used in ID and LDST in MEM.
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  DECODE : ID;
  LDST   : MEM;
%}

// Empty pipeline class
// Single instruction with zero latency; no operands or resources are listed.
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
// Single instruction with a fixed latency of 2; no operands or resources
// are listed.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
// Single instruction with a fixed latency of 16; no operands or resources
// are listed.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
// Single instruction with a fixed latency of 16; no operands or resources
// are listed.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
// Single instruction with a fixed latency of 100; no operands or resources
// are listed.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
// MachNop is bound to pipe_class_empty.
define %{
   MachNop = pipe_class_empty;
%}
%}
//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// ============================================================================
// Memory (Load/Store) Instructions

// Load Instructions

// Load Byte (8 bit signed)
// Matches an ideal LoadB and emits a single lb from the base register plus
// displacement of $mem into an int register.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(LOAD_COST);
  format %{ "lb  $dst, $mem\t# byte, #@loadB" %}

  ins_encode %{
    __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// Matches ConvI2L applied directly to a LoadB, so the conversion is folded
// into the load: only a single lb into a long register is emitted.
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(LOAD_COST);
  format %{ "lb  $dst, $mem\t# byte, #@loadB2L" %}

  ins_encode %{
    __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
// Matches an ideal LoadUB and emits a single lbu from the base register plus
// displacement of $mem into an int register.
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(LOAD_COST);
  format %{ "lbu  $dst, $mem\t# byte, #@loadUB" %}

  ins_encode %{
    __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
// Matches ConvI2L applied directly to a LoadUB, so the conversion is folded
// into the load: only a single lbu into a long register is emitted.
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(LOAD_COST);
  format %{ "lbu  $dst, $mem\t# byte, #@loadUB2L" %}

  ins_encode %{
    __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
// Matches an ideal LoadS and emits a single lh from the base register plus
// displacement of $mem into an int register.
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(LOAD_COST);
  format %{ "lh  $dst, $mem\t# short, #@loadS" %}

  ins_encode %{
    __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
// Matches ConvI2L applied directly to a LoadS, so the conversion is folded
// into the load: only a single lh into a long register is emitted.
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(LOAD_COST);
  format %{ "lh  $dst, $mem\t# short, #@loadS2L" %}

  ins_encode %{
    __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
// Matches an ideal LoadUS and emits a single lhu from the base register plus
// displacement of $mem into an int register.
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(LOAD_COST);
  format %{ "lhu  $dst, $mem\t# short, #@loadUS" %}

  ins_encode %{
    __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
// Matches ConvI2L applied directly to a LoadUS, so the conversion is folded
// into the load: only a single lhu into a long register is emitted.
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(LOAD_COST);
  format %{ "lhu  $dst, $mem\t# short, #@loadUS2L" %}

  ins_encode %{
    __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
// Matches an ideal LoadI and emits a single lw from the base register plus
// displacement of $mem into an int register.
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(LOAD_COST);
  format %{ "lw  $dst, $mem\t# int, #@loadI" %}

  ins_encode %{
    __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
// Matches ConvI2L applied directly to a LoadI, so the conversion is folded
// into the load: only a single lw into a long register is emitted.
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(LOAD_COST);
  format %{ "lw  $dst, $mem\t# int, #@loadI2L" %}

  ins_encode %{
    __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches the tree AndL(ConvI2L(LoadI mem), mask) where mask is an
// immL_32bits operand, and replaces the whole tree with a single lwu.
// The mask operand only takes part in matching; the encoding does not
// reference it.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(LOAD_COST);
  format %{ "lwu  $dst, $mem\t# int, #@loadUI2L" %}

  ins_encode %{
    __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
// Matches an ideal LoadL and emits a single ld from the base register plus
// displacement of $mem into a long register.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(LOAD_COST);
  // The disassembly tag reads "long" to match the 64-bit value being loaded.
  format %{ "ld  $dst, $mem\t# long, #@loadL" %}

  ins_encode %{
    __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Range
// Matches an ideal LoadRange and emits a single lwu from the base register
// plus displacement of $mem into an int register.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(LOAD_COST);
  format %{ "lwu  $dst, $mem\t# range, #@loadRange" %}

  ins_encode %{
    __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
// Matches an ideal LoadP and emits a single ld into a pointer register.
// The predicate restricts this rule to load nodes whose barrier_data() is 0.
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(LOAD_COST);
  format %{ "ld  $dst, $mem\t# ptr, #@loadP" %}

  ins_encode %{
    __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
// Matches an ideal LoadN and emits a single lwu from the base register plus
// displacement of $mem into a narrow-oop register.
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(LOAD_COST);
  format %{ "lwu  $dst, $mem\t# loadN, compressed ptr, #@loadN" %}

  ins_encode %{
    __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
// Matches an ideal LoadKlass and emits a single ld from the base register
// plus displacement of $mem into a pointer register.
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(LOAD_COST);
  format %{ "ld  $dst, $mem\t# class, #@loadKlass" %}

  ins_encode %{
    __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
// Matches an ideal LoadNKlass and emits a single lwu from the base register
// plus displacement of $mem into a narrow register.
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));

  ins_cost(LOAD_COST);
  format %{ "lwu  $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %}

  ins_encode %{
    __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(iload_reg_mem);
%}

// Load Float
// Matches an ideal LoadF and emits a single flw from the base register plus
// displacement of $mem into a float register. Scheduled with the
// fp_load_mem_s pipe class.
instruct loadF(fRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(LOAD_COST);
  format %{ "flw  $dst, $mem\t# float, #@loadF" %}

  ins_encode %{
    __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(fp_load_mem_s);
%}

// Load Double
// Matches an ideal LoadD and emits a single fld from the base register plus
// displacement of $mem into a double register. Scheduled with the
// fp_load_mem_d pipe class.
instruct loadD(fRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(LOAD_COST);
  format %{ "fld  $dst, $mem\t# double, #@loadD" %}

  ins_encode %{
    __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(fp_load_mem_d);
%}

// Load Int Constant
// Materialises an immI constant into an int register. The code is produced
// by the riscv_enc_li_imm encode class, which is defined outside this rule.
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(ALU_COST);
  format %{ "li $dst, $src\t# int, #@loadConI" %}

  ins_encode(riscv_enc_li_imm(dst, src));

  ins_pipe(ialu_imm);
%}

// Load Long Constant
// Materialises an immL constant into a long register, using the same
// riscv_enc_li_imm encode class as loadConI.
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(ALU_COST);
  format %{ "li $dst, $src\t# long, #@loadConL" %}

  ins_encode(riscv_enc_li_imm(dst, src));

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Materialises an immP constant into a pointer register via the
// riscv_enc_mov_p encode class (defined outside this rule).
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(ALU_COST);
  format %{ "mv  $dst, $con\t# ptr, #@loadConP" %}

  ins_encode(riscv_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant
// Matches the immP0 operand class and encodes with riscv_enc_mov_zero, which
// takes only the destination register (the constant itself is not passed).
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(ALU_COST);
  format %{ "mv  $dst, $con\t# NULL ptr, #@loadConP0" %}

  ins_encode(riscv_enc_mov_zero(dst));

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant One
// Matches the immP_1 operand class and encodes with riscv_enc_mov_p1, which
// takes only the destination register (the constant itself is not passed).
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(ALU_COST);
  format %{ "mv  $dst, $con\t# load ptr constant one, #@loadConP1" %}

  ins_encode(riscv_enc_mov_p1(dst));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
// Matches the immByteMapBase operand class and encodes with
// riscv_enc_mov_byte_map_base, which takes only the destination register.
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);
  ins_cost(ALU_COST);
  format %{ "mv  $dst, $con\t# Byte Map Base, #@loadByteMapBase" %}

  ins_encode(riscv_enc_mov_byte_map_base(dst));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant
// Materialises an immN constant into a narrow-oop register via the
// riscv_enc_mov_n encode class. Costed at four ALU operations.
instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(ALU_COST * 4);
  format %{ "mv  $dst, $con\t# compressed ptr, #@loadConN" %}

  ins_encode(riscv_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant
// Matches the immN0 operand class and encodes with riscv_enc_mov_zero, the
// same encode class loadConP0 uses; only the destination is passed.
instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(ALU_COST);
  format %{ "mv  $dst, $con\t# compressed NULL ptr, #@loadConN0" %}

  ins_encode(riscv_enc_mov_zero(dst));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant
// Materialises an immNKlass constant into a narrow register via the
// riscv_enc_mov_nk encode class. Costed at six ALU operations.
instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(ALU_COST * 6);
  format %{ "mv  $dst, $con\t# compressed klass ptr, #@loadConNKlass" %}

  ins_encode(riscv_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Float Constant
// Loads an immF constant with a single flw from the address that
// $constantaddress($con) yields for the constant (a constant-table entry,
// per the format string). Costed as a load.
instruct loadConF(fRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(LOAD_COST);
  format %{
    "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF"
  %}

  ins_encode %{
    __ flw(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Float Constant matching the immF0 operand class
// No memory access is made: the value is produced by moving the zr register
// into the float register with fmv.w.x. Costed as a register transfer.
instruct loadConF0(fRegF dst, immF0 con) %{
  match(Set dst con);

  ins_cost(XFER_COST);

  format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %}

  ins_encode %{
    __ fmv_w_x(as_FloatRegister($dst$$reg), zr);
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Double Constant
// Loads an immD constant with a single fld from the address that
// $constantaddress($con) yields for the constant (a constant-table entry,
// per the format string). Costed as a load.
instruct loadConD(fRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(LOAD_COST);
  format %{
    "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD"
  %}

  ins_encode %{
    __ fld(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}

// Load Double Constant matching the immD0 operand class
// No memory access is made: the value is produced by moving the zr register
// into the double register with fmv.d.x. Costed as a register transfer.
instruct loadConD0(fRegD dst, immD0 con) %{
  match(Set dst con);

  ins_cost(XFER_COST);

  format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %}

  ins_encode %{
    __ fmv_d_x(as_FloatRegister($dst$$reg), zr);
  %}

  ins_pipe(fp_load_constant_d);
%}

// Store Instructions
// Store CMS card-mark Immediate
// Matches StoreCM of an immI0 operand and emits a single byte store of zr.
// No barrier is emitted; the format string records the StoreStore as elided.
// NOTE(review): storeimmCM0_ordered matches the same tree with no
// distinguishing predicate and a higher ins_cost -- confirm which rule the
// matcher is expected to select.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(STORE_COST);
  format %{ "storestore (elided)\n\t"
            "sb zr, $mem\t# byte, #@storeimmCM0" %}

  ins_encode %{
    __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
// Emits a memory barrier followed by a byte store of zr, and is costed as
// one ALU operation plus one store.
// NOTE(review): the format string prints membar(StoreStore) while the
// encoding requests LoadStore | StoreStore -- confirm which is intended.
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(ALU_COST + STORE_COST);
  format %{ "membar(StoreStore)\n\t"
            "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %}

  ins_encode %{
    // Barrier first, then the card-mark store.
    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
    __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Byte
// Matches an ideal StoreB and emits a single sb of $src to the base register
// plus displacement of $mem. The source operand class is iRegIorL2I.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(STORE_COST);
  format %{ "sb  $src, $mem\t# byte, #@storeB" %}

  ins_encode %{
    __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Byte of an immI0 operand
// Stores the zr register directly with a single sb, so the constant does
// not need to be loaded into a register first.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));

  ins_cost(STORE_COST);
  format %{ "sb zr, $mem\t# byte, #@storeimmB0" %}

  ins_encode %{
    __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Char/Short
// Matches an ideal StoreC and emits a single sh of $src to the base register
// plus displacement of $mem. The source operand class is iRegIorL2I.
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(STORE_COST);
  format %{ "sh  $src, $mem\t# short, #@storeC" %}

  ins_encode %{
    __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short of an immI0 operand
// Stores the zr register directly with a single sh, so the constant does
// not need to be loaded into a register first.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));

  ins_cost(STORE_COST);
  format %{ "sh  zr, $mem\t# short, #@storeimmC0" %}

  ins_encode %{
    __ sh(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Integer
// Matches an ideal StoreI and emits a single sw of $src to the base register
// plus displacement of $mem. The source operand class is iRegIorL2I.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(STORE_COST);
  format %{ "sw  $src, $mem\t# int, #@storeI" %}

  ins_encode %{
    __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Integer of an immI0 operand
// Stores the zr register directly with a single sw, so the constant does
// not need to be loaded into a register first.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));

  ins_cost(STORE_COST);
  format %{ "sw  zr, $mem\t# int, #@storeimmI0" %}

  ins_encode %{
    __ sw(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Long (64 bit signed)
// Matches an ideal StoreL and emits a single sd of $src to the base register
// plus displacement of $mem.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(STORE_COST);
  format %{ "sd  $src, $mem\t# long, #@storeL" %}

  ins_encode %{
    __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed) of an immL0 operand
// Stores the zr register directly with a single sd, so the constant does
// not need to be loaded into a register first.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));

  ins_cost(STORE_COST);
  format %{ "sd  zr, $mem\t# long, #@storeimmL0" %}

  ins_encode %{
    __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Pointer
// Matches an ideal StoreP and emits a single sd of $src to the base register
// plus displacement of $mem. No predicate restricts this rule.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(STORE_COST);
  format %{ "sd  $src, $mem\t# ptr, #@storeP" %}

  ins_encode %{
    __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Pointer of an immP0 operand
// Stores the zr register directly with a single sd, so the constant does
// not need to be loaded into a register first.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));

  ins_cost(STORE_COST);
  format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %}

  ins_encode %{
    __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
// Matches an ideal StoreN and emits a single sw of the narrow-oop register
// $src to the base register plus displacement of $mem.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(STORE_COST);
  format %{ "sw  $src, $mem\t# compressed ptr, #@storeN" %}

  ins_encode %{
    __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Compressed Null Pointer
// Matches StoreN of an immN0 operand and stores the zr register with a
// single sw. The heapbase operand is retained so the rule's operand list is
// unchanged, but it is not read: nothing in this rule guarantees that the
// heap-base register holds zero, so using it as the stored value would write
// a non-null narrow oop whenever the heap base is non-zero.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));

  ins_cost(STORE_COST);
  format %{ "sw  zr, $mem\t# compressed ptr, #@storeImmN0" %}

  ins_encode %{
    __ sw(zr, Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// Store Float
// Matches an ideal StoreF and emits a single fsw of the float register $src
// to the base register plus displacement of $mem. Scheduled with the
// fp_store_reg_s pipe class.
instruct storeF(fRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(STORE_COST);
  format %{ "fsw  $src, $mem\t# float, #@storeF" %}

  ins_encode %{
    __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(fp_store_reg_s);
%}

// Store Double
// Matches an ideal StoreD and emits a single fsd of the double register $src
// to the base register plus displacement of $mem. Scheduled with the
// fp_store_reg_d pipe class.
instruct storeD(fRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(STORE_COST);
  format %{ "fsd  $src, $mem\t# double, #@storeD" %}

  ins_encode %{
    __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(fp_store_reg_d);
%}

// Store Compressed Klass Pointer
// Matches an ideal StoreNKlass and emits a single sw of the narrow register
// $src to the base register plus displacement of $mem.
instruct storeNKlass(iRegN src, memory mem)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(STORE_COST);
  format %{ "sw  $src, $mem\t# compressed klass ptr, #@storeNKlass" %}

  ins_encode %{
    __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
  %}

  ins_pipe(istore_reg_mem);
%}

// ============================================================================
// Prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation
// Matches PrefetchAllocation and emits prefetch_w; the rule is only enabled
// when UseZicbop is set. Depending on the displacement, one to three
// instructions are emitted, and t0 is used as a scratch register in the
// cases where the address has to be computed first.
instruct prefetchalloc( memory mem ) %{
  predicate(UseZicbop);
  match(PrefetchAllocation mem);

  ins_cost(ALU_COST * 1);
  format %{ "prefetch_w $mem\t# Prefetch for write" %}

  ins_encode %{
    if (is_imm_in_range($mem$$disp, 12, 0)) {
      if (($mem$$disp & 0x1f) == 0) {
        // Displacement passes the range check and has its low five bits
        // clear: pass it to prefetch_w directly.
        __ prefetch_w(as_Register($mem$$base), $mem$$disp);
      } else {
        // Displacement passes the range check but has low bits set: form the
        // address with addi and prefetch at offset 0.
        __ addi(t0, as_Register($mem$$base), $mem$$disp);
        __ prefetch_w(t0, 0);
      }
    } else {
      // Displacement fails the range check: materialise it, add the base,
      // and prefetch at offset 0.
      __ mv(t0, $mem$$disp);
      __ add(t0, as_Register($mem$$base), t0);
      __ prefetch_w(t0, 0);
    }
  %}

  ins_pipe(iload_prefetch);
%}

// ============================================================================
// Atomic operation instructions
//

// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate
// Byte compare-and-swap producing a boolean result in $res.
// Expands through cmpxchg_narrow_value with size int8, Assembler::relaxed as
// the acquire argument and Assembler::rl as the release argument.
// oldval and newval use the iRegI_R12 / iRegI_R13 operand classes and are
// declared USE_KILL; res is TEMP_DEF, three int temporaries are reserved and
// cr is killed.
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                         iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);

  effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
  %}

  ins_encode %{
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
                            Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Short compare-and-swap producing a boolean result in $res.
// Same shape as compareAndSwapB but passes size int16 to
// cmpxchg_narrow_value and is costed with one more ALU operation.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                         iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);

  effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
  %}

  ins_encode %{
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
                            Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Int compare-and-swap producing a result in $res (1 on success, 0
// otherwise, per the format string). The code is produced by the
// riscv_enc_cmpxchgw encode class, defined outside this rule. No effect
// clause is declared, so no temporaries or kills are listed here.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI"
  %}

  ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Long compare-and-swap producing a result in the int register $res (1 on
// success, 0 otherwise, per the format string). The code is produced by the
// riscv_enc_cmpxchg encode class, defined outside this rule.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL"
  %}

  ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Pointer compare-and-swap producing a result in the int register $res.
// Uses the same riscv_enc_cmpxchg encode class as compareAndSwapL. The
// predicate restricts the rule to nodes whose barrier_data() is 0.
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP"
  %}

  ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Narrow-oop compare-and-swap producing a result in the int register $res.
// The code is produced by the riscv_enc_cmpxchgn encode class, defined
// outside this rule; it is costed with two more ALU operations than the
// int/long/pointer variants.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN"
  %}

  ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// alternative CompareAndSwapX when we are eliding barriers
// Acquiring byte compare-and-swap producing a boolean result in $res.
// Selected only when needs_acquiring_load_reserved(n) holds. Differs from
// compareAndSwapB in passing Assembler::aq (instead of relaxed) as the
// acquire argument of cmpxchg_narrow_value; release remains Assembler::rl.
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                            iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
  %}

  ins_encode %{
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
                            Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring short compare-and-swap producing a boolean result in $res.
// Selected only when needs_acquiring_load_reserved(n) holds. Passes size
// int16 with Assembler::aq / Assembler::rl to cmpxchg_narrow_value.
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                            iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
  %}

  ins_encode %{
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
                            Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring int compare-and-swap producing a result in $res.
// Selected only when needs_acquiring_load_reserved(n) holds; the code is
// produced by the riscv_enc_cmpxchgw_acq encode class, defined outside this
// rule.
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq"
  %}

  ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Acquiring long compare-and-swap producing a result in the int register
// $res. Selected only when needs_acquiring_load_reserved(n) holds; the code
// is produced by the riscv_enc_cmpxchg_acq encode class, defined outside
// this rule.
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq"
  %}

  ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Acquiring pointer compare-and-swap producing a result in the int register
// $res. The predicate requires both needs_acquiring_load_reserved(n) and a
// barrier_data() of 0. Uses the same riscv_enc_cmpxchg_acq encode class as
// compareAndSwapLAcq.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq"
  %}

  ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Acquiring narrow-oop compare-and-swap producing a result in the int
// register $res. Selected only when needs_acquiring_load_reserved(n) holds;
// the code is produced by the riscv_enc_cmpxchgn_acq encode class, defined
// outside this rule.
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq"
  %}

  ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// Sundry CAS operations.  Note that release is always true,
// regardless of the memory ordering of the CAS.  This is because we
// need the volatile case to be sequentially consistent but there is
// no trailing StoreLoad barrier emitted by C2.  Unfortunately we
// can't check the type of memory ordering here, so we always emit a
// sc_d(w) with rl bit set.

// Byte-wide compare-and-exchange (CompareAndExchangeB).  The encoding uses
// the non-weak cmpxchg_narrow_value helper with result_as_bool == false, so
// $res is a value rather than a success flag.  oldval/newval are USE_KILLed
// and three integer temporaries plus the flags register are reserved.
// The format text does not say "weak": this rule does not use the weak helper.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
                            /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Short-wide compare-and-exchange (CompareAndExchangeS).  The encoding uses
// the non-weak cmpxchg_narrow_value helper with result_as_bool == false, so
// $res is a value rather than a success flag.  oldval/newval are USE_KILLed
// and three integer temporaries plus the flags register are reserved.
// The format text does not say "weak": this rule does not use the weak helper.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
                            /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// 32-bit compare-and-exchange (CompareAndExchangeI).  Emits the non-weak
// cmpxchg helper with Assembler::int32; $res is marked TEMP_DEF so it does
// not share a register with the inputs.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// 64-bit compare-and-exchange (CompareAndExchangeL).  Emits the non-weak
// cmpxchg helper with Assembler::int64; $res is marked TEMP_DEF so it does
// not share a register with the inputs.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Narrow-oop compare-and-exchange (CompareAndExchangeN).  Emits the non-weak
// cmpxchg helper with Assembler::uint32; $res is marked TEMP_DEF so it does
// not share a register with the inputs.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Pointer compare-and-exchange (CompareAndExchangeP), eligible only when the
// LoadStore node carries no barrier data.  Emits the non-weak cmpxchg helper
// with Assembler::int64; $res is marked TEMP_DEF.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring byte-wide compare-and-exchange (CompareAndExchangeB), eligible
// only when needs_acquiring_load_reserved(n) is true.  Uses the non-weak
// cmpxchg_narrow_value helper with aq/rl ordering and result_as_bool == false.
// The format text does not say "weak": this rule does not use the weak helper.
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
                            /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring short-wide compare-and-exchange (CompareAndExchangeS), eligible
// only when needs_acquiring_load_reserved(n) is true.  Uses the non-weak
// cmpxchg_narrow_value helper with aq/rl ordering and result_as_bool == false.
// The format text does not say "weak": this rule does not use the weak helper.
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
                            /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring 32-bit compare-and-exchange (CompareAndExchangeI), eligible only
// when needs_acquiring_load_reserved(n) is true.  Emits the non-weak cmpxchg
// helper with Assembler::int32 and aq/rl ordering; $res is TEMP_DEF.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring 64-bit compare-and-exchange (CompareAndExchangeL), eligible only
// when needs_acquiring_load_reserved(n) is true.  Emits the non-weak cmpxchg
// helper with Assembler::int64 and aq/rl ordering; $res is TEMP_DEF.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring narrow-oop compare-and-exchange (CompareAndExchangeN), eligible
// only when needs_acquiring_load_reserved(n) is true.  Emits the non-weak
// cmpxchg helper with Assembler::uint32 and aq/rl ordering; $res is TEMP_DEF.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
// NOTE(review): ins_cost here uses ALU_COST while the non-acquiring
// compareAndExchangeN uses ALU_COST * 3 -- confirm the difference is intended.
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring pointer compare-and-exchange (CompareAndExchangeP), eligible only
// when needs_acquiring_load_reserved(n) is true and the LoadStore node carries
// no barrier data.  Emits the non-weak cmpxchg helper with Assembler::int64
// and aq/rl ordering; $res is TEMP_DEF.
// The format text does not say "weak": this rule does not use cmpxchg_weak.
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));

  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);

  effect(TEMP_DEF res);

  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak byte-wide compare-and-swap (WeakCompareAndSwapB).  Per the format
// text, $res == 1 signals success.  oldval/newval are USE_KILLed; three
// integer temporaries and the flags register are reserved for the helper.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapB"
  %}

  ins_encode %{
    // The memory operand is 'indirect', so only its base register is needed.
    Register base_reg = as_Register($mem$$base);
    __ weak_cmpxchg_narrow_value(base_reg, $oldval$$Register, $newval$$Register,
                                 Assembler::int8,
                                 /*acquire*/ Assembler::relaxed,
                                 /*release*/ Assembler::rl,
                                 $res$$Register,
                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak short-wide compare-and-swap (WeakCompareAndSwapS).  Per the format
// text, $res == 1 signals success.  oldval/newval are USE_KILLed; three
// integer temporaries and the flags register are reserved for the helper.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapS"
  %}

  ins_encode %{
    // The memory operand is 'indirect', so only its base register is needed.
    Register base_reg = as_Register($mem$$base);
    __ weak_cmpxchg_narrow_value(base_reg, $oldval$$Register, $newval$$Register,
                                 Assembler::int16,
                                 /*acquire*/ Assembler::relaxed,
                                 /*release*/ Assembler::rl,
                                 $res$$Register,
                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak 32-bit compare-and-swap (WeakCompareAndSwapI).  Emits cmpxchg_weak
// with Assembler::int32; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapI"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::int32,
                    /*acquire*/ Assembler::relaxed,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak 64-bit compare-and-swap (WeakCompareAndSwapL).  Emits cmpxchg_weak
// with Assembler::int64; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapL"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::int64,
                    /*acquire*/ Assembler::relaxed,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak narrow-oop compare-and-swap (WeakCompareAndSwapN).  Emits cmpxchg_weak
// with Assembler::uint32; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapN"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::uint32,
                    /*acquire*/ Assembler::relaxed,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Weak pointer compare-and-swap (WeakCompareAndSwapP), eligible only when the
// LoadStore node carries no barrier data.  Emits cmpxchg_weak with
// Assembler::int64; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  format %{
    "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapP"
  %}

  ins_encode %{
    // Relaxed on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::int64,
                    /*acquire*/ Assembler::relaxed,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak byte-wide compare-and-swap (WeakCompareAndSwapB), eligible
// only when needs_acquiring_load_reserved(n) is true.  Per the format text,
// $res == 1 signals success.  oldval/newval are USE_KILLed; three integer
// temporaries and the flags register are reserved for the helper.
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapBAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ weak_cmpxchg_narrow_value(base_reg, $oldval$$Register, $newval$$Register,
                                 Assembler::int8,
                                 /*acquire*/ Assembler::aq,
                                 /*release*/ Assembler::rl,
                                 $res$$Register,
                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak short-wide compare-and-swap (WeakCompareAndSwapS), eligible
// only when needs_acquiring_load_reserved(n) is true.  Per the format text,
// $res == 1 signals success.  oldval/newval are USE_KILLed; three integer
// temporaries and the flags register are reserved for the helper.
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);

  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);

  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapSAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ weak_cmpxchg_narrow_value(base_reg, $oldval$$Register, $newval$$Register,
                                 Assembler::int16,
                                 /*acquire*/ Assembler::aq,
                                 /*release*/ Assembler::rl,
                                 $res$$Register,
                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak 32-bit compare-and-swap (WeakCompareAndSwapI), eligible only
// when needs_acquiring_load_reserved(n) is true.  Emits cmpxchg_weak with
// Assembler::int32; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapIAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::int32,
                    /*acquire*/ Assembler::aq,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak 64-bit compare-and-swap (WeakCompareAndSwapL), eligible only
// when needs_acquiring_load_reserved(n) is true.  Emits cmpxchg_weak with
// Assembler::int64; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapLAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::int64,
                    /*acquire*/ Assembler::aq,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak narrow-oop compare-and-swap (WeakCompareAndSwapN), eligible
// only when needs_acquiring_load_reserved(n) is true.  Emits cmpxchg_weak with
// Assembler::uint32; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);

  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapNAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    Register base_reg = as_Register($mem$$base);
    __ cmpxchg_weak(base_reg, $oldval$$Register, $newval$$Register,
                    Assembler::uint32,
                    /*acquire*/ Assembler::aq,
                    /*release*/ Assembler::rl,
                    $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// Acquiring weak pointer compare-and-swap (WeakCompareAndSwapP), eligible only
// when needs_acquiring_load_reserved(n) is true and the LoadStore node carries
// no barrier data.  Emits cmpxchg_weak with Assembler::int64 and aq/rl
// ordering; per the format text, $res == 1 signals success.
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
%{
  predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));

  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));

  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);

  // The first line already ends in "\n\t", so the continuation line carries
  // no tab of its own (same layout as the other weakCompareAndSwap rules).
  format %{
    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
    "# $res == 1 when success, #@weakCompareAndSwapPAcq"
  %}

  ins_encode %{
    // aq on the acquire side, rl on the release side.
    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
                    /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
  %}

  ins_pipe(pipe_slow);
%}

// 32-bit atomic exchange (GetAndSetI): the node's result is produced in
// $prev, $newv is the value to store, and $mem supplies the address.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev)
%{
  match(Set prev (GetAndSetI mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchgw  $prev, $newv, [$mem]\t#@get_and_setI" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgw($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit atomic exchange (GetAndSetL): the node's result is produced in
// $prev, $newv is the value to store, and $mem supplies the address.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
%{
  match(Set prev (GetAndSetL mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchg  $prev, $newv, [$mem]\t#@get_and_setL" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchg($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Narrow-oop atomic exchange (GetAndSetN): the node's result is produced in
// $prev, $newv is the value to store, and $mem supplies the address.
// Uses the "wu" flavour of the exchange helper.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
%{
  match(Set prev (GetAndSetN mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgwu($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Pointer atomic exchange (GetAndSetP), eligible only when the LoadStore node
// carries no barrier data.  The node's result is produced in $prev, $newv is
// the value to store, and $mem supplies the address.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set prev (GetAndSetP mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchg  $prev, $newv, [$mem]\t#@get_and_setP" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchg($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 32-bit atomic exchange (GetAndSetI), eligible only when
// needs_acquiring_load_reserved(n) is true.  Emits the "al" flavour of the
// exchange helper (presumably acquire+release ordering -- confirm in the
// macro assembler).
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set prev (GetAndSetI mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]\t#@get_and_setIAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgalw($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit atomic exchange (GetAndSetL), eligible only when
// needs_acquiring_load_reserved(n) is true.  Emits the "al" flavour of the
// exchange helper (presumably acquire+release ordering -- confirm in the
// macro assembler).
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set prev (GetAndSetL mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]\t#@get_and_setLAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgal($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring narrow-oop atomic exchange (GetAndSetN), eligible only when
// needs_acquiring_load_reserved(n) is true.  Emits the "alwu" flavour of the
// exchange helper (presumably acquire+release ordering -- confirm in the
// macro assembler).
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set prev (GetAndSetN mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgalwu($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring pointer atomic exchange (GetAndSetP), eligible only when
// needs_acquiring_load_reserved(n) is true and the LoadStore node carries no
// barrier data.  Emits the "al" flavour of the exchange helper (presumably
// acquire+release ordering -- confirm in the macro assembler).
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev)
%{
  predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0));

  match(Set prev (GetAndSetP mem newv));

  ins_cost(ALU_COST);

  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]\t#@get_and_setPAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_xchgal($prev$$Register, $newv$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit atomic fetch-and-add (GetAndAddL) with a register increment.  The
// node's result is produced in the operand named $newval.
// NOTE(review): the operand name suggests the post-add value, but the matched
// node is a "get and add" -- confirm which value atomic_add returns.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr)
%{
  match(Set newval (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_add($newval$$Register, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit atomic add (GetAndAddL) for the case where the node's result is not
// used (result_not_used()).  The result operand is a Universe dummy and noreg
// is passed to atomic_add in the result position.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr)
%{
  predicate(n->as_LoadStore()->result_not_used());

  match(Set dummy (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_add(noreg, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit atomic fetch-and-add (GetAndAddL) with an immediate increment taken
// from the immLAdd operand.  The node's result is produced in $newval.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr)
%{
  match(Set newval (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_add($newval$$Register, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit atomic add (GetAndAddL) with an immediate increment, for the case
// where the node's result is not used.  noreg is passed in the result
// position.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr)
%{
  predicate(n->as_LoadStore()->result_not_used());

  match(Set dummy (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_add(noreg, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 32-bit atomic fetch-and-add (GetAndAddI) with a register increment (int, or
// long narrowed via the iRegIorL2I operand class).  The node's result is
// produced in $newval.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr)
%{
  match(Set newval (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addw($newval$$Register, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 32-bit atomic add (GetAndAddI) for the case where the node's result is not
// used (result_not_used()).  noreg is passed in the result position.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr)
%{
  predicate(n->as_LoadStore()->result_not_used());

  match(Set dummy (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addw(noreg, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 32-bit atomic fetch-and-add (GetAndAddI) with an immediate increment taken
// from the immIAdd operand.  The node's result is produced in $newval.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr)
%{
  match(Set newval (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addw($newval$$Register, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// 32-bit atomic add (GetAndAddI) with an immediate increment, for the case
// where the node's result is not used.  noreg is passed in the result
// position.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr)
%{
  predicate(n->as_LoadStore()->result_not_used());

  match(Set dummy (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addw(noreg, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit atomic fetch-and-add (GetAndAddL) with a register
// increment, eligible only when needs_acquiring_load_reserved(n) is true.
// Emits the "al" flavour of the add helper.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set newval (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addal($newval$$Register, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit atomic add (GetAndAddL) with a register increment, for the
// case where the node's result is not used and
// needs_acquiring_load_reserved(n) is true.  noreg is passed in the result
// position.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr)
%{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));

  match(Set dummy (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addal(noreg, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit atomic fetch-and-add (GetAndAddL) with an immediate
// increment, eligible only when needs_acquiring_load_reserved(n) is true.
// Emits the "al" flavour of the add helper.
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set newval (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addal($newval$$Register, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 64-bit atomic add (GetAndAddL) with an immediate increment, for
// the case where the node's result is not used and
// needs_acquiring_load_reserved(n) is true.  noreg is passed in the result
// position.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr)
%{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));

  match(Set dummy (GetAndAddL mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addal(noreg, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 32-bit atomic fetch-and-add (GetAndAddI) with a register
// increment, eligible only when needs_acquiring_load_reserved(n) is true.
// Emits the "alw" flavour of the add helper.
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set newval (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addalw($newval$$Register, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 32-bit atomic add (GetAndAddI) with a register increment, for the
// case where the node's result is not used and
// needs_acquiring_load_reserved(n) is true.  noreg is passed in the result
// position.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr)
%{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));

  match(Set dummy (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %}

  ins_encode %{
    // 'indirect' memory operand: the base register is the full address.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addalw(noreg, $incr$$Register, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 32-bit atomic fetch-and-add (GetAndAddI) with an immediate
// increment, eligible only when needs_acquiring_load_reserved(n) is true.
// Emits the "alw" flavour of the add helper.
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr)
%{
  predicate(needs_acquiring_load_reserved(n));

  match(Set newval (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addalw($newval$$Register, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// Acquiring 32-bit atomic add (GetAndAddI) with an immediate increment, for
// the case where the node's result is not used and
// needs_acquiring_load_reserved(n) is true.  noreg is passed in the result
// position.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr)
%{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));

  match(Set dummy (GetAndAddI mem incr));

  ins_cost(ALU_COST);

  format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %}

  ins_encode %{
    // The constant is passed inline so overload selection stays as written.
    Register addr_reg = as_Register($mem$$base);
    __ atomic_addalw(noreg, $incr$$constant, addr_reg);
  %}

  ins_pipe(pipe_serial);
%}

// ============================================================================
// Arithmetic Instructions
//

// Integer Addition

// TODO
// these currently employ operations which do not set CR and hence are
// not flagged as killing CR but we would like to isolate the cases
// where we want to set flags from those where we don't. need to work
// out how to do that.
//
// 32-bit register + register addition (AddI), emitted as addw.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(ALU_COST);
  format %{ "addw  $dst, $src1, $src2\t#@addI_reg_reg" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ addw(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit register + immediate addition (AddI) with an immIAdd constant,
// emitted as addiw.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(ALU_COST);
  format %{ "addiw  $dst, $src1, $src2\t#@addI_reg_imm" %}

  ins_encode %{
    // The constant is handed to addiw directly; no intermediate local is
    // needed (a previously declared 'con' copy was never read).
    __ addiw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// 32-bit addition of an immediate to a long source narrowed by ConvL2I.
// The long register is fed straight to addiw, so no separate narrowing
// instruction is emitted by this rule.
instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(ALU_COST);
  format %{ "addiw  $dst, $src1, $src2\t#@addI_reg_imm_l2i" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src1$$Register;
    __ addiw(dst_reg, src_reg, $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// Pointer Addition
// Pointer + long register offset (AddP), emitted as a plain add.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(ALU_COST);
  format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %}

  ins_encode %{
    Register dst_reg    = $dst$$Register;
    Register base_reg   = $src1$$Register;
    Register offset_reg = $src2$$Register;
    __ add(dst_reg, base_reg, offset_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// If we shift left by 32 bits or more, we need not convert I2L first:
// the int source register is shifted directly with slli.
instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{
  match(Set dst (LShiftL (ConvI2L src) scale));
  ins_cost(ALU_COST);
  format %{ "slli  $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    // Only the low six bits of the shift amount are used.
    __ slli(dst_reg, src_reg, $scale$$constant & 63);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{
  match(Set dst (AddP src1 src2));
  ins_cost(ALU_COST);
  format %{ "addi  $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %}

  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register base_reg = $src1$$Register;
    // src2 is an immediate, so this add call ends up as an addi
    __ add(dst_reg, base_reg, $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit register + register addition (AddL), emitted as add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (AddL src1 src2));
  ins_cost(ALU_COST);
  format %{ "add  $dst, $src1, $src2\t#@addL_reg_reg" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ add(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Addition. No constant pool entries required.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
  match(Set dst (AddL src1 src2));
  ins_cost(ALU_COST);
  format %{ "addi  $dst, $src1, $src2\t#@addL_reg_imm" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src1$$Register;
    // src2 is an immediate, so this add call ends up as an addi
    __ add(dst_reg, src_reg, $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// Integer Subtraction
// 32-bit register - register subtraction (SubI), emitted as subw.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(ALU_COST);
  format %{ "subw  $dst, $src1, $src2\t#@subI_reg_reg" %}

  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ subw(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit subtract of an immediate; shown in the format as an addiw of the
// negated constant.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{
  match(Set dst (SubI src1 src2));

  format %{ "addiw  $dst, $src1, -$src2\t#@subI_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    // subw() is called with a constant operand, i.e. the addiw form
    __ subw(dst_reg, lhs_reg, $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// 64-bit subtract, register - register.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (SubL src1 src2));

  format %{ "sub  $dst, $src1, $src2\t#@subL_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ sub(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Subtraction. No constant pool entries required.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{
  match(Set dst (SubL src1 src2));

  format %{ "addi  $dst, $src1, -$src2\t#@subL_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    // sub() is called with a constant operand, i.e. the addi form
    __ sub(dst_reg, lhs_reg, $src2$$constant);
  %}

  ins_pipe(ialu_reg_imm);
%}

// Integer negation: the (0 - src) special case of SubI.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (SubI zero src));

  format %{ "subw  $dst, x0, $src\t# int, #@negI_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    // negw is listed in the format as a subw from x0
    __ negw(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// Long negation: the (0 - src) special case of SubL.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (SubL zero src));

  format %{ "sub  $dst, x0, $src\t# long, #@negL_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    // neg is listed in the format as a sub from x0
    __ neg(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// 32-bit multiply.
// mulw multiplies the low 32 bits of its sources and sign-extends the 32-bit
// product into the 64-bit destination register.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  format %{ "mulw  $dst, $src1, $src2\t#@mulI" %}
  ins_cost(IMUL_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ mulw(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(imul_reg_reg);
%}

// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  format %{ "mul  $dst, $src1, $src2\t#@mulL" %}
  ins_cost(IMUL_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ mul(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(lmul_reg_reg);
%}

// High half of a 64x64-bit multiply (MulHiL), emitted as mulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulHiL src1 src2));

  format %{ "mulh  $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %}
  ins_cost(IMUL_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ mulh(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(lmul_reg_reg);
%}

// Integer Divide
// Matches a 32-bit DivI node. The instruction bytes come from the
// riscv_enc_divw enc_class, which is defined outside this section; the
// format string lists the operation as a divw.

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));
  ins_cost(IDIVSI_COST);
  format %{ "divw  $dst, $src1, $src2\t#@divI"%}

  ins_encode(riscv_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// Sign-bit extraction for int: (src1 >> div1) >>> div2 collapsed into a
// single logical right shift by 31.
// NOTE(review): assumes immI_31 only matches the constant 31, as the name
// suggests -- confirm against the operand definition.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));

  format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src1$$reg);
    __ srliw(dst_reg, src_reg, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long Divide
// Matches a 64-bit DivL node. The instruction bytes come from the
// riscv_enc_div enc_class, which is defined outside this section; the
// format string lists the operation as a div.

instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));
  ins_cost(IDIVDI_COST);
  format %{ "div  $dst, $src1, $src2\t#@divL" %}

  ins_encode(riscv_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// Sign-bit extraction for long: (src1 >> div1) >>> div2 collapsed into a
// single logical right shift by 63.
// NOTE(review): assumes immI_63 only matches the constant 63, as the name
// suggests -- confirm against the operand definition.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));

  format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src1$$reg);
    __ srli(dst_reg, src_reg, 63);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Integer Remainder
// Matches a 32-bit ModI node. The instruction bytes come from the
// riscv_enc_modw enc_class (defined outside this section); the format
// string lists the operation as a remw.
//
// The rule is costed like a divide (IDIVSI_COST), so it is scheduled with
// the integer-divide pipeline class used by divI rather than the plain
// ALU class.

instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));
  ins_cost(IDIVSI_COST);
  format %{ "remw  $dst, $src1, $src2\t#@modI" %}

  ins_encode(riscv_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// Long Remainder
// Matches a 64-bit ModL node. The instruction bytes come from the
// riscv_enc_mod enc_class (defined outside this section); the format
// string lists the operation as a rem.
//
// The rule is costed like a divide (IDIVDI_COST), so it is scheduled with
// the long-divide pipeline class used by divL rather than the plain
// ALU class.

instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));
  ins_cost(IDIVDI_COST);
  format %{ "rem  $dst, $src1, $src2\t#@modL" %}

  ins_encode(riscv_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// Integer Shifts

// Int shift left by a register amount.
// sllw only looks at the low 5 bits of src2 for the shift count.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "sllw  $dst, $src1, $src2\t#@lShiftI_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ sllw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Int shift left by an immediate.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  format %{ "slliw  $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 5 bits of the constant form the 32-bit shift amount
    __ slliw(dst_reg, value_reg, (unsigned) $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Int logical shift right by a register amount.
// srlw only looks at the low 5 bits of src2 for the shift count.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "srlw  $dst, $src1, $src2\t#@urShiftI_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ srlw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Int logical shift right by an immediate.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  format %{ "srliw  $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 5 bits of the constant form the 32-bit shift amount
    __ srliw(dst_reg, value_reg, (unsigned) $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Int arithmetic shift right by a register amount.
// sraw only looks at the low 5 bits of src2 for the shift count and
// sign-extends its 32-bit result into the upper half of dst.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "sraw  $dst, $src1, $src2\t#@rShiftI_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ sraw(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Int arithmetic shift right by an immediate.
// sraiw sign-extends its 32-bit result into the upper half of dst.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  format %{ "sraiw  $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 5 bits of the constant form the 32-bit shift amount
    __ sraiw(dst_reg, value_reg, (unsigned) $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long Shifts

// Long shift left by a register amount.
// sll only looks at the low 6 bits of src2 for the shift count.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  format %{ "sll  $dst, $src1, $src2\t#@lShiftL_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ sll(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Long shift left by an immediate.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  format %{ "slli  $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 6 bits of the constant form the 64-bit shift amount
    __ slli(dst_reg, value_reg, (unsigned) $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long logical shift right by a register amount.
// srl only looks at the low 6 bits of src2 for the shift count.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "srl  $dst, $src1, $src2\t#@urShiftL_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ srl(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Long logical shift right by an immediate.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  format %{ "srli  $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 6 bits of the constant form the 64-bit shift amount
    __ srli(dst_reg, value_reg, (unsigned) $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Logical right shift of a pointer reinterpreted as an integer (CastP2X),
// a special-case pattern for card table stores.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  format %{ "srli  $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register ptr_reg = as_Register($src1$$reg);
    // only the low 6 bits of the constant form the 64-bit shift amount
    __ srli(dst_reg, ptr_reg, (unsigned) $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long arithmetic shift right by a register amount.
// sra only looks at the low 6 bits of src2 for the shift count.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "sra  $dst, $src1, $src2\t#@rShiftL_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    const Register count_reg = as_Register($src2$$reg);
    __ sra(dst_reg, value_reg, count_reg);
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Long arithmetic shift right by an immediate.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  format %{ "srai  $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg   = as_Register($dst$$reg);
    const Register value_reg = as_Register($src1$$reg);
    // only the low 6 bits of the constant form the 64-bit shift amount
    __ srai(dst_reg, value_reg, (unsigned) $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Int bitwise NOT: XorI with the m1 operand, emitted as xori with -1.
instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{
  match(Set dst (XorI src1 m1));

  format %{ "xori  $dst, $src1, -1\t#@regI_not_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src1$$reg);
    __ xori(dst_reg, src_reg, -1);
  %}

  ins_pipe(ialu_reg);
%}

// Long bitwise NOT: XorL with the m1 operand, emitted as xori with -1.
instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{
  match(Set dst (XorL src1 m1));

  format %{ "xori  $dst, $src1, -1\t#@regL_not_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src1$$reg);
    __ xori(dst_reg, src_reg, -1);
  %}

  ins_pipe(ialu_reg);
%}

// ============================================================================
// Floating Point Arithmetic Instructions

// Single-precision floating-point add.
instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
  match(Set dst (AddF src1 src2));

  format %{ "fadd.s  $dst, $src1, $src2\t#@addF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fadd_s(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point add.
instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
  match(Set dst (AddD src1 src2));

  format %{ "fadd.d  $dst, $src1, $src2\t#@addD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fadd_d(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision floating-point subtract.
instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
  match(Set dst (SubF src1 src2));

  format %{ "fsub.s  $dst, $src1, $src2\t#@subF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fsub_s(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point subtract.
instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
  match(Set dst (SubD src1 src2));

  format %{ "fsub.d  $dst, $src1, $src2\t#@subD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fsub_d(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision floating-point multiply.
instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
  match(Set dst (MulF src1 src2));

  format %{ "fmul.s  $dst, $src1, $src2\t#@mulF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fmul_s(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision floating-point multiply.
instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
  match(Set dst (MulD src1 src2));

  format %{ "fmul.d  $dst, $src1, $src2\t#@mulD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ fmul_d(dst_freg, lhs_freg, rhs_freg);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Fused multiply-add, float: src1 * src2 + src3 (only when UseFMA is set).
instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadd.s  $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fmadd_s(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Fused multiply-add, double: src1 * src2 + src3 (only when UseFMA is set).
instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmadd.d  $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fmadd_d(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Fused multiply-subtract, float: src1 * src2 - src3 (only when UseFMA is set).
// Matched as an FmaF whose addend is negated.
instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fmsub.s  $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fmsub_s(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Fused multiply-subtract, double: src1 * src2 - src3 (only when UseFMA is set).
// Matched as an FmaD whose addend is negated.
instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fmsub.d  $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fmsub_d(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Negated-product fused multiply-add, float: -src1 * src2 + src3
// (only when UseFMA is set). Either multiplicand may carry the negation.
instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fnmsub.s  $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fnmsub_s(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Negated-product fused multiply-add, double: -src1 * src2 + src3
// (only when UseFMA is set). Either multiplicand may carry the negation.
instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fnmsub.d  $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fnmsub_d(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Negated fused multiply-add, float: -src1 * src2 - src3
// (only when UseFMA is set). The addend and one multiplicand are negated.
instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadd.s  $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %}
  ins_cost(FMUL_SINGLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fnmadd_s(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Negated fused multiply-add, double: -src1 * src2 - src3
// (only when UseFMA is set). The addend and one multiplicand are negated.
instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmadd.d  $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %}
  ins_cost(FMUL_DOUBLE_COST);

  ins_encode %{
    const FloatRegister dst_freg    = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg    = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg    = as_FloatRegister($src2$$reg);
    const FloatRegister addend_freg = as_FloatRegister($src3$$reg);
    __ fnmadd_d(dst_freg, lhs_freg, rhs_freg, addend_freg);
  %}

  ins_pipe(pipe_class_default);
%}

// Math.max(float, float), expanded by the minmax_FD macro.
// dst is declared TEMP_DEF and the flags register is declared killed.
instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
  match(Set dst (MaxF src1 src2));
  effect(TEMP_DEF dst, KILL cr);

  format %{ "maxF $dst, $src1, $src2" %}

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ minmax_FD(dst_freg, lhs_freg, rhs_freg,
                 false /* is_double */, false /* is_min */);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.min(float, float), expanded by the minmax_FD macro.
// dst is declared TEMP_DEF and the flags register is declared killed.
instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
  match(Set dst (MinF src1 src2));
  effect(TEMP_DEF dst, KILL cr);

  format %{ "minF $dst, $src1, $src2" %}

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ minmax_FD(dst_freg, lhs_freg, rhs_freg,
                 false /* is_double */, true /* is_min */);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.max(double, double), expanded by the minmax_FD macro.
// dst is declared TEMP_DEF and the flags register is declared killed.
instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
  match(Set dst (MaxD src1 src2));
  effect(TEMP_DEF dst, KILL cr);

  format %{ "maxD $dst, $src1, $src2" %}

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ minmax_FD(dst_freg, lhs_freg, rhs_freg,
                 true /* is_double */, false /* is_min */);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Math.min(double, double), expanded by the minmax_FD macro.
// dst is declared TEMP_DEF and the flags register is declared killed.
instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
  match(Set dst (MinD src1 src2));
  effect(TEMP_DEF dst, KILL cr);

  format %{ "minD $dst, $src1, $src2" %}

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister lhs_freg = as_FloatRegister($src1$$reg);
    const FloatRegister rhs_freg = as_FloatRegister($src2$$reg);
    __ minmax_FD(dst_freg, lhs_freg, rhs_freg,
                 true /* is_double */, true /* is_min */);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float.isInfinite
// Classifies $src with fclass.s, keeps result bits 0 and 7 (the -infinity and
// +infinity classes in the RISC-V fclass encoding) and turns "any bit set"
// into 0/1 with slt against zr.
// NOTE(review): the rule name is spelled "isIninite"; kept as is because the
// name is the rule's identity.
instruct isIniniteF_reg_reg(iRegINoSp dst, fRegF src) %{
  match(Set dst (IsInfiniteF src));

  format %{ "isInfinite $dst, $src" %}

  ins_encode %{
    const Register      result_reg = as_Register($dst$$reg);
    const FloatRegister value_freg = as_FloatRegister($src$$reg);
    __ fclass_s(result_reg, value_freg);
    __ andi(result_reg, result_reg, 0b10000001);
    __ slt(result_reg, zr, result_reg);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double.isInfinite
// Classifies $src with fclass.d, keeps result bits 0 and 7 (the -infinity and
// +infinity classes in the RISC-V fclass encoding) and turns "any bit set"
// into 0/1 with slt against zr.
instruct isInfiniteD_reg_reg(iRegINoSp dst, fRegD src) %{
  match(Set dst (IsInfiniteD src));

  format %{ "isInfinite $dst, $src" %}

  ins_encode %{
    const Register      result_reg = as_Register($dst$$reg);
    const FloatRegister value_freg = as_FloatRegister($src$$reg);
    __ fclass_d(result_reg, value_freg);
    __ andi(result_reg, result_reg, 0b10000001);
    __ slt(result_reg, zr, result_reg);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float.isFinite
// Classifies $src with fclass.s, keeps result bits 1..6 (the normal,
// subnormal and zero classes of either sign in the RISC-V fclass encoding)
// and turns "any bit set" into 0/1 with slt against zr.
instruct isFiniteF_reg_reg(iRegINoSp dst, fRegF src) %{
  match(Set dst (IsFiniteF src));

  format %{ "isFinite $dst, $src" %}

  ins_encode %{
    const Register      result_reg = as_Register($dst$$reg);
    const FloatRegister value_freg = as_FloatRegister($src$$reg);
    __ fclass_s(result_reg, value_freg);
    __ andi(result_reg, result_reg, 0b0001111110);
    __ slt(result_reg, zr, result_reg);
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double.isFinite
// Classifies $src with fclass.d, keeps result bits 1..6 (the normal,
// subnormal and zero classes of either sign in the RISC-V fclass encoding)
// and turns "any bit set" into 0/1 with slt against zr.
instruct isFiniteD_reg_reg(iRegINoSp dst, fRegD src) %{
  match(Set dst (IsFiniteD src));

  format %{ "isFinite $dst, $src" %}

  ins_encode %{
    const Register      result_reg = as_Register($dst$$reg);
    const FloatRegister value_freg = as_FloatRegister($src$$reg);
    __ fclass_d(result_reg, value_freg);
    __ andi(result_reg, result_reg, 0b0001111110);
    __ slt(result_reg, zr, result_reg);
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Single-precision floating-point divide.
instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
  match(Set dst (DivF src1  src2));

  format %{ "fdiv.s  $dst, $src1, $src2\t#@divF_reg_reg" %}
  ins_cost(FDIV_COST);

  ins_encode %{
    const FloatRegister dst_freg      = as_FloatRegister($dst$$reg);
    const FloatRegister dividend_freg = as_FloatRegister($src1$$reg);
    const FloatRegister divisor_freg  = as_FloatRegister($src2$$reg);
    __ fdiv_s(dst_freg, dividend_freg, divisor_freg);
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision floating-point divide.
instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
  match(Set dst (DivD src1  src2));

  format %{ "fdiv.d  $dst, $src1, $src2\t#@divD_reg_reg" %}
  ins_cost(FDIV_COST);

  ins_encode %{
    const FloatRegister dst_freg      = as_FloatRegister($dst$$reg);
    const FloatRegister dividend_freg = as_FloatRegister($src1$$reg);
    const FloatRegister divisor_freg  = as_FloatRegister($src2$$reg);
    __ fdiv_d(dst_freg, dividend_freg, divisor_freg);
  %}

  ins_pipe(fp_div_d);
%}

// Single-precision negate; fneg_s is listed in the format as fsgnjn.s.
instruct negF_reg_reg(fRegF dst, fRegF src) %{
  match(Set dst (NegF src));

  format %{ "fsgnjn.s  $dst, $src, $src\t#@negF_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fneg_s(dst_freg, src_freg);
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision negate; fneg_d is listed in the format as fsgnjn.d.
instruct negD_reg_reg(fRegD dst, fRegD src) %{
  match(Set dst (NegD src));

  format %{ "fsgnjn.d  $dst, $src, $src\t#@negD_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fneg_d(dst_freg, src_freg);
  %}

  ins_pipe(fp_uop_d);
%}

// Int absolute value, branch-free:
//   t0  = src >> 31 (arithmetic)   -- all ones when src is negative, else zero
//   dst = (src + t0) ^ t0
// t0 is used as the scratch register for the sign mask.
instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (AbsI src));

  format %{
    "sraiw  t0, $src, 0x1f\n\t"
    "addw  $dst, $src, t0\n\t"
    "xorr  $dst, $dst, t0\t#@absI_reg"
  %}
  ins_cost(ALU_COST * 3);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ sraiw(t0, src_reg, 0x1f);
    __ addw(dst_reg, src_reg, t0);
    __ xorr(dst_reg, dst_reg, t0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long absolute value, branch-free:
//   t0  = src >> 63 (arithmetic)   -- all ones when src is negative, else zero
//   dst = (src + t0) ^ t0
// t0 is used as the scratch register for the sign mask.
instruct absL_reg(iRegLNoSp dst, iRegL src) %{
  match(Set dst (AbsL src));

  format %{
    "srai  t0, $src, 0x3f\n\t"
    "add  $dst, $src, t0\n\t"
    "xorr  $dst, $dst, t0\t#@absL_reg"
  %}
  ins_cost(ALU_COST * 3);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ srai(t0, src_reg, 0x3f);
    __ add(dst_reg, src_reg, t0);
    __ xorr(dst_reg, dst_reg, t0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Single-precision absolute value; fabs_s is listed in the format as fsgnjx.s.
instruct absF_reg(fRegF dst, fRegF src) %{
  match(Set dst (AbsF src));

  format %{ "fsgnjx.s  $dst, $src, $src\t#@absF_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fabs_s(dst_freg, src_freg);
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision absolute value; fabs_d is listed in the format as fsgnjx.d.
instruct absD_reg(fRegD dst, fRegD src) %{
  match(Set dst (AbsD src));

  format %{ "fsgnjx.d  $dst, $src, $src\t#@absD_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fabs_d(dst_freg, src_freg);
  %}

  ins_pipe(fp_uop_d);
%}

// Single-precision square root.
// Matches the float -> double -> sqrt -> float shape and replaces the whole
// chain with one fsqrt.s on the float source.
instruct sqrtF_reg(fRegF dst, fRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "fsqrt.s  $dst, $src\t#@sqrtF_reg" %}
  ins_cost(FSQRT_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fsqrt_s(dst_freg, src_freg);
  %}

  ins_pipe(fp_sqrt_s);
%}

// Double-precision square root.
instruct sqrtD_reg(fRegD dst, fRegD src) %{
  match(Set dst (SqrtD src));

  format %{ "fsqrt.d  $dst, $src\t#@sqrtD_reg" %}
  ins_cost(FSQRT_COST);

  ins_encode %{
    const FloatRegister dst_freg = as_FloatRegister($dst$$reg);
    const FloatRegister src_freg = as_FloatRegister($src$$reg);
    __ fsqrt_d(dst_freg, src_freg);
  %}

  ins_pipe(fp_sqrt_d);
%}

// Arithmetic Instructions End

// ============================================================================
// Logical Instructions

// Int bitwise AND, register with register.
instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
  match(Set dst (AndI src1 src2));

  ins_cost(ALU_COST);
  format %{ "andr  $dst, $src1, $src2\t#@andI_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ andr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise AND with an immediate.
instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
  match(Set dst (AndI src1 src2));

  ins_cost(ALU_COST);
  format %{ "andi  $dst, $src1, $src2\t#@andI_reg_imm" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ andi(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Int bitwise OR, register with register.
instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(ALU_COST);
  format %{ "orr  $dst, $src1, $src2\t#@orI_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ orr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise OR with an immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
  match(Set dst (OrI src1 src2));

  ins_cost(ALU_COST);
  format %{ "ori  $dst, $src1, $src2\t#@orI_reg_imm" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ ori(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Int bitwise XOR, register with register.
instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(ALU_COST);
  format %{ "xorr  $dst, $src1, $src2\t#@xorI_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ xorr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int bitwise XOR with an immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
  match(Set dst (XorI src1 src2));

  ins_cost(ALU_COST);
  format %{ "xori  $dst, $src1, $src2\t#@xorI_reg_imm" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ xori(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Long bitwise AND, register with register.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (AndL src1 src2));

  ins_cost(ALU_COST);
  format %{ "andr  $dst, $src1, $src2\t#@andL_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ andr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise AND with an immediate (the constant is passed as int32_t).
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
  match(Set dst (AndL src1 src2));

  ins_cost(ALU_COST);
  format %{ "andi  $dst, $src1, $src2\t#@andL_reg_imm" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ andi(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Long bitwise OR, register with register.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  ins_cost(ALU_COST);
  format %{ "orr  $dst, $src1, $src2\t#@orL_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ orr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise OR with an immediate (the constant is passed as int32_t).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
  match(Set dst (OrL src1 src2));

  ins_cost(ALU_COST);
  format %{ "ori  $dst, $src1, $src2\t#@orL_reg_imm" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ ori(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Long bitwise XOR, register with register.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(ALU_COST);
  format %{ "xorr  $dst, $src1, $src2\t#@xorL_reg_reg" %}

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    const Register rhs_reg = as_Register($src2$$reg);
    __ xorr(dst_reg, lhs_reg, rhs_reg);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long bitwise XOR with an immediate (the constant is passed as int32_t).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "xori  $dst, $src1, $src2\t#@xorL_reg_imm" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register lhs_reg = as_Register($src1$$reg);
    __ xori(dst_reg, lhs_reg, (int32_t)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// ============================================================================
// BSWAP Instructions

// Byte reversal of an int (ReverseBytesI) via the revb_w_w macro.
// The flags register is declared as a TEMP; the cost is 13 ALU operations.
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{
  match(Set dst (ReverseBytesI src));
  effect(TEMP cr);

  format %{ "revb_w_w  $dst, $src\t#@bytes_reverse_int" %}
  ins_cost(ALU_COST * 13);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ revb_w_w(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// Byte reversal of a long (ReverseBytesL) via the revb macro.
// The flags register is declared as a TEMP; the cost is 29 ALU operations.
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{
  match(Set dst (ReverseBytesL src));
  effect(TEMP cr);

  format %{ "revb  $dst, $src\t#@bytes_reverse_long" %}
  ins_cost(ALU_COST * 29);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ revb(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// Byte reversal of an unsigned short (ReverseBytesUS) via the revb_h_h_u macro.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  format %{ "revb_h_h_u  $dst, $src\t#@bytes_reverse_unsigned_short" %}
  ins_cost(ALU_COST * 5);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ revb_h_h_u(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// Byte reversal of a signed short (ReverseBytesS) via the revb_h_h macro.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  format %{ "revb_h_h  $dst, $src\t#@bytes_reverse_short" %}
  ins_cost(ALU_COST * 5);

  ins_encode %{
    const Register dst_reg = as_Register($dst$$reg);
    const Register src_reg = as_Register($src$$reg);
    __ revb_h_h(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

// ============================================================================
// MemBar Instruction

// LoadFence: requests a LoadLoad + LoadStore barrier from the macro assembler.
instruct load_fence() %{
  match(LoadFence);

  format %{ "#@load_fence" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ membar(MacroAssembler::LoadStore | MacroAssembler::LoadLoad);
  %}

  ins_pipe(pipe_serial);
%}

// MemBarAcquire: annotates the code stream with a block comment and requests
// a LoadLoad + LoadStore barrier.
instruct membar_acquire() %{
  match(MemBarAcquire);

  format %{ "#@membar_acquire\n\t"
            "fence ir iorw" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(MacroAssembler::LoadStore | MacroAssembler::LoadLoad);
  %}

  ins_pipe(pipe_serial);
%}

// MemBarAcquireLock: zero-cost placeholder. The encoding only leaves a block
// comment in the code stream; no membar call is made here.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);

  format %{ "#@membar_acquire_lock (elided)" %}
  ins_cost(0);

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: requests a LoadStore + StoreStore barrier from the macro assembler.
instruct store_fence() %{
  match(StoreFence);

  format %{ "#@store_fence" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}

// MemBarRelease: annotates the code stream with a block comment and requests
// a LoadStore + StoreStore barrier.
instruct membar_release() %{
  match(MemBarRelease);

  format %{ "#@membar_release\n\t"
            "fence iorw ow" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}

// Handles both MemBarStoreStore and StoreStoreFence with one StoreStore barrier.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);

  format %{ "MEMBAR-store-store\t#@membar_storestore" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ membar(MacroAssembler::StoreStore);
  %}

  ins_pipe(pipe_serial);
%}

// MemBarReleaseLock: zero-cost placeholder. The encoding only leaves a block
// comment in the code stream; no membar call is made here.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);

  format %{ "#@membar_release_lock (elided)" %}
  ins_cost(0);

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// MemBarVolatile: annotates the code stream with a block comment and requests
// a StoreLoad barrier.
// NOTE(review): the format text prints "fence iorw iorw"; confirm that this is
// what membar(StoreLoad) actually emits.
instruct membar_volatile() %{
  match(MemBarVolatile);

  format %{ "#@membar_volatile\n\t"
             "fence iorw iorw"%}
  ins_cost(ALU_COST);

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(MacroAssembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}

// ============================================================================
// Cast Instructions (Java-level type cast)

// CastX2P (long -> pointer): a plain register copy, skipped entirely when the
// allocator assigned the same register to both operands.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  format %{ "mv  $dst, $src\t# long -> ptr, #@castX2P" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const bool same_reg = ($dst$$reg == $src$$reg);
    if (!same_reg) {
      __ mv($dst$$Register, $src$$Register);
    }
  %}

  ins_pipe(ialu_reg);
%}

// CastP2X (pointer -> long): a plain register copy, skipped entirely when the
// allocator assigned the same register to both operands.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  format %{ "mv  $dst, $src\t# ptr -> long, #@castP2X" %}
  ins_cost(ALU_COST);

  ins_encode %{
    const bool same_reg = ($dst$$reg == $src$$reg);
    if (!same_reg) {
      __ mv($dst$$Register, $src$$Register);
    }
  %}

  ins_pipe(ialu_reg);
%}

// CastPP: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castPP(iRegPNoSp dst) %{
  match(Set dst (CastPP dst));

  size(0);
  ins_cost(0);
  format %{ "# castPP of $dst, #@castPP" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastLL: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castLL(iRegL dst) %{
  match(Set dst (CastLL dst));

  size(0);
  ins_cost(0);
  format %{ "# castLL of $dst, #@castLL" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castII(iRegI dst) %{
  match(Set dst (CastII dst));

  size(0);
  ins_cost(0);
  format %{ "# castII of $dst, #@castII" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CheckCastPP: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct checkCastPP(iRegPNoSp dst) %{
  match(Set dst (CheckCastPP dst));

  ins_cost(0);
  size(0);
  format %{ "# checkcastPP of $dst, #@checkCastPP" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastFF: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castFF(fRegF dst) %{
  match(Set dst (CastFF dst));

  size(0);
  ins_cost(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastDD: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castDD(fRegD dst) %{
  match(Set dst (CastDD dst));

  size(0);
  ins_cost(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastVV: in-place (dst is also the source), zero size, zero cost, and an
// empty encoding -- nothing is emitted for this node.
instruct castVV(vReg dst) %{
  match(Set dst (CastVV dst));

  size(0);
  ins_cost(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// ============================================================================
// Convert Instructions

// Conv2B on an int register: a single snez from src into dst.
instruct convI2Bool(iRegINoSp dst, iRegI src) %{
  match(Set dst (Conv2B src));

  format %{ "snez  $dst, $src\t#@convI2Bool" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ snez($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// Conv2B on a pointer register: a single snez from src into dst.
instruct convP2Bool(iRegINoSp dst, iRegP src) %{
  match(Set dst (Conv2B src));

  format %{ "snez  $dst, $src\t#@convP2Bool" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ snez($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// int <-> long

// ConvI2L: implemented as addw with the zero register as second operand.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) %{
  match(Set dst (ConvI2L src));

  format %{ "addw  $dst, $src, zr\t#@convI2L_reg_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ addw($dst$$Register, $src$$Register, zr);
  %}

  ins_pipe(ialu_reg);
%}

// ConvL2I: implemented as addw with the zero register as second operand.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "addw  $dst, $src, zr\t#@convL2I_reg" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ addw($dst$$Register, $src$$Register, zr);
  %}

  ins_pipe(ialu_reg);
%}

// int -> unsigned long: folds (ConvI2L src) & mask into one zero_extend of
// the low 32 bits.
instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %}
  ins_cost(ALU_COST * 2);

  ins_encode %{
    __ zero_extend($dst$$Register, $src$$Register, 32);
  %}

  ins_pipe(ialu_reg_shift);
%}

// float <-> double

// ConvF2D: a single fcvt_d_s from the float source into the double destination.
instruct convF2D_reg(fRegD dst, fRegF src) %{
  match(Set dst (ConvF2D src));

  format %{ "fcvt.d.s  $dst, $src\t#@convF2D_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_d_s($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(fp_f2d);
%}

// ConvD2F: a single fcvt_s_d from the double source into the float destination.
instruct convD2F_reg(fRegF dst, fRegD src) %{
  match(Set dst (ConvD2F src));

  format %{ "fcvt.s.d  $dst, $src\t#@convD2F_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_s_d($dst$$FloatRegister, $src$$FloatRegister);
  %}

  ins_pipe(fp_d2f);
%}

// float <-> int

// ConvF2I: goes through the fcvt_w_s_safe macro rather than a bare fcvt.
instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
  match(Set dst (ConvF2I src));

  format %{ "fcvt.w.s  $dst, $src\t#@convF2I_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_w_s_safe(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// ConvI2F: a single fcvt_s_w from the integer source into the float destination.
instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  format %{ "fcvt.s.w  $dst, $src\t#@convI2F_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_s_w($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(fp_i2f);
%}

// float <-> long

// ConvF2L: goes through the fcvt_l_s_safe macro rather than a bare fcvt.
instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{
  match(Set dst (ConvF2L src));

  format %{ "fcvt.l.s  $dst, $src\t#@convF2L_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_l_s_safe(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// ConvL2F: a single fcvt_s_l from the long source into the float destination.
instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "fcvt.s.l  $dst, $src\t#@convL2F_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_s_l($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(fp_l2f);
%}

// double <-> int

// ConvD2I: goes through the fcvt_w_d_safe macro rather than a bare fcvt.
instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
  match(Set dst (ConvD2I src));

  format %{ "fcvt.w.d  $dst, $src\t#@convD2I_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_w_d_safe(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// ConvI2D: a single fcvt_d_w from the integer source into the double destination.
instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  format %{ "fcvt.d.w  $dst, $src\t#@convI2D_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_d_w($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(fp_i2d);
%}

// double <-> long

// ConvD2L: goes through the fcvt_l_d_safe macro rather than a bare fcvt.
instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
  match(Set dst (ConvD2L src));

  format %{ "fcvt.l.d  $dst, $src\t#@convD2L_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_l_d_safe(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// ConvL2D: a single fcvt_d_l from the long source into the double destination.
instruct convL2D_reg_reg(fRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "fcvt.d.l  $dst, $src\t#@convL2D_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fcvt_d_l($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(fp_l2d);
%}

// Oop -> int, used for vector alignment masking: folds ConvL2I(CastP2X src)
// into one zero_extend of the low 32 bits.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %}
  ins_cost(ALU_COST * 2);

  ins_encode %{
    __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32);
  %}

  ins_pipe(ialu_reg);
%}

// Compressed oop -> int, used for vector alignment masking. Only selected when
// the compressed-oop shift is zero (32-bit oops, heap < 4Gb), in which case
// the whole ConvL2I(CastP2X(DecodeN src)) chain collapses to a register move.
instruct convN2I(iRegINoSp dst, iRegN src) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "mv  $dst, $src\t# compressed ptr -> int, #@convN2I" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ mv(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// EncodeP: compress an oop pointer via the encode_heap_oop macro.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodeP src));

  format %{ "encode_heap_oop  $dst, $src\t#@encodeHeapOop" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ encode_heap_oop($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// DecodeN for results whose pointer type is neither NotNull nor Constant;
// uses the general decode_heap_oop macro.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));

  format %{ "decode_heap_oop  $dst, $src\t#@decodeHeapOop" %}
  ins_cost(0);

  ins_encode %{
    __ decode_heap_oop($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// DecodeN for results whose pointer type is NotNull or Constant; uses the
// decode_heap_oop_not_null macro.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));

  format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %}
  ins_cost(0);

  ins_encode %{
    __ decode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// EncodePKlass: compress a klass pointer via encode_klass_not_null, passing
// t0 as the macro's third (scratch) argument.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  format %{ "encode_klass_not_null  $dst, $src\t#@encodeKlass_not_null" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ encode_klass_not_null($dst$$Register, $src$$Register, t0);
  %}

  ins_pipe(ialu_reg);
%}

// DecodeNKlass: expand a compressed klass pointer via decode_klass_not_null.
// An extra pointer register is declared TEMP and handed to the macro.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP tmp);

  format %{ "decode_klass_not_null  $dst, $src\t#@decodeKlass_not_null" %}
  ins_cost(ALU_COST);

  ins_encode %{
    __ decode_klass_not_null($dst$$Register, $src$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// stack <-> reg and reg <-> reg shuffles with no conversion

// MoveF2I, float stack slot -> int register: 32-bit integer load (lw) from
// sp + slot displacement.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "lw  $dst, $src\t#@MoveF2I_stack_reg" %}
  ins_cost(LOAD_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ lw($dst$$Register, slot);
  %}

  ins_pipe(iload_reg_reg);
%}

// MoveI2F, int stack slot -> float register: single-precision load (flw) from
// sp + slot displacement.
instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "flw  $dst, $src\t#@MoveI2F_stack_reg" %}
  ins_cost(LOAD_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ flw($dst$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}

// MoveD2L, double stack slot -> long register: 64-bit integer load (ld) from
// sp + slot displacement.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "ld  $dst, $src\t#@MoveD2L_stack_reg" %}
  ins_cost(LOAD_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ ld($dst$$Register, slot);
  %}

  ins_pipe(iload_reg_reg);
%}

// MoveL2D, long stack slot -> double register: double-precision load (fld)
// from sp + slot displacement.
instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "fld  $dst, $src\t#@MoveL2D_stack_reg" %}
  ins_cost(LOAD_COST);

  ins_encode %{
    Address slot(sp, $src$$disp);
    __ fld($dst$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}

// MoveF2I, float register -> int stack slot: single-precision store (fsw) to
// sp + slot displacement.
instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "fsw  $src, $dst\t#@MoveF2I_reg_stack" %}
  ins_cost(STORE_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ fsw($src$$FloatRegister, slot);
  %}

  ins_pipe(pipe_class_memory);
%}

// MoveI2F, int register -> float stack slot: 32-bit integer store (sw) to
// sp + slot displacement.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "sw  $src, $dst\t#@MoveI2F_reg_stack" %}
  ins_cost(STORE_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ sw($src$$Register, slot);
  %}

  ins_pipe(istore_reg_reg);
%}

// MoveD2L, double register -> long stack slot: double-precision store (fsd)
// to sp + slot displacement.
instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(STORE_COST);

  // Operand order fixed to "$src, $dst": the encoding stores $src into the
  // $dst slot, and the sibling *_reg_stack stores all print source first.
  format %{ "fsd  $src, $dst\t#@MoveD2L_reg_stack" %}

  ins_encode %{
    __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// MoveL2D, long register -> double stack slot: 64-bit integer store (sd) to
// sp + slot displacement.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "sd  $src, $dst\t#@MoveL2D_reg_stack" %}
  ins_cost(STORE_COST);

  ins_encode %{
    Address slot(sp, $dst$$disp);
    __ sd($src$$Register, slot);
  %}

  ins_pipe(istore_reg_reg);
%}

// MoveF2I, float register -> int register: moved with fmv_x_w, no stack
// round-trip.
instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(XFER_COST);

  // Tag corrected: it previously read "#@MoveL2D_reg_stack" (copy-paste),
  // which mislabels this instruction in disassembly output.
  format %{ "fmv.x.w  $dst, $src\t#@MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// MoveI2F, int register -> float register: moved with fmv_w_x, no stack
// round-trip.
instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "fmv.w.x  $dst, $src\t#@MoveI2F_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fmv_w_x($dst$$FloatRegister, $src$$Register);
  %}

  ins_pipe(fp_i2f);
%}

// MoveD2L, double register -> long register: moved with fmv_x_d, no stack
// round-trip.
instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %}
  ins_cost(XFER_COST);

  ins_encode %{
    __ fmv_x_d($dst$$Register, $src$$FloatRegister);
  %}

  ins_pipe(fp_d2l);
%}

// MoveL2D, long register -> double register: moved with fmv_d_x, no stack
// round-trip.
instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(XFER_COST);

  // Tag corrected: it previously read "#@MoveD2L_reg_reg" (copy-paste), which
  // made this instruction indistinguishable from its inverse in disassembly.
  format %{ "fmv.d.x  $dst, $src\t#@MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}

// ============================================================================
// Compare Instructions which set the result float comparisons in dest register.

// CmpF3: three-way float compare into an int register via float_compare.
instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) %{
  match(Set dst (CmpF3 op1 op2));

  format %{ "flt.s  $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
            "bgtz   $dst, done\n\t"
            "feq.s  $dst, $op1, $op2\n\t"
            "addi   $dst, $dst, -1\t#@cmpF3_reg_reg"
  %}
  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);

  ins_encode %{
    // Desired result: -1 when unordered or less, 0 when equal, 1 when greater.
    __ float_compare($dst$$Register,
                     $op1$$FloatRegister,
                     $op2$$FloatRegister,
                     -1 /*unordered_result < 0*/);
  %}

  ins_pipe(pipe_class_default);
%}

// CmpD3: three-way double compare into an int register via double_compare.
instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) %{
  match(Set dst (CmpD3 op1 op2));

  format %{ "flt.d  $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
            "bgtz   $dst, done\n\t"
            "feq.d  $dst, $op1, $op2\n\t"
            "addi   $dst, $dst, -1\t#@cmpD3_reg_reg"
  %}
  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);

  ins_encode %{
    // Desired result: -1 when unordered or less, 0 when equal, 1 when greater.
    __ double_compare($dst$$Register,
                      $op1$$FloatRegister,
                      $op2$$FloatRegister,
                      -1 /*unordered_result < 0*/);
  %}

  ins_pipe(pipe_class_default);
%}

// CmpL3: three-way long compare. The result is produced in t0 by cmp_l2i and
// then copied into dst, so dst is only written after both operands were read.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) %{
  match(Set dst (CmpL3 op1 op2));

  format %{ "slt   $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t"
            "bnez  $dst, done\n\t"
            "slt  $dst, $op1, $op2\n\t"
            "neg   $dst, $dst\t#@cmpL3_reg_reg"
  %}
  ins_cost(ALU_COST * 3 + BRANCH_COST);

  ins_encode %{
    __ cmp_l2i(t0, $op1$$Register, $op2$$Register);
    __ mv($dst$$Register, t0);
  %}

  ins_pipe(pipe_class_default);
%}

// CmpLTMask p q: slt leaves 1/0 in dst, then subw negates it (0 - dst).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) %{
  match(Set dst (CmpLTMask p q));

  format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t"
            "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg"
  %}
  ins_cost(2 * ALU_COST);

  ins_encode %{
    Register result = $dst$$Register;
    __ slt(result, $p$$Register, $q$$Register);
    __ subw(result, zr, result);
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: a single sraiw by 31 of $op into $dst.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero)
%{
  match(Set dst (CmpLTMask op zero));

  ins_cost(ALU_COST);

  // Format corrected to match the encoding: the shifted source is $op (it
  // used to print $dst) and the tag now names this instruct rather than
  // cmpLTMask_reg_reg.
  format %{ "sraiw $dst, $op, 31\t#@cmpLTMask_reg_zero" %}

  ins_encode %{
    __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// ============================================================================
// Max and Min

// MinI: branchy minimum. If src1 <= src2 the code jumps to Lsrc1 and copies
// src1; otherwise it copies src2 and jumps over the src1 copy to Ldone.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2);

  ins_cost(BRANCH_COST + ALU_COST * 2);
  // Format fixed to mirror the encoding: removed the stray '.' after Lsrc1
  // and named the label bound by the final bind (Ldone).
  format %{
    "ble $src1, $src2, Lsrc1\t#@minI_rReg\n\t"
    "mv $dst, $src2\n\t"
    "j Ldone\n\t"
    "bind Lsrc1\n\t"
    "mv $dst, $src1\n\t"
    "bind Ldone\t#@minI_rReg"
  %}

  ins_encode %{
    Label Lsrc1, Ldone;
    __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
    __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
    __ j(Ldone);
    __ bind(Lsrc1);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}

  ins_pipe(ialu_reg_reg);
%}

// MaxI: branchy maximum. If src1 >= src2 the code jumps to Lsrc1 and copies
// src1; otherwise it copies src2 and jumps over the src1 copy to Ldone.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2);

  ins_cost(BRANCH_COST + ALU_COST * 2);
  // Format fixed to mirror the encoding: the final bind now names the label
  // it binds (Ldone).
  format %{
    "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t"
    "mv $dst, $src2\n\t"
    "j Ldone\n\t"
    "bind Lsrc1\n\t"
    "mv $dst, $src1\n\t"
    "bind Ldone\t#@maxI_rReg"
  %}

  ins_encode %{
    Label Lsrc1, Ldone;
    __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
    __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
    __ j(Ldone);
    __ bind(Lsrc1);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}

  ins_pipe(ialu_reg_reg);
%}

// ============================================================================
// Branch Instructions
// Direct (unconditional) branch: Goto is encoded through the riscv_enc_j
// encoding class with the target label.
instruct branch(label lbl) %{
  match(Goto);
  effect(USE lbl);

  format %{ "j  $lbl\t#@branch" %}
  ins_cost(BRANCH_COST);

  ins_encode(riscv_enc_j(lbl));

  ins_pipe(pipe_branch);
%}

// ============================================================================
// Compare and Branch Instructions

// Patterns for short variants (target within reach of a single conditional branch, about +/-4KiB on RISC-V)

// Near branch on the flags register: eq/ne test of cr against zero,
// marked as a short-branch variant.
instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{
  match(If cmp cr);
  effect(USE lbl);

  format %{ "b$cmp  $cr, zr, $lbl\t#@cmpFlag_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, $cr$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two signed int registers (If node).
instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
  // Shares its match rule with `far_cmpI_branch'.
  match(If cmp (CmpI op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpI_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, $op2$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two signed int registers (CountedLoopEnd node).
instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
  // Shares its match rule with `far_cmpI_loop'.
  match(CountedLoopEnd cmp (CmpI op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpI_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, $op2$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two unsigned int registers (If node). The
// condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
  // Shares its match rule with `far_cmpU_branch'.
  match(If cmp (CmpU op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpU_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two unsigned int registers (CountedLoopEnd node).
// The condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
  // Shares its match rule with `far_cmpU_loop'.
  match(CountedLoopEnd cmp (CmpU op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpU_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two signed long registers (If node).
instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
  // Shares its match rule with `far_cmpL_branch'.
  match(If cmp (CmpL op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpL_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, $op2$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two signed long registers (CountedLoopEnd node).
instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
  // Shares its match rule with `far_cmpL_loop'.
  match(CountedLoopEnd cmp (CmpL op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpL_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, $op2$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two unsigned long registers (If node). The
// condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
  // Shares its match rule with `far_cmpUL_branch'.
  match(If cmp (CmpUL op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpUL_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two unsigned long registers (CountedLoopEnd node).
// The condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
  // Shares its match rule with `far_cmpUL_loop'.
  match(CountedLoopEnd cmp (CmpUL op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpUL_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two pointer registers (If node). The condition
// code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) %{
  // Shares its match rule with `far_cmpP_branch'.
  match(If cmp (CmpP op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpP_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two pointer registers (CountedLoopEnd node). The
// condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) %{
  // Shares its match rule with `far_cmpP_loop'.
  match(CountedLoopEnd cmp (CmpP op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpP_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two narrow-pointer registers (If node). The
// condition code is OR-ed with unsigned_branch_mask before being passed on.
instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) %{
  // Shares its match rule with `far_cmpN_branch'.
  match(If cmp (CmpN op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpN_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two narrow-pointer registers (CountedLoopEnd
// node). The condition code is OR-ed with unsigned_branch_mask before being
// passed on.
instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) %{
  // Shares its match rule with `far_cmpN_loop'.
  match(CountedLoopEnd cmp (CmpN op1 op2));
  effect(USE lbl);

  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpN_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                  $op1$$Register,
                  $op2$$Register,
                  *($lbl$$label));
  %}

  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch on two float registers (If node), emitted through
// float_cmp_branch.
instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) %{
  // Shares its match rule with `far_cmpF_branch'.
  match(If cmp (CmpF op1 op2));
  effect(USE lbl);

  format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%}
  ins_cost(XFER_COST + BRANCH_COST);

  ins_encode %{
    __ float_cmp_branch($cmp$$cmpcode,
                        $op1$$FloatRegister,
                        $op2$$FloatRegister,
                        *($lbl$$label));
  %}

  ins_pipe(pipe_class_compare);
  ins_short_branch(1);
%}

// Near compare-and-branch on two float registers (CountedLoopEnd node),
// emitted through float_cmp_branch.
instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) %{
  // Shares its match rule with `far_cmpF_loop'.
  match(CountedLoopEnd cmp (CmpF op1 op2));
  effect(USE lbl);

  format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%}
  ins_cost(XFER_COST + BRANCH_COST);

  ins_encode %{
    __ float_cmp_branch($cmp$$cmpcode,
                        $op1$$FloatRegister,
                        $op2$$FloatRegister,
                        *($lbl$$label));
  %}

  ins_pipe(pipe_class_compare);
  ins_short_branch(1);
%}

// Near compare-and-branch on two double registers (If node). Reuses
// float_cmp_branch, with double_branch_mask OR-ed into the condition code.
instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) %{
  // Shares its match rule with `far_cmpD_branch'.
  match(If cmp (CmpD op1 op2));
  effect(USE lbl);

  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%}
  ins_cost(XFER_COST + BRANCH_COST);

  ins_encode %{
    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask,
                        $op1$$FloatRegister,
                        $op2$$FloatRegister,
                        *($lbl$$label));
  %}

  ins_pipe(pipe_class_compare);
  ins_short_branch(1);
%}

// Near compare-and-branch on two double registers (CountedLoopEnd node).
// Reuses float_cmp_branch, with double_branch_mask OR-ed into the condition code.
instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) %{
  // Shares its match rule with `far_cmpD_loop'.
  match(CountedLoopEnd cmp (CmpD op1 op2));
  effect(USE lbl);

  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%}
  ins_cost(XFER_COST + BRANCH_COST);

  ins_encode %{
    __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask,
                        $op1$$FloatRegister,
                        $op2$$FloatRegister,
                        *($lbl$$label));
  %}

  ins_pipe(pipe_class_compare);
  ins_short_branch(1);
%}

// Near compare-and-branch of a signed int against constant zero (If node);
// the zero register stands in for the immediate.
instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) %{
  // Shares its match rule with `far_cmpI_reg_imm0_branch'.
  match(If cmp (CmpI op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, zr, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch of a signed int against constant zero
// (CountedLoopEnd node); the zero register stands in for the immediate.
instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) %{
  // Shares its match rule with `far_cmpI_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpI op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, zr, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch of an unsigned int against constant zero (If node),
// restricted to the conditions accepted by the cmpOpUEqNeLeGt operand.
instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) %{
  // Shares its match rule with `far_cmpUEqNeLeGt_reg_imm0_branch'.
  match(If cmp (CmpU op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, $op1$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch of an unsigned int against constant zero
// (CountedLoopEnd node), restricted to the conditions accepted by the
// cmpOpUEqNeLeGt operand.
instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) %{
  // Shares its match rule with `far_cmpUEqNeLeGt_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpU op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, $op1$$Register, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch of a signed long against constant zero (If node);
// the zero register stands in for the immediate.
instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) %{
  // Shares its match rule with `far_cmpL_reg_imm0_branch'.
  match(If cmp (CmpL op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, zr, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Near compare-and-branch of a signed long against constant zero
// (CountedLoopEnd node); the zero register stands in for the immediate.
instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) %{
  // Shares its match rule with `far_cmpL_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpL op1 zero));
  effect(USE op1, USE lbl);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %}
  ins_cost(BRANCH_COST);

  ins_encode %{
    __ cmp_branch($cmp$$cmpcode, $op1$$Register, zr, *($lbl$$label));
  %}

  ins_pipe(pipe_cmpz_branch);
  ins_short_branch(1);
%}

// Compare unsigned long with zero and branch near instructions
instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) %{
  // Near (short) form of the unsigned long compare-with-zero `If' branch.
  // Shares its match rule with `far_cmpULEqNeLeGt_reg_imm0_branch'.
  match(If cmp (CmpUL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) %{
  // Near (short) form of the unsigned long compare-with-zero `CountedLoopEnd'
  // branch.  Shares its match rule with `far_cmpULEqNeLeGt_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpUL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

// Compare pointer with zero and branch near instructions
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl)
%{
  // Near (short) form of the pointer eq/ne-with-zero `If' branch.
  // Shares its match rule with `far_cmpP_imm0_branch'.
  match(If cmp (CmpP op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_imm0_branch" %}

  ins_encode %{
    Register ptr = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, ptr, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl)
%{
  // Near (short) form of the pointer eq/ne-with-zero `CountedLoopEnd' branch.
  // Shares its match rule with `far_cmpP_imm0_loop'.
  match(CountedLoopEnd cmp (CmpP op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_imm0_loop" %}

  ins_encode %{
    Register ptr = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, ptr, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

// Compare narrow pointer with zero and branch near instructions
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl)
%{
  // Near (short) form of the narrow pointer eq/ne-with-zero `If' branch.
  // Shares its match rule with `far_cmpN_imm0_branch'.
  match(If cmp (CmpN op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpN_imm0_branch" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl)
%{
  // Near (short) form of the narrow pointer eq/ne-with-zero `CountedLoopEnd'
  // branch.  Shares its match rule with `far_cmpN_imm0_loop'.
  match(CountedLoopEnd cmp (CmpN op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpN_imm0_loop" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

// Compare narrow pointer with pointer zero and branch near instructions
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl)
%{
  // Near (short) form of an `If' on (CmpP (DecodeN op1) zero).  The encoding
  // tests the narrow register op1 directly; no decode is emitted here.
  // Shares its match rule with `far_cmpP_narrowOop_imm0_branch'.
  match(If cmp (CmpP (DecodeN op1) zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl)
%{
  // Near (short) form of a `CountedLoopEnd' on (CmpP (DecodeN op1) zero).
  // The encoding tests the narrow register op1 directly; no decode is
  // emitted here.  Shares its match rule with `far_cmpP_narrowOop_imm0_loop'.
  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target);
  %}

  ins_pipe(pipe_cmpz_branch);
  // Flag this instruct as the short-branch variant.
  ins_short_branch(1);
%}

// Patterns for far (20KiB) variants

instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Far form of an `If' whose condition input is the flags register operand.
  // The encoding passes cr as an ordinary register to the eq/ne-with-zero
  // branch helper, with is_far set.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);

  format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%}

  ins_encode %{
    Register flags = as_Register($cr$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, flags, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

// Compare signed int and branch far instructions
instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
%{
  // Far form of the signed int register-register compare-and-branch for `If'.
  match(If cmp (CmpI op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  // The single [far_b$cmp] shown by the format stands for two instructions
  // emitted by the macro assembler:
  //   b$not_cmp(op1, op2, done), j($lbl), bind(done)
  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpI_branch" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
%{
  // Far form of the signed int register-register compare-and-branch for
  // `CountedLoopEnd'.
  match(CountedLoopEnd cmp (CmpI op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpI_loop" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
%{
  // Far form of the unsigned int register-register compare-and-branch for `If'.
  match(If cmp (CmpU op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
%{
  // Far form of the unsigned int register-register compare-and-branch for
  // `CountedLoopEnd'.
  match(CountedLoopEnd cmp (CmpU op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
%{
  // Far form of the signed long register-register compare-and-branch for `If'.
  match(If cmp (CmpL op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpL_branch" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
%{
  // Far form of the signed long register-register compare-and-branch for
  // `CountedLoopEnd'.
  // NOTE(review): the instruct is named far_cmpLloop while its format tag
  // prints far_cmpL_loop; both are left as they are here.
  match(CountedLoopEnd cmp (CmpL op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpL_loop" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
%{
  // Far form of the unsigned long register-register compare-and-branch for `If'.
  match(If cmp (CmpUL op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpUL_branch" %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
%{
  // Far form of the unsigned long register-register compare-and-branch for
  // `CountedLoopEnd'.
  match(CountedLoopEnd cmp (CmpUL op1 op2));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpUL_loop" %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) %{
  // Far form of the pointer register-register compare-and-branch for `If'.
  // Pointers go through the unsigned condition operand and branch mask.
  match(If cmp (CmpP op1 op2));
  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpP_branch" %}

  ins_encode %{
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) %{
  // Far form of the pointer register-register compare-and-branch for
  // `CountedLoopEnd'.  Pointers go through the unsigned condition operand
  // and branch mask.
  match(CountedLoopEnd cmp (CmpP op1 op2));
  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpP_loop" %}

  ins_encode %{
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) %{
  // Far form of the narrow pointer register-register compare-and-branch for
  // `If'.  Uses the unsigned condition operand and branch mask.
  match(If cmp (CmpN op1 op2));
  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpN_branch" %}

  ins_encode %{
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) %{
  // Far form of the narrow pointer register-register compare-and-branch for
  // `CountedLoopEnd'.  Uses the unsigned condition operand and branch mask.
  match(CountedLoopEnd cmp (CmpN op1 op2));
  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpN_loop" %}

  ins_encode %{
    int cond = $cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register lhs = as_Register($op1$$reg);
    Register rhs = as_Register($op2$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmp_branch);
%}

// Float compare and branch instructions
instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) %{
  // Far form of the single-precision float compare-and-branch for `If'.
  match(If cmp (CmpF op1 op2));
  effect(USE lbl);

  ins_cost(XFER_COST + BRANCH_COST * 2);

  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%}

  ins_encode %{
    FloatRegister lhs = as_FloatRegister($op1$$reg);
    FloatRegister rhs = as_FloatRegister($op2$$reg);
    Label& target = *($lbl$$label);
    __ float_cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_class_compare);
%}

instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) %{
  // Far form of the single-precision float compare-and-branch for
  // `CountedLoopEnd'.
  match(CountedLoopEnd cmp (CmpF op1 op2));

  effect(USE lbl);

  ins_cost(XFER_COST + BRANCH_COST * 2);

  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}

  ins_encode %{
    FloatRegister lhs = as_FloatRegister($op1$$reg);
    FloatRegister rhs = as_FloatRegister($op2$$reg);
    Label& target = *($lbl$$label);
    __ float_cmp_branch($cmp$$cmpcode, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare and branch instructions
instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) %{
  // Far form of the double-precision float compare-and-branch for `If'.
  match(If cmp (CmpD op1 op2));

  effect(USE lbl);

  ins_cost(XFER_COST + BRANCH_COST * 2);

  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}

  ins_encode %{
    // double_branch_mask is OR-ed into the condition code handed to
    // float_cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::double_branch_mask;
    FloatRegister lhs = as_FloatRegister($op1$$reg);
    FloatRegister rhs = as_FloatRegister($op2$$reg);
    Label& target = *($lbl$$label);
    __ float_cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_class_compare);
%}

instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) %{
  // Far form of the double-precision float compare-and-branch for
  // `CountedLoopEnd'.
  match(CountedLoopEnd cmp (CmpD op1 op2));

  effect(USE lbl);

  ins_cost(XFER_COST + BRANCH_COST * 2);

  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}

  ins_encode %{
    // double_branch_mask is OR-ed into the condition code handed to
    // float_cmp_branch.
    int cond = $cmp$$cmpcode | C2_MacroAssembler::double_branch_mask;
    FloatRegister lhs = as_FloatRegister($op1$$reg);
    FloatRegister rhs = as_FloatRegister($op2$$reg);
    Label& target = *($lbl$$label);
    __ float_cmp_branch(cond, lhs, rhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_class_compare);
%}

instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) %{
  // Far form of the signed int compare-with-zero `If' branch.
  match(If cmp (CmpI op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %}

  ins_encode %{
    // The zero immediate is supplied as the zr register.
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, zr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) %{
  // Far form of the signed int compare-with-zero `CountedLoopEnd' branch.
  match(CountedLoopEnd cmp (CmpI op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %}

  ins_encode %{
    // The zero immediate is supplied as the zr register.
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, zr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) %{
  // Far form of the unsigned int compare-with-zero `If' branch; the near
  // form with the same match rule is `cmpUEqNeLeGt_reg_imm0_branch'.
  // NOTE(review): unlike its `_loop' sibling this name has no `_reg' part;
  // the name is left unchanged here.
  match(If cmp (CmpU op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) %{
  // Far form of the unsigned int compare-with-zero `CountedLoopEnd' branch;
  // the near form with the same match rule is `cmpUEqNeLeGt_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpU op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

// The unsigned lt/ge compare-with-zero instructs have no short instruct with the same match rule
instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
%{
  // `If' on an unsigned int lt/ge comparison against zero.
  match(If cmp (CmpU op1 zero));

  effect(USE op1, USE lbl);

  // Costed as a single branch, unlike the BRANCH_COST * 2 far variants.
  ins_cost(BRANCH_COST);

  // Per the format string: a plain jump to the label when the condition is ge.
  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %}

  // The enc_class is defined outside this section of the file.
  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
%{
  // `CountedLoopEnd' on an unsigned int lt/ge comparison against zero.
  match(CountedLoopEnd cmp (CmpU op1 zero));

  effect(USE op1, USE lbl);

  // Costed as a single branch, unlike the BRANCH_COST * 2 far variants.
  ins_cost(BRANCH_COST);

  // Per the format string: a plain jump to the label when the condition is ge.
  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %}

  // The enc_class is defined outside this section of the file.
  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) %{
  // Far form of the signed long compare-with-zero `If' branch; the near
  // form with the same match rule is `cmpL_reg_imm0_branch'.
  match(If cmp (CmpL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %}

  ins_encode %{
    // The zero immediate is supplied as the zr register.
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, zr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) %{
  // Far form of the signed long compare-with-zero `CountedLoopEnd' branch;
  // the near form with the same match rule is `cmpL_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %}

  ins_encode %{
    // The zero immediate is supplied as the zr register.
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ cmp_branch($cmp$$cmpcode, lhs, zr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) %{
  // Far form of the unsigned long compare-with-zero `If' branch; the near
  // form with the same match rule is `cmpULEqNeLeGt_reg_imm0_branch'.
  match(If cmp (CmpUL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) %{
  // Far form of the unsigned long compare-with-zero `CountedLoopEnd' branch;
  // the near form with the same match rule is `cmpULEqNeLeGt_reg_imm0_loop'.
  match(CountedLoopEnd cmp (CmpUL op1 zero));
  effect(USE op1, USE lbl);

  ins_cost(BRANCH_COST * 2);
  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %}

  ins_encode %{
    Register lhs = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, lhs, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

// The unsigned long lt/ge compare-with-zero instructs have no short instruct with the same match rule
instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
%{
  // `If' on an unsigned long lt/ge comparison against zero.
  match(If cmp (CmpUL op1 zero));

  effect(USE op1, USE lbl);

  // Costed as a single branch, unlike the BRANCH_COST * 2 far variants.
  ins_cost(BRANCH_COST);

  // Per the format string: a plain jump to the label when the condition is ge.
  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %}

  // Same enc_class as the int variant; it is defined outside this section.
  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
%{
  // `CountedLoopEnd' on an unsigned long lt/ge comparison against zero.
  match(CountedLoopEnd cmp (CmpUL op1 zero));

  effect(USE op1, USE lbl);

  // Costed as a single branch, unlike the BRANCH_COST * 2 far variants.
  ins_cost(BRANCH_COST);

  // Per the format string: a plain jump to the label when the condition is ge.
  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %}

  // Same enc_class as the int variant; it is defined outside this section.
  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl)
%{
  // Far form of the pointer eq/ne-with-zero `If' branch; the near form with
  // the same match rule is `cmpP_imm0_branch'.
  match(If cmp (CmpP op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %}

  ins_encode %{
    Register ptr = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, ptr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl)
%{
  // Far form of the pointer eq/ne-with-zero `CountedLoopEnd' branch; the
  // near form with the same match rule is `cmpP_imm0_loop'.
  match(CountedLoopEnd cmp (CmpP op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %}

  ins_encode %{
    Register ptr = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, ptr, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl)
%{
  // Far form of the narrow pointer eq/ne-with-zero `If' branch; the near
  // form with the same match rule is `cmpN_imm0_branch'.
  match(If cmp (CmpN op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl)
%{
  // Far form of the narrow pointer eq/ne-with-zero `CountedLoopEnd' branch;
  // the near form with the same match rule is `cmpN_imm0_loop'.
  match(CountedLoopEnd cmp (CmpN op1 zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl)
%{
  // Far form of an `If' on (CmpP (DecodeN op1) zero); the near form with the
  // same match rule is `cmpP_narrowOop_imm0_branch'.  The encoding tests the
  // narrow register op1 directly; no decode is emitted here.
  match(If cmp (CmpP (DecodeN op1) zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl)
%{
  // Far form of a `CountedLoopEnd' on (CmpP (DecodeN op1) zero); the near
  // form with the same match rule is `cmpP_narrowOop_imm0_loop'.  The
  // encoding tests the narrow register op1 directly; no decode is emitted here.
  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));

  effect(USE lbl);

  ins_cost(BRANCH_COST * 2);

  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %}

  ins_encode %{
    Register narrow = as_Register($op1$$reg);
    Label& target = *($lbl$$label);
    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, narrow, target, /* is_far */ true);
  %}

  ins_pipe(pipe_cmpz_branch);
%}

// ============================================================================
// Conditional Move Instructions
instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop)
%{
  // Conditional move of an int selected by a signed int comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src)));

  ins_cost(ALU_COST + BRANCH_COST);

  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    Register cmp_lhs = as_Register($op1$$reg);
    Register cmp_rhs = as_Register($op2$$reg);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ enc_cmove($cop$$cmpcode, cmp_lhs, cmp_rhs, dst_reg, src_reg);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop)
%{
  // Conditional move of an int selected by an unsigned int comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src)));

  ins_cost(ALU_COST + BRANCH_COST);

  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to enc_cmove.
    int cond = $cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register cmp_lhs = as_Register($op1$$reg);
    Register cmp_rhs = as_Register($op2$$reg);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ enc_cmove(cond, cmp_lhs, cmp_rhs, dst_reg, src_reg);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop)
%{
  // Conditional move of an int selected by a signed long comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src)));

  ins_cost(ALU_COST + BRANCH_COST);

  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    Register cmp_lhs = as_Register($op1$$reg);
    Register cmp_rhs = as_Register($op2$$reg);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ enc_cmove($cop$$cmpcode, cmp_lhs, cmp_rhs, dst_reg, src_reg);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop)
%{
  // Conditional move of a long selected by a signed long comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src)));

  ins_cost(ALU_COST + BRANCH_COST);

  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    Register cmp_lhs = as_Register($op1$$reg);
    Register cmp_rhs = as_Register($op2$$reg);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ enc_cmove($cop$$cmpcode, cmp_lhs, cmp_rhs, dst_reg, src_reg);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop)
%{
  // Conditional move of a long selected by an unsigned long comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src)));

  ins_cost(ALU_COST + BRANCH_COST);

  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to enc_cmove.
    int cond = $cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask;
    Register cmp_lhs = as_Register($op1$$reg);
    Register cmp_rhs = as_Register($op2$$reg);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src$$reg);
    __ enc_cmove(cond, cmp_lhs, cmp_rhs, dst_reg, src_reg);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{
  // Conditional move of an int selected by an unsigned long comparison.
  // As the format shows, it is a branch on the negated condition that skips
  // over a register move.
  match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src)));
  ins_cost(ALU_COST + BRANCH_COST);

  // The branch line names its `skip' target, matching the other cmov
  // instructs (the target was previously missing from this format only).
  format %{
             "bneg$cop $op1, $op2, skip\t#@cmovI_cmpUL\n\t"
             "mv $dst, $src\n\t"
             "skip:"
         %}

  ins_encode %{
    // unsigned_branch_mask is OR-ed into the condition code handed to enc_cmove.
    __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
                 as_Register($op1$$reg), as_Register($op2$$reg),
                 as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(pipe_slow);
%}

// ============================================================================
// Procedure Call/Return Instructions

// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth)
%{
  // Matches the CallStaticJava ideal node.
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(BRANCH_COST);

  format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %}

  // Two enc_classes applied in order: the static call itself, then the
  // shared call epilog.  Both are defined outside this section of the file.
  ins_encode(riscv_enc_java_static_call(meth),
             riscv_enc_call_epilog);

  ins_pipe(pipe_class_call);
  // Requested alignment for this instruction; see the note above the
  // instruct about keeping compute_padding() in sync.
  ins_alignment(4);
%}

// TO HERE

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth, rFlagsReg cr)
%{
  // Matches the CallDynamicJava ideal node.
  match(CallDynamicJava);

  // Unlike the static call, this one also declares the flags operand killed.
  effect(USE meth, KILL cr);

  ins_cost(BRANCH_COST + ALU_COST * 6);

  format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %}

  // Two enc_classes applied in order: the dynamic call itself, then the
  // shared call epilog.  Both are defined outside this section of the file.
  ins_encode(riscv_enc_java_dynamic_call(meth),
             riscv_enc_call_epilog);

  ins_pipe(pipe_class_call);
  // Requested alignment for this instruction; see the note above the
  // instruct about keeping compute_padding() in sync.
  ins_alignment(4);
%}

// Call Runtime Instruction

instruct CallRuntimeDirect(method meth, rFlagsReg cr)
%{
  // Matches the CallRuntime ideal node.
  match(CallRuntime);

  effect(USE meth, KILL cr);

  ins_cost(BRANCH_COST);

  format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %}

  // Same enc_class as CallLeafDirect and CallLeafNoFPDirect; it is defined
  // outside this section of the file.  No call epilog is attached.
  ins_encode(riscv_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction

instruct CallLeafDirect(method meth, rFlagsReg cr)
%{
  // Matches the CallLeaf ideal node.
  match(CallLeaf);

  effect(USE meth, KILL cr);

  ins_cost(BRANCH_COST);

  format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %}

  // Same enc_class as CallRuntimeDirect; it is defined outside this section
  // of the file.
  ins_encode(riscv_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf NoFP Instruction

instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
%{
  // Matches the CallLeafNoFP ideal node.
  match(CallLeafNoFP);

  effect(USE meth, KILL cr);

  ins_cost(BRANCH_COST);

  format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %}

  // Same enc_class as CallRuntimeDirect; it is defined outside this section
  // of the file.
  ins_encode(riscv_enc_java_to_runtime(meth));

  ins_pipe(pipe_class_call);
%}

// ============================================================================
// Partial Subtype Check
//
// Search the superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()).  Return zero for a hit.  The encoding
// ALSO sets flags.

instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr)
%{
  // Value-producing form: the PartialSubtypeCheck result is set into
  // the `result' operand.
  match(Set result (PartialSubtypeCheck sub super));
  // tmp and the flags operand are declared killed.
  effect(KILL tmp, KILL cr);

  ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
  format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %}

  // Shared with partialSubtypeCheckVsZero; the enc_class is defined outside
  // this section of the file.
  ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));

  // NOTE(review): presumably read by the enc_class to pick the behaviour
  // described below -- confirm against riscv_enc_partial_subtype_check.
  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp,
                                   immP0 zero, rFlagsReg cr)
%{
  // Flag-producing form: matches the comparison of a PartialSubtypeCheck
  // against the zero pointer and sets the flags operand cr.
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  // Here `result' is not an output; it is declared killed together with tmp.
  effect(KILL tmp, KILL result);

  ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
  format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %}

  // Shared with partialSubtypeCheck; the enc_class is defined outside this
  // section of the file.
  ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));

  // NOTE(review): presumably read by the enc_class to pick the behaviour
  // described below -- confirm against riscv_enc_partial_subtype_check.
  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}

instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                         iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{
  // StrComp for the UU encoding; selected only when UseRVV is off.
  predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
  // All four inputs are consumed and clobbered; the temps and flags are killed.
  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}

  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register res = $result$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    __ string_compare(s1, s2, n1, n2, res, t1, t2, t3, StrIntrinsicNode::UU);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                         iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{
  // StrComp for the LL encoding; selected only when UseRVV is off.
  predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
  // All four inputs are consumed and clobbered; the temps and flags are killed.
  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register res = $result$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    __ string_compare(s1, s2, n1, n2, res, t1, t2, t3, StrIntrinsicNode::LL);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{
  // StrComp for the UL encoding; selected only when UseRVV is off.
  predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
  // All four inputs are consumed and clobbered; the temps and flags are killed.
  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register res = $result$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    __ string_compare(s1, s2, n1, n2, res, t1, t2, t3, StrIntrinsicNode::UL);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3,
                          rFlagsReg cr) %{
  // StrComp for the LU encoding; selected only when UseRVV is off.
  predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
  // All four inputs are consumed and clobbered; the temps and flags are killed.
  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register res = $result$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    __ string_compare(s1, s2, n1, n2, res, t1, t2, t3, StrIntrinsicNode::LU);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{
  // StrIndexOf for the UU encoding with both counts held in registers.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  // Inputs are consumed and clobbered; six temps are reserved; flags are killed.
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    Register t4 = $tmp4$$Register;
    Register t5 = $tmp5$$Register;
    Register t6 = $tmp6$$Register;
    Register res = $result$$Register;
    __ string_indexof(s1, s2, n1, n2, t1, t2, t3, t4, t5, t6, res, StrIntrinsicNode::UU);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{
  // StrIndexOf for the LL encoding with both counts held in registers.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  // Inputs are consumed and clobbered; six temps are reserved; flags are killed.
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    Register t4 = $tmp4$$Register;
    Register t5 = $tmp5$$Register;
    Register t6 = $tmp6$$Register;
    Register res = $result$$Register;
    __ string_indexof(s1, s2, n1, n2, t1, t2, t3, t4, t5, t6, res, StrIntrinsicNode::LL);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{
  // StrIndexOf for the UL encoding with both counts held in registers.
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  // Inputs are consumed and clobbered; six temps are reserved; flags are killed.
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    Register s1 = $str1$$Register;
    Register s2 = $str2$$Register;
    Register n1 = $cnt1$$Register;
    Register n2 = $cnt2$$Register;
    Register t1 = $tmp1$$Register;
    Register t2 = $tmp2$$Register;
    Register t3 = $tmp3$$Register;
    Register t4 = $tmp4$$Register;
    Register t5 = $tmp5$$Register;
    Register t6 = $tmp6$$Register;
    Register res = $result$$Register;
    __ string_indexof(s1, s2, n1, n2, t1, t2, t3, t4, t5, t6, res, StrIntrinsicNode::UL);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrIndexOf for the UU encoding when the count of the second string is a
// compile-time constant accepted by the immI_le_4 operand. The constant is
// passed to string_indexof_linearscan as an int and zr is passed in the
// register slot for that count. Only four temporaries are needed here.
instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
                              immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    Register s1    = $str1$$Register;
    Register s2    = $str2$$Register;
    Register n1    = $cnt1$$Register;
    Register index = $result$$Register;
    int const_cnt2 = (int)$int_cnt2$$constant;

    __ string_indexof_linearscan(s1, s2, n1, zr,
                                 $tmp1$$Register, $tmp2$$Register,
                                 $tmp3$$Register, $tmp4$$Register,
                                 const_cnt2, index, StrIntrinsicNode::UU);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrIndexOf for the LL encoding when the count of the second string is a
// compile-time constant accepted by the immI_le_4 operand. The constant is
// passed to string_indexof_linearscan as an int and zr is passed in the
// register slot for that count. Only four temporaries are needed here.
instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
                              immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    Register s1    = $str1$$Register;
    Register s2    = $str2$$Register;
    Register n1    = $cnt1$$Register;
    Register index = $result$$Register;
    int const_cnt2 = (int)$int_cnt2$$constant;

    __ string_indexof_linearscan(s1, s2, n1, zr,
                                 $tmp1$$Register, $tmp2$$Register,
                                 $tmp3$$Register, $tmp4$$Register,
                                 const_cnt2, index, StrIntrinsicNode::LL);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrIndexOf for the UL encoding when the count of the second string is a
// compile-time constant. Unlike the UU and LL constant rules, the constant
// operand here is immI_1. The constant is passed to string_indexof_linearscan
// as an int and zr is passed in the register slot for that count.
instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
                              immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    Register s1    = $str1$$Register;
    Register s2    = $str2$$Register;
    Register n1    = $cnt1$$Register;
    Register index = $result$$Register;
    int const_cnt2 = (int)$int_cnt2$$constant;

    __ string_indexof_linearscan(s1, s2, n1, zr,
                                 $tmp1$$Register, $tmp2$$Register,
                                 $tmp3$$Register, $tmp4$$Register,
                                 const_cnt2, index, StrIntrinsicNode::UL);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrIndexOfChar for the U encoding. Only selected when UseRVV is off.
// str1, cnt1 and ch are consumed (USE_KILL), result is a TEMP_DEF, four
// integer temporaries are reserved and the flags operand is killed.
instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                              iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         KILL cr);

  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    Register str   = $str1$$Register;
    Register cnt   = $cnt1$$Register;
    Register chr   = $ch$$Register;
    Register index = $result$$Register;

    // The trailing flag is false for this U rule and true for the L rule.
    __ string_indexof_char(str, cnt, chr, index,
                           $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register, $tmp4$$Register,
                           false /* isU */);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrIndexOfChar for the L encoding. Only selected when UseRVV is off.
// str1, cnt1 and ch are consumed (USE_KILL), result is a TEMP_DEF, four
// integer temporaries are reserved and the flags operand is killed.
instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                              iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);

  // The listing text used to read StringUTF16, copied from the U rule, which
  // mislabeled this L rule in disassembly and debug output.
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
  ins_encode %{
    // The trailing flag is true for this L rule and false for the U rule.
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register, $tmp4$$Register, true /* isL */);
  %}
  ins_pipe(pipe_class_memory);
%}

// Clearing of an array whose count is held in a register.
// Selected when UseBlockZeroing is on or UseRVV is off. Both cnt and base are
// consumed (USE_KILL) and two pointer temporaries are reserved.
instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegP_R30 tmp1,
                            iRegP_R31 tmp2, Universe dummy)
%{
  // temp registers must match the one used in StubGenerator::generate_zero_blocks()
  predicate(UseBlockZeroing || !UseRVV);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base,
         TEMP tmp1, TEMP tmp2);

  ins_cost(4 * DEFAULT_COST);
  format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}

  ins_encode %{
    Register base_reg = $base$$Register;
    Register cnt_reg  = $cnt$$Register;

    address zero_words_pc = __ zero_words(base_reg, cnt_reg);
    // A NULL result is recorded as a compilation failure and emission stops.
    if (NULL == zero_words_pc) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}

// Clearing of an array whose count is a long constant.
// Selected only when UseRVV is off and the constant, compared as unsigned,
// is below BlockZeroingLowLimit >> LogBytesPerWord. base is consumed
// (USE_KILL) and the flags operand is killed.
instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr)
%{
  predicate(!UseRVV &&
            (uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base,
         KILL cr);

  ins_cost(4 * DEFAULT_COST);
  format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %}

  ins_encode %{
    Register base_reg = $base$$Register;
    uint64_t const_cnt = (uint64_t)$cnt$$constant;

    __ zero_words(base_reg, const_cnt);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrEquals for the LL encoding. Only selected when UseRVV is off.
// Both strings and the count are consumed (USE_KILL); the flags operand is
// killed. The literal 1 is the last argument to string_equals, where the
// UU rule passes 2.
instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                        iRegI_R10 result, rFlagsReg cr)
%{
  predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL cr);

  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}

  ins_encode %{
    Register s1  = $str1$$Register;
    Register s2  = $str2$$Register;
    Register res = $result$$Register;
    Register len = $cnt$$Register;

    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals(s1, s2, res, len, 1);
  %}

  ins_pipe(pipe_class_memory);
%}

// StrEquals for the UU encoding. Only selected when UseRVV is off.
// Both strings and the count are consumed (USE_KILL); the flags operand is
// killed. The literal 2 is the last argument to string_equals, where the
// LL rule passes 1.
instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                        iRegI_R10 result, rFlagsReg cr)
%{
  predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL cr);

  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}

  ins_encode %{
    Register s1  = $str1$$Register;
    Register s2  = $str2$$Register;
    Register res = $result$$Register;
    Register len = $cnt$$Register;

    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals(s1, s2, res, len, 2);
  %}

  ins_pipe(pipe_class_memory);
%}

// AryEq for the LL encoding. Only selected when UseRVV is off.
// Both array operands are consumed (USE_KILL), tmp1..tmp4 are reserved as
// temporaries, and tmp5 plus the flags operand are killed. The literal 1 is
// the last argument to arrays_equals, where the UU rule passes 2.
instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
%{
  predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);

  // The second operand needs its own substitution marker; without it the
  // listing printed the bare operand name instead of the allocated register.
  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp5" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
                     $result$$Register, $tmp5$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// AryEq for the UU encoding. Only selected when UseRVV is off.
// Both array operands are consumed (USE_KILL), tmp1..tmp4 are reserved as
// temporaries, and tmp5 plus the flags operand are killed. The literal 2 is
// the last argument to arrays_equals, where the LL rule passes 1.
instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
%{
  predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);

  // The second operand needs its own substitution marker; without it the
  // listing printed the bare operand name instead of the allocated register.
  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp5" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
                     $result$$Register, $tmp5$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

// ============================================================================
// Safepoint Instructions

// Safepoint poll: matches SafePoint with the poll operand in a pointer
// register and emits read_polling_page with offset 0 and relocation type
// relocInfo::poll_type.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  ins_cost(2 * LOAD_COST);
  format %{ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" %}

  ins_encode %{
    Register poll_reg = as_Register($poll$$reg);
    __ read_polling_page(poll_reg, 0, relocInfo::poll_type);
  %}

  // pipe_serial is used here; iload_reg_mem was the noted alternative.
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}

// ============================================================================
// Thread-local load. The instruction name tlsLoadP is KNOWN by the ADLC and
// cannot be changed. The ADLC forces a 'TypeRawPtr::BOTTOM' output type for
// this instruction.
//
// Matches ThreadLocal into the javaThread_RegP operand. No code is emitted:
// the encoding is empty, the declared size is 0 and the cost is 0.
instruct tlsLoadP(javaThread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %}

  // Zero-length instruction, consistent with the empty encoding below.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// inlined locking and unlocking
// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
//
// Matches FastLock on (object, box) and defines the flags operand cr.
// Two pointer temporaries are reserved; the code itself comes from the
// riscv_enc_fast_lock encoding class.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp1, TEMP tmp2);

  ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3);
  format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %}

  // Encoding is delegated to a named enc_class defined elsewhere in this file.
  ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2));

  ins_pipe(pipe_serial);
%}

// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
//
// Matches FastUnlock on (object, box) and defines the flags operand cr.
// Two pointer temporaries are reserved; the code itself comes from the
// riscv_enc_fast_unlock encoding class.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp1, TEMP tmp2);

  ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4);
  format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %}

  // Encoding is delegated to a named enc_class defined elsewhere in this file.
  ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2));

  ins_pipe(pipe_serial);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
//
// Matches TailCall with the jump target in a pointer register and the method
// oop in the inline_cache_RegP operand. Only jump_target is passed to the
// riscv_enc_tail_call encoding class.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(BRANCH_COST);

  format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %}

  ins_encode(riscv_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump: matches TailJump with the jump target in a pointer register and
// the exception oop in the iRegP_R10 operand. Only jump_target is passed to
// the riscv_enc_tail_jmp encoding class. The cost is one ALU_COST higher than
// the TailCall rule.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(ALU_COST + BRANCH_COST);

  format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %}

  ins_encode(riscv_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
//
// Matches CreateEx into the iRegP_R10 operand. The encoding is empty, the
// declared size is 0 and the cost is 0.
instruct CreateException(iRegP_R10 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  ins_cost(0);
  format %{ " -- \t// exception oop; no code emitted, #@CreateException" %}

  // Zero-length instruction, consistent with the empty encoding below.
  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
//
// Matches Rethrow with no operands; the code comes from the
// riscv_enc_rethrow encoding class.
instruct RethrowException()
%{
  match(Rethrow);

  ins_cost(BRANCH_COST);

  format %{ "j rethrow_stub\t#@RethrowException" %}

  ins_encode(riscv_enc_rethrow());

  ins_pipe(pipe_class_call);
%}

// Return Instruction
// epilog node loads ret address into ra as part of frame pop
//
// Matches Return with no operands; the code comes from the riscv_enc_ret
// encoding class.
instruct Ret()
%{
  match(Return);

  ins_cost(BRANCH_COST);
  format %{ "ret\t// return register, #@Ret" %}

  ins_encode(riscv_enc_ret());

  ins_pipe(pipe_branch);
%}

// Die now.
//
// Matches Halt. A stop carrying _halt_reason is emitted only when
// is_reachable() holds; otherwise the encoding emits nothing.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(BRANCH_COST);

  format %{ "#@ShouldNotReachHere" %}

  ins_encode %{
    // Skip the stop sequence for Halt nodes reported as unreachable.
    if (is_reachable()) {
      __ stop(_halt_reason);
    }
  %}

  ins_pipe(pipe_class_default);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between two operands; a constraint against a fixed register
// encoding is not supported (in x86 terms: 0.dest_reg == RAX_enc)
// Only one replacement instruction
//
//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.

// Local Variables:
// mode: c++
// End: