/* * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Declaration and definition of StubGenerator (no .hpp file). // For a more detailed description of the stub routine structure // see the comment in stubRoutines.hpp.
// These static, partially const, variables are for the AES intrinsics. // They are declared/initialized here to make them available across function bodies.
// These static, partially const, variables are for the AES intrinsics.
// Token fusion repaired: '#ifdefined' -> '#if defined', 'staticconstint' -> 'static const int'.
#if defined(JIT_TIMER)
static const int JIT_TIMER_space      =  8;  // extra space for JIT_TIMER data
#else
static const int JIT_TIMER_space      =  0;
#endif
static const int AES_parmBlk_align    = 32;  // octoword alignment.
static int AES_ctrVal_len  = 0;  // ctr init value len (in bytes), expected: length of dataBlk (16)
static int AES_ctrVec_len  = 0;  // # of ctr vector elements. That many block can be ciphered with one instruction execution
static int AES_ctrArea_len = 0;  // reserved stack space (in bytes) for ctr (= ctrVal_len * ctrVec_len)
static int AES_parmBlk_addspace = 0;  // Must be multiple of AES_parmBlk_align.
                                      // Will be set by stub generator to stub specific value.
static int AES_dataBlk_space    = 0;  // Must be multiple of AES_parmBlk_align.
                                      // Will be set by stub generator to stub specific value.
// NOTE(review): interior fragment of the call_stub entry code; the enclosing
// function's signature and register definitions are outside this chunk.
// Calculate top_of_arguments_addr which will be tos (not prepushed) later.
// Simply use SP + frame::top_ijava_frame_size.
__ add2reg(r_top_of_arguments_addr,
frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);
// Any arguments to copy?
__ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
__ z_bre(arguments_copied);
// Prepare loop and copy arguments in reverse order.
{ // Calculate argument size in bytes.
__ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);
// Get addr of first incoming Java argument.
__ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);
// Let r_argumentcopy_addr point to last outgoing Java argument.
__ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.
// Let r_argument_addr point to last incoming Java argument.
__ add2reg_with_index(r_argument_addr, -BytesPerWord,
r_argument_size_in_bytes, r_argument_addr);
// Now loop while Z_R1 > 0 and copy arguments.
{
Label next_argument;
__ bind(next_argument); // Mem-mem move.
__ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
__ add2reg(r_argument_addr, -BytesPerWord);
__ add2reg(r_argumentcopy_addr, BytesPerWord);
// z_brct decrements Z_R1 and branches while it is non-zero.
__ z_brct(Z_R1, next_argument);
}
} // End of argument copy loop.
BLOCK_COMMENT("call {");
{ // Call frame manager or native entry.
// Register state on entry to frame manager / native entry:
//
//   Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
//   Lesp   = (SP) + copied_arguments_offset - 8
//   Z_method  - method
//   Z_thread  - JavaThread*
//
// Here, the usual SP is the initial_caller_sp.
__ z_lgr(Z_R10, Z_SP);
// Z_esp points to the slot below the last argument.
__ z_lgr(Z_esp, r_top_of_arguments_addr);
// Stack on entry to frame manager / native entry:
//
//   F0 [TOP_IJAVA_FRAME_ABI]
//      [outgoing Java arguments]
//      [ENTRY_FRAME_LOCALS]
//   F1 [C_FRAME]
//   ...
//
// Do a light-weight C-call here, r_new_arg_entry holds the address
// of the interpreter entry point (frame manager or native entry)
// and save runtime-value of return_pc in return_address
// (call by reference argument).
return_address = __ call_stub(r_new_arg_entry);
}
BLOCK_COMMENT("} call");
{
BLOCK_COMMENT("restore registers {"); // Returned from frame manager or native entry. // Now pop frame, process result, and return to caller.
// Stack on exit from frame manager / native entry:
//
//   F0 [ABI]
//      ...
//      [ENTRY_FRAME_LOCALS]
//   F1 [C_FRAME]
//   ...
//
// Just pop the topmost frame ...
//
// Restore frame pointer.
__ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP); // Pop frame. Done here to minimize stalls.
__ pop_frame();
// Reload some volatile registers which we've spilled before the call
// to frame manager / native entry.
// Access all locals via frame pointer, because we know nothing about
// the topmost frame's size.
__ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
__ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);
// Stack on exit from call_stub:
//
//   0 [C_FRAME]
//   ...
//
// No call_stub frames left.
//
// All non-volatiles have been restored at this point!!
//------------------------------------------------------------------------
// The following code makes some assumptions on the T_<type> enum values.
// The enum is defined in globalDefinitions.hpp.
// The validity of the assumptions is tested as far as possible.
// The assigned values should not be shuffled
// T_BOOLEAN==4    - lowest used enum value
// T_NARROWOOP==16 - largest used enum value
//------------------------------------------------------------------------
BLOCK_COMMENT("process result {");
// NOTE(review): the next line fuses declarations with an '#ifdef ASSERT'
// directive; preprocessor directives must begin on their own line. This looks
// like extraction damage — the directives need to be re-split. Kept verbatim.
Label firstHandler; int handlerLen= 8; #ifdef ASSERT char assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
__ z_chi(r_arg_result_type, T_BOOLEAN);
__ asm_assert_low(assertMsg, 0x0234);
__ z_chi(r_arg_result_type, T_NARROWOOP);
__ asm_assert_high(assertMsg, 0x0235); #endif
__ add2reg(r_arg_result_type, -T_BOOLEAN); // Remove offset.
__ z_larl(Z_R1, firstHandler); // location of first handler
__ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
// Dispatch: branch to firstHandler + 8 * (result_type - T_BOOLEAN).
__ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);
// NOTE(review): fragment ends here; the per-type result handlers bound at
// 'firstHandler' are not visible in this chunk.
// Return point for a Java call if there's an exception thrown in
// Java code. The exception is caught and transformed into a
// pending exception stored in JavaThread that can be tested from
// within the VM.
address generate_catch_exception() {
  StubCodeMark mark(this, "StubRoutines", "catch_exception");

  address start = __ pc();

  // Registers alive:
  //   Z_thread - JavaThread*
  //   Z_ARG1   - exception oop
  //   Z_ARG2   - return address in call stub
  //
  // NOTE(review): 'start', 'exception_file' and 'exception_line' were used but
  // never declared in the mangled source (lines dropped in extraction);
  // restored here to match the upstream stub structure — verify against the
  // original stubGenerator_s390.cpp.
  const Register exception_file = Z_R0;
  const Register exception_line = Z_R1;

  __ load_const_optimized(exception_file, (void*)__FILE__);
  __ load_const_optimized(exception_line, (void*)__LINE__);

  __ z_stg(Z_ARG1, thread_(pending_exception));
  // Store into `char *'.
  __ z_stg(exception_file, thread_(exception_file));
  // Store into `int'.
  __ z_st(exception_line, thread_(exception_line));

  // Complete return to VM.
  assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

  // Continue in call stub.
  __ z_br(Z_ARG2);

  return start;
}
// Continuation point for runtime calls returning with a pending
// exception. The pending exception check happened in the runtime
// or native call stub. The pending exception in Thread is
// converted into a Java-level exception.
//
// Read:
//   Z_R14: pc the runtime library callee wants to return to.
//          Since the exception occurred in the callee, the return pc
//          from the point of view of Java is the exception pc.
//
// Invalidate:
//   Volatile registers (except below).
//
// Update:
//   Z_ARG1: exception
//   (Z_R14 is unchanged and is live out).
//
address generate_forward_exception() {
  StubCodeMark mark(this, "StubRoutines", "forward_exception");
  address start = __ pc();

  // NOTE(review): the mangled source had a dangling '#endif', an '#undef' of a
  // never-defined macro, tested Z_ARG1 before loading it, and jumped via an
  // uninitialized Z_R1. The '#ifdef ASSERT' opener, the pending-exception
  // loads, and the handler lookup were restored to match the upstream stub —
  // verify against the original stubGenerator_s390.cpp.
  #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
  // Get pending exception oop.
  __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

  // Make sure that this code is only executed if there is a pending exception.
  {
    Label L;
    __ z_ltgr(Z_ARG1, Z_ARG1);
    __ z_brne(L);
    __ stop("StubRoutines::forward exception: no pending exception (1)");
    __ bind(L);
  }

  __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

  // Save exception pc into Z_ARG2. It is the return address in the caller.
  __ z_lgr(Z_ARG2, Z_R14);

  // Find the exception handler for the exception pc.
  __ save_return_pc();
  __ push_frame_abi160(0);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                  Z_thread, Z_ARG2);
  __ z_lgr(Z_R1, Z_RET);  // Copy handler's address.
  __ pop_frame();
  __ restore_return_pc();

  // Load pending exception oop.
  __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

  // The exception pc is the return address in the caller,
  // must load it into Z_ARG2
  __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
  // Make sure exception is set.
  { Label L;
    __ z_ltgr(Z_ARG1, Z_ARG1);
    __ z_brne(L);
    __ stop("StubRoutines::forward exception: no pending exception (2)");
    __ bind(L);
  }
#endif
  // Clear the pending exception.
  __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
  // Jump to exception handler
  __ z_br(Z_R1 /*handler address*/);

  return start;

  #undef pending_exception_offset
}
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Only callee-saved registers are preserved (through the
// normal RegisterMap handling). If the compiler
// needs all registers to be preserved between the fault point and
// the exception handler then it must assume responsibility for that
// in AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.
// Note that we generate only this stub into a RuntimeStub, because
// it needs to be properly traversed and ignored during GC, so we
// change the meaning of the "__" macro within this method.
// Note: the routine set_pc_not_at_call_for_caller in
// SharedRuntime.cpp requires that this code be generated into a
// RuntimeStub. #undef __ #define __ masm->
// NOTE(review): the '#undef __' / '#define __ masm->' directives above got fused
// onto a comment line (and are thus commented out); each directive must be on
// its own line — extraction damage, kept verbatim.
address generate_throw_exception(constchar* name, address runtime_entry, bool restore_saved_exception_pc, Register arg1 = noreg, Register arg2 = noreg) {
assert_different_registers(arg1, Z_R0_scratch); // would be destroyed by push_frame()
assert_different_registers(arg2, Z_R0_scratch); // would be destroyed by push_frame()
int insts_size = 256; int locs_size = 0;
CodeBuffer code(name, insts_size, locs_size);
MacroAssembler* masm = new MacroAssembler(&code); int framesize_in_bytes;
address start = __ pc();
// Note that we always have a runtime stub frame on the top of stack at this point.
__ get_PC(Z_R1);
__ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);
// Do the call.
BLOCK_COMMENT("call runtime_entry");
__ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2);
__ reset_last_Java_frame();
#ifdef ASSERT // Make sure that this code is only executed if there is a pending exception.
{ Label L;
__ z_lg(Z_R0,
in_bytes(Thread::pending_exception_offset()),
Z_thread);
__ z_ltgr(Z_R0, Z_R0);
__ z_brne(L);
__ stop("StubRoutines::throw_exception: no pending exception");
__ bind(L);
} #endif
// NOTE(review): the remainder of generate_throw_exception (frame teardown,
// branch to the forward_exception entry, RuntimeStub creation, return) is
// missing; the code below — Rlength/Rarray_ptr, check_klass_subtype_slow_path,
// the 'miss' label — belongs to generate_partial_subtype_check (Rsubklass,
// Rsuperklass and 'miss' are used but never declared here). Chunks were lost
// in extraction; two functions are fused into one.
// No args, but tmp registers that are killed. constRegister Rlength = Z_ARG4; // cache array length constRegister Rarray_ptr = Z_ARG5; // Current value from cache array.
if (UseCompressedOops) {
assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub");
}
// Always take the slow path.
__ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
Rarray_ptr, Rlength, NULL, &miss);
// Match falls through here.
__ clear_reg(Z_RET); // Zero indicates a match. Set EQ flag in CC.
__ z_br(Z_R14);
__ BIND(miss);
__ load_const_optimized(Z_RET, 1); // One indicates a miss.
__ z_ltgr(Z_RET, Z_RET); // Set NE flag in CR.
__ z_br(Z_R14);
return start;
}
#if !defined(PRODUCT) // Wrapper which calls oopDesc::is_oop_or_null() // Only called by MacroAssembler::verify_oop staticvoid verify_oop_helper(constchar* message, oopDesc* o) { if (!oopDesc::is_oop_or_null(o)) {
fatal("%s. oop: " PTR_FORMAT, message, p2i(o));
}
++ StubRoutines::_verify_oop_count;
} #endif
// Return address of code to be called from code generated by
// MacroAssembler::verify_oop.
//
// Don't generate, rather use C++ code.
address generate_verify_oop_subroutine() { // Don't generate a StubCodeMark, because no code is generated! // Generating the mark triggers notifying the oprofile jvmti agent // about the dynamic code generation, but the stub without // code (code_size == 0) confuses opjitconv // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
// NOTE(review): the body of generate_verify_oop_subroutine (returning the
// address of verify_oop_helper) and its closing brace are missing; the next
// fused line glues the tail comment to the opening of a different method,
// assert_positive_int. Extraction damage — kept verbatim.
// This is to test that the count register contains a positive int value. // Required because C2 does not respect int to long conversion for stub calls. void assert_positive_int(Register count) { #ifdef ASSERT
__ z_srag(Z_R0, count, 31); // Just leave the sign (must be zero) in Z_R0.
__ asm_assert_eq("missing zero extend", 0xAFFE); #endif
}
// Generate overlap test for array copy stubs. // If no actual overlap is detected, control is transferred to the // "normal" copy stub (entry address passed in disjoint_copy_target). // Otherwise, execution continues with the code generated by the // caller of array_overlap_test. // // Input: // Z_ARG1 - from // Z_ARG2 - to // Z_ARG3 - element count void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
__ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);
Register index = Z_ARG3; if (log2_elem_size > 0) {
__ z_sllg(Z_R1, Z_ARG3, log2_elem_size); // byte count
index = Z_R1;
}
__ add2reg_with_index(Z_R1, 0, index, Z_ARG1); // First byte after "from" range.
// Destructive overlap: let caller generate code for that.
}
// NOTE(review): this function is heavily extraction-damaged. Several
// declarations are used but never declared in the visible text and were most
// likely lost: labels 'done', 'doMVC', 'doMVCUnrolled', 'doMVCLOOP',
// 'doMVCgeneral', 'MVC_template', 'MVC_ListEnd'; bools 'usedMVC',
// 'usedMVCUnrolled', 'usedMVCLOOP', 'usedMVCgeneral', 'usedMVCLE'; ints
// 'stride', 'pcMVCblock_b', 'pcMVCblock_e'; registers 'stride_reg', 'ix_reg'
// (assigned at L276/L277 without declaration). Code kept byte-identical;
// reconstruct from the upstream stubGenerator_s390.cpp before compiling.
// Generate stub for disjoint array copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. // // Arguments for generated stub: // from: Z_ARG1 // to: Z_ARG2 // count: Z_ARG3 treated as signed void generate_disjoint_copy(bool aligned, int element_size, bool branchToEnd, bool restoreArgs) { // This is the zarch specific stub generator for general array copy tasks. // It has the following prereqs and features: // // - No destructive overlap allowed (else unpredictable results). // - Destructive overlap does not exist if the leftmost byte of the target // does not coincide with any of the source bytes (except the leftmost). // // Register usage upon entry: // Z_ARG1 == Z_R2 : address of source array // Z_ARG2 == Z_R3 : address of target array // Z_ARG3 == Z_R4 : length of operands (# of elements on entry) // // Register usage within the generator: // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len). // Used as pair register operand in complex moves, scratch registers anyway. // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg). // Same as R0/R1, but no scratch register. // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine, // but they might get temporarily overwritten.
Register save_reg = Z_ARG4; // (= Z_R5), holds original target operand address for restore.
{ Register llen_reg = Z_R1; // Holds left operand len (odd reg). Register laddr_reg = Z_R0; // Holds left operand addr (even reg), overlaps with data_reg. Register rlen_reg = Z_R5; // Holds right operand len (odd reg), overlaps with save_reg. Register raddr_reg = Z_R4; // Holds right operand addr (even reg), overlaps with len_reg.
Register data_reg = Z_R0; // Holds copied data chunk in alignment process and copy loop. Register len_reg = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after). Register dst_reg = Z_ARG2; // Holds left (target) operand addr. Register src_reg = Z_ARG1; // Holds right (source) operand addr.
assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2"); unsignedint log2_size = exact_log2(element_size);
switch (element_size) { case 1: BLOCK_COMMENT("ARRAYCOPY DISJOINT byte {"); break; case 2: BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break; case 4: BLOCK_COMMENT("ARRAYCOPY DISJOINT int {"); break; case 8: BLOCK_COMMENT("ARRAYCOPY DISJOINT long {"); break; default: BLOCK_COMMENT("ARRAYCOPY DISJOINT {"); break;
}
assert_positive_int(len_reg);
BLOCK_COMMENT("preparation {");
// No copying if len <= 0. if (branchToEnd) {
__ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
} else { if (VM_Version::has_CompareBranch()) {
__ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
} else {
__ z_ltgr(len_reg, len_reg);
__ z_bcr(Assembler::bcondNotPositive, Z_R14);
}
}
// Prefetch just one cache line. Speculative opt for short arrays. // Do not use Z_R1 in prefetch. Is undefined here. if (VM_Version::has_Prefetch()) {
__ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
__ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
}
BLOCK_COMMENT("} preparation");
// Save args only if really needed. // Keep len test local to branch. Is generated only once.
BLOCK_COMMENT("mode selection {");
// Special handling for arrays with only a few elements. // Nothing fancy: just an executed MVC. if (log2_size > 0) {
__ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
} if (element_size != 8) {
__ z_cghi(len_reg, 256/element_size);
__ z_brnh(doMVC);
usedMVC = true;
} if (element_size == 8) { // Long and oop arrays are always aligned.
__ z_cghi(len_reg, 256/element_size);
__ z_brnh(doMVCUnrolled);
usedMVCUnrolled = true;
}
// Prefetch another cache line. We, for sure, have more than one line to copy. if (VM_Version::has_Prefetch()) {
__ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
__ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
}
if (restoreArgs) { // Remember entry value of ARG2 to restore all arguments later from that knowledge.
__ z_lgr(save_reg, dst_reg);
}
__ z_cghi(len_reg, 4096/element_size); if (log2_size == 0) {
__ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
}
__ z_brnh(doMVCLOOP);
// Fall through to MVCLE case.
BLOCK_COMMENT("} mode selection");
// MVCLE: for long arrays // DW aligned: Best performance for sizes > 4kBytes. // unaligned: Least complex for sizes > 256 bytes. if (usedMVCLE) {
BLOCK_COMMENT("mode MVCLE {");
__ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0); // special: bypass cache // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache. // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);
if (restoreArgs) { // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs. // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required. // Len_reg (Z_ARG3) is destroyed and must be restored.
__ z_slgr(laddr_reg, dst_reg); // copied #bytes if (log2_size > 0) {
__ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
} else {
__ z_lgr(Z_ARG3, laddr_reg);
}
} if (branchToEnd) {
__ z_bru(done);
} else {
__ z_br(Z_R14);
}
BLOCK_COMMENT("} mode MVCLE");
} // No fallthru possible here.
// MVCUnrolled: for short, aligned arrays.
if (usedMVCUnrolled) {
BLOCK_COMMENT("mode MVC unrolled {");
stride = 8;
// Generate unrolled MVC instructions. for (int ii = 32; ii > 1; ii--) {
__ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy if (branchToEnd) {
__ z_bru(done);
} else {
__ z_br(Z_R14);
}
}
// NOTE(review): the binds for pcMVCblock_b/pcMVCblock_e and MVC_ListEnd that
// upstream places around the unrolled blocks are not visible here.
// This is an absolute fast path: // - Array len in bytes must be not greater than 256. // - Array len in bytes must be an integer mult of DW // to save expensive handling of trailing bytes. // - Argument restore is not done, // i.e. previous code must not alter arguments (this code doesn't either).
__ bind(doMVCUnrolled);
// Avoid mul, prefer shift where possible. // Combine shift right (for #DW) with shift left (for block size). // Set CC for zero test below (asm_assert). // Note: #bytes comes in Z_R1, #DW in len_reg. unsignedint MVCblocksize = pcMVCblock_e - pcMVCblock_b; unsignedint logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).
if (log2_size > 0) { // Len was scaled into Z_R1. switch (MVCblocksize) {
case 8: logMVCblocksize = 3;
__ z_ltgr(Z_R0, Z_R1); // #bytes is index break; // reasonable size, use shift
case 16: logMVCblocksize = 4;
__ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size); break; // reasonable size, use shift
default: logMVCblocksize = 0;
__ z_ltgr(Z_R0, len_reg); // #DW for mul break; // all other sizes: use mul
}
} else {
guarantee(log2_size, "doMVCUnrolled: only for DW entities");
}
// This test (and branch) is redundant. Previous code makes sure that // - element count > 0 // - element size == 8. // Thus, len reg should never be zero here. We insert an asm_assert() here, // just to double-check and to be on the safe side.
__ asm_assert(false, "zero len cannot occur", 99);
__ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block. // Avoid mul, prefer shift where possible. if (logMVCblocksize == 0) {
__ z_mghi(Z_R0, MVCblocksize);
}
__ z_slgr(Z_R1, Z_R0);
__ z_br(Z_R1);
BLOCK_COMMENT("} mode MVC unrolled");
} // No fallthru possible here.
// MVC execute template // Must always generate. Usage may be switched on below. // There is no suitable place after here to put the template.
__ bind(MVC_template);
__ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly!
// MVC Loop: for medium-sized arrays
// Only for DW aligned arrays (src and dst). // #bytes to copy must be at least 256!!! // Non-aligned cases handled separately.
stride = 256;
stride_reg = Z_R1; // Holds #bytes when control arrives here.
ix_reg = Z_ARG3; // Alias for len_reg.
if (usedMVCLOOP) {
BLOCK_COMMENT("mode MVC loop {");
__ bind(doMVCLOOP);
__ z_lcgr(ix_reg, Z_R1); // Ix runs from -(n-2)*stride to 1*stride (inclusive).
__ z_llill(stride_reg, stride);
__ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.
// NOTE(review): the MVC copy loop itself (bind/copy/branch on ix_reg) is not
// visible between here and the aghi below — likely dropped in extraction.
// Don 't use add2reg() here, since we must set the condition code!
__ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".
if (restoreArgs) {
__ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
__ z_brnz(doMVCgeneral); // We're not done yet, ix_reg is not zero.
// ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
__ z_slgr(dst_reg, save_reg); // copied #bytes
__ z_slgr(src_reg, dst_reg); // = ARG1 (now restored) if (log2_size) {
__ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
} else {
__ z_lgr(Z_ARG3, dst_reg);
}
__ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.
// NOTE(review): the branch/return closing the restoreArgs path and the
// matching else-path of the MVC loop appear to be missing here; control
// falls straight into the MVCgeneral section. Verify against upstream.
// MVCgeneral: for short, unaligned arrays, after other copy operations
// Somewhat expensive due to use of EX instruction, but simple. if (usedMVCgeneral) {
BLOCK_COMMENT("mode MVC general {");
__ bind(doMVCgeneral);
__ add2reg(len_reg, -1, Z_R1); // Get #bytes-1 for EXECUTE. if (VM_Version::has_ExecuteExtensions()) {
__ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
} else {
__ z_larl(Z_R1, MVC_template); // Get addr of instr template.
__ z_ex(len_reg, 0, Z_R0, Z_R1); // Execute MVC with variable length.
} // penalty: 9 ticks
if (restoreArgs) { // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg
__ z_slgr(dst_reg, save_reg); // Copied #bytes without the "doMVCgeneral" chunk
__ z_slgr(src_reg, dst_reg); // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
__ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet. if (log2_size) {
__ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
} else {
__ z_lgr(Z_ARG3, dst_reg);
}
__ z_lgr(Z_ARG2, save_reg); // ARG2 now restored.
}
if (usedMVC) { if (branchToEnd) {
__ z_bru(done);
} else {
__ z_br(Z_R14);
}
} else { if (!branchToEnd) __ z_br(Z_R14);
}
BLOCK_COMMENT("} mode MVC general");
} // Fallthru possible if following block not generated.
// MVC: for short, unaligned arrays
// Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks. // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4. if (usedMVC) {
BLOCK_COMMENT("mode MVC {");
__ bind(doMVC);
// get #bytes-1 for EXECUTE if (log2_size) {
__ add2reg(Z_R1, -1); // Length was scaled into Z_R1.
} else {
__ add2reg(Z_R1, -1, len_reg); // Length was not scaled.
}
if (VM_Version::has_ExecuteExtensions()) {
__ z_exrl(Z_R1, MVC_template); // Execute MVC with variable length.
} else {
__ z_lgr(Z_R0, Z_R5); // Save ARG4, may be unnecessary.
__ z_larl(Z_R5, MVC_template); // Get addr of instr template.
__ z_ex(Z_R1, 0, Z_R0, Z_R5); // Execute MVC with variable length.
__ z_lgr(Z_R5, Z_R0); // Restore ARG4, may be unnecessary.
}
if (!branchToEnd) {
__ z_br(Z_R14);
}
BLOCK_COMMENT("} mode MVC");
}
__ bind(done);
switch (element_size) { case 1: BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break; case 2: BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break; case 4: BLOCK_COMMENT("} ARRAYCOPY DISJOINT int "); break; case 8: BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break; default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT "); break;
}
}
}
// NOTE(review): extraction-damaged. Used-but-undeclared names in the visible
// text: 'log2_size' (no exact_log2 computation), 'stride' (no declaration),
// labels 'copyLoop1', 'countLoop1', 'skipBY', 'skipHW', and the initial
// scaling of ix_reg from len_reg. Code kept byte-identical; reconstruct the
// missing declarations from the upstream stubGenerator_s390.cpp.
// Generate stub for conjoint array copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. // // Arguments for generated stub: // from: Z_ARG1 // to: Z_ARG2 // count: Z_ARG3 treated as signed void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {
// This is the zarch specific stub generator for general array copy tasks. // It has the following prereqs and features: // // - Destructive overlap exists and is handled by reverse copy. // - Destructive overlap exists if the leftmost byte of the target // does coincide with any of the source bytes (except the leftmost). // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride) // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine. // - Z_ARG3 is USED but preserved by the stub routine. // - Z_ARG4 is used as index register and is thus KILLed. //
{ Register stride_reg = Z_R1; // Stride & compare value in loop (negative element_size). Register data_reg = Z_R0; // Holds value of currently processed element. Register ix_reg = Z_ARG4; // Holds byte index of currently processed element. Register len_reg = Z_ARG3; // Holds length (in #elements) of arrays. Register dst_reg = Z_ARG2; // Holds left operand addr. Register src_reg = Z_ARG1; // Holds right operand addr.
assert(256%element_size == 0, "Element size must be power of 2.");
assert(element_size <= 8, "Can't handle more than DW units.");
switch (element_size) { case 1: BLOCK_COMMENT("ARRAYCOPY CONJOINT byte {"); break; case 2: BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break; case 4: BLOCK_COMMENT("ARRAYCOPY CONJOINT int {"); break; case 8: BLOCK_COMMENT("ARRAYCOPY CONJOINT long {"); break; default: BLOCK_COMMENT("ARRAYCOPY CONJOINT {"); break;
}
assert_positive_int(len_reg);
if (VM_Version::has_Prefetch()) {
__ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
__ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
}
// Optimize reverse copy loop. // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks. // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic. // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.
__ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.
if (element_size == 8) // Nothing to do here.
__ z_bru(countLoop1); else { // Do not generate dead code.
__ z_tmll(ix_reg, 7); // Check the "odd" bits.
__ z_bre(countLoop1); // There are none, very good!
}
if (log2_size == 0) { // Handle leftover Byte.
__ z_tmll(ix_reg, 1);
__ z_bre(skipBY);
__ z_lb(data_reg, -1, ix_reg, src_reg);
__ z_stcy(data_reg, -1, ix_reg, dst_reg);
__ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
__ bind(skipBY); // fallthru
} if (log2_size <= 1) { // Handle leftover HW.
__ z_tmll(ix_reg, 2);
__ z_bre(skipHW);
__ z_lhy(data_reg, -2, ix_reg, src_reg);
__ z_sthy(data_reg, -2, ix_reg, dst_reg);
__ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
__ bind(skipHW);
__ z_tmll(ix_reg, 4);
__ z_bre(countLoop1); // fallthru
} if (log2_size <= 2) { // There are just 4 bytes (left) that need to be copied.
__ z_ly(data_reg, -4, ix_reg, src_reg);
__ z_sty(data_reg, -4, ix_reg, dst_reg);
__ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
__ z_bru(countLoop1);
}
// Control can never get to here. Never! Never ever!
__ z_illtrap(0x99);
// Reverse DW copy loop: copy one DW per iteration, countLoop1 drives the
// index down via BRXHG with the negative stride.
__ bind(copyLoop1);
__ z_lg(data_reg, 0, ix_reg, src_reg);
__ z_stg(data_reg, 0, ix_reg, dst_reg);
__ bind(countLoop1);
__ z_brxhg(ix_reg, stride_reg, copyLoop1);
if (!branchToEnd)
__ z_br(Z_R14);
switch (element_size) { case 1: BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break; case 2: BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break; case 4: BLOCK_COMMENT("} ARRAYCOPY CONJOINT int "); break; case 8: BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break; default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT "); break;
}
}
}
// Generate stub for disjoint byte copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
address generate_disjoint_byte_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for byte array copy.
  // Refer to generate_disjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  generate_disjoint_copy(aligned, 1, false, false);
  return __ addr_at(start_off);
}
// Generate stub for disjoint short copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
address generate_disjoint_short_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for short array copy.
  // Refer to generate_disjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  generate_disjoint_copy(aligned, 2, false, false);
  return __ addr_at(start_off);
}
// Generate stub for disjoint int copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
address generate_disjoint_int_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for int array copy.
  // Refer to generate_disjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  generate_disjoint_copy(aligned, 4, false, false);
  return __ addr_at(start_off);
}
// Generate stub for disjoint long copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
address generate_disjoint_long_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for long array copy.
  // Refer to generate_disjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  generate_disjoint_copy(aligned, 8, false, false);
  return __ addr_at(start_off);
}
address generate_disjoint_oop_copy(bool aligned, constchar * name, bool dest_uninitialized) {
StubCodeMark mark(this, "StubRoutines", name); // This is the zarch specific stub generator for oop array copy. // Refer to generate_disjoint_copy for a list of prereqs and features. unsignedint start_off = __ offset(); // Remember stub start address (is rtn value). unsignedint size = UseCompressedOops ? 4 : 8;
// NOTE(review): generate_disjoint_oop_copy is truncated here — everything
// after the element-size computation (GC barrier setup, copy dispatch,
// return, closing brace) is missing from this chunk, and the function is
// never closed before generate_conjoint_byte_copy begins. Extraction damage.
// Generate stub for conjoint (possibly overlapping) byte copy.
// If "aligned" is true, the "from" and "to" addresses are assumed to be
// heapword aligned.
address generate_conjoint_byte_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for overlapping byte array copy.
  // Refer to generate_conjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
                                     : StubRoutines::jbyte_disjoint_arraycopy();
  array_overlap_test(nooverlap_target, 0);  // Branch away to nooverlap_target if disjoint.
  generate_conjoint_copy(aligned, 1, false);
  return __ addr_at(start_off);
}
// Generate stub for conjoint (possibly overlapping) short copy.
// If "aligned" is true, the "from" and "to" addresses are assumed to be
// heapword aligned.
address generate_conjoint_short_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for overlapping short array copy.
  // Refer to generate_conjoint_copy for a list of prereqs and features:
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
                                     : StubRoutines::jshort_disjoint_arraycopy();
  array_overlap_test(nooverlap_target, 1);  // Branch away to nooverlap_target if disjoint.
  generate_conjoint_copy(aligned, 2, false);
  return __ addr_at(start_off);
}
// Generate stub for conjoint (possibly overlapping) int copy.
// If "aligned" is true, the "from" and "to" addresses are assumed to be
// heapword aligned.
address generate_conjoint_int_copy(bool aligned, const char * name) {
  StubCodeMark mark(this, "StubRoutines", name);

  // This is the zarch specific stub generator for overlapping int array copy.
  // Refer to generate_conjoint_copy for a list of prereqs and features:
  // NOTE(review): 'start_off' and 'nooverlap_target' were used but never
  // declared in the mangled source (lines dropped); restored per the sibling
  // byte/short conjoint stubs — verify against upstream stubGenerator_s390.cpp.
  unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
  address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
                                     : StubRoutines::jint_disjoint_arraycopy();
  array_overlap_test(nooverlap_target, 2);  // Branch away to nooverlap_target if disjoint.
  generate_conjoint_copy(aligned, 4, false);
  return __ addr_at(start_off);
}
address generate_conjoint_long_copy(bool aligned, constchar * name) {
StubCodeMark mark(this, "StubRoutines", name); // This is the zarch specific stub generator for overlapping long array copy. // Refer to generate_conjoint_copy for a list of prereqs and features:
// NOTE(review): the body of generate_conjoint_long_copy (start_off,
// nooverlap_target selection, overlap test with shift 3,
// generate_conjoint_copy(aligned, 8, false), return) is missing. The lines
// below — 'shift', 'decorators', 'dest_uninitialized' — belong to
// generate_conjoint_oop_copy; neither 'nooverlap_target' nor 'shift' is
// declared here. Two functions were fused by extraction damage.
// Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
array_overlap_test(nooverlap_target, shift); // Branch away to nooverlap_target if disjoint.
DecoratorSet decorators = IN_HEAP | IS_ARRAY; if (dest_uninitialized) {
decorators |= IS_DEST_UNINITIALIZED;
} if (aligned) {
decorators |= ARRAYCOPY_ALIGNED;
}
// Call interface for AES_encryptBlock, AES_decryptBlock stubs. // // Z_ARG1 - source data block. Ptr to leftmost byte to be processed. // Z_ARG2 - destination data block. Ptr to leftmost byte to be stored. // For in-place encryption/decryption, ARG1 and ARG2 can point // to the same piece of storage. // Z_ARG3 - Crypto key address (expanded key). The first n bits of // the expanded key constitute the original AES-<n> key (see below). // // Z_RET - return value. First unprocessed byte offset in src buffer. // // Some remarks: // The crypto key, as passed from the caller to these encryption stubs, // is a so-called expanded key. It is derived from the original key // by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule // With the expanded key, the cipher/decipher task is decomposed in // multiple, less complex steps, called rounds. Sun SPARC and Intel // processors obviously implement support for those less complex steps. // z/Architecture provides instructions for full cipher/decipher complexity. // Therefore, we need the original, not the expanded key here. // Luckily, the first n bits of an AES-<n> expanded key are formed // by the original key itself. That takes us out of trouble. :-) // The key length (in bytes) relation is as follows: // original expanded rounds key bit keylen // key bytes key bytes length in words // 16 176 11 128 44 // 24 208 13 192 52 // 32 240 15 256 60 // // The crypto instructions used in the AES* stubs have some specific register requirements. // Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction // description in the "z/Architecture Principles of Operation" manual for details. // Z_R1 holds the parameter block address. The parameter block contains the cryptographic key // (KM instruction) and the chaining value (KMC instruction). // dst must designate an even-numbered register, holding the address of the output message. 
// src must designate an even/odd register pair, holding the address/length of the original message.
// Helper function which generates code to // - load the function code in register fCode (== Z_R0). // - load the data block length (depends on cipher function) into register srclen if requested. // - is_decipher switches between cipher/decipher function codes // - set_len requests (if true) loading the data block length in register srclen void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
BLOCK_COMMENT("Set fCode {"); {
Label fCode_set; int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk)
&& (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk); // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
__ z_cghi(keylen, 52); // Check only once at the beginning. keylen and fCode may share the same register.
// NOTE(review): The following trailing text is website-footer residue from the
// page this file was extracted from (a German disclaimer, translated: "The
// information on this website was carefully compiled to the best of our
// knowledge. However, neither completeness, correctness, nor quality of the
// provided information is guaranteed. Remark: the colored syntax rendering is
// still experimental."). It is not part of the HotSpot stub generator source;
// the remainder of generate_load_AES_fCode was lost in extraction and must be
// restored from upstream.