/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/castnode.hpp"
#include "opto/connode.hpp"
#include "opto/convertnode.hpp"
#include "opto/divnode.hpp"
#include "opto/loopnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/movenode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/superword.hpp"
#include "opto/vectornode.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/stubRoutines.hpp"
//------------------------------is_loop_exit-----------------------------------
// Given an IfNode, return the loop-exiting projection or NULL if both
// arms remain in the loop.
Node *IdealLoopTree::is_loop_exit(Node *iff) const {
if (iff->outcnt() != 2) return NULL; // Ignore partially dead tests
PhaseIdealLoop *phase = _phase;
// Test is an IfNode, has 2 projections. If BOTH are in the loop
// we need loop unswitching instead of peeling.
if (!is_member(phase->get_loop(iff->raw_out(0))))
return iff->raw_out(0);
if (!is_member(phase->get_loop(iff->raw_out(1))))
return iff->raw_out(1);
return NULL;
}
//=============================================================================
//------------------------------record_for_igvn----------------------------
// Put loop body on igvn work list
void IdealLoopTree::record_for_igvn() {
for (uint i = 0; i < _body.size(); i++) {
Node *n = _body.at(i);
_phase->_igvn._worklist.push(n);
}
// put body of outer strip mined loop on igvn work list as well
if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
CountedLoopNode* l = _head->as_CountedLoop();
Node* outer_loop = l->outer_loop();
assert(outer_loop != NULL, "missing piece of strip mined loop");
_phase->_igvn._worklist.push(outer_loop);
Node* outer_loop_tail = l->outer_loop_tail();
assert(outer_loop_tail != NULL, "missing piece of strip mined loop");
_phase->_igvn._worklist.push(outer_loop_tail);
Node* outer_loop_end = l->outer_loop_end();
assert(outer_loop_end != NULL, "missing piece of strip mined loop");
_phase->_igvn._worklist.push(outer_loop_end);
Node* outer_safepoint = l->outer_safepoint();
assert(outer_safepoint != NULL, "missing piece of strip mined loop");
_phase->_igvn._worklist.push(outer_safepoint);
Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
assert(cle_out != NULL, "missing piece of strip mined loop");
_phase->_igvn._worklist.push(cle_out);
}
}
//------------------------------compute_exact_trip_count-----------------------
// Compute loop trip count if possible. Do not recalculate trip count for
// split loops (pre-main-post) which have their limits and inits behind Opaque node.
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
return;
}
CountedLoopNode* cl = _head->as_CountedLoop();
// Trip count may become nonexact for iteration split loops since
// RCE modifies limits. Note, _trip_count value is not reset since
// it is used to limit unrolling of main loop.
cl->set_nonexact_trip_count();
// Loop's test should be part of loop.
if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
return; // Infinite loop
#ifdef ASSERT
BoolTest::mask bt = cl->loopexit()->test_trip();
assert(bt == BoolTest::lt || bt == BoolTest::gt ||
bt == BoolTest::ne, "canonical test is expected");
#endif
Node* init_n = cl->init_trip();
Node* limit_n = cl->limit();
if (init_n != NULL && limit_n != NULL) {
// Use longs to avoid integer overflow.
int stride_con = cl->stride_con();
const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi;
jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo;
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
// The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
// init <= limit (for stride_con < 0).
trip_count = MAX2(trip_count, (jlong)1);
if (trip_count < (jlong)max_juint) {
if (init_n->is_Con() && limit_n->is_Con()) {
// Set exact trip count.
cl->set_exact_trip_count((uint)trip_count);
} else if (cl->unrolled_count() == 1) {
// Set maximum trip count before unrolling.
cl->set_trip_count((uint)trip_count);
}
}
}
}
//------------------------------compute_profile_trip_cnt----------------------------
// Compute loop trip count from profile data as
// (backedge_count + loop_exit_count) / loop_exit_count
float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) {
if (n->is_If()) {
IfNode *iff = n->as_If();
if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
Node *exit = is_loop_exit(iff);
if (exit) {
float exit_prob = iff->_prob;
if (exit->Opcode() == Op_IfFalse) {
exit_prob = 1.0 - exit_prob;
}
if (exit_prob > PROB_MIN) {
float exit_cnt = iff->_fcnt * exit_prob;
return exit_cnt;
}
}
}
}
if (n->is_Jump()) {
JumpNode *jmp = n->as_Jump();
if (jmp->_fcnt != COUNT_UNKNOWN) {
float* probs = jmp->_probs;
float exit_prob = 0;
PhaseIdealLoop *phase = _phase;
for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
if (!is_member(_phase->get_loop(u))) {
exit_prob += probs[u->_con];
}
}
return exit_prob * jmp->_fcnt;
}
}
return 0;
}
void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
if (!_head->is_Loop()) {
return;
}
LoopNode* head = _head->as_Loop();
if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
return; // Already computed
}
float trip_cnt = (float)max_jint; // default is big
Node* back = head->in(LoopNode::LoopBackControl);
while (back != head) {
if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
back->in(0) &&
back->in(0)->is_If() &&
back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
back->in(0)->as_If()->_prob != PROB_UNKNOWN &&
(back->Opcode() == Op_IfTrue ? 1-back->in(0)->as_If()->_prob : back->in(0)->as_If()->_prob) > PROB_MIN) {
break;
}
back = phase->idom(back);
}
if (back != head) {
assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
back->in(0), "if-projection exists");
IfNode* back_if = back->in(0)->as_If();
float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (1 - back_if->_prob));
// Now compute a loop exit count
float loop_exit_cnt = 0.0f;
if (_child == NULL) {
for (uint i = 0; i < _body.size(); i++) {
Node *n = _body[i];
loop_exit_cnt += compute_profile_trip_cnt_helper(n);
}
} else {
ResourceMark rm;
Unique_Node_List wq;
wq.push(back);
for (uint i = 0; i < wq.size(); i++) {
Node *n = wq.at(i);
assert(n->is_CFG(), "only control nodes");
if (n != head) {
if (n->is_Region()) {
for (uint j = 1; j < n->req(); j++) {
wq.push(n->in(j));
}
} else {
loop_exit_cnt += compute_profile_trip_cnt_helper(n);
wq.push(n->in(0));
}
}
}
}
if (loop_exit_cnt > 0.0f) {
trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
} else {
// No exit count so use
trip_cnt = loop_back_cnt;
}
} else {
head->mark_profile_trip_failed();
}
#ifndef PRODUCT
if (TraceProfileTripCount) {
tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
}
#endif
head->set_profile_trip_cnt(trip_cnt);
}
//---------------------find_invariant-----------------------------
// Return nonzero index of invariant operand for an associative
// binary operation of (nonconstant) invariant and variant values.
// Helper for reassociate_invariants.
int IdealLoopTree::find_invariant(Node* n, PhaseIdealLoop *phase) {
bool in1_invar = this->is_invariant(n->in(1));
bool in2_invar = this->is_invariant(n->in(2));
if (in1_invar && !in2_invar) return 1;
if (!in1_invar && in2_invar) return 2;
return 0;
}
//---------------------is_associative-----------------------------
// Return TRUE if "n" is an associative binary node. If "base" is
// not NULL, "n" must be re-associative with it.
bool IdealLoopTree::is_associative(Node* n, Node* base) {
int op = n->Opcode();
if (base != NULL) {
assert(is_associative(base), "Base node should be associative");
int base_op = base->Opcode();
if (base_op == Op_AddI || base_op == Op_SubI) {
return op == Op_AddI || op == Op_SubI;
}
if (base_op == Op_AddL || base_op == Op_SubL) {
return op == Op_AddL || op == Op_SubL;
}
return op == base_op;
} else {
// Integer "add/sub/mul/and/or/xor" operations are associative.
return op == Op_AddI || op == Op_AddL
|| op == Op_SubI || op == Op_SubL
|| op == Op_MulI || op == Op_MulL
|| op == Op_AndI || op == Op_AndL
|| op == Op_OrI || op == Op_OrL
|| op == Op_XorI || op == Op_XorL;
}
}
//---------------------reassociate_add_sub------------------------
// Reassociate invariant add and subtract expressions:
//
// inv1 + (x + inv2) => ( inv1 + inv2) + x
// (x + inv2) + inv1 => ( inv1 + inv2) + x
// inv1 + (x - inv2) => ( inv1 - inv2) + x
// inv1 - (inv2 - x) => ( inv1 - inv2) + x
// (x + inv2) - inv1 => (-inv1 + inv2) + x
// (x - inv2) + inv1 => ( inv1 - inv2) + x
// (x - inv2) - inv1 => (-inv1 - inv2) + x
// inv1 + (inv2 - x) => ( inv1 + inv2) - x
// inv1 - (x - inv2) => ( inv1 + inv2) - x
// (inv2 - x) + inv1 => ( inv1 + inv2) - x
// (inv2 - x) - inv1 => (-inv1 + inv2) - x
// inv1 - (x + inv2) => ( inv1 - inv2) - x
//
Node* IdealLoopTree::reassociate_add_sub(Node* n1, int inv1_idx, int inv2_idx, PhaseIdealLoop *phase) {
assert(n1->is_Add() || n1->is_Sub(), "Target node should be add or subtract");
Node* n2 = n1->in(3 - inv1_idx);
Node* inv1 = n1->in(inv1_idx);
Node* inv2 = n2->in(inv2_idx);
Node* x = n2->in(3 - inv2_idx);
bool neg_x = n2->is_Sub() && inv2_idx == 1;
bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
if (n1->is_Sub() && inv1_idx == 1) {
neg_x = !neg_x;
neg_inv2 = !neg_inv2;
}
bool is_int = n1->bottom_type()->isa_int() != NULL;
Node* inv1_c = phase->get_ctrl(inv1);
Node* n_inv1;
if (neg_inv1) {
Node* zero;
if (is_int) {
zero = phase->_igvn.intcon(0);
n_inv1 = new SubINode(zero, inv1);
} else {
zero = phase->_igvn.longcon(0L);
n_inv1 = new SubLNode(zero, inv1);
}
phase->set_ctrl(zero, phase->C->root());
phase->register_new_node(n_inv1, inv1_c);
} else {
n_inv1 = inv1;
}
Node* inv;
if (is_int) {
if (neg_inv2) {
inv = new SubINode(n_inv1, inv2);
} else {
inv = new AddINode(n_inv1, inv2);
}
phase->register_new_node(inv, phase->get_early_ctrl(inv));
if (neg_x) {
return new SubINode(inv, x);
} else {
return new AddINode(x, inv);
}
} else {
if (neg_inv2) {
inv = new SubLNode(n_inv1, inv2);
} else {
inv = new AddLNode(n_inv1, inv2);
}
phase->register_new_node(inv, phase->get_early_ctrl(inv));
if (neg_x) {
return new SubLNode(inv, x);
} else {
return new AddLNode(x, inv);
}
}
}
//---------------------reassociate-----------------------------
// Reassociate invariant binary expressions with add/sub/mul/
// and/or/xor operators.
// For add/sub expressions: see "reassociate_add_sub"
//
// For mul/and/or/xor expressions:
//
// inv1 op (x op inv2) => (inv1 op inv2) op x
//
Node* IdealLoopTree::reassociate(Node* n1, PhaseIdealLoop *phase) {
if (!is_associative(n1) || n1->outcnt() == 0) return NULL;
if (is_invariant(n1)) return NULL;
// Don't mess with add of constant (igvn moves them to expression tree root.)
if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
int inv1_idx = find_invariant(n1, phase);
if (!inv1_idx) return NULL;
Node* n2 = n1->in(3 - inv1_idx);
if (!is_associative(n2, n1)) return NULL;
int inv2_idx = find_invariant(n2, phase);
if (!inv2_idx) return NULL;
if (!phase->may_require_nodes(10, 10)) return NULL;
Node* result = NULL;
switch (n1->Opcode()) {
case Op_AddI:
case Op_AddL:
case Op_SubI:
case Op_SubL:
result = reassociate_add_sub(n1, inv1_idx, inv2_idx, phase);
break;
case Op_MulI:
case Op_MulL:
case Op_AndI:
case Op_AndL:
case Op_OrI:
case Op_OrL:
case Op_XorI:
case Op_XorL: {
Node* inv1 = n1->in(inv1_idx);
Node* inv2 = n2->in(inv2_idx);
Node* x = n2->in(3 - inv2_idx);
Node* inv = n2->clone_with_data_edge(inv1, inv2);
phase->register_new_node(inv, phase->get_early_ctrl(inv));
result = n1->clone_with_data_edge(x, inv);
break;
}
default:
ShouldNotReachHere();
}
assert(result != NULL, "");
phase->register_new_node(result, phase->get_ctrl(n1));
phase->_igvn.replace_node(n1, result);
assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
_body.yank(n1);
return result;
}
//---------------------reassociate_invariants-----------------------------
// Reassociate invariant expressions:
void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
for (int i = _body.size() - 1; i >= 0; i--) {
Node *n = _body.at(i);
for (int j = 0; j < 5; j++) {
Node* nn = reassociate(n, phase);
if (nn == NULL) break;
n = nn; // again
}
}
}
//------------------------------policy_peeling---------------------------------
// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
// is applicable if we can make a loop-invariant test (usually a null-check)
// execute before we enter the loop. When TRUE, the estimated node budget is
// also requested.
bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
uint estimate = estimate_peeling(phase);
return estimate == 0 ? false : phase->may_require_nodes(estimate);
}
// Perform actual policy and size estimate for the loop peeling transform, and
// return the estimated loop size if peeling is applicable, otherwise return
// zero. No node budget is allocated.
uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// Peeling does loop cloning which can result in O(N^2) node construction.
if (_body.size() > 255) {
return 0; // Suppress too large body size.
}
// Optimistic estimate that approximates loop body complexity via data and
// control flow fan-out (instead of using the more pessimistic: BodySize^2).
uint estimate = est_loop_clone_sz(2);
if (phase->exceeding_node_budget(estimate)) {
return 0; // Too large to safely clone.
}
// Check for vectorized loops, any peeling done was already applied.
if (_head->is_CountedLoop()) {
CountedLoopNode* cl = _head->as_CountedLoop();
if (cl->is_unroll_only() || cl->trip_count() == 1) {
return 0;
}
}
Node* test = tail();
while (test != _head) { // Scan till run off top of loop
if (test->is_If()) { // Test?
Node *ctrl = phase->get_ctrl(test->in(1));
if (ctrl->is_top()) {
return 0; // Found dead test on live IF? No peeling!
}
// Standard IF only has one input value to check for loop invariance.
assert(test->Opcode() == Op_If ||
test->Opcode() == Op_CountedLoopEnd ||
test->Opcode() == Op_LongCountedLoopEnd ||
test->Opcode() == Op_RangeCheck,
"Check this code when new subtype is added");
// Condition is not a member of this loop?
if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
return estimate; // Found reason to peel!
}
}
// Walk up dominators to loop _head looking for test which is executed on
// every path through the loop.
test = phase->idom(test);
}
return 0;
}
//------------------------------peeled_dom_test_elim---------------------------
// If we got the effect of peeling, either by actually peeling or by making
// a pre-loop which must execute at least once, we can remove all
// loop-invariant dominated tests in the main body.
void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_new) {
bool progress = true;
while (progress) {
progress = false; // Reset for next iteration
Node* prev = loop->_head->in(LoopNode::LoopBackControl); // loop->tail();
Node* test = prev->in(0);
while (test != loop->_head) { // Scan till run off top of loop
int p_op = prev->Opcode();
assert(test != NULL, "test cannot be NULL");
Node* test_cond = NULL;
if ((p_op == Op_IfFalse || p_op == Op_IfTrue) && test->is_If()) {
test_cond = test->in(1);
}
if (test_cond != NULL && // Test?
!test_cond->is_Con() && // And not already obvious?
// And condition is not a member of this loop?
!loop->is_member(get_loop(get_ctrl(test_cond)))) {
// Walk loop body looking for instances of this test
for (uint i = 0; i < loop->_body.size(); i++) {
Node* n = loop->_body.at(i);
// Check against cached test condition because dominated_by()
// replaces the test condition with a constant.
if (n->is_If() && n->in(1) == test_cond) {
// IfNode was dominated by version in peeled loop body
progress = true;
dominated_by(old_new[prev->_idx]->as_IfProj(), n->as_If());
}
}
}
prev = test;
test = idom(test);
} // End of scan tests in loop
} // End of while (progress)
}
//------------------------------do_peeling-------------------------------------
// Peel the first iteration of the given loop.
// Step 1: Clone the loop body. The clone becomes the peeled iteration.
// The pre-loop illegally has 2 control users (old & new loops).
// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
// Do this by making the old-loop fall-in edges act as if they came
// around the loopback from the prior iteration (follow the old-loop
// backedges) and then map to the new peeled iteration. This leaves
// the pre-loop with only 1 user (the new peeled iteration), but the
// peeled-loop backedge has 2 users.
// Step 3: Cut the backedge on the clone (so its not a loop) and remove the
// extra backedge user.
//
// orig
//
// stmt1
// |
// v
// loop predicate
// |
// v
// loop<----+
// | |
// stmt2 |
// | |
// v |
// if ^
// / \ |
// / \ |
// v v |
// false true |
// / \ |
// / ----+
// |
// v
// exit
//
//
// after clone loop
//
// stmt1
// |
// v
// loop predicate
// / \
// clone / \ orig
// / \
// / \
// v v
// +---->loop clone loop<----+
// | | | |
// | stmt2 clone stmt2 |
// | | | |
// | v v |
// ^ if clone If ^
// | / \ / \ |
// | / \ / \ |
// | v v v v |
// | true false false true |
// | / \ / \ |
// +---- \ / ----+
// \ /
// 1v v2
// region
// |
// v
// exit
//
//
// after peel and predicate move
//
// stmt1
// |
// v
// loop predicate
// /
// /
// clone / orig
// /
// / +----------+
// / | |
// / | |
// / | |
// v v |
// TOP-->loop clone loop<----+ |
// | | | |
// stmt2 clone stmt2 | |
// | | | ^
// v v | |
// if clone If ^ |
// / \ / \ | |
// / \ / \ | |
// v v v v | |
// true false false true | |
// | \ / \ | |
// | \ / ----+ ^
// | \ / |
// | 1v v2 |
// v region |
// | | |
// | v |
// | exit |
// | |
// +--------------->-----------------+
//
//
// final graph
//
// stmt1
// |
// v
// loop predicate
// |
// v
// stmt2 clone
// |
// v
// if clone
// / |
// / |
// v v
// false true
// | |
// | v
// | initialized skeleton predicates
// | |
// | v
// | loop<----+
// | | |
// | stmt2 |
// | | |
// | v |
// v if ^
// | / \ |
// | / \ |
// | v v |
// | false true |
// | | \ |
// v v --+
// region
// |
// v
// exit
//
void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
C->set_major_progress();
// Peeling a 'main' loop in a pre/main/post situation obfuscates the
// 'pre' loop from the main and the 'pre' can no longer have its
// iterations adjusted. Therefore, we need to declare this loop as
// no longer a 'main' loop; it will need new pre and post loops before
// we can do further RCE.
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("Peel ");
loop->dump_head();
}
#endif
LoopNode* head = loop->_head->as_Loop();
bool counted_loop = head->is_CountedLoop();
if (counted_loop) {
CountedLoopNode *cl = head->as_CountedLoop();
assert(cl->trip_count() > 0, "peeling a fully unrolled loop");
cl->set_trip_count(cl->trip_count() - 1);
if (cl->is_main_loop()) {
cl->set_normal_loop();
#ifndef PRODUCT
if (PrintOpto && VerifyLoopOptimizations) {
tty->print("Peeling a 'main' loop; resetting to 'normal' ");
loop->dump_head();
}
#endif
}
}
Node* entry = head->in(LoopNode::EntryControl);
// Step 1: Clone the loop body. The clone becomes the peeled iteration.
// The pre-loop illegally has 2 control users (old & new loops).
const uint idx_before_clone = Compile::current()->unique();
LoopNode* outer_loop_head = head->skip_strip_mined();
clone_loop(loop, old_new, dom_depth(outer_loop_head), ControlAroundStripMined);
// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
// Do this by making the old-loop fall-in edges act as if they came
// around the loopback from the prior iteration (follow the old-loop
// backedges) and then map to the new peeled iteration. This leaves
// the pre-loop with only 1 user (the new peeled iteration), but the
// peeled-loop backedge has 2 users.
Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
_igvn.hash_delete(outer_loop_head);
outer_loop_head->set_req(LoopNode::EntryControl, new_entry);
for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
Node* old = head->fast_out(j);
if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
Node* new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
if (!new_exit_value) // Backedge value is ALSO loop invariant?
// Then loop body backedge value remains the same.
new_exit_value = old->in(LoopNode::LoopBackControl);
_igvn.hash_delete(old);
old->set_req(LoopNode::EntryControl, new_exit_value);
}
}
// Step 3: Cut the backedge on the clone (so its not a loop) and remove the
// extra backedge user.
Node* new_head = old_new[head->_idx];
_igvn.hash_delete(new_head);
new_head->set_req(LoopNode::LoopBackControl, C->top());
for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
Node* use = new_head->fast_out(j2);
if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
_igvn.hash_delete(use);
use->set_req(LoopNode::LoopBackControl, C->top());
}
}
// Step 4: Correct dom-depth info. Set to loop-head depth.
int dd_outer_loop_head = dom_depth(outer_loop_head);
set_idom(outer_loop_head, outer_loop_head->in(LoopNode::EntryControl), dd_outer_loop_head);
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
Node *old = loop->_body.at(j3);
Node *nnn = old_new[old->_idx];
if (!has_ctrl(nnn)) {
set_idom(nnn, idom(nnn), dd_outer_loop_head-1);
}
}
// Step 5: skeleton_predicates instantiation
if (counted_loop && UseLoopPredicate) {
CountedLoopNode *cl_head = head->as_CountedLoop();
Node* init = cl_head->init_trip();
Node* stride = cl_head->stride();
IdealLoopTree* outer_loop = get_loop(outer_loop_head);
Predicates predicates(new_head->in(LoopNode::EntryControl));
initialize_skeleton_predicates_for_peeled_loop(predicates.predicate(),
outer_loop_head, dd_outer_loop_head,
init, stride, outer_loop,
idx_before_clone, old_new);
initialize_skeleton_predicates_for_peeled_loop(predicates.profile_predicate(),
outer_loop_head, dd_outer_loop_head,
init, stride, outer_loop,
idx_before_clone, old_new);
}
// Now force out all loop-invariant dominating tests. The optimizer
// finds some, but we _know_ they are all useless.
peeled_dom_test_elim(loop,old_new);
loop->record_for_igvn();
}
//------------------------------policy_maximally_unroll------------------------
// Calculate the exact loop trip-count and return TRUE if loop can be fully,
// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
// node budget is also requested.
bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop* phase) const {
CountedLoopNode* cl = _head->as_CountedLoop();
assert(cl->is_normal_loop(), "");
if (!cl->is_valid_counted_loop(T_INT)) {
return false; // Malformed counted loop.
}
if (!cl->has_exact_trip_count()) {
return false; // Trip count is not exact.
}
uint trip_count = cl->trip_count();
// Note, max_juint is used to indicate unknown trip count.
assert(trip_count > 1, "one iteration loop should be optimized out already");
assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// Allow the unrolled body to get larger than the standard loop size limit.
uint unroll_limit = (uint)LoopUnrollLimit * 4;
assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
if (trip_count > unroll_limit || _body.size() > unroll_limit) {
return false;
}
uint new_body_size = est_loop_unroll_sz(trip_count);
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
return false;
}
// Fully unroll a loop with few iterations, regardless of other conditions,
// since the following (general) loop optimizations will split such loop in
// any case (into pre-main-post).
if (trip_count <= 3) {
return phase->may_require_nodes(new_body_size);
}
// Reject if unrolling will result in too much node construction.
if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
return false;
}
// Do not unroll a loop with String intrinsics code.
// String intrinsics are large and have loops.
for (uint k = 0; k < _body.size(); k++) {
Node* n = _body.at(k);
switch (n->Opcode()) {
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_StrIndexOfChar:
case Op_EncodeISOArray:
case Op_AryEq:
case Op_CountPositives: {
return false;
}
#if INCLUDE_RTM_OPT
case Op_FastLock:
case Op_FastUnlock: {
// Don't unroll RTM locking code because it is large.
if (UseRTMLocking) {
return false;
}
}
#endif
} // switch
}
return phase->may_require_nodes(new_body_size);
}
//------------------------------policy_unroll----------------------------------
// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
// the loop is a counted loop and the loop body is small enough. When TRUE,
// the estimated node budget is also requested.
bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
CountedLoopNode *cl = _head->as_CountedLoop();
assert(cl->is_normal_loop() || cl->is_main_loop(), "");
if (!cl->is_valid_counted_loop(T_INT)) {
return false; // Malformed counted loop
}
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// Protect against over-unrolling.
// After split at least one iteration will be executed in pre-loop.
if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
return false;
}
_local_loop_unroll_limit = LoopUnrollLimit;
_local_loop_unroll_factor = 4;
int future_unroll_cnt = cl->unrolled_count() * 2;
if (!cl->is_vectorized_loop()) {
if (future_unroll_cnt > LoopMaxUnroll) return false;
} else {
// obey user constraints on vector mapped loops with additional unrolling applied
int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : 1;
if ((future_unroll_cnt / unroll_constraint) > LoopMaxUnroll) return false;
}
const int stride_con = cl->stride_con();
// Check for initial stride being a small enough constant
const int initial_stride_sz = MAX2(1<<2, Matcher::max_vector_size(T_BYTE) / 2);
// Maximum stride size should protect against overflow, when doubling stride unroll_count times
const int max_stride_size = MIN2<int>(max_jint / 2 - 2, initial_stride_sz * future_unroll_cnt);
// No abs() use; abs(min_jint) = min_jint
if (stride_con < -max_stride_size || stride_con > max_stride_size) return false;
// Don't unroll if the next round of unrolling would push us
// over the expected trip count of the loop. One is subtracted
// from the expected trip count because the pre-loop normally
// executes 1 iteration.
if (UnrollLimitForProfileCheck > 0 &&
cl->profile_trip_cnt() != COUNT_UNKNOWN &&
future_unroll_cnt > UnrollLimitForProfileCheck &&
(float)future_unroll_cnt > cl->profile_trip_cnt() - 1.0) {
return false;
}
bool should_unroll = true;
// When unroll count is greater than LoopUnrollMin, don't unroll if:
// the residual iterations are more than 10% of the trip count
// and rounds of "unroll,optimize" are not making significant progress
// Progress defined as current size less than 20% larger than previous size.
if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
future_unroll_cnt > LoopUnrollMin &&
is_residual_iters_large(future_unroll_cnt, cl) &&
1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
if ((cl->slp_max_unroll() == 0) && !is_residual_iters_large(cl->unrolled_count(), cl)) {
// cl->slp_max_unroll() = 0 means that the previous slp analysis never passed.
// slp analysis may fail due to the loop IR is too complicated especially during the early stage
// of loop unrolling analysis. But after several rounds of loop unrolling and other optimizations,
// it's possible that the loop IR becomes simple enough to pass the slp analysis.
// So we don't return immediately in hoping that the next slp analysis can succeed.
should_unroll = false;
future_unroll_cnt = cl->unrolled_count();
} else {
return false;
}
}
Node *init_n = cl->init_trip();
Node *limit_n = cl->limit();
if (limit_n == NULL) return false; // We will dereference it below.
// Non-constant bounds.
// Protect against over-unrolling when init or/and limit are not constant
// (so that trip_count's init value is maxint) but iv range is known.
if (init_n == NULL || !init_n->is_Con() || !limit_n->is_Con()) {
Node* phi = cl->phi();
if (phi != NULL) {
assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
int next_stride = stride_con * 2; // stride after this unroll
if (next_stride > 0) {
if (iv_type->_lo > max_jint - next_stride || // overflow
iv_type->_lo + next_stride > iv_type->_hi) {
return false; // over-unrolling
}
} else if (next_stride < 0) {
if (iv_type->_hi < min_jint - next_stride || // overflow
iv_type->_hi + next_stride < iv_type->_lo) {
return false; // over-unrolling
}
}
}
}
// After unroll limit will be adjusted: new_limit = limit-stride.
// Bailout if adjustment overflow.
const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
if ((stride_con > 0 && ((min_jint + stride_con) > limit_type->_hi)) ||
(stride_con < 0 && ((max_jint + stride_con) < limit_type->_lo)))
return false; // overflow
// Rudimentary cost model to estimate loop unrolling
// factor.
// Adjust body_size to determine if we unroll or not
uint body_size = _body.size();
// Key test to unroll loop in CRC32 java code
int xors_in_loop = 0;
// Also count ModL, DivL and MulL which expand mightly
for (uint k = 0; k < _body.size(); k++) {
Node* n = _body.at(k);
switch (n->Opcode()) {
case Op_XorI: xors_in_loop++; break; // CRC32 java code
case Op_ModL: body_size += 30; break;
case Op_DivL: body_size += 30; break;
case Op_MulL: body_size += 10; break;
case Op_RoundF:
case Op_RoundD: {
body_size += Matcher::scalar_op_pre_select_sz_estimate(n->Opcode(), n->bottom_type()->basic_type());
} break;
case Op_CountTrailingZerosV:
case Op_CountLeadingZerosV:
case Op_ReverseV:
case Op_RoundVF:
case Op_RoundVD:
case Op_VectorCastD2X:
case Op_VectorCastF2X:
case Op_PopCountVI:
case Op_PopCountVL: {
const TypeVect* vt = n->bottom_type()->is_vect();
body_size += Matcher::vector_op_pre_select_sz_estimate(n->Opcode(), vt->element_basic_type(), vt->length());
} break;
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_StrIndexOfChar:
case Op_EncodeISOArray:
case Op_AryEq:
case Op_CountPositives: {
// Do not unroll a loop with String intrinsics code.
// String intrinsics are large and have loops.
return false;
}
#if INCLUDE_RTM_OPT
case Op_FastLock:
case Op_FastUnlock: {
// Don't unroll RTM locking code because it is large.
if (UseRTMLocking) {
return false;
}
}
#endif
} // switch
}
if (UseSuperWord) {
if (!cl->is_reduction_loop()) {
phase->mark_reductions(this);
}
// Only attempt slp analysis when user controls do not prohibit it
if (!cl->range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) {
// Once policy_slp_analysis succeeds, mark the loop with the
// maximal unroll factor so that we minimize analysis passes
if (future_unroll_cnt >= _local_loop_unroll_factor) {
policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
}
}
}
int slp_max_unroll_factor = cl->slp_max_unroll();
if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
LoopMaxUnroll = slp_max_unroll_factor;
}
uint estimate = est_loop_clone_sz(2);
if (cl->has_passed_slp()) {
if (slp_max_unroll_factor >= future_unroll_cnt) {
return should_unroll && phase->may_require_nodes(estimate);
}
return false; // Loop too big.
}
// Check for being too big
if (body_size > (uint)_local_loop_unroll_limit) {
if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
return should_unroll && phase->may_require_nodes(estimate);
}
return false; // Loop too big.
}
if (cl->is_unroll_only()) {
if (TraceSuperWordLoopUnrollAnalysis) {
tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
slp_max_unroll_factor, future_unroll_cnt);
}
}
// Unroll once! (Each trip will soon do double iterations)
return should_unroll && phase->may_require_nodes(estimate);
}
void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) {
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// Enable this functionality target by target as needed
if (SuperWordLoopUnrollAnalysis) {
if (!cl->was_slp_analyzed()) {
SuperWord sw(phase);
sw.transform_loop(this, false);
// If the loop is slp canonical analyze it
if (sw.early_return() == false) {
sw.unrolling_analysis(_local_loop_unroll_factor);
}
}
if (cl->has_passed_slp()) {
int slp_max_unroll_factor = cl->slp_max_unroll();
if (slp_max_unroll_factor >= future_unroll_cnt) {
int new_limit = cl->node_count_before_unroll() * slp_max_unroll_factor;
if (new_limit > LoopUnrollLimit) {
if (TraceSuperWordLoopUnrollAnalysis) {
tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
}
_local_loop_unroll_limit = new_limit;
}
}
}
}
}
//------------------------------policy_range_check-----------------------------
// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
// When TRUE, the estimated node budget is also requested.
//
// We will actually perform iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional, BasicType bt) const {
if (!provisional && !RangeCheckElimination) return false;
// If nodes are depleted, some transform has miscalculated its needs.
assert(provisional || !phase->exceeding_node_budget(), "sanity");
if (_head->is_CountedLoop()) {
CountedLoopNode *cl = _head->as_CountedLoop();
// If we unrolled with no intention of doing RCE and we later changed our
// minds, we got no pre-loop. Either we need to make a new pre-loop, or we
// have to disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
// check for vectorized loops, some opts are no longer needed
// RCE needs pre/main/post loops. Don't apply it on a single iteration loop.
if (cl->is_unroll_only() || (cl->is_normal_loop() && cl->trip_count() == 1)) return false;
} else {
assert(provisional, "no long counted loop expected");
}
BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
Node *trip_counter = cl->phi();
assert(!cl->is_LongCountedLoop() || bt == T_LONG, "only long range checks in long counted loops");
assert(cl->is_valid_counted_loop(cl->bt()), "only for well formed loops");
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
for (uint i = 0; i < _body.size(); i++) {
Node *iff = _body[i];
if (iff->Opcode() == Op_If ||
iff->Opcode() == Op_RangeCheck) { // Test?
// Comparing trip+off vs limit
Node *bol = iff->in(1);
if (bol->req() != 2) {
continue; // dead constant test
}
if (!bol->is_Bool()) {
assert(bol->Opcode() == Op_Conv2B, "predicate check only");
continue;
}
if (bol->as_Bool()->_test._test == BoolTest::ne) {
continue; // not RC
}
Node *cmp = bol->in(1);
if (provisional) {
// Try to pattern match with either cmp inputs, do not check
// whether one of the inputs is loop independent as it may not
// have had a chance to be hoisted yet.
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, bt, NULL, NULL) &&
!phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, bt, NULL, NULL)) {
continue;
}
} else {
Node *rc_exp = cmp->in(1);
Node *limit = cmp->in(2);
Node *limit_c = phase->get_ctrl(limit);
if (limit_c == phase->C->top()) {
return false; // Found dead test on live IF? No RCE!
}
if (is_member(phase->get_loop(limit_c))) {
// Compare might have operands swapped; commute them
rc_exp = cmp->in(2);
limit = cmp->in(1);
limit_c = phase->get_ctrl(limit);
if (is_member(phase->get_loop(limit_c))) {
continue; // Both inputs are loop varying; cannot RCE
}
}
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, bt, NULL, NULL)) {
continue;
}
}
// Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
// projections. If BOTH are in the loop we need loop unswitching instead
// of iteration splitting.
if (is_loop_exit(iff)) {
// Found valid reason to split iterations (if there is room).
// NOTE: Usually a gross overestimate.
// Long range checks cause the loop to be transformed in a loop nest which only causes a fixed number of nodes
// to be added
return provisional || bt == T_LONG || phase->may_require_nodes(est_loop_clone_sz(2));
}
} // End of is IF
}
return false;
}
//------------------------------policy_peel_only-------------------------------
// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
// for unrolling loops with NO array accesses.
bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
// check for vectorized loops, any peeling done was already applied
if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
return false;
}
for (uint i = 0; i < _body.size(); i++) {
if (_body[i]->is_Mem()) {
return false;
}
}
// No memory accesses at all!
return true;
}
//------------------------------clone_up_backedge_goo--------------------------
// If Node n lives in the back_ctrl block and cannot float, we clone a private
// version of n in preheader_ctrl block and return that, otherwise return n.
Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones) {
if (get_ctrl(n) != back_ctrl) return n;
// Only visit once
if (visited.test_set(n->_idx)) {
Node *x = clones.find(n->_idx);
return (x != NULL) ? x : n;
}
Node *x = NULL; // If required, a clone of 'n'
// Check for 'n' being pinned in the backedge.
if (n->in(0) && n->in(0) == back_ctrl) {
assert(clones.find(n->_idx) == NULL, "dead loop");
x = n->clone(); // Clone a copy of 'n' to preheader
clones.push(x, n->_idx);
x->set_req(0, preheader_ctrl); // Fix x's control input to preheader
}
// Recursive fixup any other input edges into x.
// If there are no changes we can just return 'n', otherwise
// we need to clone a private copy and change it.
for (uint i = 1; i < n->req(); i++) {
Node *g = clone_up_backedge_goo(back_ctrl, preheader_ctrl, n->in(i), visited, clones);
if (g != n->in(i)) {
if (!x) {
assert(clones.find(n->_idx) == NULL, "dead loop");
x = n->clone();
clones.push(x, n->_idx);
}
x->set_req(i, g);
}
}
if (x) { // x can legally float to pre-header location
register_new_node(x, preheader_ctrl);
return x;
} else { // raise n to cover LCA of uses
set_ctrl(n, find_non_split_ctrl(back_ctrl->in(0)));
}
return n;
}
Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
Node* castii = new CastIINode(incr, TypeInt::INT, ConstraintCastNode::UnconditionalDependency);
castii->set_req(0, ctrl);
register_new_node(castii, ctrl);
for (DUIterator_Fast imax, i = incr->fast_outs(imax); i < imax; i++) {
Node* n = incr->fast_out(i);
if (n->is_Phi() && n->in(0) == loop) {
int nrep = n->replace_edge(incr, castii, &_igvn);
return castii;
}
}
return NULL;
}
#ifdef ASSERT
void PhaseIdealLoop::ensure_zero_trip_guard_proj(Node* node, bool is_main_loop) {
assert(node->is_IfProj(), "must be the zero trip guard If node");
Node* zer_bol = node->in(0)->in(1);
assert(zer_bol != NULL && zer_bol->is_Bool(), "must be Bool");
Node* zer_cmp = zer_bol->in(1);
assert(zer_cmp != NULL && zer_cmp->Opcode() == Op_CmpI, "must be CmpI");
// For the main loop, the opaque node is the second input to zer_cmp, for the post loop it's the first input node
Node* zer_opaq = zer_cmp->in(is_main_loop ? 2 : 1);
assert(zer_opaq != NULL && zer_opaq->Opcode() == Op_OpaqueZeroTripGuard, "must be OpaqueZeroTripGuard");
}
#endif
// Make a copy of the skeleton range check predicates before the main
// loop and set the initial value of loop as input. After unrolling,
// the range of values for the induction variable in the main loop can
// fall outside the allowed range of values by the array access (main
// loop is never executed). When that happens, range check
// CastII/ConvI2L nodes cause some data paths to die. For consistency,
// the control paths must die too but the range checks were removed by
// predication. The range checks that we add here guarantee that they do.
void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicate, Node* init, Node* stride,
IdealLoopTree* outer_loop, LoopNode* outer_main_head,
uint dd_main_head, const uint idx_before_pre_post,
const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
Node* zero_trip_guard_proj_post, const Node_List &old_new) {
if (predicate != NULL) {
#ifdef ASSERT
ensure_zero_trip_guard_proj(zero_trip_guard_proj_main, true);
ensure_zero_trip_guard_proj(zero_trip_guard_proj_post, false);
#endif
IfNode* iff = predicate->in(0)->as_If();
ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
Node* rgn = uncommon_proj->unique_ctrl_out();
assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
predicate = iff->in(0);
Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
Node* prev_proj = current_proj;
Node* opaque_init = new OpaqueLoopInitNode(C, init);
register_new_node(opaque_init, outer_main_head->in(LoopNode::EntryControl));
Node* opaque_stride = new OpaqueLoopStrideNode(C, stride);
register_new_node(opaque_stride, outer_main_head->in(LoopNode::EntryControl));
while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
iff = predicate->in(0)->as_If();
uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
if (uncommon_proj->unique_ctrl_out() != rgn)
break;
if (iff->in(1)->Opcode() == Op_Opaque4) {
assert(skeleton_predicate_has_opaque(iff), "unexpected");
// Clone the skeleton predicate twice and initialize one with the initial
// value of the loop induction variable. Leave the other predicate
// to be initialized when increasing the stride during loop unrolling.
prev_proj = clone_skeleton_predicate_and_initialize(iff, opaque_init, NULL, predicate, uncommon_proj,
current_proj, outer_loop, prev_proj);
assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj,
current_proj, outer_loop, prev_proj);
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
// Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
// main loop (and were cloned to the pre and post loop).
for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) {
Node* loop_node = predicate->out(i);
Node* pre_loop_node = old_new[loop_node->_idx];
// Change the control if 'loop_node' is part of the main loop. If there is an old->new mapping and the index of
// 'pre_loop_node' is greater than idx_before_pre_post, then we know that 'loop_node' was cloned and is part of
// the main loop (and 'pre_loop_node' is part of the pre loop).
if (!loop_node->is_CFG() && (pre_loop_node != NULL && pre_loop_node->_idx > idx_after_post_before_pre)) {
// 'loop_node' is a data node and part of the main loop. Rewire the control to the projection of the zero-trip guard if node
// of the main loop that is immediately preceding the cloned predicates.
_igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_main);
--i;
} else if (loop_node->_idx > idx_before_pre_post && loop_node->_idx < idx_after_post_before_pre) {
// 'loop_node' is a data node and part of the post loop. Rewire the control to the projection of the zero-trip guard if node
// of the post loop that is immediately preceding the post loop header node (there are no cloned predicates for the post loop).
assert(pre_loop_node == NULL, "a node belonging to the post loop should not have an old_new mapping at this stage");
_igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_post);
--i;
}
}
// Remove the skeleton predicate from the pre-loop
_igvn.replace_input_of(iff, 1, _igvn.intcon(1));
}
predicate = predicate->in(0)->in(0);
}
_igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
set_idom(outer_main_head, prev_proj, dd_main_head);
}
}
static bool skeleton_follow_inputs(Node* n) {
int op = n->Opcode();
return (n->is_Bool() ||
n->is_Cmp() ||
op == Op_AndL ||
op == Op_OrL ||
op == Op_RShiftL ||
op == Op_LShiftL ||
op == Op_LShiftI ||
op == Op_AddL ||
op == Op_AddI ||
op == Op_MulL ||
op == Op_MulI ||
op == Op_SubL ||
op == Op_SubI ||
op == Op_ConvI2L ||
op == Op_CastII);
}
bool PhaseIdealLoop::subgraph_has_opaque(Node* n) {
if (n->Opcode() == Op_OpaqueLoopInit || n->Opcode() == Op_OpaqueLoopStride) {
return true;
}
if (!skeleton_follow_inputs(n)) {
return false;
}
uint init;
uint stride;
count_opaque_loop_nodes(n, init, stride);
return init != 0 || stride != 0;
}
bool PhaseIdealLoop::skeleton_predicate_has_opaque(IfNode* iff) {
uint init;
uint stride;
count_opaque_loop_nodes(iff->in(1)->in(1), init, stride);
#ifdef ASSERT
ResourceMark rm;
Unique_Node_List wq;
wq.clear();
wq.push(iff->in(1)->in(1));
uint verif_init = 0;
uint verif_stride = 0;
for (uint i = 0; i < wq.size(); i++) {
Node* n = wq.at(i);
int op = n->Opcode();
if (!n->is_CFG()) {
if (n->Opcode() == Op_OpaqueLoopInit) {
verif_init++;
} else if (n->Opcode() == Op_OpaqueLoopStride) {
verif_stride++;
} else {
for (uint j = 1; j < n->req(); j++) {
Node* m = n->in(j);
if (m != NULL) {
wq.push(m);
}
}
}
}
}
assert(init == verif_init && stride == verif_stride, "missed opaque node");
#endif
assert(stride == 0 || init != 0, "init should be there every time stride is");
return init != 0;
}
void PhaseIdealLoop::count_opaque_loop_nodes(Node* n, uint& init, uint& stride) {
init = 0;
stride = 0;
ResourceMark rm;
Unique_Node_List wq;
wq.push(n);
for (uint i = 0; i < wq.size(); i++) {
Node* n = wq.at(i);
if (skeleton_follow_inputs(n)) {
for (uint j = 1; j < n->req(); j++) {
Node* m = n->in(j);
if (m != NULL) {
wq.push(m);
}
}
continue;
}
if (n->Opcode() == Op_OpaqueLoopInit) {
init++;
} else if (n->Opcode() == Op_OpaqueLoopStride) {
stride++;
}
}
}
// Clone the skeleton predicate bool for a main or unswitched loop:
// Main loop: Set new_init and new_stride nodes as new inputs.
// Unswitched loop: new_init and new_stride are both NULL. Clone OpaqueLoopInit and OpaqueLoopStride instead.
Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, Node* new_stride, Node* control) {
Node_Stack to_clone(2);
to_clone.push(iff->in(1), 1);
uint current = C->unique();
Node* result = NULL;
bool is_unswitched_loop = new_init == NULL && new_stride == NULL;
assert(new_init != NULL || is_unswitched_loop, "new_init must be set when new_stride is non-null");
// Look for the opaque node to replace with the new value
// and clone everything in between. We keep the Opaque4 node
// so the duplicated predicates are eliminated once loop
// opts are over: they are here only to keep the IR graph
// consistent.
do {
Node* n = to_clone.node();
uint i = to_clone.index();
Node* m = n->in(i);
if (skeleton_follow_inputs(m)) {
to_clone.push(m, 1);
continue;
}
if (m->is_Opaque1()) {
if (n->_idx < current) {
n = n->clone();
register_new_node(n, control);
}
int op = m->Opcode();
if (op == Op_OpaqueLoopInit) {
if (is_unswitched_loop && m->_idx < current && new_init == NULL) {
new_init = m->clone();
register_new_node(new_init, control);
}
n->set_req(i, new_init);
} else {
assert(op == Op_OpaqueLoopStride, "unexpected opaque node");
if (is_unswitched_loop && m->_idx < current && new_stride == NULL) {
new_stride = m->clone();
register_new_node(new_stride, control);
}
if (new_stride != NULL) {
n->set_req(i, new_stride);
}
}
to_clone.set_node(n);
}
while (true) {
Node* cur = to_clone.node();
uint j = to_clone.index();
if (j+1 < cur->req()) {
to_clone.set_index(j+1);
break;
}
to_clone.pop();
if (to_clone.size() == 0) {
result = cur;
break;
}
Node* next = to_clone.node();
j = to_clone.index();
if (next->in(j) != cur) {
assert(cur->_idx >= current || next->in(j)->Opcode() == Op_Opaque1, "new node or Opaque1 being replaced");
if (next->_idx < current) {
next = next->clone();
register_new_node(next, control);
to_clone.set_node(next);
}
next->set_req(j, cur);
}
}
} while (result == NULL);
assert(result->_idx >= current, "new node expected");
assert(!is_unswitched_loop || new_init != NULL, "new_init must always be found and cloned");
return result;
}
// Clone a skeleton predicate for the main loop. new_init and new_stride are set as new inputs. Since the predicates cannot fail at runtime,
// Halt nodes are inserted instead of uncommon traps.
Node* PhaseIdealLoop::clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
Node* result = clone_skeleton_predicate_bool(iff, new_init, new_stride, control);
Node* proj = predicate->clone();
Node* other_proj = uncommon_proj->clone();
Node* new_iff = iff->clone();
new_iff->set_req(1, result);
proj->set_req(0, new_iff);
other_proj->set_req(0, new_iff);
Node* frame = new ParmNode(C->start(), TypeFunc::FramePtr);
register_new_node(frame, C->start());
// It's impossible for the predicate to fail at runtime. Use an Halt node.
Node* halt = new HaltNode(other_proj, frame, "duplicated predicate failed which is impossible");
_igvn.add_input_to(C->root(), halt);
new_iff->set_req(0, input_proj);
register_control(new_iff, outer_loop == _ltree_root ? _ltree_root : outer_loop->_parent, input_proj);
register_control(proj, outer_loop == _ltree_root ? _ltree_root : outer_loop->_parent, new_iff);
register_control(other_proj, _ltree_root, new_iff);
register_control(halt, _ltree_root, other_proj);
return proj;
}
void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* init, Node* stride,
IdealLoopTree* outer_loop, LoopNode* outer_main_head,
uint dd_main_head, const uint idx_before_pre_post,
const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
Node* zero_trip_guard_proj_post, const Node_List &old_new) {
if (UseLoopPredicate) {
Node* entry = pre_head->in(LoopNode::EntryControl);
Node* predicate = NULL;
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL) {
entry = skip_loop_predicates(entry);
}
Node* profile_predicate = NULL;
if (UseProfiledLoopPredicate) {
profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
if (profile_predicate != NULL) {
entry = skip_loop_predicates(entry);
}
}
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
copy_skeleton_predicates_to_main_loop_helper(predicate, init, stride, outer_loop, outer_main_head, dd_main_head,
idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
zero_trip_guard_proj_post, old_new);
copy_skeleton_predicates_to_main_loop_helper(profile_predicate, init, stride, outer_loop, outer_main_head, dd_main_head,
idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
zero_trip_guard_proj_post, old_new);
}
}
//------------------------------insert_pre_post_loops--------------------------
// Insert pre and post loops. If peel_only is set, the pre-loop can not have
// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
// alignment. Useful to unroll loops that do no array accesses.
void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_new, bool peel_only) {
#ifndef PRODUCT
if (TraceLoopOpts) {
if (peel_only)
tty->print("PeelMainPost ");
else
tty->print("PreMainPost ");
loop->dump_head();
}
#endif
C->set_major_progress();
// Find common pieces of the loop being guarded with pre & post loops
CountedLoopNode *main_head = loop->_head->as_CountedLoop();
assert(main_head->is_normal_loop(), "");
CountedLoopEndNode *main_end = main_head->loopexit();
assert(main_end->outcnt() == 2, "1 true, 1 false path only");
Node *pre_header= main_head->in(LoopNode::EntryControl);
Node *init = main_head->init_trip();
Node *incr = main_end ->incr();
Node *limit = main_end ->limit();
Node *stride = main_end ->stride();
Node *cmp = main_end ->cmp_node();
BoolTest::mask b_test = main_end->test_trip();
// Need only 1 user of 'bol' because I will be hacking the loop bounds.
Node *bol = main_end->in(CountedLoopEndNode::TestValue);
if (bol->outcnt() != 1) {
bol = bol->clone();
register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
_igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
}
// Need only 1 user of 'cmp' because I will be hacking the loop bounds.
if (cmp->outcnt() != 1) {
cmp = cmp->clone();
register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
_igvn.replace_input_of(bol, 1, cmp);
}
// Add the post loop
const uint idx_before_pre_post = Compile::current()->unique();
CountedLoopNode *post_head = NULL;
Node* post_incr = incr;
Node* main_exit = insert_post_loop(loop, old_new, main_head, main_end, post_incr, limit, post_head);
const uint idx_after_post_before_pre = Compile::current()->unique();
//------------------------------
// Step B: Create Pre-Loop.
// Step B1: Clone the loop body. The clone becomes the pre-loop. The main
// loop pre-header illegally has 2 control users (old & new loops).
LoopNode* outer_main_head = main_head;
IdealLoopTree* outer_loop = loop;
if (main_head->is_strip_mined()) {
main_head->verify_strip_mined(1);
outer_main_head = main_head->outer_loop();
outer_loop = loop->_parent;
assert(outer_loop->_head == outer_main_head, "broken loop tree");
}
uint dd_main_head = dom_depth(outer_main_head);
clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
pre_head->set_pre_loop(main_head);
Node *pre_incr = old_new[incr->_idx];
// Reduce the pre-loop trip count.
pre_end->_prob = PROB_FAIR;
// Find the pre-loop normal exit.
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.165 Sekunden
(vorverarbeitet)
¤
|
Haftungshinweis
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.
|