/*
* Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "ci/ciMethodData.hpp"
#include "compiler/compileLog.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp"
#include "opto/addnode.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/callnode.hpp"
#include "opto/castnode.hpp"
#include "opto/connode.hpp"
#include "opto/convertnode.hpp"
#include "opto/divnode.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/loopnode.hpp"
#include "opto/movenode.hpp"
#include "opto/mulnode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/superword.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/powerOfTwo.hpp"
//=============================================================================
//--------------------------is_cloop_ind_var-----------------------------------
// Determine if a node is a counted loop induction variable.
// NOTE: The method is declared in "node.hpp".
bool Node::is_cloop_ind_var() const {
return (is_Phi() &&
as_Phi()->region()->is_CountedLoop() &&
as_Phi()->region()->as_CountedLoop()->phi() == this);
}
//=============================================================================
//------------------------------dump_spec--------------------------------------
// Dump special per-node info
#ifndef PRODUCT
// Print the loop-attribute flags that are set on this LoopNode.
void LoopNode::dump_spec(outputStream *st) const {
  if (is_inner_loop())           st->print("inner ");
  if (is_partial_peel_loop())    st->print("partial_peel ");
  if (partial_peel_has_failed()) st->print("partial_peel_failed ");
}
#endif
//------------------------------is_valid_counted_loop-------------------------
// A well-formed counted loop of basic type 'bt' must have a loop-end
// node whose true projection is the backedge, a false-projection exit,
// an iv Phi fed by the increment on the backedge, and a constant stride.
bool LoopNode::is_valid_counted_loop(BasicType bt) const {
  if (!is_BaseCountedLoop() || as_BaseCountedLoop()->bt() != bt) {
    return false;
  }
  BaseCountedLoopNode* l = as_BaseCountedLoop();
  BaseCountedLoopEndNode* le = l->loopexit_or_null();
  if (le == NULL) {
    return false;
  }
  if (le->proj_out_or_null(1 /* true */) != l->in(LoopNode::LoopBackControl)) {
    return false;
  }
  Node* phi = l->phi();
  Node* exit = le->proj_out_or_null(0 /* false */);
  return exit != NULL && exit->Opcode() == Op_IfFalse &&
         phi != NULL && phi->is_Phi() &&
         phi->in(LoopNode::LoopBackControl) == l->incr() &&
         le->loopnode() == l && le->stride_is_con();
}
//------------------------------get_early_ctrl---------------------------------
// Compute earliest legal control
Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
  assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
  uint i;
  Node *early;
  if (n->in(0) && !n->is_expensive()) {
    // Node has a control input: start the search from it.
    early = n->in(0);
    if (!early->is_CFG()) // Might be a non-CFG multi-def
      early = get_ctrl(early); // So treat input as a straight data input
    i = 1;
  } else {
    // Floating node: seed the search with the control of input 1.
    early = get_ctrl(n->in(1));
    i = 2;
  }
  uint e_d = dom_depth(early);
  assert( early, "" );
  // The earliest legal placement is the deepest (most dominated) of all
  // the input controls: scan the remaining inputs keeping the deepest.
  for (; i < n->req(); i++) {
    Node *cin = get_ctrl(n->in(i));
    assert( cin, "" );
    // Keep deepest dominator depth
    uint c_d = dom_depth(cin);
    if (c_d > e_d) { // Deeper guy?
      early = cin; // Keep deepest found so far
      e_d = c_d;
    } else if (c_d == e_d && // Same depth?
               early != cin) { // If not equal, must use slower algorithm
      // If same depth but not equal, one _must_ dominate the other
      // and we want the deeper (i.e., dominated) guy.
      Node *n1 = early;
      Node *n2 = cin;
      // Walk both candidates up the idom chain in lock-step until one
      // reaches the other (the one reached is the dominator).
      while (1) {
        n1 = idom(n1); // Walk up until break cycle
        n2 = idom(n2);
        if (n1 == cin || // Walked early up to cin
            dom_depth(n2) < c_d)
          break; // early is deeper; keep him
        if (n2 == early || // Walked cin up to early
            dom_depth(n1) < c_d) {
          early = cin; // cin is deeper; keep him
          break;
        }
      }
      e_d = dom_depth(early); // Reset depth register cache
    }
  }
  // Return earliest legal location
  assert(early == find_non_split_ctrl(early), "unexpected early control");
  if (n->is_expensive() && !_verify_only && !_verify_me) {
    // Expensive nodes are additionally hoisted as high as beneficial.
    assert(n->in(0), "should have control input");
    early = get_early_ctrl_for_expensive(n, early);
  }
  return early;
}
//------------------------------get_early_ctrl_for_expensive---------------------------------
// Move node up the dominator tree as high as legal while still beneficial
Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
  assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
  assert(OptimizeExpensiveOps, "optimization off?");
  Node* ctl = n->in(0);
  assert(ctl->is_CFG(), "expensive input 0 must be cfg");
  uint min_dom_depth = dom_depth(earliest);
#ifdef ASSERT
  if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
    dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
    assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
  }
#endif
  if (dom_depth(ctl) < min_dom_depth) {
    // Current control already sits above 'earliest': nothing to do.
    return earliest;
  }
  // Climb the CFG one step per iteration, stopping where moving higher
  // would be illegal (above 'earliest') or harmful to later opts.
  while (1) {
    Node *next = ctl;
    // Moving the node out of a loop on the projection of a If
    // confuses loop predication. So once we hit a Loop in a If branch
    // that doesn't branch to an UNC, we stop. The code that process
    // expensive nodes will notice the loop and skip over it to try to
    // move the node further up.
    if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
      if (!ctl->in(1)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
        break;
      }
      // Hop above the guarding If in one step.
      next = idom(ctl->in(1)->in(0));
    } else if (ctl->is_Proj()) {
      // We only move it up along a projection if the projection is
      // the single control projection for its parent: same code path,
      // if it's a If with UNC or fallthrough of a call.
      Node* parent_ctl = ctl->in(0);
      if (parent_ctl == NULL) {
        break;
      } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
        // Skip over the whole counted loop in a single step.
        next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
      } else if (parent_ctl->is_If()) {
        if (!ctl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
          break;
        }
        assert(idom(ctl) == parent_ctl, "strange");
        next = idom(parent_ctl);
      } else if (ctl->is_CatchProj()) {
        // Only the fall-through path of a call is a "same code path".
        if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) {
          break;
        }
        assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
        // Step above the call feeding the Catch/CatchProj chain.
        next = parent_ctl->in(0)->in(0)->in(0);
      } else {
        // Check if parent control has a single projection (this
        // control is the only possible successor of the parent
        // control). If so, we can try to move the node above the
        // parent control.
        int nb_ctl_proj = 0;
        for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
          Node *p = parent_ctl->fast_out(i);
          if (p->is_Proj() && p->is_CFG()) {
            nb_ctl_proj++;
            if (nb_ctl_proj > 1) {
              break;
            }
          }
        }
        if (nb_ctl_proj > 1) {
          break;
        }
        assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call() ||
               BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(parent_ctl), "unexpected node");
        assert(idom(ctl) == parent_ctl, "strange");
        next = idom(parent_ctl);
      }
    } else {
      next = idom(ctl);
    }
    if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) {
      // Never move above 'earliest' or out of the graph entirely.
      break;
    }
    ctl = next;
  }
  if (ctl != n->in(0)) {
    // Re-pin the expensive node at its new, higher control.
    _igvn.replace_input_of(n, 0, ctl);
    _igvn.hash_insert(n);
  }
  return ctl;
}
//------------------------------set_early_ctrl---------------------------------
// Set earliest legal control
// Compute and record the earliest legal control for n, and add n to the
// body of the enclosing innermost loop when requested.
void PhaseIdealLoop::set_early_ctrl(Node* n, bool update_body) {
  Node* early_ctrl = get_early_ctrl(n);
  set_ctrl(n, early_ctrl);
  IdealLoopTree* enclosing = get_loop(early_ctrl);
  // Only innermost loops (no children) track their bodies here.
  if (update_body && enclosing->_child == NULL) {
    enclosing->_body.push(n);
  }
}
//------------------------------set_subtree_ctrl-------------------------------
// set missing _ctrl entries on new nodes
// Recursively assign controls to n and to every not-yet-placed node it
// depends on (bottom-up over the def chain, stopping at the root).
void PhaseIdealLoop::set_subtree_ctrl(Node* n, bool update_body) {
  // Already placed? Nothing to do.
  if (_nodes[n->_idx]) return;
  // Place all inputs first so get_early_ctrl sees valid input controls.
  for (uint j = 0; j < n->req(); ++j) {
    Node* def = n->in(j);
    if (def != NULL && def != C->root()) {
      set_subtree_ctrl(def, update_body);
    }
  }
  // Finally place this node itself.
  set_early_ctrl(n, update_body);
}
// Splice a new loop-tree entry for (outer_l, outer_ift) into the loop
// tree directly above 'loop'; 'loop' becomes its only child.
IdealLoopTree* PhaseIdealLoop::insert_outer_loop(IdealLoopTree* loop, LoopNode* outer_l, Node* outer_ift) {
  IdealLoopTree* outer_ilt = new IdealLoopTree(this, outer_l, outer_ift);
  IdealLoopTree* parent = loop->_parent;
  // Replace 'loop' with 'outer_ilt' in the parent's child list.
  if (parent->_child == loop) {
    parent->_child = outer_ilt;
  } else {
    IdealLoopTree* prev = parent->_child;
    while (prev->_next != loop) {
      prev = prev->_next;
    }
    prev->_next = outer_ilt;
  }
  // The outer loop takes over 'loop's place among its siblings...
  outer_ilt->_next   = loop->_next;
  outer_ilt->_parent = parent;
  outer_ilt->_child  = loop;
  outer_ilt->_nest   = loop->_nest;
  // ...and 'loop' moves one nesting level deeper, under the outer loop.
  loop->_parent = outer_ilt;
  loop->_next   = NULL;
  loop->_nest++;
  assert(loop->_nest <= SHRT_MAX, "sanity");
  return outer_ilt;
}
// Create a skeleton strip mined outer loop: a Loop head before the
// inner strip mined loop, a safepoint and an exit condition guarded
// by an opaque node after the inner strip mined loop with a backedge
// to the loop head. The inner strip mined loop is left as it is. Only
// once loop optimizations are over, do we adjust the inner loop exit
// condition to limit its number of iterations, set the outer loop
// exit condition and add Phis to the outer loop head. Some loop
// optimizations that operate on the inner strip mined loop need to be
// aware of the outer strip mined loop: loop unswitching needs to
// clone the outer loop as well as the inner, unrolling needs to only
// clone the inner loop etc. No optimizations need to change the outer
// strip mined loop as it is only a skeleton.
IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
                                                             IdealLoopTree* loop, float cl_prob, float le_fcnt,
                                                             Node*& entry_control, Node*& iffalse) {
  // The outer loop's exit test starts out as constant false; it only
  // gets a real condition once loop optimizations are over.
  Node* outer_test = _igvn.intcon(0);
  set_ctrl(outer_test, C->root());
  Node *orig = iffalse;
  // Clone the exit projection: the clone feeds the new outer loop end,
  // while the original is rewired below it (see replace_input_of).
  iffalse = iffalse->clone();
  _igvn.register_new_node_with_optimizer(iffalse);
  set_idom(iffalse, idom(orig), dom_depth(orig));
  IfNode *outer_le = new OuterStripMinedLoopEndNode(iffalse, outer_test, cl_prob, le_fcnt);
  Node *outer_ift = new IfTrueNode (outer_le);  // backedge of the outer loop
  Node* outer_iff = orig;
  _igvn.replace_input_of(outer_iff, 0, outer_le);
  // New outer loop head: entered from init_control, backedge from outer_ift.
  LoopNode *outer_l = new OuterStripMinedLoopNode(C, init_control, outer_ift);
  entry_control = outer_l;  // inner loop now enters through the outer head
  // Splice the outer loop into the loop tree directly above 'loop'.
  IdealLoopTree* outer_ilt = insert_outer_loop(loop, outer_l, outer_ift);
  set_loop(iffalse, outer_ilt);
  // When this code runs, loop bodies have not yet been populated.
  const bool body_populated = false;
  register_control(outer_le, outer_ilt, iffalse, body_populated);
  register_control(outer_ift, outer_ilt, outer_le, body_populated);
  set_idom(outer_iff, outer_le, dom_depth(outer_le));
  _igvn.register_new_node_with_optimizer(outer_l);
  set_loop(outer_l, outer_ilt);
  set_idom(outer_l, init_control, dom_depth(init_control)+1);
  return outer_ilt;
}
// Materialize the reserved loop-limit-check predicate: create a new If
// off the predicate projection and replace its placeholder condition
// (Conv2B(Opaque1)) with the real limit test 'bol' built from 'cmp_limit'.
void PhaseIdealLoop::insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol) {
  Node* new_predicate_proj = create_new_if_for_predicate(limit_check_proj, NULL,
                                                         Deoptimization::Reason_loop_limit_check,
                                                         Op_If);
  Node* iff = new_predicate_proj->in(0);
  assert(iff->Opcode() == Op_If, "bad graph shape");
  Node* conv = iff->in(1);
  assert(conv->Opcode() == Op_Conv2B, "bad graph shape");
  Node* opaq = conv->in(1);
  assert(opaq->Opcode() == Op_Opaque1, "bad graph shape");
  // Register the new test nodes and place them before swapping in 'bol'
  // as the If's condition.
  cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit);
  bol = _igvn.register_new_node_with_optimizer(bol);
  set_subtree_ctrl(bol, false);
  _igvn.replace_input_of(iff, 1, bol);
#ifndef PRODUCT
  // report that the loop predication has been actually performed
  // for this loop
  if (TraceLoopLimitCheck) {
    tty->print_cr("Counted Loop Limit Check generated:");
    debug_only( bol->dump(2); )
  }
#endif
}
// Find the projection feeding the backedge of a candidate counted loop,
// or NULL if the loop's shape rules it out.
Node* PhaseIdealLoop::loop_exit_control(Node* x, IdealLoopTree* loop) {
  // Counted loop head must be a good RegionNode with only 3 not NULL
  // control input edges: Self, Entry, LoopBack.
  if (x->in(LoopNode::Self) == NULL || x->req() != 3 || loop->_irreducible) {
    return NULL;
  }
  Node* entry = x->in(LoopNode::EntryControl);
  Node* backedge = x->in(LoopNode::LoopBackControl);
  // Reject partially dead loops (missing control) and dead loops (TOP).
  if (entry == NULL || backedge == NULL) {
    return NULL;
  }
  if (entry->is_top() || backedge->is_top()) {
    return NULL;
  }
  // A safepoint may sit on the backedge; look through it.
  if (backedge->Opcode() == Op_SafePoint) {
    backedge = backedge->in(TypeFunc::Control);
  }
  // The backedge must come straight off an If projection. Anything else
  // means the exit test sits in the middle of the loop behind a control
  // merge, which would require partial peeling to expose.
  uint proj_op = backedge->Opcode();
  if (proj_op != Op_IfTrue && proj_op != Op_IfFalse) {
    return NULL; // Obscure back-control
  }
  // The controlling If must live in this loop and be guarded by a Bool.
  Node* iff = backedge->in(0);
  if (get_loop(iff) != loop || !iff->in(1)->is_Bool()) {
    return NULL;
  }
  return backedge;
}
// Extract the backedge compare from the loop's exit test. On success
// returns the Cmp node and fills in incr (loop-variant side), limit
// (loop-invariant side), bt (test mask as seen from the backedge) and
// cl_prob (probability of staying in the loop).
Node* PhaseIdealLoop::loop_exit_test(Node* back_control, IdealLoopTree* loop, Node*& incr, Node*& limit, BoolTest::mask& bt, float& cl_prob) {
  Node* iff = back_control->in(0);
  BoolNode* test = iff->in(1)->as_Bool();
  bt = test->_test._test;
  cl_prob = iff->as_If()->_prob;
  // When the backedge is the false projection, the recorded test and
  // probability describe the exit path: flip both.
  if (back_control->Opcode() == Op_IfFalse) {
    bt = BoolTest(bt).negate();
    cl_prob = 1.0 - cl_prob;
  }
  Node* cmp = test->in(1);
  if (!cmp->is_Cmp()) {
    return NULL;
  }
  // Find the trip-counter increment & limit. Limit must be loop invariant.
  incr = cmp->in(1);
  limit = cmp->in(2);
  // The compare may have its operands in (limit, incr) order; if the
  // first operand is loop-invariant, swap and commute the test.
  if (!is_member(loop, get_ctrl(incr))) {
    Node* t = incr;
    incr = limit;
    limit = t;
    bt = BoolTest(bt).commute();
  }
  if (is_member(loop, get_ctrl(limit))) {
    return NULL; // limit must be loop-invariant
  }
  if (!is_member(loop, get_ctrl(incr))) {
    return NULL; // trip counter must be loop-variant
  }
  return cmp;
}
// If the exit test compares the phi itself rather than phi+stride,
// record the phi in phi_incr and chase the real increment off the phi's
// backedge input. Returns the increment, or NULL if the shape is wrong.
Node* PhaseIdealLoop::loop_iv_incr(Node* incr, Node* x, IdealLoopTree* loop, Node*& phi_incr) {
  if (!incr->is_Phi()) {
    return incr;
  }
  if (incr->as_Phi()->region() != x || incr->req() != 3) {
    return NULL; // Not simple trip counter expression
  }
  phi_incr = incr;
  // The increment is whatever flows around the backedge into the phi.
  incr = phi_incr->in(LoopNode::LoopBackControl);
  // Trip counter must be loop-variant.
  return is_member(loop, get_ctrl(incr)) ? incr : NULL;
}
// Split the increment Add into its phi side (returned via xphi) and its
// constant stride (returned). NULL if neither input is a constant.
Node* PhaseIdealLoop::loop_iv_stride(Node* incr, IdealLoopTree* loop, Node*& xphi) {
  assert(incr->Opcode() == Op_AddI || incr->Opcode() == Op_AddL, "caller resp.");
  xphi = incr->in(1);
  Node* stride = incr->in(2);
  if (stride->is_Con()) {
    return stride;
  }
  // Addition is commutative: the constant may be on input 1 instead.
  if (!xphi->is_Con()) {
    return NULL; // unknown stride, bail out
  }
  Node* con = xphi;
  xphi = stride;
  return con;
}
// Validate the candidate iv phi: it must literally be a Phi (no extra
// math on the trip counter) rooted at the loop head x, and when the
// exit test compared the phi directly (phi_incr != NULL) it must be
// that same phi. Returns the PhiNode or NULL.
PhiNode* PhaseIdealLoop::loop_iv_phi(Node* xphi, Node* phi_incr, Node* x, IdealLoopTree* loop) {
  if (!xphi->is_Phi() || (phi_incr != NULL && phi_incr != xphi)) {
    return NULL;
  }
  PhiNode* phi = xphi->as_Phi();
  // Phi must be of loop header; backedge must wrap to increment.
  return (phi->region() == x) ? phi : NULL;
}
// Can limit + stride overflow the range of 'bt'?
// Returns -1 when it certainly does (the whole limit range is past the
// safe bound), 1 when it might, and 0 when it certainly cannot.
static int check_stride_overflow(jlong stride_con, const TypeInteger* limit_t, BasicType bt) {
  if (stride_con > 0) {
    const jlong max_allowed = max_signed_integer(bt) - stride_con;
    if (limit_t->lo_as_long() > max_allowed) {
      return -1;
    }
    return (limit_t->hi_as_long() > max_allowed) ? 1 : 0;
  }
  // Negative stride: check underflow against the type's minimum.
  const jlong min_allowed = min_signed_integer(bt) - stride_con;
  if (limit_t->hi_as_long() < min_allowed) {
    return -1;
  }
  return (limit_t->lo_as_long() < min_allowed) ? 1 : 0;
}
// Reject exit condition / stride combinations that would let the iv
// wrap through MININT/MAXINT before the test fires.
static bool condition_stride_ok(BoolTest::mask bt, jlong stride_con) {
  if (bt == BoolTest::eq) {
    return false; // this loop trips at most twice
  }
  if (bt == BoolTest::ne) {
    // An odd stride can step right over the limit without hitting it.
    return stride_con == 1 || stride_con == -1;
  }
  if (stride_con < 0) {
    // Count-down loop with <= or < rolls through MAXINT.
    return bt != BoolTest::le && bt != BoolTest::lt;
  }
  if (stride_con > 0) {
    // Count-up loop with >= or > rolls through MININT.
    return bt != BoolTest::ge && bt != BoolTest::gt;
  }
  return true; // stride 0 cannot occur (callers assert non-zero)
}
// Rewrite every use of iv_to_replace as (outer_phi + inner_iv), where
// the inner (int) iv is widened to long first when bt == T_LONG.
// Returns the replacement Add node.
Node* PhaseIdealLoop::loop_nest_replace_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head,
                                           BasicType bt) {
  Node* iv_as_long;
  if (bt == T_LONG) {
    iv_as_long = new ConvI2LNode(inner_iv, TypeLong::INT);
    register_new_node(iv_as_long, inner_head);
  } else {
    iv_as_long = inner_iv;
  }
  Node* iv_replacement = AddNode::make(outer_phi, iv_as_long, bt);
  register_new_node(iv_replacement, inner_head);
  // Iterate uses last-to-first: replace_edge removes out-edges, so the
  // index is decremented by the number of edges replaced.
  for (DUIterator_Last imin, i = iv_to_replace->last_outs(imin); i >= imin;) {
    Node* u = iv_to_replace->last_out(i);
#ifdef ASSERT
    // Any use not dominated by the loop head must be a Phi whose
    // matching control input is inside the loop.
    if (!is_dominator(inner_head, ctrl_or_self(u))) {
      assert(u->is_Phi(), "should be a Phi");
      for (uint j = 1; j < u->req(); j++) {
        if (u->in(j) == iv_to_replace) {
          assert(is_dominator(inner_head, u->in(0)->in(j)), "iv use above loop?");
        }
      }
    }
#endif
    _igvn.rehash_node_delayed(u);
    int nb = u->replace_edge(iv_to_replace, iv_replacement, &_igvn);
    i -= nb;
  }
  return iv_replacement;
}
// Insert a placeholder (always-true) predicate of the given deopt
// 'reason' above inner_head, with an uncommon trap call on its false
// branch. The trap reuses the debug info of 'sfpt'. Skipped when this
// trap reason has already fired too often.
void PhaseIdealLoop::add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt) {
  if (!C->too_many_traps(reason)) {
    Node *cont = _igvn.intcon(1);
    Node* opq = new Opaque1Node(C, cont);  // hides the constant condition from igvn
    _igvn.register_new_node_with_optimizer(opq);
    Node *bol = new Conv2BNode(opq);
    _igvn.register_new_node_with_optimizer(bol);
    set_subtree_ctrl(bol, false);
    IfNode* iff = new IfNode(inner_head->in(LoopNode::EntryControl), bol, PROB_MAX, COUNT_UNKNOWN);
    register_control(iff, loop, inner_head->in(LoopNode::EntryControl));
    Node* iffalse = new IfFalseNode(iff);
    register_control(iffalse, _ltree_root, iff);  // trap path is outside the loop
    Node* iftrue = new IfTrueNode(iff);
    register_control(iftrue, loop, iff);
    C->add_predicate_opaq(opq);
    // Build the uncommon trap call on the false branch.
    int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile);
    address call_addr = SharedRuntime::uncommon_trap_blob()->entry_point();
    const TypePtr* no_memory_effects = NULL;
    CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap",
                                           no_memory_effects);
    // Memory and i/o state: taken from sfpt's projections if it is a
    // call, from its inputs otherwise.
    Node* mem = NULL;
    Node* i_o = NULL;
    if (sfpt->is_Call()) {
      mem = sfpt->proj_out(TypeFunc::Memory);
      i_o = sfpt->proj_out(TypeFunc::I_O);
    } else {
      mem = sfpt->memory();
      i_o = sfpt->i_o();
    }
    Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
    register_new_node(frame, C->start());
    Node *ret = new ParmNode(C->start(), TypeFunc::ReturnAdr);
    register_new_node(ret, C->start());
    unc->init_req(TypeFunc::Control, iffalse);
    unc->init_req(TypeFunc::I_O, i_o);
    unc->init_req(TypeFunc::Memory, mem); // may gc ptrs
    unc->init_req(TypeFunc::FramePtr, frame);
    unc->init_req(TypeFunc::ReturnAdr, ret);
    unc->init_req(TypeFunc::Parms+0, _igvn.intcon(trap_request));
    unc->set_cnt(PROB_UNLIKELY_MAG(4));
    unc->copy_call_debug_info(&_igvn, sfpt);
    for (uint i = TypeFunc::Parms; i < unc->req(); i++) {
      set_subtree_ctrl(unc->in(i), false);
    }
    register_control(unc, _ltree_root, iffalse);
    // The trap call never returns: its control projection feeds a Halt.
    Node* ctrl = new ProjNode(unc, TypeFunc::Control);
    register_control(ctrl, _ltree_root, unc);
    Node* halt = new HaltNode(ctrl, frame, "uncommon trap returned which should never happen" PRODUCT_ONLY(COMMA /*reachable*/false));
    register_control(halt, _ltree_root, ctrl);
    _igvn.add_input_to(C->root(), halt);
    // The loop is now entered through the predicate's true projection.
    _igvn.replace_input_of(inner_head, LoopNode::EntryControl, iftrue);
    set_idom(inner_head, iftrue, dom_depth(inner_head));
  }
}
// Find a safepoint node that dominates the back edge. We need a
// SafePointNode so we can use its jvm state to create empty
// predicates.
// Check that no memory side effect happens between the safepoint (whose
// memory state is 'mem') and the backedge of loop head 'x': every memory
// Phi at the head must merge back exactly the state the safepoint saw.
// 'mm' is debug-only bookkeeping the caller uses to verify that all
// memory slices were examined.
static bool no_side_effect_since_safepoint(Compile* C, Node* x, Node* mem, MergeMemNode* mm, PhaseIdealLoop* phase) {
  for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
    Node* u = x->fast_out(i);
    if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
      // m is the memory state flowing around the backedge into this phi.
      Node* m = u->in(LoopNode::LoopBackControl);
      if (u->adr_type() == TypePtr::BOTTOM) {
        if (m->is_MergeMem() && mem->is_MergeMem()) {
          if (m != mem DEBUG_ONLY(|| true)) {
            // MergeMemStream can modify m, for example to adjust the length to mem.
            // This is unfortunate, and probably unnecessary. But as it is, we need
            // to add m to the igvn worklist, else we may have a modified node that
            // is not on the igvn worklist.
            phase->igvn()._worklist.push(m);
            // Compare the two MergeMems slice by slice.
            for (MergeMemStream mms(m->as_MergeMem(), mem->as_MergeMem()); mms.next_non_empty2(); ) {
              if (!mms.is_empty()) {
                if (mms.memory() != mms.memory2()) {
                  return false; // some slice changed inside the loop
                }
#ifdef ASSERT
                if (mms.alias_idx() != Compile::AliasIdxBot) {
                  mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
                }
#endif
              }
            }
          }
        } else if (mem->is_MergeMem()) {
          if (m != mem->as_MergeMem()->base_memory()) {
            return false;
          }
        } else {
          return false;
        }
      } else {
        // Narrow memory phi: compare against the matching slice of mem.
        if (mem->is_MergeMem()) {
          if (m != mem->as_MergeMem()->memory_at(C->get_alias_index(u->adr_type()))) {
            return false;
          }
#ifdef ASSERT
          mm->set_memory_at(C->get_alias_index(u->adr_type()), mem->as_MergeMem()->base_memory());
#endif
        } else {
          if (m != mem) {
            return false;
          }
        }
      }
    }
  }
  return true;
}
SafePointNode* PhaseIdealLoop::find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop) {
  IfNode* exit_test = back_control->in(0)->as_If();
  SafePointNode* safepoint = NULL;
  // Fast path: the exit test hangs directly off a safepoint with no
  // other use; its state can be reused without further checks.
  if (exit_test->in(0)->is_SafePoint() && exit_test->in(0)->outcnt() == 1) {
    safepoint = exit_test->in(0)->as_SafePoint();
  } else {
    // Otherwise walk the idom chain from the backedge up to the loop
    // head, looking for a safepoint on the way.
    Node* c = back_control;
    while (c != x && c->Opcode() != Op_SafePoint) {
      c = idom(c);
    }
    if (c->Opcode() == Op_SafePoint) {
      safepoint = c->as_SafePoint();
    }
    if (safepoint == NULL) {
      return NULL;
    }
    Node* mem = safepoint->in(TypeFunc::Memory);
    // We can only use that safepoint if there's no side effect between the backedge and the safepoint.
    // mm is used for book keeping
    MergeMemNode* mm = NULL;
#ifdef ASSERT
    if (mem->is_MergeMem()) {
      mm = mem->clone()->as_MergeMem();
      _igvn._worklist.push(mm);
      for (MergeMemStream mms(mem->as_MergeMem()); mms.next_non_empty(); ) {
        // Pre-clear slices defined outside this loop; they cannot be
        // modified between the safepoint and the backedge.
        if (mms.alias_idx() != Compile::AliasIdxBot && loop != get_loop(ctrl_or_self(mms.memory()))) {
          mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
        }
      }
    }
#endif
    if (!no_side_effect_since_safepoint(C, x, mem, mm, this)) {
      safepoint = NULL;
    } else {
      assert(mm == NULL|| _igvn.transform(mm) == mem->as_MergeMem()->base_memory(), "all memory state should have been processed");
    }
#ifdef ASSERT
    if (mm != NULL) {
      // mm was only needed for the bookkeeping assert above.
      _igvn.remove_dead_node(mm);
    }
#endif
  }
  return safepoint;
}
// If the loop has the shape of a counted loop but with a long
// induction variable, transform the loop in a loop nest: an inner
// loop that iterates for at most max int iterations with an integer
// induction variable and an outer loop that iterates over the full
// range of long values from the initial loop in (at most) max int
// steps. That is:
//
// x: for (long phi = init; phi < limit; phi += stride) {
// // phi := Phi(L, init, incr)
// // incr := AddL(phi, longcon(stride))
// long incr = phi + stride;
// ... use phi and incr ...
// }
//
// OR:
//
// x: for (long phi = init; (phi += stride) < limit; ) {
// // phi := Phi(L, AddL(init, stride), incr)
// // incr := AddL(phi, longcon(stride))
// long incr = phi + stride;
// ... use phi and (phi + stride) ...
// }
//
// ==transform=>
//
// const ulong inner_iters_limit = INT_MAX - stride - 1; //near 0x7FFFFFF0
// assert(stride <= inner_iters_limit); // else abort transform
// assert((extralong)limit + stride <= LONG_MAX); // else deopt
// outer_head: for (long outer_phi = init;;) {
// // outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_phi)))
// ulong inner_iters_max = (ulong) MAX(0, ((extralong)limit + stride - outer_phi));
// long inner_iters_actual = MIN(inner_iters_limit, inner_iters_max);
// assert(inner_iters_actual == (int)inner_iters_actual);
// int inner_phi, inner_incr;
// x: for (inner_phi = 0;; inner_phi = inner_incr) {
// // inner_phi := Phi(x, intcon(0), inner_incr)
// // inner_incr := AddI(inner_phi, intcon(stride))
// inner_incr = inner_phi + stride;
// if (inner_incr < inner_iters_actual) {
// ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
// continue;
// }
// else break;
// }
// if ((outer_phi+inner_phi) < limit) //OR (outer_phi+inner_incr) < limit
// continue;
// else break;
// }
//
// The same logic is used to transform an int counted loop that contains long range checks into a loop nest of 2 int
// loops with long range checks transformed to int range checks in the inner loop.
bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
Node* x = loop->_head;
// Only for inner loops
if (loop->_child != NULL || !x->is_BaseCountedLoop() || x->as_Loop()->is_loop_nest_outer_loop()) {
return false;
}
if (x->is_CountedLoop() && !x->as_CountedLoop()->is_main_loop() && !x->as_CountedLoop()->is_normal_loop()) {
return false;
}
BaseCountedLoopNode* head = x->as_BaseCountedLoop();
BasicType bt = x->as_BaseCountedLoop()->bt();
check_counted_loop_shape(loop, x, bt);
#ifndef PRODUCT
if (bt == T_LONG) {
Atomic::inc(&_long_loop_candidates);
}
#endif
jlong stride_con = head->stride_con();
assert(stride_con != 0, "missed some peephole opt");
// We can't iterate for more than max int at a time.
if (stride_con != (jint)stride_con) {
assert(bt == T_LONG, "only for long loops");
return false;
}
// The number of iterations for the integer count loop: guarantee no
// overflow: max_jint - stride_con max. -1 so there's no need for a
// loop limit check if the exit test is <= or >=.
int iters_limit = max_jint - ABS(stride_con) - 1;
#ifdef ASSERT
if (bt == T_LONG && StressLongCountedLoop > 0) {
iters_limit = iters_limit / StressLongCountedLoop;
}
#endif
// At least 2 iterations so counted loop construction doesn't fail
if (iters_limit/ABS(stride_con) < 2) {
return false;
}
PhiNode* phi = head->phi()->as_Phi();
Node* incr = head->incr();
Node* back_control = head->in(LoopNode::LoopBackControl);
// data nodes on back branch not supported
if (back_control->outcnt() > 1) {
return false;
}
Node* limit = head->limit();
// We'll need to use the loop limit before the inner loop is entered
if (!is_dominator(get_ctrl(limit), x)) {
return false;
}
IfNode* exit_test = head->loopexit();
assert(back_control->Opcode() == Op_IfTrue, "wrong projection for back edge");
Node_List range_checks;
iters_limit = extract_long_range_checks(loop, stride_con, iters_limit, phi, range_checks);
if (bt == T_INT) {
// The only purpose of creating a loop nest is to handle long range checks. If there are none, do not proceed further.
if (range_checks.size() == 0) {
return false;
}
}
// Take what we know about the number of iterations of the long counted loop into account when computing the limit of
// the inner loop.
const Node* init = head->init_trip();
const TypeInteger* lo = _igvn.type(init)->is_integer(bt);
const TypeInteger* hi = _igvn.type(limit)->is_integer(bt);
if (stride_con < 0) {
swap(lo, hi);
}
if (hi->hi_as_long() <= lo->lo_as_long()) {
// not a loop after all
return false;
}
julong orig_iters = hi->hi_as_long() - lo->lo_as_long();
iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters));
// We need a safepoint to insert empty predicates for the inner loop.
SafePointNode* safepoint;
if (bt == T_INT && head->as_CountedLoop()->is_strip_mined()) {
// Loop is strip mined: use the safepoint of the outer strip mined loop
OuterStripMinedLoopNode* outer_loop = head->as_CountedLoop()->outer_loop();
assert(outer_loop != NULL, "no outer loop");
safepoint = outer_loop->outer_safepoint();
outer_loop->transform_to_counted_loop(&_igvn, this);
exit_test = head->loopexit();
} else {
safepoint = find_safepoint(back_control, x, loop);
}
Node* exit_branch = exit_test->proj_out(false);
Node* entry_control = head->in(LoopNode::EntryControl);
// Clone the control flow of the loop to build an outer loop
Node* outer_back_branch = back_control->clone();
Node* outer_exit_test = new IfNode(exit_test->in(0), exit_test->in(1), exit_test->_prob, exit_test->_fcnt);
Node* inner_exit_branch = exit_branch->clone();
LoopNode* outer_head = new LoopNode(entry_control, outer_back_branch);
IdealLoopTree* outer_ilt = insert_outer_loop(loop, outer_head, outer_back_branch);
const bool body_populated = true;
register_control(outer_head, outer_ilt, entry_control, body_populated);
_igvn.register_new_node_with_optimizer(inner_exit_branch);
set_loop(inner_exit_branch, outer_ilt);
set_idom(inner_exit_branch, exit_test, dom_depth(exit_branch));
outer_exit_test->set_req(0, inner_exit_branch);
register_control(outer_exit_test, outer_ilt, inner_exit_branch, body_populated);
_igvn.replace_input_of(exit_branch, 0, outer_exit_test);
set_idom(exit_branch, outer_exit_test, dom_depth(exit_branch));
outer_back_branch->set_req(0, outer_exit_test);
register_control(outer_back_branch, outer_ilt, outer_exit_test, body_populated);
_igvn.replace_input_of(x, LoopNode::EntryControl, outer_head);
set_idom(x, outer_head, dom_depth(x));
// add an iv phi to the outer loop and use it to compute the inner
// loop iteration limit
Node* outer_phi = phi->clone();
outer_phi->set_req(0, outer_head);
register_new_node(outer_phi, outer_head);
Node* inner_iters_max = NULL;
if (stride_con > 0) {
inner_iters_max = MaxNode::max_diff_with_zero(limit, outer_phi, TypeInteger::bottom(bt), _igvn);
} else {
inner_iters_max = MaxNode::max_diff_with_zero(outer_phi, limit, TypeInteger::bottom(bt), _igvn);
}
Node* inner_iters_limit = _igvn.integercon(iters_limit, bt);
// inner_iters_max may not fit in a signed integer (iterating from
// Long.MIN_VALUE to Long.MAX_VALUE for instance). Use an unsigned
// min.
Node* inner_iters_actual = MaxNode::unsigned_min(inner_iters_max, inner_iters_limit, TypeInteger::make(0, iters_limit, Type::WidenMin, bt), _igvn);
Node* inner_iters_actual_int;
if (bt == T_LONG) {
inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
} else {
inner_iters_actual_int = inner_iters_actual;
}
Node* int_zero = _igvn.intcon(0);
set_ctrl(int_zero, C->root());
if (stride_con < 0) {
inner_iters_actual_int = new SubINode(int_zero, inner_iters_actual_int);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
}
// Clone the iv data nodes as an integer iv
Node* int_stride = _igvn.intcon(checked_cast<int>(stride_con));
set_ctrl(int_stride, C->root());
Node* inner_phi = new PhiNode(x->in(0), TypeInt::INT);
Node* inner_incr = new AddINode(inner_phi, int_stride);
Node* inner_cmp = NULL;
inner_cmp = new CmpINode(inner_incr, inner_iters_actual_int);
Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
inner_phi->set_req(LoopNode::EntryControl, int_zero);
inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
register_new_node(inner_phi, x);
register_new_node(inner_incr, x);
register_new_node(inner_cmp, x);
register_new_node(inner_bol, x);
_igvn.replace_input_of(exit_test, 1, inner_bol);
// Clone inner loop phis to outer loop
for (uint i = 0; i < head->outcnt(); i++) {
Node* u = head->raw_out(i);
if (u->is_Phi() && u != inner_phi && u != phi) {
assert(u->in(0) == head, "inconsistent");
Node* clone = u->clone();
clone->set_req(0, outer_head);
register_new_node(clone, outer_head);
_igvn.replace_input_of(u, LoopNode::EntryControl, clone);
}
}
// Replace inner loop long iv phi as inner loop int iv phi + outer
// loop iv phi
Node* iv_add = loop_nest_replace_iv(phi, inner_phi, outer_phi, head, bt);
// Replace inner loop long iv incr with inner loop int incr + outer
// loop iv phi
loop_nest_replace_iv(incr, inner_incr, outer_phi, head, bt);
set_subtree_ctrl(inner_iters_actual_int, body_populated);
LoopNode* inner_head = create_inner_head(loop, head, exit_test);
// Summary of steps from initial loop to loop nest:
//
// == old IR nodes =>
//
// entry_control: {...}
// x:
// for (long phi = init;;) {
// // phi := Phi(x, init, incr)
// // incr := AddL(phi, longcon(stride))
// exit_test:
// if (phi < limit)
// back_control: fallthrough;
// else
// exit_branch: break;
// long incr = phi + stride;
// ... use phi and incr ...
// phi = incr;
// }
//
// == new IR nodes (just before final peel) =>
//
// entry_control: {...}
// long adjusted_limit = limit + stride; //because phi_incr != NULL
// assert(!limit_check_required || (extralong)limit + stride == adjusted_limit); // else deopt
// ulong inner_iters_limit = max_jint - ABS(stride) - 1; //near 0x7FFFFFF0
// outer_head:
// for (long outer_phi = init;;) {
// // outer_phi := phi->clone(), in(0):=outer_head, => Phi(outer_head, init, incr)
// // REPLACE phi => AddL(outer_phi, I2L(inner_phi))
// // REPLACE incr => AddL(outer_phi, I2L(inner_incr))
// // SO THAT outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_incr)))
// ulong inner_iters_max = (ulong) MAX(0, ((extralong)adjusted_limit - outer_phi) * SGN(stride));
// int inner_iters_actual_int = (int) MIN(inner_iters_limit, inner_iters_max) * SGN(stride);
// inner_head: x: //in(1) := outer_head
// int inner_phi;
// for (inner_phi = 0;;) {
// // inner_phi := Phi(x, intcon(0), inner_phi + stride)
// int inner_incr = inner_phi + stride;
// bool inner_bol = (inner_incr < inner_iters_actual_int);
// exit_test: //exit_test->in(1) := inner_bol;
// if (inner_bol) // WAS (phi < limit)
// back_control: fallthrough;
// else
// inner_exit_branch: break; //exit_branch->clone()
// ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
// inner_phi = inner_phi + stride; // inner_incr
// }
// outer_exit_test: //exit_test->clone(), in(0):=inner_exit_branch
// if ((outer_phi+inner_phi) < limit) // WAS (phi < limit)
// outer_back_branch: fallthrough; //back_control->clone(), in(0):=outer_exit_test
// else
// exit_branch: break; //in(0) := outer_exit_test
// }
if (bt == T_INT) {
outer_phi = new ConvI2LNode(outer_phi);
register_new_node(outer_phi, outer_head);
}
transform_long_range_checks(checked_cast<int>(stride_con), range_checks, outer_phi, inner_iters_actual_int,
inner_phi, iv_add, inner_head);
// Peel one iteration of the loop and use the safepoint at the end
// of the peeled iteration to insert empty predicates. If no well
// positioned safepoint peel to guarantee a safepoint in the outer
// loop.
if (safepoint != NULL || !loop->_has_call) {
old_new.clear();
do_peeling(loop, old_new);
} else {
C->set_major_progress();
}
if (safepoint != NULL) {
SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint();
if (UseLoopPredicate) {
add_empty_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt);
}
if (UseProfiledLoopPredicate) {
add_empty_predicate(Deoptimization::Reason_profile_predicate, inner_head, outer_ilt, cloned_sfpt);
}
add_empty_predicate(Deoptimization::Reason_loop_limit_check, inner_head, outer_ilt, cloned_sfpt);
}
#ifndef PRODUCT
if (bt == T_LONG) {
Atomic::inc(&_long_loop_nests);
}
#endif
inner_head->mark_loop_nest_inner_loop();
outer_head->mark_loop_nest_outer_loop();
return true;
}
int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
Node_List& range_checks) {
const jlong min_iters = 2;
jlong reduced_iters_limit = iters_limit;
jlong original_iters_limit = iters_limit;
for (uint i = 0; i < loop->_body.size(); i++) {
Node* c = loop->_body.at(i);
if (c->is_IfProj() && c->in(0)->is_RangeCheck()) {
CallStaticJavaNode* call = c->as_IfProj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
if (call != NULL) {
Node* range = NULL;
Node* offset = NULL;
jlong scale = 0;
RangeCheckNode* rc = c->in(0)->as_RangeCheck();
if (loop->is_range_check_if(rc, this, T_LONG, phi, range, offset, scale) &&
loop->is_invariant(range) && loop->is_invariant(offset) &&
original_iters_limit / ABS(scale * stride_con) >= min_iters) {
reduced_iters_limit = MIN2(reduced_iters_limit, original_iters_limit/ABS(scale));
range_checks.push(c);
}
}
}
}
return checked_cast<int>(reduced_iters_limit);
}
// One execution of the inner loop covers a sub-range of the entire iteration range of the loop: [A,Z), aka [A=init,
// Z=limit). If the loop has at least one trip (which is the case here), the iteration variable i always takes A as its
// first value, followed by A+S (S is the stride), next A+2S, etc. The limit is exclusive, so that the final value B of
// i is never Z. It will be B=Z-1 if S=1, or B=Z+1 if S=-1.
// If |S|>1 the formula for the last value B would require a floor operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A,
// which is B=Z-sgn(S)U for some U in [1,|S|]. So when S>0, i ranges as i:[A,Z) or i:[A,B=Z-U], or else (in reverse)
// as i:(Z,A] or i:[B=Z+U,A]. It will become important to reason about this inclusive range [A,B] or [B,A].
// Within the loop there may be many range checks. Each such range check (R.C.) is of the form 0 <= i*K+L < R, where K
// is a scale factor applied to the loop iteration variable i, and L is some offset; K, L, and R are loop-invariant.
// Because R is never negative (see below), this check can always be simplified to an unsigned check i*K+L <u R.
// When a long loop over a 64-bit variable i (outer_iv) is decomposed into a series of shorter sub-loops over a 32-bit
// variable j (inner_iv), j ranges over a shorter interval j:[0,B_2] or [0,Z_2) (assuming S > 0), where the limit is
// chosen to prevent various cases of 32-bit overflow (including multiplications j*K below). In the sub-loop the
// logical value i is offset from j by a 64-bit constant C, so i ranges in i:C+[0,Z_2).
// For S<0, j ranges (in reverse!) through j:[-|B_2|,0] or (-|Z_2|,0]. For either sign of S, we can say i=j+C and j
// ranges through 32-bit ranges [A_2,B_2] or [B_2,A_2] (A_2=0 of course).
// The disjoint union of all the C+[A_2,B_2] ranges from the sub-loops must be identical to the whole range [A,B].
// Assuming S>0, the first C must be A itself, and the next C value is the previous C+B_2, plus S. If |S|=1, the next
// C value is also the previous C+Z_2. In each sub-loop, j counts from j=A_2=0 and i counts from C+0 and exits at
// j=B_2 (i=C+B_2), just before it gets to i=C+Z_2. Both i and j count up (from C and 0) if S>0; otherwise they count
// down (from C and 0 again).
// Returning to range checks, we see that each i*K+L <u R expands to (C+j)*K+L <u R, or j*K+Q <u R, where Q=(C*K+L).
// (Recall that K and L and R are loop-invariant scale, offset and range values for a particular R.C.) This is still a
// 64-bit comparison, so the range check elimination logic will not apply to it. (The R.C.E. transforms operate only on
// 32-bit indexes and comparisons, because they use 64-bit temporary values to avoid overflow; see
// PhaseIdealLoop::add_constraint.)
// We must transform this comparison so that it gets the same answer, but by means of a 32-bit R.C. (using j not i) of
// the form j*K+L_2 <u32 R_2. Note that L_2 and R_2 must be loop-invariant, but only with respect to the sub-loop. Thus, the
// problem reduces to computing values for L_2 and R_2 (for each R.C. in the loop) in the loop header for the sub-loop.
// Then the standard R.C.E. transforms can take those as inputs and further compute the necessary minimum and maximum
// values for the 32-bit counter j within which the range checks can be eliminated.
// So, given j*K+Q <u R, we need to find some j*K+L_2 <u32 R_2, where L_2 and R_2 fit in 32 bits, and the 32-bit operations do
// not overflow. We also need to cover the cases where i*K+L (= j*K+Q) overflows to a 64-bit negative, since that is
// allowed as an input to the R.C., as long as the R.C. as a whole fails.
// If 32-bit multiplication j*K might overflow, we adjust the sub-loop limit Z_2 closer to zero to reduce j's range.
// For each R.C. j*K+Q <u32 R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max], where
// Q_min=Q and Q_max=B_2*K+Q (if S>0 and K>0), Q_min=A_2*K+Q and Q_max=Q (if S<0 and K>0),
// Q_min=B_2*K+Q and Q_max=Q if (S>0 and K<0), Q_min=Q and Q_max=A_2*K+Q (if S<0 and K<0)
// Note that the first R.C. value is always Q=(S*K>0 ? Q_min : Q_max). Also Q_{min,max} = Q + {min,max}(A_2*K,B_2*K).
// If S*K>0 then, as the loop iterations progress, each R.C. value i*K+L = j*K+Q goes up from Q=Q_min towards Q_max.
// If S*K<0 then j*K+Q starts at Q=Q_max and goes down towards Q_min.
// Case A: Some Negatives (but no overflow).
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . Q_min..Q_max . 0 . . . . | s64 negative
// | . . . . R=0 R< R< R< R< | (against R values)
// | . . . Q_min..0..Q_max . . . | small mixed
// | . . . . R R R< R< R< | (against R values)
//
// R values which are out of range (>Q_max+1) are reduced to max(0,Q_max+1). They are marked on the number line as R<.
//
// So, if Q_min <s64 0, then use this test:
// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max+1) if S*K>0 (R.C.E. steps upward)
// j*K + s32_trunc(Q_max) <u32 clamp(R, 0, Q_max+1) if S*K<0 (R.C.E. steps downward)
// Both formulas reduce to adding j*K to the 32-bit truncated value of the first R.C. expression value, Q:
// j*K + s32_trunc(Q) <u32 clamp(R, 0, Q_max+1) for all S,K
// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also will return R_2=zero.
// Case B: No Negatives.
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . . . . 0 Q_min..Q_max . . | small positive
// | . . . . R> R R R< R< | (against R values)
// | . . . . 0 . Q_min..Q_max . | s64 positive
// | . . . . R> R> R R R< | (against R values)
//
// R values which are out of range (<Q_min or >Q_max+1) are reduced as marked: R> up to Q_min, R< down to Q_max+1.
// Then the whole comparison is shifted left by Q_min, so it can take place at zero, which is a nice 32-bit value.
//
// So, if both Q_min, Q_max+1 >=s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, Q_max+1) - Q_min if S*K>0
// More generally:
// j*K + Q - Q_min <u32 clamp(R, Q_min, Q_max+1) - Q_min for all S,K
// Case C: Overflow in the 64-bit domain
// Number line:
// |..Q_max-2^64 . . 0 . . . Q_min..| s64 overflow
// | . . . . R> R> R> R> R | (against R values)
//
// In this case, Q_min >s64 Q_max+1, even though the mathematical values of Q_min and Q_max+1 are correctly ordered.
// The formulas from the previous case can be used, except that the bad upper bound Q_max is replaced by max_jlong.
// (In fact, we could use any replacement bound from R to max_jlong inclusive, as the input to the clamp function.)
//
// So if Q_min >=s64 0 but Q_max+1 <s64 0, use this test:
// j*K + 0 <u32 clamp(R, Q_min, max_jlong) - Q_min if S*K>0
// More generally:
// j*K + Q - Q_min <u32 clamp(R, Q_min, max_jlong) - Q_min for all S,K
//
// Dropping the bad bound means only Q_min is used to reduce the range of R:
// j*K + Q - Q_min <u32 max(Q_min, R) - Q_min for all S,K
//
// Here the clamp function is a 64-bit min/max that reduces the dynamic range of its R operand to the required [L,H]:
// clamp(X, L, H) := max(L, min(X, H))
// When degenerately L > H, it returns L not H.
//
// All of the formulas above can be merged into a single one:
// L_clamp = Q_min < 0 ? 0 : Q_min --whether and how far to left-shift
// H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
// = Q_max+1 < 0 && Q_min >= 0 ? max_jlong : Q_max+1
// Q_first = Q = (S*K>0 ? Q_min : Q_max) = (C*K+L)
// R_clamp = clamp(R, L_clamp, H_clamp) --reduced dynamic range
// replacement R.C.:
// j*K + Q_first - L_clamp <u32 R_clamp - L_clamp
// or equivalently:
// j*K + L_2 <u32 R_2
// where
// L_2 = Q_first - L_clamp
// R_2 = R_clamp - L_clamp
//
// Note on why R is never negative:
//
// Various details of this transformation would break badly if R could be negative, so this transformation only
// operates after obtaining hard evidence that R<0 is impossible. For example, if R comes from a LoadRange node, we
// know R cannot be negative. For explicit checks (of both int and long) a proof is constructed in
// inline_preconditions_checkIndex, which triggers an uncommon trap if R<0, then wraps R in a ConstraintCastNode with a
// non-negative type. Later on, when IdealLoopTree::is_range_check_if looks for an optimizable R.C., it checks that
// the type of that R node is non-negative. Any "wild" R node that could be negative is not treated as an optimizable
// R.C., but R values from a.length and inside checkIndex are good to go.
//
// Transform the long range checks collected by extract_long_range_checks() so that they operate on
// the int iv (inner_phi) of the inner loop of the loop nest instead of on the original long iv.
// Each 64-bit check i*K + L <u64 R is rewritten into the equivalent 32-bit check
// j*K + L_2 <u32 R_2 derived in the long comment above, which the existing 32-bit range check
// elimination can then optimize.
//
// stride_con:             loop stride (fits in an int, see checked_cast below)
// range_checks:           IfProj projections of the RangeCheck nodes to transform
// outer_phi:              iv phi of the outer loop (the per-sub-loop offset named C above)
// inner_iters_actual_int: number of iterations of the inner loop, as an int
// inner_phi:              int iv phi of the inner loop (j in the comment above)
// iv_add:                 node that replaced the original long iv (outer_phi + inner_phi)
// inner_head:             head of the inner loop
void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List &range_checks, Node* outer_phi,
Node* inner_iters_actual_int, Node* inner_phi,
Node* iv_add, LoopNode* inner_head) {
// Constants shared by all transformed checks, pinned at the root.
Node* long_zero = _igvn.longcon(0);
set_ctrl(long_zero, C->root());
Node* int_zero = _igvn.intcon(0);
set_ctrl(int_zero, this->C->root());
Node* long_one = _igvn.longcon(1);
set_ctrl(long_one, this->C->root());
Node* int_stride = _igvn.intcon(checked_cast<int>(stride_con));
set_ctrl(int_stride, this->C->root());
for (uint i = 0; i < range_checks.size(); i++) {
ProjNode* proj = range_checks.at(i)->as_Proj();
ProjNode* unc_proj = proj->other_if_proj();
RangeCheckNode* rc = proj->in(0)->as_RangeCheck();
jlong scale = 0;
Node* offset = NULL;
Node* rc_bol = rc->in(1);
Node* rc_cmp = rc_bol->in(1);
if (rc_cmp->Opcode() == Op_CmpU) {
// could be shared and have already been taken care of
continue;
}
bool short_scale = false;
bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, T_LONG, &scale, &offset, &short_scale);
assert(ok, "inconsistent: was tested before");
Node* range = rc_cmp->in(2);
Node* c = rc->in(0);
Node* entry_control = inner_head->in(LoopNode::EntryControl);
// Name the operands as in the big comment above: i*K + L <u64 R
Node* R = range;
Node* K = _igvn.longcon(scale);
set_ctrl(K, this->C->root());
Node* L = offset;
if (short_scale) {
// This converts:
// (int)i*K + L <u64 R
// with K an int into:
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
// to protect against an overflow of (int)i*K
//
// Because if (int)i*K overflows, there are K,L where:
// (int)i*K + L <u64 R is false because (int)i*K+L overflows to a negative which becomes a huge u64 value.
// But if i*(long)K + L is >u64 (long)max_jint and still is <u64 R, then
// i*(long)K + L <u64 R is true.
//
// As a consequence simply converting i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution.
//
// It's always true that:
// (int)i*K <u64 (long)max_jint + 1
// which implies (int)i*K + L <u64 (long)max_jint + 1 + L
// As a consequence:
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
// is always false in case of overflow of i*K
//
// Note, there are also K,L where i*K overflows and
// i*K + L <u64 R is true, but
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R) is false
// So this transformation could cause spurious deoptimizations and failed range check elimination
// (but not incorrect execution) for unlikely corner cases with overflow.
// If this causes problems in practice, we could maybe direct execution to a post-loop, instead of deoptimizing.
Node* max_jint_plus_one_long = _igvn.longcon((jlong)max_jint + 1);
set_ctrl(max_jint_plus_one_long, C->root());
Node* max_range = new AddLNode(max_jint_plus_one_long, L);
register_new_node(max_range, entry_control);
R = MaxNode::unsigned_min(R, max_range, TypeLong::POS, _igvn);
set_subtree_ctrl(R, true);
}
Node* C = outer_phi; // NB: shadows the Compile* member C below, hence the explicit this->C uses
// Start with 64-bit values:
// i*K + L <u64 R
// (C+j)*K + L <u64 R
// j*K + Q <u64 R where Q = Q_first = C*K+L
Node* Q_first = new MulLNode(C, K);
register_new_node(Q_first, entry_control);
Q_first = new AddLNode(Q_first, L);
register_new_node(Q_first, entry_control);
// Compute endpoints of the range of values j*K + Q.
// Q_min = (j=0)*K + Q; Q_max = (j=B_2)*K + Q
Node* Q_min = Q_first;
// Compute the exact ending value B_2 (which is really A_2 if S < 0)
Node* B_2 = new LoopLimitNode(this->C, int_zero, inner_iters_actual_int, int_stride);
register_new_node(B_2, entry_control);
B_2 = new SubINode(B_2, int_stride);
register_new_node(B_2, entry_control);
B_2 = new ConvI2LNode(B_2);
register_new_node(B_2, entry_control);
Node* Q_max = new MulLNode(B_2, K);
register_new_node(Q_max, entry_control);
Q_max = new AddLNode(Q_max, Q_first);
register_new_node(Q_max, entry_control);
if (scale * stride_con < 0) {
// The scaled iv steps downward: the first value is the largest, so exchange the endpoints.
swap(Q_min, Q_max);
}
// Now, mathematically, Q_max > Q_min, and they are close enough so that (Q_max-Q_min) fits in 32 bits.
// L_clamp = Q_min < 0 ? 0 : Q_min
Node* Q_min_cmp = new CmpLNode(Q_min, long_zero);
register_new_node(Q_min_cmp, entry_control);
Node* Q_min_bool = new BoolNode(Q_min_cmp, BoolTest::lt);
register_new_node(Q_min_bool, entry_control);
Node* L_clamp = new CMoveLNode(Q_min_bool, Q_min, long_zero, TypeLong::LONG);
register_new_node(L_clamp, entry_control);
// (This could also be coded bitwise as L_clamp = Q_min & ~(Q_min>>63).)
Node* Q_max_plus_one = new AddLNode(Q_max, long_one);
register_new_node(Q_max_plus_one, entry_control);
// H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
// (Because Q_min and Q_max are close, the overflow check could also be encoded as Q_max+1 < 0 & Q_min >= 0.)
Node* max_jlong_long = _igvn.longcon(max_jlong);
set_ctrl(max_jlong_long, this->C->root());
Node* Q_max_cmp = new CmpLNode(Q_max_plus_one, Q_min);
register_new_node(Q_max_cmp, entry_control);
Node* Q_max_bool = new BoolNode(Q_max_cmp, BoolTest::lt);
register_new_node(Q_max_bool, entry_control);
Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max_plus_one, max_jlong_long, TypeLong::LONG);
register_new_node(H_clamp, entry_control);
// (This could also be coded bitwise as H_clamp = ((Q_max+1)<<1 | M)>>>1 where M = (Q_max+1)>>63 & ~Q_min>>63.)
// R_2 = clamp(R, L_clamp, H_clamp) - L_clamp
// that is: R_2 = clamp(R, L_clamp=0, H_clamp=Q_max) if Q_min < 0
// or else: R_2 = clamp(R, L_clamp, H_clamp) - Q_min if Q_min >= 0
// and also: R_2 = clamp(R, L_clamp, Q_max+1) - L_clamp if Q_min < Q_max+1 (no overflow)
// or else: R_2 = clamp(R, L_clamp, *no limit*)- L_clamp if Q_max+1 < Q_min (overflow)
Node* R_2 = clamp(R, L_clamp, H_clamp);
R_2 = new SubLNode(R_2, L_clamp);
register_new_node(R_2, entry_control);
// After clamping, R_2 is known to fit in a non-negative int.
R_2 = new ConvL2INode(R_2, TypeInt::POS);
register_new_node(R_2, entry_control);
// L_2 = Q_first - L_clamp
// We are subtracting L_clamp from both sides of the <u32 comparison.
// If S*K>0, then Q_first == 0 and the R.C. expression at -L_clamp and steps upward to Q_max-L_clamp.
// If S*K<0, then Q_first != 0 and the R.C. expression starts high and steps downward to Q_min-L_clamp.
Node* L_2 = new SubLNode(Q_first, L_clamp);
register_new_node(L_2, entry_control);
L_2 = new ConvL2INode(L_2, TypeInt::INT);
register_new_node(L_2, entry_control);
// Transform the range check using the computed values L_2/R_2
// from: i*K + L <u64 R
// to: j*K + L_2 <u32 R_2
// that is:
// (j*K + Q_first) - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
K = _igvn.intcon(checked_cast<int>(scale));
set_ctrl(K, this->C->root());
Node* scaled_iv = new MulINode(inner_phi, K);
register_new_node(scaled_iv, c);
Node* scaled_iv_plus_offset = new AddINode(scaled_iv, L_2);
register_new_node(scaled_iv_plus_offset, c);
Node* new_rc_cmp = new CmpUNode(scaled_iv_plus_offset, R_2);
register_new_node(new_rc_cmp, c);
// Rewire the existing Bool to the new unsigned 32-bit comparison; the If itself is kept.
_igvn.replace_input_of(rc_bol, 1, new_rc_cmp);
}
}
// Build IR computing clamp(R, L, H) := max(L, min(R, H)), reducing R's dynamic range to [L, H].
// In the degenerate case L > H, the result is L because the outer max is applied last.
Node* PhaseIdealLoop::clamp(Node* R, Node* L, Node* H) {
  // First cap R from above by H...
  Node* capped = MaxNode::signed_min(R, H, TypeLong::LONG, _igvn);
  set_subtree_ctrl(capped, true);
  // ...then raise the result to at least L.
  Node* clamped = MaxNode::signed_max(L, capped, TypeLong::LONG, _igvn);
  set_subtree_ctrl(clamped, true);
  return clamped;
}
// Replace the BaseCountedLoopNode head (and its exit test) of the loop by a plain LoopNode/IfNode
// pair: after loop nest creation the inner loop is no longer a valid counted loop in its current
// shape, so it must be demoted until it is rediscovered as a counted loop. Returns the new head.
LoopNode* PhaseIdealLoop::create_inner_head(IdealLoopTree* loop, BaseCountedLoopNode* head,
IfNode* exit_test) {
// Clone control inputs / test / profile data into the replacement nodes.
LoopNode* new_inner_head = new LoopNode(head->in(1), head->in(2));
IfNode* new_inner_exit = new IfNode(exit_test->in(0), exit_test->in(1), exit_test->_prob, exit_test->_fcnt);
_igvn.register_new_node_with_optimizer(new_inner_head);
_igvn.register_new_node_with_optimizer(new_inner_exit);
// Swap the old nodes for the new ones in the loop body list.
loop->_body.push(new_inner_head);
loop->_body.push(new_inner_exit);
loop->_body.yank(head);
loop->_body.yank(exit_test);
set_loop(new_inner_head, loop);
set_loop(new_inner_exit, loop);
// Transfer dominator info before replacing uses; the new nodes take the old nodes' positions.
set_idom(new_inner_head, idom(head), dom_depth(head));
set_idom(new_inner_exit, idom(exit_test), dom_depth(exit_test));
lazy_replace(head, new_inner_head);
lazy_replace(exit_test, new_inner_exit);
loop->_head = new_inner_head;
return new_inner_head;
}
#ifdef ASSERT
// Debug-only sanity check: verify that the loop rooted at x still has the shape of a
// well-formed counted loop over basic type bt — an exit test comparing the incremented iv
// against a limit, a constant stride, an iv phi fed by the increment, and no safepoint on
// the backedge.
void PhaseIdealLoop::check_counted_loop_shape(IdealLoopTree* loop, Node* x, BasicType bt) {
  Node* back_ctrl = loop_exit_control(x, loop);
  assert(back_ctrl != NULL, "no back control");

  // Recover the exit test pieces: comparison, iv increment, limit, condition and probability.
  BoolTest::mask mask = BoolTest::illegal;
  float cl_prob = 0;
  Node* iv_incr = NULL;
  Node* limit = NULL;
  Node* exit_cmp = loop_exit_test(back_ctrl, loop, iv_incr, limit, mask, cl_prob);
  assert(exit_cmp != NULL && exit_cmp->Opcode() == Op_Cmp(bt), "no exit test");

  Node* phi_incr = NULL;
  iv_incr = loop_iv_incr(iv_incr, x, loop, phi_incr);
  assert(iv_incr != NULL && iv_incr->Opcode() == Op_Add(bt), "no incr");

  // The increment must be phi + constant stride, with the phi on the loop head.
  Node* xphi = NULL;
  Node* stride = loop_iv_stride(iv_incr, loop, xphi);
  assert(stride != NULL, "no stride");
  PhiNode* phi = loop_iv_phi(xphi, phi_incr, x, loop);
  assert(phi != NULL && phi->in(LoopNode::LoopBackControl) == iv_incr, "No phi");

  jlong stride_con = stride->get_integer_as_long(bt);
  assert(condition_stride_ok(mask, stride_con), "illegal condition");
  assert(mask != BoolTest::ne, "unexpected condition");
  assert(phi_incr == NULL, "bad loop shape");
  assert(exit_cmp->in(1) == iv_incr, "bad exit test shape");
  // Safepoint on backedge not supported
  assert(x->in(LoopNode::LoopBackControl)->Opcode() != Op_SafePoint, "no safepoint on backedge");
}
#endif
#ifdef ASSERT
// convert an int counted loop to a long counted to stress handling of
// long counted loops
// Walks the int iv computation reachable from cmp (Phi/CmpI/AddI nodes inside the loop), clones
// each node as its long counterpart, wires the clones together (inserting ConvI2L for inputs that
// stay int), then redirects all outside uses to the long versions (through ConvL2I where an int
// value is still required). Returns false (and cleans up) if a CastII is encountered.
bool PhaseIdealLoop::convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop) {
Unique_Node_List iv_nodes;
Node_List old_new;
iv_nodes.push(cmp);
bool failed = false;
// Phase 1: collect the iv nodes transitively and create unconnected long clones.
for (uint i = 0; i < iv_nodes.size() && !failed; i++) {
Node* n = iv_nodes.at(i);
switch(n->Opcode()) {
case Op_Phi: {
Node* clone = new PhiNode(n->in(0), TypeLong::LONG);
old_new.map(n->_idx, clone);
break;
}
case Op_CmpI: {
Node* clone = new CmpLNode(NULL, NULL);
old_new.map(n->_idx, clone);
break;
}
case Op_AddI: {
Node* clone = new AddLNode(NULL, NULL);
old_new.map(n->_idx, clone);
break;
}
case Op_CastII: {
// A CastII pins a narrowed type; no long equivalent is created, so give up.
failed = true;
break;
}
default:
DEBUG_ONLY(n->dump());
fatal("unexpected");
}
// Queue loop-resident inputs for cloning as well (note: this index shadows the outer i).
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL) {
continue;
}
if (loop->is_member(get_loop(get_ctrl(in)))) {
iv_nodes.push(in);
}
}
}
if (failed) {
// Undo: discard all clones created so far.
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
if (clone != NULL) {
_igvn.remove_dead_node(clone);
}
}
return false;
}
// Phase 2: wire the clones' inputs, converting int inputs without a clone via ConvI2L.
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL) {
continue;
}
Node* in_clone = old_new[in->_idx];
if (in_clone == NULL) {
assert(_igvn.type(in)->isa_int(), "");
in_clone = new ConvI2LNode(in);
_igvn.register_new_node_with_optimizer(in_clone);
set_subtree_ctrl(in_clone, false);
}
if (in_clone->in(0) == NULL) {
// NOTE(review): in(0) is temporarily set to top around set_req — presumably to keep the
// half-wired clone from looking dead during the edge update; confirm against Node::set_req.
in_clone->set_req(0, C->top());
clone->set_req(i, in_clone);
in_clone->set_req(0, NULL);
} else {
clone->set_req(i, in_clone);
}
}
_igvn.register_new_node_with_optimizer(clone);
}
set_ctrl(old_new[phi->_idx], phi->in(0));
// Phase 3: move all uses outside the cloned iv subgraph over to the long versions.
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
set_subtree_ctrl(clone, false);
// The CmpL replaces the CmpI directly; other nodes need a ConvL2I for their int users.
Node* m = n->Opcode() == Op_CmpI ? clone : NULL;
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
if (iv_nodes.member(u)) {
continue;
}
if (m == NULL) {
// Lazily create the ConvL2I only when there is at least one outside user.
m = new ConvL2INode(clone);
_igvn.register_new_node_with_optimizer(m);
set_subtree_ctrl(m, false);
}
_igvn.rehash_node_delayed(u);
// replace_edge returns the number of edges replaced; compensate the DU iterator for the
// out-edges that were removed from n.
int nb = u->replace_edge(n, m, &_igvn);
--i, imax -= nb;
}
}
return true;
}
#endif
//------------------------------is_counted_loop--------------------------------
bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_bt) {
PhaseGVN *gvn = &_igvn;
Node* back_control = loop_exit_control(x, loop);
if (back_control == NULL) {
return false;
}
BoolTest::mask bt = BoolTest::illegal;
float cl_prob = 0;
Node* incr = NULL;
Node* limit = NULL;
Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
if (cmp == NULL || cmp->Opcode() != Op_Cmp(iv_bt)) {
return false; // Avoid pointer & float & 64-bit compares
}
// Trip-counter increment must be commutative & associative.
if (incr->Opcode() == Op_Cast(iv_bt)) {
incr = incr->in(1);
}
Node* phi_incr = NULL;
incr = loop_iv_incr(incr, x, loop, phi_incr);
if (incr == NULL) {
return false;
}
Node* trunc1 = NULL;
Node* trunc2 = NULL;
const TypeInteger* iv_trunc_t = NULL;
Node* orig_incr = incr;
if (!(incr = CountedLoopNode::match_incr_with_optional_truncation(incr, &trunc1, &trunc2, &iv_trunc_t, iv_bt))) {
return false; // Funny increment opcode
}
assert(incr->Opcode() == Op_Add(iv_bt), "wrong increment code");
Node* xphi = NULL;
Node* stride = loop_iv_stride(incr, loop, xphi);
if (stride == NULL) {
return false;
}
if (xphi->Opcode() == Op_Cast(iv_bt)) {
xphi = xphi->in(1);
}
// Stride must be constant
jlong stride_con = stride->get_integer_as_long(iv_bt);
assert(stride_con != 0, "missed some peephole opt");
PhiNode* phi = loop_iv_phi(xphi, phi_incr, x, loop);
if (phi == NULL ||
(trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
(trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1)) {
return false;
}
Node* iftrue = back_control;
uint iftrue_op = iftrue->Opcode();
Node* iff = iftrue->in(0);
BoolNode* test = iff->in(1)->as_Bool();
const TypeInteger* limit_t = gvn->type(limit)->is_integer(iv_bt);
if (trunc1 != NULL) {
// When there is a truncation, we must be sure that after the truncation
// the trip counter will end up higher than the limit, otherwise we are looking
// at an endless loop. Can happen with range checks.
// Example:
// int i = 0;
// while (true)
// sum + = array[i];
// i++;
// i = i && 0x7fff;
// }
//
// If the array is shorter than 0x8000 this exits through a AIOOB
// - Counted loop transformation is ok
// If the array is longer then this is an endless loop
// - No transformation can be done.
const TypeInteger* incr_t = gvn->type(orig_incr)->is_integer(iv_bt);
if (limit_t->hi_as_long() > incr_t->hi_as_long()) {
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.89 Sekunden
(vorverarbeitet)
¤
|
Haftungshinweis
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.
|