/*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
#ifndef PRODUCT
void MemNode::dump_spec(outputStream *st) const {
  if (in(Address) == NULL)  return; // node is dead
#ifndef ASSERT
  // fake the missing field
  const TypePtr* _adr_type = NULL;
  if (in(Address) != NULL)
    _adr_type = in(Address)->bottom_type()->isa_ptr();
#endif
dump_adr_type(this, _adr_type, st);
Compile* C = Compile::current(); if (C->alias_type(_adr_type)->is_volatile()) {
st->print(" Volatile!");
} if (_unaligned_access) {
st->print(" unaligned");
} if (_mismatched_access) {
st->print(" mismatched");
} if (_unsafe_access) {
st->print(" unsafe");
}
}
void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
st->print(" @"); if (adr_type == NULL) {
st->print("NULL");
} else {
adr_type->dump_on(st);
Compile* C = Compile::current();
Compile::AliasType* atp = NULL; if (C->have_alias_type(adr_type)) atp = C->alias_type(adr_type); if (atp == NULL)
      st->print(", idx=?\?;");
    else if (atp->index() == Compile::AliasIdxBot)
      st->print(", idx=Bot;");
    else if (atp->index() == Compile::AliasIdxTop)
      st->print(", idx=Top;");
    else if (atp->index() == Compile::AliasIdxRaw)
st->print(", idx=Raw;"); else {
ciField* field = atp->field(); if (field) {
st->print(", name=");
field->print_name_on(st);
}
st->print(", idx=%d;", atp->index());
}
}
}
extern void print_alias_types();
#endif
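// Walk a chain of memory states backwards on behalf of a load from a known
// instance field (or a boxed value), skipping calls, Initialize nodes,
// membars, ClearArray and MergeMem nodes that provably do not modify the
// slice identified by t_oop, and return the earliest memory state that may
// still affect it.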
Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) {
assert((t_oop != NULL), "sanity"); bool is_instance = t_oop->is_known_instance_field(); bool is_boxed_value_load = t_oop->is_ptr_to_boxed_value() &&
(load != NULL) && load->is_Load() &&
(phase->is_IterGVN() != NULL); if (!(is_instance || is_boxed_value_load)) return mchain; // don't try to optimize non-instance types
uint instance_id = t_oop->instance_id();
Node *start_mem = phase->C->start()->proj_out_or_null(TypeFunc::Memory);
Node *prev = NULL;
Node *result = mchain; while (prev != result) {
    prev = result;
    if (result == start_mem)
      break;  // hit one of our sentinels
    // skip over a call which does not affect this memory slice
    if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
Node *proj_in = result->in(0); if (proj_in->is_Allocate() && proj_in->_idx == instance_id) { break; // hit one of our sentinels
      } else if (proj_in->is_Call()) {
        // ArrayCopyNodes processed here as well
CallNode *call = proj_in->as_Call(); if (!call->may_modify(t_oop, phase)) { // returns false for instances
result = call->in(TypeFunc::Memory);
}
      } else if (proj_in->is_Initialize()) {
        AllocateNode* alloc = proj_in->as_Initialize()->allocation();
        // Stop if this is the initialization for the object instance which
        // contains this memory slice, otherwise skip over it.
        if ((alloc == NULL) || (alloc->_idx == instance_id)) {
          break;
} if (is_instance) {
result = proj_in->in(TypeFunc::Memory);
        } else if (is_boxed_value_load) {
Node* klass = alloc->in(AllocateNode::KlassNode); const TypeKlassPtr* tklass = phase->type(klass)->is_klassptr(); if (tklass->klass_is_exact() && !tklass->exact_klass()->equals(t_oop->is_instptr()->exact_klass())) {
result = proj_in->in(TypeFunc::Memory); // not related allocation
}
}
      } else if (proj_in->is_MemBar()) {
ArrayCopyNode* ac = NULL; if (ArrayCopyNode::may_modify(t_oop, proj_in->as_MemBar(), phase, ac)) { break;
}
result = proj_in->in(TypeFunc::Memory);
} else {
assert(false, "unexpected projection");
}
    } else if (result->is_ClearArray()) {
      if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) {
        // Can not bypass initialization of the instance
        // we are looking for.
        break;
} // Otherwise skip it (the call updated 'result' value).
    } else if (result->is_MergeMem()) {
result = step_through_mergemem(phase, result->as_MergeMem(), t_oop, NULL, tty);
}
} return result;
}
Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase) { const TypeOopPtr* t_oop = t_adr->isa_oopptr(); if (t_oop == NULL) return mchain; // don't try to optimize non-oop types
Node* result = optimize_simple_memory_chain(mchain, t_oop, load, phase); bool is_instance = t_oop->is_known_instance_field();
PhaseIterGVN *igvn = phase->is_IterGVN(); if (is_instance && igvn != NULL && result->is_Phi()) {
PhiNode *mphi = result->as_Phi();
    assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
    const TypePtr *t = mphi->adr_type();
    bool do_split = false;
    // In the following cases, Load memory input can be further optimized based on
    // its precise address type
    if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM ) {
do_split = true;
    } else if (t->isa_oopptr() && !t->is_oopptr()->is_known_instance()) {
      const TypeOopPtr* mem_t =
t->is_oopptr()->cast_to_exactness(true)
->is_oopptr()->cast_to_ptr_type(t_oop->ptr())
->is_oopptr()->cast_to_instance_id(t_oop->instance_id()); if (t_oop->is_aryptr()) {
mem_t = mem_t->is_aryptr()
->cast_to_stable(t_oop->is_aryptr()->is_stable())
->cast_to_size(t_oop->is_aryptr()->size())
->with_offset(t_oop->is_aryptr()->offset())
->is_aryptr();
}
do_split = mem_t == t_oop;
} if (do_split) { // clone the Phi with our address type
result = mphi->split_out_instance(t_adr, igvn);
} else {
assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain");
}
} return result;
}
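// Given a MergeMem node, return the memory state feeding the alias slice
// that corresponds to the address type 'tp'. In debug builds the slice is
// cross-checked against the address type recorded when the graph was built.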
static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) {
uint alias_idx = phase->C->get_alias_index(tp);
  Node *mem = mmem;
#ifdef ASSERT
{ // Check that current type is consistent with the alias index used during graph construction
assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx"); bool consistent = adr_check == NULL || adr_check->empty() ||
                       phase->C->must_alias(adr_check, alias_idx );
    // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
    if( !consistent && adr_check != NULL && !adr_check->empty() &&
tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot &&
( adr_check->offset() == arrayOopDesc::length_offset_in_bytes() ||
adr_check->offset() == oopDesc::klass_offset_in_bytes() ||
adr_check->offset() == oopDesc::mark_offset_in_bytes() ) ) { // don't assert if it is dead code.
consistent = true;
} if( !consistent ) {
st->print("alias_idx==%d, adr_check==", alias_idx); if( adr_check == NULL ) {
st->print("NULL");
} else {
adr_check->dump();
}
st->cr();
print_alias_types();
assert(consistent, "adr_check must match alias idx");
}
  }
#endif
  // TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally
  // means an array I have not precisely typed yet.  Do not do any
  // alias stuff with it any time soon.
  const TypeOopPtr *toop = tp->isa_oopptr();
  if (tp->base() != Type::AnyPtr &&
!(toop &&
toop->isa_instptr() &&
toop->is_instptr()->instance_klass()->is_java_lang_Object() &&
toop->offset() == Type::OffsetBot)) { // compress paths and change unreachable cycles to TOP // If not, we can update the input infinitely along a MergeMem cycle // Equivalent code in PhiNode::Ideal
Node* m = phase->transform(mmem); // If transformed to a MergeMem, get the desired slice // Otherwise the returned node represents memory for every slice
mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; // Update input if it is progress over what we have now
} return mem;
}
//--------------------------Ideal_common--------------------------------------- // Look for degenerate control and memory inputs. Bypass MergeMem inputs. // Unhook non-raw memories from complete (macro-expanded) initializations.
Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) { // If our control input is a dead region, kill all below the region
  Node *ctl = in(MemNode::Control);
  if (ctl && remove_dead_region(phase, can_reshape))
    return this;
  ctl = in(MemNode::Control);
  // Don't bother trying to transform a dead node
  if (ctl && ctl->is_top())  return NodeSentinel;
  PhaseIterGVN *igvn = phase->is_IterGVN();
  // Wait if control on the worklist.
  if (ctl && can_reshape && igvn != NULL) {
Node* bol = NULL;
Node* cmp = NULL; if (ctl->in(0)->is_If()) {
assert(ctl->is_IfTrue() || ctl->is_IfFalse(), "sanity");
bol = ctl->in(0)->in(1); if (bol->is_Bool())
cmp = ctl->in(0)->in(1)->in(1);
} if (igvn->_worklist.member(ctl) ||
(bol != NULL && igvn->_worklist.member(bol)) ||
(cmp != NULL && igvn->_worklist.member(cmp)) ) { // This control path may be dead. // Delay this memory node transformation until the control is processed.
igvn->_worklist.push(this); return NodeSentinel; // caller will return NULL
}
} // Ignore if memory is dead, or self-loop
Node *mem = in(MemNode::Memory); if (phase->type( mem ) == Type::TOP) return NodeSentinel; // caller will return NULL
assert(mem != this, "dead loop in MemNode::Ideal");
if (can_reshape && igvn != NULL && igvn->_worklist.member(mem)) { // This memory slice may be dead. // Delay this mem node transformation until the memory is processed.
igvn->_worklist.push(this); return NodeSentinel; // caller will return NULL
}
Node *address = in(MemNode::Address); const Type *t_adr = phase->type(address); if (t_adr == Type::TOP) return NodeSentinel; // caller will return NULL
if (can_reshape && is_unsafe_access() && (t_adr == TypePtr::NULL_PTR)) { // Unsafe off-heap access with zero address. Remove access and other control users // to not confuse optimizations and add a HaltNode to fail if this is ever executed.
assert(ctl != NULL, "unsafe accesses should be control dependent"); for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
Node* u = ctl->fast_out(i); if (u != ctl) {
igvn->rehash_node_delayed(u); int nb = u->replace_edge(ctl, phase->C->top(), igvn);
--i, imax -= nb;
}
}
Node* frame = igvn->transform(new ParmNode(phase->C->start(), TypeFunc::FramePtr));
Node* halt = igvn->transform(new HaltNode(ctl, frame, "unsafe off-heap access with zero address"));
    phase->C->root()->add_req(halt);
    return this;
}
if (can_reshape && igvn != NULL &&
(igvn->_worklist.member(address) ||
(igvn->_worklist.size() > 0 && t_adr != adr_type())) ) { // The address's base and type may change when the address is processed. // Delay this mem node transformation until the address is processed.
igvn->_worklist.push(this); return NodeSentinel; // caller will return NULL
}
  // Do NOT remove or optimize the next lines: ensure a new alias index
  // is allocated for an oop pointer type before Escape Analysis.
  // Note: C++ will not remove it since the call has side effect.
  if (t_adr->isa_oopptr()) {
    int alias_idx = phase->C->get_alias_index(t_adr->is_ptr());
}
Node* base = NULL; if (address->is_AddP()) {
base = address->in(AddPNode::Base);
} if (base != NULL && phase->type(base)->higher_equal(TypePtr::NULL_PTR) &&
      !t_adr->isa_rawptr()) {
    // Note: raw address has TOP base and top->higher_equal(TypePtr::NULL_PTR) is true.
    // Skip this node optimization if its address has TOP base.
    return NodeSentinel; // caller will return NULL
}
  // Avoid independent memory operations
  Node* old_mem = mem;

  // The code which unhooks non-raw memories from complete (macro-expanded)
  // initializations was removed. After macro-expansion all stores caught
  // by Initialize node became raw stores and there is no information
  // which memory slices they modify. So it is unsafe to move any memory
  // operation above these stores. Also in most cases hooked non-raw memories
  // were already unhooked by using information from detect_ptr_independence()
  // and find_previous_store().

  if (mem->is_MergeMem()) {
    MergeMemNode* mmem = mem->as_MergeMem();
    const TypePtr *tp = t_adr->is_ptr();

    mem = step_through_mergemem(phase, mmem, tp, adr_type(), tty);
  }
if (mem != old_mem) {
    set_req_X(MemNode::Memory, mem, phase);
    if (phase->type(mem) == Type::TOP) return NodeSentinel;
    return this;
}
  // let the subclass continue analyzing...
  return NULL;
}
// Helper function for proving some simple control dominations.
// Attempt to prove that all control inputs of 'dom' dominate 'sub'.
// Already assumes that 'dom' is available at 'sub', and that 'sub'
// is not a constant (dominated by the method's StartNode).
// Used by MemNode::find_previous_store to prove that the
// control input of a memory operation predates (dominates)
// an allocation it wants to look past.
bool MemNode::all_controls_dominate(Node* dom, Node* sub) {
  if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  // Check 'dom'. Skip Proj and CatchProj nodes.
  dom = dom->find_exact_control(dom);
  if (dom == NULL || dom->is_top())
    return false; // Conservative answer for dead code

  if (dom == sub) {
    // For the case when, for example, 'sub' is Initialize and the original
    // 'dom' is Proj node of the 'sub'.
    return false;
  }

  if (dom->is_Con() || dom->is_Start() || dom->is_Root() || dom == sub)
    return true;
// 'dom' dominates 'sub' if its control edge and control edges // of all its inputs dominate or equal to sub's control edge.
// Currently 'sub' is either Allocate, Initialize or Start nodes. // Or Region for the check in LoadNode::Ideal(); // 'sub' should have sub->in(0) != NULL.
assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start() ||
sub->is_Region() || sub->is_Call(), "expecting only these nodes");
// Get control edge of 'sub'.
Node* orig_sub = sub;
  sub = sub->find_exact_control(sub->in(0));
  if (sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  assert(sub->is_CFG(), "expecting control");

  if (sub == dom)
    return true;

  if (sub->is_Start() || sub->is_Root())
    return false;

  {
    // Check all control edges of 'dom'.

    ResourceMark rm;
    Node_List nlist;
    Unique_Node_List dom_list;

    dom_list.push(dom);
    bool only_dominating_controls = false;

    for (uint next = 0; next < dom_list.size(); next++) {
      Node* n = dom_list.at(next);
      if (n == orig_sub)
        return false; // One of dom's inputs dominated by sub.
      if (!n->is_CFG() && n->pinned()) {
        // Check only own control edge for pinned non-control nodes.
        n = n->find_exact_control(n->in(0));
        if (n == NULL || n->is_top())
          return false; // Conservative answer for dead code
        assert(n->is_CFG(), "expecting control");
        dom_list.push(n);
      } else if (n->is_Con() || n->is_Start() || n->is_Root()) {
        only_dominating_controls = true;
      } else if (n->is_CFG()) {
        if (n->dominates(sub, nlist))
          only_dominating_controls = true;
        else
          return false;
      } else {
        // First, own control edge.
        Node* m = n->find_exact_control(n->in(0));
        if (m != NULL) {
          if (m->is_top())
            return false; // Conservative answer for dead code
dom_list.push(m);
} // Now, the rest of edges.
uint cnt = n->req(); for (uint i = 1; i < cnt; i++) {
m = n->find_exact_control(n->in(i)); if (m == NULL || m->is_top()) continue;
dom_list.push(m);
}
}
} return only_dominating_controls;
}
}
//---------------------detect_ptr_independence---------------------------------
// Used by MemNode::find_previous_store to prove that two base
// pointers are never equal.
// The pointers are accompanied by their associated allocations,
// if any, which have been previously discovered by the caller.
bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
                                      Node* p2, AllocateNode* a2,
                                      PhaseTransform* phase) {
  // Attempt to prove that these two pointers cannot be aliased.
  // They may both manifestly be allocations, and they should differ.
  // Or, if they are not both allocations, they can be distinct constants.
  // Otherwise, one is an allocation and the other a pre-existing value.
  if (a1 == NULL && a2 == NULL) {           // neither an allocation
    return (p1 != p2) && p1->is_Con() && p2->is_Con();
  } else if (a1 != NULL && a2 != NULL) {    // both allocations
    return (a1 != a2);
  } else if (a1 != NULL) {                  // one allocation a1
    // (Note:  p2->is_Con implies p2->in(0)->is_Root, which dominates.)
    return all_controls_dominate(p2, a1);
  } else { //(a2 != NULL)                   // one allocation a2
    return all_controls_dominate(p1, a2);
  }
  return false;
}
// Find an arraycopy ac that produces the memory state represented by parameter mem. // Return ac if // (a) can_see_stored_value=true and ac must have set the value for this load or if // (b) can_see_stored_value=false and ac could have set the value for this load or if // (c) can_see_stored_value=false and ac cannot have set the value for this load. // In case (c) change the parameter mem to the memory input of ac to skip it // when searching stored value. // Otherwise return NULL.
Node* LoadNode::find_previous_arraycopy(PhaseTransform* phase, Node* ld_alloc, Node*& mem, bool can_see_stored_value) const {
ArrayCopyNode* ac = find_array_copy_clone(phase, ld_alloc, mem); if (ac != NULL) {
Node* ld_addp = in(MemNode::Address);
Node* src = ac->in(ArrayCopyNode::Src); const TypeAryPtr* ary_t = phase->type(src)->isa_aryptr();
    // This is a load from a cloned array. The corresponding arraycopy ac must
    // have set the value for the load and we can return ac but only if the load
    // is known to be within bounds. This is checked below.
    if (ary_t != NULL && ld_addp->is_AddP()) {
Node* ld_offs = ld_addp->in(AddPNode::Offset);
BasicType ary_elem = ary_t->elem()->array_element_basic_type();
jlong header = arrayOopDesc::base_offset_in_bytes(ary_elem);
jlong elemsize = type2aelembytes(ary_elem);
      const TypeX*   ld_offs_t = phase->type(ld_offs)->isa_intptr_t();
      const TypeInt* sizetype  = ary_t->size();

      if (ld_offs_t->_lo >= header && ld_offs_t->_hi < (sizetype->_lo * elemsize + header)) {
        // The load is known to be within bounds. It receives its value from ac.
        return ac;
} // The load is known to be out-of-bounds.
} // The load could be out-of-bounds. It must not be hoisted but must remain // dependent on the runtime range check. This is achieved by returning NULL.
  } else if (mem->is_Proj() && mem->in(0) != NULL && mem->in(0)->is_ArrayCopy()) {
ArrayCopyNode* ac = mem->in(0)->as_ArrayCopy();
// The logic for reordering loads and stores uses four steps: // (a) Walk carefully past stores and initializations which we // can prove are independent of this load. // (b) Observe that the next memory state makes an exact match // with self (load or store), and locate the relevant store. // (c) Ensure that, if we were to wire self directly to the store, // the optimizer would fold it up somehow. // (d) Do the rewiring, and return, depending on some other part of // the optimizer to fold up the load. // This routine handles steps (a) and (b). Steps (c) and (d) are // specific to loads and stores, so they are handled by the callers. // (Currently, only LoadNode::Ideal has steps (c), (d). More later.) //
Node* MemNode::find_previous_store(PhaseTransform* phase) {
Node* ctrl = in(MemNode::Control);
Node* adr = in(MemNode::Address);
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
if (offset == Type::OffsetBot) return NULL; // cannot unalias unless there are precise offsets
  const bool adr_maybe_raw = check_if_adr_maybe_raw(adr);
  const TypeOopPtr* addr_t = phase->type(adr)->isa_oopptr();

  intptr_t size_in_bytes = memory_size();

  Node* mem = in(MemNode::Memory);   // start searching here...

  int cnt = 50;             // Cycle limiter
  for (;;) {                // While we can dance past unrelated stores...
    if (--cnt < 0)  break;  // Caught in cycle or a complicated dance?

    Node* prev = mem;
    if (mem->is_Store()) {
      Node* st_adr = mem->in(MemNode::Address);
      intptr_t st_offset = 0;
      Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
      if (st_base == NULL)
        break;              // inscrutable pointer

      // For raw accesses it's not enough to prove that constant offsets don't intersect.
      // We need the bases to be the equal in order for the offset check to make sense.
      if ((adr_maybe_raw || check_if_adr_maybe_raw(st_adr)) && st_base != base) {
        break;
}
      if (st_offset != offset && st_offset != Type::OffsetBot) {
        const int MAX_STORE = MAX2(BytesPerLong, (int)MaxVectorSize);
assert(mem->as_Store()->memory_size() <= MAX_STORE, ""); if (st_offset >= offset + size_in_bytes ||
st_offset <= offset - MAX_STORE ||
st_offset <= offset - mem->as_Store()->memory_size()) { // Success: The offsets are provably independent. // (You may ask, why not just test st_offset != offset and be done? // The answer is that stores of different sizes can co-exist // in the same sequence of RawMem effects. We sometimes initialize // a whole 'tile' of array elements with a single jint or jlong.)
mem = mem->in(MemNode::Memory); continue; // (a) advance through independent store memory
}
} if (st_base != base &&
detect_ptr_independence(base, alloc,
st_base,
AllocateNode::Ideal_allocation(st_base, phase),
phase)) { // Success: The bases are provably independent.
mem = mem->in(MemNode::Memory); continue; // (a) advance through independent store memory
}
      // (b) At this point, if the bases or offsets do not agree, we lose,
      // since we have not managed to prove 'this' and 'mem' independent.
      if (st_base == base && st_offset == offset) {
        return mem;         // let caller handle steps (c), (d)
      }

    } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
      InitializeNode* st_init = mem->in(0)->as_Initialize();
      AllocateNode*  st_alloc = st_init->allocation();
      if (st_alloc == NULL)
        break;              // something degenerated

      bool known_identical = false;
      bool known_independent = false;
      if (alloc == st_alloc)
        known_identical = true;
      else if (alloc != NULL)
        known_independent = true;
      else if (all_controls_dominate(this, st_alloc))
        known_independent = true;

      if (known_independent) {
        // The bases are provably independent: Either they are
        // manifestly distinct allocations, or else the control
        // of this load dominates the store's allocation.
        int alias_idx = phase->C->get_alias_index(adr_type());
        if (alias_idx == Compile::AliasIdxRaw) {
mem = st_alloc->in(TypeFunc::Memory);
} else {
mem = st_init->memory(alias_idx);
} continue; // (a) advance through independent store memory
}
      // (b) at this point, if we are not looking at a store initializing
      // the same allocation we are loading from, we lose.
      if (known_identical) {
        // From caller, can_see_stored_value will consult find_captured_store.
        return mem;         // let caller handle steps (c), (d)
}
    } else if (find_previous_arraycopy(phase, alloc, mem, false) != NULL) {
      if (prev != mem) {
        // Found an arraycopy but it doesn't affect that load
        continue;
      }
      // Found an arraycopy that may affect that load
      return mem;
    } else if (addr_t != NULL && addr_t->is_known_instance_field()) {
      // Can't use optimize_simple_memory_chain() since it needs PhaseGVN.
      if (mem->is_Proj() && mem->in(0)->is_Call()) {
        // ArrayCopyNodes processed here as well.
CallNode *call = mem->in(0)->as_Call(); if (!call->may_modify(addr_t, phase)) {
mem = call->in(TypeFunc::Memory); continue; // (a) advance through independent call memory
}
      } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
ArrayCopyNode* ac = NULL; if (ArrayCopyNode::may_modify(addr_t, mem->in(0)->as_MemBar(), phase, ac)) { break;
}
mem = mem->in(0)->in(TypeFunc::Memory); continue; // (a) advance through independent MemBar memory
      } else if (mem->is_ClearArray()) {
        if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) {
          // (the call updated 'mem' value)
          continue;         // (a) advance through independent allocation memory
        } else {
          // Can not bypass initialization of the instance
          // we are looking for.
          return mem;
}
      } else if (mem->is_MergeMem()) {
        int alias_idx = phase->C->get_alias_index(adr_type());
mem = mem->as_MergeMem()->memory_at(alias_idx); continue; // (a) advance through independent MergeMem memory
}
}
    // Unless there is an explicit 'continue', we must bail out here,
    // because 'mem' is an inscrutable memory state (e.g., a call).
    break;
}
return NULL; // bail out
}
//----------------------calculate_adr_type-------------------------------------
// Helper function.  Notices when the given type of address hits top or bottom.
// Also, asserts a cross-check of the type against the expected address type.
const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
  if (t == Type::TOP)  return NULL; // does not touch memory any more?
#ifdef ASSERT
  if (!VerifyAliases || VMError::is_error_reported() || Node::in_dump())  cross_check = NULL;
#endif
  const TypePtr* tp = t->isa_ptr();
  if (tp == NULL) {
assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide"); return TypePtr::BOTTOM; // touches lots of memory
  } else {
#ifdef ASSERT
    // %%%% [phh] We don't check the alias index if cross_check is
    //            TypeRawPtr::BOTTOM.  Needs to be investigated.
    if (cross_check != NULL &&
cross_check != TypePtr::BOTTOM &&
cross_check != TypeRawPtr::BOTTOM) { // Recheck the alias index, to see if it has changed (due to a bug).
Compile* C = Compile::current();
      assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
             "must stay in the original alias category");
      // The type of the address must be contained in the adr_type,
      // disregarding "null"-ness.
      // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
      const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
assert(cross_check->meet(tp_notnull) == cross_check->remove_speculative(), "real address must not escape from expected memory type");
    }
#endif
    return tp;
}
}
#ifndef PRODUCT
void LoadNode::dump_spec(outputStream *st) const {
MemNode::dump_spec(st); if( !Verbose && !WizardMode ) { // standard dump does this in Verbose and WizardMode
st->print(" #"); _type->dump_on(st);
} if (!depends_only_on_test()) {
st->print(" (does not depend only on test, "); if (control_dependency() == UnknownControl) {
st->print("unknown control");
    } else if (control_dependency() == Pinned) {
st->print("pinned");
    } else if (adr_type() == TypeRawPtr::BOTTOM) {
st->print("raw access");
} else {
st->print("unknown reason");
}
st->print(")");
}
}
#endif
#ifdef ASSERT
//----------------------------is_immutable_value-------------------------------
// Helper function to allow a raw load without control edge for some cases
bool LoadNode::is_immutable_value(Node* adr) {
  if (adr->is_AddP() && adr->in(AddPNode::Base)->is_top() &&
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal) {
// Is the value loaded previously stored by an arraycopy? If so return // a load node that reads from the source array so we may be able to // optimize out the ArrayCopy node later.
Node* LoadNode::can_see_arraycopy_value(Node* st, PhaseGVN* phase) const {
Node* ld_adr = in(MemNode::Address);
intptr_t ld_off = 0;
AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
Node* ac = find_previous_arraycopy(phase, ld_alloc, st, true); if (ac != NULL) {
assert(ac->is_ArrayCopy(), "what kind of node can this be?");
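// Look backwards through the memory graph for the store whose value this
// load would observe. Returns the stored value when an exact match can be
// proven, a zero constant for loads from freshly allocated objects, or NULL
// if nothing can be established.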
//---------------------------can_see_stored_value------------------------------ // This routine exists to make sure this set of tests is done the same // everywhere. We need to make a coordinated change: first LoadNode::Ideal // will change the graph shape in a way which makes memory alive twice at the // same time (uses the Oracle model of aliasing), then some // LoadXNode::Identity will fold things back to the equivalence-class model // of aliasing.
Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
Node* ld_adr = in(MemNode::Address);
intptr_t ld_off = 0;
Node* ld_base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ld_off);
Node* ld_alloc = AllocateNode::Ideal_allocation(ld_base, phase); const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr();
  Compile::AliasType* atp = (tp != NULL) ? phase->C->alias_type(tp) : NULL;
  // This is more general than load from boxing objects.
  if (skip_through_membars(atp, tp, phase->C->eliminate_boxing())) {
uint alias_idx = atp->index();
Node* result = NULL;
    Node* current = st;
    // Skip through chains of MemBarNodes checking the MergeMems for
    // new states for the slice of this load.  Stop once any other
    // kind of node is encountered.  Loads from final memory can skip
    // through any kind of MemBar but normal loads shouldn't skip
    // through MemBarAcquire since that could allow them to move out of
    // a synchronized region. It is not safe to step over MemBarCPUOrder,
    // because alias info above them may be inaccurate (e.g., due to
    // mixed/mismatched unsafe accesses).
    bool is_final_mem = !atp->is_rewritable();
    while (current->is_Proj()) {
      int opc = current->in(0)->Opcode();
      if ((is_final_mem && (opc == Op_MemBarAcquire ||
opc == Op_MemBarAcquireLock ||
opc == Op_LoadFence)) ||
opc == Op_MemBarRelease ||
opc == Op_StoreFence ||
opc == Op_MemBarReleaseLock ||
opc == Op_MemBarStoreStore ||
opc == Op_StoreStoreFence) {
Node* mem = current->in(0)->in(TypeFunc::Memory); if (mem->is_MergeMem()) {
MergeMemNode* merge = mem->as_MergeMem();
Node* new_st = merge->memory_at(alias_idx); if (new_st == merge->base_memory()) { // Keep searching
current = new_st; continue;
} // Save the new memory state for the slice and fall through // to exit.
result = new_st;
}
} break;
} if (result != NULL) {
st = result;
}
}
  // Loop around twice in the case Load -> Initialize -> Store.
  // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
  for (int trip = 0; trip <= 1; trip++) {
if (st->is_Store()) {
Node* st_adr = st->in(MemNode::Address); if (st_adr != ld_adr) { // Try harder before giving up. Unify base pointers with casts (e.g., raw/non-raw pointers).
intptr_t st_off = 0;
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_off); if (ld_base == NULL) return NULL; if (st_base == NULL) return NULL; if (!ld_base->eqv_uncast(st_base, /*keep_deps=*/true)) return NULL; if (ld_off != st_off) return NULL; if (ld_off == Type::OffsetBot) return NULL; // Same base, same offset. // Possible improvement for arrays: check index value instead of absolute offset.
// At this point we have proven something like this setup: // B = << base >> // L = LoadQ(AddP(Check/CastPP(B), #Off)) // S = StoreQ(AddP( B , #Off), V) // (Actually, we haven't yet proven the Q's are the same.) // In other words, we are loading from a casted version of // the same pointer-and-offset that we stored to. // Casted version may carry a dependency and it is respected. // Thus, we are able to replace L by V.
      }
      // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
      if (store_Opcode() != st->Opcode()) {
        return NULL;
      }
      // LoadVector/StoreVector needs additional check to ensure the types match.
      if (st->is_StoreVector()) {
        const TypeVect* in_vt = st->as_StoreVector()->vect_type();
        const TypeVect* out_vt = as_LoadVector()->vect_type();
        if (in_vt != out_vt) {
          return NULL;
}
} return st->in(MemNode::ValueIn);
}
    // A load from a freshly-created object always returns zero.
    // (This can happen after LoadNode::Ideal resets the load's memory input
    // to find_captured_store, which returned InitializeNode::zero_memory.)
    if (st->is_Proj() && st->in(0)->is_Allocate() &&
(st->in(0) == ld_alloc) &&
        (ld_off >= st->in(0)->as_Allocate()->minimum_header_size())) {
      // return a zero value for the load's basic type
      // (This is one of the few places where a generic PhaseTransform
      // can create new nodes.  Think of it as lazily manifesting
      // virtually pre-existing constants.)
      if (memory_type() != T_VOID) {
        if (ReduceBulkZeroing || find_array_copy_clone(phase, ld_alloc, in(MemNode::Memory)) == NULL) {
          // If ReduceBulkZeroing is disabled, we need to check if the allocation does not belong to an
          // ArrayCopyNode clone. If it does, then we cannot assume zero since the initialization is done
          // by the ArrayCopyNode.
          return phase->zerocon(memory_type());
}
} else { // TODO: materialize all-zero vector constant
assert(!isa_Load() || as_Load()->type()->isa_vect(), "");
}
}
    // A load from an initialization barrier can match a captured store.
    if (st->is_Proj() && st->in(0)->is_Initialize()) {
InitializeNode* init = st->in(0)->as_Initialize();
AllocateNode* alloc = init->allocation(); if ((alloc != NULL) && (alloc == ld_alloc)) { // examine a captured store value
st = init->find_captured_store(ld_off, memory_size(), phase); if (st != NULL) { continue; // take one more trip around
}
}
}
    // Load boxed value from result of valueOf() call is input parameter.
    if (this->is_Load() && ld_adr->is_AddP() &&
(tp != NULL) && tp->is_ptr_to_boxed_value()) {
intptr_t ignore = 0;
Node* base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ignore);
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
base = bs->step_over_gc_barrier(base); if (base != NULL && base->is_Proj() &&
base->as_Proj()->_con == TypeFunc::Parms &&
base->in(0)->is_CallStaticJava() &&
base->in(0)->as_CallStaticJava()->is_boxing_method()) { return base->in(0)->in(TypeFunc::Parms);
}
    }

    break;
  }

  return NULL;
}
//------------------------------Identity--------------------------------------- // Loads are identity if previous store is to same address
Node* LoadNode::Identity(PhaseGVN* phase) { // If the previous store-maker is the right kind of Store, and the store is // to the same address, then we are equal to the value stored.
Node* mem = in(Memory);
  Node* value = can_see_stored_value(mem, phase);
  if( value ) {
    // byte, short & char stores truncate naturally.
    // A load has to load the truncated value which requires
    // some sort of masking operation and that requires an
    // Ideal call instead of an Identity call.
    if (memory_size() < BytesPerInt) {
      // If the input to the store does not fit with the load's result type,
      // it must be truncated via an Ideal call.
      if (!phase->type(value)->higher_equal(phase->type(this)))
        return this;
    }
    // (This works even when value is a Con, but LoadNode::Value
    //  usually runs first, producing the singleton type of the Con.)
    if (!has_pinned_control_dependency() || value->is_Con()) {
      return value;
    } else {
      return this;
}
}
  if (has_pinned_control_dependency()) {
    return this;
} // Search for an existing data phi which was generated before for the same // instance's field to avoid infinite generation of phis in a loop.
Node *region = mem->in(0); if (is_instance_field_load_with_local_phi(region)) { const TypeOopPtr *addr_t = in(Address)->bottom_type()->isa_oopptr(); int this_index = phase->C->get_alias_index(addr_t); int this_offset = addr_t->offset(); int this_iid = addr_t->instance_id(); if (!addr_t->is_known_instance() &&
addr_t->is_ptr_to_boxed_value()) { // Use _idx of address base (could be Phi node) for boxed values.
intptr_t ignore = 0;
      Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
      if (base == NULL) {
        return this;
}
this_iid = base->_idx;
} const Type* this_type = bottom_type(); for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
Node* phi = region->fast_out(i); if (phi->is_Phi() && phi != mem &&
phi->as_Phi()->is_same_inst_field(this_type, (int)mem->_idx, this_iid, this_index, this_offset)) { return phi;
}
}
  }

  return this;
}
bool is_mismatched = is_mismatched_access(); const TypeRawPtr* raw_type = gvn.type(in(MemNode::Memory))->isa_rawptr(); if (raw_type == NULL) {
is_mismatched = true; // conservatively match all non-raw accesses as mismatched
} if (is_mismatched) {
st->set_mismatched_access();
} return st;
}
// We're loading from an object which has autobox behaviour. // If this object is result of a valueOf call we'll have a phi // merging a newly allocated object and a load from the cache. // We want to replace this load with the original incoming // argument to the valueOf call.
Node* LoadNode::eliminate_autobox(PhaseIterGVN* igvn) {
assert(igvn->C->eliminate_boxing(), "sanity");
intptr_t ignore = 0;
  Node* base = AddPNode::Ideal_base_and_offset(in(Address), igvn, ignore);
  if ((base == NULL) || base->is_Phi()) {
    // Push the loads from the phi that comes from valueOf up
    // through it to allow elimination of the loads and the recovery
    // of the original value. It is done in split_through_phi().
    return NULL;
  } else if (base->is_Load() ||
             (base->is_DecodeN() && base->in(1)->is_Load())) {
    // Eliminate the load of boxed value for integer types from the cache
    // array by deriving the value from the index into the array.
    // Capture the offset of the load and then reverse the computation.

    // Get LoadN node which loads a boxing object from 'cache' array.
    if (base->is_DecodeN()) {
base = base->in(1);
} if (!base->in(Address)->is_AddP()) { return NULL; // Complex address
}
AddPNode* address = base->in(Address)->as_AddP();
Node* cache_base = address->in(AddPNode::Base); if ((cache_base != NULL) && cache_base->is_DecodeN()) { // Get ConP node which is static 'cache' field.
cache_base = cache_base->in(1);
} if ((cache_base != NULL) && cache_base->is_Con()) { const TypeAryPtr* base_type = cache_base->bottom_type()->isa_aryptr(); if ((base_type != NULL) && base_type->is_autobox_cache()) {
Node* elements[4]; int shift = exact_log2(type2aelembytes(T_OBJECT)); int count = address->unpack_offsets(elements, ARRAY_SIZE(elements)); if (count > 0 && elements[0]->is_Con() &&
(count == 1 ||
(count == 2 && elements[1]->Opcode() == Op_LShiftX &&
elements[1]->in(2) == igvn->intcon(shift)))) {
ciObjArray* array = base_type->const_oop()->as_obj_array(); // Fetch the box object cache[0] at the base of the array and get its value
ciInstance* box = array->obj_at(0)->as_instance();
ciInstanceKlass* ik = box->klass()->as_instance_klass();
assert(ik->is_box_klass(), "sanity");
assert(ik->nof_nonstatic_fields() == 1, "change following code"); if (ik->nof_nonstatic_fields() == 1) { // This should be true nonstatic_field_at requires calling // nof_nonstatic_fields so check it anyway
ciConstant c = box->field_value(ik->nonstatic_field_at(0));
BasicType bt = c.basic_type(); // Only integer types have boxing cache.
assert(bt == T_BOOLEAN || bt == T_CHAR ||
bt == T_BYTE || bt == T_SHORT ||
bt == T_INT || bt == T_LONG, "wrong type = %s", type2name(bt));
jlong cache_low = (bt == T_LONG) ? c.as_long() : c.as_int(); if (cache_low != (int)cache_low) { return NULL; // should not happen since cache is array indexed by value
}
jlong offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT) - (cache_low << shift); if (offset != (int)offset) { return NULL; // should not happen since cache is array indexed by value
} // Add up all the offsets making of the address of the load
Node* result = elements[0]; for (int i = 1; i < count; i++) {
result = igvn->transform(new AddXNode(result, elements[i]));
} // Remove the constant offset from the address and then
      result = igvn->transform(new AddXNode(result, igvn->MakeConX(-(int)offset)));
      // remove the scaling of the offset to recover the original index.
      if (result->Opcode() == Op_LShiftX && result->in(2) == igvn->intcon(shift)) {
        // Peel the shift off directly but wrap it in a dummy node
        // since Ideal can't return existing nodes
igvn->_worklist.push(result); // remove dead node later
result = new RShiftXNode(result->in(1), igvn->intcon(0));
      } else if (result->is_Add() && result->in(2)->is_Con() &&
result->in(1)->Opcode() == Op_LShiftX &&
                 result->in(1)->in(2) == igvn->intcon(shift)) {
        // We can't do general optimization: ((X<<Z) + Y) >> Z ==> X + (Y>>Z)
        // but for boxing cache access we know that X<<Z will not overflow
        // (there is range check) so we do this optimization by hand here.
igvn->_worklist.push(result); // remove dead node later
Node* add_con = new RShiftXNode(result->in(2), igvn->intcon(shift));
result = new AddXNode(result->in(1)->in(1), igvn->transform(add_con));
} else {
result = new RShiftXNode(result, igvn->intcon(shift));
      }
#ifdef _LP64
      if (bt != T_LONG) {
result = new ConvL2INode(igvn->transform(result));
      }
#else
      if (bt == T_LONG) {
result = new ConvI2LNode(igvn->transform(result));
      }
#endif
      // Boxing/unboxing can be done from signed & unsigned loads (e.g. LoadUB -> ... -> LoadB pair).
      // Need to preserve unboxing load type if it is unsigned.
      switch(this->Opcode()) {
        case Op_LoadUB:
result = new AndINode(igvn->transform(result), igvn->intcon(0xFF)); break; case Op_LoadUS:
result = new AndINode(igvn->transform(result), igvn->intcon(0xFFFF)); break;
} return result;
}
}
}
}
} return NULL;
}
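// A Phi can only be split when the surrounding graph has settled: every
// region input and every data input must exist and have a non-TOP type.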
static bool stable_phi(PhiNode* phi, PhaseGVN *phase) {
  Node* region = phi->in(0);
  if (region == NULL) {
    return false; // Wait stable graph
  }
  uint cnt = phi->req();
  for (uint i = 1; i < cnt; i++) {
    Node* rc = region->in(i);
    if (rc == NULL || phase->type(rc) == Type::TOP)
      return false; // Wait stable graph
    Node* in = phi->in(i);
    if (in == NULL || phase->type(in) == Type::TOP)
      return false; // Wait stable graph
  }
  return true;
} //------------------------------split_through_phi------------------------------ // Split instance or boxed field load through Phi.
Node* LoadNode::split_through_phi(PhaseGVN* phase) { if (req() > 3) {
    assert(is_LoadVector() && Opcode() != Op_LoadVector, "load has too many inputs");
    // LoadVector subclasses such as LoadVectorMasked have extra inputs that the logic below doesn't take into account
    return NULL;
}
Node* mem = in(Memory);
Node* address = in(Address); const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr();
  assert((t_oop != NULL) &&
         (t_oop->is_known_instance_field() ||
          t_oop->is_ptr_to_boxed_value()), "invalid conditions");

  Compile* C = phase->C;
  intptr_t ignore = 0;
  Node*    base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
  bool base_is_phi = (base != NULL) && base->is_Phi();
  bool load_boxed_values = t_oop->is_ptr_to_boxed_value() && C->aggressive_unboxing() &&
                           (base != NULL) && (base == address->in(AddPNode::Base)) &&
                           phase->type(base)->higher_equal(TypePtr::NOTNULL);

  if (!((mem->is_Phi() || base_is_phi) &&
(load_boxed_values || t_oop->is_known_instance_field()))) { return NULL; // memory is not Phi
}
if (mem->is_Phi()) { if (!stable_phi(mem->as_Phi(), phase)) { return NULL; // Wait stable graph
}
    uint cnt = mem->req();
    // Check for loop invariant memory.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
Node* in = mem->in(i);
Node* m = optimize_memory_chain(in, t_oop, this, phase); if (m == mem) { if (i == 1) { // if the first edge was a loop, check second edge too. // If both are replaceable - we are in an infinite loop
Node *n = optimize_memory_chain(mem->in(2), t_oop, this, phase); if (n == mem) { break;
}
}
          set_req(Memory, mem->in(cnt - i));
          return this; // made change
}
}
}
} if (base_is_phi) { if (!stable_phi(base->as_Phi(), phase)) { return NULL; // Wait stable graph
}
    uint cnt = base->req();
    // Check for loop invariant memory.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
        if (base->in(i) == base) {
          return NULL; // Wait stable graph
}
}
}
}