/*
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.inline.hpp"
#include "gc/shared/gc_globals.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.hpp"
#include "runtime/java.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include <math.h>
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
G1ConcurrentRefineThread* result = nullptr;
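  // InjectGCWorkerCreationFailure is a testing flag; when set it simulates
  // creation failure for refinement threads created after initialization.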
if (initializing || !InjectGCWorkerCreationFailure) {
result = G1ConcurrentRefineThread::create(_cr, worker_id);
}
if (result == nullptr || result->osthread() == nullptr) {
log_warning(gc)("Failed to create refinement thread %u, no more %s",
worker_id,
result == nullptr ? "memory" : "OS threads");
if (result != nullptr) {
delete result;
result = nullptr;
}
}
return result;
}
G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
_cr(nullptr),
_threads(nullptr),
_max_num_threads(0)
{}
G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
if (_threads != nullptr) {
for (uint i = 0; i < _max_num_threads; i++) {
G1ConcurrentRefineThread* t = _threads[i];
if (t == nullptr) {
#ifdef ASSERT
for (uint j = i + 1; j < _max_num_threads; ++j) {
assert(_threads[j] == nullptr, "invariant");
}
#endif // ASSERT
break;
} else {
delete t;
}
}
FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
}
}
jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint max_num_threads) {
  assert(cr != nullptr, "G1ConcurrentRefine must not be nullptr");
_cr = cr;
_max_num_threads = max_num_threads;
if (max_num_threads > 0) {
_threads = NEW_C_HEAP_ARRAY(G1ConcurrentRefineThread*, max_num_threads, mtGC);
_threads[0] = create_refinement_thread(0, true);
if (_threads[0] == nullptr) {
vm_shutdown_during_initialization("Could not allocate primary refinement thread");
return JNI_ENOMEM;
}
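    // With UseDynamicNumberOfGCThreads only the primary thread is created
    // eagerly; the remaining slots stay null and are filled on demand by
    // activate().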
if (UseDynamicNumberOfGCThreads) {
for (uint i = 1; i < max_num_threads; ++i) {
_threads[i] = nullptr;
}
} else {
for (uint i = 1; i < max_num_threads; ++i) {
_threads[i] = create_refinement_thread(i, true);
if (_threads[i] == nullptr) {
vm_shutdown_during_initialization("Could not allocate refinement threads.");
return JNI_ENOMEM;
}
}
}
}
return JNI_OK;
}
#ifdef ASSERT
void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
assert(_threads != nullptr, "No threads");
assert(Thread::current() == _threads[0], "Not primary thread");
}
#endif // ASSERT
bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
assert(worker_id < _max_num_threads, "precondition");
G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
if (thread_to_activate == nullptr) {
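    // Create the refinement thread lazily on first activation.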
thread_to_activate = create_refinement_thread(worker_id, false);
if (thread_to_activate == nullptr) {
return false;
}
_threads[worker_id] = thread_to_activate;
}
thread_to_activate->activate();
return true;
}
void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
tc->do_thread(_threads[i]);
}
}
}
void G1ConcurrentRefineThreadControl::stop() {
for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
_threads[i]->stop();
}
}
}
uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
// Instead of a fixed value, this could be a command line option. But then
// we might also want to allow configuration of adjust_threads_wait_ms().
return 50;
}
static size_t minimum_pending_cards_target() {
// One buffer per thread.
return ParallelGCThreads * G1UpdateBufferSize;
}
G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
_policy(policy),
_threads_wanted(0),
_pending_cards_target(PendingCardsTargetUninitialized),
_last_adjust(),
_needs_adjust(false),
_threads_needed(policy, adjust_threads_period_ms()),
_thread_control(),
_dcqs(G1BarrierSet::dirty_card_queue_set())
{}
jint G1ConcurrentRefine::initialize() {
return _thread_control.initialize(this, max_num_threads());
}
G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
*ecode = cr->initialize();
if (*ecode != 0) {
delete cr;
cr = nullptr;
}
return cr;
}
void G1ConcurrentRefine::stop() {
_thread_control.stop();
}
G1ConcurrentRefine::~G1ConcurrentRefine() {
}
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
_thread_control.worker_threads_do(tc);
}
uint G1ConcurrentRefine::max_num_threads() {
return G1ConcRefinementThreads;
}
void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
double goal_ms) {
size_t minimum = minimum_pending_cards_target();
if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
_pending_cards_target);
return;
}
// Base the pending cards budget on the measured rate.
double rate = processed_logged_cards / logged_cards_time_ms;
size_t budget = static_cast<size_t>(goal_ms * rate);
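  // For example, 10000 cards processed in 5.0 ms gives a rate of 2000
  // cards/ms; with a 2.0 ms goal the budget is 4000 cards.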
// Deduct predicted cards in thread buffers to get target.
size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
// Add some hysteresis with previous values.
if (is_pending_cards_target_initialized()) {
new_target = (new_target + _pending_cards_target) / 2;
}
// Apply minimum target.
new_target = MAX2(new_target, minimum_pending_cards_target());
_pending_cards_target = new_target;
log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
}
void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
double goal_ms) {
if (!G1UseConcRefinement) return;
update_pending_cards_target(logged_cards_time_ms,
processed_logged_cards,
predicted_thread_buffer_cards,
goal_ms);
if (_thread_control.max_num_threads() == 0) {
// If no refinement threads then the mutator threshold is the target.
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
} else {
// Provisionally make the mutator threshold unlimited, to be updated by
// the next periodic adjustment. Because card state may have changed
// drastically, record that adjustment is needed and kick the primary
// thread, in case it is waiting.
_dcqs.set_mutator_refinement_threshold(SIZE_MAX);
_needs_adjust = true;
if (is_pending_cards_target_initialized()) {
_thread_control.activate(0);
}
}
}
// Wake up the primary thread less frequently when the time available until
// the next GC is longer. But don't increase the wait time too rapidly.
// This reduces the number of primary thread wakeups that just immediately
// go back to waiting, while still being responsive to behavior changes.
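// For example, 400 ms until the next predicted GC gives a wait time of
// sqrt(400) * 4 = 80 ms.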
static uint64_t compute_adjust_wait_time_ms(double available_ms) {
return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
}
uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
assert_current_thread_is_primary_refinement_thread();
if (is_pending_cards_target_initialized()) {
double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
return MAX2(wait_time_ms, adjust_threads_period_ms());
} else {
// If target not yet initialized then wait forever (until explicitly
// activated). This happens during startup, when we don't bother with
// refinement.
return 0;
}
}
class G1ConcurrentRefine::RemSetSamplingClosure : public HeapRegionClosure {
G1CollectionSet* _cset;
size_t _sampled_rs_length;
public:
explicit RemSetSamplingClosure(G1CollectionSet* cset) :
_cset(cset), _sampled_rs_length(0) {}
bool do_heap_region(HeapRegion* r) override {
size_t rs_length = r->rem_set()->occupied();
_sampled_rs_length += rs_length;
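    // Returning false continues the iteration over the remaining regions.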
return false;
}
size_t sampled_rs_length() const { return _sampled_rs_length; }
};
// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
// how much time the next GC can take, and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC pause is spent scanning
// the remembered sets (among other components), so this thread continually
// reevaluates the prediction for the remembered set scanning costs and
// potentially resizes the young gen. This may cause a GC to occur earlier than
// it otherwise would, or even increase the young gen size to keep the pause
// time goal.
void G1ConcurrentRefine::adjust_young_list_target_length() {
if (_policy->use_adaptive_young_list_length()) {
G1CollectionSet* cset = G1CollectedHeap::heap()->collection_set();
RemSetSamplingClosure cl{cset};
cset->iterate(&cl);
_policy->revise_young_list_target_length(cl.sampled_rs_length());
}
}
bool G1ConcurrentRefine::adjust_threads_periodically() {
assert_current_thread_is_primary_refinement_thread();
// Check whether it's time to do a periodic adjustment.
if (!_needs_adjust) {
Tickspan since_adjust = Ticks::now() - _last_adjust;
if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
_needs_adjust = true;
}
}
// If needed, try to adjust threads wanted.
if (_needs_adjust) {
    // Getting used young bytes requires holding Heap_lock. But we can't just
    // take the lock and block until it is available, because blocking on the
    // lock could deadlock with a GC VMOp that is holding the lock and
    // requesting a safepoint. Instead try to lock, and if that fails then skip
    // the adjustment for this iteration of the thread, do some refinement
    // work, and retry the adjustment later.
if (Heap_lock->try_lock()) {
size_t used_bytes = _policy->estimate_used_young_bytes_locked();
Heap_lock->unlock();
adjust_young_list_target_length();
size_t young_bytes = _policy->young_list_target_length() * HeapRegion::GrainBytes;
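      // Clamp at zero in case used bytes already exceed the young target.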
size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
adjust_threads_wanted(available_bytes);
_needs_adjust = false;
_last_adjust = Ticks::now();
return true;
}
}
return false;
}
bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
}
void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
assert_current_thread_is_primary_refinement_thread();
size_t num_cards = _dcqs.num_cards();
size_t mutator_threshold = SIZE_MAX;
uint old_wanted = Atomic::load(&_threads_wanted);
_threads_needed.update(old_wanted,
available_bytes,
num_cards,
_pending_cards_target);
uint new_wanted = _threads_needed.threads_needed();
if (new_wanted > _thread_control.max_num_threads()) {
    // If even running all the threads can't reach the goal, turn on refinement
    // by mutator threads. Using the target as the threshold may be stronger
    // than required, but it will do the most to get us under the goal, and
    // we'll reevaluate with the next adjustment.
mutator_threshold = _pending_cards_target;
new_wanted = _thread_control.max_num_threads();
} else if (is_in_last_adjustment_period()) {
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even if refinement threads deactivate in the meantime. And if
// the target hasn't been reached, this prevents things from getting
// worse.
mutator_threshold = _pending_cards_target;
}
Atomic::store(&_threads_wanted, new_wanted);
_dcqs.set_mutator_refinement_threshold(mutator_threshold);
log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
"predicted: %zu, time: %1.2fms",
new_wanted,
num_cards,
_threads_needed.predicted_cards_at_next_gc(),
_threads_needed.predicted_time_until_next_gc_ms());
// Activate newly wanted threads. The current thread is the primary
// refinement thread, so is already active.
for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
if (!_thread_control.activate(i)) {
// Failed to allocate and activate thread. Stop trying to activate, and
// instead use mutator threads to make up the gap.
Atomic::store(&_threads_wanted, i);
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
break;
}
}
}
void G1ConcurrentRefine::reduce_threads_wanted() {
assert_current_thread_is_primary_refinement_thread();
if (!_needs_adjust) { // Defer if adjustment request is active.
uint wanted = Atomic::load(&_threads_wanted);
if (wanted > 0) {
Atomic::store(&_threads_wanted, --wanted);
}
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even as refinement threads deactivate in the meantime.
if (is_in_last_adjustment_period()) {
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
}
}
}
bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
return worker_id < Atomic::load(&_threads_wanted);
}
bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
assert_current_thread_is_primary_refinement_thread();
return _needs_adjust;
}
void G1ConcurrentRefine::record_thread_adjustment_needed() {
assert_current_thread_is_primary_refinement_thread();
_needs_adjust = true;
}
G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
G1ConcurrentRefineStats& stats = *crt->refinement_stats();
_total_stats += stats;
stats.reset();
}
} collector;
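  // Visit every live refinement thread, accumulating and resetting its stats.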
threads_do(&collector);
return collector._total_stats;
}
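// Refinement worker ids are offset past the ids reserved by the dirty card
// queue set, so the two id ranges do not overlap.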
uint G1ConcurrentRefine::worker_id_offset() {
return G1DirtyCardQueueSet::num_par_ids();
}
bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats) {
uint adjusted_id = worker_id + worker_id_offset();
return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
}