/*
* Copyright (c) 2001, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.inline.hpp"
#include "gc/shared/gc_globals.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.hpp"
#include "runtime/java.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include <math.h>
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
G1ConcurrentRefineThread* result = nullptr;
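  // InjectGCWorkerCreationFailure is a testing flag; when set it simulates
  // creation failure for refinement threads created after initialization.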
if (initializing || !InjectGCWorkerCreationFailure) {
result = G1ConcurrentRefineThread::create(_cr, worker_id);
}
if (result == nullptr || result->osthread() == nullptr) {
log_warning(gc)("Failed to create refinement thread %u, no more %s",
worker_id,
result == nullptr ? "memory" : "OS threads");
if (result != nullptr) {
delete result;
result = nullptr;
}
}
return result;
}
G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
_cr(nullptr),
_threads(nullptr),
_max_num_threads(0)
{}
G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
if (_threads != nullptr) {
for (uint i = 0; i < _max_num_threads; i++) {
G1ConcurrentRefineThread* t = _threads[i];
if (t == nullptr) {
#ifdef ASSERT
for (uint j = i + 1; j < _max_num_threads; ++j) {
assert(_threads[j] == nullptr, "invariant");
}
#endif // ASSERT
break;
} else {
delete t;
}
}
FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
}
}
jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint max_num_threads) {
  assert(cr != nullptr, "G1ConcurrentRefine must not be nullptr");
_cr = cr;
_max_num_threads = max_num_threads;
if (max_num_threads > 0) {
_threads = NEW_C_HEAP_ARRAY(G1ConcurrentRefineThread*, max_num_threads, mtGC);
_threads[0] = create_refinement_thread(0, true);
if (_threads[0] == nullptr) {
vm_shutdown_during_initialization("Could not allocate primary refinement thread");
return JNI_ENOMEM;
}
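    // With UseDynamicNumberOfGCThreads only the primary thread is created
    // eagerly; the remaining slots stay null and are filled on demand by
    // activate().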
if (UseDynamicNumberOfGCThreads) {
for (uint i = 1; i < max_num_threads; ++i) {
_threads[i] = nullptr;
}
} else {
for (uint i = 1; i < max_num_threads; ++i) {
_threads[i] = create_refinement_thread(i, true);
if (_threads[i] == nullptr) {
vm_shutdown_during_initialization("Could not allocate refinement threads.");
return JNI_ENOMEM;
}
}
}
}
return JNI_OK;
}
#ifdef ASSERT
void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
assert(_threads != nullptr, "No threads");
assert(Thread::current() == _threads[0], "Not primary thread");
}
#endif // ASSERT
bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
assert(worker_id < _max_num_threads, "precondition");
G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
if (thread_to_activate == nullptr) {
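    // Create the refinement thread lazily on first activation.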
thread_to_activate = create_refinement_thread(worker_id, false);
if (thread_to_activate == nullptr) {
return false;
}
_threads[worker_id] = thread_to_activate;
}
thread_to_activate->activate();
return true;
}
void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
tc->do_thread(_threads[i]);
}
}
}
void G1ConcurrentRefineThreadControl::stop() {
for (uint i = 0; i < _max_num_threads; i++) {
    if (_threads[i] != nullptr) {
_threads[i]->stop();
}
}
}
uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
// Instead of a fixed value, this could be a command line option. But then
// we might also want to allow configuration of adjust_threads_wait_ms().
return 50;
}
static size_t minimum_pending_cards_target() {
// One buffer per thread.
return ParallelGCThreads * G1UpdateBufferSize;
}
G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
_policy(policy),
_threads_wanted(0),
_pending_cards_target(PendingCardsTargetUninitialized),
_last_adjust(),
_needs_adjust(false),
_threads_needed(policy, adjust_threads_period_ms()),
_thread_control(),
_dcqs(G1BarrierSet::dirty_card_queue_set())
{}
jint G1ConcurrentRefine::initialize() {
return _thread_control.initialize(this, max_num_threads());
}
G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
*ecode = cr->initialize();
if (*ecode != 0) {
delete cr;
cr = nullptr;
}
return cr;
}
void G1ConcurrentRefine::stop() {
_thread_control.stop();
}
G1ConcurrentRefine::~G1ConcurrentRefine() {
}
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
_thread_control.worker_threads_do(tc);
}
uint G1ConcurrentRefine::max_num_threads() {
return G1ConcRefinementThreads;
}
void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
double goal_ms) {
size_t minimum = minimum_pending_cards_target();
if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
_pending_cards_target);
return;
}
// Base the pending cards budget on the measured rate.
double rate = processed_logged_cards / logged_cards_time_ms;
size_t budget = static_cast<size_t>(goal_ms * rate);
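  // For example, 10000 cards processed in 5.0 ms gives a rate of 2000
  // cards/ms; with a 2.0 ms goal the budget is 4000 cards.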
// Deduct predicted cards in thread buffers to get target.
size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
// Add some hysteresis with previous values.
if (is_pending_cards_target_initialized()) {
new_target = (new_target + _pending_cards_target) / 2;
}
// Apply minimum target.
new_target = MAX2(new_target, minimum_pending_cards_target());
_pending_cards_target = new_target;
log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
}
void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
double goal_ms) {
if (!G1UseConcRefinement) return;
update_pending_cards_target(logged_cards_time_ms,
processed_logged_cards,
predicted_thread_buffer_cards,
goal_ms);
if (_thread_control.max_num_threads() == 0) {
// If no refinement threads then the mutator threshold is the target.
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
} else {
// Provisionally make the mutator threshold unlimited, to be updated by
// the next periodic adjustment. Because card state may have changed
// drastically, record that adjustment is needed and kick the primary
// thread, in case it is waiting.
_dcqs.set_mutator_refinement_threshold(SIZE_MAX);
_needs_adjust = true;
if (is_pending_cards_target_initialized()) {
_thread_control.activate(0);
}
}
}
// Wake up the primary thread less frequently when the time available until
// the next GC is longer. But don't increase the wait time too rapidly.
// This reduces the number of primary thread wakeups that just immediately
// go back to waiting, while still being responsive to behavior changes.
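// For example, 400 ms until the next predicted GC gives a wait time of
// sqrt(400) * 4 = 80 ms.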
static uint64_t compute_adjust_wait_time_ms(double available_ms) {
return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
}
uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
assert_current_thread_is_primary_refinement_thread();
if (is_pending_cards_target_initialized()) {
double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
return MAX2(wait_time_ms, adjust_threads_period_ms());
} else {
// If target not yet initialized then wait forever (until explicitly
// activated). This happens during startup, when we don't bother with
// refinement.
return 0;
}
}
class G1ConcurrentRefine::RemSetSamplingClosure : public HeapRegionClosure {
G1CollectionSet* _cset;
size_t _sampled_rs_length;
public:
explicit RemSetSamplingClosure(G1CollectionSet* cset) :
_cset(cset), _sampled_rs_length(0) {}
bool do_heap_region(HeapRegion* r) override {
size_t rs_length = r->rem_set()->occupied();
_sampled_rs_length += rs_length;
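    // Returning false continues the iteration over the remaining regions.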
return false;
}
size_t sampled_rs_length() const { return _sampled_rs_length; }
};
// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
// how much time the next GC can take, and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC pause is spent scanning
// the remembered sets (among other components), so this thread continually
// reevaluates the prediction for the remembered set scanning costs and
// potentially resizes the young gen. This may cause a GC to occur earlier than
// it otherwise would, or even increase the young gen size to keep the pause
// time goal.
void G1ConcurrentRefine::adjust_young_list_target_length() {
if (_policy->use_adaptive_young_list_length()) {
G1CollectionSet* cset = G1CollectedHeap::heap()->collection_set();
RemSetSamplingClosure cl{cset};
cset->iterate(&cl);
_policy->revise_young_list_target_length(cl.sampled_rs_length());
}
}
bool G1ConcurrentRefine::adjust_threads_periodically() {
assert_current_thread_is_primary_refinement_thread();
// Check whether it's time to do a periodic adjustment.
if (!_needs_adjust) {
Tickspan since_adjust = Ticks::now() - _last_adjust;
if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
_needs_adjust = true;
}
}
// If needed, try to adjust threads wanted.
if (_needs_adjust) {
    // Getting used young bytes requires holding Heap_lock. But we can't just
    // take the lock and block until it is available, because blocking on the
    // lock could deadlock with a GC VMOp that is holding the lock and
    // requesting a safepoint. Instead try to lock, and if that fails then skip
    // the adjustment for this iteration of the thread, do some refinement
    // work, and retry the adjustment later.
if (Heap_lock->try_lock()) {
size_t used_bytes = _policy->estimate_used_young_bytes_locked();
Heap_lock->unlock();
adjust_young_list_target_length();
size_t young_bytes = _policy->young_list_target_length() * HeapRegion::GrainBytes;
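      // Clamp at zero in case used bytes already exceed the young target.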
size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
adjust_threads_wanted(available_bytes);
_needs_adjust = false;
_last_adjust = Ticks::now();
return true;
}
}
return false;
}
bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
}
void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
assert_current_thread_is_primary_refinement_thread();
size_t num_cards = _dcqs.num_cards();
size_t mutator_threshold = SIZE_MAX;
uint old_wanted = Atomic::load(&_threads_wanted);
_threads_needed.update(old_wanted,
available_bytes,
num_cards,
_pending_cards_target);
uint new_wanted = _threads_needed.threads_needed();
if (new_wanted > _thread_control.max_num_threads()) {
    // If even running all the threads can't reach the goal, turn on refinement
    // by mutator threads. Using the target as the threshold may be stronger
    // than required, but it will do the most to get us under the goal, and
    // we'll reevaluate with the next adjustment.
mutator_threshold = _pending_cards_target;
new_wanted = _thread_control.max_num_threads();
} else if (is_in_last_adjustment_period()) {
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even if refinement threads deactivate in the meantime. And if
// the target hasn't been reached, this prevents things from getting
// worse.
mutator_threshold = _pending_cards_target;
}
Atomic::store(&_threads_wanted, new_wanted);
_dcqs.set_mutator_refinement_threshold(mutator_threshold);
log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
"predicted: %zu, time: %1.2fms",
new_wanted,
num_cards,
_threads_needed.predicted_cards_at_next_gc(),
_threads_needed.predicted_time_until_next_gc_ms());
// Activate newly wanted threads. The current thread is the primary
// refinement thread, so is already active.
for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
if (!_thread_control.activate(i)) {
// Failed to allocate and activate thread. Stop trying to activate, and
// instead use mutator threads to make up the gap.
Atomic::store(&_threads_wanted, i);
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
break;
}
}
}
void G1ConcurrentRefine::reduce_threads_wanted() {
assert_current_thread_is_primary_refinement_thread();
if (!_needs_adjust) { // Defer if adjustment request is active.
uint wanted = Atomic::load(&_threads_wanted);
if (wanted > 0) {
Atomic::store(&_threads_wanted, --wanted);
}
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even as refinement threads deactivate in the meantime.
if (is_in_last_adjustment_period()) {
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
}
}
}
bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
return worker_id < Atomic::load(&_threads_wanted);
}
bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
assert_current_thread_is_primary_refinement_thread();
return _needs_adjust;
}
void G1ConcurrentRefine::record_thread_adjustment_needed() {
assert_current_thread_is_primary_refinement_thread();
_needs_adjust = true;
}
G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
G1ConcurrentRefineStats& stats = *crt->refinement_stats();
_total_stats += stats;
stats.reset();
}
} collector;
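  // Visit every live refinement thread, accumulating and resetting its stats.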
threads_do(&collector);
return collector._total_stats;
}
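// Refinement worker ids are offset past the ids reserved by the dirty card
// queue set, so the two id ranges do not overlap.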
uint G1ConcurrentRefine::worker_id_offset() {
return G1DirtyCardQueueSet::num_par_ids();
}
bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats) {
uint adjusted_id = worker_id + worker_id_offset();
return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
}