/* * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Forward declarations of types that are only referred to by name here.
class MoveAndUpdateClosure;
class ParCompactionManager;
class ParallelOldTracer;
class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSOldGen;
class PSParallelCompact;
class PSYoungGen;
class RefProcTaskExecutor;
class STWGCTimer;
// The SplitInfo class holds the information needed to 'split' a source region // so that the live data can be copied to two destination *spaces*. Normally, // all the live data in a region is copied to a single destination space (e.g., // everything live in a region in eden is copied entirely into the old gen). // However, when the heap is nearly full, all the live data in eden may not fit // into the old gen. Copying only some of the regions from eden to old gen // requires finding a region that does not contain a partial object (i.e., no // live object crosses the region boundary) somewhere near the last object that // does fit into the old gen. Since it's not always possible to find such a // region, splitting is necessary for predictable behavior. // // A region is always split at the end of the partial object. This avoids // additional tests when calculating the new location of a pointer, which is a // very hot code path. The partial object and everything to its left will be // copied to another space (call it dest_space_1). The live data to the right // of the partial object will be copied either within the space itself, or to a // different destination space (distinct from dest_space_1). // // Split points are identified during the summary phase, when region // destinations are computed: data about the split, including the // partial_object_size, is recorded in a SplitInfo record and the // partial_object_size field in the summary data is set to zero. The zeroing is // possible (and necessary) since the partial object will move to a different // destination space than anything to its right, thus the partial object should // not affect the locations of any objects to its right. // // The recorded data is used during the compaction phase, but only rarely: when // the partial object on the split region will be copied across a destination // region boundary. 
// This test is made once each time a region is filled, and is
// a simple address comparison, so the overhead is negligible (see
// PSParallelCompact::first_src_addr()).
//
// Notes:
//
// Only regions with partial objects are split; a region without a partial
// object does not need any extra bookkeeping.
//
// At most one region is split per space, so the amount of data required is
// constant.
//
// A region is split only when the destination space would overflow.  Once that
// happens, the destination space is abandoned and no other data (even from
// other source spaces) is targeted to that destination space.  Abandoning the
// destination space may leave a somewhat large unused area at the end, if a
// large object caused the overflow.
//
// Future work:
//
// More bookkeeping would be required to continue to use the destination space.
// The most general solution would allow data from regions in two different
// source spaces to be "joined" in a single destination region.  At the very
// least, additional code would be required in next_src_region() to detect the
// join and skip to an out-of-order source region.  If the join region was also
// the last destination region to which a split region was copied (the most
// likely case), then additional work would be needed to get fill_region() to
// stop iteration and switch to a new source region at the right point.  Basic
// idea would be to use a fake value for the top of the source space.  It is
// doable, if a bit tricky.
//
// A simpler (but less general) solution would fill the remainder of the
// destination region with a dummy object and continue filling the next
// destination region.
class SplitInfo
{ public: // Return true if this split info is valid (i.e., if a split has been // recorded). The very first region cannot have a partial object and thus is // never split, so 0 is the 'invalid' value. bool is_valid() const { return _src_region_idx > 0; }
// Return true if this split holds data for the specified source region. inlinebool is_split(size_t source_region) const;
// The index of the split region, the size of the partial object on that // region and the destination of the partial object.
size_t src_region_idx() const { return _src_region_idx; }
size_t partial_obj_size() const { return _partial_obj_size; }
HeapWord* destination() const { return _destination; }
// The destination count of the partial object referenced by this split // (either 1 or 2). This must be added to the destination count of the // remainder of the source region. unsignedint destination_count() const { return _destination_count; }
// If a word within the partial object will be written to the first word of a // destination region, this is the address of the destination region; // otherwise this is NULL.
HeapWord* dest_region_addr() const { return _dest_region_addr; }
// If a word within the partial object will be written to the first word of a // destination region, this is the address of that word within the partial // object; otherwise this is NULL.
HeapWord* first_src_addr() const { return _first_src_addr; }
// Record the data necessary to split the region src_region_idx. void record(size_t src_region_idx, size_t partial_obj_size,
HeapWord* destination);
// Address at which free space begins once the collection has finished.  The
// value is only meaningful after the summary phase has completed.
HeapWord* new_top() const
{
  return _new_top;
}
// Hands out the address of the new_top slot so that a caller can store a
// value into it.
HeapWord** new_top_addr()
{
  return &_new_top;
}
// End of the smallest allowable dense prefix (used only for perm gen).
HeapWord* min_dense_prefix() const
{
  return _min_dense_prefix;
}
// End of the dense prefix, which is also where the compacted region begins.
HeapWord* dense_prefix() const
{
  return _dense_prefix;
}
// Start array of the space (or of the generation that contains it); NULL
// when the space has no start array.
ObjectStartArray* start_array() const
{
  return _start_array;
}
// Opening of ParallelCompactData: the region-size constants and address masks.
// The nested types and remaining members follow this block.
class ParallelCompactData
{
public:
  // Sizes are in HeapWords, unless indicated otherwise.
  static const size_t Log2RegionSize;
  static const size_t RegionSize;
  static const size_t RegionSizeBytes;

  // Mask for the bits in a size_t to get an offset within a region.
  static const size_t RegionSizeOffsetMask;
  // Mask for the bits in a pointer to get an offset within a region.
  static const size_t RegionAddrOffsetMask;
  // Mask for the bits in a pointer to get the address of the start of a region.
  static const size_t RegionAddrMask;
class RegionData
{ public: // Destination address of the region.
HeapWord* destination() const { return _destination; }
// The first region containing data destined for this region.
size_t source_region() const { return _source_region; }
// Reuse _source_region to store the corresponding shadow region index
size_t shadow_region() const { return _source_region; }
// The starting address of the partial object extending onto the region.
HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
// Size of the partial object extending onto the region (words).
size_t partial_obj_size() const { return _partial_obj_size; }
// Size of live data that lies within this region due to objects that start // in this region (words). This does not include the partial object // extending onto the region (if any), or the part of an object that extends // onto the next region (if any).
size_t live_obj_size() const { return _dc_and_los & los_mask; }
// Total live data that lies within the region (words).
size_t data_size() const { return partial_obj_size() + live_obj_size(); }
// The destination_count is the number of other regions to which data from // this region will be copied. At the end of the summary phase, the valid // values of destination_count are // // 0 - data from the region will be compacted completely into itself, or the // region is empty. The region can be claimed and then filled. // 1 - data from the region will be compacted into 1 other region; some // data from the region may also be compacted into the region itself. // 2 - data from the region will be copied to 2 other regions. // // During compaction as regions are emptied, the destination_count is // decremented (atomically) and when it reaches 0, it can be claimed and // then filled. // // A region is claimed for processing by atomically changing the // destination_count to the claimed value (dc_claimed). After a region has // been filled, the destination_count should be set to the completed value // (dc_completed). inline uint destination_count() const; inline uint destination_count_raw() const;
// Whether the block table for this region has been filled. inlinebool blocks_filled() const;
// Number of times the block table was filled.
DEBUG_ONLY(inline size_t blocks_filled_count() const;)
// The location of the java heap data that corresponds to this region. inline HeapWord* data_location() const;
// The highest address referenced by objects in this region. inline HeapWord* highest_ref() const;
// Whether this region is available to be claimed, has been claimed, or has // been completed. // // Minor subtlety: claimed() returns true if the region is marked // completed(), which is desirable since a region must be claimed before it // can be completed. bool available() const { return _dc_and_los < dc_one; } bool claimed() const { return _dc_and_los >= dc_claimed; } bool completed() const { return _dc_and_los >= dc_completed; }
// These are atomic. inlinevoid add_live_obj(size_t words); inlinevoid set_highest_ref(HeapWord* addr); inlinevoid decrement_destination_count(); inlinebool claim();
// Possible values of _shadow_state, and transition is as follows // Normal Path: // UnusedRegion -> mark_normal() -> NormalRegion // Shadow Path: // UnusedRegion -> mark_shadow() -> ShadowRegion -> // mark_filled() -> FilledShadow -> mark_copied() -> CopiedShadow staticconstint UnusedRegion = 0; // The region is not collected yet staticconstint ShadowRegion = 1; // Stolen by an idle thread, and a shadow region is created for it staticconstint FilledShadow = 2; // Its shadow region has been filled and ready to be copied back staticconstint CopiedShadow = 3; // The data of the shadow region has been copied back staticconstint NormalRegion = 4; // The region will be collected by the original parallel algorithm
// Mark the current region as normal or shadow to enter different processing paths inlinebool mark_normal(); inlinebool mark_shadow(); // Mark the shadow region as filled and ready to be copied back inlinevoid mark_filled(); // Mark the shadow region as copied back to avoid double copying. inlinebool mark_copied(); // Special case: see the comment in PSParallelCompact::fill_and_update_shadow_region. // Return to the normal path here inlinevoid shadow_to_normal();
int shadow_state() { return _shadow_state; }
private: // The type used to represent object sizes within a region. typedef uint region_sz_t;
// Constants for manipulating the _dc_and_los field, which holds both the // destination count and live obj size. The live obj size lives at the // least significant end so no masking is necessary when adding. staticconst region_sz_t dc_shift; // Shift amount. staticconst region_sz_t dc_mask; // Mask for destination count. staticconst region_sz_t dc_one; // 1, shifted appropriately. staticconst region_sz_t dc_claimed; // Region has been claimed. staticconst region_sz_t dc_completed; // Region has been completed. staticconst region_sz_t los_mask; // Mask for live obj size.
#ifdef ASSERT
size_t _blocks_filled_count; // Number of block table fills.
// These enable optimizations that are only partially implemented. Use // debug builds to prevent the code fragments from breaking.
HeapWord* _data_location;
HeapWord* _highest_ref; #endif// #ifdef ASSERT
#ifdef ASSERT public:
uint _pushed; // 0 until region is pushed onto a stack private: #endif
};
// "Blocks" allow shorter sections of the bitmap to be searched. Each Block // holds an offset, which is the amount of live data in the Region to the left // of the first live object that starts in the Block. class BlockData
{ public: typedefunsignedshortint blk_ofs_t;
// Fill in the regions covering [beg, end) so that no data moves; i.e., the // destination of region n is simply the start of region n. Both arguments // beg and end must be region-aligned. void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
inlinevoid
ParallelCompactData::RegionData::set_blocks_filled()
{
OrderAccess::release();
_blocks_filled = true; // Debug builds count the number of times the table was filled.
DEBUG_ONLY(Atomic::inc(&_blocks_filled_count));
}
// MT-unsafe claiming of a region. Should only be used during single threaded // execution. inlinebool ParallelCompactData::RegionData::claim_unsafe()
{ if (available()) {
_dc_and_los |= dc_claimed; returntrue;
} returnfalse;
}
inlinevoid ParallelCompactData::RegionData::mark_filled() { int old = Atomic::cmpxchg(&_shadow_state, ShadowRegion, FilledShadow);
assert(old == ShadowRegion, "Fail to mark the region as filled");
}
// Move the shadow state from ShadowRegion back to NormalRegion with a
// compare-and-exchange.  The region is expected to be in ShadowRegion state;
// anything else trips the assert.
//
// Defined `inline` to match the in-class declaration and the sibling
// mark_filled() definition.
inline void ParallelCompactData::RegionData::shadow_to_normal() {
  int old = Atomic::cmpxchg(&_shadow_state, ShadowRegion, NormalRegion);
  assert(old == ShadowRegion, "Fail to mark the region as finish");
}
// Return the address that lies `offset` words past the start of region
// `region`.
//
// region - region index; the assert accepts values up to and including
//          _region_count.
// offset - word offset within the region; must be less than RegionSize.
inline HeapWord*
ParallelCompactData::region_to_addr(size_t region, size_t offset) const
{
  assert(region <= _region_count, "region out of range");
  assert(offset < RegionSize, "offset too big");  // This may be too strict.
  return region_to_addr(region) + offset;
}
// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
// do_addr() method.
//
// A closure starts out with a number of heap words to process
// (words_remaining()) and is 'full' once that number has dropped to 0.
// Subclasses are expected to update the total from their do_addr() methods as
// they process words.  Only one subclass relies on this to stop the iteration,
// which is why the default initial value is > 0.  Keeping the implementation
// here, rather than in that one subclass, avoids making is_full() virtual and
// thus avoids a virtual call per live object.
class ParMarkBitMapClosure: public StackObj {
 public:
  typedef ParMarkBitMap::idx_t idx_t;
  typedef ParMarkBitMap::IterationStatus IterationStatus;

 public:
  inline ParMarkBitMapClosure(ParMarkBitMap* mbm,
                              ParCompactionManager* cm,
                              size_t words = max_uintx);
inlinevoid ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
assert(_words_remaining >= words, "processed too many words");
_words_remaining -= words;
}
// The Parallel collector is a stop-the-world garbage collector that // does parts of the collection using parallel threads. The collection includes // the tenured generation and the young generation. // // There are four phases of the collection. // // - marking phase // - summary phase // - compacting phase // - clean up phase // // Roughly speaking these phases correspond, respectively, to // - mark all the live objects // - calculate the destination of each object at the end of the collection // - move the objects to their destination // - update some references and reinitialize some variables // // These three phases are invoked in PSParallelCompact::invoke_no_policy(). The // marking phase is implemented in PSParallelCompact::marking_phase() and does a // complete marking of the heap. The summary phase is implemented in // PSParallelCompact::summary_phase(). The move and update phase is implemented // in PSParallelCompact::compact(). // // A space that is being collected is divided into regions and with each region // is associated an object of type ParallelCompactData. Each region is of a // fixed size and typically will contain more than 1 object and may have parts // of objects at the front and back of the region. // // region -----+---------------------+---------- // objects covered [ AAA )[ BBB )[ CCC )[ DDD ) // // The marking phase does a complete marking of all live objects in the heap. // The marking also compiles the size of the data for all live objects covered // by the region. This size includes the part of any live object spanning onto // the region (part of AAA if it is live) from the front, all live objects // contained in the region (BBB and/or CCC if they are live), and the part of // any live objects covered by the region that extends off the region (part of // DDD if it is live). The marking phase uses multiple GC threads and marking // is done in a bit array of type ParMarkBitMap. 
// The marking of the bit map is
// done atomically as is the accumulation of the size of the live objects
// covered by a region.
//
// The summary phase calculates the total live data to the left of each region
// XXX.  Based on that total and the bottom of the space, it can calculate the
// starting location of the live data in XXX.  The summary phase calculates for
// each region XXX quantities such as
//
//      - the amount of live data at the beginning of a region from an object
//        entering the region.
//      - the location of the first live data on the region
//      - a count of the number of regions receiving live data from XXX.
//
// See ParallelCompactData for precise details.  The summary phase also
// calculates the dense prefix for the compaction.  The dense prefix is a
// portion at the beginning of the space that is not moved.  The objects in the
// dense prefix do need to have their object references updated.  See method
// summarize_dense_prefix().
//
// The summary phase is done using 1 GC thread.
//
// The compaction phase moves objects to their new location and updates all
// references in the object.
//
// A current exception is that objects that cross a region boundary are moved
// but do not have their references updated.  References are not updated because
// it cannot easily be determined if the klass pointer KKK for the object AAA
// has been updated.  KKK likely resides in a region to the left of the region
// containing AAA.  These AAA's have their references updated at the end in a
// clean up phase.  See the method PSParallelCompact::update_deferred_object().
//
// Compaction is done on a region basis.  A region that is ready to be filled is
// put on a ready list and GC threads take regions off the list and fill them.  A
// region is ready to be filled if it is empty of live objects.  Such a region may
// have been initially empty (only contained dead objects) or may have had all
// its live objects copied out already.
// A region that compacts into itself is
// also ready for filling.  The ready list is initially filled with empty
// regions and regions compacting into themselves.  There is always at least 1
// region that can be put on the ready list.  The regions are atomically added
// and removed from the ready list.
//
// During compaction, there is a natural task dependency among regions because
// destination regions may also be source regions themselves.  Consequently, the
// destination regions are not available for processing until all live objects
// within them are evacuated to their destinations.  These dependencies lead to
// limited thread utilization as threads spin waiting on regions to be ready.
// Shadow regions are utilized to address these region dependencies.  The basic
// idea is that, if a region is unavailable because it still contains live
// objects and thus cannot serve as a destination momentarily, the GC thread
// may allocate a shadow region as a substitute destination and directly copy
// live objects into this shadow region.  Live objects in the shadow region will
// be copied into the target destination region when it becomes available.
//
// For more details on shadow regions, please refer to §4.2 of the VEE'19 paper:
// Haoyu Li, Mingyu Wu, Binyu Zang, and Haibo Chen.  2019.  ScissorGC: scalable
// and efficient compaction for Java full garbage collection.  In Proceedings of
// the 15th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution
// Environments (VEE 2019).  ACM, New York, NY, USA, 108-121.  DOI:
// https://doi.org/10.1145/3313808.3313820
// Forward declaration only; no definition of TaskQueue appears in the visible
// text.
class TaskQueue;
class PSParallelCompact : AllStatic { public: // Convenient access to type names. typedef ParMarkBitMap::idx_t idx_t; typedef ParallelCompactData::RegionData RegionData; typedef ParallelCompactData::BlockData BlockData;
// Mark live objects staticvoid marking_phase(ParallelOldTracer *gc_tracer);
// Compute the dense prefix for the designated space. This is an experimental // implementation currently not used in production. static HeapWord* compute_dense_prefix_via_density(const SpaceId id, bool maximum_compaction);
// Methods used to compute the dense prefix.
// Compute the value of the normal distribution at x = density. The mean and // standard deviation are values saved by initialize_dead_wood_limiter(). staticinlinedouble normal_distribution(double density);
// Initialize the static vars used by dead_wood_limiter(). staticvoid initialize_dead_wood_limiter();
// Return the percentage of space that can be treated as "dead wood" (i.e., // not reclaimed). staticdouble dead_wood_limiter(double density, size_t min_percent);
// Find the first (left-most) region in the range [beg, end) that has at least // dead_words of dead space to the left. The argument beg must be the first // region in the space that is not completely live. static RegionData* dead_wood_limit_region(const RegionData* beg, const RegionData* end,
size_t dead_words);
// Return a pointer to the first region in the range [beg, end) that is not // completely full. static RegionData* first_dead_space_region(const RegionData* beg, const RegionData* end);
// Return a value indicating the benefit or 'yield' if the compacted region // were to start (or equivalently if the dense prefix were to end) at the // candidate region. Higher values are better. // // The value is based on the amount of space reclaimed vs. the costs of (a) // updating references in the dense prefix plus (b) copying objects and // updating references in the compacted region. staticinlinedouble reclaimed_ratio(const RegionData* const candidate,
HeapWord* const bottom,
HeapWord* const top,
HeapWord* const new_top);
// Compute the dense prefix for the designated space. static HeapWord* compute_dense_prefix(const SpaceId id, bool maximum_compaction);
// Return true if dead space crosses onto the specified Region; bit must be // the bit index corresponding to the first word of the Region. staticinlinebool dead_space_crosses_boundary(const RegionData* region,
idx_t bit);
// Summary phase utility routine to fill dead space (if any) at the dense // prefix boundary. Should only be called if the dense prefix is // non-empty. staticvoid fill_dense_prefix_end(SpaceId id);
staticvoid post_initialize(); // Perform initialization for PSParallelCompact that requires // allocations. This should be called during the VM initialization // at a pointer where it would be appropriate to return a JNI_ENOMEM // in the event of a failure. staticbool initialize();
// Compaction support. // Return true if p is in the range [beg_addr, end_addr). staticinlinebool is_in(HeapWord* p, HeapWord* beg_addr, HeapWord* end_addr); staticinlinebool is_in(oop* p, HeapWord* beg_addr, HeapWord* end_addr);
// Convenience wrappers for per-space data kept in _space_info. staticinline MutableSpace* space(SpaceId space_id); staticinline HeapWord* new_top(SpaceId space_id); staticinline HeapWord* dense_prefix(SpaceId space_id); staticinline ObjectStartArray* start_array(SpaceId space_id);
// Process the end of the given region range in the dense prefix. // This includes saving any object not updated. staticvoid dense_prefix_regions_epilogue(ParCompactionManager* cm,
size_t region_start_index,
size_t region_end_index,
idx_t exiting_object_offset,
idx_t region_offset_start,
idx_t region_offset_end);
// Update a region in the dense prefix. For each live object // in the region, update it's interior references. For each // dead object, fill it with deadwood. Dead space at the end // of a region range will be filled to the start of the next // live object regardless of the region_index_end. None of the // objects in the dense prefix move and dead space is dead // (holds only dead objects that don't need any processing), so // dead space can be filled in any order. staticvoid update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
SpaceId space_id,
size_t region_index_start,
size_t region_index_end);
// Return the address of the count + 1st live word in the range [beg, end). static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
// Return the address of the word to be copied to dest_addr, which must be // aligned to a region boundary. static HeapWord* first_src_addr(HeapWord* const dest_addr,
SpaceId src_space_id,
size_t src_region_idx);
// Determine the next source region, set closure.source() to the start of the // new region return the region index. Parameter end_addr is the address one // beyond the end of source range just processed. If necessary, switch to a // new source space and set src_space_id (in-out parameter) and src_space_top // (out parameter) accordingly. static size_t next_src_region(MoveAndUpdateClosure& closure,
SpaceId& src_space_id,
HeapWord*& src_space_top,
HeapWord* end_addr);
// Decrement the destination count for each non-empty source region in the // range [beg_region, region(region_align_up(end_addr))). If the destination // count for a region goes to 0 and it needs to be filled, enqueue it. staticvoid decrement_destination_counts(ParCompactionManager* cm,
SpaceId src_space_id,
size_t beg_region,
HeapWord* end_addr);
staticvoid fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region); staticvoid fill_and_update_region(ParCompactionManager* cm, size_t region);
staticbool steal_unavailable_region(ParCompactionManager* cm, size_t& region_idx); staticvoid fill_and_update_shadow_region(ParCompactionManager* cm, size_t region); // Copy the content of a shadow region back to its corresponding heap region staticvoid copy_back(HeapWord* shadow_addr, HeapWord* region_addr); // Collect empty regions as shadow regions and initialize the // _next_shadow_region filed for each compact manager staticvoid initialize_shadow_regions(uint parallel_gc_threads);
// Fill in the block table for the specified region. staticvoid fill_blocks(size_t region_idx);
// Update a single deferred object. staticvoid update_deferred_object(ParCompactionManager* cm, HeapWord* addr);
#ifdef ASSERT // Sanity check the new location of a word in the heap. staticinlinevoid check_new_location(HeapWord* old_addr, HeapWord* new_addr); // Verify that all the regions have been emptied. staticvoid verify_complete(SpaceId space_id); #endif// #ifdef ASSERT
};
class MoveAndUpdateClosure: public ParMarkBitMapClosure { staticinline size_t calculate_words_remaining(size_t region); public: inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
size_t region);
// If the object will fit (size <= words_remaining()), copy it to the current // destination, update the interior oops and the start array and return either // full (if the closure is full) or incomplete. If the object will not fit, // return would_overflow.
IterationStatus do_addr(HeapWord* addr, size_t size);
// Copy enough words to fill this closure, starting at source(). Interior // oops and the start array are not updated. Return full.
IterationStatus copy_until_full();
// Copy enough words to fill this closure or to the end of an object, // whichever is smaller, starting at source(). Interior oops and the start // array are not updated. void copy_partial_obj();
virtualvoid complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
PSParallelCompact::RegionData* region_ptr);
protected: // Update variables to indicate that word_count words were processed. inlinevoid update_state(size_t word_count);
protected:
HeapWord* _destination; // Next addr to be written.
ObjectStartArray* const _start_array;
size_t _offset;
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.