/* * The volume index is a combination of two separate subindexes, one containing sparse hook entries * (retained for all chapters), and one containing the remaining entries (retained only for the * dense chapters). If there are no sparse chapters, only the non-hook sub index is used, and it * will contain all records for all chapters. * * The volume index is also divided into zones, with one thread operating on each zone. Each * incoming request is dispatched to the appropriate thread, and then to the appropriate subindex. * Each delta list is handled by a single zone. To ensure that the distribution of delta lists to * zones doesn't underflow (leaving some zone with no delta lists), the minimum number of delta * lists must be the square of the maximum zone count for both subindexes. * * Each subindex zone is a delta index where the payload is a chapter number. The volume index can * compute the delta list number, address, and zone number from the record name in order to * dispatch record handling to the correct structures. * * Most operations that use all the zones take place either before request processing is allowed, * or after all requests have been flushed in order to shut down. The only multi-threaded operation * supported during normal operation is the uds_lookup_volume_index_name() method, used to determine * whether a new chapter should be loaded into the sparse index cache. This operation only uses the * sparse hook subindex, and the zone mutexes are used to make this operation safe. * * There are three ways of expressing chapter numbers in the volume index: virtual, index, and * rolling. The interface to the volume index uses virtual chapter numbers, which are 64 bits long. * Internally the subindex stores only the minimal number of bits necessary by masking away the * high-order bits. 
When the index needs to deal with ordering of index chapter numbers, as when * flushing entries from older chapters, it rolls the index chapter number around so that the * smallest one in use is mapped to 0. See convert_index_to_virtual() or flush_invalid_entries() * for an example of this technique. * * For efficiency, when older chapter numbers become invalid, the index does not immediately remove * the invalidated entries. Instead it lazily removes them from a given delta list the next time it * walks that list during normal operation. Because of this, the index size must be increased * somewhat to accommodate all the invalid entries that have not yet been removed. For the standard * index sizes, this requires about 4 chapters of old entries per 1024 chapters of valid entries in * the index.
*/
/* Sizing parameters derived from the index configuration for one subindex. */
struct sub_index_parameters {
	/* The number of bits in address mask */
	u8 address_bits;
	/* The number of bits in chapter number */
	u8 chapter_bits;
	/* The mean delta */
	u32 mean_delta;
	/* The number of delta lists */
	u64 list_count;
	/* The number of chapters used */
	u32 chapter_count;
	/* The number of bits per chapter */
	size_t chapter_size_in_bits;
	/* The number of bytes of delta list memory */
	size_t memory_size;
	/* The number of bytes the index should keep free at all times */
	size_t target_free_bytes;
};
params->chapter_count = geometry->chapters_per_volume; /* * Make sure that the number of delta list records in the volume index does not change when * the volume is reduced by one chapter. This preserves the mapping from name to volume * index delta list.
*/
rounded_chapters = params->chapter_count; if (uds_is_reduced_index_geometry(geometry))
rounded_chapters += 1;
delta_list_records = records_per_chapter * rounded_chapters;
address_count = config->volume_index_mean_delta * DELTA_LIST_SIZE;
params->list_count = max(delta_list_records / DELTA_LIST_SIZE, min_delta_lists);
params->address_bits = bits_per(address_count - 1);
params->chapter_bits = bits_per(rounded_chapters - 1); if ((u32) params->list_count != params->list_count) { return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT, "cannot initialize volume index with %llu delta lists",
(unsignedlonglong) params->list_count);
}
if (params->address_bits > 31) { return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT, "cannot initialize volume index with %u address bits",
params->address_bits);
}
/* * The probability that a given delta list is not touched during the writing of an entire * chapter is: * * double p_not_touched = pow((double) (params->list_count - 1) / params->list_count, * records_per_chapter); * * For the standard index sizes, about 78% of the delta lists are not touched, and * therefore contain old index entries that have not been eliminated by the lazy LRU * processing. Then the number of old index entries that accumulate over the entire index, * in terms of full chapters worth of entries, is: * * double invalid_chapters = p_not_touched / (1.0 - p_not_touched); * * For the standard index sizes, the index needs about 3.5 chapters of space for the old * entries in a 1024 chapter index, so round this up to use 4 chapters per 1024 chapters in * the index.
*/
invalid_chapters = max(rounded_chapters / 256, 2U);
chapters_in_volume_index = rounded_chapters + invalid_chapters;
entries_in_volume_index = records_per_chapter * chapters_in_volume_index;
/* * Compute the expected size of a full index, then set the total memory to be 6% larger * than that expected size. This number should be large enough that there are not many * rebalances when the index is full.
*/
params->chapter_size_in_bits = uds_compute_delta_index_size(records_per_chapter,
params->mean_delta,
params->chapter_bits);
index_size_in_bits = params->chapter_size_in_bits * chapters_in_volume_index;
expected_index_size = index_size_in_bits / BITS_PER_BYTE;
params->memory_size = expected_index_size * 106 / 100;
/* This function is only useful if the configuration includes sparse chapters. */ staticvoid split_configuration(conststruct uds_configuration *config, struct split_config *split)
{
u64 sample_rate, sample_records;
u64 dense_chapters, sparse_chapters;
/* Start with copies of the base configuration. */
split->hook_config = *config;
split->hook_geometry = *config->geometry;
split->hook_config.geometry = &split->hook_geometry;
split->non_hook_config = *config;
split->non_hook_geometry = *config->geometry;
split->non_hook_config.geometry = &split->non_hook_geometry;
/* Adjust the number of records indexed for each chapter. */
split->hook_geometry.records_per_chapter = sample_records;
split->non_hook_geometry.records_per_chapter -= sample_records;
/* Adjust the number of chapters indexed. */
split->hook_geometry.sparse_chapters_per_volume = 0;
split->non_hook_geometry.sparse_chapters_per_volume = 0;
split->non_hook_geometry.chapters_per_volume = dense_chapters;
}
if (likely(relative_chapter >= flush_range->chapter_count)) { if (relative_chapter < *next_chapter_to_invalidate)
*next_chapter_to_invalidate = relative_chapter; break;
}
result = uds_remove_delta_index_entry(&record->delta_entry); if (result != UDS_SUCCESS) return result;
}
return UDS_SUCCESS;
}
/* Find the matching record, or the list offset where the record would go. */ staticint get_volume_index_entry(struct volume_index_record *record, u32 list_number,
u32 key, struct chapter_range *flush_range)
{ struct volume_index_record other_record; conststruct volume_sub_index *sub_index = record->sub_index;
u32 next_chapter_to_invalidate = sub_index->chapter_mask; int result;
result = uds_start_delta_index_search(&sub_index->delta_index, list_number, 0,
&record->delta_entry); if (result != UDS_SUCCESS) return result;
do {
result = flush_invalid_entries(record, flush_range,
&next_chapter_to_invalidate); if (result != UDS_SUCCESS) return result;
} while (!record->delta_entry.at_end && (key > record->delta_entry.key));
result = uds_remember_delta_index_offset(&record->delta_entry); if (result != UDS_SUCCESS) return result;
/* Check any collision records for a more precise match. */
other_record = *record; if (!other_record.delta_entry.at_end && (key == other_record.delta_entry.key)) { for (;;) {
u8 collision_name[UDS_RECORD_NAME_SIZE];
result = flush_invalid_entries(&other_record, flush_range,
&next_chapter_to_invalidate); if (result != UDS_SUCCESS) return result;
if (other_record.delta_entry.at_end ||
!other_record.delta_entry.is_collision) break;
result = uds_get_delta_entry_collision(&other_record.delta_entry,
collision_name); if (result != UDS_SUCCESS) return result;
int uds_get_volume_index_record(struct volume_index *volume_index, conststruct uds_record_name *name, struct volume_index_record *record)
{ int result;
if (uds_is_volume_index_sample(volume_index, name)) { /* * Other threads cannot be allowed to call uds_lookup_volume_index_name() while * this thread is finding the volume index record. Due to the lazy LRU flushing of * the volume index, uds_get_volume_index_record() is not a read-only operation.
*/ unsignedint zone =
get_volume_sub_index_zone(&volume_index->vi_hook, name); struct mutex *mutex = &volume_index->zones[zone].hook_mutex;
mutex_lock(mutex);
result = get_volume_sub_index_record(&volume_index->vi_hook, name,
record);
mutex_unlock(mutex); /* Remember the mutex so that other operations on the index record can use it. */
record->mutex = mutex;
} else {
result = get_volume_sub_index_record(&volume_index->vi_non_hook, name,
record);
}
return result;
}
/*
 * Add an entry for the record's name, associating it with the given virtual chapter. The chapter
 * must lie within the zone's currently indexed range. On UDS_OVERFLOW the entry is dropped (the
 * condition is logged rate-limited); on UDS_SUCCESS the record is updated to reflect the stored
 * entry. The record's mutex, when set (hook records), guards the delta index update.
 *
 * (Original text was whitespace-mangled — e.g. "unsignedlonglong" — which does not compile;
 * tokens restored, logic unchanged.)
 */
int uds_put_volume_index_record(struct volume_index_record *record, u64 virtual_chapter)
{
	int result;
	u32 address;
	const struct volume_sub_index *sub_index = record->sub_index;

	if (!is_virtual_chapter_indexed(record, virtual_chapter)) {
		u64 low = get_zone_for_record(record)->virtual_chapter_low;
		u64 high = get_zone_for_record(record)->virtual_chapter_high;

		return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT,
						"cannot put record into chapter number %llu that is out of the valid range %llu to %llu",
						(unsigned long long) virtual_chapter,
						(unsigned long long) low,
						(unsigned long long) high);
	}

	address = extract_address(sub_index, record->name);
	if (unlikely(record->mutex != NULL))
		mutex_lock(record->mutex);
	/* Pass the full name only when creating a collision entry for an existing key. */
	result = uds_put_delta_index_entry(&record->delta_entry, address,
					   convert_virtual_to_index(sub_index,
								    virtual_chapter),
					   record->is_found ? record->name->name : NULL);
	if (unlikely(record->mutex != NULL))
		mutex_unlock(record->mutex);

	switch (result) {
	case UDS_SUCCESS:
		record->virtual_chapter = virtual_chapter;
		record->is_collision = record->delta_entry.is_collision;
		record->is_found = true;
		break;
	case UDS_OVERFLOW:
		vdo_log_ratelimit(vdo_log_warning_strerror, UDS_OVERFLOW,
				  "Volume index entry dropped due to overflow condition");
		uds_log_delta_index_entry(&record->delta_entry);
		break;
	default:
		break;
	}

	return result;
}
/*
 * Remove the delta index entry referenced by a previously found record. Returns UDS_BAD_STATE
 * if the record was not found by a prior lookup. The record is invalidated before the removal
 * so it cannot be reused. The record's mutex, when set (hook records), guards the update.
 *
 * (Original text was whitespace-mangled; tokens restored, logic unchanged.)
 */
int uds_remove_volume_index_record(struct volume_index_record *record)
{
	int result;

	if (!record->is_found)
		return vdo_log_warning_strerror(UDS_BAD_STATE,
						"illegal operation on new record");

	/* Mark the record so that it cannot be used again */
	record->is_found = false;
	if (unlikely(record->mutex != NULL))
		mutex_lock(record->mutex);
	result = uds_remove_delta_index_entry(&record->delta_entry);
	if (unlikely(record->mutex != NULL))
		mutex_unlock(record->mutex);
	return result;
}
/* Check to see if the new zone data is too large. */
delta_zone = &sub_index->delta_index.delta_zones[zone_number]; for (i = 1; i <= delta_zone->list_count; i++)
used_bits += delta_zone->delta_lists[i].size;
if (used_bits > sub_index->max_zone_bits) { /* Expire enough chapters to free the desired space. */
u64 expire_count =
1 + (used_bits - sub_index->max_zone_bits) / sub_index->chapter_zone_bits;
/* * Other threads cannot be allowed to call uds_lookup_volume_index_name() while the open * chapter number is changing.
*/ if (has_sparse(volume_index)) {
mutex_lock(mutex);
set_volume_sub_index_zone_open_chapter(&volume_index->vi_hook,
zone_number, virtual_chapter);
mutex_unlock(mutex);
}
}
/*
 * Set the newest open chapter number for the index, while also advancing the oldest valid chapter
 * number. This applies the per-zone operation to every zone in turn.
 *
 * (Original text was whitespace-mangled — "unsignedint" — which does not compile; tokens
 * restored, logic unchanged.)
 */
void uds_set_volume_index_open_chapter(struct volume_index *volume_index,
				       u64 virtual_chapter)
{
	unsigned int zone;

	for (zone = 0; zone < volume_index->zone_count; zone++)
		uds_set_volume_index_zone_open_chapter(volume_index, zone, virtual_chapter);
}
int uds_set_volume_index_record_chapter(struct volume_index_record *record,
u64 virtual_chapter)
{ conststruct volume_sub_index *sub_index = record->sub_index; int result;
if (!record->is_found) return vdo_log_warning_strerror(UDS_BAD_STATE, "illegal operation on new record");
if (!is_virtual_chapter_indexed(record, virtual_chapter)) {
u64 low = get_zone_for_record(record)->virtual_chapter_low;
u64 high = get_zone_for_record(record)->virtual_chapter_high;
return vdo_log_warning_strerror(UDS_INVALID_ARGUMENT, "cannot set chapter number %llu that is out of the valid range %llu to %llu",
(unsignedlonglong) virtual_chapter,
(unsignedlonglong) low,
(unsignedlonglong) high);
}
if (unlikely(record->mutex != NULL))
mutex_lock(record->mutex);
result = uds_set_delta_entry_value(&record->delta_entry,
convert_virtual_to_index(sub_index,
virtual_chapter)); if (unlikely(record->mutex != NULL))
mutex_unlock(record->mutex); if (result != UDS_SUCCESS) return result;
/* Do a read-only lookup of the record name for sparse cache management. */
u64 uds_lookup_volume_index_name(conststruct volume_index *volume_index, conststruct uds_record_name *name)
{ unsignedint zone_number = uds_get_volume_index_zone(volume_index, name); struct mutex *mutex = &volume_index->zones[zone_number].hook_mutex;
u64 virtual_chapter;
if (!uds_is_volume_index_sample(volume_index, name)) return NO_CHAPTER;
for (i = 0; i < reader_count; i++) { struct sub_index_data header;
u8 buffer[sizeof(struct sub_index_data)];
size_t offset = 0;
u32 j;
result = uds_read_from_buffered_reader(readers[i], buffer, sizeof(buffer)); if (result != UDS_SUCCESS) { return vdo_log_warning_strerror(result, "failed to read volume index header");
}
result = VDO_ASSERT(offset == sizeof(buffer), "%zu bytes decoded of %zu expected", offset, sizeof(buffer)); if (result != VDO_SUCCESS) return UDS_CORRUPT_DATA;
if (memcmp(header.magic, MAGIC_START_5, MAGIC_SIZE) != 0) { return vdo_log_warning_strerror(UDS_CORRUPT_DATA, "volume index file had bad magic number");
}
if (i == 0) {
virtual_chapter_low = header.virtual_chapter_low;
virtual_chapter_high = header.virtual_chapter_high;
} elseif (virtual_chapter_high != header.virtual_chapter_high) {
u64 low = header.virtual_chapter_low;
u64 high = header.virtual_chapter_high;
return vdo_log_warning_strerror(UDS_CORRUPT_DATA, "Inconsistent volume index zone files: Chapter range is [%llu,%llu], chapter range %d is [%llu,%llu]",
(unsignedlonglong) virtual_chapter_low,
(unsignedlonglong) virtual_chapter_high,
i, (unsignedlonglong) low,
(unsignedlonglong) high);
} elseif (virtual_chapter_low < header.virtual_chapter_low) {
virtual_chapter_low = header.virtual_chapter_low;
}
result = uds_read_from_buffered_reader(readers[i], decoded, sizeof(u64)); if (result != UDS_SUCCESS) { return vdo_log_warning_strerror(result, "failed to read volume index flush ranges");
}
for (z = 0; z < sub_index->zone_count; z++) {
memset(&sub_index->zones[z], 0, sizeof(struct volume_sub_index_zone));
sub_index->zones[z].virtual_chapter_low = virtual_chapter_low;
sub_index->zones[z].virtual_chapter_high = virtual_chapter_high;
}
result = uds_start_restoring_delta_index(&sub_index->delta_index, readers,
reader_count); if (result != UDS_SUCCESS) return vdo_log_warning_strerror(result, "restoring delta index failed");
if (!has_sparse(volume_index)) { return start_restoring_volume_sub_index(&volume_index->vi_non_hook,
buffered_readers, reader_count);
}
for (i = 0; i < reader_count; i++) { struct volume_index_data header;
u8 buffer[sizeof(struct volume_index_data)];
size_t offset = 0;
result = uds_read_from_buffered_reader(buffered_readers[i], buffer, sizeof(buffer)); if (result != UDS_SUCCESS) { return vdo_log_warning_strerror(result, "failed to read volume index header");
}
result = finish_restoring_volume_sub_index(&volume_index->vi_non_hook,
buffered_readers, reader_count); if ((result == UDS_SUCCESS) && has_sparse(volume_index)) {
result = finish_restoring_volume_sub_index(&volume_index->vi_hook,
buffered_readers,
reader_count);
}
return result;
}
int uds_load_volume_index(struct volume_index *volume_index, struct buffered_reader **readers, unsignedint reader_count)
{ int result;
/* Start by reading the header section of the stream. */
result = start_restoring_volume_index(volume_index, readers, reader_count); if (result != UDS_SUCCESS) return result;
result = finish_restoring_volume_index(volume_index, readers, reader_count); if (result != UDS_SUCCESS) {
abort_restoring_volume_index(volume_index); return result;
}
/* Check the final guard lists to make sure there is no extra data. */
result = uds_check_guard_delta_lists(readers, reader_count); if (result != UDS_SUCCESS)
abort_restoring_volume_index(volume_index);
result = VDO_ASSERT(offset == sizeof(struct sub_index_data), "%zu bytes of config written, of %zu expected", offset, sizeof(struct sub_index_data)); if (result != VDO_SUCCESS) return result;
result = uds_write_to_buffered_writer(buffered_writer, buffer, offset); if (result != UDS_SUCCESS) return vdo_log_warning_strerror(result, "failed to write volume index header");
for (i = 0; i < list_count; i++) {
u8 encoded[sizeof(u64)];
put_unaligned_le64(sub_index->flush_chapters[first_list + i], &encoded);
result = uds_write_to_buffered_writer(buffered_writer, encoded, sizeof(u64)); if (result != UDS_SUCCESS) { return vdo_log_warning_strerror(result, "failed to write volume index flush ranges");
}
}
if (!has_sparse(volume_index)) { return start_saving_volume_sub_index(&volume_index->vi_non_hook,
zone_number, writer);
}
memcpy(buffer, MAGIC_START_6, MAGIC_SIZE);
offset += MAGIC_SIZE;
encode_u32_le(buffer, &offset, volume_index->sparse_sample_rate);
result = VDO_ASSERT(offset == sizeof(struct volume_index_data), "%zu bytes of header written, of %zu expected", offset, sizeof(struct volume_index_data)); if (result != VDO_SUCCESS) return result;
result = uds_write_to_buffered_writer(writer, buffer, offset); if (result != UDS_SUCCESS) {
vdo_log_warning_strerror(result, "failed to write volume index header"); return result;
}
result = start_saving_volume_sub_index(&volume_index->vi_non_hook, zone_number,
writer); if (result != UDS_SUCCESS) return result;
/* The following arrays are initialized to all zeros. */
result = vdo_allocate(params.list_count, u64, "first chapter to flush",
&sub_index->flush_chapters); if (result != VDO_SUCCESS) return result;
return vdo_allocate(zone_count, struct volume_sub_index_zone, "volume index zones", &sub_index->zones);
}
int uds_make_volume_index(conststruct uds_configuration *config, u64 volume_nonce, struct volume_index **volume_index_ptr)
{ struct split_config split; unsignedint zone; struct volume_index *volume_index; int result;
result = vdo_allocate(1, struct volume_index, "volume index", &volume_index); if (result != VDO_SUCCESS) return result;
volume_index->zone_count = config->zone_count;
if (!uds_is_sparse_index_geometry(config->geometry)) {
result = initialize_volume_sub_index(config, volume_nonce, 'm',
&volume_index->vi_non_hook); if (result != UDS_SUCCESS) {
uds_free_volume_index(volume_index); return result;
}
/*
 * NOTE(review): this chunk previously ended with German website-disclaimer text
 * ("Die Informationen auf dieser Webseite wurden ..."), which is scrape/extraction
 * residue from a web page, not part of this source file. It has been removed.
 */