/** * DOC: Block map eras * * The block map era, or maximum age, is used as follows: * * Each block map page, when dirty, records the earliest recovery journal block sequence number of * the changes reflected in that dirty block. Sequence numbers are classified into eras: every * @maximum_age sequence numbers, we switch to a new era. Block map pages are assigned to eras * according to the sequence number they record. * * In the current (newest) era, block map pages are not written unless there is cache pressure. In * the next oldest era, each time a new journal block is written 1/@maximum_age of the pages in * this era are issued for write. In all older eras, pages are issued for write immediately.
*/
/* Used to indicate that the page holding the location of a tree root has been "loaded". */ staticconst physical_block_number_t VDO_INVALID_PBN = 0xFFFFFFFFFFFFFFFF;
/*
 * ADD_ONCE() - Add @delta to @value and store the sum back with WRITE_ONCE().
 *
 * For adjusting VDO page cache statistic fields which are only mutated on the logical zone
 * thread. Prevents any compiler shenanigans from affecting other threads reading those stats.
 *
 * @value is expanded twice (once as the store target and once as an operand), so it must be
 * an lvalue expression without side effects.
 */
#define ADD_ONCE(value, delta) WRITE_ONCE(value, (value) + (delta))
/** * initialize_info() - Initialize all page info structures and put them on the free list. * * Return: VDO_SUCCESS or an error.
*/ staticint initialize_info(struct vdo_page_cache *cache)
{ struct page_info *info;
INIT_LIST_HEAD(&cache->free_list); for (info = cache->infos; info < cache->infos + cache->page_count; info++) { int result;
/**
 * allocate_cache_components() - Allocate components of the cache which require their own
 *                               allocation.
 * @cache: The page cache whose info array, page buffers, and page map are to be allocated.
 *
 * Allocates, in order, the page info array, the page buffers (one VDO_BLOCK_SIZE buffer per
 * page), and the page map, and then initializes the page infos. Stops at the first failure.
 *
 * The caller is responsible for all clean up on errors.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int __must_check allocate_cache_components(struct vdo_page_cache *cache)
{
	/* Widen before multiplying so the byte count is computed in 64 bits. */
	u64 buffer_bytes = (u64) VDO_BLOCK_SIZE * cache->page_count;
	int status;

	status = vdo_allocate(cache->page_count, struct page_info, "page infos",
			      &cache->infos);
	if (status == VDO_SUCCESS) {
		status = vdo_allocate_memory(buffer_bytes, VDO_BLOCK_SIZE, "cache pages",
					     &cache->pages);
	}

	if (status == VDO_SUCCESS)
		status = vdo_int_map_create(cache->page_count, &cache->page_map);

	if (status == VDO_SUCCESS)
		status = initialize_info(cache);

	return status;
}
/** * assert_on_cache_thread() - Assert that a function has been called on the VDO page cache's * thread.
*/ staticinlinevoid assert_on_cache_thread(struct vdo_page_cache *cache, constchar *function_name)
{
thread_id_t thread_id = vdo_get_callback_thread_id();
VDO_ASSERT_LOG_ONLY((thread_id == cache->zone->thread_id), "%s() must only be called on cache thread %d, not thread %d",
function_name, cache->zone->thread_id, thread_id);
}
/**
 * assert_io_allowed() - Assert that a page cache may issue I/O.
 * @cache: The page cache to check.
 *
 * Logs an assertion failure if the state of the cache's zone is quiescent.
 */
static inline void assert_io_allowed(struct vdo_page_cache *cache)
{
	VDO_ASSERT_LOG_ONLY(!vdo_is_state_quiescent(&cache->zone->state),
			    "VDO page cache may issue I/O");
}
if (++cache->pressure_report >= DISPLAY_INTERVAL)
cache->pressure_report = 0;
}
}
/**
 * get_page_state_name() - Return the name of a page state.
 * @state: The page state to name.
 *
 * If the page state is invalid a static string is returned and the invalid state is logged.
 *
 * Return: A pointer to a static page state name.
 */
static const char * __must_check get_page_state_name(enum vdo_page_buffer_state state)
{
	/* Indexed by the numeric value of the page state. */
	static const char * const state_names[] = {
		"FREE",
		"INCOMING",
		"FAILED",
		"RESIDENT",
		"DIRTY",
		"OUTGOING"
	};
	int result = VDO_ASSERT(state < ARRAY_SIZE(state_names),
				"Unknown page_state value %d", state);

	return (result == VDO_SUCCESS) ? state_names[state] : "[UNKNOWN PAGE STATE]";
}
/**
 * update_counter() - Update the counter associated with a given state.
 * @info: The page info to count.
 * @delta: The delta to apply to the counter.
 *
 * The counter adjusted is the one in the cache statistics which corresponds to the current
 * state of @info. States without a counter are ignored.
 */
static void update_counter(struct page_info *info, s32 delta)
{
	struct block_map_statistics *stats = &info->cache->stats;

	switch (info->state) {
	case PS_FREE:
		ADD_ONCE(stats->free_pages, delta);
		break;

	case PS_INCOMING:
		ADD_ONCE(stats->incoming_pages, delta);
		break;

	case PS_FAILED:
		ADD_ONCE(stats->failed_pages, delta);
		break;

	case PS_RESIDENT:
		ADD_ONCE(stats->clean_pages, delta);
		break;

	case PS_DIRTY:
		ADD_ONCE(stats->dirty_pages, delta);
		break;

	case PS_OUTGOING:
		ADD_ONCE(stats->outgoing_pages, delta);
		break;

	default:
		break;
	}
}
/** update_lru() - Update the lru information for an active page. */ staticvoid update_lru(struct page_info *info)
{ if (info->cache->lru_list.prev != &info->lru_entry)
list_move_tail(&info->lru_entry, &info->cache->lru_list);
}
/**
 * set_info_state() - Set the state of a page_info and put it on the right list, adjusting
 *                    counters.
 * @info: The page info to update.
 * @new_state: The state to move the page into.
 *
 * Does nothing if the page is already in @new_state. Otherwise the statistics counter for the
 * old state is decremented, the state is changed, and the counter for the new state is
 * incremented before the page is placed on the list which matches its new state.
 */
static void set_info_state(struct page_info *info, enum vdo_page_buffer_state new_state)
{
	if (new_state == info->state)
		return;

	/*
	 * update_counter() picks the counter from info->state, so the decrement must happen
	 * before the assignment and the increment after it.
	 */
	update_counter(info, -1);
	info->state = new_state;
	update_counter(info, 1);

	switch (info->state) {
	case PS_FREE:
	case PS_FAILED:
		list_move_tail(&info->state_entry, &info->cache->free_list);
		return;

	case PS_OUTGOING:
		list_move_tail(&info->state_entry, &info->cache->outgoing_list);
		return;

	case PS_DIRTY:
		/* The state entry of a dirty page is not moved here. */
		return;

	default:
		/* All other states are not kept on a state list. */
		list_del_init(&info->state_entry);
	}
}
/**
 * set_info_pbn() - Set the pbn for an info, updating the map as needed.
 * @info: The page info to update.
 * @pbn: The new physical block number, or NO_PAGE to disassociate the info from any page.
 *
 * Any existing mapping for the info's old pbn is removed from the cache's page map, and a
 * mapping for the new pbn is added unless it is NO_PAGE.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int __must_check set_info_pbn(struct page_info *info, physical_block_number_t pbn)
{
	struct vdo_page_cache *cache = info->cache;
	int result;

	/* Either the new or the old page number must be NO_PAGE. */
	result = VDO_ASSERT((pbn == NO_PAGE) || (info->pbn == NO_PAGE),
			    "Must free a page before reusing it.");
	if (result != VDO_SUCCESS)
		return result;

	if (info->pbn != NO_PAGE)
		vdo_int_map_remove(cache->page_map, info->pbn);

	info->pbn = pbn;
	if (pbn == NO_PAGE)
		return VDO_SUCCESS;

	return vdo_int_map_put(cache->page_map, pbn, info, true, NULL);
}
/**
 * reset_page_info() - Reset page info to represent an unallocated page.
 * @info: The page info to reset; it must not be busy and must have no waiters.
 *
 * Once the preconditions hold, the info is marked free and taken off the lru list even if
 * clearing its pbn reports an error; that error is what gets returned.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int reset_page_info(struct page_info *info)
{
	int result = VDO_ASSERT(info->busy == 0, "VDO Page must not be busy");

	if (result == VDO_SUCCESS) {
		result = VDO_ASSERT(!vdo_waitq_has_waiters(&info->waiting),
				    "VDO Page must not have waiters");
	}

	if (result != VDO_SUCCESS)
		return result;

	result = set_info_pbn(info, NO_PAGE);
	set_info_state(info, PS_FREE);
	list_del_init(&info->lru_entry);
	return result;
}
/** * find_free_page() - Find a free page. * * Return: A pointer to the page info structure (if found), NULL otherwise.
*/ staticstruct page_info * __must_check find_free_page(struct vdo_page_cache *cache)
{ struct page_info *info;
info = list_first_entry_or_null(&cache->free_list, struct page_info,
state_entry); if (info != NULL)
list_del_init(&info->state_entry);
return info;
}
/** * find_page() - Find the page info (if any) associated with a given pbn. * @pbn: The absolute physical block number of the page. * * Return: The page info for the page if available, or NULL if not.
*/ staticstruct page_info * __must_check find_page(struct vdo_page_cache *cache,
physical_block_number_t pbn)
{ if ((cache->last_found != NULL) && (cache->last_found->pbn == pbn)) return cache->last_found;
/**
 * select_lru_page() - Determine which page is least recently used.
 *
 * Picks the least recently used from among the non-busy entries at the front of the lru list.
 * Since whenever we mark a page busy we also put it to the end of the list, it is unlikely
 * (but not impossible) that the entries at the front are busy unless the queue is very short.
 *
 * Return: A pointer to the info structure for a relevant page, or NULL if no such page can be
 *         found. The page can be dirty or resident.
*/ staticstruct page_info * __must_check select_lru_page(struct vdo_page_cache *cache)
{ struct page_info *info;
/** * complete_waiter_with_error() - Complete a page completion with an error code. * @waiter: The page completion, as a waiter. * @result_ptr: A pointer to the error code. * * Implements waiter_callback_fn.
*/ staticvoid complete_waiter_with_error(struct vdo_waiter *waiter, void *result_ptr)
{ int *result = result_ptr;
/**
 * complete_waiter_with_page() - Complete a page completion with a page.
 * @waiter: The page completion, as a waiter.
 * @page_info: The page info to complete with.
 *
 * Implements waiter_callback_fn.
 */
static void complete_waiter_with_page(struct vdo_waiter *waiter, void *page_info)
{
	struct vdo_page_completion *page_completion = page_completion_from_waiter(waiter);

	complete_with_page(page_info, page_completion);
}
/** * distribute_page_over_waitq() - Complete a waitq of VDO page completions with a page result. * * Upon completion the waitq will be empty. * * Return: The number of pages distributed.
*/ staticunsignedint distribute_page_over_waitq(struct page_info *info, struct vdo_wait_queue *waitq)
{
size_t num_pages;
/* * Increment the busy count once for each pending completion so that this page does not * stop being busy until all completions have been processed.
*/
info->busy += num_pages;
/** * set_persistent_error() - Set a persistent error which all requests will receive in the future. * @context: A string describing what triggered the error. * * Once triggered, all enqueued completions will get this error. Any future requests will result in * this error as well.
*/ staticvoid set_persistent_error(struct vdo_page_cache *cache, constchar *context, int result)
{ struct page_info *info; /* If we're already read-only, there's no need to log. */ struct vdo *vdo = cache->vdo;
for (info = cache->infos; info < cache->infos + cache->page_count; info++) {
vdo_waitq_notify_all_waiters(&info->waiting,
complete_waiter_with_error, &result);
}
}
/**
 * validate_completed_page() - Check that a page completion which is being freed to the cache
 *                             referred to a valid page and is in a valid state.
 * @completion: The page completion to check.
 * @writable: Whether a writable page is required.
 *
 * Return: VDO_SUCCESS if the page was valid, otherwise an error.
*/ staticint __must_check validate_completed_page(struct vdo_page_completion *completion, bool writable)
{ int result;
result = VDO_ASSERT(completion->ready, "VDO Page completion not ready"); if (result != VDO_SUCCESS) return result;
result = VDO_ASSERT(completion->info != NULL, "VDO Page Completion must be complete"); if (result != VDO_SUCCESS) return result;
result = VDO_ASSERT(completion->info->pbn == completion->pbn, "VDO Page Completion pbn must be consistent"); if (result != VDO_SUCCESS) return result;
result = VDO_ASSERT(is_valid(completion->info), "VDO Page Completion page must be valid"); if (result != VDO_SUCCESS) return result;
if (writable) {
result = VDO_ASSERT(completion->writable, "VDO Page Completion must be writable"); if (result != VDO_SUCCESS) return result;
}
/**
 * enter_zone_read_only_mode() - Put the VDO into read-only mode and let the zone drain.
 * @zone: The block map zone which encountered the error.
 * @result: The error which is causing the transition to read-only mode.
 */
static void enter_zone_read_only_mode(struct block_map_zone *zone, int result)
{
	struct vdo *vdo = zone->block_map->vdo;

	vdo_enter_read_only_mode(vdo, result);

	/*
	 * We are in read-only mode, so we won't ever write any page out.
	 * Just take all waiters off the waitq so the zone can drain.
	 */
	vdo_waitq_init(&zone->flush_waiters);
	check_for_drain_complete(zone);
}
staticbool __must_check
validate_completed_page_or_enter_read_only_mode(struct vdo_page_completion *completion, bool writable)
{ int result = validate_completed_page(completion, writable);
/* * Don't decrement until right before calling check_for_drain_complete() to * ensure that the above work can't cause the page cache to be freed out from under us.
*/
cache->outstanding_reads--;
check_for_drain_complete(cache->zone);
}
/** * page_is_loaded() - Callback used when a page has been loaded. * @completion: The vio which has loaded the page. Its parent is the page_info.
*/ staticvoid page_is_loaded(struct vdo_completion *completion)
{ struct page_info *info = completion->parent; struct vdo_page_cache *cache = info->cache;
nonce_t nonce = info->cache->zone->block_map->nonce; struct block_map_page *page; enum block_map_page_validity validity;
assert_on_cache_thread(cache, __func__);
page = (struct block_map_page *) get_page_buffer(info);
validity = vdo_validate_block_map_page(page, nonce, info->pbn); if (validity == VDO_BLOCK_MAP_PAGE_BAD) {
physical_block_number_t pbn = vdo_get_block_map_page_pbn(page); int result = vdo_log_error_strerror(VDO_BAD_PAGE, "Expected page %llu but got page %llu instead",
(unsignedlonglong) info->pbn,
(unsignedlonglong) pbn);
/* * Don't decrement until right before calling check_for_drain_complete() to * ensure that the above work can't cause the page cache to be freed out from under us.
*/
cache->outstanding_reads--;
check_for_drain_complete(cache->zone);
}
/**
 * handle_rebuild_read_error() - Handle a read error during a read-only rebuild.
 * @completion: The page load completion. Its parent is the page_info being loaded.
 *
 * The error is recorded and counted, the page buffer is zeroed, and the load is then finished
 * through page_is_loaded() as though the read had succeeded.
 */
static void handle_rebuild_read_error(struct vdo_completion *completion)
{
	struct page_info *info = completion->parent;
	struct vdo_page_cache *cache = info->cache;
	struct vio *failed_vio;

	assert_on_cache_thread(cache, __func__);

	/*
	 * We are doing a read-only rebuild, so treat this as a successful read
	 * of an uninitialized page.
	 */
	failed_vio = as_vio(completion);
	vio_record_metadata_io_error(failed_vio);
	ADD_ONCE(cache->stats.failed_reads, 1);

	/* An all-zero buffer stands in for the page which could not be read. */
	memset(get_page_buffer(info), 0, VDO_BLOCK_SIZE);

	vdo_reset_completion(completion);
	page_is_loaded(completion);
}
/** * launch_page_load() - Begin the process of loading a page. * * Return: VDO_SUCCESS or an error code.
*/ staticint __must_check launch_page_load(struct page_info *info,
physical_block_number_t pbn)
{ int result;
vdo_action_fn callback; struct vdo_page_cache *cache = info->cache;
assert_io_allowed(cache);
result = set_info_pbn(info, pbn); if (result != VDO_SUCCESS) return result;
result = VDO_ASSERT((info->busy == 0), "Page is not busy before loading."); if (result != VDO_SUCCESS) return result;
/** save_pages() - Attempt to save the outgoing pages by first flushing the layer. */ staticvoid save_pages(struct vdo_page_cache *cache)
{ struct page_info *info; struct vio *vio;
if ((cache->pages_in_flush > 0) || (cache->pages_to_flush == 0)) return;
assert_io_allowed(cache);
info = list_first_entry(&cache->outgoing_list, struct page_info, state_entry);
/* * We must make sure that the recovery journal entries that changed these pages were * successfully persisted, and thus must issue a flush before each batch of pages is * written to ensure this.
*/
vdo_submit_flush_vio(vio, flush_endio, handle_flush_error);
}
/** * schedule_page_save() - Add a page to the outgoing list of pages waiting to be saved. * * Once in the list, a page may not be used until it has been written out.
*/ staticvoid schedule_page_save(struct page_info *info)
{ if (info->busy > 0) {
info->write_status = WRITE_STATUS_DEFERRED; return;
}
/**
 * launch_page_save() - Add a page to outgoing pages waiting to be saved, and then start saving
 *                      pages if another save is not in progress.
 * @info: The page info of the page to save.
 */
static void launch_page_save(struct page_info *info)
{
	/* Queue the page first so that the save which follows can include it. */
	schedule_page_save(info);

	save_pages(info->cache);
}
/** * completion_needs_page() - Determine whether a given vdo_page_completion (as a waiter) is * requesting a given page number. * @context: A pointer to the pbn of the desired page. * * Implements waiter_match_fn. * * Return: true if the page completion is for the desired page number.
*/ staticbool completion_needs_page(struct vdo_waiter *waiter, void *context)
{
physical_block_number_t *pbn = context;
/** * allocate_free_page() - Allocate a free page to the first completion in the waiting queue, and * any other completions that match it in page number.
*/ staticvoid allocate_free_page(struct page_info *info)
{ int result; struct vdo_waiter *oldest_waiter;
physical_block_number_t pbn; struct vdo_page_cache *cache = info->cache;
assert_on_cache_thread(cache, __func__);
if (!vdo_waitq_has_waiters(&cache->free_waiters)) { if (cache->stats.cache_pressure > 0) {
vdo_log_info("page cache pressure relieved");
WRITE_ONCE(cache->stats.cache_pressure, 0);
}
return;
}
result = reset_page_info(info); if (result != VDO_SUCCESS) {
set_persistent_error(cache, "cannot reset page info", result); return;
}
/* * Remove all entries which match the page number in question and push them onto the page * info's waitq.
*/
vdo_waitq_dequeue_matching_waiters(&cache->free_waiters, completion_needs_page,
&pbn, &info->waiting);
cache->waiter_count -= vdo_waitq_num_waiters(&info->waiting);
result = launch_page_load(info, pbn); if (result != VDO_SUCCESS) {
vdo_waitq_notify_all_waiters(&info->waiting,
complete_waiter_with_error, &result);
}
}
/** * discard_a_page() - Begin the process of discarding a page. * * If no page is discardable, increments a count of deferred frees so that the next release of a * page which is no longer busy will kick off another discard cycle. This is an indication that the * cache is not big enough. * * If the selected page is not dirty, immediately allocates the page to the oldest completion * waiting for a free page.
*/ staticvoid discard_a_page(struct vdo_page_cache *cache)
{ struct page_info *info = select_lru_page(cache);
if (info == NULL) {
report_cache_pressure(cache); return;
}
if (!is_dirty(info)) {
allocate_free_page(info); return;
}
VDO_ASSERT_LOG_ONLY(!is_in_flight(info), "page selected for discard is not in flight");
/** * discard_page_for_completion() - Helper used to trigger a discard so that the completion can get * a different page.
*/ staticvoid discard_page_for_completion(struct vdo_page_completion *vdo_page_comp)
{ struct vdo_page_cache *cache = vdo_page_comp->cache;
/**
 * discard_page_if_needed() - Helper used to trigger a discard if the cache needs another free
 *                            page.
 * @cache: The page cache.
 *
 * A discard is started only when the cache's waiter count exceeds its discard count.
 */
static void discard_page_if_needed(struct vdo_page_cache *cache)
{
	if (cache->waiter_count <= cache->discard_count)
		return;

	discard_a_page(cache);
}
/** * write_has_finished() - Inform the cache that a write has finished (possibly with an error). * @info: The info structure for the page whose write just completed. * * Return: true if the page write was a discard.
*/ staticbool write_has_finished(struct page_info *info)
{ bool was_discard = (info->write_status == WRITE_STATUS_DISCARD);
/** * handle_page_write_error() - Handler for page write errors. * @completion: The page write vio.
*/ staticvoid handle_page_write_error(struct vdo_completion *completion)
{ int result = completion->result; struct page_info *info = completion->parent; struct vdo_page_cache *cache = info->cache;
vio_record_metadata_io_error(as_vio(completion));
/* If we're already read-only, write failures are to be expected. */ if (result != VDO_READ_ONLY) {
vdo_log_ratelimit(vdo_log_error, "failed to write block map page %llu",
(unsignedlonglong) info->pbn);
}
/**
 * page_is_written_out() - Callback used when a page has been written out.
 * @completion: The vio which wrote the page. Its parent is a page_info.
 *
 * NOTE(review): as it appears here, @reclaimed is tested without ever being assigned, and
 * @was_discard, @reclamations, and @page are declared but not otherwise referenced. The
 * statements which compute them look to be missing from this copy of the function; confirm
 * against the complete file before relying on this body.
 */
staticvoid page_is_written_out(struct vdo_completion *completion)
{
	bool was_discard, reclaimed;
	u32 reclamations;
	struct page_info *info = completion->parent;
	struct vdo_page_cache *cache = info->cache;
	struct block_map_page *page = (struct block_map_page *) get_page_buffer(info);

	/*
	 * If the page was reclaimed, only check whether another discard should be started;
	 * otherwise offer this page to whatever is waiting for a free page.
	 */
	if (reclaimed)
		discard_page_if_needed(cache);
	else
		allocate_free_page(info);

	/* This write finishing may be what the zone was waiting on in order to drain. */
	check_for_drain_complete(cache->zone);
}
/**
 * write_pages() - Write the batch of pages which were covered by the layer flush which just
 *                 completed.
 * @flush_completion: The flush vio. Its parent is a page_info, from which the cache is found.
 *
 * This callback is registered in save_pages().
 *
 * NOTE(review): in this copy the local @pages_in_flush is captured but never used, and no page
 * writes are issued; the loop which launches the writes appears to be missing. Confirm against
 * the complete file.
 */
staticvoid write_pages(struct vdo_completion *flush_completion)
{
	struct vdo_page_cache *cache = ((struct page_info *) flush_completion->parent)->cache;

	/*
	 * We need to cache these two values on the stack since it is possible for the last
	 * page info to cause the page cache to get freed. Hence once we launch the last page,
	 * it may be unsafe to dereference the cache.
	 */
	bool has_unflushed_pages = (cache->pages_to_flush > 0);
	page_count_t pages_in_flush = cache->pages_in_flush;

	if (has_unflushed_pages) {
		/*
		 * If there are unflushed pages, the cache can't have been freed, so this call is
		 * safe.
		 */
		save_pages(cache);
	}
}
/**
 * vdo_release_page_completion() - Release a VDO Page Completion.
 * @completion: The page completion to release.
 *
 * The page referenced by this completion (if any) will no longer be held busy by this completion.
 * If a page becomes discardable and there are completions awaiting free pages then a new round of
 * page discarding is started.
 */
void vdo_release_page_completion(struct vdo_completion *completion)
{
	struct page_info *discard_info = NULL;
	struct vdo_page_completion *page_completion = as_vdo_page_completion(completion);
	struct vdo_page_cache *cache;

	if (completion->result == VDO_SUCCESS) {
		if (!validate_completed_page_or_enter_read_only_mode(page_completion, false))
			return;

		/* Only the release which drops the last busy reference may trigger a discard. */
		if (--page_completion->info->busy == 0)
			discard_info = page_completion->info;
	}

	VDO_ASSERT_LOG_ONLY((page_completion->waiter.next_waiter == NULL),
			    "Page being released after leaving all queues");

	/* The completion no longer refers to the page once it has been released. */
	page_completion->info = NULL;
	cache = page_completion->cache;
	assert_on_cache_thread(cache, __func__);

	if (discard_info != NULL) {
		/* A write which was deferred while the page was busy can go ahead now. */
		if (discard_info->write_status == WRITE_STATUS_DEFERRED) {
			discard_info->write_status = WRITE_STATUS_NORMAL;
			launch_page_save(discard_info);
		}

		/*
		 * if there are excess requests for pages (that have not already started discards)
		 * we need to discard some page (which may be this one)
		 */
		discard_page_if_needed(cache);
	}
}
/**
 * load_page_for_completion() - Helper function to load a page as described by a VDO Page
 *                              Completion.
 * @info: The page info into which the page will be loaded.
 * @vdo_page_comp: The page completion describing the page to load.
 *
 * The completion is queued on the page info before the load is launched. If the launch fails,
 * everything waiting on the page info is completed with the error.
 */
static void load_page_for_completion(struct page_info *info,
				     struct vdo_page_completion *vdo_page_comp)
{
	int result;

	vdo_waitq_enqueue_waiter(&info->waiting, &vdo_page_comp->waiter);

	result = launch_page_load(info, vdo_page_comp->pbn);
	if (result == VDO_SUCCESS)
		return;

	vdo_waitq_notify_all_waiters(&info->waiting, complete_waiter_with_error, &result);
}
/**
 * vdo_get_page() - Initialize a page completion and get a block map page.
 * @page_completion: The vdo_page_completion to initialize.
 * @zone: The block map zone of the desired page.
 * @pbn: The absolute physical block of the desired page.
 * @writable: Whether the page can be modified.
 * @parent: The object to notify when the fetch is complete.
 * @callback: The notification callback.
 * @error_handler: The handler for fetch errors.
 * @requeue: Whether we must requeue when notifying the parent.
 *
 * May cause another page to be discarded (potentially writing a dirty page) and the one nominated
 * by the completion to be loaded from disk. When the callback is invoked, the page will be
 * resident in the cache and marked busy. All callers must call vdo_release_page_completion()
 * when they are done with the page to clear the busy mark.
 */
void vdo_get_page(struct vdo_page_completion *page_completion,
		  struct block_map_zone *zone, physical_block_number_t pbn,
		  bool writable, void *parent, vdo_action_fn callback,
		  vdo_action_fn error_handler, bool requeue)
{
	struct vdo_page_cache *cache = &zone->page_cache;
	struct vdo_completion *completion = &page_completion->completion;
	struct page_info *info;

	assert_on_cache_thread(cache, __func__);
	VDO_ASSERT_LOG_ONLY((page_completion->waiter.next_waiter == NULL),
			    "New page completion was not already on a wait queue");

	/* Start from a clean completion which describes exactly this request. */
	*page_completion = (struct vdo_page_completion) {
		.pbn = pbn,
		.writable = writable,
		.cache = cache,
	};

	vdo_initialize_completion(completion, cache->vdo, VDO_PAGE_COMPLETION);
	vdo_prepare_completion(completion, callback, error_handler,
			       cache->zone->thread_id, parent);
	completion->requeue = requeue;

	if (page_completion->writable && vdo_is_read_only(cache->vdo)) {
		vdo_fail_completion(completion, VDO_READ_ONLY);
		return;
	}

	if (page_completion->writable)
		ADD_ONCE(cache->stats.write_count, 1);
	else
		ADD_ONCE(cache->stats.read_count, 1);

	info = find_page(cache, page_completion->pbn);
	if (info != NULL) {
		/* The page is in the cache already. */
		if ((info->write_status == WRITE_STATUS_DEFERRED) ||
		    is_incoming(info) ||
		    (is_outgoing(info) && page_completion->writable)) {
			/* The page is unusable until it has finished I/O. */
			ADD_ONCE(cache->stats.wait_for_page, 1);
			vdo_waitq_enqueue_waiter(&info->waiting, &page_completion->waiter);
			return;
		}

		if (is_valid(info)) {
			/* The page is usable. */
			ADD_ONCE(cache->stats.found_in_cache, 1);
			if (!is_present(info))
				ADD_ONCE(cache->stats.read_outgoing, 1);
			update_lru(info);
			info->busy++;
			complete_with_page(info, page_completion);
			return;
		}

		/* Something horrible has gone wrong. */
		VDO_ASSERT_LOG_ONLY(false, "Info found in a usable state.");
	}

	/* The page must be fetched. */
	info = find_free_page(cache);
	if (info != NULL) {
		ADD_ONCE(cache->stats.fetch_required, 1);
		load_page_for_completion(info, page_completion);
		return;
	}

	/* The page must wait for a page to be discarded. */
	ADD_ONCE(cache->stats.discard_required, 1);
	discard_page_for_completion(page_completion);
}
/**
 * vdo_request_page_write() - Request that a VDO page be written out as soon as it is not busy.
 * @completion: The vdo_page_completion containing the page.
 *
 * Nothing is done if the completion fails validation as a writable page completion.
 */
void vdo_request_page_write(struct vdo_completion *completion)
{
	struct vdo_page_completion *vdo_page_comp = as_vdo_page_completion(completion);

	if (validate_completed_page_or_enter_read_only_mode(vdo_page_comp, true)) {
		struct page_info *info = vdo_page_comp->info;

		set_info_state(info, PS_DIRTY);
		launch_page_save(info);
	}
}
/**
 * vdo_get_cached_page() - Get the block map page from a page completion.
 * @completion: A vdo page completion whose callback has been called.
 * @page_ptr: A pointer to hold the page
 *
 * @page_ptr is only written when the completion validates successfully.
 *
 * Return: VDO_SUCCESS or an error
 */
int vdo_get_cached_page(struct vdo_completion *completion,
			struct block_map_page **page_ptr)
{
	struct vdo_page_completion *vpc = as_vdo_page_completion(completion);
	int result = validate_completed_page(vpc, true);

	if (result != VDO_SUCCESS)
		return result;

	*page_ptr = (struct block_map_page *) get_page_buffer(vpc->info);
	return VDO_SUCCESS;
}
/**
 * vdo_invalidate_page_cache() - Invalidate all entries in the VDO page cache.
 * @cache: The page cache to invalidate.
 *
 * There must not be any dirty pages in the cache.
 *
 * Return: A success or error code.
 */
int vdo_invalidate_page_cache(struct vdo_page_cache *cache)
{
	struct page_info *info = cache->infos;
	struct page_info *end = cache->infos + cache->page_count;

	assert_on_cache_thread(cache, __func__);

	/* Make sure we don't throw away any dirty pages. */
	while (info < end) {
		int result = VDO_ASSERT(!is_dirty(info), "cache must have no dirty pages");

		if (result != VDO_SUCCESS)
			return result;

		info++;
	}

	/* Reset the page map by re-allocating it. */
	vdo_int_map_free(vdo_forget(cache->page_map));
	return vdo_int_map_create(cache->page_count, &cache->page_map);
}
/** * get_tree_page_by_index() - Get the tree page for a given height and page index. * * Return: The requested page.
*/ staticstruct tree_page * __must_check get_tree_page_by_index(struct forest *forest,
root_count_t root_index,
height_t height,
page_number_t page_index)
{
page_number_t offset = 0;
size_t segment;
/* Get the page referred to by the lock's tree slot at its current height. */ staticinlinestruct tree_page *get_tree_page(conststruct block_map_zone *zone, conststruct tree_lock *lock)
{ return get_tree_page_by_index(zone->block_map->forest, lock->root_index,
lock->height,
lock->tree_slots[lock->height].page_index);
}
/**
 * vdo_copy_valid_page() - Validate and copy a buffer to a page.
 * @buffer: The buffer holding the block map page which was read.
 * @nonce: The nonce to validate the page against.
 * @pbn: The physical block number the page is expected to record.
 * @page: The page to copy into if the buffer is valid.
 *
 * A page which validates as bad is logged. @page is left untouched unless the buffer is valid.
 *
 * Return: true if the buffer held a valid page and it was copied.
 */
bool vdo_copy_valid_page(char *buffer, nonce_t nonce,
			 physical_block_number_t pbn,
			 struct block_map_page *page)
{
	struct block_map_page *loaded = (struct block_map_page *) buffer;

	switch (vdo_validate_block_map_page(loaded, nonce, pbn)) {
	case VDO_BLOCK_MAP_PAGE_VALID:
		memcpy(page, loaded, VDO_BLOCK_SIZE);
		return true;

	case VDO_BLOCK_MAP_PAGE_BAD:
		vdo_log_error_strerror(VDO_BAD_PAGE,
				       "Expected page %llu but got page %llu instead",
				       (unsigned long long) pbn,
				       (unsigned long long) vdo_get_block_map_page_pbn(loaded));
		return false;

	default:
		return false;
	}
}
/**
 * in_cyclic_range() - Check whether the given value is between the lower and upper bounds, within
 *                     a cyclic range of values from 0 to (modulus - 1).
 * @lower: The lowest value to accept.
 * @value: The value to check.
 * @upper: The highest value to accept.
 * @modulus: The size of the cyclic space.
 *
 * The value and both bounds must be smaller than the modulus. Values which have wrapped below
 * @lower are unwrapped by adding the modulus; the sums are held in 32 bits so that they cannot
 * be truncated, whatever the modulus.
 *
 * Return: true if the value is in range.
 */
static bool in_cyclic_range(u16 lower, u16 value, u16 upper, u16 modulus)
{
	u32 unwrapped_value = value;
	u32 unwrapped_upper = upper;

	if (value < lower)
		unwrapped_value += modulus;
	if (upper < lower)
		unwrapped_upper += modulus;
	return (unwrapped_value <= unwrapped_upper);
}
/**
 * is_not_older() - Check whether a generation is not strictly older than some other generation
 *                  in the context of a zone's current generation range.
 * @zone: The zone in which to do the comparison.
 * @a: The generation in question.
 * @b: The generation to compare to.
 *
 * If either generation lies outside the zone's range of active generations, the zone enters
 * read-only mode and true is returned.
 *
 * Return: true if generation @a is not strictly older than generation @b in the context of @zone
 */
static bool __must_check is_not_older(struct block_map_zone *zone, u8 a, u8 b)
{
	int result = VDO_ASSERT((in_cyclic_range(zone->oldest_generation, a, zone->generation, 1 << 8) &&
				 in_cyclic_range(zone->oldest_generation, b, zone->generation, 1 << 8)),
				"generation(s) %u, %u are out of range [%u, %u]",
				a, b, zone->oldest_generation, zone->generation);

	if (result == VDO_SUCCESS)
		return in_cyclic_range(b, a, zone->generation, 1 << 8);

	enter_zone_read_only_mode(zone, result);
	return true;
}
staticvoid release_generation(struct block_map_zone *zone, u8 generation)
{ int result;
result = VDO_ASSERT((zone->dirty_page_counts[generation] > 0), "dirty page count underflow for generation %u", generation); if (result != VDO_SUCCESS) {
enter_zone_read_only_mode(zone, result); return;
}
/*
 * Advance the zone's generation by one unless doing so would collide with the oldest
 * generation.
 *
 * Return: true if all possible generations were not already active
 */
static bool attempt_increment(struct block_map_zone *zone)
{
	const u8 next_generation = zone->generation + 1;
	const bool available = (next_generation != zone->oldest_generation);

	if (available)
		zone->generation = next_generation;

	return available;
}
/* Launches a flush if one is not already in progress. */ staticvoid enqueue_page(struct tree_page *page, struct block_map_zone *zone)
{ if ((zone->flusher == NULL) && attempt_increment(zone)) {
zone->flusher = page;
acquire_vio(&page->waiter, zone); return;
}
if ((zone->flusher != tree_page) &&
is_not_older(zone, tree_page->generation, zone->generation)) { /* * This page was re-dirtied after the last flush was issued, hence we need to do * another flush.
*/
enqueue_page(tree_page, zone);
return_to_pool(zone, vio); return;
}
/* Clear this now so that we know this page is not on any dirty list. */
tree_page->recovery_lock = 0;
/* * We've already copied the page into the vio which will write it, so if it was not yet * initialized, the first write will indicate that (for torn write protection). It is now * safe to mark it as initialized in memory since if the write fails, the in memory state * will become irrelevant.
*/ if (page->header.initialized) {
write_initialized_page(completion); return;
}
/*
 * Release a lock on a page which was being loaded or allocated.
 *
 * The lock is removed from the zone's map of loading pages and marked as no longer held.
 * Inconsistencies (releasing a lock which is not held, or finding a different lock in the map)
 * are logged; @what names the operation for those messages.
 */
static void release_page_lock(struct data_vio *data_vio, char *what)
{
	struct tree_lock *lock = &data_vio->tree_lock;
	struct tree_lock *lock_holder;
	struct block_map_zone *zone;

	VDO_ASSERT_LOG_ONLY(lock->locked,
			    "release of unlocked block map page %s for key %llu in tree %u",
			    what, (unsigned long long) lock->key, lock->root_index);

	zone = data_vio->logical.zone->block_map_zone;
	lock_holder = vdo_int_map_remove(zone->loading_pages, lock->key);

	VDO_ASSERT_LOG_ONLY((lock_holder == lock),
			    "block map page %s mismatch for key %llu in tree %u",
			    what, (unsigned long long) lock->key, lock->root_index);

	lock->locked = false;
}
staticvoid finish_lookup(struct data_vio *data_vio, int result)
{
data_vio->tree_lock.height = 0;
if (is_invalid_tree_entry(vdo_from_data_vio(data_vio), &mapping, lock->height)) {
vdo_log_error_strerror(VDO_BAD_MAPPING, "Invalid block map tree PBN: %llu with state %u for page index %u at height %u",
(unsignedlonglong) mapping.pbn, mapping.state,
lock->tree_slots[lock->height - 1].page_index,
lock->height - 1);
abort_load(data_vio, VDO_BAD_MAPPING); return;
}
if (!vdo_is_mapped_location(&mapping)) { /* The page we need is unallocated */
allocate_block_map_page(data_vio->logical.zone->block_map_zone,
data_vio); return;
}
if (!vdo_copy_valid_page(vio->data, nonce, pbn, page))
vdo_format_block_map_page(page, nonce, pbn, false);
return_vio_to_pool(pooled);
/* Release our claim to the load and wake any waiters */
release_page_lock(data_vio, "load");
vdo_waitq_notify_all_waiters(&tree_lock->waiters, continue_load_for_waiter, page);
continue_with_loaded_page(data_vio, page);
}
/*
 * Try to claim the tree page identified by @data_vio->tree_lock.key by registering the tree lock
 * in @zone->loading_pages.
 *
 * If the page is already locked, queue up to wait for the lock to be released. If the lock is
 * acquired, @data_vio->tree_lock.locked will be true.
 *
 * Return: VDO_SUCCESS both when the lock was acquired and when the data_vio was queued behind the
 *         current holder; otherwise the error returned by vdo_int_map_put().
 *
 * NOTE(review): in the code visible here, `key` and `tree_slot` are declared but never used, and
 * lock->key is read without being assigned in this function. Presumably the computation of the
 * key from the tree slot belongs between the declarations and the map insertion -- confirm
 * against the complete file.
 */
staticint attempt_page_lock(struct block_map_zone *zone, struct data_vio *data_vio)
{ int result; struct tree_lock *lock_holder; struct tree_lock *lock = &data_vio->tree_lock;
height_t height = lock->height; struct block_map_tree_slot tree_slot = lock->tree_slots[height]; union page_key key;
/* lock_holder receives whatever was already registered under this key (NULL if nothing was). */
result = vdo_int_map_put(zone->loading_pages, lock->key,
lock, false, (void **) &lock_holder); if (result != VDO_SUCCESS) return result;
if (lock_holder == NULL) { /* We got the lock */
data_vio->tree_lock.locked = true; return VDO_SUCCESS;
}
/* Someone else is loading or allocating the page we need */
vdo_waitq_enqueue_waiter(&lock_holder->waiters, &data_vio->waiter); return VDO_SUCCESS;
}
/**
 * load_block_map_page() - Load a block map tree page from disk, for the next level in the
 *                         data vio tree lock.
 * @zone: The block map zone whose vio pool and page locks are used.
 * @data_vio: The data_vio which needs the page.
 *
 * If the page lock cannot be attempted, the load is aborted with the error. If the lock is
 * acquired, the data_vio waits for a vio from the zone's pool with load_page() as its waiter
 * callback. If the lock is held by someone else, attempt_page_lock() has already queued the
 * data_vio on the holder's wait queue, so there is nothing more to do here.
 */
static void load_block_map_page(struct block_map_zone *zone, struct data_vio *data_vio)
{
	int result = attempt_page_lock(zone, data_vio);

	if (result != VDO_SUCCESS) {
		abort_load(data_vio, result);
		return;
	}

	/* Not the lock holder: we are already waiting on whoever is. */
	if (!data_vio->tree_lock.locked)
		return;

	data_vio->waiter.callback = load_page;
	acquire_vio_from_pool(zone->vio_pool, &data_vio->waiter);
}
expired = &zone->dirty_lists->expired[VDO_TREE_PAGE];
list_for_each_entry_safe(page, ttmp, expired, entry) { int result;
list_del_init(&page->entry);
result = VDO_ASSERT(!vdo_waiter_is_waiting(&page->waiter), "Newly expired page not already waiting to write"); if (result != VDO_SUCCESS) {
enter_zone_read_only_mode(zone, result); continue;
}
set_generation(zone, page, generation); if (!page->writing)
enqueue_page(page, zone);
}
/** * add_to_dirty_lists() - Add an element to the dirty lists. * @zone: The zone in which we are operating. * @entry: The list entry of the element to add. * @type: The type of page. * @old_period: The period in which the element was previously dirtied, or 0 if it was not dirty. * @new_period: The period in which the element has now been dirtied, or 0 if it does not hold a * lock.
*/ staticvoid add_to_dirty_lists(struct block_map_zone *zone, struct list_head *entry, enum block_map_page_type type,
sequence_number_t old_period,
sequence_number_t new_period)
{ struct dirty_lists *dirty_lists = zone->dirty_lists;
/* * Record the allocation in the tree and wake any waiters now that the write lock has been * released.
*/ staticvoid finish_block_map_allocation(struct vdo_completion *completion)
{
physical_block_number_t pbn; struct tree_page *tree_page; struct block_map_page *page;
sequence_number_t old_lock; struct data_vio *data_vio = as_data_vio(completion); struct block_map_zone *zone = data_vio->logical.zone->block_map_zone; struct tree_lock *tree_lock = &data_vio->tree_lock;
height_t height = tree_lock->height;
/* Record the allocation. */
page = (struct block_map_page *) tree_page->page_buffer;
old_lock = tree_page->recovery_lock;
vdo_update_block_map_page(page, data_vio, pbn,
VDO_MAPPING_STATE_UNCOMPRESSED,
&tree_page->recovery_lock);
if (vdo_waiter_is_waiting(&tree_page->waiter)) { /* This page is waiting to be written out. */ if (zone->flusher != tree_page) { /* * The outstanding flush won't cover the update we just made, * so mark the page as needing another flush.
*/
set_generation(zone, tree_page, zone->generation);
}
} else { /* Put the page on a dirty list */ if (old_lock == 0)
INIT_LIST_HEAD(&tree_page->entry);
add_to_dirty_lists(zone, &tree_page->entry, VDO_TREE_PAGE,
old_lock, tree_page->recovery_lock);
}
tree_lock->height--; if (height > 1) { /* Format the interior node we just allocated (in memory). */
tree_page = get_tree_page(zone, tree_lock);
vdo_format_block_map_page(tree_page->page_buffer,
zone->block_map->nonce,
pbn, false);
}
/* Release our claim to the allocation and wake any waiters */
release_page_lock(data_vio, "allocation");
vdo_waitq_notify_all_waiters(&tree_lock->waiters,
continue_allocation_for_waiter, &pbn); if (tree_lock->height == 0) {
finish_lookup(data_vio, VDO_SUCCESS); return;
}
/* * Newly allocated block map pages are set to have to MAXIMUM_REFERENCES after they are journaled, * to prevent deduplication against the block after we release the write lock on it, but before we * write out the page.
*/ staticvoid set_block_map_page_reference_count(struct vdo_completion *completion)
{ struct data_vio *data_vio = as_data_vio(completion);
staticvoid allocate_block_map_page(struct block_map_zone *zone, struct data_vio *data_vio)
{ int result;
if (!data_vio->write || data_vio->is_discard) { /* This is a pure read or a discard, so there's nothing left to do here. */
finish_lookup(data_vio, VDO_SUCCESS); return;
}
result = attempt_page_lock(zone, data_vio); if (result != VDO_SUCCESS) {
abort_lookup(data_vio, result, "allocation"); return;
}
/** * vdo_find_block_map_slot() - Find the block map slot in which the block map entry for a data_vio * resides and cache that result in the data_vio. * * All ancestors in the tree will be allocated or loaded, as needed.
*/ void vdo_find_block_map_slot(struct data_vio *data_vio)
{
page_number_t page_index; struct block_map_tree_slot tree_slot; struct data_location mapping; struct block_map_page *page = NULL; struct tree_lock *lock = &data_vio->tree_lock; struct block_map_zone *zone = data_vio->logical.zone->block_map_zone;
zone->active_lookups++; if (vdo_is_state_draining(&zone->state)) {
finish_lookup(data_vio, VDO_SHUTTING_DOWN); return;
}
/* Calculate the index and slot for the next level. */
tree_slot.block_map_slot.slot =
tree_slot.page_index % VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
tree_slot.page_index = tree_slot.page_index / VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
}
/* The page at this height has been allocated and loaded. */
mapping = vdo_unpack_block_map_entry(&page->entries[tree_slot.block_map_slot.slot]); if (is_invalid_tree_entry(vdo_from_data_vio(data_vio), &mapping, lock->height)) {
vdo_log_error_strerror(VDO_BAD_MAPPING, "Invalid block map tree PBN: %llu with state %u for page index %u at height %u",
(unsignedlonglong) mapping.pbn, mapping.state,
lock->tree_slots[lock->height - 1].page_index,
lock->height - 1);
abort_load(data_vio, VDO_BAD_MAPPING); return;
}
if (!vdo_is_mapped_location(&mapping)) { /* The page we want one level down has not been allocated, so allocate it. */
allocate_block_map_page(zone, data_vio); return;
}
lock->tree_slots[lock->height - 1].block_map_slot.pbn = mapping.pbn; if (lock->height == 1) { /* This is the ultimate block map page, so we're done */
finish_lookup(data_vio, VDO_SUCCESS); return;
}
/* We know what page we need to load. */
load_block_map_page(zone, data_vio);
}
/* * Find the PBN of a leaf block map page. This method may only be used after all allocated tree * pages have been loaded, otherwise, it may give the wrong answer (0).
*/
physical_block_number_t vdo_find_block_map_page_pbn(struct block_map *map,
page_number_t page_number)
{ struct data_location mapping; struct tree_page *tree_page; struct block_map_page *page;
root_count_t root_index = page_number % map->root_count;
page_number_t page_index = page_number / map->root_count;
slot_number_t slot = page_index % VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
/* * Write a tree page or indicate that it has been re-dirtied if it is already being written. This * method is used when correcting errors in the tree during read-only rebuild.
*/ void vdo_write_tree_page(struct tree_page *page, struct block_map_zone *zone)
{ bool waiting = vdo_waiter_is_waiting(&page->waiter);
if (waiting && (zone->flusher == page)) return;
set_generation(zone, page, zone->generation); if (waiting || page->writing) return;
for (root = 0; root < forest->map->root_count; root++)
vdo_free(forest->trees[root].segments);
vdo_free(forest->boundaries);
vdo_free(forest);
}
/** * make_forest() - Make a collection of trees for a block_map, expanding the existing forest if * there is one. * @entries: The number of entries the block map will hold. * * Return: VDO_SUCCESS or an error.
*/ staticint make_forest(struct block_map *map, block_count_t entries)
{ struct forest *forest, *old_forest = map->forest; struct boundary new_boundary, *old_boundary = NULL;
block_count_t new_pages; int result;
if (old_forest != NULL)
old_boundary = &(old_forest->boundaries[old_forest->segments - 1]);
/**
 * finish_cursor() - Finish the traversal of a single tree. If it was the last cursor, finish the
 *                   traversal.
 * @cursor: The cursor which has finished its tree.
 *
 * The cursor's vio is always returned to its pool. Once no active roots remain, the shared
 * cursors structure is freed and the completion recorded in it is finished.
 */
static void finish_cursor(struct cursor *cursor)
{
	struct cursors *cursors = cursor->parent;
	/* Capture the completion now: cursors is freed before the completion is finished. */
	struct vdo_completion *completion = cursors->completion;

	return_vio_to_pool(vdo_forget(cursor->vio));

	cursors->active_roots--;
	if (cursors->active_roots > 0)
		return;

	vdo_free(cursors);
	vdo_finish_completion(completion);
}
/* Forward declaration: continue_traversal() below calls traverse() before it is defined. */
static void traverse(struct cursor *cursor);

/**
 * continue_traversal() - Continue traversing a block map tree.
 * @completion: The VIO doing a read or write.
 *
 * Records the metadata I/O error for the vio and then resumes the traversal with the
 * completion's parent, which is passed to traverse() as the cursor.
 */
static void continue_traversal(struct vdo_completion *completion)
{
	vio_record_metadata_io_error(as_vio(completion));
	traverse(completion->parent);
}
/** * finish_traversal_load() - Continue traversing a block map tree now that a page has been loaded. * @completion: The VIO doing the read.
*/ staticvoid finish_traversal_load(struct vdo_completion *completion)
{ struct cursor *cursor = completion->parent;
height_t height = cursor->height; struct cursor_level *level = &cursor->levels[height]; struct tree_page *tree_page =
&(cursor->tree->segments[0].levels[height][level->page_index]); struct block_map_page *page = (struct block_map_page *) tree_page->page_buffer;
if (!vdo_is_valid_location(&location)) { /* This entry is invalid, so remove it from the page. */
page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
vdo_write_tree_page(tree_page, cursor->parent->zone); continue;
}
if (!vdo_is_mapped_location(&location)) continue;
/* Erase mapped entries past the end of the logical space. */ if (entry_index >= cursor->boundary.levels[height]) {
page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
vdo_write_tree_page(tree_page, cursor->parent->zone); continue;
}
if (cursor->height < VDO_BLOCK_MAP_TREE_HEIGHT - 1) { int result = cursor->parent->entry_callback(location.pbn,
cursor->parent->completion); if (result != VDO_SUCCESS) {
page->entries[level->slot] = UNMAPPED_BLOCK_MAP_ENTRY;
vdo_write_tree_page(tree_page, cursor->parent->zone); continue;
}
}
/** * launch_cursor() - Start traversing a single block map tree now that the cursor has a VIO with * which to load pages. * @context: The pooled_vio just acquired. * * Implements waiter_callback_fn.
*/ staticvoid launch_cursor(struct vdo_waiter *waiter, void *context)
{ struct cursor *cursor = container_of(waiter, struct cursor, waiter); struct pooled_vio *pooled = context;
/**
 * compute_boundary() - Compute the number of pages used at each level of the given root's tree.
 * @map: The block map; its entry_count and root_count are read.
 * @root_index: The index of the root whose tree is being measured.
 *
 * NOTE(review): in the code visible here only the top level
 * (VDO_BLOCK_MAP_TREE_HEIGHT - 1) of the boundary is assigned; `height`, `last_tree_root`,
 * `level_pages` and @root_index are never used. The per-level computation appears to be missing
 * from this view -- confirm against the complete file.
 *
 * Return: The list of page counts as a boundary structure.
 */
staticstruct boundary compute_boundary(struct block_map *map, root_count_t root_index)
{ struct boundary boundary;
height_t height;
page_count_t leaf_pages = vdo_compute_block_map_page_count(map->entry_count);
/*
 * Compute the leaf pages for this root. If the number of leaf pages does not distribute
 * evenly, we must determine if this root gets an extra page. Extra pages are assigned to
 * roots starting from tree 0.
 */
page_count_t last_tree_root = (leaf_pages - 1) % map->root_count;
page_count_t level_pages = leaf_pages / map->root_count;
/* The root node always exists, even if the root is otherwise unused. */
boundary.levels[VDO_BLOCK_MAP_TREE_HEIGHT - 1] = 1;
return boundary;
}
/** * vdo_traverse_forest() - Walk the entire forest of a block map. * @callback: A function to call with the pbn of each allocated node in the forest. * @completion: The completion to notify on each traversed PBN, and when traversal completes.
*/ void vdo_traverse_forest(struct block_map *map, vdo_entry_callback_fn callback, struct vdo_completion *completion)
{
root_count_t root; struct cursors *cursors; int result;
result = vdo_allocate_extended(struct cursors, map->root_count, struct cursor, __func__, &cursors); if (result != VDO_SUCCESS) {
vdo_fail_completion(completion, result); return;
}
/** * initialize_block_map_zone() - Initialize the per-zone portions of the block map. * @maximum_age: The number of journal blocks before a dirtied page is considered old and must be * written out.
*/ staticint __must_check initialize_block_map_zone(struct block_map *map,
zone_count_t zone_number,
page_count_t cache_size,
block_count_t maximum_age)
{ int result;
block_count_t i; struct vdo *vdo = map->vdo; struct block_map_zone *zone = &map->zones[zone_number];
for (i = 0; i < maximum_age; i++) {
INIT_LIST_HEAD(&zone->dirty_lists->eras[i][VDO_TREE_PAGE]);
INIT_LIST_HEAD(&zone->dirty_lists->eras[i][VDO_CACHE_PAGE]);
}
result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->loading_pages); if (result != VDO_SUCCESS) return result;
result = make_vio_pool(vdo, BLOCK_MAP_VIO_POOL_SIZE, 1,
zone->thread_id, VIO_TYPE_BLOCK_MAP_INTERIOR,
VIO_PRIORITY_METADATA, zone, &zone->vio_pool); if (result != VDO_SUCCESS) return result;
/* * Schedule an era advance if necessary. This method should not be called directly. Rather, call * vdo_schedule_default_action() on the block map's action manager. * * Implements vdo_action_scheduler_fn.
*/ staticbool schedule_era_advance(void *context)
{ struct block_map *map = context;
if (map->current_era_point == map->pending_era_point) returnfalse;
result = make_forest(map, map->entry_count); if (result != VDO_SUCCESS) {
vdo_free_block_map(map); return result;
}
replace_forest(map);
map->zone_count = vdo->thread_config.logical_zone_count; for (zone = 0; zone < map->zone_count; zone++) {
result = initialize_block_map_zone(map, zone, cache_size, maximum_age); if (result != VDO_SUCCESS) {
vdo_free_block_map(map); return result;
}
}
result = vdo_make_action_manager(map->zone_count, get_block_map_zone_thread_id,
vdo_get_recovery_journal_thread_id(journal),
map, schedule_era_advance, vdo,
&map->action_manager); if (result != VDO_SUCCESS) {
vdo_free_block_map(map); return result;
}
*map_ptr = map; return VDO_SUCCESS;
}
struct block_map_state_2_0 vdo_record_block_map(conststruct block_map *map)
{ return (struct block_map_state_2_0) {
.flat_page_origin = VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN, /* This is the flat page count, which has turned out to always be 0. */
.flat_page_count = 0,
.root_origin = map->root_origin,
.root_count = map->root_count,
};
}
/* The block map needs to know the journals' sequence number to initialize the eras. */ void vdo_initialize_block_map_from_journal(struct block_map *map, struct recovery_journal *journal)
{
zone_count_t z = 0;
/* Compute the logical zone for the LBN of a data vio. */
zone_count_t vdo_compute_logical_zone(struct data_vio *data_vio)
{ struct block_map *map = vdo_from_data_vio(data_vio)->block_map; struct tree_lock *tree_lock = &data_vio->tree_lock;
page_number_t page_number = data_vio->logical.lbn / VDO_BLOCK_MAP_ENTRIES_PER_PAGE;
VDO_ASSERT_LOG_ONLY((zone->active_lookups == 0), "%s() called with no active lookups", __func__);
if (!vdo_is_state_suspending(state)) { while (zone->dirty_lists->oldest_period < zone->dirty_lists->next_period)
expire_oldest_list(zone->dirty_lists);
write_expired_elements(zone);
}
/* Allocate an expanded collection of trees, for a future growth. */ int vdo_prepare_to_grow_block_map(struct block_map *map,
block_count_t new_logical_blocks)
{ if (map->next_entry_count == new_logical_blocks) return VDO_SUCCESS;
if (map->next_entry_count > 0)
vdo_abandon_block_map_growth(map);
if (forest != NULL)
deforest(forest, forest->segments - 1);
map->next_entry_count = 0;
}
/* Release the page completion and then continue the requester. */ staticinlinevoid finish_processing_page(struct vdo_completion *completion, int result)
{ struct vdo_completion *parent = completion->parent;
/**
 * clear_mapped_location() - Clear a data_vio's mapped block location, setting it to be unmapped.
 * @data_vio: The data_vio whose mapped location is reset.
 *
 * This indicates the block map entry for the logical block is either unmapped or corrupted.
 */
static void clear_mapped_location(struct data_vio *data_vio)
{
	/* Members not named in the compound literal are zero-initialized. */
	data_vio->mapped = (struct zoned_pbn) {
		.state = VDO_MAPPING_STATE_UNMAPPED,
	};
}
/**
 * set_mapped_location() - Decode and validate a block map entry, and set the mapped location of a
 *                         data_vio.
 * @data_vio: The data_vio whose mapped location is to be set.
 * @entry: The packed block map entry to decode.
 *
 * An invalid entry is always logged. For a data_vio that is not a write it is reported as
 * VDO_BAD_MAPPING; for a write it is treated as an unmapped entry and the call succeeds.
 *
 * Return: VDO_SUCCESS or VDO_BAD_MAPPING if the map entry is invalid or an error code for any
 *         other failure
 */
static int __must_check set_mapped_location(struct data_vio *data_vio,
					    const struct block_map_entry *entry)
{
	/* Unpack the PBN for logging purposes even if the entry is invalid. */
	struct data_location mapped = vdo_unpack_block_map_entry(entry);

	if (vdo_is_valid_location(&mapped)) {
		int result = vdo_get_physical_zone(vdo_from_data_vio(data_vio),
						   mapped.pbn, &data_vio->mapped.zone);

		if (result == VDO_SUCCESS) {
			data_vio->mapped.pbn = mapped.pbn;
			data_vio->mapped.state = mapped.state;
			return VDO_SUCCESS;
		}

		/*
		 * Return all errors not specifically known to be errors from validating the
		 * location.
		 */
		if ((result != VDO_OUT_OF_RANGE) && (result != VDO_BAD_MAPPING))
			return result;
	}

	/*
	 * Log the corruption even if we wind up ignoring it for write VIOs, converting all cases
	 * to VDO_BAD_MAPPING.
	 */
	vdo_log_error_strerror(VDO_BAD_MAPPING,
			       "PBN %llu with state %u read from the block map was invalid",
			       (unsigned long long) mapped.pbn, mapped.state);

	/*
	 * A read VIO has no option but to report the bad mapping--reading zeros would be hiding
	 * known data loss.
	 */
	if (!data_vio->write)
		return VDO_BAD_MAPPING;

	/*
	 * A write VIO only reads this mapping to decref the old block. Treat this as an unmapped
	 * entry rather than fail the write.
	 */
	clear_mapped_location(data_vio);
	return VDO_SUCCESS;
}
/* This callback is registered in vdo_get_mapped_block(). */ staticvoid get_mapping_from_fetched_page(struct vdo_completion *completion)
{ int result; struct vdo_page_completion *vpc = as_vdo_page_completion(completion); conststruct block_map_page *page; conststruct block_map_entry *entry; struct data_vio *data_vio = as_data_vio(completion->parent); struct block_map_tree_slot *tree_slot;
if (completion->result != VDO_SUCCESS) {
finish_processing_page(completion, completion->result); return;
}
result = validate_completed_page(vpc, false); if (result != VDO_SUCCESS) {
finish_processing_page(completion, result); return;
}
/* Encode the new mapping. */
page->entries[tree_lock->tree_slots[tree_lock->height].block_map_slot.slot] =
vdo_pack_block_map_entry(pbn, mapping_state);
/* Adjust references on the recovery journal blocks. */
old_locked = *recovery_lock;
new_locked = data_vio->recovery_sequence_number;
if (old_locked > 0) {
vdo_release_recovery_journal_block_reference(journal, old_locked,
VDO_ZONE_TYPE_LOGICAL,
zone->zone_number);
}
*recovery_lock = new_locked;
}
/* * FIXME: explain this more * Release the transferred lock from the data_vio.
*/
vdo_release_journal_entry_lock(journal, new_locked);
data_vio->recovery_sequence_number = 0;
}
/* Read a stored block mapping into a data_vio. */ void vdo_get_mapped_block(struct data_vio *data_vio)
{ if (data_vio->tree_lock.tree_slots[0].block_map_slot.pbn == VDO_ZERO_BLOCK) { /* * We know that the block map page for this LBN has not been allocated, so the * block must be unmapped.
*/
clear_mapped_location(data_vio);
continue_data_vio(data_vio); return;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.