/* If we bump this, update the arrays below */ #define TABLE_VERSION 4
/* arrays for handling different table versions */ staticconst u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 }; /* pool name no longer used. only here for verification of older versions */ staticconst u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 };
/* * Track in-use instance numbers using a flat bit array. * * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage.
*/
/* * This minimum size for the bit array creates a numbering space of 0-999, which allows * successive starts of the same volume to have different instance numbers in any * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that * the ephemeral stats have reset to zero.
*/ #define BIT_COUNT_MINIMUM 1000 /* Grow the bit array by this many bits when needed */ #define BIT_COUNT_INCREMENT 100
/**
 * free_device_config() - Free a device config created by parse_device_config().
 * @config: The config to free; a NULL config is a no-op.
 */
static void free_device_config(struct device_config *config)
{
	if (config != NULL) {
		/* Release the underlying device reference if we acquired one. */
		if (config->owned_device != NULL)
			dm_put_device(config->owning_target, config->owned_device);

		/* Scrub the memory so a use-after-free (as in BZ 1669960) is less likely to work. */
		memset(config, 0, sizeof(*config));
		vdo_free(config);
	}
}
/** * get_version_number() - Decide the version number from argv. * * @argc: The number of table values. * @argv: The array of table values. * @error_ptr: A pointer to return a error string in. * @version_ptr: A pointer to return the version. * * Return: VDO_SUCCESS or an error code.
*/ staticint get_version_number(int argc, char **argv, char **error_ptr, unsignedint *version_ptr)
{ /* version, if it exists, is in a form of V<n> */ if (sscanf(argv[0], "V%u", version_ptr) == 1) { if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) {
*error_ptr = "Unknown version number detected"; return VDO_BAD_CONFIGURATION;
}
} else { /* V0 actually has no version number in the table string */
*version_ptr = 0;
}
/* * V0 and V1 have no optional parameters. There will always be a parameter for thread * config, even if it's a "." to show it's an empty list.
*/ if (*version_ptr <= 1) { if (argc != REQUIRED_ARGC[*version_ptr]) {
*error_ptr = "Incorrect number of arguments for version"; return VDO_BAD_CONFIGURATION;
}
} elseif (argc < REQUIRED_ARGC[*version_ptr]) {
*error_ptr = "Incorrect number of arguments for version"; return VDO_BAD_CONFIGURATION;
}
if (*version_ptr != TABLE_VERSION) {
vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d",
TABLE_VERSION, *version_ptr);
vdo_log_warning("Please consider upgrading management tools to match kernel.");
} return VDO_SUCCESS;
}
/* Free a list of non-NULL string pointers, and then the list itself. */ staticvoid free_string_array(char **string_array)
{ unsignedint offset;
/* * Split the input string into substrings, separated at occurrences of the indicated character, * returning a null-terminated list of string pointers. * * The string pointers and the pointer array itself should both be freed with vdo_free() when no * longer needed. This can be done with vdo_free_string_array (below) if the pointers in the array * are not changed. Since the array and copied strings are allocated by this function, it may only * be used in contexts where allocation is permitted. * * Empty substrings are not ignored; that is, returned substrings may be empty strings if the * separator occurs twice in a row.
*/ staticint split_string(constchar *string, char separator, char ***substring_array_ptr)
{ unsignedint current_substring = 0, substring_count = 1; constchar *s; char **substrings; int result;
ptrdiff_t length;
for (s = string; *s != 0; s++) { if (*s == separator)
substring_count++;
}
result = vdo_allocate(substring_count + 1, char *, "string-splitting array",
&substrings); if (result != VDO_SUCCESS) return result;
for (s = string; *s != 0; s++) { if (*s == separator) {
ptrdiff_t length = s - string;
result = vdo_allocate(length + 1, char, "split string",
&substrings[current_substring]); if (result != VDO_SUCCESS) {
free_string_array(substrings); return result;
} /* * Trailing NUL is already in place after allocation; deal with the zero or * more non-NUL bytes in the string.
*/ if (length > 0)
memcpy(substrings[current_substring], string, length);
string = s + 1;
current_substring++;
BUG_ON(current_substring >= substring_count);
}
} /* Process final string, with no trailing separator. */
BUG_ON(current_substring != (substring_count - 1));
length = strlen(string);
/*
 * Join the input substrings into one string, joined with the indicated character, returning a
 * string. array_length is a bound on the number of valid elements in substring_array, in case it
 * is not NULL-terminated. The result is allocated and must be released with vdo_free().
 */
static int join_strings(char **substring_array, size_t array_length, char separator,
			char **string_ptr)
{
	char *joined, *position;
	size_t total_length = 0;
	size_t index;
	int result;

	/* First pass: each substring contributes its length plus one separator byte. */
	for (index = 0; (index < array_length) && (substring_array[index] != NULL); index++)
		total_length += strlen(substring_array[index]) + 1;

	result = vdo_allocate(total_length, char, __func__, &joined);
	if (result != VDO_SUCCESS)
		return result;

	/* Second pass: copy each substring followed by the separator. */
	position = joined;
	for (index = 0; (index < array_length) && (substring_array[index] != NULL); index++) {
		position = vdo_append_to_buffer(position, joined + total_length, "%s",
						substring_array[index]);
		*position = separator;
		position++;
	}

	/* We output one too many separators; replace the last with a zero byte. */
	if (position != joined)
		position[-1] = '\0';

	*string_ptr = joined;
	return VDO_SUCCESS;
}
/**
 * parse_bool() - Parse a two-valued option into a bool.
 * @bool_str: The string value to convert to a bool.
 * @true_str: The string value which should be converted to true.
 * @false_str: The string value which should be converted to false.
 * @bool_ptr: A pointer to return the bool value in; untouched on error.
 *
 * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str.
 */
static inline int __must_check parse_bool(const char *bool_str, const char *true_str,
					  const char *false_str, bool *bool_ptr)
{
	if (strcmp(bool_str, true_str) == 0)
		*bool_ptr = true;
	else if (strcmp(bool_str, false_str) == 0)
		*bool_ptr = false;
	else
		return VDO_BAD_CONFIGURATION;

	return VDO_SUCCESS;
}
/**
 * process_one_thread_config_spec() - Process one component of a thread parameter configuration
 *                                    string and update the configuration data structure.
 * @thread_param_type: The type of thread specified.
 * @count: The thread count requested.
 * @config: The configuration data structure to update.
 *
 * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the
 * thread name is unknown, a message is logged but no error is returned.
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_thread_config_spec(const char *thread_param_type,
					  unsigned int count,
					  struct thread_count_config *config)
{
	/* Handle limited thread parameters */
	if (strcmp(thread_param_type, "bioRotationInterval") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required");
			return -EINVAL;
		} else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) {
			vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d",
				      VDO_BIO_ROTATION_INTERVAL_LIMIT);
			return -EINVAL;
		}
		config->bio_rotation_interval = count;
		return VDO_SUCCESS;
	}
	/* Zone counts have their own, tighter, per-type limits. */
	if (strcmp(thread_param_type, "logical") == 0) {
		if (count > MAX_VDO_LOGICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'logical' threads are allowed",
				      MAX_VDO_LOGICAL_ZONES);
			return -EINVAL;
		}
		config->logical_zones = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "physical") == 0) {
		if (count > MAX_VDO_PHYSICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'physical' threads are allowed",
				      MAX_VDO_PHYSICAL_ZONES);
			return -EINVAL;
		}
		config->physical_zones = count;
		return VDO_SUCCESS;
	}
	/* Handle other thread count parameters; these all share the general thread limit. */
	if (count > MAXIMUM_VDO_THREADS) {
		vdo_log_error("thread config string error: at most %d '%s' threads are allowed",
			      MAXIMUM_VDO_THREADS, thread_param_type);
		return -EINVAL;
	}
	if (strcmp(thread_param_type, "hash") == 0) {
		config->hash_zones = count;
		return VDO_SUCCESS;
	}
	/* "cpu" and "bio" must be at least 1; "ack" and "hash" may be zero. */
	if (strcmp(thread_param_type, "cpu") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'cpu' thread required");
			return -EINVAL;
		}
		config->cpu_threads = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "ack") == 0) {
		config->bio_ack_threads = count;
		return VDO_SUCCESS;
	}
	if (strcmp(thread_param_type, "bio") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'bio' thread required");
			return -EINVAL;
		}
		config->bio_threads = count;
		return VDO_SUCCESS;
	}

	/*
	 * Don't fail, just log. This will handle version mismatches between user mode tools and
	 * kernel.
	 */
	vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type);
	return VDO_SUCCESS;
}
/** * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string * and update the configuration data structure. * @spec: The thread parameter specification string. * @config: The configuration data to be updated.
*/ staticint parse_one_thread_config_spec(constchar *spec, struct thread_count_config *config)
{ unsignedint count; char **fields; int result;
result = split_string(spec, '=', &fields); if (result != VDO_SUCCESS) return result;
result = kstrtouint(fields[1], 10, &count); if (result) {
vdo_log_error("thread config string error: integer value needed, found \"%s\"",
fields[1]);
free_string_array(fields); return result;
}
result = process_one_thread_config_spec(fields[0], count, config);
free_string_array(fields); return result;
}
/** * parse_thread_config_string() - Parse the configuration string passed and update the specified * counts and other parameters of various types of threads to be * created. * @string: Thread parameter configuration string. * @config: The thread configuration data to update. * * The configuration string should contain one or more comma-separated specs of the form * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", * "logical", "physical", and "hash". * * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop * further parsing. * * This function can't set the "reason" value the caller wants to pass back, because we'd want to * format it to say which field was invalid, and we can't allocate the "reason" strings * dynamically. So if an error occurs, we'll log the details and pass back an error. * * Return: VDO_SUCCESS or -EINVAL or -ENOMEM
*/ staticint parse_thread_config_string(constchar *string, struct thread_count_config *config)
{ int result = VDO_SUCCESS; char **specs;
if (strcmp(".", string) != 0) { unsignedint i;
result = split_string(string, ',', &specs); if (result != VDO_SUCCESS) return result;
for (i = 0; specs[i] != NULL; i++) {
result = parse_one_thread_config_spec(specs[i], config); if (result != VDO_SUCCESS) break;
}
free_string_array(specs);
} return result;
}
/** * process_one_key_value_pair() - Process one component of an optional parameter string and update * the configuration data structure. * @key: The optional parameter key name. * @value: The optional parameter value. * @config: The configuration data structure to update. * * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is * unknown, a message is logged but no error is returned. * * Return: VDO_SUCCESS or -EINVAL
*/ staticint process_one_key_value_pair(constchar *key, unsignedint value, struct device_config *config)
{ /* Non thread optional parameters */ if (strcmp(key, "maxDiscard") == 0) { if (value == 0) {
vdo_log_error("optional parameter error: at least one max discard block required"); return -EINVAL;
} /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ if (value > (UINT_MAX / VDO_BLOCK_SIZE)) {
vdo_log_error("optional parameter error: at most %d max discard blocks are allowed",
UINT_MAX / VDO_BLOCK_SIZE); return -EINVAL;
}
config->max_discard_blocks = value; return VDO_SUCCESS;
} /* Handles unknown key names */ return process_one_thread_config_spec(key, value, &config->thread_counts);
}
/** * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data * structure. * @key: The optional key name. * @value: The optional value. * @config: The configuration data to be updated. * * Return: VDO_SUCCESS or error.
*/ staticint parse_one_key_value_pair(constchar *key, constchar *value, struct device_config *config)
{ unsignedint count; int result;
if (strcmp(key, "deduplication") == 0) return parse_bool(value, "on", "off", &config->deduplication);
if (strcmp(key, "compression") == 0) return parse_bool(value, "on", "off", &config->compression);
/* The remaining arguments must have integral values. */
result = kstrtouint(value, 10, &count); if (result) {
vdo_log_error("optional config string error: integer value needed, found \"%s\"",
value); return result;
} return process_one_key_value_pair(key, count, config);
}
/**
 * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments.
 * @argc: The total number of arguments in list; callers ensure this is even.
 * @argv: The list of key/value pairs.
 * @config: The device configuration data to update.
 *
 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
 * further parsing. Details of any failure are logged rather than returned as a string, since
 * "reason" strings cannot be allocated dynamically here.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_key_value_pairs(int argc, char **argv, struct device_config *config)
{
	int result = VDO_SUCCESS;

	/* Consume the arguments two at a time: key then value. */
	for (; argc; argc -= 2, argv += 2) {
		result = parse_one_key_value_pair(argv[0], argv[1], config);
		if (result != VDO_SUCCESS)
			break;
	}

	return result;
}
/**
 * parse_optional_arguments() - Parse the configuration string passed in for optional arguments.
 * @arg_set: The structure holding the arguments to parse.
 * @error_ptr: Pointer to a buffer to hold the error string.
 * @config: Pointer to device configuration data to update.
 *
 * For V0/V1 configurations, there will only be one optional parameter; the thread configuration.
 * The configuration string should contain one or more comma-separated specs of the form
 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
 * "logical", "physical", and "hash".
 *
 * For V2 configurations and beyond, there could be any number of arguments. They should contain
 * one or more key/value pairs separated by a space.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr,
				    struct device_config *config)
{
	int result;

	/* V0/V1: the single optional argument is the thread configuration string. */
	if (config->version <= 1) {
		result = parse_thread_config_string(arg_set->argv[0],
						    &config->thread_counts);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid thread-count configuration";
			return VDO_BAD_CONFIGURATION;
		}
		return result;
	}

	/* V2+: optional arguments come as key/value pairs, so there must be an even count. */
	if ((arg_set->argc % 2) != 0) {
		*error_ptr = "Odd number of optional arguments given but they should be pairs";
		return VDO_BAD_CONFIGURATION;
	}

	result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config);
	if (result != VDO_SUCCESS) {
		*error_ptr = "Invalid optional argument configuration";
		return VDO_BAD_CONFIGURATION;
	}

	return result;
}
/**
 * handle_parse_error() - Handle a parsing error.
 * @config: The config to free.
 * @error_ptr: A place to store a constant string about the error.
 * @error_str: A constant string to store in error_ptr.
 */
static void handle_parse_error(struct device_config *config, char **error_ptr,
			       char *error_str)
{
	*error_ptr = error_str;
	free_device_config(config);
}
/** * parse_device_config() - Convert the dmsetup table into a struct device_config. * @argc: The number of table values. * @argv: The array of table values. * @ti: The target structure for this table. * @config_ptr: A pointer to return the allocated config. * * Return: VDO_SUCCESS or an error code.
*/ staticint parse_device_config(int argc, char **argv, struct dm_target *ti, struct device_config **config_ptr)
{ bool enable_512e;
size_t logical_bytes = to_bytes(ti->len); struct dm_arg_set arg_set; char **error_ptr = &ti->error; struct device_config *config = NULL; int result;
if ((logical_bytes % VDO_BLOCK_SIZE) != 0) {
handle_parse_error(config, error_ptr, "Logical size must be a multiple of 4096"); return VDO_BAD_CONFIGURATION;
}
if (argc == 0) {
handle_parse_error(config, error_ptr, "Incorrect number of arguments"); return VDO_BAD_CONFIGURATION;
}
result = vdo_allocate(1, struct device_config, "device_config", &config); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Could not allocate config structure"); return VDO_BAD_CONFIGURATION;
}
/* Save the original string. */
result = join_strings(argv, argc, ' ', &config->original_string); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Could not populate string"); return VDO_BAD_CONFIGURATION;
}
result = get_version_number(argc, argv, error_ptr, &config->version); if (result != VDO_SUCCESS) { /* get_version_number sets error_ptr itself. */
handle_parse_error(config, error_ptr, *error_ptr); return result;
} /* Move the arg pointer forward only if the argument was there. */ if (config->version >= 1)
dm_shift_arg(&arg_set);
result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name",
&config->parent_device_name); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Could not copy parent device name"); return VDO_BAD_CONFIGURATION;
}
/* Get the physical blocks, if known. */ if (config->version >= 1) {
result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Invalid physical block count"); return VDO_BAD_CONFIGURATION;
}
}
/* Get the logical block size and validate */
result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Invalid logical block size"); return VDO_BAD_CONFIGURATION;
}
config->logical_block_size = (enable_512e ? 512 : 4096);
/* Skip past the two no longer used read cache options. */ if (config->version <= 1)
dm_consume_args(&arg_set, 2);
/* Get the page cache size. */
result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Invalid block map page cache size"); return VDO_BAD_CONFIGURATION;
}
/* Get the block map era length. */
result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age); if (result != VDO_SUCCESS) {
handle_parse_error(config, error_ptr, "Invalid block map maximum age"); return VDO_BAD_CONFIGURATION;
}
/* Skip past the no longer used MD RAID5 optimization mode */ if (config->version <= 2)
dm_consume_args(&arg_set, 1);
/* Skip past the no longer used write policy setting */ if (config->version <= 3)
dm_consume_args(&arg_set, 1);
/* Skip past the no longer used pool name for older table lines */ if (config->version <= 2) { /* * Make sure the enum to get the pool name from argv directly is still in sync with * the parsing of the table line.
*/ if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) {
handle_parse_error(config, error_ptr, "Pool name not in expected location"); return VDO_BAD_CONFIGURATION;
}
dm_shift_arg(&arg_set);
}
/* Get the optional arguments and validate. */
result = parse_optional_arguments(&arg_set, error_ptr, config); if (result != VDO_SUCCESS) { /* parse_optional_arguments sets error_ptr itself. */
handle_parse_error(config, error_ptr, *error_ptr); return result;
}
/* * Logical, physical, and hash zone counts can all be zero; then we get one thread doing * everything, our older configuration. If any zone count is non-zero, the others must be * as well.
*/ if (((config->thread_counts.logical_zones == 0) !=
(config->thread_counts.physical_zones == 0)) ||
((config->thread_counts.physical_zones == 0) !=
(config->thread_counts.hash_zones == 0))) {
handle_parse_error(config, error_ptr, "Logical, physical, and hash zones counts must all be zero or all non-zero"); return VDO_BAD_CONFIGURATION;
}
/* The minimum io size for random io */
limits->io_min = VDO_BLOCK_SIZE; /* The optimal io size for streamed/sequential io */
limits->io_opt = VDO_BLOCK_SIZE;
/* * Sets the maximum discard size that will be passed into VDO. This value comes from a * table line value passed in during dmsetup create. * * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to * use the smallest reasonable value. * * The value is used by dm-thin to determine whether to pass down discards. The block layer * splits large discards on this boundary when this is set.
*/
limits->max_hw_discard_sectors =
(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);
/* * Force discards to not begin or end with a partial block by stating the granularity is * 4k.
*/
limits->discard_granularity = VDO_BLOCK_SIZE;
}
switch (status_type) { case STATUSTYPE_INFO: /* Report info for dmsetup status */
mutex_lock(&vdo->stats_mutex);
vdo_fetch_statistics(vdo, &vdo->stats_buffer);
stats = &vdo->stats_buffer;
/*
 * If the message is a dump, just do it. Otherwise, check that no other message is being processed,
 * and only proceed if so.
 * Returns -EBUSY if another message is being processed.
 */
static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc,
					    char **argv)
{
	int result;

	/*
	 * All messages which may be processed in parallel with other messages should be handled
	 * here before the atomic check below. Messages which should be exclusive should be
	 * processed in process_vdo_message_locked().
	 */

	/* Dump messages should always be processed */
	if (strcasecmp(argv[0], "dump") == 0)
		return vdo_dump(vdo, argc, argv, "dmsetup message");

	if (argc == 1) {
		if (strcasecmp(argv[0], "dump-on-shutdown") == 0) {
			vdo->dump_on_shutdown = true;
			return 0;
		}

		/* Index messages should always be processed */
		if ((strcasecmp(argv[0], "index-close") == 0) ||
		    (strcasecmp(argv[0], "index-create") == 0) ||
		    (strcasecmp(argv[0], "index-disable") == 0) ||
		    (strcasecmp(argv[0], "index-enable") == 0))
			return vdo_message_dedupe_index(vdo->hash_zones, argv[0]);
	}

	/* Claim exclusive message processing; fail fast if another message holds it. */
	if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0)
		return -EBUSY;

	result = process_vdo_message_locked(vdo, argc, argv);

	/* Pairs with the implicit barrier in cmpxchg just above */
	smp_wmb();
	atomic_set(&vdo->processing_message, 0);
	return result;
}
/* * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a * return code to look at the buffer and see if it is full or not.
*/ if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) {
vdo_write_stats(vdo, result_buffer, maxlen);
result = 1;
} elseif ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) {
vdo_write_config(vdo, &result_buffer, &maxlen);
result = 1;
} else {
result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv));
}
/* * If this value changes, please make sure to update the value for max_discard_sectors * accordingly.
*/
BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0);
}
/** * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in * progress.
*/ static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
{ switch (vdo->admin.phase) { case RESUME_PHASE_PACKER: case RESUME_PHASE_FLUSHER: case SUSPEND_PHASE_PACKER: case SUSPEND_PHASE_FLUSHES: return vdo->thread_config.packer_thread;
case RESUME_PHASE_DATA_VIOS: case SUSPEND_PHASE_DATA_VIOS: return vdo->thread_config.cpu_thread;
case LOAD_PHASE_DRAIN_JOURNAL: case RESUME_PHASE_JOURNAL: case SUSPEND_PHASE_JOURNAL: return vdo->thread_config.journal_thread;
/* * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset * any error in the completion.
*/
completion->callback = callback;
completion->error_handler = error_handler;
completion->callback_thread_id = get_thread_id_for_phase(vdo);
completion->requeue = true; return completion;
}
/** * advance_phase() - Increment the phase of the current admin operation and prepare the admin * completion to run on the thread for the next phase. * @vdo: The on which an admin operation is being performed * * Return: The current phase
*/ static u32 advance_phase(struct vdo *vdo)
{
u32 phase = vdo->admin.phase++;
/* * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method * should not be called from vdo threads.
*/ staticint perform_admin_operation(struct vdo *vdo, u32 starting_phase,
vdo_action_fn callback, vdo_action_fn error_handler, constchar *type)
{ int result; struct vdo_administrator *admin = &vdo->admin;
if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) { return vdo_log_error_strerror(VDO_COMPONENT_BUSY, "Can't start %s operation, another operation is already in progress",
type);
}
/* * Using the "interruptible" interface means that Linux will not log a message when we wait * for more than 120 seconds.
*/ while (wait_for_completion_interruptible(&admin->callback_sync)) { /* However, if we get a signal in a user-mode process, we could spin... */
fsleep(1000);
}
result = admin->completion.result; /* pairs with implicit barrier in cmpxchg above */
smp_wmb();
atomic_set(&admin->busy, 0); return result;
}
/* Assert that we are operating on the correct thread for the current phase. */ staticvoid assert_admin_phase_thread(struct vdo *vdo, constchar *what)
{
VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo), "%s on correct thread for %s", what,
ADMIN_PHASE_NAMES[vdo->admin.phase]);
}
/** * finish_operation_callback() - Callback to finish an admin operation. * @completion: The admin_completion.
*/ staticvoid finish_operation_callback(struct vdo_completion *completion)
{ struct vdo_administrator *admin = &completion->vdo->admin;
/** * decode_from_super_block() - Decode the VDO state from the super block and validate that it is * correct. * @vdo: The vdo being loaded. * * On error from this method, the component states must be destroyed explicitly. If this method * returns successfully, the component states must not be destroyed. * * Return: VDO_SUCCESS or an error.
*/ staticint __must_check decode_from_super_block(struct vdo *vdo)
{ conststruct device_config *config = vdo->device_config; int result;
result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry,
&vdo->states); if (result != VDO_SUCCESS) return result;
/* * If the device config specifies a larger logical size than was recorded in the super * block, just accept it.
*/ if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) {
vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
(unsignedlonglong) config->logical_blocks,
(unsignedlonglong) vdo->states.vdo.config.logical_blocks);
vdo->states.vdo.config.logical_blocks = config->logical_blocks;
}
result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce,
config->physical_blocks,
config->logical_blocks); if (result != VDO_SUCCESS) return result;
/** * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding * portions of the vdo being loaded. * @vdo: The vdo being loaded. * * This will also allocate the recovery journal and slab depot. If this method is called with an * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block * map and packer will be constructed as well. * * Return: VDO_SUCCESS or an error.
*/ staticint __must_check decode_vdo(struct vdo *vdo)
{
block_count_t maximum_age, journal_length; struct partition *partition; int result;
result = decode_from_super_block(vdo); if (result != VDO_SUCCESS) {
vdo_destroy_component_states(&vdo->states); return result;
}
if (maximum_age == 0) { return vdo_log_error_strerror(VDO_BAD_CONFIGURATION, "maximum age must be greater than 0");
}
result = vdo_enable_read_only_entry(vdo); if (result != VDO_SUCCESS) return result;
partition = vdo_get_known_partition(&vdo->layout,
VDO_RECOVERY_JOURNAL_PARTITION);
result = vdo_decode_recovery_journal(vdo->states.recovery_journal,
vdo->states.vdo.nonce, vdo, partition,
vdo->states.vdo.complete_recoveries,
vdo->states.vdo.config.recovery_journal_size,
&vdo->recovery_journal); if (result != VDO_SUCCESS) return result;
partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION);
result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition,
&vdo->depot); if (result != VDO_SUCCESS) return result;
result = vdo_decode_block_map(vdo->states.block_map,
vdo->states.vdo.config.logical_blocks, vdo,
vdo->recovery_journal, vdo->states.vdo.nonce,
vdo->device_config->cache_size, maximum_age,
&vdo->block_map); if (result != VDO_SUCCESS) return result;
result = vdo_make_physical_zones(vdo, &vdo->physical_zones); if (result != VDO_SUCCESS) return result;
/* The logical zones depend on the physical zones already existing. */
result = vdo_make_logical_zones(vdo, &vdo->logical_zones); if (result != VDO_SUCCESS) return result;
staticvoid release_instance(unsignedint instance)
{
mutex_lock(&instances_lock); if (instance >= instances.bit_count) {
VDO_ASSERT_LOG_ONLY(false, "instance number %u must be less than bit count %u",
instance, instances.bit_count);
} elseif (test_bit(instance, instances.words) == 0) {
VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance);
} else {
__clear_bit(instance, instances.words);
instances.count -= 1;
}
mutex_unlock(&instances_lock);
}
/**
 * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified
 *                        capacity in an array of unsigned longs.
 * @bit_count: The number of bits the array must hold.
 *
 * Return: the number of bytes needed for the array representation.
 */
static size_t get_bit_array_size(unsigned int bit_count)
{
	/* Round the bit count up to whole words, then convert to a byte count. */
	size_t word_count = BITS_TO_LONGS(bit_count);

	return word_count * sizeof(unsigned long);
}
/** * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that * can be allocated. * * Since the array is initially NULL, this also initializes the array the first time we allocate an * instance number. * * Return: VDO_SUCCESS or an error code from the allocation
*/ staticint grow_bit_array(void)
{ unsignedint new_count = max(instances.bit_count + BIT_COUNT_INCREMENT,
(unsignedint) BIT_COUNT_MINIMUM); unsignedlong *new_words; int result;
result = vdo_reallocate_memory(instances.words,
get_bit_array_size(instances.bit_count),
get_bit_array_size(new_count), "instance number bit array", &new_words); if (result != VDO_SUCCESS) return result;
/** * allocate_instance() - Allocate an instance number. * @instance_ptr: A point to hold the instance number * * Return: VDO_SUCCESS or an error code * * This function must be called while holding the instances lock.
*/ staticint allocate_instance(unsignedint *instance_ptr)
{ unsignedint instance; int result;
/* If there are no unallocated instances, grow the bit array. */ if (instances.count >= instances.bit_count) {
result = grow_bit_array(); if (result != VDO_SUCCESS) return result;
}
/* * There must be a zero bit somewhere now. Find it, starting just after the last instance * allocated.
*/
instance = find_next_zero_bit(instances.words, instances.bit_count,
instances.next); if (instance >= instances.bit_count) { /* Nothing free after next, so wrap around to instance zero. */
instance = find_first_zero_bit(instances.words, instances.bit_count);
result = VDO_ASSERT(instance < instances.bit_count, "impossibly, no zero bit found"); if (result != VDO_SUCCESS) return result;
}
result = parse_device_config(argc, argv, ti, &config); if (result != VDO_SUCCESS) {
vdo_log_error_strerror(result, "parsing failed: %s", ti->error);
release_instance(instance); return -EINVAL;
}
/* Beyond this point, the instance number will be cleaned up for us if needed */
result = vdo_initialize(ti, instance, config); if (result != VDO_SUCCESS) {
release_instance(instance);
free_device_config(config); return vdo_status_to_errno(result);
}
/** * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in * vdo_prepare_to_grow_physical(). * @completion: The admin completion.
*/ staticvoid check_may_grow_physical(struct vdo_completion *completion)
{ struct vdo *vdo = completion->vdo;
assert_admin_phase_thread(vdo, __func__);
/* These checks can only be done from a vdo thread. */ if (vdo_is_read_only(vdo))
vdo_set_completion_result(completion, VDO_READ_ONLY);
if (vdo_in_recovery_mode(vdo))
vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD);
/** * grow_layout() - Make the layout for growing a vdo. * @vdo: The vdo preparing to grow. * @old_size: The current size of the vdo. * @new_size: The size to which the vdo will be grown. * * Return: VDO_SUCCESS or an error code.
*/ staticint grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size)
{ int result;
block_count_t min_new_size;
if (vdo->next_layout.size == new_size) { /* We are already prepared to grow to the new size, so we're done. */ return VDO_SUCCESS;
}
/* Make a copy completion if there isn't one */ if (vdo->partition_copier == NULL) {
vdo->partition_copier = dm_kcopyd_client_create(NULL); if (IS_ERR(vdo->partition_copier)) {
result = PTR_ERR(vdo->partition_copier);
vdo->partition_copier = NULL; return result;
}
}
/* Free any unused preparation. */
vdo_uninitialize_layout(&vdo->next_layout);
/* * Make a new layout with the existing partition sizes for everything but the slab depot * partition.
*/
result = vdo_initialize_layout(new_size, vdo->layout.start,
get_partition_size(&vdo->layout,
VDO_BLOCK_MAP_PARTITION),
get_partition_size(&vdo->layout,
VDO_RECOVERY_JOURNAL_PARTITION),
get_partition_size(&vdo->layout,
VDO_SLAB_SUMMARY_PARTITION),
&vdo->next_layout); if (result != VDO_SUCCESS) {
dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier)); return result;
}
/* Ensure the new journal and summary are entirely within the added blocks. */
min_new_size = (old_size +
get_partition_size(&vdo->next_layout,
VDO_SLAB_SUMMARY_PARTITION) +
get_partition_size(&vdo->next_layout,
VDO_RECOVERY_JOURNAL_PARTITION)); if (min_new_size > new_size) { /* Copying the journal and summary would destroy some old metadata. */
vdo_uninitialize_layout(&vdo->next_layout);
dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier)); return VDO_INCREMENT_TOO_SMALL;
}
vdo_log_info("Preparing to resize physical to %llu",
(unsignedlonglong) new_physical_blocks);
VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks), "New physical size is larger than current physical size");
result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
check_may_grow_physical,
finish_operation_callback, "prepare grow-physical"); if (result != VDO_SUCCESS) return result;
result = grow_layout(vdo, current_physical_blocks, new_physical_blocks); if (result != VDO_SUCCESS) return result;
result = vdo_prepare_to_grow_slab_depot(vdo->depot,
vdo_get_known_partition(&vdo->next_layout,
VDO_SLAB_DEPOT_PARTITION)); if (result != VDO_SUCCESS) {
vdo_uninitialize_layout(&vdo->next_layout); return result;
}
vdo_log_info("Done preparing to resize physical"); return VDO_SUCCESS;
}
/** * validate_new_device_config() - Check whether a new device config represents a valid modification * to an existing config. * @to_validate: The new config to validate. * @config: The existing config. * @may_grow: Set to true if growing the logical and physical size of the vdo is currently * permitted. * @error_ptr: A pointer to hold the reason for any error. * * Return: VDO_SUCCESS or an error.
*/ staticint validate_new_device_config(struct device_config *to_validate, struct device_config *config, bool may_grow, char **error_ptr)
{ if (to_validate->owning_target->begin != config->owning_target->begin) {
*error_ptr = "Starting sector cannot change"; return VDO_PARAMETER_MISMATCH;
}
if (to_validate->physical_blocks < config->physical_blocks) {
*error_ptr = "Removing physical storage from a VDO is not supported"; return VDO_NOT_IMPLEMENTED;
}
if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
*error_ptr = "VDO physical size may not grow in current state"; return VDO_NOT_IMPLEMENTED;
}
result = validate_new_device_config(config, vdo->device_config, may_grow,
&ti->error); if (result != VDO_SUCCESS) return -EINVAL;
if (config->logical_blocks > vdo->device_config->logical_blocks) {
block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;
vdo_log_info("Preparing to resize logical to %llu",
(unsignedlonglong) config->logical_blocks);
VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks), "New logical size is larger than current size");
result = vdo_prepare_to_grow_block_map(vdo->block_map,
config->logical_blocks); if (result != VDO_SUCCESS) {
ti->error = "Device vdo_prepare_to_grow_logical failed"; return result;
}
vdo_log_info("Done preparing to resize logical");
}
if (config->physical_blocks > vdo->device_config->physical_blocks) {
result = prepare_to_grow_physical(vdo, config->physical_blocks); if (result != VDO_SUCCESS) { if (result == VDO_PARAMETER_MISMATCH) { /* * If we don't trap this case, vdo_status_to_errno() will remap * it to -EIO, which is misleading and ahistorical.
*/
result = -EINVAL;
}
if (result == VDO_TOO_MANY_SLABS)
ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)"; else
ti->error = "Device vdo_prepare_to_grow_physical failed";
return result;
}
}
if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) { constchar *device_name = vdo_get_device_name(config->owning_target);
vdo_log_info("Updating backing device of %s from %s to %s", device_name,
vdo->device_config->parent_device_name,
config->parent_device_name);
}
list_del_init(&config->config_list); if (list_empty(&vdo->device_config_list)) { constchar *device_name;
/* This was the last config referencing the VDO. Free it. */ unsignedint instance = vdo->instance; struct registered_thread allocating_thread, instance_thread;
vdo_destroy(vdo_forget(vdo));
vdo_log_info("device '%s' stopped", device_name);
vdo_unregister_thread_device_id();
vdo_unregister_allocating_thread();
release_instance(instance);
} elseif (config == vdo->device_config) { /* * The VDO still references this config. Give it a reference to a config that isn't * being destroyed.
*/
vdo->device_config = list_first_entry(&vdo->device_config_list, struct device_config, config_list);
}
/**
 * write_super_block_for_suspend() - Update the VDO state and save the super block.
 * @completion: The admin completion.
 *
 * A dirty or freshly-created VDO is marked clean before the save; states that are already
 * stable are saved unchanged; a replaying (or unrecognized) state is rejected with
 * UDS_BAD_STATE rather than persisted.
 */
static void write_super_block_for_suspend(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_NEW:
		/* Once saved, a dirty or brand-new device is considered clean. */
		vdo_set_state(vdo, VDO_CLEAN);
		break;

	case VDO_CLEAN:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* These states are persisted as-is. */
		break;

	case VDO_REPLAYING:
	default:
		/* The super block must not be written in these states. */
		vdo_continue_completion(completion, UDS_BAD_STATE);
		return;
	}

	vdo_save_components(vdo, completion);
}
/** * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend(). * @completion: The sub-task completion.
*/ staticvoid suspend_callback(struct vdo_completion *completion)
{ struct vdo *vdo = completion->vdo; struct admin_state *state = &vdo->admin.state; int result;
assert_admin_phase_thread(vdo, __func__);
switch (advance_phase(vdo)) { case SUSPEND_PHASE_START: if (vdo_get_admin_state_code(state)->quiescent) { /* Already suspended */ break;
}
case SUSPEND_PHASE_PACKER: /* * If the VDO was already resumed from a prior suspend while read-only, some of the * components may not have been resumed. By setting a read-only error here, we * guarantee that the result of this suspend will be VDO_READ_ONLY and not * VDO_INVALID_ADMIN_STATE in that case.
*/ if (vdo_in_read_only_mode(vdo))
vdo_set_completion_result(completion, VDO_READ_ONLY);
case SUSPEND_PHASE_DATA_VIOS:
drain_data_vio_pool(vdo->data_vio_pool, completion); return;
case SUSPEND_PHASE_DEDUPE:
vdo_drain_hash_zones(vdo->hash_zones, completion); return;
case SUSPEND_PHASE_FLUSHES:
vdo_drain_flusher(vdo->flusher, completion); return;
case SUSPEND_PHASE_LOGICAL_ZONES: /* * Attempt to flush all I/O before completing post suspend work. We believe a * suspended device is expected to have persisted all data written before the * suspend, even if it hasn't been flushed yet.
*/
result = vdo_synchronous_flush(vdo); if (result != VDO_SUCCESS)
vdo_enter_read_only_mode(vdo, result);
case SUSPEND_PHASE_BLOCK_MAP:
vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
completion); return;
case SUSPEND_PHASE_JOURNAL:
vdo_drain_recovery_journal(vdo->recovery_journal,
vdo_get_admin_state_code(state), completion); return;
case SUSPEND_PHASE_DEPOT:
vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
completion); return;
case SUSPEND_PHASE_READ_ONLY_WAIT:
vdo_wait_until_not_entering_read_only_mode(completion); return;
case SUSPEND_PHASE_WRITE_SUPER_BLOCK: if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) { /* If we didn't save the VDO or there was an error, we're done. */ break;
}
/* * It's important to note any error here does not actually stop device-mapper from * suspending the device. All this work is done post suspend.
*/
result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
suspend_callback, "suspend");
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.41 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.