// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2001 Sistina Software (UK) Limited. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This file is released under the GPL.
*/
/*
 * Similar to ceiling(log_size(n)): number of times n must be divided
 * (rounding up) by base before it reaches 1.
 */
static unsigned int int_log(unsigned int n, unsigned int base)
{
	unsigned int levels = 0;

	/* Each pass collapses n by one level of the base-ary tree. */
	while (n > 1) {
		n = dm_div_up(n, base);
		levels++;
	}

	return levels;
}
/* * Calculate the index of the child node of the n'th node k'th key.
*/ staticinlineunsignedint get_child(unsignedint n, unsignedint k)
{ return (n * CHILDREN_PER_NODE) + k;
}
/* * Return the n'th node of level l from table t.
*/ staticinline sector_t *get_node(struct dm_table *t, unsignedint l, unsignedint n)
{ return t->index[l] + (n * KEYS_PER_NODE);
}
/* * Return the highest key that you could lookup from the n'th * node on level l of the btree.
*/ static sector_t high(struct dm_table *t, unsignedint l, unsignedint n)
{ for (; l < t->depth - 1; l++)
n = get_child(n, CHILDREN_PER_NODE - 1);
if (n >= t->counts[l]) return (sector_t) -1;
return get_node(t, l, n)[KEYS_PER_NODE - 1];
}
/* * Fills in a level of the btree based on the highs of the level * below it.
*/ staticint setup_btree_index(unsignedint l, struct dm_table *t)
{ unsignedint n, k;
sector_t *node;
for (n = 0U; n < t->counts[l]; n++) {
node = get_node(t, l, n);
for (k = 0U; k < KEYS_PER_NODE; k++)
node[k] = high(t, l + 1, get_child(n, k));
}
return 0;
}
/* * highs, and targets are managed as dynamic arrays during a * table load.
*/ staticint alloc_targets(struct dm_table *t, unsignedint num)
{
sector_t *n_highs; struct dm_target *n_targets;
/* * Allocate both the target array and offset array at once.
*/
n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t),
GFP_KERNEL); if (!n_highs) return -ENOMEM;
/*
 * Tear down a table: free the btree index, run each target's destructor
 * and drop its type reference, then release the device list, mempools and
 * crypto profile before freeing the table itself.  Safe to call with NULL.
 */
void dm_table_destroy(struct dm_table *t)
{ if (!t) return;
/* free the indexes; t->index[depth - 2] anchors the single internal-node allocation (see setup_indexes) */ if (t->depth >= 2)
kvfree(t->index[t->depth - 2]);
/* free the targets: call each target's destructor, then drop the module/type reference */ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->dtr)
ti->type->dtr(ti);
dm_put_target_type(ti->type);
}
kvfree(t->highs);
/* free the device list */
free_devices(&t->devices, t->md);
dm_free_md_mempools(t->mempools);
dm_table_destroy_crypto_profile(t);
kfree(t);
}
/* * See if we've already got a device in the list.
*/ staticstruct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
{ struct dm_dev_internal *dd;
list_for_each_entry(dd, l, list) if (dd->dm_dev->bdev->bd_dev == dev) return dd;
return NULL;
}
/*
 * iterate_devices callback: check whether the area [start, start + len)
 * of @dev is invalid for this target.  Returns 1 if the area falls outside
 * the device, or is misaligned with the device's zone size or the table's
 * logical block size; returns 0 when the area looks valid (or when the
 * device size is unknown, in which case no judgement is possible).
 */ staticint device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{ struct queue_limits *limits = data; struct block_device *bdev = dev->bdev;
sector_t dev_size = bdev_nr_sectors(bdev); unsignedshort logical_block_size_sectors =
limits->logical_block_size >> SECTOR_SHIFT;
/* Zero-sized device: cannot validate, treat as OK. */
if (!dev_size) return 0;
/* The mapped area must lie entirely within the device. */
if ((start >= dev_size) || (start + len > dev_size)) {
DMERR("%s: %pg too small for target: start=%llu, len=%llu, dev_size=%llu",
dm_device_name(ti->table->md), bdev,
(unsignedlonglong)start,
(unsignedlonglong)len,
(unsignedlonglong)dev_size); return 1;
}
/* * If the target is mapped to zoned block device(s), check * that the zones are not partially mapped.
*/ if (bdev_is_zoned(bdev)) { unsignedint zone_sectors = bdev_zone_sectors(bdev);
if (!bdev_is_zone_aligned(bdev, start)) {
DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
dm_device_name(ti->table->md),
(unsignedlonglong)start,
zone_sectors, bdev); return 1;
}
/* * Note: The last zone of a zoned block device may be smaller * than other zones. So for a target mapping the end of a * zoned block device with such a zone, len would not be zone * aligned. We do not allow such last smaller zone to be part * of the mapping here to ensure that mappings with multiple * devices do not end up with a smaller zone in the middle of * the sector range.
*/ if (!bdev_is_zone_aligned(bdev, len)) {
DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
dm_device_name(ti->table->md),
(unsignedlonglong)len,
zone_sectors, bdev); return 1;
}
}
/* Logical block size of one sector or less: alignment is trivially OK. */
if (logical_block_size_sectors <= 1) return 0;
/* Both the start offset and the length must be multiples of the
 * logical block size (expressed in 512-byte sectors). */
if (start & (logical_block_size_sectors - 1)) {
DMERR("%s: start=%llu not aligned to h/w logical block size %u of %pg",
dm_device_name(ti->table->md),
(unsignedlonglong)start,
limits->logical_block_size, bdev); return 1;
}
if (len & (logical_block_size_sectors - 1)) {
DMERR("%s: len=%llu not aligned to h/w logical block size %u of %pg",
dm_device_name(ti->table->md),
(unsignedlonglong)len,
limits->logical_block_size, bdev); return 1;
}
return 0;
}
/* * This upgrades the mode on an already open dm_dev, being * careful to leave things as they were if we fail to reopen the * device and not to touch the existing bdev field in case * it is accessed concurrently.
*/ staticint upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode, struct mapped_device *md)
{ int r; struct dm_dev *old_dev, *new_dev;
old_dev = dd->dm_dev;
r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
dd->dm_dev->mode | new_mode, &new_dev); if (r) return r;
/*
 * Resolve a device path into a dev_t.  Accepts either a "major:minor"
 * pair or a filesystem path looked up via lookup_bdev().
 *
 * Note: the __ref annotation is because this function can call the __init
 * marked early_lookup_bdev when called during early boot code from dm-init.c.
 *
 * Returns 0 on success, -EOVERFLOW if the major:minor pair does not fit in
 * a dev_t, or the error from lookup_bdev()/early_lookup_bdev().
 */ int __ref dm_devt_from_path(constchar *path, dev_t *dev_p)
{ int r;
dev_t dev; unsignedint major, minor; char dummy;
/* Exactly two fields matched (no trailing char) means a clean major:minor pair. */
if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { /* Extract the major/minor numbers */
dev = MKDEV(major, minor); if (MAJOR(dev) != major || MINOR(dev) != minor) return -EOVERFLOW;
} else {
r = lookup_bdev(path, &dev); #ifndef MODULE if (r && system_state < SYSTEM_RUNNING)
r = early_lookup_bdev(path, &dev); #endif if (r) return r;
}
*dev_p = dev; return 0;
}
EXPORT_SYMBOL(dm_devt_from_path);
/* * Add a device to the list, or just increment the usage count if * it's already present.
*/ int dm_get_device(struct dm_target *ti, constchar *path, blk_mode_t mode, struct dm_dev **result)
{ int r;
dev_t dev; struct dm_dev_internal *dd; struct dm_table *t = ti->table;
BUG_ON(!t);
r = dm_devt_from_path(path, &dev); if (r) return r;
if (dev == disk_devt(t->md->disk)) return -EINVAL;
down_write(&t->devices_lock);
dd = find_device(&t->devices, dev); if (!dd) {
dd = kmalloc(sizeof(*dd), GFP_KERNEL); if (!dd) {
r = -ENOMEM; goto unlock_ret_r;
}
r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev); if (r) {
kfree(dd); goto unlock_ret_r;
}
if (unlikely(!q)) {
DMWARN("%s: Cannot set limits for nonexistent device %pg",
dm_device_name(ti->table->md), bdev); return 0;
}
mutex_lock(&q->limits_lock); /* * BLK_FEAT_ATOMIC_WRITES is not inherited from the bottom device in * blk_stack_limits(), so do it manually.
*/
limits->features |= (q->limits.features & BLK_FEAT_ATOMIC_WRITES);
/* * Only stack the integrity profile if the target doesn't have native * integrity support.
*/ if (!dm_target_has_integrity(ti->type))
queue_limits_stack_integrity_bdev(limits, bdev);
mutex_unlock(&q->limits_lock); return 0;
}
/* * Decrement a device's use count and remove it if necessary.
*/ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{ int found = 0; struct dm_table *t = ti->table; struct list_head *devices = &t->devices; struct dm_dev_internal *dd;
down_write(&t->devices_lock);
list_for_each_entry(dd, devices, list) { if (dd->dm_dev == d) {
found = 1; break;
}
} if (!found) {
DMERR("%s: device %s not in table devices list",
dm_device_name(t->md), d->name); goto unlock_ret;
} if (refcount_dec_and_test(&dd->count)) {
dm_put_table_device(t->md, d);
list_del(&dd->list);
kfree(dd);
}
/* * Checks to see if the target joins onto the end of the table.
*/ staticint adjoin(struct dm_table *t, struct dm_target *ti)
{ struct dm_target *prev;
/* * Used to dynamically allocate the arg array. * * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must * process messages even if some device is suspended. These messages have a * small fixed number of arguments. * * On the other hand, dm-switch needs to process bulk data using messages and * excessive use of GFP_NOIO could cause trouble.
*/ staticchar **realloc_argv(unsignedint *size, char **old_argv)
{ char **argv; unsignedint new_size;
gfp_t gfp;
/*
 * Destructively splits up the argument list to pass to ctr.
 *
 * Tokenizes @input in place on whitespace, un-escaping backslash-quoted
 * characters, and grows *argvp via realloc_argv() as needed.  Returns 0 on
 * success or -ENOMEM.  A NULL @input yields *argc == 0 and *argvp == NULL.
 */ int dm_split_args(int *argc, char ***argvp, char *input)
{ char *start, *end = input, *out, **argv = NULL; unsignedint array_size = 0;
*argc = 0;
if (!input) {
*argvp = NULL; return 0;
}
argv = realloc_argv(&array_size, argv); if (!argv) return -ENOMEM;
while (1) { /* Skip whitespace */
start = skip_spaces(end);
if (!*start) break; /* success, we hit the end */
/* 'out' is used to remove any back-quotes */
end = out = start; while (*end) { /* Everything apart from '\0' can be quoted */ if (*end == '\\' && *(end + 1)) {
*out++ = *(end + 1);
end += 2; continue;
}
if (isspace(*end)) break; /* end of token */
*out++ = *end++;
}
/* have we already filled the array ? */ if ((*argc + 1) > array_size) {
argv = realloc_argv(&array_size, argv); if (!argv) return -ENOMEM;
}
/* we know this is whitespace */ if (*end)
end++;
/* terminate the string and put it in the array */
*out = '\0';
argv[*argc] = start;
(*argc)++;
}
/*
 * NOTE(review): the function's tail (NULL-terminating argv, storing *argvp
 * and the final return) appears to be missing from this chunk — confirm
 * against the full file before relying on this copy.
 */
/* * Impose necessary and sufficient conditions on a devices's table such * that any incoming bio which respects its logical_block_size can be * processed successfully. If it falls across the boundary between * two or more targets, the size of each piece it gets split into must * be compatible with the logical_block_size of the target processing it.
*/ staticint validate_hardware_logical_block_alignment(struct dm_table *t, struct queue_limits *limits)
{ /* * This function uses arithmetic modulo the logical_block_size * (in units of 512-byte sectors).
*/ unsignedshort device_logical_block_size_sects =
limits->logical_block_size >> SECTOR_SHIFT;
/* * Offset of the start of the next table entry, mod logical_block_size.
*/ unsignedshort next_target_start = 0;
/* * Given an aligned bio that extends beyond the end of a * target, how many sectors must the next target handle?
*/ unsignedshort remaining = 0;
/* * Check each entry in the table in turn.
*/ for (i = 0; i < t->num_targets; i++) {
ti = dm_table_get_target(t, i);
dm_set_stacking_limits(&ti_limits);
/* combine all target devices' limits */ if (ti->type->iterate_devices)
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
/* * If the remaining sectors fall entirely within this * table entry are they compatible with its logical_block_size?
*/ if (remaining < ti->len &&
remaining & ((ti_limits.logical_block_size >>
SECTOR_SHIFT) - 1)) break; /* Error */
if (dm_target_needs_singleton(ti->type)) { if (t->num_targets) {
ti->error = "singleton target type must appear alone in table"; goto bad;
}
t->singleton = true;
}
if (dm_target_always_writeable(ti->type) &&
!(t->mode & BLK_OPEN_WRITE)) {
ti->error = "target type may not be included in a read-only table"; goto bad;
}
if (t->immutable_target_type) { if (t->immutable_target_type != ti->type) {
ti->error = "immutable target type cannot be mixed with other target types"; goto bad;
}
} elseif (dm_target_is_immutable(ti->type)) { if (t->num_targets) {
ti->error = "immutable target type cannot be mixed with other target types"; goto bad;
}
t->immutable_target_type = ti->type;
}
if (!ti->num_discard_bios && ti->discards_supported)
DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);
if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);
if (!ti->flush_bypasses_map)
t->flush_bypasses_map = false;
if (t->type != DM_TYPE_NONE) { /* target already set the table's type */ if (t->type == DM_TYPE_BIO_BASED) { /* possibly upgrade to a variant of bio-based */ goto verify_bio_based;
}
BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); goto verify_rq_based;
}
for (unsignedint i = 0; i < t->num_targets; i++) {
ti = dm_table_get_target(t, i); if (dm_target_hybrid(ti))
hybrid = 1; elseif (dm_target_request_based(ti))
request_based = 1; else
bio_based = 1;
if (bio_based && request_based) {
DMERR("Inconsistent table: different target types can't be mixed up"); return -EINVAL;
}
}
if (hybrid && !bio_based && !request_based) { /* * The targets can work either way. * Determine the type from the live device. * Default to bio-based if device is new.
*/ if (__table_type_request_based(live_md_type))
request_based = 1; else
bio_based = 1;
}
if (bio_based) {
verify_bio_based: /* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED; if (dm_table_supports_dax(t, device_not_dax_capable) ||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED;
} return 0;
}
BUG_ON(!request_based); /* No targets in this table */
t->type = DM_TYPE_REQUEST_BASED;
verify_rq_based: /* * Request-based dm supports only tables that have a single target now. * To support multiple targets, request splitting support is needed, * and that needs lots of changes in the block-layer. * (e.g. request completion process for partial completion.)
*/ if (t->num_targets > 1) {
DMERR("request-based DM doesn't support multiple targets"); return -EINVAL;
}
if (list_empty(devices)) { int srcu_idx; struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
/* inherit live table's type */ if (live_table)
t->type = live_table->type;
dm_put_live_table(t->md, srcu_idx); return 0;
}
ti = dm_table_get_immutable_target(t); if (!ti) {
DMERR("table load rejected: immutable target is required"); return -EINVAL;
} elseif (ti->max_io_len) {
DMERR("table load rejected: immutable target that splits IO is not supported"); return -EINVAL;
}
/* Non-request-stackable devices can't be used for request-based dm */ if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL;
}
/*
 * Allocate and populate the internal (non-leaf) levels of the btree index.
 * All internal nodes share one kvcalloc() allocation; t->index[i] points
 * into it per level, so freeing t->index[depth - 2] releases everything.
 * The leaf level is t->highs, set up by the caller.
 * Returns 0 on success or -ENOMEM.
 */
staticint setup_indexes(struct dm_table *t)
{ int i; unsignedint total = 0;
sector_t *indexes;
/* allocate the space for *all* the indexes: size each internal level from the level below */ for (i = t->depth - 2; i >= 0; i--) {
t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
total += t->counts[i];
}
indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL); if (!indexes) return -ENOMEM;
/* set up internal nodes, bottom-up */ for (i = t->depth - 2; i >= 0; i--) {
t->index[i] = indexes;
indexes += (KEYS_PER_NODE * t->counts[i]);
setup_btree_index(i, t);
}
return 0;
}
/* * Builds the btree to index the map.
*/ staticint dm_table_build_index(struct dm_table *t)
{ int r = 0; unsignedint leaf_nodes;
/* how many indexes will the btree have ? */
leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
/* leaf layer has already been set up */
t->counts[t->depth - 1] = leaf_nodes;
t->index[t->depth - 1] = t->highs;
/* * When an inline encryption key is evicted from a device-mapper device, evict * it from all the underlying devices.
*/ staticint dm_keyslot_evict(struct blk_crypto_profile *profile, conststruct blk_crypto_key *key, unsignedint slot)
{ struct mapped_device *md =
container_of(profile, struct dm_crypto_profile, profile)->md; struct dm_table *t; int srcu_idx;
t = dm_get_live_table(md, &srcu_idx); if (!t) goto put_live_table;
for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->iterate_devices) continue;
ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
(void *)key);
}
t = dm_get_live_table(md, &srcu_idx); if (!t) goto out;
/* * blk-crypto currently has no support for multiple incompatible * implementations of wrapped inline crypto keys on a single system. * It was already checked earlier that support for wrapped keys was * declared on all underlying devices. Thus, all the underlying devices * should support all wrapped key operations and they should behave * identically, i.e. work with the same keys. So, just executing the * operation on the first device on which it works suffices for now.
*/ for (i = 0; i < t->num_targets; i++) {
ti = dm_table_get_target(t, i); if (!ti->type->iterate_devices) continue;
ti->type->iterate_devices(ti, dm_wrappedkey_op_callback, args); if (!args->err) break;
}
out:
dm_put_live_table(md, srcu_idx); return args->err;
}
/* * Constructs and initializes t->crypto_profile with a crypto profile that * represents the common set of crypto capabilities of the devices described by * the dm_table. However, if the constructed crypto profile doesn't support all * crypto capabilities that are supported by the current mapped_device, it * returns an error instead, since we don't support removing crypto capabilities * on table changes. Finally, if the constructed crypto profile is "empty" (has * no crypto capabilities at all), it just sets t->crypto_profile to NULL.
*/ staticint dm_table_construct_crypto_profile(struct dm_table *t)
{ struct dm_crypto_profile *dmcp; struct blk_crypto_profile *profile; unsignedint i; bool empty_profile = true;
if (t->md->queue &&
!blk_crypto_has_capabilities(profile,
t->md->queue->crypto_profile)) {
DMERR("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
dm_destroy_crypto_profile(profile); return -EINVAL;
}
/* * If the new profile doesn't actually support any crypto capabilities, * we may as well represent it with a NULL profile.
*/ for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) { if (profile->modes_supported[i]) {
empty_profile = false; break;
}
}
if (empty_profile) {
dm_destroy_crypto_profile(profile);
profile = NULL;
}
/* * t->crypto_profile is only set temporarily while the table is being * set up, and it gets set to NULL after the profile has been * transferred to the request_queue.
*/
t->crypto_profile = profile;
/* * Prepares the table for use by building the indices, * setting the type, and allocating mempools.
*/ int dm_table_complete(struct dm_table *t)
{ int r;
r = dm_table_determine_type(t); if (r) {
DMERR("unable to determine table type"); return r;
}
r = dm_table_build_index(t); if (r) {
DMERR("unable to build btrees"); return r;
}
r = dm_table_construct_crypto_profile(t); if (r) {
DMERR("could not construct crypto profile."); return r;
}
r = dm_table_alloc_md_mempools(t, t->md); if (r)
DMERR("unable to allocate mempools");
/*
 * Search the btree for the correct target.
 *
 * Caller should check returned pointer for NULL
 * to trap I/O beyond end of device.
 */
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
{
	unsigned int level;
	unsigned int node_idx = 0, key_idx = 0;
	sector_t *keys;

	if (unlikely(sector >= dm_table_get_size(t)))
		return NULL;

	/* Walk down the levels, at each picking the first key >= sector. */
	for (level = 0; level < t->depth; level++) {
		node_idx = get_child(node_idx, key_idx);
		keys = get_node(t, level, node_idx);

		for (key_idx = 0; key_idx < KEYS_PER_NODE; key_idx++) {
			if (keys[key_idx] >= sector)
				break;
		}
	}

	return &t->targets[(KEYS_PER_NODE * node_idx) + key_idx];
}
/* * type->iterate_devices() should be called when the sanity check needs to * iterate and check all underlying data devices. iterate_devices() will * iterate all underlying data devices until it encounters a non-zero return * code, returned by whether the input iterate_devices_callout_fn, or * iterate_devices() itself internally. * * For some target type (e.g. dm-stripe), one call of iterate_devices() may * iterate multiple underlying devices internally, in which case a non-zero * return code returned by iterate_devices_callout_fn will stop the iteration * in advance. * * Cases requiring _any_ underlying device supporting some kind of attribute, * should use the iteration structure like dm_table_any_dev_attr(), or call * it directly. @func should handle semantics of positive examples, e.g. * capable of something. * * Cases requiring _all_ underlying devices supporting some kind of attribute, * should use the iteration structure like dm_table_supports_nowait() or * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that * uses an @anti_func that handle semantics of counter examples, e.g. not * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
*/ staticbool dm_table_any_dev_attr(struct dm_table *t,
iterate_devices_callout_fn func, void *data)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (ti->type->iterate_devices &&
ti->type->iterate_devices(ti, func, data)) returntrue;
}
/* * Check whether a table has no data devices attached using each * target's iterate_devices method. * Returns false if the result is unknown because a target doesn't * support iterate_devices.
*/ bool dm_table_has_no_data_devices(struct dm_table *t)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); unsignedint num_devices = 0;
if (!ti->type->iterate_devices) returnfalse;
ti->type->iterate_devices(ti, count_device, &num_devices); if (num_devices) returnfalse;
}
returntrue;
}
bool dm_table_is_wildcard(struct dm_table *t)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_is_wildcard(ti->type)) returnfalse;
}
/* * Check the device zoned model based on the target feature flag. If the target * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are * also accepted but all devices must have the same zoned model. If the target * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any * zoned model with all zoned devices having the same zone size.
*/ staticbool dm_table_supports_zoned(struct dm_table *t, bool zoned)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
/* * For the wildcard target (dm-error), if we do not have a * backing device, we must always return false. If we have a * backing device, the result must depend on checking zoned * model, like for any other target. So for this, check directly * if the target backing device is zoned as we get "false" when * dm-error was set without a backing device.
*/ if (dm_target_is_wildcard(ti->type) &&
!ti->type->iterate_devices(ti, device_is_zoned_model, NULL)) returnfalse;
if (dm_target_supports_zoned_hm(ti->type)) { if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_zoned,
&zoned)) returnfalse;
} elseif (!dm_target_supports_mixed_zoned_model(ti->type)) { if (zoned) returnfalse;
}
}
if (!bdev_is_zoned(dev->bdev)) return 0; return bdev_zone_sectors(dev->bdev) != *zone_sectors;
}
/*
 * Check consistency of zoned model and zone sectors across all targets.
 * For zone sectors, if the destination device is a zoned block device, it
 * shall have the specified zone_sectors.  Returns 0 if the table is not
 * zoned or everything is consistent, -EINVAL otherwise.
 */ staticint validate_hardware_zoned(struct dm_table *t, bool zoned, unsignedint zone_sectors)
{ if (!zoned) return 0;
if (!dm_table_supports_zoned(t, zoned)) {
DMERR("%s: zoned model is not consistent across all devices",
dm_device_name(t->md)); return -EINVAL;
}
/* Check zone size validity and compatibility: must be a non-zero power of two */ if (!zone_sectors || !is_power_of_2(zone_sectors)) return -EINVAL;
if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
DMERR("%s: zone sectors is not consistent across all zoned devices",
dm_device_name(t->md)); return -EINVAL;
}
return 0;
}
/* * Establish the new table's queue_limits and validate them.
*/ int dm_calculate_queue_limits(struct dm_table *t, struct queue_limits *limits)
{ struct queue_limits ti_limits; unsignedint zone_sectors = 0; bool zoned = false;
dm_set_stacking_limits(limits);
t->integrity_supported = true; for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (!dm_target_passes_integrity(ti->type))
t->integrity_supported = false;
}
for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
dm_set_stacking_limits(&ti_limits);
if (!ti->type->iterate_devices) { /* Set I/O hints portion of queue limits */ if (ti->type->io_hints)
ti->type->io_hints(ti, &ti_limits); goto combine_limits;
}
/* * Combine queue limits of all the devices this target uses.
*/
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) { /* * After stacking all limits, validate all devices * in table support this zoned model and zone sectors.
*/
zoned = (ti_limits.features & BLK_FEAT_ZONED);
zone_sectors = ti_limits.chunk_sectors;
}
/* Set I/O hints portion of queue limits */ if (ti->type->io_hints)
ti->type->io_hints(ti, &ti_limits);
/* * Check each device area is consistent with the target's * overall queue limits.
*/ if (ti->type->iterate_devices(ti, device_area_is_invalid,
&ti_limits)) return -EINVAL;
combine_limits: /* * Merge this target's queue limits into the overall limits * for the table.
*/ if (blk_stack_limits(limits, &ti_limits, 0) < 0)
DMWARN("%s: adding target device (start sect %llu len %llu) " "caused an alignment inconsistency",
dm_device_name(t->md),
(unsignedlonglong) ti->begin,
(unsignedlonglong) ti->len);
if (t->integrity_supported ||
dm_target_has_integrity(ti->type)) { if (!queue_limits_stack_integrity(limits, &ti_limits)) {
DMWARN("%s: adding target device (start sect %llu len %llu) " "disabled integrity support due to incompatibility",
dm_device_name(t->md),
(unsignedlonglong) ti->begin,
(unsignedlonglong) ti->len);
t->integrity_supported = false;
}
}
}
/* * Verify that the zoned model and zone sectors, as determined before * any .io_hints override, are the same across all devices in the table. * - this is especially relevant if .io_hints is emulating a disk-managed * zoned model on host-managed zoned block devices. * BUT...
*/ if (limits->features & BLK_FEAT_ZONED) { /* * ...IF the above limits stacking determined a zoned model * validate that all of the table's devices conform to it.
*/
zoned = limits->features & BLK_FEAT_ZONED;
zone_sectors = limits->chunk_sectors;
} if (validate_hardware_zoned(t, zoned, zone_sectors)) return -EINVAL;
/* * Check if a target requires flush support even if none of the underlying * devices need it (e.g. to persist target-specific metadata).
*/ staticbool dm_table_supports_flush(struct dm_table *t)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (ti->num_flush_bios && ti->flush_supported) returntrue;
}
staticbool dm_table_supports_discards(struct dm_table *t)
{ for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->num_discard_bios) returnfalse;
/* * Either the target provides discard support (as implied by setting * 'discards_supported') or it relies on _all_ data devices having * discard support.
*/ if (!ti->discards_supported &&
(!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_discard_capable, NULL))) returnfalse;
}
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits)
{ int r; struct queue_limits old_limits;
if (!dm_table_supports_nowait(t))
limits->features &= ~BLK_FEAT_NOWAIT;
/* * The current polling impementation does not support request based * stacking.
*/ if (!__table_type_bio_based(t->type))
limits->features &= ~BLK_FEAT_POLL;
if (!dm_table_supports_write_zeroes(t)) {
limits->max_write_zeroes_sectors = 0;
limits->max_hw_wzeroes_unmap_sectors = 0;
}
if (!dm_table_supports_secure_erase(t))
limits->max_secure_erase_sectors = 0;
if (dm_table_supports_flush(t))
limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
if (dm_table_supports_dax(t, device_not_dax_capable))
limits->features |= BLK_FEAT_DAX; else
limits->features &= ~BLK_FEAT_DAX;
/* For a zoned table, setup the zone related queue attributes. */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { if (limits->features & BLK_FEAT_ZONED) {
r = dm_set_zones_restrictions(t, q, limits); if (r) return r;
} elseif (dm_has_zone_plugs(t->md)) {
DMWARN("%s: device has zone write plug resources. " "Cannot switch to non-zoned table.",
dm_device_name(t->md)); return -EINVAL;
}
}
if (dm_table_supports_atomic_writes(t))
limits->features |= BLK_FEAT_ATOMIC_WRITES;
old_limits = queue_limits_start_update(q);
r = queue_limits_commit_update(q, limits); if (r) return r;
/* * Now that the limits are set, check the zones mapped by the table * and setup the resources for zone append emulation if necessary.
*/ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
(limits->features & BLK_FEAT_ZONED)) {
r = dm_revalidate_zones(t, q); if (r) {
queue_limits_set(q, &old_limits); return r;
}
}
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
dm_finalize_zone_settings(t, limits);
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
set_dax_synchronous(t->md->dax_dev);
if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
dax_write_cache(t->md->dax_dev, true);
for (unsignedint i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
switch (mode) { case PRESUSPEND: if (ti->type->presuspend)
ti->type->presuspend(ti); break; case PRESUSPEND_UNDO: if (ti->type->presuspend_undo)
ti->type->presuspend_undo(ti); break; case POSTSUSPEND: if (ti->type->postsuspend)
ti->type->postsuspend(ti); break;
}
}
}
/* Run the PRESUSPEND hook on every target; NULL table is a no-op. */
void dm_table_presuspend_targets(struct dm_table *t)
{
	if (t)
		suspend_targets(t, PRESUSPEND);
}
/* Run the PRESUSPEND_UNDO hook on every target; NULL table is a no-op. */
void dm_table_presuspend_undo_targets(struct dm_table *t)
{
	if (t)
		suspend_targets(t, PRESUSPEND_UNDO);
}
/* Run the POSTSUSPEND hook on every target; NULL table is a no-op. */
void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (t)
		suspend_targets(t, POSTSUSPEND);
}
int dm_table_resume_targets(struct dm_table *t)
{ unsignedint i; int r = 0;
lockdep_assert_held(&t->md->suspend_lock);
for (i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->preresume) continue;
r = ti->type->preresume(ti); if (r) {
DMERR("%s: %s: preresume failed, error = %d",
dm_device_name(t->md), ti->type->name, r); return r;
}
}
for (i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i);
The information on this web page has been compiled carefully and to the best
of our knowledge. However, no guarantee is given as to the completeness,
correctness, or quality of the information provided.
Note:
The colored syntax highlighting and the measurement are still experimental.