// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2003 Sistina Software Limited. * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. * * This file is released under the GPL.
*/
/* * Paths are grouped into Priority Groups and numbered from 1 upwards. * Each has a path selector which controls which path gets used.
*/ struct priority_group { struct list_head list;
struct pgpath *current_pgpath; struct priority_group *current_pg; struct priority_group *next_pg; /* Switch to this PG if set */ struct priority_group *last_probed_pg;
atomic_t nr_valid_paths; /* Total number of usable paths */ unsignedint nr_priority_groups; struct list_head priority_groups;
constchar *hw_handler_name; char *hw_handler_params;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
wait_queue_head_t probe_wait; /* Wait for probing paths */ unsignedint pg_init_retries; /* Number of times to retry pg_init */ unsignedint pg_init_delay_msecs; /* Number of msecs before pg_init retry */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
atomic_t pg_init_count; /* Number of times pg_init called */
/* *----------------------------------------------- * Multipath state flags. *-----------------------------------------------
*/ #define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */ #define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */ #define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */ #define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3 /* If there's already a hw_handler present, don't change it. */ #define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ #define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */ #define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */
staticint alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
{ if (m->queue_mode == DM_TYPE_NONE) {
m->queue_mode = DM_TYPE_REQUEST_BASED;
} elseif (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios); /* * bio-based doesn't support any direct scsi_dh management; * it just discovers if a scsi_dh is attached.
*/
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
dm_table_set_type(ti->table, m->queue_mode);
/* * Init fields that are only used when a scsi_dh is attached * - must do this unconditionally (really doesn't hurt non-SCSI uses)
*/
set_bit(MPATHF_QUEUE_IO, &m->flags);
atomic_set(&m->pg_init_in_progress, 0);
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
init_waitqueue_head(&m->probe_wait);
/* Must we initialise the PG first, and queue I/O till it's ready? */ if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
set_bit(MPATHF_QUEUE_IO, &m->flags);
} else {
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
clear_bit(MPATHF_QUEUE_IO, &m->flags);
}
/* Don't change PG until it has no remaining paths */
pg = READ_ONCE(m->current_pg); if (pg) {
pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) return pgpath;
}
/* Were we instructed to switch PG? */ if (READ_ONCE(m->next_pg)) {
spin_lock_irqsave(&m->lock, flags);
pg = m->next_pg; if (!pg) {
spin_unlock_irqrestore(&m->lock, flags); goto check_all_pgs;
}
m->next_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) return pgpath;
}
check_all_pgs: /* * Loop through priority groups until we find a valid path. * First time we skip PGs marked 'bypassed'. * Second time we only try the ones we skipped, but set * pg_init_delay_retry so we do not hammer controllers.
*/ do {
list_for_each_entry(pg, &m->priority_groups, list) { if (pg->bypassed == !!bypassed) continue;
pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) { if (!bypassed) {
spin_lock_irqsave(&m->lock, flags);
set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
spin_unlock_irqrestore(&m->lock, flags);
} return pgpath;
}
}
} while (bypassed--);
/* * dm_report_EIO() is a macro instead of a function to make pr_debug_ratelimited() * report the function name and line number of the function from which * it has been invoked.
*/ #define dm_report_EIO(m) \
DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \
dm_table_device_name((m)->ti->table), \
test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
dm_noflush_suspending((m)->ti))
/* * Check whether bios must be queued in the device-mapper core rather * than here in the target.
*/ staticbool __must_push_back(struct multipath *m)
{ return dm_noflush_suspending(m->ti);
}
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath); if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
pgpath = choose_pgpath(m, nr_bytes);
if (!pgpath) { if (must_push_back_rq(m)) return DM_MAPIO_DELAY_REQUEUE;
dm_report_EIO(m); /* Failed */ return DM_MAPIO_KILL;
} elseif (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
pg_init_all_paths(m); return DM_MAPIO_DELAY_REQUEUE;
}
/* * blk-mq's SCHED_RESTART can cover this requeue, so we * needn't deal with it by DELAY_REQUEUE. More importantly, * we have to return DM_MAPIO_REQUEUE so that blk-mq can * get the queue busy feedback (via BLK_STS_RESOURCE), * otherwise I/O merging can suffer.
*/ return DM_MAPIO_REQUEUE;
}
clone->bio = clone->biotail = NULL;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
*__clone = clone;
if (pgpath->pg->ps.type->start_io)
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
&pgpath->path,
nr_bytes); return DM_MAPIO_REMAPPED;
}
staticvoid multipath_release_clone(struct request *clone, union map_info *map_context)
{ if (unlikely(map_context)) { /* * non-NULL map_context means caller is still map * method; must undo multipath_clone_and_map()
*/ struct dm_mpath_io *mpio = get_mpio(map_context); struct pgpath *pgpath = mpio->pgpath;
if (pgpath && pgpath->pg->ps.type->end_io)
pgpath->pg->ps.type->end_io(&pgpath->pg->ps,
&pgpath->path,
mpio->nr_bytes,
clone->io_start_time_ns);
}
blk_mq_free_request(clone);
}
/* * Map cloned bios (bio-based multipath)
*/
staticvoid __multipath_queue_bio(struct multipath *m, struct bio *bio)
{ /* Queue for the daemon to resubmit */
bio_list_add(&m->queued_bios, bio); if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
}
staticvoid multipath_queue_bio(struct multipath *m, struct bio *bio)
{ unsignedlong flags;
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath); if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if (!pgpath) {
spin_lock_irq(&m->lock); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
__multipath_queue_bio(m, bio);
pgpath = ERR_PTR(-EAGAIN);
}
spin_unlock_irq(&m->lock);
dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
r = __multipath_map_bio(m, bio, mpio); switch (r) { case DM_MAPIO_KILL:
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio); break; case DM_MAPIO_REQUEUE:
bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio); break; case DM_MAPIO_REMAPPED:
submit_bio_noacct(bio); break; case DM_MAPIO_SUBMITTED: break; default:
WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r);
}
}
blk_finish_plug(&plug);
}
/* * If we run out of usable paths, should we queue I/O or error it?
*/ staticint queue_if_no_path(struct multipath *m, bool f_queue_if_no_path, bool save_old_value, constchar *caller)
{ unsignedlong flags; bool queue_if_no_path_bit, saved_queue_if_no_path_bit; constchar *dm_dev_name = dm_table_device_name(m->ti->table);
if (save_old_value) { if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) {
DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!",
dm_dev_name);
} else
assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit);
} elseif (!f_queue_if_no_path && saved_queue_if_no_path_bit) { /* due to "fail_if_no_path" message, need to honor it. */
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
}
assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, f_queue_if_no_path);
if (!f_queue_if_no_path) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
return 0;
}
/* * If the queue_if_no_path timeout fires, turn off queue_if_no_path and * process any queued I/O.
*/ staticvoid queue_if_no_path_timeout_work(struct timer_list *t)
{ struct multipath *m = timer_container_of(m, t, nopath_timer);
/* * An event is triggered whenever a path is taken out of use. * Includes path failure and PG bypass.
*/ staticvoid trigger_event(struct work_struct *work)
{ struct multipath *m =
container_of(work, struct multipath, trigger_event);
if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) {
retain: if (*attached_handler_name) { /* * Clear any hw_handler_params associated with a * handler that isn't already attached.
*/ if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {
kfree(m->hw_handler_params);
m->hw_handler_params = NULL;
}
/* * Reset hw_handler_name to match the attached handler * * NB. This modifies the table line to show the actual * handler instead of the original table passed in.
*/
kfree(m->hw_handler_name);
m->hw_handler_name = *attached_handler_name;
*attached_handler_name = NULL;
}
}
if (m->hw_handler_name) {
r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) {
DMINFO("retaining handler on device %pg", bdev); goto retain;
} if (r < 0) {
*error = "error attaching hardware handler"; return r;
}
if (m->hw_handler_params) {
r = scsi_dh_set_params(q, m->hw_handler_params); if (r < 0) {
*error = "unable to set hardware handler parameters"; return r;
}
}
}
staticconststruct dm_arg _args[] = {
{0, 8, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"},
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
};
r = dm_read_arg_group(_args, as, &argc, &ti->error); if (r) return -EINVAL;
if (!argc) return 0;
do {
arg_name = dm_shift_arg(as);
argc--;
if (!strcasecmp(arg_name, "queue_if_no_path")) {
r = queue_if_no_path(m, true, false, __func__); continue;
}
if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); continue;
}
if (!strcasecmp(arg_name, "pg_init_retries") &&
(argc >= 1)) {
r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
argc--; continue;
}
if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
(argc >= 1)) {
r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
argc--; continue;
}
out:
spin_unlock_irq(&m->lock); if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
if (pgpath->is_active)
disable_nopath_timeout(m);
return r;
}
/* * Fail or reinstate all paths that match the provided struct dm_dev.
*/ staticint action_dev(struct multipath *m, dev_t dev, action_fn action)
{ int r = -EINVAL; struct pgpath *pgpath; struct priority_group *pg;
list_for_each_entry(pg, &m->priority_groups, list) {
list_for_each_entry(pgpath, &pg->pgpaths, list) { if (pgpath->path.dev->bdev->bd_dev == dev)
r = action(pgpath);
}
}
return r;
}
/* * Temporarily try to avoid having to use the specified PG
*/ staticvoid bypass_pg(struct multipath *m, struct priority_group *pg, bool bypassed, bool can_be_delayed)
{ unsignedlong flags;
/* * Switch to using the specified PG from the next I/O that gets mapped
*/ staticint switch_pg_num(struct multipath *m, constchar *pgstr)
{ struct priority_group *pg; unsignedint pgnum; char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
DMWARN("invalid PG number supplied to %s", __func__); return -EINVAL;
}
/* * Set/clear bypassed status of a PG. * PGs are numbered upwards from 1 in the order they were declared.
*/ staticint bypass_pg_num(struct multipath *m, constchar *pgstr, bool bypassed)
{ struct priority_group *pg; unsignedint pgnum; char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
DMWARN("invalid PG number supplied to bypass_pg"); return -EINVAL;
}
list_for_each_entry(pg, &m->priority_groups, list) { if (!--pgnum) break;
}
/* device or driver problems */ switch (errors) { case SCSI_DH_OK: break; case SCSI_DH_NOSYS: if (!m->hw_handler_name) {
errors = 0; break;
}
DMERR("Could not failover the device: Handler scsi_dh_%s " "Error %d.", m->hw_handler_name, errors); /* * Fail path for now, so we do not ping pong
*/
fail_path(pgpath); break; case SCSI_DH_DEV_TEMP_BUSY: /* * Probably doing something like FW upgrade on the * controller so try the other pg.
*/
bypass_pg(m, pg, true, false); break; case SCSI_DH_RETRY: /* Wait before retrying. */
delay_retry = true;
fallthrough; case SCSI_DH_IMM_RETRY: case SCSI_DH_RES_TEMP_UNAVAIL: if (pg_init_limit_reached(m, pgpath))
fail_path(pgpath);
errors = 0; break; case SCSI_DH_DEV_OFFLINED: default: /* * We probably do not want to fail the path for a device * error, but this is what the old dm did. In future * patches we can do more advanced handling.
*/
fail_path(pgpath);
}
staticint multipath_end_io(struct dm_target *ti, struct request *clone,
blk_status_t error, union map_info *map_context)
{ struct dm_mpath_io *mpio = get_mpio(map_context); struct pgpath *pgpath = mpio->pgpath; int r = DM_ENDIO_DONE;
/* * We don't queue any clone request inside the multipath target * during end I/O handling, since those clone requests don't have * bio clones. If we queue them inside the multipath target, * we need to make bio clones, that requires memory allocation. * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests * don't have bio clones.) * Instead of queueing the clone request here, we queue the original * request into dm core, which will remake a clone request and * clone bios for it and resubmit it later.
*/ if (error && blk_path_error(error)) { struct multipath *m = ti->private;
if (error == BLK_STS_RESOURCE)
r = DM_ENDIO_DELAY_REQUEUE; else
r = DM_ENDIO_REQUEUE;
if (pgpath)
fail_path(pgpath);
if (!atomic_read(&m->nr_valid_paths) &&
!must_push_back_rq(m)) { if (error == BLK_STS_IOERR)
dm_report_EIO(m); /* complete with the original error */
r = DM_ENDIO_DONE;
}
}
if (pgpath) { struct path_selector *ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
clone->io_start_time_ns);
}
if (!*error || !blk_path_error(*error)) goto done;
if (pgpath)
fail_path(pgpath);
if (!atomic_read(&m->nr_valid_paths)) {
spin_lock_irqsave(&m->lock, flags); if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { if (__must_push_back(m)) {
r = DM_ENDIO_REQUEUE;
} else {
dm_report_EIO(m);
*error = BLK_STS_IOERR;
}
spin_unlock_irqrestore(&m->lock, flags); goto done;
}
spin_unlock_irqrestore(&m->lock, flags);
}
multipath_queue_bio(m, clone);
r = DM_ENDIO_INCOMPLETE;
done: if (pgpath) { struct path_selector *ps = &pgpath->pg->ps;
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
(mpio->start_time_ns ?:
dm_start_time_ns_from_clone(clone)));
}
return r;
}
/* * Suspend with flush can't complete until all the I/O is processed * so if the last path fails we must error any remaining I/O. * - Note that if the freeze_bdev fails while suspending, the * queue_if_no_path state is lost - userspace should reset it. * Otherwise, during noflush suspend, queue_if_no_path will not change.
*/ staticvoid multipath_presuspend(struct dm_target *ti)
{ struct multipath *m = ti->private;
spin_lock_irq(&m->lock);
m->is_suspending = true;
spin_unlock_irq(&m->lock); /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
}
switch (type) { case STATUSTYPE_INFO:
list_for_each_entry(pg, &m->priority_groups, list) { if (pg->bypassed)
state = 'D'; /* Disabled */ elseif (pg == m->current_pg)
state = 'A'; /* Currently Active */ else
state = 'E'; /* Enabled */
DMEMIT("%c ", state);
if (pg->ps.type->status)
sz += pg->ps.type->status(&pg->ps, NULL, type,
result + sz,
maxlen - sz); else
DMEMIT("0 ");
r = dm_devt_from_path(argv[1], &dev); if (r) {
DMWARN("message: error getting device %s",
argv[1]); goto out;
}
r = action_dev(m, dev, action);
out:
mutex_unlock(&m->work_mutex); return r;
}
/* * Perform a minimal read from the given path to find out whether the * path still works. If a path error occurs, fail it.
*/ staticint probe_path(struct pgpath *pgpath)
{ struct block_device *bdev = pgpath->path.dev->bdev; unsignedint read_size = bdev_logical_block_size(bdev); struct page *page; struct bio *bio;
blk_status_t status; int r = 0;
if (WARN_ON_ONCE(read_size > PAGE_SIZE)) return -EINVAL;
page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM;
/* Perform a minimal read: Sector 0, length read_size */
bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL); if (!bio) {
r = -ENOMEM; goto out;
}
if (status && blk_path_error(status))
fail_path(pgpath);
out:
__free_page(page); return r;
}
/* * Probe all active paths in current_pg to find out whether they still work. * Fail all paths that do not work. * * Return -ENOTCONN if no valid path is left (even outside of current_pg). We * cannot probe paths in other pgs without switching current_pg, so if valid * paths are only in different pgs, they may or may not work. Additionally * we should not probe paths in a pathgroup that is in the process of * Initializing. Userspace can submit a request and we'll switch and wait * for the pathgroup to be initialized. If the request fails, it may need to * probe again.
*/ staticint probe_active_paths(struct multipath *m)
{ struct pgpath *pgpath; struct priority_group *pg = NULL; int r = 0;
spin_lock_irq(&m->lock); if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
wait_event_lock_irq(m->probe_wait,
!test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
m->lock); /* * if we waited because a probe was already in progress, * and it probed the current active pathgroup, don't * reprobe. Just return the number of valid paths
*/ if (m->current_pg == m->last_probed_pg) goto skip_probe;
} if (!m->current_pg || m->is_suspending ||
test_bit(MPATHF_QUEUE_IO, &m->flags)) goto skip_probe;
set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
pg = m->last_probed_pg = m->current_pg;
spin_unlock_irq(&m->lock);
list_for_each_entry(pgpath, &pg->pgpaths, list) { if (pg != READ_ONCE(m->current_pg) ||
READ_ONCE(m->is_suspending)) goto out; if (!pgpath->is_active) continue;
r = probe_path(pgpath); if (r < 0) goto out;
}
out:
spin_lock_irq(&m->lock);
clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags); if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
m->current_pgpath = NULL;
m->current_pg = NULL;
}
skip_probe: if (r == 0 && !atomic_read(&m->nr_valid_paths))
r = -ENOTCONN;
spin_unlock_irq(&m->lock); if (pg)
wake_up(&m->probe_wait); return r;
}
if (pgpath) { if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) {
*bdev = pgpath->path.dev->bdev;
r = 0;
} else { /* pg_init has not started or completed */
r = -ENOTCONN;
}
} else { /* No path is available */
r = -EIO;
spin_lock_irq(&m->lock); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
r = -ENOTCONN;
spin_unlock_irq(&m->lock);
}
if (r == -ENOTCONN) { if (!READ_ONCE(m->current_pg)) { /* Path status changed, redo selection */
(void) choose_pgpath(m, 0);
}
spin_lock_irq(&m->lock); if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
(void) __pg_init_all_paths(m);
spin_unlock_irq(&m->lock);
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
/* * Only pass ioctls through if the device sizes match exactly.
*/ if (!r && ti->len != bdev_nr_sectors((*bdev))) return 1; return r;
}
/* * We return "busy", only when we can map I/Os but underlying devices * are busy (so even if we map I/Os now, the I/Os will wait on * the underlying queue). * In other words, if we want to kill I/Os or queue them inside us * due to map unavailability, we don't return "busy". Otherwise, * dm core won't give us the I/Os and we can't do what we want.
*/ staticint multipath_busy(struct dm_target *ti)
{ bool busy = false, has_active = false; struct multipath *m = ti->private; struct priority_group *pg, *next_pg; struct pgpath *pgpath;
/* pg_init in progress */ if (atomic_read(&m->pg_init_in_progress)) returntrue;
/* no paths available, for blk-mq: rely on IO mapping to delay requeue */ if (!atomic_read(&m->nr_valid_paths)) { unsignedlong flags;
/* Guess which priority_group will be used at next mapping time */
pg = READ_ONCE(m->current_pg);
next_pg = READ_ONCE(m->next_pg); if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
pg = next_pg;
if (!pg) { /* * We don't know which pg will be used at next mapping time. * We don't call choose_pgpath() here to avoid to trigger * pg_init just by busy checking. * So we don't know whether underlying devices we will be using * at next mapping time are busy or not. Just try mapping.
*/ return busy;
}
/* * If there is one non-busy active path at least, the path selector * will be able to select it. So we consider such a pg as not busy.
*/
busy = true;
list_for_each_entry(pgpath, &pg->pgpaths, list) { if (pgpath->is_active) {
has_active = true; if (!pgpath_busy(pgpath)) {
busy = false; break;
}
}
}
if (!has_active) { /* * No active path in this pg, so this pg won't be used and * the current_pg will be changed at next mapping time. * We need to try mapping to determine it.
*/
busy = false;
}
staticint __init dm_multipath_init(void)
{ int r = -ENOMEM;
kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) {
DMERR("failed to create workqueue kmpathd"); goto bad_alloc_kmultipathd;
}
/* * A separate workqueue is used to handle the device handlers * to avoid overloading existing workqueue. Overloading the * old workqueue would also create a bottleneck in the * path of the storage hardware device activation.
*/
kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
WQ_MEM_RECLAIM); if (!kmpath_handlerd) {
DMERR("failed to create workqueue kmpath_handlerd"); goto bad_alloc_kmpath_handlerd;
}
dm_mpath_wq = alloc_workqueue("dm_mpath_wq", 0, 0); if (!dm_mpath_wq) {
DMERR("failed to create workqueue dm_mpath_wq"); goto bad_alloc_dm_mpath_wq;
}
r = dm_register_target(&multipath_target); if (r < 0) goto bad_register_target;
module_param_named(queue_if_no_path_timeout_secs, queue_if_no_path_timeout_secs, ulong, 0644);
MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds");
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.