// SPDX-License-Identifier: GPL-2.0
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
/* Holding context data for changing elevator */
struct elv_change_ctx {
	const char *name;
	bool no_uevent;

	/* for unregistering old elevator */
	struct elevator_queue *old;
	/* for registering new elevator */
	struct elevator_queue *new;
	/* holds sched tags data */
	struct elevator_tags *et;
};
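
/*
 * The context is filled in by the caller; elevator_switch() stores the
 * outgoing and incoming elevator queues in ->old and ->new, and
 * elevator_change_done() completes the switch once the queue has been
 * unfrozen again.
 */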

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.allow_merge)
		return e->type->ops.allow_merge(q, rq, bio);

	return true;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
	if (!blk_rq_merge_ok(rq, bio))
		return false;

	if (!elv_iosched_allow_bio_merge(rq, bio))
		return false;

	return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

/**
 * elevator_match - Check whether @e's name or alias matches @name
 * @e: Scheduler to test
 * @name: Elevator name to test
 *
 * Return true if the elevator @e's name or alias matches @name.
 */
static bool elevator_match(const struct elevator_type *e, const char *name)
{
	return !strcmp(e->elevator_name, name) ||
		(e->elevator_alias && !strcmp(e->elevator_alias, name));
}
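
/*
 * Ask the current io scheduler whether @bio can be merged into an existing
 * request; on a hit the candidate is returned through @req together with
 * the merge type (front, back or discard merge).
 */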
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		enum elv_merge ret = blk_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;
		if (blk_discard_mergable(__rq))
			return ELEVATOR_DISCARD_MERGE;
		return ELEVATOR_BACK_MERGE;
	}
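
	/*
	 * Finally, let the io scheduler itself look for a merge candidate,
	 * e.g. a front merge found via its own sorted data structures.
	 */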
	if (e->type->ops.request_merge)
		return e->type->ops.request_merge(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise. 'free' will contain all
 * requests that need to be freed.
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
			      struct list_head *free)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
		list_add(&rq->queuelist, free);
		return true;
	}
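
	/* In noxmerges mode, only the one-hit cache above is attempted */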
	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		list_add(&rq->queuelist, free);
		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}

static int elv_register_queue(struct request_queue *q,
			      struct elevator_queue *e,
			      bool uevent)
{
	int error;

	error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
	if (!error) {
		const struct elv_fs_entry *attr = e->type->elevator_attrs;

		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		if (uevent)
			kobject_uevent(&e->kobj, KOBJ_ADD);

		/*
		 * The scheduler is initialized and ready to be exported via
		 * debugfs.
		 */
		blk_mq_sched_reg_debugfs(q);
		set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
	}
	return error;
}
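
/*
 * Undo elv_register_queue(): remove the sysfs files and debugfs entries of
 * a registered scheduler. Calling this with a NULL or never-registered
 * elevator is a no-op.
 */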
static void elv_unregister_queue(struct request_queue *q,
				 struct elevator_queue *e)
{
	if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);

		/* unexport via debugfs before exiting sched */
		blk_mq_sched_unreg_debugfs(q);
	}
}
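
/*
 * Register a scheduler type with the elevator core. Called from the module
 * init path of io schedulers such as mq-deadline, bfq and kyber.
 */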
int elv_register(struct elevator_type *e)
{
	/* finish request is mandatory */
	if (WARN_ON_ONCE(!e->ops.finish_request))
		return -EINVAL;
	/* insert_requests and dispatch_request are mandatory */
	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
		return -EINVAL;

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name,
						 e->icq_size, e->icq_align,
						 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (__elevator_find(e->elevator_name)) {
		spin_unlock(&elv_list_lock);
		kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists. icq's are RCU managed. Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * Switch to new_e io scheduler.
 *
 * If switching fails, we are most likely running out of memory and not able
 * to restore the old io scheduler, so leave the io scheduler as "none".
 */
static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
{
	struct elevator_type *new_e = NULL;
	int ret = 0;

	WARN_ON_ONCE(q->mq_freeze_depth == 0);
	lockdep_assert_held(&q->elevator_lock);

	if (strncmp(ctx->name, "none", 4)) {
		new_e = elevator_find_get(ctx->name);
		if (!new_e)
			return -EINVAL;
	}

	blk_mq_quiesce_queue(q);

	if (q->elevator) {
		ctx->old = q->elevator;
		elevator_exit(q);
	}

	if (new_e) {
		ret = blk_mq_init_sched(q, new_e, ctx->et);
		if (!ret)
			ctx->new = q->elevator;
	} else {
		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
		q->elevator = NULL;
		q->nr_requests = q->tag_set->queue_depth;
	}

	blk_mq_unquiesce_queue(q);

	if (new_e)
		elevator_put(new_e);
	return ret;
}

/*
 * Exit the current elevator and release its sched tags and kobject, used
 * when registering the newly switched-in elevator fails.
 */
static void elv_exit_and_release(struct request_queue *q)
{
	struct elevator_queue *e;
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);
	mutex_lock(&q->elevator_lock);
	e = q->elevator;
	elevator_exit(q);
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);
	if (e) {
		blk_mq_free_sched_tags(e->et, q->tag_set);
		kobject_put(&e->kobj);
	}
}
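
/*
 * Finish an elevator switch after the queue has been unfrozen: unregister
 * and release the old elevator, then register the new one. If registering
 * the new elevator fails, it is torn down again and the queue is left with
 * no scheduler.
 */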
static int elevator_change_done(struct request_queue *q,
				struct elv_change_ctx *ctx)
{
	int ret = 0;

	if (ctx->old) {
		bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
					   &ctx->old->flags);
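
		/*
		 * Schedulers such as bfq disable writeback throttling while
		 * they are active; the flag records whether the default wbt
		 * state has to be restored once the old elevator is gone.
		 */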
		elv_unregister_queue(q, ctx->old);
		blk_mq_free_sched_tags(ctx->old->et, q->tag_set);
		kobject_put(&ctx->old->kobj);
		if (enable_wbt)
			wbt_enable_default(q->disk);
	}
	if (ctx->new) {
		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
		if (ret)
			elv_exit_and_release(q);
	}
	return ret;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
{
	unsigned int memflags;
	struct blk_mq_tag_set *set = q->tag_set;
	int ret = 0;

	lockdep_assert_held(&set->update_nr_hwq_lock);
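
	/*
	 * Everything other than switching to "none" needs scheduler tags;
	 * allocate them up front, before the queue is frozen.
	 */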
	if (strncmp(ctx->name, "none", 4)) {
		ctx->et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues,
					blk_mq_default_nr_requests(set));
		if (!ctx->et)
			return -ENOMEM;
	}

	memflags = blk_mq_freeze_queue(q);
	/*
	 * May be called before adding disk, when there isn't any FS I/O,
	 * so freezing the queue plus canceling dispatch work is enough to
	 * drain any dispatch activities originating from passthrough
	 * requests; there is then no need to quiesce the queue, which may
	 * add long boot latency, especially when lots of disks are involved.
	 *
	 * Disk isn't added yet, so verifying queue lock only manually.
	 */
	blk_mq_cancel_work_sync(q);

	mutex_lock(&q->elevator_lock);
	if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
		ret = elevator_switch(q, ctx);
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);
	if (!ret)
		ret = elevator_change_done(q, ctx);
	/*
	 * Free the sched tags if they were allocated but we could not switch
	 * the elevator.
	 */
	if (ctx->et && !ctx->new)
		blk_mq_free_sched_tags(ctx->et, set);

	return ret;
}

/*
 * The I/O scheduler depends on the number of hardware queues, so this
 * forces a reattachment when nr_hw_queues changes.
 */
void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
			     struct elevator_tags *t)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct elv_change_ctx ctx = {};
	int ret = -ENODEV;

	WARN_ON_ONCE(q->mq_freeze_depth == 0);

	if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
		ctx.name = e->elevator_name;
		ctx.et = t;

		mutex_lock(&q->elevator_lock);
		/* force reattaching the elevator after nr_hw_queues is updated */
		ret = elevator_switch(q, &ctx);
		mutex_unlock(&q->elevator_lock);
	}
	blk_mq_unfreeze_queue_nomemrestore(q);
	if (!ret)
		WARN_ON_ONCE(elevator_change_done(q, &ctx));
	/*
	 * Free the sched tags if they were allocated but we could not switch
	 * the elevator.
	 */
	if (t && !ctx.new)
		blk_mq_free_sched_tags(t, set);
}

/*
 * Use the default elevator settings. If the chosen elevator initialization
 * fails, fall back to the "none" elevator (no elevator).
 */
void elevator_set_default(struct request_queue *q)
{
	struct elv_change_ctx ctx = {
		.name = "mq-deadline",
		.no_uevent = true,
	};
	int err;
	struct elevator_type *e;

	/* now we allow switching the elevator */
	blk_queue_flag_clear(QUEUE_FLAG_NO_ELV_SWITCH, q);
	if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
		return;

	/*
	 * For single queue devices, default to using mq-deadline. If we
	 * have multiple queues or mq-deadline is not available, default
	 * to "none".
	 */
	e = elevator_find_get(ctx.name);
	if (!e)
		return;

	if ((q->nr_hw_queues == 1 ||
	     blk_mq_is_shared_tags(q->tag_set->flags))) {
		err = elevator_change(q, &ctx);
		if (err < 0)
			pr_warn("\"%s\" elevator initialization failed %d, falling back to \"none\"\n",
				ctx.name, err);
	}

	elevator_put(e);
}
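
/*
 * Sysfs store handler behind the "scheduler" queue attribute: writing a
 * scheduler name to /sys/block/<disk>/queue/scheduler switches the queue
 * over to that scheduler.
 */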
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
			  size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	struct elv_change_ctx ctx = {};
	int ret;
	struct request_queue *q = disk->queue;
	struct blk_mq_tag_set *set = q->tag_set;

	/* Make sure the queue is not in the middle of being removed */
	if (!blk_queue_registered(q))
		return -ENOENT;

	/*
	 * If the attribute needs to load a module, do it before freezing the
	 * queue to ensure that the module file can be read when the request
	 * queue is the one for the device storing the module file.
	 */
	strscpy(elevator_name, buf, sizeof(elevator_name));
	ctx.name = strstrip(elevator_name);

	elv_iosched_load_module(ctx.name);

	down_read(&set->update_nr_hwq_lock);
	if (!blk_queue_no_elv_switch(q)) {
		ret = elevator_change(q, &ctx);
		if (!ret)
			ret = count;
	} else {
		ret = -ENOENT;
	}
	up_read(&set->update_nr_hwq_lock);
	return ret;
}
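
/*
 * The legacy "elevator=" boot parameter once selected a default io
 * scheduler for all devices; with blk-mq it is still parsed but has no
 * effect.
 */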
static int __init elevator_setup(char *str)
{
	pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
		"Please use sysfs to set IO scheduler for individual devices.\n");
	return 1;
}
__setup("elevator=", elevator_setup);