/*
 * See Documentation/block/deadline-iosched.rst
 */
static const int read_expire = HZ / 2;	/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;	/* ditto for writes, these limits are SOFT! */
/*
 * Time after which to dispatch lower priority requests even if higher
 * priority requests are pending.
 */
static const int prio_aging_expire = 10 * HZ;
static const int writes_starved = 2;	/* max times reads can starve a write */
static const int fifo_batch = 16;	/* # of sequential requests treated as one
					   by the above parameters. For throughput. */
/*
 * I/O statistics per I/O priority. It is fine if these counters overflow.
 * What matters is that these counters are at least as wide as
 * log2(max_outstanding_requests).
 */
struct io_stats_per_prio {
	uint32_t inserted;
	uint32_t merged;	/* bumped under dd->lock when requests merge */
	uint32_t dispatched;
	atomic_t completed;	/* atomic: updated from the completion path */
};
/*
 * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
 * present on both sort_list[] (sector-sorted rb-tree) and fifo_list[]
 * (arrival order).
 */
struct dd_per_prio {
	struct list_head dispatch;
	struct rb_root sort_list[DD_DIR_COUNT];
	struct list_head fifo_list[DD_DIR_COUNT];
	/* Position of the most recently dispatched request. */
	sector_t latest_pos[DD_DIR_COUNT];
	struct io_stats_per_prio stats;
};
/*
 * Deadline scheduler state. Shared across all hardware queues of a request
 * queue (see the comment above dd_dispatch_request()).
 */
struct deadline_data {
	/*
	 * run time data
	 */
	struct dd_per_prio per_prio[DD_PRIO_COUNT];

	/* Data direction of latest dispatched request. */
	enum dd_data_dir last_dir;
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[DD_DIR_COUNT];
	int fifo_batch;
	int writes_starved;
	int front_merges;
	u32 async_depth;
	int prio_aging_expire;

	spinlock_t lock;		/* protects the per-priority lists/trees */
};
/* Maps an I/O priority class to a deadline scheduler priority. */ staticconstenum dd_prio ioprio_class_to_prio[] = {
[IOPRIO_CLASS_NONE] = DD_BE_PRIO,
[IOPRIO_CLASS_RT] = DD_RT_PRIO,
[IOPRIO_CLASS_BE] = DD_BE_PRIO,
[IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
};
/*
 * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to
 * request @rq.
 */
static u8 dd_rq_ioclass(struct request *rq)
{
	return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
}
/*
 * NOTE(review): deadline_from_pos() is truncated in this source. The rb-tree
 * walk that would implement the documented contract is missing, and the
 * ELEVATOR_FRONT_MERGE fragment below references 'type' and 'req', neither of
 * which is declared in this scope — it looks like part of an elevator
 * request-merged callback was spliced in here. Restore the missing code
 * before building this file.
 */
/* * Return the first request for which blk_rq_pos() >= @pos.
*/ staticinlinestruct request *deadline_from_pos(struct dd_per_prio *per_prio, enum dd_data_dir data_dir, sector_t pos)
{ struct rb_node *node = per_prio->sort_list[data_dir].rb_node; struct request *rq, *res = NULL;
/* * if the merge was a front merge, we need to reposition request
*/ if (type == ELEVATOR_FRONT_MERGE) {
elv_rb_del(deadline_rb_root(per_prio, req), req);
deadline_add_rq_rb(per_prio, req);
}
}
/* * Callback function that is invoked after @next has been merged into @req.
*/ staticvoid dd_merged_requests(struct request_queue *q, struct request *req, struct request *next)
{ struct deadline_data *dd = q->elevator->elevator_data; const u8 ioprio_class = dd_rq_ioclass(next); constenum dd_prio prio = ioprio_class_to_prio[ioprio_class];
lockdep_assert_held(&dd->lock);
dd->per_prio[prio].stats.merged++;
/* * if next expires before rq, assign its expire time to rq * and move into next position (next will be deleted) in fifo
*/ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { if (time_before((unsignedlong)next->fifo_time,
(unsignedlong)req->fifo_time)) {
list_move(&req->queuelist, &next->queuelist);
req->fifo_time = next->fifo_time;
}
}
/* * kill knowledge of next, this one is a goner
*/
deadline_remove_request(q, &dd->per_prio[prio], next);
}
/*
 * move an entry to dispatch queue
 *
 * NOTE(review): despite the name, only the removal from the sort and fifo
 * lists is visible in this source — confirm nothing was lost upstream of it.
 */
static void
deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
		      struct request *rq)
{
	/* take it off the sort and fifo list */
	deadline_remove_request(rq->q, per_prio, rq);
}
/*
 * NOTE(review): dd_queued() is truncated in this source — the statement that
 * computes the queued count from @stats and the closing brace are missing.
 */
/* Number of requests queued for a given priority level. */ static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
{ conststruct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
/* * deadline_check_fifo returns true if and only if there are expired requests * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
*/ staticinlinebool deadline_check_fifo(struct dd_per_prio *per_prio, enum dd_data_dir data_dir)
{ struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
/*
 * NOTE(review): truncated — the expiry comparison on rq->fifo_time and the
 * closing brace are missing from this source.
 */
/* * For the specified data direction, return the next request to * dispatch using arrival ordered lists.
*/ staticstruct request *
deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, enum dd_data_dir data_dir)
{ if (list_empty(&per_prio->fifo_list[data_dir])) return NULL;
/*
 * NOTE(review): truncated — the selection of the head-of-FIFO request and the
 * closing brace are missing from this source.
 */
/* * For the specified data direction, return the next request to * dispatch using sector position sorted lists.
*/ staticstruct request *
deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, enum dd_data_dir data_dir)
{ return deadline_from_pos(per_prio, data_dir,
per_prio->latest_pos[data_dir]);
}
/* * Returns true if and only if @rq started after @latest_start where * @latest_start is in jiffies.
*/ staticbool started_after(struct deadline_data *dd, struct request *rq, unsignedlong latest_start)
{ unsignedlong start_time = (unsignedlong)rq->fifo_time;
start_time -= dd->fifo_expire[rq_data_dir(rq)];
return time_after(start_time, latest_start);
}
/*
 * NOTE(review): __dd_dispatch_request() is truncated in this source — the
 * code after the started_after() check at the dispatch_request label (the
 * statements that would actually hand @rq to the caller) and the closing
 * brace are missing. 'next_rq', 'prio' and 'ioprio_class' are declared but
 * the visible statements never use 'prio' or 'ioprio_class'.
 */
/* * deadline_dispatch_requests selects the best request according to * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
*/ staticstruct request *__dd_dispatch_request(struct deadline_data *dd, struct dd_per_prio *per_prio, unsignedlong latest_start)
{ struct request *rq, *next_rq; enum dd_data_dir data_dir; enum dd_prio prio;
u8 ioprio_class;
/* * batches are currently reads XOR writes
*/
rq = deadline_next_request(dd, per_prio, dd->last_dir); if (rq && dd->batching < dd->fifo_batch) { /* we have a next request and are still entitled to batch */
data_dir = rq_data_dir(rq); goto dispatch_request;
}
/* * at this point we are not running a batch. select the appropriate * data direction (read / write)
*/
if (!list_empty(&per_prio->fifo_list[DD_READ])) {
BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
/* Writes can only starve dd->writes_starved times before reads yield. */
if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
(dd->starved++ >= dd->writes_starved)) goto dispatch_writes;
data_dir = DD_READ;
goto dispatch_find_request;
}
/* * there are either no reads or writes have been starved
*/
if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
dispatch_writes:
BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
dd->starved = 0;
data_dir = DD_WRITE;
goto dispatch_find_request;
}
return NULL;
dispatch_find_request: /* * we are not running a batch, find best request for selected data_dir
*/
next_rq = deadline_next_request(dd, per_prio, data_dir); if (deadline_check_fifo(per_prio, data_dir) || !next_rq) { /* * A deadline has expired, the last request was in the other * direction, or we have run out of higher-sectored requests. * Start again from the request with the earliest expiry time.
*/
rq = deadline_fifo_request(dd, per_prio, data_dir);
} else { /* * The last req was the same dir and we have a next request in * sort order. No expired requests so continue on from here.
*/
rq = next_rq;
}
if (!rq) return NULL;
dd->last_dir = data_dir;
dd->batching = 0;
dispatch_request: if (started_after(dd, rq, latest_start)) return NULL;
/* * Check whether there are any requests with priority other than DD_RT_PRIO * that were inserted more than prio_aging_expire jiffies ago.
*/ staticstruct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, unsignedlong now)
{ struct request *rq; enum dd_prio prio; int prio_cnt;
for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
now - dd->prio_aging_expire); if (rq) return rq;
}
return NULL;
}
/* * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). * * One confusing aspect here is that we get called for a specific * hardware queue, but we may return a request that is for a * different hardware queue. This is because mq-deadline has shared * state for all hardware queues, in terms of sorting, FIFOs, etc.
*/ staticstruct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{ struct deadline_data *dd = hctx->queue->elevator->elevator_data; constunsignedlong now = jiffies; struct request *rq; enum dd_prio prio;
spin_lock(&dd->lock);
rq = dd_dispatch_prio_aged_requests(dd, now); if (rq) goto unlock;
/* * Next, dispatch requests in priority order. Ignore lower priority * requests if any higher priority requests are pending.
*/ for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now); if (rq || dd_queued(dd, prio)) break;
}
unlock:
spin_unlock(&dd->lock);
return rq;
}
/*
 * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
 * function is used by __blk_mq_get_tag().
 */
static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
	struct deadline_data *dd = data->q->elevator->elevator_data;

	/* Do not throttle synchronous reads. */
	if (op_is_sync(opf) && !op_is_write(opf))
		return;

	/*
	 * Throttle asynchronous requests and writes such that these requests
	 * do not block the allocation of synchronous requests.
	 */
	data->shallow_depth = dd->async_depth;
}
/* Called by blk_mq_update_nr_requests(). */ staticvoid dd_depth_updated(struct request_queue *q)
{ struct deadline_data *dd = q->elevator->elevator_data;
/*
 * NOTE(review): this body looks fused from two functions. The WARN_ONCE
 * below references 'queued', 'prio' and 'stats', none of which is declared
 * in this scope — it appears to be an exit-time statistics sanity check
 * spliced in from elsewhere, while the actual depth-update logic (which
 * would presumably use 'dd') is missing. Restore before building.
 */
WARN_ONCE(queued != 0, "statistics for priority %d: i %u m %u d %u c %u\n",
prio, stats->inserted, stats->merged,
stats->dispatched, atomic_read(&stats->completed));
}
/* * Try to merge @bio into an existing request. If @bio has been merged into * an existing request, store the pointer to that request into *@rq.
*/ staticint dd_request_merge(struct request_queue *q, struct request **rq, struct bio *bio)
{ struct deadline_data *dd = q->elevator->elevator_data; const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio); constenum dd_prio prio = ioprio_class_to_prio[ioprio_class]; struct dd_per_prio *per_prio = &dd->per_prio[prio];
sector_t sector = bio_end_sector(bio); struct request *__rq;
if (!dd->front_merges) return ELEVATOR_NO_MERGE;
__rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector); if (__rq) {
BUG_ON(sector != blk_rq_pos(__rq));
if (elv_bio_merge_ok(__rq, bio)) {
*rq = __rq; if (blk_discard_mergable(__rq)) return ELEVATOR_DISCARD_MERGE; return ELEVATOR_FRONT_MERGE;
}
}
return ELEVATOR_NO_MERGE;
}
/*
 * NOTE(review): dd_bio_merge() is fused with a fragment of another function.
 * After spin_unlock() the expected tail (disposing of 'free' and returning
 * 'ret') is missing; the rq_mergeable()/fifo code below references 'rq',
 * 'data_dir' and 'per_prio', none of which is declared in this scope — it
 * looks like part of a request-insertion path. Restore before building.
 */
/* * Attempt to merge a bio into an existing request. This function is called * before @bio is associated with a request.
*/ staticbool dd_bio_merge(struct request_queue *q, struct bio *bio, unsignedint nr_segs)
{ struct deadline_data *dd = q->elevator->elevator_data; struct request *free = NULL; bool ret;
spin_lock(&dd->lock);
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
spin_unlock(&dd->lock);
if (rq_mergeable(rq)) {
elv_rqhash_add(q, rq); if (!q->last_merge)
q->last_merge = rq;
}
/* * set expire time and add to fifo list
*/
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
}
}
/*
 * NOTE(review): dd_insert_requests() is fused with a fragment of a
 * completion callback. The while loop body is cut off after declaring 'rq',
 * and the 'per_prio'/stats.completed code below belongs (per its own
 * comment, which mentions dd_finish_request()) to a different function —
 * 'per_prio' is not declared in this scope. Restore before building.
 */
/* * Called from blk_mq_insert_request() or blk_mq_dispatch_list().
*/ staticvoid dd_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list,
blk_insert_t flags)
{ struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data;
LIST_HEAD(free);
spin_lock(&dd->lock); while (!list_empty(list)) { struct request *rq;
/* * The block layer core may call dd_finish_request() without having * called dd_insert_requests(). Skip requests that bypassed I/O * scheduling. See also blk_mq_request_bypass_insert().
*/ if (per_prio)
atomic_inc(&per_prio->stats.completed);
}
/*
 * NOTE(review): dd_owned_by_driver() is truncated in this source — the
 * computation over @stats and the closing brace are missing.
 */
/* Number of requests owned by the block driver for a given priority. */ static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
{ conststruct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
/*
 * NOTE(review): the German text below is unrelated web-page disclaimer
 * boilerplate that leaked into this file during extraction (translation:
 * "The information on this web page was carefully compiled to the best of
 * our knowledge. However, neither completeness, correctness, nor quality of
 * the provided information is guaranteed. Note: the colored syntax display
 * and the measurement are still experimental."). Kept verbatim for
 * reference; it is not part of the scheduler source:
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
 * Richtigkeit, noch Qualität der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */