From cc14cb2fd7087696a4da9e2cf1def7f771b0be00 Mon Sep 17 00:00:00 2001
From: Arianna Avanzini
Date: Sun, 7 Jun 2015 02:37:12 +0200
Subject: [PATCH 1/2] block: Switch from BFQ-v7r7 for 3.4.0 to BFQ-v7r8 for
 3.4.0 - first part

. BUGFIX: Let the weight-related fields of a bfq_entity be correctly
  initialized also when the I/O priority of the entity is changed
  before the first request is inserted into the bfq_queue associated
  with the entity.

. BUGFIX: When merging requests belonging to different bfq_queues,
  avoid repositioning the surviving request. In this case the
  repositioning could move the surviving request across bfq_queues,
  which would ultimately leave the bfq_queues' data structures in an
  inconsistent state.

. BUGFIX: When merging requests belonging to the same bfq_queue,
  reposition the surviving request so that it ends up in the correct
  position, namely the position of the dropped request, instead of
  always moving it to the head of the FIFO of the bfq_queue (which
  would make it be treated as the eldest request). A stand-alone
  sketch of this repositioning is provided after the '---' separator
  below.

. IMPROVEMENT: Always perform device idling if the scenario is
  asymmetric in terms of throughput distribution among processes.
  This extends throughput-distribution guarantees to any process,
  regardless of the properties of its request pattern and of the
  request patterns of the other processes, and regardless of whether
  the device is NCQ-capable.

. IMPROVEMENT: Remove the limitation on the maximum number of
  in-flight requests allowed for a sync queue. This limitation was
  introduced to address fairness issues in CFQ and was inherited by
  the first version of BFQ, but the accurate fairness strategies
  added to BFQ since then have made it unnecessary. Removing it
  enables devices with long internal queues to fill those queues as
  much as they deem appropriate, with sync requests too. This avoids
  throughput losses on such devices, because, to achieve a high
  throughput, they often need to have a high number of requests
  queued internally.

. CODE IMPROVEMENT: Simplify the I/O-priority-change logic by turning
  it into a single-step procedure instead of a two-step one, and
  improve readability by rethinking the names of the functions
  involved in changing the I/O priority of a bfq_queue.

Signed-off-by: Paolo Valente
Signed-off-by: Arianna Avanzini
Reported-by: Erik Fleischer
Reported-by: Malte Schröder
Reported-by: Manfred Schwarb
---
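For reviewers, a stand-alone sketch of the FIFO repositioning now
performed for a same-queue merge (third bullet above). This is plain
user-space C with minimal stand-ins for the kernel list API;
merge_reposition() and the reduced struct request below are
hypothetical illustrations written for this note, not code from the
patch itself.

#include <stdio.h>

/* Minimal stand-ins for the kernel's doubly-linked list API. */
struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h)
{
	h->prev = h->next = h;
}

static void list_add_tail(struct list_head *e, struct list_head *h)
{
	e->prev = h->prev;
	e->next = h;
	h->prev->next = e;
	h->prev = e;
}

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	list_init(e);
}

/* Put 'new' in the exact list position of 'old'; reinitialize 'old'. */
static void list_replace_init(struct list_head *old, struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
	list_init(old);
}

/* Simplified request: queuelist is kept as the first member, so that a
 * list_head pointer can be cast back to its containing request. */
struct request {
	struct list_head queuelist;
	unsigned long fifo_time;
	int id;
};

/* Same-bfq_queue merge: if 'next' is older, the surviving request 'rq'
 * takes over next's FIFO slot and expiry, instead of being moved to the
 * head of the FIFO (where it would be treated as the eldest request). */
static void merge_reposition(struct request *rq, struct request *next)
{
	if (next->fifo_time < rq->fifo_time) {
		list_del_init(&rq->queuelist);
		list_replace_init(&next->queuelist, &rq->queuelist);
		rq->fifo_time = next->fifo_time;
	}
}

int main(void)
{
	struct list_head fifo;
	struct request a = { .fifo_time = 10, .id = 1 };
	struct request b = { .fifo_time = 20, .id = 2 };
	struct request c = { .fifo_time = 30, .id = 3 };
	struct list_head *p;

	list_init(&fifo);
	list_add_tail(&a.queuelist, &fifo);
	list_add_tail(&b.queuelist, &fifo);
	list_add_tail(&c.queuelist, &fifo);

	/* 'c' absorbs 'a': c must inherit a's position and deadline,
	 * so the FIFO becomes c (time 10), b (time 20). */
	merge_reposition(&c, &a);

	for (p = fifo.next; p != &fifo; p = p->next) {
		struct request *r = (struct request *)p;
		printf("req %d, fifo_time %lu\n", r->id, r->fifo_time);
	}
	return 0;
}

The patch obtains the same effect inside bfq_merged_requests(), with
the kernel's list_del_init()/list_replace_init() and
rq_set_fifo_time(), as shown in the corresponding hunk below.
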
 block/bfq-iosched.c | 125 ++++++++++++++++++++++++----------------------------
 block/bfq-sched.c   |   8 +---
 block/bfq.h         |  11 ++---
 3 files changed, 62 insertions(+), 82 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 152992b..4702abe 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -70,9 +70,6 @@
 #include "bfq.h"
 #include "blk.h"
 
-/* Max number of dispatches in one round of service. */
-static const int bfq_quantum = 4;
-
 /* Expiration time of sync (0) and async (1) requests, in jiffies. */
 static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
 
@@ -369,7 +366,6 @@ static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
  */
 static inline bool bfq_differentiated_weights(struct bfq_data *bfqd)
 {
-	BUG_ON(!bfqd->hw_tag);
 	/*
 	 * For weights to differ, at least one of the trees must contain
 	 * at least two nodes.
@@ -406,19 +402,19 @@ static void bfq_weights_tree_add(struct bfq_data *bfqd,
 	struct rb_node **new = &(root->rb_node), *parent = NULL;
 
 	/*
-	 * Do not insert if:
-	 * - the device does not support queueing;
-	 * - the entity is already associated with a counter, which happens if:
-	 *   1) the entity is associated with a queue, 2) a request arrival
-	 *   has caused the queue to become both non-weight-raised, and hence
-	 *   change its weight, and backlogged; in this respect, each
-	 *   of the two events causes an invocation of this function,
-	 *   3) this is the invocation of this function caused by the second
-	 *   event. This second invocation is actually useless, and we handle
-	 *   this fact by exiting immediately. More efficient or clearer
-	 *   solutions might possibly be adopted.
+	 * Do not insert if the entity is already associated with a
+	 * counter, which happens if:
+	 * 1) the entity is associated with a queue,
+	 * 2) a request arrival has caused the queue to become both
+	 *    non-weight-raised, and hence change its weight, and
+	 *    backlogged; in this respect, each of the two events
+	 *    causes an invocation of this function,
+	 * 3) this is the invocation of this function caused by the
+	 *    second event. This second invocation is actually useless,
+	 *    and we handle this fact by exiting immediately. More
+	 *    efficient or clearer solutions might possibly be adopted.
 	 */
-	if (!bfqd->hw_tag || entity->weight_counter)
+	if (entity->weight_counter)
 		return;
 
 	while (*new) {
@@ -457,14 +453,6 @@ static void bfq_weights_tree_remove(struct bfq_data *bfqd,
 				    struct bfq_entity *entity,
 				    struct rb_root *root)
 {
-	/*
-	 * Check whether the entity is actually associated with a counter.
-	 * In fact, the device may not be considered NCQ-capable for a while,
-	 * which implies that no insertion in the weight trees is performed,
-	 * after which the device may start to be deemed NCQ-capable, and hence
-	 * this function may start to be invoked. This may cause the function
-	 * to be invoked for entities that are not associated with any counter.
-	 */
 	if (!entity->weight_counter)
 		return;
 
@@ -1090,7 +1078,8 @@ static void bfq_remove_request(struct request *rq)
 		bfq_updated_next_req(bfqd, bfqq);
 	}
 
-	list_del_init(&rq->queuelist);
+	if (rq->queuelist.prev != &rq->queuelist)
+		list_del_init(&rq->queuelist);
 	BUG_ON(bfqq->queued[sync] == 0);
 	bfqq->queued[sync]--;
 	bfqd->queued--;
@@ -1165,14 +1154,22 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
 static void bfq_merged_requests(struct request_queue *q, struct request *rq,
 				struct request *next)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
 
 	/*
-	 * Reposition in fifo if next is older than rq.
+	 * If next and rq belong to the same bfq_queue and next is older
+	 * than rq, then reposition rq in the fifo (by substituting next
+	 * with rq). Otherwise, if next and rq belong to different
+	 * bfq_queues, never reposition rq: in fact, we would have to
+	 * reposition it with respect to next's position in its own fifo,
+	 * which would most certainly be too expensive with respect to
+	 * the benefits.
 	 */
-	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+	if (bfqq == next_bfqq &&
+	    !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
 	    time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
-		list_move(&rq->queuelist, &next->queuelist);
+		list_del_init(&rq->queuelist);
+		list_replace_init(&next->queuelist, &rq->queuelist);
 		rq_set_fifo_time(rq, rq_fifo_time(next));
 	}
 
@@ -2489,13 +2486,12 @@ static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
 	 */
 #define cond_for_expiring_non_wr  (bfqd->hw_tag && \
 				   (bfqd->wr_busy_queues > 0 || \
-				    (symmetric_scenario && \
-				     (blk_queue_nonrot(bfqd->queue) || \
-				      cond_for_seeky_on_ncq_hdd))))
+				    (blk_queue_nonrot(bfqd->queue) || \
+				     cond_for_seeky_on_ncq_hdd)))
 
 	return bfq_bfqq_sync(bfqq) &&
 		!cond_for_expiring_in_burst &&
-		(bfqq->wr_coeff > 1 ||
+		(bfqq->wr_coeff > 1 || !symmetric_scenario ||
 		 (bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_idle_window(bfqq) &&
 		  !cond_for_expiring_non_wr)
 	);
@@ -2579,9 +2575,9 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
 	}
 
 	/*
-	 * No requests pending. If the in-service queue still has requests
-	 * in flight (possibly waiting for a completion) or is idling for a
-	 * new request, then keep it.
+	 * No requests pending. However, if the in-service queue is idling
+	 * for a new request, or has requests waiting for a completion and
+	 * may idle after their completion, then keep it anyway.
 	 */
 	if (timer_pending(&bfqd->idle_slice_timer) ||
 	    (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
@@ -2780,14 +2776,13 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
 	if (bfqq == NULL)
 		return 0;
 
-	max_dispatch = bfqd->bfq_quantum;
 	if (bfq_class_idle(bfqq))
 		max_dispatch = 1;
 
 	if (!bfq_bfqq_sync(bfqq))
 		max_dispatch = bfqd->bfq_max_budget_async_rq;
 
-	if (bfqq->dispatched >= max_dispatch) {
+	if (!bfq_bfqq_sync(bfqq) && bfqq->dispatched >= max_dispatch) {
 		if (bfqd->busy_queues > 1)
 			return 0;
 		if (bfqq->dispatched >= 4 * max_dispatch)
@@ -2803,8 +2798,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
 	if (!bfq_dispatch_request(bfqd, bfqq))
 		return 0;
 
-	bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)",
-			bfqq->pid, max_dispatch);
+	bfq_log_bfqq(bfqd, bfqq, "dispatched %s request",
+			bfq_bfqq_sync(bfqq) ? "sync" : "async");
 
 	return 1;
 }
@@ -2935,14 +2930,12 @@ static void bfq_exit_icq(struct io_cq *icq)
  * Update the entity prio values; note that the new values will not
  * be used until the next (re)activation.
  */
-static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc)
+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 {
 	struct task_struct *tsk = current;
+	struct io_context *ioc = bic->icq.ioc;
 	int ioprio_class;
 
-	if (!bfq_bfqq_prio_changed(bfqq))
-		return;
-
 	ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
 	switch (ioprio_class) {
 	default:
@@ -2972,29 +2965,31 @@ static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc)
 
 	if (bfqq->entity.new_ioprio < 0 ||
 	    bfqq->entity.new_ioprio >= IOPRIO_BE_NR) {
-		printk(KERN_CRIT "bfq_init_prio_data: new_ioprio %d\n",
+		printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
 			bfqq->entity.new_ioprio);
 		BUG();
 	}
 
+	bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->entity.new_ioprio);
 	bfqq->entity.ioprio_changed = 1;
-
-	bfq_clear_bfqq_prio_changed(bfqq);
 }
 
-static void bfq_changed_ioprio(struct io_context *ioc,
-			       struct bfq_io_cq *bic)
+static void bfq_check_ioprio_change(struct io_context *ioc,
+				    struct bfq_io_cq *bic)
 {
 	struct bfq_data *bfqd;
 	struct bfq_queue *bfqq, *new_bfqq;
 	struct bfq_group *bfqg;
 	unsigned long uninitialized_var(flags);
+	int ioprio = bic->icq.ioc->ioprio;
 
 	bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
 				   &flags);
 	if (unlikely(bfqd == NULL))
 		return;
 
+	bic->ioprio = ioprio;
+
 	bfqq = bic->bfqq[BLK_RW_ASYNC];
 	if (bfqq != NULL) {
 		bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
@@ -3004,7 +2999,7 @@ static void bfq_changed_ioprio(struct io_context *ioc,
 		if (new_bfqq != NULL) {
 			bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
 			bfq_log_bfqq(bfqd, bfqq,
-				     "changed_ioprio: bfqq %p %d",
+				     "check_ioprio_change: bfqq %p %d",
 				     bfqq, atomic_read(&bfqq->ref));
 			bfq_put_queue(bfqq);
 		}
@@ -3012,13 +3007,13 @@ static void bfq_changed_ioprio(struct io_context *ioc,
 
 	bfqq = bic->bfqq[BLK_RW_SYNC];
 	if (bfqq != NULL)
-		bfq_mark_bfqq_prio_changed(bfqq);
+		bfq_set_next_ioprio_data(bfqq, bic);
 
 	bfq_put_bfqd_unlock(bfqd, &flags);
 }
 
 static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-			  pid_t pid, int is_sync)
+			  struct bfq_io_cq *bic, pid_t pid, int is_sync)
 {
 	RB_CLEAR_NODE(&bfqq->entity.rb_node);
 	INIT_LIST_HEAD(&bfqq->fifo);
@@ -3027,7 +3022,8 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	atomic_set(&bfqq->ref, 0);
 	bfqq->bfqd = bfqd;
 
-	bfq_mark_bfqq_prio_changed(bfqq);
+	if (bic)
+		bfq_set_next_ioprio_data(bfqq, bic);
 
 	if (is_sync) {
 		if (!bfq_class_idle(bfqq))
@@ -3087,8 +3083,8 @@ retry:
 		}
 
 		if (bfqq != NULL) {
-			bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync);
-			bfq_init_prio_data(bfqq, ioc);
+			bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
+				      is_sync);
 			bfq_init_entity(&bfqq->entity, bfqg);
 			bfq_log_bfqq(bfqd, bfqq, "allocated");
 		} else {
@@ -3356,8 +3352,6 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
 			bfq_bfqq_increase_failed_cooperations(bfqq);
 	}
 
-	bfq_init_prio_data(bfqq, RQ_BIC(rq)->icq.ioc);
-
 	bfq_add_request(rq);
 
 	/*
@@ -3501,11 +3495,8 @@ static int bfq_may_queue(struct request_queue *q, int rw)
 		return ELV_MQUEUE_MAY;
 
 	bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
-	if (bfqq != NULL) {
-		bfq_init_prio_data(bfqq, bic->icq.ioc);
-
+	if (bfqq != NULL)
 		return __bfq_may_queue(bfqq);
-	}
 
 	return ELV_MQUEUE_MAY;
 }
@@ -3575,7 +3566,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
 
 	/* handle changed prio notifications; cgroup change is handled separately */
 	if (unlikely(icq_get_changed(&bic->icq) & ICQ_IOPRIO_CHANGED))
-		bfq_changed_ioprio(bic->icq.ioc, bic);
+		bfq_check_ioprio_change(bic->icq.ioc, bic);
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
@@ -3794,10 +3785,12 @@ static void *bfq_init_queue(struct request_queue *q)
 	 * Grab a permanent reference to it, so that the normal code flow
 	 * will not attempt to free it.
 	 */
-	bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0);
+	bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
 	atomic_inc(&bfqd->oom_bfqq.ref);
 	bfqd->oom_bfqq.entity.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
 	bfqd->oom_bfqq.entity.new_ioprio_class = IOPRIO_CLASS_BE;
+	bfqd->oom_bfqq.entity.new_weight =
+		bfq_ioprio_to_weight(bfqd->oom_bfqq.entity.new_ioprio);
 	/*
 	 * Trigger weight initialization, according to ioprio, at the
 	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
@@ -3837,7 +3830,6 @@ static void *bfq_init_queue(struct request_queue *q)
 
 	bfqd->bfq_max_budget = bfq_default_max_budget;
 
-	bfqd->bfq_quantum = bfq_quantum;
 	bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0];
 	bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1];
 	bfqd->bfq_back_max = bfq_back_max;
@@ -3971,7 +3963,6 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 		__data = jiffies_to_msecs(__data);			\
 	return bfq_var_show(__data, (page));				\
 }
-SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0);
 SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
 SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
 SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
@@ -4008,7 +3999,6 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 	*(__PTR) = __data;						\
 	return ret;							\
 }
-STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0);
 STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
 		INT_MAX, 1);
 STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
@@ -4109,7 +4099,6 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
 	__ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store)
 
 static struct elv_fs_entry bfq_attrs[] = {
-	BFQ_ATTR(quantum),
 	BFQ_ATTR(fifo_expire_sync),
 	BFQ_ATTR(fifo_expire_async),
 	BFQ_ATTR(back_seek_max),
@@ -4190,7 +4179,7 @@ static int __init bfq_init(void)
 	device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
 
 	elv_register(&iosched_bfq);
-	pr_info("BFQ I/O-scheduler version: v7r7");
+	pr_info("BFQ I/O-scheduler: v7r8");
 
 	return 0;
 }
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 6764a7e..d0890c6 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -636,13 +636,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 
 		entity->orig_weight = entity->new_weight;
 		entity->ioprio = bfq_weight_to_ioprio(entity->orig_weight);
-	} else if (entity->new_ioprio != entity->ioprio) {
-		entity->ioprio = entity->new_ioprio;
-		entity->orig_weight =
-				bfq_ioprio_to_weight(entity->ioprio);
-	} else
-		entity->new_weight = entity->orig_weight =
-			bfq_ioprio_to_weight(entity->ioprio);
+	}
 
 	entity->ioprio_class = entity->new_ioprio_class;
 	entity->ioprio_changed = 0;
diff --git a/block/bfq.h b/block/bfq.h
index 9428d71..e6f1a88 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
 /*
- * BFQ-v7r7 for 3.4.0: data structures and common functions prototypes.
+ * BFQ-v7r8 for 3.4.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe
@@ -327,6 +327,7 @@ struct bfq_io_cq {
 	struct io_cq icq; /* must be the first member */
 	struct bfq_queue *bfqq[2];
 	struct bfq_ttime ttime;
+	int ioprio;
 
 	unsigned int wr_time_left;
 	bool saved_idle_window;
@@ -418,7 +419,6 @@ enum bfq_device_speed {
  * @group_list: list of all the bfq_groups active on the device.
  * @active_list: list of all the bfq_queues active on the device.
  * @idle_list: list of all the bfq_queues idle on the device.
- * @bfq_quantum: max number of requests dispatched per dispatch round.
  * @bfq_fifo_expire: timeout for async/sync requests; when it expires
  *                   requests are served in fifo order.
  * @bfq_back_penalty: weight of backward seeks wrt forward ones.
@@ -526,7 +526,6 @@ struct bfq_data {
 	struct list_head active_list;
 	struct list_head idle_list;
 
-	unsigned int bfq_quantum;
 	unsigned int bfq_fifo_expire[2];
 	unsigned int bfq_back_penalty;
 	unsigned int bfq_back_max;
@@ -569,7 +568,6 @@ enum bfqq_state_flags {
 	BFQ_BFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
 	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
 	BFQ_BFQQ_FLAG_idle_window,	/* slice idling enabled */
-	BFQ_BFQQ_FLAG_prio_changed,	/* task priority has changed */
 	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
 	BFQ_BFQQ_FLAG_budget_new,	/* no completion with this budget */
 	BFQ_BFQQ_FLAG_IO_bound,		/*
@@ -613,7 +611,6 @@ BFQ_BFQQ_FNS(wait_request);
 BFQ_BFQQ_FNS(must_alloc);
 BFQ_BFQQ_FNS(fifo_expire);
 BFQ_BFQQ_FNS(idle_window);
-BFQ_BFQQ_FNS(prio_changed);
 BFQ_BFQQ_FNS(sync);
 BFQ_BFQQ_FNS(budget_new);
 BFQ_BFQQ_FNS(IO_bound);
@@ -794,8 +791,8 @@ static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
 	spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
 }
 
-static void bfq_changed_ioprio(struct io_context *ioc,
-			       struct bfq_io_cq *bic);
+static void bfq_check_ioprio_change(struct io_context *ioc,
+				    struct bfq_io_cq *bic);
 static void bfq_put_queue(struct bfq_queue *bfqq);
 static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
 static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
-- 
2.1.4