From ee2d7f2440c3aba2b8973a78e3a81028bb957fde Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 20 Jun 2014 16:39:09 +0200 Subject: [PATCH] block: Switch from BFQ-v7r4 for 3.10.0 to BFQ-v7r5 for 3.10.0 . IMPROVEMENT: Improve throughput boosting by idling the device only for processes that, in addition to perform sequential I/O, are I/O-bound (apart from weight-raised queues, for which idling is always performed to guarantee them a low latency). . IMPROVEMENT: Improve throughput boosting by depriving processes that cooperate often of weight-raising. . CODE IMPROVEMENT: Pass of improvement of the readability of both comments and actual code. Signed-off-by: Paolo Valente Tested-by: Takashi Iwai Reported-by: Pavel Machek Signed-off-by: Arianna Avanzini --- block/bfq-cgroup.c | 29 ++- block/bfq-iosched.c | 696 +++++++++++++++++++++++++++++----------------------- block/bfq-sched.c | 35 +-- block/bfq.h | 181 +++++++++----- 4 files changed, 538 insertions(+), 403 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 666123f..43bfb35 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -9,7 +9,8 @@ * * Copyright (C) 2010 Paolo Valente * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. */ #ifdef CONFIG_CGROUP_BFQIO @@ -143,8 +144,9 @@ static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, bfq_group_set_parent(prev, bfqg); /* * Build a list of allocated nodes using the bfqd - * filed, that is still unused and will be initialized - * only after the node will be connected. + * filed, that is still unused and will be + * initialized only after the node will be + * connected. */ prev->bfqd = bfqg; prev = bfqg; @@ -164,7 +166,8 @@ cleanup: } /** - * bfq_group_chain_link - link an allocated group chain to a cgroup hierarchy. + * bfq_group_chain_link - link an allocated group chain to a cgroup + * hierarchy. * @bfqd: the queue descriptor. * @cgroup: the leaf cgroup to start from. * @leaf: the leaf group (to be associated to @cgroup). @@ -437,7 +440,8 @@ static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, } /** - * bfq_reparent_active_entities - move to the root group all active entities. + * bfq_reparent_active_entities - move to the root group all active + * entities. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree with the entities. @@ -482,8 +486,8 @@ static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) hlist_del(&bfqg->group_node); /* - * Empty all service_trees belonging to this group before deactivating - * the group itself. + * Empty all service_trees belonging to this group before + * deactivating the group itself. */ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; @@ -503,7 +507,7 @@ static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) * all the leaf entities corresponding to these queues * to the root_group. * Also, it may happen that the group has an entity - * under service, which is disconnected from the active + * in service, which is disconnected from the active * tree: it must be moved, too. * There is no need to put the sync queues, as the * scheduler has taken no reference. 
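The comment in bfq_group_chain_alloc() above describes threading the newly allocated groups into a temporary chain through the still-unused bfqd field, so that a failed allocation can unwind cleanly. A rough stand-alone sketch of that pattern follows, with illustrative names and plain calloc() instead of the kernel allocators; it is not the patch's code.

#include <stdlib.h>

struct node {
        struct node *parent;    /* set only once the node is connected */
        void *tmp_link;         /* unused until then; doubles as 'next' */
};

static struct node *chain_alloc(int depth)
{
        struct node *leaf = NULL, *prev = NULL, *n;
        int i;

        for (i = 0; i < depth; i++) {
                n = calloc(1, sizeof(*n));
                if (n == NULL)
                        goto cleanup;
                if (leaf == NULL)
                        leaf = n;
                if (prev != NULL)
                        prev->tmp_link = n;     /* temporary chain link */
                prev = n;
        }
        return leaf;

cleanup:
        /* walk the temporary chain from the leaf and free what was built */
        while (leaf != NULL) {
                n = leaf->tmp_link;
                free(leaf);
                leaf = n;
        }
        return NULL;
}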
@@ -772,10 +776,11 @@ static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) ioc = task->io_context; if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) /* - * ioc == NULL means that the task is either too young or - * exiting: if it has still no ioc the ioc can't be shared, - * if the task is exiting the attach will fail anyway, no - * matter what we return here. + * ioc == NULL means that the task is either too + * young or exiting: if it has still no ioc the + * ioc can't be shared, if the task is exiting the + * attach will fail anyway, no matter what we + * return here. */ ret = -EINVAL; task_unlock(task); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 2924f3f..89063f9 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -9,28 +9,32 @@ * * Copyright (C) 2010 Paolo Valente * - * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. * - * BFQ is a proportional share disk scheduling algorithm based on the - * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, measured in - * number of sectors, to tasks instead of time slices. The disk is not granted - * to the in-service task for a given time slice, but until it has exhausted - * its assigned budget. This change from the time to the service domain allows - * BFQ to distribute the disk bandwidth among tasks as desired, without any - * distortion due to ZBR, workload fluctuations or other factors. BFQ uses an - * ad hoc internal scheduler, called B-WF2Q+, to schedule tasks according to - * their budgets (more precisely BFQ schedules queues associated to tasks). - * Thanks to this accurate scheduler, BFQ can afford to assign high budgets to - * disk-bound non-seeky tasks (to boost the throughput), and yet guarantee low - * latencies to interactive and soft real-time applications. + * BFQ is a proportional-share storage-I/O scheduling algorithm based on + * the slice-by-slice service scheme of CFQ. But BFQ assigns budgets, + * measured in number of sectors, to processes instead of time slices. The + * device is not granted to the in-service process for a given time slice, + * but until it has exhausted its assigned budget. This change from the time + * to the service domain allows BFQ to distribute the device throughput + * among processes as desired, without any distortion due to ZBR, workload + * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, + * called B-WF2Q+, to schedule processes according to their budgets. More + * precisely, BFQ schedules queues associated to processes. Thanks to the + * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to + * I/O-bound processes issuing sequential requests (to boost the + * throughput), and yet guarantee a low latency to interactive and soft + * real-time applications. * * BFQ is described in [1], where also a reference to the initial, more - * theoretical paper on BFQ can be found. The interested reader can find in - * the latter paper full details on the main algorithm as well as formulas of - * the guarantees, plus formal proofs of all the properties. With respect to - * the version of BFQ presented in these papers, this implementation adds a - * few more heuristics, such as the one that guarantees a low latency to soft - * real-time applications, and a hierarchical extension based on H-WF2Q+. + * theoretical paper on BFQ can be found. 
The interested reader can find + * in the latter paper full details on the main algorithm, as well as + * formulas of the guarantees and formal proofs of all the properties. + * With respect to the version of BFQ presented in these papers, this + * implementation adds a few more heuristics, such as the one that + * guarantees a low latency to soft real-time applications, and a + * hierarchical extension based on H-WF2Q+. * * B-WF2Q+ is based on WF2Q+, that is described in [2], together with * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) @@ -115,18 +119,19 @@ struct kmem_cache *bfq_pool; #define BFQ_RATE_SHIFT 16 /* - * By default, BFQ computes the duration of the weight raising for interactive - * applications automatically, using the following formula: - * duration = (R / r) * T, where r is the peak rate of the device, and R and T - * are two reference parameters. - * In particular, R is the peak rate of the reference device (see below), and T - * is a reference time: given the systems that are likely to be installed on - * the reference device according to its speed class, T is about the maximum - * time needed, under BFQ and while reading two files in parallel, to load - * typical large applications on these systems. - * In practice, the slower/faster the device at hand is, the more/less it takes - * to load applications with respect to the reference device. Accordingly, the - * longer/shorter BFQ grants weight raising to interactive applications. + * By default, BFQ computes the duration of the weight raising for + * interactive applications automatically, using the following formula: + * duration = (R / r) * T, where r is the peak rate of the device, and + * R and T are two reference parameters. + * In particular, R is the peak rate of the reference device (see below), + * and T is a reference time: given the systems that are likely to be + * installed on the reference device according to its speed class, T is + * about the maximum time needed, under BFQ and while reading two files in + * parallel, to load typical large applications on these systems. + * In practice, the slower/faster the device at hand is, the more/less it + * takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to interactive + * applications. * * BFQ uses four different reference pairs (R, T), depending on: * . whether the device is rotational or non-rotational; @@ -148,8 +153,8 @@ static int R_slow[2] = {1536, 10752}; static int R_fast[2] = {17415, 34791}; /* * To improve readability, a conversion function is used to initialize the - * following arrays, which entails that the latter can be initialized only - * in a function. + * following arrays, which entails that they can be initialized only in a + * function. */ static int T_slow[2]; static int T_fast[2]; @@ -391,8 +396,8 @@ static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) * weight-counter tree for the queues may contain at most one node. * This holds even if low_latency is on, because weight-raised queues * are not inserted in the tree. - * In most scenarios, also the rate at which nodes are created/destroyed - * should be low. + * In most scenarios, the rate at which nodes are created/destroyed + * should be low too. 
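The duration = (R / r) * T rule described in the comment above can be illustrated with a small stand-alone sketch. The real bfq_wr_duration() works on jiffies and a fixed-point peak rate (BFQ_RATE_SHIFT); the helper below uses plain integers and milliseconds, and its names and numbers are illustrative only.

#include <stdio.h>

/*
 * Sketch of "duration = (R / r) * T": R and T are the reference pair for
 * the device class, r is the estimated peak rate of the device at hand.
 */
static unsigned long wr_duration_ms(unsigned long peak_rate,
                                    unsigned long R_ref,
                                    unsigned long T_ref_ms)
{
        if (peak_rate == 0)             /* sketch-only guard: no estimate yet */
                return T_ref_ms;
        return R_ref * T_ref_ms / peak_rate;
}

int main(void)
{
        /* a device twice as fast as the reference gets half the duration */
        printf("%lu ms\n", wr_duration_ms(2000, 1000, 3000));   /* 1500 ms */
        return 0;
}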
*/ static void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, @@ -454,7 +459,7 @@ static void bfq_weights_tree_remove(struct bfq_data *bfqd, { /* * Check whether the entity is actually associated with a counter. - * In fact, the device may be not be considered NCQ-capable for a while, + * In fact, the device may not be considered NCQ-capable for a while, * which implies that no insertion in the weight trees is performed, * after which the device may start to be deemed NCQ-capable, and hence * this function may start to be invoked. This may cause the function @@ -566,6 +571,12 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) return dur; } +static inline unsigned +bfq_bfqq_cooperations(struct bfq_queue *bfqq) +{ + return bfqq->bic ? bfqq->bic->cooperations : 0; +} + static inline void bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) { @@ -573,13 +584,18 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) bfq_mark_bfqq_idle_window(bfqq); else bfq_clear_bfqq_idle_window(bfqq); - if (bic->wr_time_left && bfqq->bfqd->low_latency) { + if (bic->saved_IO_bound) + bfq_mark_bfqq_IO_bound(bfqq); + else + bfq_clear_bfqq_IO_bound(bfqq); + if (bic->wr_time_left && bfqq->bfqd->low_latency && + bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { /* * Start a weight raising period with the duration given by * the raising_time_left snapshot. */ if (bfq_bfqq_busy(bfqq)) - bfqq->bfqd->raised_busy_queues++; + bfqq->bfqd->wr_busy_queues++; bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bic->wr_time_left; bfqq->last_wr_start_finish = jiffies; @@ -637,13 +653,28 @@ static void bfq_add_request(struct request *rq) if (!bfq_bfqq_busy(bfqq)) { int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && + bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && time_is_before_jiffies(bfqq->soft_rt_next_start); - idle_for_long_time = time_is_before_jiffies( + idle_for_long_time = bfq_bfqq_cooperations(bfqq) < + bfqd->bfq_coop_thresh && + time_is_before_jiffies( bfqq->budget_timeout + bfqd->bfq_wr_min_idle_time); entity->budget = max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(next_rq, bfqq)); + if (!bfq_bfqq_IO_bound(bfqq)) { + if (time_before(jiffies, + RQ_BIC(rq)->ttime.last_end_request + + bfqd->bfq_slice_idle)) { + bfqq->requests_within_timer++; + if (bfqq->requests_within_timer >= + bfqd->bfq_requests_within_timer) + bfq_mark_bfqq_IO_bound(bfqq); + } else + bfqq->requests_within_timer = 0; + } + if (!bfqd->low_latency) goto add_bfqq_busy; @@ -674,15 +705,17 @@ static void bfq_add_request(struct request *rq) } else if (old_wr_coeff > 1) { if (idle_for_long_time) bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - else if (bfqq->wr_cur_max_time == - bfqd->bfq_wr_rt_max_time && - !soft_rt) { + else if (bfq_bfqq_cooperations(bfqq) >= + bfqd->bfq_coop_thresh || + (bfqq->wr_cur_max_time == + bfqd->bfq_wr_rt_max_time && + !soft_rt)) { bfqq->wr_coeff = 1; bfq_log_bfqq(bfqd, bfqq, "wrais ending at %lu, rais_max_time %u", jiffies, jiffies_to_msecs(bfqq-> - wr_cur_max_time)); + wr_cur_max_time)); } else if (time_before( bfqq->last_wr_start_finish + bfqq->wr_cur_max_time, @@ -754,12 +787,12 @@ add_bfqq_busy: bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - bfqd->raised_busy_queues++; + bfqd->wr_busy_queues++; entity->ioprio_changed = 1; bfq_log_bfqq(bfqd, bfqq, - "non-idle wrais starting at %lu, rais_max_time %u", - jiffies, - jiffies_to_msecs(bfqq->wr_cur_max_time)); + "non-idle wrais starting 
at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq->wr_cur_max_time)); } if (prev != bfqq->next_rq) bfq_updated_next_req(bfqd, bfqq); @@ -799,11 +832,12 @@ static void bfq_activate_request(struct request_queue *q, struct request *rq) (long long unsigned)bfqd->last_position); } -static void bfq_deactivate_request(struct request_queue *q, struct request *rq) +static inline void bfq_deactivate_request(struct request_queue *q, + struct request *rq) { struct bfq_data *bfqd = q->elevator->elevator_data; - WARN_ON(bfqd->rq_in_driver == 0); + BUG_ON(bfqd->rq_in_driver == 0); bfqd->rq_in_driver--; } @@ -837,7 +871,7 @@ static void bfq_remove_request(struct request *rq) } if (rq->cmd_flags & REQ_META) { - WARN_ON(bfqq->meta_pending == 0); + BUG_ON(bfqq->meta_pending == 0); bfqq->meta_pending--; } } @@ -879,8 +913,9 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, BUG_ON(next_rq == NULL); bfqq->next_rq = next_rq; /* - * If next_rq changes, update both the queue's budget to fit - * the new request and the queue's position in its rq_pos_tree. + * If next_rq changes, update both the queue's budget to + * fit the new request and the queue's position in its + * rq_pos_tree. */ if (prev != bfqq->next_rq) { bfq_updated_next_req(bfqd, bfqq); @@ -914,7 +949,7 @@ static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) { BUG_ON(bfqq == NULL); if (bfq_bfqq_busy(bfqq)) - bfqq->bfqd->raised_busy_queues--; + bfqq->bfqd->wr_busy_queues--; bfqq->wr_coeff = 1; bfqq->wr_cur_max_time = 0; /* Trigger a weight change on the next activation of the queue */ @@ -992,8 +1027,8 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) /* * If the exact sector wasn't found, the parent of the NULL leaf - * will contain the closest sector (rq_pos_tree sorted by next_request - * position). + * will contain the closest sector (rq_pos_tree sorted by + * next_request position). */ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) @@ -1103,24 +1138,26 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) new_bfqq->pid); /* - * Merging is just a redirection: the requests of the process owning - * one of the two queues are redirected to the other queue. The latter - * queue, in its turn, is set as shared if this is the first time that - * the requests of some process are redirected to it. + * Merging is just a redirection: the requests of the process + * owning one of the two queues are redirected to the other queue. + * The latter queue, in its turn, is set as shared if this is the + * first time that the requests of some process are redirected to + * it. * * We redirect bfqq to new_bfqq and not the opposite, because we - * are in the context of the process owning bfqq, hence we have the - * io_cq of this process. So we can immediately configure this io_cq - * to redirect the requests of the process to new_bfqq. + * are in the context of the process owning bfqq, hence we have + * the io_cq of this process. So we can immediately configure this + * io_cq to redirect the requests of the process to new_bfqq. * * NOTE, even if new_bfqq coincides with the in-service queue, the - * io_cq of new_bfqq is not available, because, if the in-service queue - * is shared, bfqd->in_service_bic may not point to the io_cq of the - * in-service queue. 
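The I/O-bound detection added to bfq_add_request() above can be summarized by the following stand-alone sketch, which keeps only the counting logic: a request arriving within slice_idle of the previous completion lengthens the run, a late one resets it. Types and the time source are simplified, and the names are illustrative.

#include <stdbool.h>

struct iobound_state {
        bool io_bound;
        unsigned int requests_within_timer;
};

/* called on each request arrival of a not-yet-I/O-bound queue */
static void account_arrival(struct iobound_state *s, unsigned long now,
                            unsigned long last_completion,
                            unsigned long slice_idle, unsigned int threshold)
{
        if (s->io_bound)
                return;
        if (now < last_completion + slice_idle) {
                s->requests_within_timer++;
                if (s->requests_within_timer >= threshold)
                        s->io_bound = true;     /* threshold is 120 in this patch */
        } else {
                s->requests_within_timer = 0;   /* a late arrival resets the run */
        }
}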
- * Redirecting the requests of the process owning bfqq to the currently - * in-service queue is in any case the best option, as we feed the - * in-service queue with new requests close to the last request served - * and, by doing so, hopefully increase the throughput. + * io_cq of new_bfqq is not available, because, if the in-service + * queue is shared, bfqd->in_service_bic may not point to the + * io_cq of the in-service queue. + * Redirecting the requests of the process owning bfqq to the + * currently in-service queue is in any case the best option, as + * we feed the in-service queue with new requests close to the + * last request served and, by doing so, hopefully increase the + * throughput. */ bfqq->new_bfqq = new_bfqq; atomic_add(process_refs, &new_bfqq->ref); @@ -1128,8 +1165,8 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) } /* - * Attempt to schedule a merge of bfqq with the currently in-service queue or - * with a close queue among the scheduled queues. + * Attempt to schedule a merge of bfqq with the currently in-service queue + * or with a close queue among the scheduled queues. * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue * structure otherwise. */ @@ -1164,7 +1201,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) { new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); if (new_bfqq != NULL) - return new_bfqq; /* Merge with the in-service queue */ + return new_bfqq; /* Merge with in-service queue */ } /* @@ -1195,9 +1232,9 @@ bfq_bfqq_save_state(struct bfq_queue *bfqq) /* * This is the queue of a just-started process, and would * deserve weight raising: we set wr_time_left to the full - * weight-raising duration to trigger weight-raising when and - * if the queue is split and the first request of the queue - * is enqueued. + * weight-raising duration to trigger weight-raising when + * and if the queue is split and the first request of the + * queue is enqueued. */ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); else if (bfqq->wr_coeff > 1) { @@ -1219,13 +1256,16 @@ bfq_bfqq_save_state(struct bfq_queue *bfqq) * The bfq_queue is becoming shared or the requests of the * process owning the queue are being redirected to a shared * queue. Stop the weight raising period of the queue, as in - * both cases it should not be owned by an interactive or soft - * real-time application. + * both cases it should not be owned by an interactive or + * soft real-time application. */ bfq_bfqq_end_wr(bfqq); } else bfqq->bic->wr_time_left = 0; bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); + bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); + bfqq->bic->cooperations++; + bfqq->bic->failed_cooperations = 0; } static inline void @@ -1248,23 +1288,28 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, /* Save weight raising and idle window of the merged queues */ bfq_bfqq_save_state(bfqq); bfq_bfqq_save_state(new_bfqq); + if (bfq_bfqq_IO_bound(bfqq)) + bfq_mark_bfqq_IO_bound(new_bfqq); + bfq_clear_bfqq_IO_bound(bfqq); /* * Grab a reference to the bic, to prevent it from being destroyed * before being possibly touched by a bfq_split_bfqq(). 
*/ bfq_get_bic_reference(bfqq); bfq_get_bic_reference(new_bfqq); - /* Merge queues (that is, let bic redirect its requests to new_bfqq) */ + /* + * Merge queues (that is, let bic redirect its requests to new_bfqq) + */ bic_set_bfqq(bic, new_bfqq, 1); bfq_mark_bfqq_coop(new_bfqq); /* - * new_bfqq now belongs to at least two bics (it is a shared queue): set - * new_bfqq->bic to NULL. bfqq either: + * new_bfqq now belongs to at least two bics (it is a shared queue): + * set new_bfqq->bic to NULL. bfqq either: * - does not belong to any bic any more, and hence bfqq->bic must * be set to NULL, or * - is a queue whose owning bics have already been redirected to a - * different queue, hence the queue is destined to not belong to any - * bic soon and bfqq->bic is already NULL (therefore the next + * different queue, hence the queue is destined to not belong to + * any bic soon and bfqq->bic is already NULL (therefore the next * assignment causes no harm). */ new_bfqq->bic = NULL; @@ -1272,6 +1317,18 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, bfq_put_queue(bfqq); } +static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) +{ + struct bfq_io_cq *bic = bfqq->bic; + struct bfq_data *bfqd = bfqq->bfqd; + + if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { + bic->failed_cooperations++; + if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) + bic->cooperations = 0; + } +} + static int bfq_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) { @@ -1304,12 +1361,13 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, if (new_bfqq != NULL) { bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); /* - * If we get here, the bio will be queued in the shared - * queue, i.e., new_bfqq, so use new_bfqq to decide - * whether bio and rq can be merged. + * If we get here, the bio will be queued in the + * shared queue, i.e., new_bfqq, so use new_bfqq + * to decide whether bio and rq can be merged. */ bfqq = new_bfqq; - } + } else + bfq_bfqq_increase_failed_cooperations(bfqq); } return bfqq == RQ_BFQQ(rq); @@ -1375,9 +1433,9 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) struct bfq_io_cq *bic; unsigned long sl; - WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); - /* Tasks have exited, don't wait. */ + /* Processes have exited, don't wait. */ bic = bfqd->in_service_bic; if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0) return; @@ -1452,9 +1510,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) * We execute instead this instruction before bfq_remove_request() * (and hence introduce a temporary inconsistency), for efficiency. * In fact, in a forced_dispatch, this prevents two counters related - * to bfqq->dispatched to risk to be uselessly decremented if bfqq is - * not in service, and then to be incremented again after incrementing - * bfqq->dispatched. + * to bfqq->dispatched to risk to be uselessly decremented if bfqq + * is not in service, and then to be incremented again after + * incrementing bfqq->dispatched. 
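The cooperation bookkeeping used throughout this patch (bic->cooperations, bic->failed_cooperations, bfq_coop_thresh) can be sketched in isolation as follows; the struct is a simplified stand-in for bfq_io_cq and the helpers are illustrative, not the patch's functions.

#include <stdbool.h>

struct coop_state {
        unsigned int cooperations;
        unsigned int failed_cooperations;
};

/* the queue has just been merged with another one */
static void on_merge(struct coop_state *s)
{
        s->cooperations++;
        s->failed_cooperations = 0;
}

/* a merge was attempted but did not happen */
static void on_failed_merge(struct coop_state *s, unsigned int coop_thresh,
                            unsigned int failed_thresh)
{
        if (s->cooperations < coop_thresh)
                return;
        if (++s->failed_cooperations >= failed_thresh)
                s->cooperations = 0;            /* forget the old cooperations */
}

/* frequent cooperators are deprived of weight-raising */
static bool may_be_weight_raised(const struct coop_state *s,
                                 unsigned int coop_thresh)
{
        return s->cooperations < coop_thresh;   /* coop_thresh is 2 in this patch */
}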
*/ bfqq->dispatched++; bfq_remove_request(rq); @@ -1510,9 +1568,9 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (RB_EMPTY_ROOT(&bfqq->sort_list)) { /* - * overloading budget_timeout field to store when - * the queue remains with no backlog, used by - * the weight-raising mechanism + * Overloading budget_timeout field to store the time + * at which the queue remains with no backlog; used by + * the weight-raising mechanism. */ bfqq->budget_timeout = jiffies; bfq_del_bfqq_busy(bfqd, bfqq, 1); @@ -1795,10 +1853,10 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, } /* - * To be deemed as soft real-time, an application must meet two requirements. - * First, the application must not require an average bandwidth higher than - * the approximate bandwidth required to playback or record a compressed high- - * definition video. + * To be deemed as soft real-time, an application must meet two + * requirements. First, the application must not require an average + * bandwidth higher than the approximate bandwidth required to playback or + * record a compressed high-definition video. * The next function is invoked on the completion of the last request of a * batch, to compute the next-start time instant, soft_rt_next_start, such * that, if the next request of the application does not arrive before @@ -1809,30 +1867,31 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, * the application stops issuing new requests until all its pending requests * have been completed. After that, the application may issue a new batch, * and so on. - * For this reason the next function is invoked to compute soft_rt_next_start - * only for applications that meet this requirement, whereas soft_rt_next_start - * is set to infinity for applications that do not. + * For this reason the next function is invoked to compute + * soft_rt_next_start only for applications that meet this requirement, + * whereas soft_rt_next_start is set to infinity for applications that do + * not. * * Unfortunately, even a greedy application may happen to behave in an - * isochronous way if the CPU load is high. In fact, the application may stop - * issuing requests while the CPUs are busy serving other processes, then - * restart, then stop again for a while, and so on. In addition, if the disk - * achieves a low enough throughput with the request pattern issued by the - * application (e.g., because the request pattern is random and/or the device - * is slow), then the application may meet the above bandwidth requirement too. - * To prevent such a greedy application to be deemed as soft real-time, a - * further rule is used in the computation of soft_rt_next_start: - * soft_rt_next_start must be higher than the current time plus the maximum - * time for which the arrival of a request is waited for when a sync queue - * becomes idle, namely bfqd->bfq_slice_idle. - * This filters out greedy applications, as the latter issue instead their next - * request as soon as possible after the last one has been completed (in - * contrast, when a batch of requests is completed, a soft real-time application - * spends some time processing data). + * isochronous way if the CPU load is high. In fact, the application may + * stop issuing requests while the CPUs are busy serving other processes, + * then restart, then stop again for a while, and so on. 
In addition, if + * the disk achieves a low enough throughput with the request pattern + * issued by the application (e.g., because the request pattern is random + * and/or the device is slow), then the application may meet the above + * bandwidth requirement too. To prevent such a greedy application to be + * deemed as soft real-time, a further rule is used in the computation of + * soft_rt_next_start: soft_rt_next_start must be higher than the current + * time plus the maximum time for which the arrival of a request is waited + * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. + * This filters out greedy applications, as the latter issue instead their + * next request as soon as possible after the last one has been completed + * (in contrast, when a batch of requests is completed, a soft real-time + * application spends some time processing data). * - * Unfortunately, the last filter may easily generate false positives if only - * bfqd->bfq_slice_idle is used as a reference time interval and one or both - * the following cases occur: + * Unfortunately, the last filter may easily generate false positives if + * only bfqd->bfq_slice_idle is used as a reference time interval and one + * or both the following cases occur: * 1) HZ is so low that the duration of a jiffy is comparable to or higher * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with * HZ=100. @@ -1841,8 +1900,9 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, * increments. This seems to happen, e.g., inside virtual machines. * To address this issue, we do not use as a reference time interval just * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In - * particular we add the minimum number of jiffies for which the filter seems - * to be quite precise also in embedded systems and KVM/QEMU virtual machines. + * particular we add the minimum number of jiffies for which the filter + * seems to be quite precise also in embedded systems and KVM/QEMU virtual + * machines. */ static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -1910,7 +1970,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, * As above explained, 'punish' slow (i.e., seeky), timed-out * and async queues, to favor sequential sync workloads. * - * Processes doing IO in the slower disk zones will tend to be + * Processes doing I/O in the slower disk zones will tend to be * slow(er) even if not seeky. Hence, since the estimated peak * rate is actually an average over the disk surface, these * processes may timeout just for bad luck. To avoid punishing @@ -1930,6 +1990,10 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, bfqd->const_seeky_busy_in_flight_queues++; } + if (reason == BFQ_BFQQ_TOO_IDLE && + bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) + bfq_clear_bfqq_IO_bound(bfqq); + if (bfqd->low_latency && bfqq->wr_coeff == 1) bfqq->last_wr_start_finish = jiffies; @@ -1938,11 +2002,12 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, /* * If we get here, and there are no outstanding requests, * then the request pattern is isochronous (see the comments - * to the function bfq_bfqq_softrt_next_start()). Hence we can - * compute soft_rt_next_start. If, instead, the queue still - * has outstanding requests, then we have to wait for the - * completion of all the outstanding requests to discover - * whether the request pattern is actually isochronous. + * to the function bfq_bfqq_softrt_next_start()). 
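The body of bfq_bfqq_softrt_next_start() is not part of this hunk, so the sketch below only restates the two constraints described in the comment above as the maximum of two lower bounds; the units are simplified and the names are illustrative, not the patch's code.

/* units are assumed consistent: 'service' units and 'max_rate' units per tick */
static unsigned long softrt_next_start_sketch(unsigned long now,
                                              unsigned long service,
                                              unsigned long max_rate,
                                              unsigned long slice_idle,
                                              unsigned long extra_ticks)
{
        /* soft real-time detection is enabled only with a non-zero max_rate */
        unsigned long bw_bound = now + service / max_rate;
        unsigned long idle_bound = now + slice_idle + extra_ticks;

        return bw_bound > idle_bound ? bw_bound : idle_bound;
}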
Hence we + * can compute soft_rt_next_start. If, instead, the queue + * still has outstanding requests, then we have to wait + * for the completion of all the outstanding requests to + * discover whether the request pattern is actually + * isochronous. */ if (bfqq->dispatched == 0) bfqq->soft_rt_next_start = @@ -1974,10 +2039,13 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, } bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, - bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); + "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, + slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); - /* Increase, decrease or leave budget unchanged according to reason */ + /* + * Increase, decrease or leave budget unchanged according to + * reason. + */ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); __bfq_bfqq_expire(bfqd, bfqq); } @@ -2018,58 +2086,63 @@ static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) } /* - * Device idling is allowed only for the queues for which this function returns - * true. For this reason, the return value of this function plays a critical - * role for both throughput boosting and service guarantees. This return value - * is computed through a logical expression. In this rather long comment, we - * try to briefly describe all the details and motivations behind the - * components of this logical expression. + * Device idling is allowed only for the queues for which this function + * returns true. For this reason, the return value of this function plays a + * critical role for both throughput boosting and service guarantees. The + * return value is computed through a logical expression. In this rather + * long comment, we try to briefly describe all the details and motivations + * behind the components of this logical expression. * - * First, the expression may be true only for sync queues. Besides, if bfqq is - * also being weight-raised, then the expression always evaluates to true, as - * device idling is instrumental for preserving low-latency guarantees - * (see [1]). Otherwise, the expression evaluates to true only if bfqq has - * a non-null idle window and either the device is not performing NCQ - * (because, when both of the last two conditions hold, idling most certainly - * boosts the throughput), or the following compound condition is true. + * First, the expression may be true only for sync queues. Besides, if + * bfqq is also being weight-raised, then the expression always evaluates + * to true, as device idling is instrumental for preserving low-latency + * guarantees (see [1]). Otherwise, the expression evaluates to true only + * if bfqq has a non-null idle window and at least one of the following + * two conditions holds. The first condition is that the device is not + * performing NCQ, because idling the device most certainly boosts the + * throughput if this condition holds and bfqq has been granted a non-null + * idle window. The second compound condition is made of the logical AND of + * two components. * - * The compound condition contains a first component that lets the whole - * compound condition evaluate to false if there is at least one - * weight-raised busy queue. This guarantees that, in this case, the device - * is not idled for a sync non-weight-raised queue. The latter is then expired - * immediately if empty. 
Combined with the timestamping rules of BFQ (see [1] - * for details), this causes sync non-weight-raised queues to get a lower - * number of requests served, and hence to ask for a lower number of requests - * from the request pool, before the busy weight-raised queues get served - * again. + * The first component is true only if there is no weight-raised busy + * queue. This guarantees that the device is not idled for a sync non- + * weight-raised queue when there are busy weight-raised queues. The former + * is then expired immediately if empty. Combined with the timestamping + * rules of BFQ (see [1] for details), this causes sync non-weight-raised + * queues to get a lower number of requests served, and hence to ask for a + * lower number of requests from the request pool, before the busy weight- + * raised queues get served again. * - * This is beneficial for the processes associated with weight-raised queues, - * when the system operates in request-pool saturation conditions (e.g., in - * the presence of write hogs). In fact, if the processes associated with the - * other queues ask for requests at a lower rate, then weight-raised processes - * have a higher probability to get a request from the pool immediately (or at + * This is beneficial for the processes associated with weight-raised + * queues, when the request pool is saturated (e.g., in the presence of + * write hogs). In fact, if the processes associated with the other queues + * ask for requests at a lower rate, then weight-raised processes have a + * higher probability to get a request from the pool immediately (or at * least soon) when they need one. Hence they have a higher probability to - * actually get a fraction of the disk throughput proportional to their high - * weight. This is especially true with NCQ-enabled drives, which enqueue - * several requests in advance and further reorder internally-queued requests. + * actually get a fraction of the disk throughput proportional to their + * high weight. This is especially true with NCQ-capable drives, which + * enqueue several requests in advance and further reorder internally- + * queued requests. * - * In the end, mistreating non-weight-raised queues when there are busy weight- - * raised queues seems to mitigate starvation problems in the presence of heavy - * write workloads and NCQ, and hence to guarantee a higher application and - * system responsiveness in these hostile scenarios. + * In the end, mistreating non-weight-raised queues when there are busy + * weight-raised queues seems to mitigate starvation problems in the + * presence of heavy write workloads and NCQ, and hence to guarantee a + * higher application and system responsiveness in these hostile scenarios. * - * If the first component of the compound condition is instead true (i.e., - * there is no weight-raised busy queue), then the rest of the compound - * condition takes into account service-guarantee and throughput issues. + * If the first component of the compound condition is instead true, i.e., + * there is no weight-raised busy queue, then the second component of the + * compound condition takes into account service-guarantee and throughput + * issues related to NCQ (recall that the compound condition is evaluated + * only if the device is detected as supporting NCQ). 
* * As for service guarantees, allowing the drive to enqueue more than one - * request at a time, and hence delegating de facto final scheduling decisions - * to the drive's internal scheduler, causes loss of control on the actual - * request service order. In this respect, when the drive is allowed to - * enqueue more than one request at a time, the service distribution enforced - * by the drive's internal scheduler is likely to coincide with the desired - * device-throughput distribution only in the following, perfectly symmetric, - * scenario: + * request at a time, and hence delegating de facto final scheduling + * decisions to the drive's internal scheduler, causes loss of control on + * the actual request service order. In this respect, when the drive is + * allowed to enqueue more than one request at a time, the service + * distribution enforced by the drive's internal scheduler is likely to + * coincide with the desired device-throughput distribution only in the + * following, perfectly symmetric, scenario: * 1) all active queues have the same weight, * 2) all active groups at the same level in the groups tree have the same * weight, @@ -2078,59 +2151,61 @@ static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) * * Even in such a scenario, sequential I/O may still receive a preferential * treatment, but this is not likely to be a big issue with flash-based - * devices, because of their non-dramatic loss of throughput with random I/O. - * Things do differ with HDDs, for which additional care is taken, as + * devices, because of their non-dramatic loss of throughput with random + * I/O. Things do differ with HDDs, for which additional care is taken, as * explained after completing the discussion for flash-based devices. * - * Unfortunately, keeping the necessary state for evaluating exactly the above - * symmetry conditions would be quite complex and time consuming. Therefore BFQ - * evaluates instead the following stronger sub-conditions, for which it is - * much easier to maintain the needed state: + * Unfortunately, keeping the necessary state for evaluating exactly the + * above symmetry conditions would be quite complex and time-consuming. + * Therefore BFQ evaluates instead the following stronger sub-conditions, + * for which it is much easier to maintain the needed state: * 1) all active queues have the same weight, * 2) all active groups have the same weight, * 3) all active groups have at most one active child each. * In particular, the last two conditions are always true if hierarchical * support and the cgroups interface are not enabled, hence no state needs - * to be maintained. + * to be maintained in this case. * - * According to the above considerations, the compound condition evaluates - * to true and hence idling is performed if any of the above symmetry - * sub-condition does not hold. These are the only sub-conditions considered - * if the device is flash-based, as, for such a device, it is sensible to - * force idling only for service-guarantee issues. In fact, as for throughput, - * idling NCQ-capable flash-based devices would not boost the throughput even - * with sequential I/O; rather it would lower the throughput in proportion to - * how fast the device is. In the end, (only) if all the three sub-conditions - * hold and the device is flash-based, then the compound condition evaluates - * to false and hence no idling is performed. 
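The three symmetry sub-conditions listed above can be spelled out naively as follows. BFQ tracks them incrementally (for instance, "all weights are equal" reduces to "the weights tree holds at most one node"); the loops below are only an illustration with made-up types.

#include <stdbool.h>
#include <stddef.h>

struct active_group {
        unsigned int weight;
        unsigned int active_children;
};

static bool symmetric_scenario_sketch(const unsigned int *queue_weights,
                                      size_t nr_queues,
                                      const struct active_group *groups,
                                      size_t nr_groups)
{
        size_t i;

        for (i = 1; i < nr_queues; i++)
                if (queue_weights[i] != queue_weights[0])
                        return false;   /* 1) queues differ in weight */
        for (i = 0; i < nr_groups; i++) {
                if (groups[i].weight != groups[0].weight)
                        return false;   /* 2) groups differ in weight */
                if (groups[i].active_children > 1)
                        return false;   /* 3) a group has >1 active child */
        }
        return true;
}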
+ * According to the above considerations, the second component of the + * compound condition evaluates to true if any of the above symmetry + * sub-condition does not hold, or the device is not flash-based. Therefore, + * if also the first component is true, then idling is allowed for a sync + * queue. These are the only sub-conditions considered if the device is + * flash-based, as, for such a device, it is sensible to force idling only + * for service-guarantee issues. In fact, as for throughput, idling + * NCQ-capable flash-based devices would not boost the throughput even + * with sequential I/O; rather it would lower the throughput in proportion + * to how fast the device is. In the end, (only) if all the three + * sub-conditions hold and the device is flash-based, the compound + * condition evaluates to false and therefore no idling is performed. * - * As already said, things change with a rotational device, where idling boosts - * the throughput with sequential I/O (even with NCQ). Hence, for such a device - * the compound condition evaluates to true and idling is performed also if the - * following additional sub-condition does not hold: the queue is (constantly) - * seeky. Unfortunately, this different behavior with respect to flash-based - * devices causes an additional asymmetry: if some sync queues enjoy idling and - * some other sync queues do not, then the latter get a low share of the device - * bandwidth, simply because the former get many requests served after being - * set as in service, whereas the latter do not. As a consequence, to - * guarantee the desired bandwidth distribution, on HDDs the compound - * expression evaluates to true (and hence device idling is performed) also - * if the following last symmetry condition does not hold: no other queue is - * benefiting from idling. - * Also this last condition is actually replaced with a simpler-to-maintain - * and stronger condition: there is no busy queue which is not seeky (and - * hence may also benefit from idling). + * As already said, things change with a rotational device, where idling + * boosts the throughput with sequential I/O (even with NCQ). Hence, for + * such a device the second component of the compound condition evaluates + * to true also if the following additional sub-condition does not hold: + * the queue is constantly seeky. Unfortunately, this different behavior + * with respect to flash-based devices causes an additional asymmetry: if + * some sync queues enjoy idling and some other sync queues do not, then + * the latter get a low share of the device throughput, simply because the + * former get many requests served after being set as in service, whereas + * the latter do not. As a consequence, to guarantee the desired throughput + * distribution, on HDDs the compound expression evaluates to true (and + * hence device idling is performed) also if the following last symmetry + * condition does not hold: no other queue is benefiting from idling. Also + * this last condition is actually replaced with a simpler-to-maintain and + * stronger condition: there is no busy queue which is not constantly seeky + * (and hence may also benefit from idling). * * To sum up, when all the required symmetry and throughput-boosting - * sub-conditions hold, the compound condition evaluates to false, and hence - * no idling is performed. This helps to keep the drives' internal queues full - * on NCQ-capable devices, and hence to boost the throughput, without causing - * 'almost' any loss of service guarantees. 
The 'almost' follows from the fact - * that, if the internal queue of one such device is filled while all the - * sub-conditions hold, but at some point in time some sub-condition stops to - * hold, then it may become impossible to let requests be served in the new - * desired order until all the requests already queued in the device have been - * served. + * sub-conditions hold, the second component of the compound condition + * evaluates to false, and hence no idling is performed. This helps to + * keep the drives' internal queues full on NCQ-capable devices, and hence + * to boost the throughput, without causing 'almost' any loss of service + * guarantees. The 'almost' follows from the fact that, if the internal + * queue of one such device is filled while all the sub-conditions hold, + * but at some point in time some sub-condition stops to hold, then it may + * become impossible to let requests be served in the new desired order + * until all the requests already queued in the device have been served. */ static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq) { @@ -2149,29 +2224,29 @@ static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq) * the device). */ #define cond_for_expiring_non_wr (bfqd->hw_tag && \ - (bfqd->raised_busy_queues > 0 || \ + (bfqd->wr_busy_queues > 0 || \ (symmetric_scenario && \ (blk_queue_nonrot(bfqd->queue) || \ cond_for_seeky_on_ncq_hdd)))) - return bfq_bfqq_sync(bfqq) && ( - bfqq->wr_coeff > 1 || - (bfq_bfqq_idle_window(bfqq) && - !cond_for_expiring_non_wr) + return bfq_bfqq_sync(bfqq) && + (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && + (bfqq->wr_coeff > 1 || + (bfq_bfqq_idle_window(bfqq) && + !cond_for_expiring_non_wr) ); } /* - * If the in-service queue is empty, but it is sync and either of the following - * conditions holds, then: 1) the queue must remain in service and cannot be - * expired, and 2) the disk must be idled to wait for the possible arrival - * of a new request for the queue. The conditions are: - * - the device is rotational and not performing NCQ, and the queue has its - * idle window set (in this case, waiting for a new request for the queue - * is likely to boost the disk throughput); - * - the queue is weight-raised (waiting for the request is necessary to - * provide the queue with fairness and latency guarantees, see [1] for - * details). + * If the in-service queue is empty but sync, and the function + * bfq_bfqq_must_not_expire returns true, then: + * 1) the queue must remain in service and cannot be expired, and + * 2) the disk must be idled to wait for the possible arrival of a new + * request for the queue. + * See the comments to the function bfq_bfqq_must_not_expire for the reasons + * why performing device idling is the best choice to boost the throughput + * and preserve service guarantees when bfq_bfqq_must_not_expire itself + * returns true. */ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) { @@ -2214,16 +2289,18 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) goto expire; } else { /* - * The idle timer may be pending because we may not - * disable disk idling even when a new request arrives + * The idle timer may be pending because we may + * not disable disk idling even when a new request + * arrives. 
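The shape of the idling decision, as visible in the return expression of bfq_bfqq_must_not_expire() above, can be condensed into the following boolean sketch. Each argument stands for a condition BFQ tracks elsewhere, and seeky_cond_on_ncq_hdd is a placeholder for the HDD-specific sub-condition whose definition lies outside this hunk.

#include <stdbool.h>

static bool may_idle_for_queue(bool sync, bool io_bound, bool weight_raised,
                               bool idle_window, bool hw_tag,
                               bool some_wr_queue_busy, bool symmetric,
                               bool nonrot_device, bool seeky_cond_on_ncq_hdd)
{
        bool expire_non_wr;

        if (!sync)
                return false;           /* only sync queues may idle */
        if (!io_bound && !weight_raised)
                return false;           /* new in v7r5: require I/O-boundness */
        if (weight_raised)
                return true;            /* idling preserves low latency */

        /* non-weight-raised sync queue */
        expire_non_wr = hw_tag &&
                (some_wr_queue_busy ||
                 (symmetric && (nonrot_device || seeky_cond_on_ncq_hdd)));

        return idle_window && !expire_non_wr;
}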
*/ if (timer_pending(&bfqd->idle_slice_timer)) { /* * If we get here: 1) at least a new request * has arrived but we have not disabled the * timer because the request was too small, - * 2) then the block layer has unplugged the - * device, causing the dispatch to be invoked. + * 2) then the block layer has unplugged + * the device, causing the dispatch to be + * invoked. * * Since the device is unplugged, now the * requests are probably large enough to @@ -2238,9 +2315,9 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) } /* - * No requests pending. If the in-service queue still has requests in - * flight (possibly waiting for a completion) or is idling for a new - * request, then keep it. + * No requests pending. If the in-service queue still has requests + * in flight (possibly waiting for a completion) or is idling for a + * new request, then keep it. */ if (timer_pending(&bfqd->idle_slice_timer) || (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { @@ -2259,36 +2336,36 @@ keep_queue: return bfqq; } -static void bfq_update_raising_data(struct bfq_data *bfqd, - struct bfq_queue *bfqq) +static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ bfq_log_bfqq(bfqd, bfqq, - "raising period dur %u/%u msec, old raising coeff %u, w %d(%d)", - jiffies_to_msecs(jiffies - - bfqq->last_wr_start_finish), - jiffies_to_msecs(bfqq->wr_cur_max_time), - bfqq->wr_coeff, - bfqq->entity.weight, bfqq->entity.orig_weight); + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time), + bfqq->wr_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != - entity->orig_weight * bfqq->wr_coeff); + entity->orig_weight * bfqq->wr_coeff); if (entity->ioprio_changed) - bfq_log_bfqq(bfqd, bfqq, - "WARN: pending prio change"); + bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); + /* * If too much time has elapsed from the beginning - * of this weight-raising period, stop it. + * of this weight-raising period, or the queue has + * exceeded the acceptable number of cooperations, + * stop it. */ - if (time_is_before_jiffies(bfqq->last_wr_start_finish + + if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || + time_is_before_jiffies(bfqq->last_wr_start_finish + bfqq->wr_cur_max_time)) { bfqq->last_wr_start_finish = jiffies; bfq_log_bfqq(bfqd, bfqq, "wrais ending at %lu, rais_max_time %u", bfqq->last_wr_start_finish, - jiffies_to_msecs(bfqq-> - wr_cur_max_time)); + jiffies_to_msecs(bfqq->wr_cur_max_time)); bfq_bfqq_end_wr(bfqq); } } @@ -2320,20 +2397,18 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { /* - * This may happen if the next rq is chosen - * in fifo order instead of sector order. - * The budget is properly dimensioned - * to be always sufficient to serve the next request - * only if it is chosen in sector order. The reason is - * that it would be quite inefficient and little useful - * to always make sure that the budget is large enough - * to serve even the possible next rq in fifo order. + * This may happen if the next rq is chosen in fifo order + * instead of sector order. The budget is properly + * dimensioned to be always sufficient to serve the next + * request only if it is chosen in sector order. 
The reason + * is that it would be quite inefficient and little useful + * to always make sure that the budget is large enough to + * serve even the possible next rq in fifo order. * In fact, requests are seldom served in fifo order. * - * Expire the queue for budget exhaustion, and - * make sure that the next act_budget is enough - * to serve the next request, even if it comes - * from the fifo expired path. + * Expire the queue for budget exhaustion, and make sure + * that the next act_budget is enough to serve the next + * request, even if it comes from the fifo expired path. */ bfqq->next_rq = rq; /* @@ -2349,7 +2424,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, bfq_bfqq_served(bfqq, service_to_charge); bfq_dispatch_insert(bfqd->queue, rq); - bfq_update_raising_data(bfqd, bfqq); + bfq_update_wr_data(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), budg left %lu", @@ -2390,8 +2465,8 @@ static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) } /* - * Drain our current requests. Used for barriers and when switching - * io schedulers on-the-fly. + * Drain our current requests. + * Used for barriers and when switching io schedulers on-the-fly. */ static int bfq_forced_dispatch(struct bfq_data *bfqd) { @@ -2507,10 +2582,8 @@ static void bfq_put_cooperator(struct bfq_queue *bfqq) */ __bfqq = bfqq->new_bfqq; while (__bfqq) { - if (__bfqq == bfqq) { - WARN(1, "bfqq->new_bfqq loop detected.\n"); + if (__bfqq == bfqq) break; - } next = __bfqq->new_bfqq; bfq_put_queue(__bfqq); __bfqq = next; @@ -2532,7 +2605,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void bfq_init_icq(struct io_cq *icq) +static inline void bfq_init_icq(struct io_cq *icq) { struct bfq_io_cq *bic = icq_to_bic(icq); @@ -2622,11 +2695,6 @@ static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) bfqq->entity.ioprio_changed = 1; - /* - * Keep track of original prio settings in case we have to temporarily - * elevate the priority of this queue. - */ - bfqq->org_ioprio = bfqq->entity.new_ioprio; bfq_clear_bfqq_prio_changed(bfqq); } @@ -2641,8 +2709,8 @@ static void bfq_changed_ioprio(struct bfq_io_cq *bic) bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags); /* - * This condition may trigger on a newly created bic, be sure to drop - * the lock before returning. + * This condition may trigger on a newly created bic, be sure to + * drop the lock before returning. */ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio)) goto out; @@ -2688,6 +2756,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_mark_bfqq_idle_window(bfqq); bfq_mark_bfqq_sync(bfqq); } + bfq_mark_bfqq_IO_bound(bfqq); /* Tentative initial value to trade off between thr and lat */ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; @@ -2793,7 +2862,8 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask); /* - * Pin the queue now that it's allocated, scheduler exit will prune it. + * Pin the queue now that it's allocated, scheduler exit will + * prune it. 
*/ if (!is_sync && *async_bfqq == NULL) { atomic_inc(&bfqq->ref); @@ -3006,7 +3076,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) bfqq, new_bfqq); rq->elv.priv[1] = new_bfqq; bfqq = new_bfqq; - } + } else + bfq_bfqq_increase_failed_cooperations(bfqq); } bfq_init_prio_data(bfqq, RQ_BIC(rq)); @@ -3063,8 +3134,8 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) bfq_update_hw_tag(bfqd); - WARN_ON(!bfqd->rq_in_driver); - WARN_ON(!bfqq->dispatched); + BUG_ON(!bfqd->rq_in_driver); + BUG_ON(!bfqq->dispatched); bfqd->rq_in_driver--; bfqq->dispatched--; @@ -3075,8 +3146,8 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) BUG_ON(!bfqd->busy_in_flight_queues); bfqd->busy_in_flight_queues--; if (bfq_bfqq_constantly_seeky(bfqq)) { - BUG_ON( - !bfqd->const_seeky_busy_in_flight_queues); + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); bfqd->const_seeky_busy_in_flight_queues--; } } @@ -3145,9 +3216,9 @@ static int bfq_may_queue(struct request_queue *q, int rw) /* * Don't force setup of a queue from here, as a call to may_queue - * does not necessarily imply that a request actually will be queued. - * So just lookup a possibly existing queue, or return 'may queue' - * if that fails. + * does not necessarily imply that a request actually will be + * queued. So just lookup a possibly existing queue, or return + * 'may queue' if that fails. */ bic = bfq_bic_lookup(bfqd, tsk->io_context); if (bic == NULL) @@ -3273,9 +3344,9 @@ new_queue: if (split) { bfq_mark_bfqq_just_split(bfqq); /* - * If the queue has just been split from a shared queue, - * restore the idle window and the possible weight - * raising period. + * If the queue has just been split from a shared + * queue, restore the idle window and the possible + * weight raising period. */ bfq_bfqq_resume_state(bfqq, bic); } @@ -3318,12 +3389,12 @@ static void bfq_idle_slice_timer(unsigned long data) bfqq = bfqd->in_service_queue; /* - * Theoretical race here: the in-service queue can be NULL or different - * from the queue that was idling if the timer handler spins on - * the queue_lock and a new request arrives for the current - * queue and there is a full dispatch cycle that changes the - * in-service queue. This can hardly happen, but in the worst case - * we just expire a queue too early. + * Theoretical race here: the in-service queue can be NULL or + * different from the queue that was idling if the timer handler + * spins on the queue_lock and a new request arrives for the + * current queue and there is a full dispatch cycle that changes + * the in-service queue. This can hardly happen, but in the worst + * case we just expire a queue too early. */ if (bfqq != NULL) { bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); @@ -3337,9 +3408,9 @@ static void bfq_idle_slice_timer(unsigned long data) else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) /* * The queue may not be empty upon timer expiration, - * because we may not disable the timer when the first - * request of the in-service queue arrives during - * disk idling + * because we may not disable the timer when the + * first request of the in-service queue arrives + * during disk idling. 
*/ reason = BFQ_BFQQ_TOO_IDLE; else @@ -3479,6 +3550,10 @@ static int bfq_init_queue(struct request_queue *q) bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; + bfqd->bfq_coop_thresh = 2; + bfqd->bfq_failed_cooperations = 7000; + bfqd->bfq_requests_within_timer = 120; + bfqd->low_latency = true; bfqd->bfq_wr_coeff = 20; @@ -3492,13 +3567,13 @@ static int bfq_init_queue(struct request_queue *q) * high-definition compressed * video. */ - bfqd->raised_busy_queues = 0; + bfqd->wr_busy_queues = 0; bfqd->busy_in_flight_queues = 0; bfqd->const_seeky_busy_in_flight_queues = 0; /* - * Begin by assuming, optimistically, that the device peak rate is equal - * to the highest reference rate. + * Begin by assuming, optimistically, that the device peak rate is + * equal to the highest reference rate. */ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * T_fast[blk_queue_nonrot(bfqd->queue)]; @@ -3527,7 +3602,8 @@ static ssize_t bfq_var_show(unsigned int var, char *page) return sprintf(page, "%d\n", var); } -static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) +static ssize_t bfq_var_store(unsigned long *var, const char *page, + size_t count) { unsigned long new_val; int ret = kstrtoul(page, 10, &new_val); @@ -3565,8 +3641,7 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) bfqq->entity.weight, bfqq->queued[0], bfqq->queued[1], - jiffies_to_msecs(jiffies - - bfqq->last_wr_start_finish), + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), jiffies_to_msecs(bfqq->wr_cur_max_time)); } @@ -3602,7 +3677,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); +SHOW_FUNCTION(bfq_max_budget_async_rq_show, + bfqd->bfq_max_budget_async_rq, 0); SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); @@ -3806,14 +3882,14 @@ static int __init bfq_init(void) T_fast[1] = msecs_to_jiffies(2000); /* - * Thresholds that determine the switch between speed classes (see the - * comments before the definition of the array). + * Thresholds that determine the switch between speed classes (see + * the comments before the definition of the array). */ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; elv_register(&iosched_bfq); - pr_info("BFQ I/O-scheduler version: v7r4"); + pr_info("BFQ I/O-scheduler version: v7r5"); return 0; } diff --git a/block/bfq-sched.c b/block/bfq-sched.c index 0fd077c..546a254 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c @@ -97,7 +97,8 @@ static inline void bfq_update_budget(struct bfq_entity *next_in_service) * Shift for timestamp calculations. This actually limits the maximum * service allowed in one timestamp delta (small shift values increase it), * the maximum total weight that can be used for the queues in the system - * (big shift values increase it), and the period of virtual time wraparounds. + * (big shift values increase it), and the period of virtual time + * wraparounds. 
*/ #define WFQ_SERVICE_SHIFT 22 @@ -339,7 +340,8 @@ static void bfq_weights_tree_remove(struct bfq_data *bfqd, /** - * bfq_active_insert - insert an entity in the active tree of its group/device. + * bfq_active_insert - insert an entity in the active tree of its + * group/device. * @st: the service tree of the entity. * @entity: the entity being inserted. * @@ -395,9 +397,9 @@ static void bfq_active_insert(struct bfq_service_tree *st, * bfq_ioprio_to_weight - calc a weight from an ioprio. * @ioprio: the ioprio value to convert. */ -static unsigned short bfq_ioprio_to_weight(int ioprio) +static inline unsigned short bfq_ioprio_to_weight(int ioprio) { - WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); + BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); return IOPRIO_BE_NR - ioprio; } @@ -409,9 +411,9 @@ static unsigned short bfq_ioprio_to_weight(int ioprio) * 0 is used as an escape ioprio value for weights (numerically) equal or * larger than IOPRIO_BE_NR */ -static unsigned short bfq_weight_to_ioprio(int weight) +static inline unsigned short bfq_weight_to_ioprio(int weight) { - WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); + BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; } @@ -681,7 +683,8 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, } /** - * bfq_bfqq_served - update the scheduler status after selection for service. + * bfq_bfqq_served - update the scheduler status after selection for + * service. * @bfqq: the queue being served. * @served: bytes to transfer. * @@ -820,7 +823,7 @@ static void bfq_activate_entity(struct bfq_entity *entity) * and if the caller did not specify @requeue, put it on the idle tree. * * Return %1 if the caller should update the entity hierarchy, i.e., - * if the entity was under service or if it was the next_in_service for + * if the entity was in service or if it was the next_in_service for * its sched_data; return %0 otherwise. */ static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) @@ -876,7 +879,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) /* * The parent entity is still backlogged, and * we don't need to update it as it is still - * under service. + * in service. */ break; @@ -917,7 +920,7 @@ update: * active tree of the device is not empty. * * NOTE: this hierarchical implementation updates vtimes quite often, - * we may end up with reactivated tasks getting timestamps after a + * we may end up with reactivated processes getting timestamps after a * vtime skip done because we needed a ->first_active entity on some * intermediate node. */ @@ -940,8 +943,8 @@ static void bfq_update_vtime(struct bfq_service_tree *st) * * This function searches the first schedulable entity, starting from the * root of the tree and going on the left every time on this side there is - * a subtree with at least one eligible (start >= vtime) entity. The path - * on the right is followed only if a) the left subtree contains no eligible + * a subtree with at least one eligible (start >= vtime) entity. The path on + * the right is followed only if a) the left subtree contains no eligible * entities and b) no eligible entity has been found yet. 
*/ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) @@ -1134,14 +1137,14 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, BUG_ON(!bfqd->busy_in_flight_queues); bfqd->busy_in_flight_queues--; if (bfq_bfqq_constantly_seeky(bfqq)) { - BUG_ON( - !bfqd->const_seeky_busy_in_flight_queues); + BUG_ON(!bfqd-> + const_seeky_busy_in_flight_queues); bfqd->const_seeky_busy_in_flight_queues--; } } } if (bfqq->wr_coeff > 1) - bfqd->raised_busy_queues--; + bfqd->wr_busy_queues--; bfq_deactivate_bfqq(bfqd, bfqq, requeue); } @@ -1172,5 +1175,5 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) } } if (bfqq->wr_coeff > 1) - bfqd->raised_busy_queues++; + bfqd->wr_busy_queues++; } diff --git a/block/bfq.h b/block/bfq.h index d9a6ab4..03c3f0c 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -1,5 +1,5 @@ /* - * BFQ-v7r4 for 3.10.0: data structures and common functions prototypes. + * BFQ-v7r5 for 3.10.0: data structures and common functions prototypes. * * Based on ideas and code from CFQ: * Copyright (C) 2003 Jens Axboe @@ -57,7 +57,7 @@ struct bfq_service_tree { /** * struct bfq_sched_data - multi-class scheduler. - * @in_service_entity: entity under service. + * @in_service_entity: entity in service. * @next_in_service: head-of-the-line entity in the scheduler. * @service_tree: array of service trees, one per ioprio_class. * @@ -97,7 +97,7 @@ struct bfq_weight_counter { /** * struct bfq_entity - schedulable entity. * @rb_node: service_tree member. - * @weights_counter: pointer to the weight counter associated with this entity. + * @weight_counter: pointer to the weight counter associated with this entity. * @on_st: flag, true if the entity is on a tree (either the active or * the idle one of its service_tree). * @finish: B-WF2Q+ finish timestamp (aka F_i). @@ -194,36 +194,42 @@ struct bfq_group; * @max_budget: maximum budget allowed from the feedback mechanism. * @budget_timeout: budget expiration (in jiffies). * @dispatched: number of requests on the dispatch list or inside driver. - * @org_ioprio: saved ioprio during boosted periods. * @flags: status flags. * @bfqq_list: node for active/idle bfqq list inside our bfqd. * @seek_samples: number of seeks sampled * @seek_total: sum of the distances of the seeks sampled * @seek_mean: mean seek distance * @last_request_pos: position of the last request enqueued + * @requests_within_timer: number of consecutive pairs of request completion + * and arrival, such that the queue becomes idle + * after the completion, but the next request arrives + * within an idle time slice; used only if the queue's + * IO_bound has been cleared. * @pid: pid of the process owning the queue, used for logging purposes. 
* @last_wr_start_finish: start time of the current weight-raising period if * the @bfq-queue is being weight-raised, otherwise * finish time of the last weight-raising period * @wr_cur_max_time: current max raising time for this queue - * @soft_rt_next_start: minimum time instant such that, only if a new request - * is enqueued after this time instant in an idle - * @bfq_queue with no outstanding requests, then the - * task associated with the queue it is deemed as soft - * real-time (see the comments to the function + * @soft_rt_next_start: minimum time instant such that, only if a new + * request is enqueued after this time instant in an + * idle @bfq_queue with no outstanding requests, then + * the task associated with the queue it is deemed as + * soft real-time (see the comments to the function * bfq_bfqq_softrt_next_start()) * @last_idle_bklogged: time of the last transition of the @bfq_queue from * idle to backlogged * @service_from_backlogged: cumulative service received from the @bfq_queue - * since the last transition from idle to backlogged + * since the last transition from idle to + * backlogged * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the * queue is shared * - * A bfq_queue is a leaf request queue; it can be associated with an io_context - * or more, if it is async or shared between cooperating processes. @cgroup - * holds a reference to the cgroup, to be sure that it does not disappear while - * a bfqq still references it (mostly to avoid races between request issuing and - * task migration followed by cgroup destruction). + * A bfq_queue is a leaf request queue; it can be associated with an + * io_context or more, if it is async or shared between cooperating + * processes. @cgroup holds a reference to the cgroup, to be sure that it + * does not disappear while a bfqq still references it (mostly to avoid + * races between request issuing and task migration followed by cgroup + * destruction). * All the fields are protected by the queue lock of the containing bfqd. */ struct bfq_queue { @@ -249,8 +255,6 @@ struct bfq_queue { int dispatched; - unsigned short org_ioprio; - unsigned int flags; struct list_head bfqq_list; @@ -260,6 +264,8 @@ struct bfq_queue { sector_t seek_mean; sector_t last_request_pos; + unsigned int requests_within_timer; + pid_t pid; struct bfq_io_cq *bic; @@ -298,7 +304,14 @@ struct bfq_ttime { * shared queue, and is used to set @raising_cur_max_time * when the queue is split from the shared queue and its * weight is raised again - * @saved_idle_window: same purpose as the previous field for the idle window + * @saved_idle_window: same purpose as the previous field for the idle + * window + * @saved_IO_bound: same purpose as the previous two fields for the I/O + * bound classification of a queue + * @cooperations: counter of consecutive successful queue merges underwent + * by any of the process' @bfq_queues + * @failed_cooperations: counter of consecutive failed queue merges of any + * of the process' @bfq_queues */ struct bfq_io_cq { struct io_cq icq; /* must be the first member */ @@ -308,6 +321,10 @@ struct bfq_io_cq { unsigned int wr_time_left; unsigned int saved_idle_window; + unsigned int saved_IO_bound; + + unsigned int cooperations; + unsigned int failed_cooperations; }; enum bfq_device_speed { @@ -319,41 +336,42 @@ enum bfq_device_speed { * struct bfq_data - per device data structure. * @queue: request queue for the managed device. * @root_group: root bfq_group for the device. 
+ * @rq_pos_tree: rbtree sorted by next_request position, used when + * determining if two or more queues have interleaving + * requests (see bfq_close_cooperator()). * @active_numerous_groups: number of bfq_groups containing more than one * active @bfq_entity. - * @rq_pos_tree: rbtree sorted by next_request position, - * used when determining if two or more queues - * have interleaving requests (see bfq_close_cooperator). * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by * weight. Used to keep track of whether all @bfq_queues - * have the same weight. The tree contains one counter - * for each distinct weight associated to some active - * and not weight-raised @bfq_queue (see the comments to + * have the same weight. The tree contains one counter + * for each distinct weight associated to some active + * and not weight-raised @bfq_queue (see the comments to * the functions bfq_weights_tree_[add|remove] for - * further details). + * further details). * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted * by weight. Used to keep track of whether all - * @bfq_groups have the same weight. The tree contains - * one counter for each distinct weight associated to - * some active @bfq_group (see the comments to the - * functions bfq_weights_tree_[add|remove] for further - * details). + * @bfq_groups have the same weight. The tree contains + * one counter for each distinct weight associated to + * some active @bfq_group (see the comments to the + * functions bfq_weights_tree_[add|remove] for further + * details). * @busy_queues: number of bfq_queues containing requests (including the - * queue under service, even if it is idling). + * queue in service, even if it is idling). * @busy_in_flight_queues: number of @bfq_queues containing pending or - * in-flight requests, plus the @bfq_queue in service, - * even if idle but waiting for the possible arrival - * of its next sync request. This field is updated only - * if the device is rotational, but used only if the - * device is also NCQ-capable. The reason why the field - * is updated also for non-NCQ-capable rotational - * devices is related to the fact that the value of - * hw_tag may be set also later than when this field may - * need to be incremented for the first time(s). - * Taking also this possibility into account, to avoid - * unbalanced increments/decrements, would imply more - * overhead than just updating this field regardless of - * the value of hw_tag. + * in-flight requests, plus the @bfq_queue in + * service, even if idle but waiting for the + * possible arrival of its next sync request. This + * field is updated only if the device is rotational, + * but used only if the device is also NCQ-capable. + * The reason why the field is updated also for non- + * NCQ-capable rotational devices is related to the + * fact that the value of @hw_tag may be set also + * later than when busy_in_flight_queues may need to + * be incremented for the first time(s). Taking also + * this possibility into account, to avoid unbalanced + * increments/decrements, would imply more overhead + * than just updating busy_in_flight_queues + * regardless of the value of @hw_tag. * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues * (that is, seeky queues that expired * for budget timeout at least once) @@ -364,26 +382,27 @@ enum bfq_device_speed { * is rotational, but used only if the * device is also NCQ-capable (see the * comments to @busy_in_flight_queues). 
- * @raised_busy_queues: number of weight-raised busy bfq_queues. + * @wr_busy_queues: number of weight-raised busy @bfq_queues. * @queued: number of queued requests. * @rq_in_driver: number of requests dispatched and waiting for completion. * @sync_flight: number of sync requests in the driver. - * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples - * completed requests. + * @max_rq_in_driver: max number of reqs in driver in the last + * @hw_tag_samples completed requests. * @hw_tag_samples: nr of samples used to calculate hw_tag. * @hw_tag: flag set to one if the driver is showing a queueing behavior. * @budgets_assigned: number of budgets assigned. * @idle_slice_timer: timer set when idling for the next sequential request - * from the queue under service. + * from the queue in service. * @unplug_work: delayed work to restart dispatching on the request queue. - * @in_service_queue: bfq_queue under service. + * @in_service_queue: bfq_queue in service. * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. * @last_position: on-disk position of the last served request. * @last_budget_start: beginning of the last budget. * @last_idling_start: beginning of the last idle slice. * @peak_rate: peak transfer rate observed for a budget. * @peak_rate_samples: number of samples used to calculate @peak_rate. - * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. + * @bfq_max_budget: maximum budget allotted to a bfq_queue before + * rescheduling. * @group_list: list of all the bfq_groups active on the device. * @active_list: list of all the bfq_queues active on the device. * @idle_list: list of all the bfq_queues idle on the device. @@ -393,7 +412,8 @@ enum bfq_device_speed { * @bfq_back_penalty: weight of backward seeks wrt forward ones. * @bfq_back_max: maximum allowed backward seek. * @bfq_slice_idle: maximum idling time. - * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). + * @bfq_user_max_budget: user-configured max budget value + * (0 for auto-tuning). * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to * async queues. * @bfq_timeout: timeout for bfq_queues to consume their budget; used to @@ -403,6 +423,17 @@ enum bfq_device_speed { * they are charged for the whole allocated budget, to try * to preserve a behavior reasonably fair among them, but * without service-domain guarantees). + * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is + * no more granted any weight-raising. + * @bfq_failed_cooperations: number of consecutive failed cooperation + * chances after which weight-raising is restored + * to a queue subject to more than bfq_coop_thresh + * queue merges. + * @bfq_requests_within_timer: number of consecutive requests that must be + * issued within the idle time slice to set + * again idling to a queue which was marked as + * non-I/O-bound (see the definition of the + * IO_bound flag for further details). 
* @bfq_wr_coeff: Maximum factor by which the weight of a weight-raised * queue is multiplied * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) @@ -417,7 +448,7 @@ enum bfq_device_speed { * sectors per seconds * @RT_prod: cached value of the product R*T used for computing the maximum * duration of the weight raising automatically - * @device_speed: device speed class for the low-latency heuristic + * @device_speed: device-speed class for the low-latency heuristic * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions * * All the fields are protected by the @queue lock. @@ -426,18 +457,19 @@ struct bfq_data { struct request_queue *queue; struct bfq_group *root_group; + struct rb_root rq_pos_tree; + #ifdef CONFIG_CGROUP_BFQIO int active_numerous_groups; #endif - struct rb_root rq_pos_tree; struct rb_root queue_weights_tree; struct rb_root group_weights_tree; int busy_queues; int busy_in_flight_queues; int const_seeky_busy_in_flight_queues; - int raised_busy_queues; + int wr_busy_queues; int queued; int rq_in_driver; int sync_flight; @@ -477,6 +509,10 @@ struct bfq_data { unsigned int bfq_max_budget_async_rq; unsigned int bfq_timeout[2]; + unsigned int bfq_coop_thresh; + unsigned int bfq_failed_cooperations; + unsigned int bfq_requests_within_timer; + bool low_latency; /* parameters of the low_latency heuristics */ @@ -493,7 +529,7 @@ struct bfq_data { }; enum bfqq_state_flags { - BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ + BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ @@ -501,14 +537,22 @@ enum bfqq_state_flags { BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ + BFQ_BFQQ_FLAG_IO_bound, /* + * bfqq has timed-out at least once + * having consumed at most 2/10 of + * its budget + */ BFQ_BFQQ_FLAG_constantly_seeky, /* - * bfqq has proved to be slow and seeky - * until budget timeout + * bfqq has proved to be slow and + * seeky until budget timeout + */ + BFQ_BFQQ_FLAG_softrt_update, /* + * may need softrt-next-start + * update */ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ - BFQ_BFQQ_FLAG_softrt_update, /* may need softrt-next-start update */ }; #define BFQ_BFQQ_FNS(name) \ @@ -533,6 +577,7 @@ BFQ_BFQQ_FNS(idle_window); BFQ_BFQQ_FNS(prio_changed); BFQ_BFQQ_FNS(sync); BFQ_BFQQ_FNS(budget_new); +BFQ_BFQQ_FNS(IO_bound); BFQ_BFQQ_FNS(constantly_seeky); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); @@ -549,7 +594,10 @@ BFQ_BFQQ_FNS(softrt_update); /* Expiration reasons. */ enum bfqq_expiration { - BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ + BFQ_BFQQ_TOO_IDLE = 0, /* + * queue has been idling for + * too long + */ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ @@ -571,11 +619,13 @@ enum bfqq_expiration { * except for the idle class that has only one queue. * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). * @my_entity: pointer to @entity, %NULL for the toplevel group; used - * to avoid too many special cases during group creation/migration. 
- * @active_entities: number of active entities belonging to the group; unused - * for the root group. Used to know whether there are groups - * with more than one active @bfq_entity (see the comments - * to the function bfq_bfqq_must_not_expire()). + * to avoid too many special cases during group creation/ + * migration. + * @active_entities: number of active entities belonging to the group; + * unused for the root group. Used to know whether there + * are groups with more than one active @bfq_entity + * (see the comments to the function + * bfq_bfqq_must_not_expire()). * * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup * there is a set of bfq_groups, each one collecting the lower-level @@ -714,4 +764,5 @@ static void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -#endif + +#endif /* _BFQ_H */ -- 1.9.3
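
A few illustrative sketches of the mechanisms touched by this patch follow. All of them are hedged user-space models, not kernel code; every identifier that is not named in the diff above is made up for illustration.

The hunk in bfq_put_cooperator() walks the new_bfqq merge chain, dropping one reference per queue, and now simply breaks out if the chain loops back to the starting queue (the WARN is removed). A self-contained sketch of the same traversal pattern, with hypothetical queue and refcount types:

	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical stand-in for a bfq_queue: only what the walk needs. */
	struct queue {
		int id;
		int ref;                /* reference count */
		struct queue *new_bfqq; /* next queue in the merge chain, or NULL */
	};

	static void put_queue(struct queue *q)
	{
		/* Model of bfq_put_queue(): drop one reference. */
		if (--q->ref == 0)
			printf("queue %d: last reference dropped\n", q->id);
	}

	/* Model of the chain walk in bfq_put_cooperator(). */
	static void put_cooperator(struct queue *bfqq)
	{
		struct queue *__bfqq = bfqq->new_bfqq;

		while (__bfqq) {
			struct queue *next;

			if (__bfqq == bfqq)	/* chain loops back: stop quietly */
				break;
			next = __bfqq->new_bfqq;
			put_queue(__bfqq);
			__bfqq = next;
		}
	}

	int main(void)
	{
		struct queue a = { .id = 1, .ref = 2 };
		struct queue b = { .id = 2, .ref = 1 };

		a.new_bfqq = &b;
		b.new_bfqq = &a;	/* deliberately create a loop */
		put_cooperator(&a);	/* drops b's reference, then stops at a */
		return 0;
	}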
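
The new per-process counters in struct bfq_io_cq (cooperations, failed_cooperations), the bfq_bfqq_increase_failed_cooperations() call in bfq_insert_request(), and the bfq_coop_thresh/bfq_failed_cooperations tunables set in bfq_init_queue() implement the "deprive frequent cooperators of weight-raising" improvement from the changelog. The sketch below models only the bookkeeping; struct io_cq_state, merge_succeeded(), merge_failed() and deny_weight_raising() are illustrative names, the reset on success assumes the counters track consecutive events, and the exact comparisons and the places where they are checked differ in the scheduler:

	#include <stdbool.h>
	#include <stdio.h>

	/* Default tunable values from bfq_init_queue() in this patch. */
	#define BFQ_COOP_THRESH			2
	#define BFQ_FAILED_COOPERATIONS		7000

	/* Hypothetical stand-in for the two counters added to struct bfq_io_cq. */
	struct io_cq_state {
		unsigned int cooperations;        /* consecutive successful merges */
		unsigned int failed_cooperations; /* consecutive failed merge chances */
	};

	/*
	 * True if the process' queues should currently be denied weight-raising
	 * because they cooperate (merge) too often, and the failed-cooperation
	 * streak is not yet long enough to restore it.
	 */
	static bool deny_weight_raising(const struct io_cq_state *s)
	{
		return s->cooperations >= BFQ_COOP_THRESH &&
		       s->failed_cooperations < BFQ_FAILED_COOPERATIONS;
	}

	static void merge_succeeded(struct io_cq_state *s)
	{
		s->cooperations++;
		s->failed_cooperations = 0;	/* assumed: streak of failures broken */
	}

	static void merge_failed(struct io_cq_state *s)
	{
		/* Counterpart of bfq_bfqq_increase_failed_cooperations(). */
		s->failed_cooperations++;
	}

	int main(void)
	{
		struct io_cq_state s = { 0, 0 };

		merge_succeeded(&s);
		merge_succeeded(&s);
		printf("deny wr after 2 merges: %d\n", deny_weight_raising(&s));
		for (unsigned int i = 0; i < BFQ_FAILED_COOPERATIONS; i++)
			merge_failed(&s);
		printf("deny wr after %u failed chances: %d\n",
		       BFQ_FAILED_COOPERATIONS, deny_weight_raising(&s));
		return 0;
	}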
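
bfq_init_queue() seeds the peak-rate estimate optimistically with RT_prod = R_fast * T_fast for the device's rotational/non-rotational class, and bfq_init() places the speed-class switch threshold halfway between the slow and fast reference rates. A small numeric illustration of those two formulas; the reference-rate and duration values below are made up, the real R_*/T_* arrays are defined elsewhere in bfq-iosched.c:

	#include <stdio.h>

	int main(void)
	{
		/*
		 * Illustrative reference rates (sectors per unit time) and
		 * weight-raising reference durations; NOT the kernel's values.
		 * Index 0 = rotational device, index 1 = non-rotational.
		 */
		unsigned long R_slow[2] = { 1000, 10000 };
		unsigned long R_fast[2] = { 7000, 70000 };
		unsigned long T_fast[2] = { 500, 200 };	/* e.g. jiffies */
		unsigned long device_speed_thresh[2], RT_prod;
		int nonrot = 1;		/* pretend the queue is non-rotational */

		/* Threshold between the "slow" and "fast" speed classes. */
		for (int i = 0; i < 2; i++)
			device_speed_thresh[i] = (R_fast[i] + R_slow[i]) / 2;

		/* Optimistic initial value: assume the device runs at R_fast. */
		RT_prod = R_fast[nonrot] * T_fast[nonrot];

		printf("thresh[rot]=%lu thresh[nonrot]=%lu RT_prod=%lu\n",
		       device_speed_thresh[0], device_speed_thresh[1], RT_prod);
		return 0;
	}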
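
The ioprio/weight helpers touched in bfq-sched.c implement a simple linear mapping: weight = IOPRIO_BE_NR - ioprio, and back again, with ioprio 0 used as an escape value for weights numerically equal to or larger than IOPRIO_BE_NR. A minimal user-space model of that mapping; IOPRIO_BE_NR is 8 in the kernel headers, and the asserts stand in for the BUG_ON checks added by this patch:

	#include <assert.h>
	#include <stdio.h>

	#define IOPRIO_BE_NR 8	/* number of best-effort ioprio levels */

	/* Model of bfq_ioprio_to_weight(): lower ioprio value -> higher weight. */
	static unsigned short ioprio_to_weight(int ioprio)
	{
		assert(ioprio >= 0 && ioprio < IOPRIO_BE_NR);
		return IOPRIO_BE_NR - ioprio;
	}

	/*
	 * Model of bfq_weight_to_ioprio(): 0 is the escape ioprio for weights
	 * (numerically) equal to or larger than IOPRIO_BE_NR.
	 */
	static unsigned short weight_to_ioprio(int weight)
	{
		return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
	}

	int main(void)
	{
		for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++) {
			unsigned int w = ioprio_to_weight(ioprio);

			printf("ioprio %d -> weight %u -> ioprio %u\n",
			       ioprio, w, (unsigned int)weight_to_ioprio(w));
		}
		return 0;
	}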
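
The raised_busy_queues -> wr_busy_queues rename keeps a count of busy queues that are currently weight-raised (wr_coeff > 1), updated in lockstep with the busy state in bfq_add_bfqq_busy()/bfq_del_bfqq_busy(). A tiny model of just that accounting; struct dev_state and struct queue are illustrative, and only the counter updates are mirrored:

	#include <stdio.h>

	struct dev_state {
		int busy_queues;
		int wr_busy_queues;	/* busy queues with wr_coeff > 1 */
	};

	struct queue {
		unsigned int wr_coeff;	/* 1 = not weight-raised */
	};

	static void add_bfqq_busy(struct dev_state *d, struct queue *q)
	{
		d->busy_queues++;
		if (q->wr_coeff > 1)
			d->wr_busy_queues++;
	}

	static void del_bfqq_busy(struct dev_state *d, struct queue *q)
	{
		d->busy_queues--;
		if (q->wr_coeff > 1)
			d->wr_busy_queues--;
	}

	int main(void)
	{
		struct dev_state d = { 0, 0 };
		struct queue plain = { .wr_coeff = 1 };
		struct queue raised = { .wr_coeff = 20 };	/* bfq_wr_coeff default */

		add_bfqq_busy(&d, &plain);
		add_bfqq_busy(&d, &raised);
		printf("busy=%d wr_busy=%d\n", d.busy_queues, d.wr_busy_queues);
		del_bfqq_busy(&d, &raised);
		printf("busy=%d wr_busy=%d\n", d.busy_queues, d.wr_busy_queues);
		return 0;
	}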
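
The new IO_bound queue flag and the per-queue requests_within_timer counter drive the "idle only for I/O-bound processes" improvement from the changelog: once a queue has lost its IO_bound status, it must produce bfq_requests_within_timer (120 by default) consecutive completion/arrival pairs in which the next request arrives within the idle slice before device idling is granted to it again. A simplified model of that counter; struct queue_state and request_arrived() are illustrative, timing is reduced to a boolean, and in the scheduler the effect of reaching the threshold is to allow idling again rather than literally re-setting the flag as done here:

	#include <stdbool.h>
	#include <stdio.h>

	#define BFQ_REQUESTS_WITHIN_TIMER 120	/* default from bfq_init_queue() */

	/* Hypothetical per-queue state: only what this heuristic needs. */
	struct queue_state {
		bool io_bound;			/* BFQ_BFQQ_FLAG_IO_bound set? */
		unsigned int requests_within_timer;
	};

	/*
	 * Called (conceptually) when a new request arrives for a queue that had
	 * gone idle after completing its previous request. @within_idle_slice
	 * stands for "the request arrived before the idle-slice timer fired".
	 */
	static void request_arrived(struct queue_state *q, bool within_idle_slice)
	{
		if (q->io_bound)
			return;	/* counter is used only once IO_bound is cleared */

		if (!within_idle_slice) {
			q->requests_within_timer = 0;	/* streak broken */
			return;
		}

		if (++q->requests_within_timer >= BFQ_REQUESTS_WITHIN_TIMER) {
			/*
			 * Enough back-to-back arrivals: modelled here by
			 * marking the queue I/O-bound again, which in the
			 * scheduler translates into idling for it once more.
			 */
			q->io_bound = true;
			q->requests_within_timer = 0;
		}
	}

	int main(void)
	{
		struct queue_state q = { .io_bound = false };

		for (int i = 0; i < BFQ_REQUESTS_WITHIN_TIMER; i++)
			request_arrived(&q, true);
		printf("queue considered I/O-bound again: %d\n", q.io_bound);
		return 0;
	}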