From b16c765122f987056e1dc9ef6c214571bb5bd694 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 1 Oct 2018 15:47:53 +0100
Subject: drm/i915: Priority boost for new clients

Taken from an idea used for FQ_CODEL, we give the first request of a
new request flows a small priority boost. These flows are likely to
correspond with short, interactive tasks and so be more latency sensitive
than the longer free running queues. As soon as the client has more than
one request in the queue, further requests are not boosted and it settles
down into ordinary steady state behaviour.  Such small kicks dramatically
help combat the starvation issue, by allowing each client the opportunity
to run even when the system is under heavy throughput load (within the
constraints of the user selected priority).

v2: Mark the preempted request as the start of a new flow, to prevent a
single client being continually gazumped by its peers.

Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a492385b2089..56140ca054e8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+	if (engine->schedule) {
+		struct i915_sched_attr attr = request->gem_context->sched;
+
+		/*
+		 * Boost priorities to new clients (new request flows).
+		 *
+		 * Allow interactive/synchronous clients to jump ahead of
+		 * the bulk clients. (FQ_CODEL)
+		 */
+		if (!prev || i915_request_completed(prev))
+			attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+		engine->schedule(request, &attr);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-- 
cgit 


From e2f3496e93be3238de2e2e6bfc83b3a83c084ce5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 1 Oct 2018 15:47:54 +0100
Subject: drm/i915: Pull scheduling under standalone lock

Currently, the backend scheduling code abuses struct_mutex into order to
have a global lock to manipulate a temporary list (without widespread
allocation) and to protect against list modifications. This is an
extraneous coupling to struct_mutex and further can not extend beyond
the local device.

Pull all the code that needs to be under the one true lock into
i915_scheduler.c, and make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 85 -------------------------------------
 1 file changed, 85 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 56140ca054e8..d73ad490a261 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static struct i915_dependency *
-i915_dependency_alloc(struct drm_i915_private *i915)
-{
-	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct drm_i915_private *i915,
-		     struct i915_dependency *dep)
-{
-	kmem_cache_free(i915->dependencies, dep);
-}
-
-static void
-__i915_sched_node_add_dependency(struct i915_sched_node *node,
-				 struct i915_sched_node *signal,
-				 struct i915_dependency *dep,
-				 unsigned long flags)
-{
-	INIT_LIST_HEAD(&dep->dfs_link);
-	list_add(&dep->wait_link, &signal->waiters_list);
-	list_add(&dep->signal_link, &node->signalers_list);
-	dep->signaler = signal;
-	dep->flags = flags;
-}
-
-static int
-i915_sched_node_add_dependency(struct drm_i915_private *i915,
-			       struct i915_sched_node *node,
-			       struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc(i915);
-	if (!dep)
-		return -ENOMEM;
-
-	__i915_sched_node_add_dependency(node, signal, dep,
-					 I915_DEPENDENCY_ALLOC);
-	return 0;
-}
-
-static void
-i915_sched_node_fini(struct drm_i915_private *i915,
-		     struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
-	 */
-	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->wait_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-
-	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != node);
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->signal_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
-	}
-}
-
-static void
-i915_sched_node_init(struct i915_sched_node *node)
-{
-	INIT_LIST_HEAD(&node->signalers_list);
-	INIT_LIST_HEAD(&node->waiters_list);
-	INIT_LIST_HEAD(&node->link);
-	node->attr.priority = I915_PRIORITY_INVALID;
-}
-
 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct intel_engine_cs *engine;
-- 
cgit 


From e9eaf82d97a2b05460ff5ef6a3e07446f7d049fe Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 1 Oct 2018 15:47:55 +0100
Subject: drm/i915: Priority boost for waiting clients

Latency is in the eye of the beholder. In the case where a client stops
and waits for the gpu, give that request chain a small priority boost
(not so that it overtakes higher priority clients, to preserve the
external ordering) so that ideally the wait completes earlier.

v2: Tvrtko recommends to keep the boost-from-user-stall as small as
possible and to allow new client flows to be preferred for interactivity
over stalls.

Testcase: igt/gem_sync/switch-default
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d73ad490a261..abd4dacbab8e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq,
 		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
+	if (flags & I915_WAIT_PRIORITY)
+		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
 restart:
 	do {
-- 
cgit 


From 33373258cf021869380140078bb3f9732b7d852d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 5 Oct 2018 09:03:00 +0100
Subject: drm/i915: Remove the global cache shrink & rcu barrier on allocation
 failure

Earlier, we reasoned that having idled the gpu under mempressure, that
would be a good time to trim our request slabs in order to perform the
next request allocation. We have stopped performing the global operation
on the device (no idling) and wish to make the allocation failure
handling more local, so out with the global barrier that may take a long
time.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181005080300.9908-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index abd4dacbab8e..28819f8c4da6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -655,17 +655,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		if (rq)
 			cond_synchronize_rcu(rq->rcustate);
 
-		/*
-		 * We've forced the client to stall and catch up with whatever
-		 * backlog there might have been. As we are assuming that we
-		 * caused the mempressure, now is an opportune time to
-		 * recover as much memory from the request pool as is possible.
-		 * Having already penalized the client to stall, we spend
-		 * a little extra time to re-optimise page allocation.
-		 */
-		kmem_cache_shrink(i915->requests);
-		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */
-
 		rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
 		if (!rq) {
 			ret = -ENOMEM;
-- 
cgit 


From 1e016a8693368c163b18104a28c4edc71858f2d2 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 24 Oct 2018 11:49:39 +0100
Subject: drm/i915: Park signaling thread while wrapping the seqno

A danger encountered when resetting the seqno (using
debugfs/i915_next_seqno) is that as we change the breadcrumb stored in
the HWSP, it may be inspected by the signaler thread leading to
confusion in our sanity checks.

<0> [136.331342] i915/sig-347     3..s1 136336154us : execlists_submission_tasklet: rcs0 awake?=1, active=5
<0> [136.331373] i915/sig-347     3d.s2 136336155us : process_csb: rcs0 cs-irq head=5, tail=0
<0> [136.331402] i915/sig-347     3d.s2 136336155us : process_csb: rcs0 csb[0]: status=0x00000018:0x00000002, active=0x5
<0> [136.331434] i915/sig-347     3d.s2 136336156us : process_csb: rcs0 out[0]: ctx=2.1, global=219 (fence 46:8455) (current 219), prio=0
<0> [136.331466] i915/sig-347     3d.s2 136336156us : process_csb: rcs0 completed ctx=2
<0> [136.332027] gem_exec-1049    0.... 136336246us : reset_all_global_seqno.part.5: rcs0 seqno 219 (current 219) -> -43
<0> [136.332056] gem_exec-1049    0.... 136336251us : reset_all_global_seqno.part.5: bcs0 seqno 183 (current 183) -> -43
<0> [136.332085] gem_exec-1049    0.... 136336255us : reset_all_global_seqno.part.5: vcs0 seqno 191 (current 191) -> -43
<0> [136.332114] gem_exec-1049    0.... 136336259us : reset_all_global_seqno.part.5: vcs1 seqno 180 (current 180) -> -43
<0> [136.332143] gem_exec-1049    0.... 136336262us : reset_all_global_seqno.part.5: vecs0 seqno 212 (current 212) -> -43
<0> [136.332174] i915/sig-347     3.... 136336280us : intel_breadcrumbs_signaler: intel_breadcrumbs_signaler:673 GEM_BUG_ON(!i915_request_completed(rq))

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181024104939.2861-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/gpu/drm/i915/i915_request.c')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 28819f8c4da6..71107540581d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -136,6 +136,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 			  intel_engine_get_seqno(engine),
 			  seqno);
 
+		kthread_park(engine->breadcrumbs.signaler);
+
 		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
 			/* Flush any waiters before we reuse the seqno */
 			intel_engine_disarm_breadcrumbs(engine);
@@ -150,6 +152,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		/* Finally reset hw state */
 		intel_engine_init_global_seqno(engine, seqno);
 		engine->timeline.seqno = seqno;
+
+		kthread_unpark(engine->breadcrumbs.signaler);
 	}
 
 	list_for_each_entry(timeline, &i915->gt.timelines, link)
-- 
cgit