From b16c765122f987056e1dc9ef6c214571bb5bd694 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Oct 2018 15:47:53 +0100 Subject: drm/i915: Priority boost for new clients Taken from an idea used for FQ_CODEL, we give the first request of a new request flows a small priority boost. These flows are likely to correspond with short, interactive tasks and so be more latency sensitive than the longer free running queues. As soon as the client has more than one request in the queue, further requests are not boosted and it settles down into ordinary steady state behaviour. Such small kicks dramatically help combat the starvation issue, by allowing each client the opportunity to run even when the system is under heavy throughput load (within the constraints of the user selected priority). v2: Mark the preempted request as the start of a new flow, to prevent a single client being continually gazumped by its peers. Testcase: igt/benchmarks/rrul Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a492385b2089..56140ca054e8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1127,8 +1127,20 @@ void i915_request_add(struct i915_request *request) */ local_bh_disable(); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(request, &request->gem_context->sched); + if (engine->schedule) { + struct i915_sched_attr attr = request->gem_context->sched; + + /* + * Boost priorities to new clients (new request flows). + * + * Allow interactive/synchronous clients to jump ahead of + * the bulk clients. (FQ_CODEL) + */ + if (!prev || i915_request_completed(prev)) + attr.priority |= I915_PRIORITY_NEWCLIENT; + + engine->schedule(request, &attr); + } rcu_read_unlock(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ -- cgit From e2f3496e93be3238de2e2e6bfc83b3a83c084ce5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Oct 2018 15:47:54 +0100 Subject: drm/i915: Pull scheduling under standalone lock Currently, the backend scheduling code abuses struct_mutex into order to have a global lock to manipulate a temporary list (without widespread allocation) and to protect against list modifications. This is an extraneous coupling to struct_mutex and further can not extend beyond the local device. Pull all the code that needs to be under the one true lock into i915_scheduler.c, and make it so. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 85 ------------------------------------- 1 file changed, 85 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 56140ca054e8..d73ad490a261 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static struct i915_dependency * -i915_dependency_alloc(struct drm_i915_private *i915) -{ - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); -} - -static void -i915_dependency_free(struct drm_i915_private *i915, - struct i915_dependency *dep) -{ - kmem_cache_free(i915->dependencies, dep); -} - -static void -__i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal, - struct i915_dependency *dep, - unsigned long flags) -{ - INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &node->signalers_list); - dep->signaler = signal; - dep->flags = flags; -} - -static int -i915_sched_node_add_dependency(struct drm_i915_private *i915, - struct i915_sched_node *node, - struct i915_sched_node *signal) -{ - struct i915_dependency *dep; - - dep = i915_dependency_alloc(i915); - if (!dep) - return -ENOMEM; - - __i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_ALLOC); - return 0; -} - -static void -i915_sched_node_fini(struct drm_i915_private *i915, - struct i915_sched_node *node) -{ - struct i915_dependency *dep, *tmp; - - GEM_BUG_ON(!list_empty(&node->link)); - - /* - * Everyone we depended upon (the fences we wait to be signaled) - * should retire before us and remove themselves from our list. - * However, retirement is run independently on each timeline and - * so we may be called out-of-order. - */ - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } - - /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != node); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->signal_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } -} - -static void -i915_sched_node_init(struct i915_sched_node *node) -{ - INIT_LIST_HEAD(&node->signalers_list); - INIT_LIST_HEAD(&node->waiters_list); - INIT_LIST_HEAD(&node->link); - node->attr.priority = I915_PRIORITY_INVALID; -} - static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; -- cgit From e9eaf82d97a2b05460ff5ef6a3e07446f7d049fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Oct 2018 15:47:55 +0100 Subject: drm/i915: Priority boost for waiting clients Latency is in the eye of the beholder. In the case where a client stops and waits for the gpu, give that request chain a small priority boost (not so that it overtakes higher priority clients, to preserve the external ordering) so that ideally the wait completes earlier. v2: Tvrtko recommends to keep the boost-from-user-stall as small as possible and to allow new client flows to be preferred for interactivity over stalls. Testcase: igt/gem_sync/switch-default Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Dmitry Rogozhkin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d73ad490a261..abd4dacbab8e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1237,6 +1237,8 @@ long i915_request_wait(struct i915_request *rq, add_wait_queue(errq, &reset); intel_wait_init(&wait); + if (flags & I915_WAIT_PRIORITY) + i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); restart: do { -- cgit From 33373258cf021869380140078bb3f9732b7d852d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Oct 2018 09:03:00 +0100 Subject: drm/i915: Remove the global cache shrink & rcu barrier on allocation failure Earlier, we reasoned that having idled the gpu under mempressure, that would be a good time to trim our request slabs in order to perform the next request allocation. We have stopped performing the global operation on the device (no idling) and wish to make the allocation failure handling more local, so out with the global barrier that may take a long time. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181005080300.9908-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index abd4dacbab8e..28819f8c4da6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -655,17 +655,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (rq) cond_synchronize_rcu(rq->rcustate); - /* - * We've forced the client to stall and catch up with whatever - * backlog there might have been. As we are assuming that we - * caused the mempressure, now is an opportune time to - * recover as much memory from the request pool as is possible. - * Having already penalized the client to stall, we spend - * a little extra time to re-optimise page allocation. - */ - kmem_cache_shrink(i915->requests); - rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ - rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); if (!rq) { ret = -ENOMEM; -- cgit From 1e016a8693368c163b18104a28c4edc71858f2d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 24 Oct 2018 11:49:39 +0100 Subject: drm/i915: Park signaling thread while wrapping the seqno A danger encountered when resetting the seqno (using debugfs/i915_next_seqno) is that as we change the breadcrumb stored in the HWSP, it may be inspected by the signaler thread leading to confusion in our sanity checks. <0> [136.331342] i915/sig-347 3..s1 136336154us : execlists_submission_tasklet: rcs0 awake?=1, active=5 <0> [136.331373] i915/sig-347 3d.s2 136336155us : process_csb: rcs0 cs-irq head=5, tail=0 <0> [136.331402] i915/sig-347 3d.s2 136336155us : process_csb: rcs0 csb[0]: status=0x00000018:0x00000002, active=0x5 <0> [136.331434] i915/sig-347 3d.s2 136336156us : process_csb: rcs0 out[0]: ctx=2.1, global=219 (fence 46:8455) (current 219), prio=0 <0> [136.331466] i915/sig-347 3d.s2 136336156us : process_csb: rcs0 completed ctx=2 <0> [136.332027] gem_exec-1049 0.... 136336246us : reset_all_global_seqno.part.5: rcs0 seqno 219 (current 219) -> -43 <0> [136.332056] gem_exec-1049 0.... 136336251us : reset_all_global_seqno.part.5: bcs0 seqno 183 (current 183) -> -43 <0> [136.332085] gem_exec-1049 0.... 136336255us : reset_all_global_seqno.part.5: vcs0 seqno 191 (current 191) -> -43 <0> [136.332114] gem_exec-1049 0.... 136336259us : reset_all_global_seqno.part.5: vcs1 seqno 180 (current 180) -> -43 <0> [136.332143] gem_exec-1049 0.... 136336262us : reset_all_global_seqno.part.5: vecs0 seqno 212 (current 212) -> -43 <0> [136.332174] i915/sig-347 3.... 136336280us : intel_breadcrumbs_signaler: intel_breadcrumbs_signaler:673 GEM_BUG_ON(!i915_request_completed(rq)) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181024104939.2861-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 28819f8c4da6..71107540581d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -136,6 +136,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) intel_engine_get_seqno(engine), seqno); + kthread_park(engine->breadcrumbs.signaler); + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); @@ -150,6 +152,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); engine->timeline.seqno = seqno; + + kthread_unpark(engine->breadcrumbs.signaler); } list_for_each_entry(timeline, &i915->gt.timelines, link) -- cgit