From 654e9c18dfab02c8e5f9c5877c7a2f3264fa520a Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sun, 26 Sep 2021 11:56:58 -0700
Subject: drm/msm: Fix crash on dev file close

If the device file was opened prior to fw being available (such as from
initrd before rootfs is mounted, when the initrd does not contain GPU
fw), that would cause a later crash when the dev file is closed, due to
the uninitialized submitqueues list:

   CPU: 4 PID: 263 Comm: plymouthd Tainted: G        W         5.15.0-rc2-next-20210924 #2
   Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN33WW(V2.06) 06/ 4/2019
   pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
   pc : msm_submitqueue_close+0x30/0x190 [msm]
   lr : msm_postclose+0x54/0xf0 [msm]
   sp : ffff80001074bb80
   x29: ffff80001074bb80 x28: ffff03ad80c4db80 x27: ffff03ad80dc5ab0
   x26: 0000000000000000 x25: ffff03ad80dc5af8 x24: ffff03ad81e90800
   x23: 0000000000000000 x22: ffff03ad81e90800 x21: ffff03ad8b35e788
   x20: ffff03ad81e90878 x19: 0000000000000000 x18: 0000000000000000
   x17: 0000000000000000 x16: ffffda15f14f7940 x15: 0000000000000000
   x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000040
   x11: 0000000000000000 x10: 0000000000000000 x9 : ffffda15cd18ff88
   x8 : ffff03ad80c4db80 x7 : 0000000000000228 x6 : 0000000000000000
   x5 : 1793a4e807e636bd x4 : ffff03ad80c4db80 x3 : ffff03ad81e90878
   x2 : 0000000000000000 x1 : ffff03ad80c4db80 x0 : 0000000000000000
   Call trace:
    msm_submitqueue_close+0x30/0x190 [msm]
    msm_postclose+0x54/0xf0 [msm]
    drm_file_free.part.0+0x1cc/0x2e0 [drm]
    drm_close_helper.isra.0+0x74/0x84 [drm]
    drm_release+0x78/0x120 [drm]
    __fput+0x78/0x23c
    ____fput+0x1c/0x30
    task_work_run+0xcc/0x22c
    do_exit+0x304/0x9f4
    do_group_exit+0x44/0xb0
    __wake_up_parent+0x0/0x3c
    invoke_syscall+0x50/0x120
    el0_svc_common.constprop.0+0x4c/0xf4
    do_el0_svc+0x30/0x9c
    el0_svc+0x20/0x60
    el0t_64_sync_handler+0xe8/0xf0
    el0t_64_sync+0x1a0/0x1a4
   Code: aa0003f5 a90153f3 f8408eb3 aa1303e0 (f85e8674)
   ---[ end trace 39b2fa37509a2be2 ]---
   Fixing recursive fault but reboot is needed!

Fixes: 86c2a0f000c1 ("drm/msm: Small submitqueue creation cleanup")
Reported-by: Steev Klimaszewski
Signed-off-by: Rob Clark
---
 drivers/gpu/drm/msm/msm_submitqueue.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_submitqueue.c')

diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 32a55d81b58b..7ce0771b5582 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -140,10 +140,6 @@ int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
 	 */
 	default_prio = DIV_ROUND_UP(max_priority, 2);
 
-	INIT_LIST_HEAD(&ctx->submitqueues);
-
-	rwlock_init(&ctx->queuelock);
-
 	return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
 }
 
--
cgit
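The two initializers removed above do not simply go away: the companion
hunk lands in msm_drv.c, which the per-file view above filters out.  A
minimal sketch of that open()-path side, with the context_init() helper
and its surrounding calls assumed from context rather than quoted from
the commit:

	static int context_init(struct drm_device *dev, struct drm_file *file)
	{
		struct msm_drm_private *priv = dev->dev_private;
		struct msm_file_private *ctx;

		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			return -ENOMEM;

		/* Initialize unconditionally at open() time, so that
		 * msm_submitqueue_close() always sees a valid (if empty)
		 * list even when msm_submitqueue_init() bails out early
		 * because the GPU fw never loaded.
		 */
		INIT_LIST_HEAD(&ctx->submitqueues);
		rwlock_init(&ctx->queuelock);

		kref_init(&ctx->ref);
		msm_submitqueue_init(dev, ctx);

		ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
		file->driver_priv = ctx;

		return 0;
	}

The point of the move is that the list and lock now have the same
lifetime as the msm_file_private itself, rather than depending on the
GPU having come up by the time the file was opened.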
From 68002469e571ae3db095e4ade1cfef64903f8fa1 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Fri, 1 Oct 2021 09:42:05 -0700
Subject: drm/msm: One sched entity per process per priority

Some userspace apps assume that rendering against multiple contexts
within the same process (from the same thread, with appropriate
MakeCurrent() calls) provides sufficient synchronization without any
external synchronization (ie. glFenceSync()/glWaitSync()).  Since a
submitqueue maps to a gl/vk context, having multiple sched entities of
the same priority only works with implicit sync enabled.  To fix this,
limit things to a single sched entity per priority level per process.

An alternative would be sharing submitqueues between contexts in
userspace, but tracking of per-context faults (ie. GL_EXT_robustness)
is already done at the submitqueue level, so this is not an option.

Signed-off-by: Rob Clark
---
 drivers/gpu/drm/msm/msm_submitqueue.c | 68 +++++++++++++++++++++++++++++------
 1 file changed, 58 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/msm/msm_submitqueue.c')

diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 7ce0771b5582..b8621c6e0554 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -7,6 +7,24 @@
 
 #include "msm_gpu.h"
 
+void __msm_file_private_destroy(struct kref *kref)
+{
+	struct msm_file_private *ctx = container_of(kref,
+		struct msm_file_private, ref);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->entities); i++) {
+		if (!ctx->entities[i])
+			continue;
+
+		drm_sched_entity_destroy(ctx->entities[i]);
+		kfree(ctx->entities[i]);
+	}
+
+	msm_gem_address_space_put(ctx->aspace);
+	kfree(ctx);
+}
+
 void msm_submitqueue_destroy(struct kref *kref)
 {
 	struct msm_gpu_submitqueue *queue = container_of(kref,
@@ -14,8 +32,6 @@ void msm_submitqueue_destroy(struct kref *kref)
 
 	idr_destroy(&queue->fence_idr);
 
-	drm_sched_entity_destroy(&queue->entity);
-
 	msm_file_private_put(queue->ctx);
 
 	kfree(queue);
@@ -61,13 +77,47 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
 	}
 }
 
+static struct drm_sched_entity *
+get_sched_entity(struct msm_file_private *ctx, struct msm_ringbuffer *ring,
+		unsigned ring_nr, enum drm_sched_priority sched_prio)
+{
+	static DEFINE_MUTEX(entity_lock);
+	unsigned idx = (ring_nr * NR_SCHED_PRIORITIES) + sched_prio;
+
+	/* We should have already validated that the requested priority is
+	 * valid by the time we get here.
+	 */
+	if (WARN_ON(idx >= ARRAY_SIZE(ctx->entities)))
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&entity_lock);
+
+	if (!ctx->entities[idx]) {
+		struct drm_sched_entity *entity;
+		struct drm_gpu_scheduler *sched = &ring->sched;
+		int ret;
+
+		entity = kzalloc(sizeof(*ctx->entities[idx]), GFP_KERNEL);
+
+		ret = drm_sched_entity_init(entity, sched_prio, &sched, 1, NULL);
+		if (ret) {
+			kfree(entity);
+			return ERR_PTR(ret);
+		}
+
+		ctx->entities[idx] = entity;
+	}
+
+	mutex_unlock(&entity_lock);
+
+	return ctx->entities[idx];
+}
+
 int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 		u32 prio, u32 flags, u32 *id)
 {
 	struct msm_drm_private *priv = drm->dev_private;
 	struct msm_gpu_submitqueue *queue;
-	struct msm_ringbuffer *ring;
-	struct drm_gpu_scheduler *sched;
 	enum drm_sched_priority sched_prio;
 	unsigned ring_nr;
 	int ret;
@@ -91,12 +141,10 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 	queue->flags = flags;
 	queue->ring_nr = ring_nr;
 
-	ring = priv->gpu->rb[ring_nr];
-	sched = &ring->sched;
-
-	ret = drm_sched_entity_init(&queue->entity,
-			sched_prio, &sched, 1, NULL);
-	if (ret) {
+	queue->entity = get_sched_entity(ctx, priv->gpu->rb[ring_nr],
+			ring_nr, sched_prio);
+	if (IS_ERR(queue->entity)) {
+		ret = PTR_ERR(queue->entity);
 		kfree(queue);
 		return ret;
 	}
--
cgit
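Two reading notes on the diff above, since the per-file view again hides
the header side.  First, the ctx->entities[] table that
get_sched_entity() indexes belongs to the per-file context, presumably
declared alongside the rest of struct msm_file_private in msm_gpu.h.  A
sketch of the assumed declaration, with the array bound inferred from
the idx calculation (MSM_GPU_MAX_RINGS being the driver's ring-count
limit) and unrelated members omitted:

	struct msm_file_private {
		rwlock_t queuelock;
		struct list_head submitqueues;
		struct msm_gem_address_space *aspace;
		struct kref ref;
		/* ... other members omitted ... */

		/* At most one sched entity per {ring, priority} pair,
		 * lazily created by get_sched_entity() and shared by all
		 * of this process's submitqueues at that priority; indexed
		 * by (ring_nr * NR_SCHED_PRIORITIES) + sched_prio.
		 */
		struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
	};

Second, note that the drm_sched_entity_init() error path in
get_sched_entity() returns while still holding entity_lock, and passes
an unchecked kzalloc() result into drm_sched_entity_init(); a hardened
version would check the allocation and drop the mutex before the
kfree().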