[PATCH 09/17] drm/msm: Implement per-submitqueue fences

Thu Jul 27 16:42:38 UTC 2017

Currently we use a single fence context for the entire GPU. That
works until we start dealing with multiple ringbuffers that will
all need their own sequence number.

So in order for the user to continue using the fence functions
we will need some way to uniquely identify a submission that
gets mapped into the appropriate ringbuffer and sequence ID.

The best way to do this is to move to per-submitqueue fences.
This allows each user context to have their own monotonically
increasing identifier and we can easily map back and forth.

To implement this, create a fence context for each submitqueue
and assign user fences from that context.  Each submission will
have be assigned a submitqueue fence and a unique sequence ID for
the target ringbuffer. When the submission is retired on the ring
the fence will get signaled.

struct drm_msm_wait_fence in the UABI needs to be extended to include
the queue ID for the fence being queried. Legacy applications that
don't use submitqueues will use queue 0 by default.

Signed-off-by: Jordan Crouse <jcrouse at codeaurora.org>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  4 ++--
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++-----
 drivers/gpu/drm/msm/msm_drv.c           | 19 ++++++++++++-----
 drivers/gpu/drm/msm/msm_drv.h           |  6 +++---
 drivers/gpu/drm/msm/msm_fence.c         |  2 +-
 drivers/gpu/drm/msm/msm_fence.h         |  2 +-
 drivers/gpu/drm/msm/msm_gem.h           |  1 +
 drivers/gpu/drm/msm/msm_gem_submit.c    |  7 ++++---
 drivers/gpu/drm/msm/msm_gpu.c           | 36 +++++++++++++++++++--------------
 drivers/gpu/drm/msm/msm_gpu.h           |  8 +++++---
 drivers/gpu/drm/msm/msm_submitqueue.c   | 33 ++++++++++++++++++++++++------
 include/uapi/drm/msm_drm.h              |  1 +
 12 files changed, 85 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 6361193..3acbba1 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -106,13 +106,13 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	}
 
 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
-	OUT_RING(ring, submit->fence->seqno);
+	OUT_RING(ring, submit->seqno);
 
 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 	OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence)));
 	OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence)));
-	OUT_RING(ring, submit->fence->seqno);
+	OUT_RING(ring, submit->seqno);
 
 	gpu->funcs->flush(gpu);
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f1ab270..b1c1639 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -75,7 +75,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
 	gpu->rb->cur = gpu->rb->start;
 
 	/* reset completed fence seqno: */
-	adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
+	adreno_gpu->memptrs->fence = gpu->seqno;
 	adreno_gpu->memptrs->rptr  = 0;
 
 	/* Setup REG_CP_RB_CNTL: */
@@ -165,7 +165,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	}
 
 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
-	OUT_RING(ring, submit->fence->seqno);
+	OUT_RING(ring, submit->seqno);
 
 	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
 		/* Flush HLSQ lazy updates to make sure there is nothing
@@ -182,7 +182,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
 	OUT_RING(ring, CACHE_FLUSH_TS);
 	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
-	OUT_RING(ring, submit->fence->seqno);
+	OUT_RING(ring, submit->seqno);
 
 	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
 	OUT_PKT3(ring, CP_INTERRUPT, 1);
@@ -255,7 +255,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 			adreno_gpu->rev.patchid);
 
 	seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
-			gpu->fctx->last_fence);
+			gpu->seqno);
 	seq_printf(m, "rptr:     %d\n", get_rptr(adreno_gpu));
 	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
 
@@ -290,7 +290,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
 			adreno_gpu->rev.patchid);
 
 	printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
-			gpu->fctx->last_fence);
+			gpu->seqno);
 	printk("rptr:     %d\n", get_rptr(adreno_gpu));
 	printk("rb wptr:  %d\n", get_wptr(gpu->rb));
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f4bb909..9599d02 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -510,7 +510,7 @@ static void load_gpu(struct drm_device *dev)
 	mutex_unlock(&init_lock);
 }
 
-static int context_init(struct drm_file *file)
+static int context_init(struct drm_device *dev, struct drm_file *file)
 {
 	struct msm_file_private *ctx;
 
@@ -518,7 +518,7 @@ static int context_init(struct drm_file *file)
 	if (!ctx)
 		return -ENOMEM;
 
-	msm_submitqueue_init(ctx);
+	msm_submitqueue_init(dev, ctx);
 
 	file->driver_priv = ctx;
 
@@ -532,7 +532,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
 	 */
 	load_gpu(dev);
 
-	return context_init(file);
+	return context_init(dev, file);
 }
 
 static void context_close(struct msm_file_private *ctx)
@@ -746,6 +746,8 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 	struct msm_drm_private *priv = dev->dev_private;
 	struct drm_msm_wait_fence *args = data;
 	ktime_t timeout = to_ktime(args->timeout);
+	struct msm_gpu_submitqueue *queue;
+	int ret;
 
 	if (args->pad) {
 		DRM_ERROR("invalid pad: %08x\n", args->pad);
@@ -755,7 +757,14 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 	if (!priv->gpu)
 		return 0;
 
-	return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true);
+	queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+	if (!queue)
+		return -ENOENT;
+
+	ret = msm_wait_fence(queue->fctx, args->fence, &timeout, true);
+
+	msm_submitqueue_put(queue);
+	return ret;
 }
 
 static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
@@ -805,7 +814,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
 	if (args->flags & ~MSM_SUBMITQUEUE_FLAGS)
 		return -EINVAL;
 
-	return msm_submitqueue_create(file->driver_priv, args->prio,
+	return msm_submitqueue_create(dev, file->driver_priv, args->prio,
 		args->flags, &args->id);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index f4d8328..0fa9a7d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -314,11 +314,11 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 u32 msm_readl(const void __iomem *addr);
 
 struct msm_gpu_submitqueue;
-int msm_submitqueue_init(struct msm_file_private *ctx);
+int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
 struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
 		u32 id);
-int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio,
-		u32 flags, u32 *id);
+int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
+		u32 prio, u32 flags, u32 *id);
 int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
 void msm_submitqueue_close(struct msm_file_private *ctx);
 
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index a2f89ba..349c12f 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -31,7 +31,7 @@ struct msm_fence_context *
 		return ERR_PTR(-ENOMEM);
 
 	fctx->dev = dev;
-	fctx->name = name;
+	strncpy(fctx->name, name, sizeof(fctx->name));
 	fctx->context = dma_fence_context_alloc(1);
 	init_waitqueue_head(&fctx->event);
 	spin_lock_init(&fctx->spinlock);
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index 56061aa..1aa6a4c 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -22,7 +22,7 @@
 
 struct msm_fence_context {
 	struct drm_device *dev;
-	const char *name;
+	char name[32];
 	unsigned context;
 	/* last_fence == completed_fence --> no pending work */
 	uint32_t last_fence;          /* last assigned fence */
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 17f8a6c..c98d3a7 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -141,6 +141,7 @@ struct msm_gem_submit {
 	struct list_head node;   /* node in gpu submit_list */
 	struct list_head bo_list;
 	struct ww_acquire_ctx ticket;
+	uint32_t seqno;		/* Sequence number of the submit on the ring */
 	struct dma_fence *fence;
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index eb0b046..ac95816 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -233,7 +233,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit)
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
 
-		ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write);
+		ret = msm_gem_sync_object(&msm_obj->base, submit->queue->fctx,
+			write);
 		if (ret)
 			break;
 	}
@@ -426,7 +427,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		 * Wait if the fence is from a foreign context, or if the fence
 		 * array contains any fence from a foreign context.
 		 */
-		if (!dma_fence_match_context(in_fence, gpu->fctx->context)) {
+		if (!dma_fence_match_context(in_fence, queue->fctx->context)) {
 			ret = dma_fence_wait(in_fence, true);
 			if (ret)
 				return ret;
@@ -531,7 +532,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 	submit->nr_cmds = i;
 
-	submit->fence = msm_fence_alloc(gpu->fctx);
+	submit->fence = msm_fence_alloc(queue->fctx);
 	if (IS_ERR(submit->fence)) {
 		ret = PTR_ERR(submit->fence);
 		submit->fence = NULL;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 9f3dbc2..69bd60e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -221,6 +221,19 @@ int msm_gpu_hw_init(struct msm_gpu *gpu)
  * Hangcheck detection for locked gpu:
  */
 
+static void update_fences(struct msm_gpu *gpu, uint32_t fence)
+{
+	struct msm_gem_submit *submit;
+
+	list_for_each_entry(submit, &gpu->submit_list, node) {
+		if (submit->seqno > fence)
+			break;
+
+		msm_update_fence(submit->queue->fctx,
+			submit->fence->seqno);
+	}
+}
+
 static void retire_submits(struct msm_gpu *gpu);
 
 static void recover_worker(struct work_struct *work)
@@ -230,13 +243,13 @@ static void recover_worker(struct work_struct *work)
 	struct msm_gem_submit *submit;
 	uint32_t fence = gpu->funcs->last_fence(gpu);
 
-	msm_update_fence(gpu->fctx, fence + 1);
+	update_fences(gpu, fence + 1);
 
 	mutex_lock(&dev->struct_mutex);
 
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 	list_for_each_entry(submit, &gpu->submit_list, node) {
-		if (submit->fence->seqno == (fence + 1)) {
+		if (submit->seqno == (fence + 1)) {
 			struct task_struct *task;
 
 			rcu_read_lock();
@@ -286,7 +299,7 @@ static void hangcheck_handler(unsigned long data)
 	if (fence != gpu->hangcheck_fence) {
 		/* some progress has been made.. ya! */
 		gpu->hangcheck_fence = fence;
-	} else if (fence < gpu->fctx->last_fence) {
+	} else if (fence < gpu->seqno) {
 		/* no progress and not done.. hung! */
 		gpu->hangcheck_fence = fence;
 		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
@@ -294,12 +307,12 @@ static void hangcheck_handler(unsigned long data)
 		dev_err(dev->dev, "%s:     completed fence: %u\n",
 				gpu->name, fence);
 		dev_err(dev->dev, "%s:     submitted fence: %u\n",
-				gpu->name, gpu->fctx->last_fence);
+				gpu->name, gpu->seqno);
 		queue_work(priv->wq, &gpu->recover_work);
 	}
 
 	/* if still more pending work, reset the hangcheck timer: */
-	if (gpu->fctx->last_fence > gpu->hangcheck_fence)
+	if (gpu->seqno > gpu->hangcheck_fence)
 		hangcheck_timer_reset(gpu);
 
 	/* workaround for missing irq: */
@@ -451,7 +464,7 @@ static void retire_worker(struct work_struct *work)
 	struct drm_device *dev = gpu->dev;
 	uint32_t fence = gpu->funcs->last_fence(gpu);
 
-	msm_update_fence(gpu->fctx, fence);
+	update_fences(gpu, fence);
 
 	mutex_lock(&dev->struct_mutex);
 	retire_submits(gpu);
@@ -480,6 +493,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	msm_gpu_hw_init(gpu);
 
+	submit->seqno = ++gpu->seqno;
+
 	list_add_tail(&submit->node, &gpu->submit_list);
 
 	msm_rd_dump_submit(submit);
@@ -575,12 +590,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	gpu->dev = drm;
 	gpu->funcs = funcs;
 	gpu->name = name;
-	gpu->fctx = msm_fence_context_alloc(drm, name);
-	if (IS_ERR(gpu->fctx)) {
-		ret = PTR_ERR(gpu->fctx);
-		gpu->fctx = NULL;
-		goto fail;
-	}
 
 	INIT_LIST_HEAD(&gpu->active_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
@@ -693,7 +702,4 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 			msm_gem_put_iova(gpu->rb->bo, gpu->aspace);
 		msm_ringbuffer_destroy(gpu->rb);
 	}
-
-	if (gpu->fctx)
-		msm_fence_context_free(gpu->fctx);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index a890176..d8485ec 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -93,8 +93,8 @@ struct msm_gpu {
 	/* list of GEM active objects: */
 	struct list_head active_list;
 
-	/* fencing: */
-	struct msm_fence_context *fctx;
+	/* The sequence number for ring submissions */
+	uint32_t seqno;
 
 	/* does gpu need hw_init? */
 	bool needs_hw_init;
@@ -134,7 +134,7 @@ struct msm_gpu {
 
 static inline bool msm_gpu_active(struct msm_gpu *gpu)
 {
-	return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu);
+	return gpu->seqno > gpu->funcs->last_fence(gpu);
 }
 
 /* Perf-Counters:
@@ -157,6 +157,8 @@ struct msm_gpu_submitqueue {
 	int faults;
 	struct list_head node;
 	struct kref ref;
+
+	struct msm_fence_context *fctx;
 };
 
 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index a545dc6..3afb086 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -19,6 +19,7 @@ void msm_submitqueue_destroy(struct kref *kref)
 	struct msm_gpu_submitqueue *queue = container_of(kref,
 		struct msm_gpu_submitqueue, ref);
 
+	msm_fence_context_free(queue->fctx);
 	kfree(queue);
 }
 
@@ -60,16 +61,17 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
 		msm_submitqueue_put(entry);
 }
 
-int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
-		u32 *id)
+int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
+		u32 prio, u32 flags, u32 *id)
 {
 	struct msm_gpu_submitqueue *queue;
+	char name[32];
+	int ret = 0;
 
 	if (!ctx)
 		return -ENODEV;
 
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
-
 	if (!queue)
 		return -ENOMEM;
 
@@ -86,12 +88,31 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
 
 	list_add_tail(&queue->node, &ctx->submitqueues);
 
+	/*
+	 * Get another reference to the queue before releasing the lock
+	 * so it does't get destroyed while we are still setting it up
+	 */
+	kref_get(&queue->ref);
+
 	write_unlock(&ctx->queuelock);
 
-	return 0;
+	/* FIXME: What should the name be? */
+	sprintf(name, "gpu-queue-%d", queue->id);
+
+	/* Allocate a fence domain for the queue */
+
+	queue->fctx = msm_fence_context_alloc(drm, name);
+	if (IS_ERR(queue->fctx)) {
+		ret = PTR_ERR(queue->fctx);
+		msm_submitqueue_put(queue);
+	}
+
+	msm_submitqueue_put(queue);
+
+	return ret;
 }
 
-int msm_submitqueue_init(struct msm_file_private *ctx)
+int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
 {
 	if (!ctx)
 		return 0;
@@ -104,7 +125,7 @@ int msm_submitqueue_init(struct msm_file_private *ctx)
 	 * Add the "default" submitqueue with id 0
 	 * "medium" priority (3) and no flags
 	 */
-	return msm_submitqueue_create(ctx, 3, 0, NULL);
+	return msm_submitqueue_create(drm, ctx, 3, 0, NULL);
 }
 
 int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id)
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 99826bd..a06bf97 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -232,6 +232,7 @@ struct drm_msm_wait_fence {
 	__u32 fence;          /* in */
 	__u32 pad;
 	struct drm_msm_timespec timeout;   /* in */
+	__u32 queueid;		/* in, submitqueue id that owns the fence */
 };
 
 /* madvise provides a way to tell the kernel in case a buffers contents
-- 
1.9.1