[PATCH 3/3] drm/radeon: Provide a wait for CS completion interface

Simon Farnsworth simon.farnsworth at onelan.co.uk
Fri Feb 3 08:30:29 PST 2012


Provide a mechanism for userspace to wait for kernel fences, and teach the
CS ioctl to return enough information to let userspace use this mechanism.

This mechanism isn't safe to include as-is, as the CS ioctl has no way to
indicate that it has failed to return the information userspace needs in
order to wait for a kernel fence. I'm thus sending it out as a starting
point in case someone else wants to continue digging into this.

Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---
Note the FIXME - how should the CS ioctl report a failure to copy the fence
information back to userspace?
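
For clarity, here's a rough, untested sketch of how I expect userspace to
drive the new interface, using libdrm's drmCommandWriteRead() against the
patched radeon_drm.h. The submit_and_wait() helper is purely illustrative -
the 16-entry bound is arbitrary, and I assume the caller's chunk array has
room for the extra fence chunk:

#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/* Illustration only: append a fence chunk to an already-built CS (IB and
 * reloc chunks prepared by the caller), submit it, then wait for it to
 * complete using the tokens the kernel wrote back. */
static int submit_and_wait(int fd, struct drm_radeon_cs_chunk *chunks,
			   uint32_t num_chunks, uint64_t timeout_usec)
{
	struct drm_radeon_fence_wait fence;
	uint64_t chunk_array[16];	/* arbitrary bound for the sketch */
	struct drm_radeon_cs cs;
	uint32_t i;
	int r;

	if (num_chunks + 1 > 16)
		return -EINVAL;

	memset(&fence, 0, sizeof(fence));

	/* The kernel fills in ring_token and fence_token if it accepts
	 * the CS. */
	chunks[num_chunks].chunk_id = RADEON_CHUNK_ID_FENCE;
	chunks[num_chunks].length_dw = sizeof(fence) / 4;
	chunks[num_chunks].chunk_data = (uint64_t)(uintptr_t)&fence;
	num_chunks++;

	for (i = 0; i < num_chunks; i++)
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];

	memset(&cs, 0, sizeof(cs));
	cs.num_chunks = num_chunks;
	cs.chunks = (uint64_t)(uintptr_t)chunk_array;

	r = drmCommandWriteRead(fd, DRM_RADEON_CS, &cs, sizeof(cs));
	if (r)
		return r;

	/* This is where the FIXME bites: nothing tells us whether the
	 * tokens were really filled in. */
	fence.timeout_usec = timeout_usec;
	return drmCommandWriteRead(fd, DRM_RADEON_FENCE_WAIT,
				   &fence, sizeof(fence));
}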

I'm also a little concerned that my general userspace API isn't flexible
enough to match what we actually want - in particular, it would be nice for
GL_ARB_sync to be able to plonk fences down scattered throughout the IB and
wait on each of those fences individually, rather than on the whole CS. I
couldn't easily determine whether it was safe to add a kernel fence to a
userspace IB during parsing, though, so I didn't attempt that.

Finally, this mechanism (unlike the wait mechanism I proposed in
[PATCH] drm/radeon: Add support for userspace fence waits) doesn't do
anything to protect us against malicious userspace causing interrupt storms
- userspace can still fill an IB with EVENT_WRITE_EOP packets requesting an
interrupt, and this mechanism lets userspace ensure that the interrupt is
enabled.

Someone needs to decide if that's a risk worth worrying about - if so, the
CS checkers need to reject attempts by userspace to request interrupts on
completion.
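
To make that concrete, the check would amount to scanning each IB for
EVENT_WRITE_EOP packets whose INT_SEL field is non-zero. Here's a rough,
standalone sketch - the ib_requests_irq() name and the open-coded constants
are mine, based on my reading of the r600 PM4 format (opcode 0x47, INT_SEL
at bits 25:24 of the third data dword), and it isn't wired into the real
r600/evergreen CS checkers:

#include <stdbool.h>
#include <stdint.h>

/* Illustration only: return true if the IB contains an EVENT_WRITE_EOP
 * packet that asks the GPU to raise an interrupt on completion. */
static bool ib_requests_irq(const uint32_t *ib, unsigned int length_dw)
{
	unsigned int i = 0;

	while (i < length_dw) {
		uint32_t header = ib[i];
		uint32_t type = header >> 30;
		uint32_t count = (header >> 16) & 0x3fff;

		switch (type) {
		case 3:	/* type-3 packet: header + (count + 1) data dwords */
			if (((header >> 8) & 0xff) == 0x47 &&	/* EVENT_WRITE_EOP */
			    i + 3 < length_dw &&
			    (ib[i + 3] & (0x3u << 24)))		/* INT_SEL != 0 */
				return true;
			i += count + 2;
			break;
		case 0:	/* type-0 register write: header + (count + 1) dwords */
			i += count + 2;
			break;
		case 2:	/* type-2 filler NOP: a single dword */
			i++;
			break;
		default:	/* type-1 is reserved; treat a malformed IB as
				 * requesting an interrupt so it gets rejected */
			return true;
		}
	}
	return false;
}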

 drivers/gpu/drm/radeon/radeon.h       |    3 +++
 drivers/gpu/drm/radeon/radeon_cs.c    |   25 +++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_fence.c |   16 ++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c   |    1 +
 include/drm/radeon_drm.h              |   15 +++++++++++++++
 5 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b4cfb11..f26744a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -800,6 +800,7 @@ struct radeon_cs_parser {
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
 	int			chunk_flags_idx;
+	int			chunk_fence_idx;
 	struct radeon_ib	*ib;
 	void			*track;
 	unsigned		family;
@@ -1348,6 +1349,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug, default vram page */
 struct r600_vram_scratch {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 435a3d9..e0e8165 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -163,6 +163,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	p->chunk_ib_idx = -1;
 	p->chunk_relocs_idx = -1;
 	p->chunk_flags_idx = -1;
+	p->chunk_fence_idx = -1;
 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (p->chunks_array == NULL) {
 		return -ENOMEM;
@@ -207,6 +208,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FENCE) {
+			p->chunk_fence_idx = i;
+			/* A fence chunk must be able to store the end-of-stream fence */
+			if (p->chunks[i].length_dw != (sizeof(struct drm_radeon_fence_wait) / 4))
+				return -EINVAL;
+		}
 
 		p->chunks[i].length_dw = user_chunk.length_dw;
 		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
@@ -279,6 +286,24 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	return 0;
 }
 
+static void radeon_cs_parser_output_fence(struct radeon_cs_parser *parser)
+{
+	struct drm_radeon_fence_wait output;
+	void __user *chunk;
+
+	memset(&output, 0, sizeof(struct drm_radeon_fence_wait));
+
+	if (parser->chunk_fence_idx != -1) {
+		chunk = (void __user *)(unsigned long)parser->chunks_array[parser->chunk_fence_idx];
+		output.ring_token = parser->ib->fence->ring;
+		output.fence_token = parser->ib->fence->seq;
+		/* FIXME: What do I do about an error here? */
+		DRM_COPY_TO_USER(chunk,
+				 &output,
+				 sizeof(struct drm_radeon_fence_wait));
+	}
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 85893c3..50bb054 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -356,6 +356,22 @@ int radeon_fence_wait_value(struct radeon_device *rdev, int ring,
 	return r;
 }
 
+int radeon_fence_wait_value_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_fence_wait *args = data;
+	int r;
+
+	r = radeon_fence_wait_value(rdev,
+				    args->ring_token,
+				    args->fence_token,
+				    usecs_to_jiffies(args->timeout_usec));
+	if (r > 0)
+		r = 0;
+
+	return r;
+}
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
 	kref_get(&fence->kref);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d335288..a52db06 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_FENCE_WAIT, radeon_fence_wait_value_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dd2e9cf..d81ce94 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -510,6 +510,7 @@ typedef struct {
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
 #define DRM_RADEON_GEM_VA		0x2b
+#define DRM_RADEON_FENCE_WAIT		0x2c
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -552,6 +553,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_FENCE_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_FENCE_WAIT, struct drm_radeon_fence_wait)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -906,6 +908,12 @@ struct drm_radeon_gem_va {
 #define RADEON_CHUNK_ID_RELOCS	0x01
 #define RADEON_CHUNK_ID_IB	0x02
 #define RADEON_CHUNK_ID_FLAGS	0x03
+#define RADEON_CHUNK_ID_FENCE   0x04
+
+/* A RADEON_CHUNK_ID_FENCE is a struct drm_radeon_fence_wait. If the CS is
+ * accepted, the kernel will fill in the fence_token and ring_token elements
+ * such that userspace just needs to fill in timeout_usec and call the
+ * DRM_RADEON_FENCE_WAIT ioctl to wait for this CS to complete.  */
 
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
@@ -967,4 +975,11 @@ struct drm_radeon_info {
 	uint64_t		value;
 };
 
+struct drm_radeon_fence_wait {
+	uint64_t		fence_token;
+	uint64_t		timeout_usec;
+	uint32_t		ring_token;
+	uint32_t		padding;
+};
+
 #endif
-- 
1.7.6.4


