[PATCH] drm/radeon: Add support for userspace fence waits

Simon Farnsworth simon.farnsworth at onelan.co.uk
Tue Jan 31 05:18:02 PST 2012


Userspace currently busywaits for fences to complete; on my workload, this
busywait consumes 10% of the available CPU time.

Provide an ioctl so that userspace can wait for an EOP interrupt that
corresponds to a previous EVENT_WRITE_EOP.

This patch does not work yet, which is why the debug printk() calls have been left in.

Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---
 drivers/gpu/drm/radeon/evergreen.c     |    8 ++--
 drivers/gpu/drm/radeon/radeon.h        |    3 +
 drivers/gpu/drm/radeon/radeon_device.c |    1 +
 drivers/gpu/drm/radeon/radeon_fence.c  |    3 +
 drivers/gpu/drm/radeon/radeon_gem.c    |   70 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c    |    1 +
 include/drm/radeon_drm.h               |   28 +++++++++++++
 7 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 0c5dd78..5b886b0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3083,11 +3083,11 @@ restart_ih:
 		case 176: /* CP_INT in ring buffer */
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
-			DRM_DEBUG("IH: CP int: 0x%08x\n", src_data);
+			printk(KERN_INFO "IH: CP int: 0x%08x\n", src_data);
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 181: /* CP EOP event */
-			DRM_DEBUG("IH: CP EOP\n");
+			printk(KERN_INFO "IH: CP EOP\n");
 			if (rdev->family >= CHIP_CAYMAN) {
 				switch (src_data) {
 				case 0:
@@ -3104,12 +3104,12 @@ restart_ih:
 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
 		case 233: /* GUI IDLE */
-			DRM_DEBUG("IH: GUI idle\n");
+			printk(KERN_INFO "IH: GUI idle\n");
 			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 			break;
 		default:
-			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+			printk(KERN_INFO "Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
 		}
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2859406..fb0eafd 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1348,6 +1348,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug, default vram page */
 struct r600_vram_scratch {
@@ -1444,6 +1446,7 @@ struct radeon_device {
 	struct radeon_mman		mman;
 	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
+	wait_queue_head_t		userspace_fence_wait_queue;
 	struct radeon_semaphore_driver	semaphore_drv;
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	struct radeon_ib_pool		ib_pool;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0afb13b..dcf11e5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -730,6 +730,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
 	rwlock_init(&rdev->fence_lock);
+	init_waitqueue_head(&rdev->userspace_fence_wait_queue);
 	rwlock_init(&rdev->semaphore_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 64ea3dd..5b8270f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -355,7 +355,10 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
+		printk( KERN_INFO "Woke kernel fences\n" );
 	}
+	printk( KERN_INFO "Waking up all waiters\n" );
+	wake_up_interruptible_all(&rdev->userspace_fence_wait_queue);
 }
 
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 7337850..6866f75 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -531,3 +531,73 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 {
 	return drm_gem_handle_delete(file_priv, handle);
 }
+
+/*
+ * Sleep until the dword at args->offset in the BO named by args->handle
+ * differs from args->value.  Returns 0 on change, -ETIMEDOUT if it still
+ * matches after args->timeout_usec, -EINVAL for a misaligned or out-of-range
+ * offset, -ENOENT for an unknown handle, or the error from reserving,
+ * pinning or mapping the BO, or the negative value returned by the wait.
+ * NOTE(review): the BO stays reserved and pinned for the whole sleep.
+ */
+int radeon_gem_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait_user_fence *args = data;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	void *buffer_data;
+	uint32_t *fence_data;
+	long timeout;
+	int r;
+
+	printk( KERN_INFO "wait_user_fence offset %llu value %u timeout %llu\n", args->offset, args->value, args->timeout_usec );
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL)
+		return -ENOENT;
+	robj = gem_to_radeon_bo(gobj);
+	/* The full dword must lie inside the BO at a DWORD-aligned offset. */
+	if ((args->offset & 3) || gobj->size < sizeof(uint32_t) ||
+	    args->offset > gobj->size - sizeof(uint32_t)) {
+		printk( KERN_INFO "Offset too large\n" );
+		r = -EINVAL;
+		goto unreference;
+	}
+	r = radeon_bo_reserve(robj, true);
+	if (r) {
+		printk( KERN_INFO "Reserve fail\n" );
+		goto unreference;
+	}
+	r = radeon_bo_pin(robj, RADEON_GEM_DOMAIN_GTT, NULL);
+	if (r) {
+		printk( KERN_INFO "Pin fail\n" );
+		goto unreserve;
+	}
+	r = radeon_bo_kmap(robj, &buffer_data);
+	if (r) {
+		printk( KERN_INFO "kmap fail\n" );
+		goto unpin;
+	}
+	fence_data = (uint32_t *)buffer_data + (args->offset >> 2);
+	printk( KERN_INFO "Current data value %u\n", *fence_data );
+	/* usecs_to_jiffies() takes an unsigned int: clamp rather than truncate. */
+	timeout = wait_event_interruptible_timeout(rdev->userspace_fence_wait_queue,
+						   *fence_data != args->value,
+						   usecs_to_jiffies(args->timeout_usec > 0xffffffffULL ? 0xffffffffU : args->timeout_usec));
+	/* Only report a timeout if the dword really is still unchanged. */
+	if (timeout == 0 && *fence_data == args->value)
+		r = -ETIMEDOUT;
+	else if (timeout < 0)
+		r = timeout;
+	printk( KERN_INFO "wait_user_fence offset %llu value %u timeout %llu\n", args->offset, args->value, args->timeout_usec );
+	printk( KERN_INFO "Finished data value %u\n", *fence_data );
+	radeon_bo_kunmap(robj);
+unpin:
+	radeon_bo_unpin(robj);
+unreserve:
+	radeon_bo_unreserve(robj);
+unreference:
+	drm_gem_object_unreference_unlocked(gobj);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d335288..0e552cc 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -496,5 +496,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_USER_FENCE, radeon_gem_wait_user_fence_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dd2e9cf..c261c8c 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -510,6 +510,7 @@ typedef struct {
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
 #define DRM_RADEON_GEM_VA		0x2b
+#define DRM_RADEON_GEM_WAIT_USER_FENCE  0x2c
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -552,6 +553,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
+#define DRM_IOCTL_RADEON_GEM_WAIT_USER_FENCE   DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_USER_FENCE, struct drm_radeon_gem_wait_user_fence)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -967,4 +969,30 @@ struct drm_radeon_info {
 	uint64_t		value;
 };
 
+/**
+ * struct drm_radeon_gem_wait_user_fence - DRM_RADEON_GEM_WAIT_USER_FENCE ioctl param
+ *
+ * @handle: Handle for the object that the GPU is expected to write
+ * @offset: DWORD-aligned byte offset in that object the GPU is expected to write
+ * @value: The value expected if the GPU has not yet written to this location
+ * @timeout_usec: The maximum time to wait for the GPU, in microseconds
+ *
+ * The DRM_RADEON_GEM_WAIT_USER_FENCE ioctl lets userspace avoid busy-waiting
+ * for an EVENT_WRITE_EOP packet to complete (e.g. for fence sync objects in
+ * OpenGL). It expects the packet to have requested an interrupt on completion.
+ *
+ * The ioctl will return immediately if the value supplied is not the value
+ * found in the buffer at offset bytes in; otherwise, it will sleep for up
+ * to timeout_usec, waking up when an EVENT_WRITE_EOP packet causes an
+ * interrupt and the value in the buffer might have changed.
+ */
+struct drm_radeon_gem_wait_user_fence {
+	uint32_t                handle;
+	uint32_t                pad0;	/* explicit: same layout on 32- and 64-bit ABIs */
+	uint64_t                offset;
+	uint32_t                value;
+	uint32_t                pad1;	/* explicit: keeps timeout_usec 64-bit aligned */
+	uint64_t                timeout_usec;
+};
+
 #endif
-- 
1.7.6.4



More information about the dri-devel mailing list