[PATCH 2/3] drm/radeon: add command submission IDs

Christian König deathsimple at vodafone.de
Thu Sep 18 08:30:12 PDT 2014


From: Christian König <christian.koenig at amd.com>

This patch adds a new 64bit ID as a result to each command submission.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/radeon/Makefile     |   2 +-
 drivers/gpu/drm/radeon/radeon.h     |  13 +-
 drivers/gpu/drm/radeon/radeon_cs.c  |  13 ++
 drivers/gpu/drm/radeon/radeon_kms.c |  41 +++----
 drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/radeon_drm.h       |   1 +
 6 files changed, 277 insertions(+), 22 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 8cc9f68..21ee928 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
 	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
-	radeon_sync.o
+	radeon_sync.o radeon_seq.o
 
 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b3b4e96..dbfd346 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -423,6 +423,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
 }
 
 /*
+ * Userspace command submission identifier generation
+ */
+struct radeon_seq;
+
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
+void radeon_seq_destroy(struct radeon_seq **seq);
+
+/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -946,7 +955,9 @@ struct radeon_vm_manager {
  * file private structure
  */
 struct radeon_fpriv {
-	struct radeon_vm		vm;
+	struct radeon_vm	vm;
+	struct mutex		seq_lock;
+	struct radeon_seq	*seq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index b8f20a5..0c0f0b3 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -413,6 +413,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	unsigned i;
 
 	if (!error) {
+		if (parser->chunk_flags &&
+		    parser->chunk_flags->length_dw > 4) {
+			struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+			uint32_t __user *to = parser->chunk_flags->user_ptr;
+			uint64_t id;
+
+			/* generate the CS ID under the per-file lock */
+			mutex_lock(&fpriv->seq_lock);
+			id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
+			mutex_unlock(&fpriv->seq_lock);
+
+			/* copy_to_user() is __must_check; the submission has
+			 * already been committed to the hardware, so on fault
+			 * just report that the ID was lost to userspace
+			 * instead of failing the whole CS.
+			 */
+			if (copy_to_user(&to[3], &id, sizeof(uint64_t)))
+				DRM_ERROR("Failed to copy CS ID to userspace\n");
+		}
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 85ee6f7..38880af 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -578,39 +578,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
  */
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
 	struct radeon_device *rdev = dev->dev_private;
 	int r;
 
-	file_priv->driver_priv = NULL;
+	if (unlikely(!fpriv))
+		return -ENOMEM;
+
+	file_priv->driver_priv = fpriv;
 
 	r = pm_runtime_get_sync(dev->dev);
 	if (r < 0)
-		return r;
+		goto error;
 
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN) {
-		struct radeon_fpriv *fpriv;
 		struct radeon_vm *vm;
 		int r;
 
-		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-		if (unlikely(!fpriv)) {
-			return -ENOMEM;
-		}
-
 		vm = &fpriv->vm;
 		r = radeon_vm_init(rdev, vm);
-		if (r) {
-			kfree(fpriv);
-			return r;
-		}
+		if (r)
+			goto error;
 
 		if (rdev->accel_working) {
 			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 
 			/* map the ib pool buffer read only into
@@ -623,16 +618,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 						  RADEON_VM_PAGE_SNOOPED);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 		}
-		file_priv->driver_priv = fpriv;
 	}
 
+	mutex_init(&fpriv->seq_lock);
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 	return 0;
+
+error:
+	kfree(fpriv);
+	return r;
 }
 
 /**
@@ -646,11 +645,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 void radeon_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = file_priv->driver_priv;
 	struct radeon_device *rdev = dev->dev_private;
 
+	radeon_seq_destroy(&fpriv->seq);
+
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
-		struct radeon_fpriv *fpriv = file_priv->driver_priv;
 		struct radeon_vm *vm = &fpriv->vm;
 		int r;
 
@@ -664,9 +665,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
 		}
 
 		radeon_vm_fini(rdev, vm);
-		kfree(fpriv);
-		file_priv->driver_priv = NULL;
 	}
+	kfree(fpriv);
+	file_priv->driver_priv = NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c
new file mode 100644
index 0000000..d8857f1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_seq.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig at amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+
+/*
+ * ID sequences
+ * This code generates a 64bit identifier for a command submission.
+ * It works by adding the fence of the command submission to an automatically
+ * resizing ring buffer.
+ */
+
+struct radeon_seq {
+	uint64_t		start;		/* oldest ID still stored in the ring */
+	uint64_t		end;		/* next ID to be assigned (one past last) */
+	uint64_t		mask;		/* ring size - 1; size is a power of two */
+	struct radeon_seq	*replacement;	/* larger ring superseding this one, or NULL */
+};
+
+/**
+ * radeon_seq_create - create a new sequence object
+ *
+ * @start: start value for this sequence
+ * @size: size of the ring buffer, must be power of two
+ *
+ * Allocate and initialize a new ring buffer and header.
+ * The fence pointer ring lives in the same allocation, directly
+ * behind the header (see radeon_seq_ring()).
+ * Returns NULL if allocation fails, new object otherwise.
+ */
+static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
+{
+	/* header plus the trailing array of fence pointers */
+	unsigned bytes = sizeof(struct radeon_seq) +
+		size * sizeof(struct radeon_fence *);
+
+	struct radeon_seq *seq;
+
+	seq = kmalloc(bytes, GFP_KERNEL);
+	if (!seq)
+		return NULL;
+
+	/* start == end means the ring is empty */
+	seq->start = start;
+	seq->end = start;
+	seq->mask = size - 1;
+	seq->replacement = NULL;
+
+	return seq;
+}
+
+/**
+ * radeon_seq_ring - get pointer to ring buffer
+ *
+ * @seq: sequence object
+ *
+ * The fence pointer ring is placed directly behind the header in
+ * the same allocation, so simply step over the header.
+ */
+static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
+{
+	return (struct radeon_fence **)(seq + 1);
+}
+
+/**
+ * radeon_seq_try_free - try to free fences from the ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Drop the references of already signaled fences at the start of
+ * the ring buffer, stopping at the first fence that is still
+ * pending.
+ */
+static void radeon_seq_try_free(struct radeon_seq *seq)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+
+	for (; seq->start != seq->end; ++seq->start) {
+		struct radeon_fence *fence = ring[seq->start & seq->mask];
+
+		if (!radeon_fence_signaled(fence))
+			break;
+
+		radeon_fence_unref(&fence);
+	}
+}
+
+/**
+ * radeon_seq_add - add new fence to the end of the ring buffer
+ *
+ * @seq: sequence object
+ * @f: the fence object
+ *
+ * Take a reference of the fence and store it at the end of the
+ * ring buffer. The caller must ensure there is room for one more
+ * entry. Returns the generated ID.
+ */
+static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+	uint64_t id = seq->end++;
+
+	ring[id & seq->mask] = radeon_fence_ref(f);
+	return id;
+}
+
+/**
+ * radeon_seq_push - check for room and add the fence
+ *
+ * @seq: pointer to the current sequence object, may be updated
+ * @fence: the fence object
+ *
+ * Check for room on the ring buffer, if there isn't enough
+ * reallocate the sequence object and add the fence.
+ * Returns the generated ID, or 0 if allocating a replacement
+ * failed (the fence is then waited on instead).
+ * NOTE(review): callers are expected to hold the per-file
+ * seq_lock while calling this — confirm against radeon_cs.c.
+ */
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
+{
+	unsigned size_for_new_seq = 4;
+	uint64_t start_for_new_seq = 1; /* 0 is reserved as the failure return */
+
+	if (*seq) {
+		/* try to release old replacements */
+		while ((*seq)->replacement) {
+			radeon_seq_try_free(*seq);
+			if ((*seq)->start == (*seq)->end) {
+				/* old ring fully drained, replacement takes over */
+				struct radeon_seq *repl = (*seq)->replacement;
+
+				kfree(*seq);
+				*seq = repl;
+			} else {
+				/* move on to the current container */
+				seq = &(*seq)->replacement;
+			}
+		}
+
+		/* check if we have enough room for one more fence */
+		radeon_seq_try_free(*seq);
+		if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
+			return radeon_seq_add(*seq, fence);
+
+		/* not enough room, let's allocate a replacement with
+		 * double the size, chained behind the current object */
+		size_for_new_seq = ((*seq)->mask + 1) * 2;
+		start_for_new_seq = (*seq)->end + 1;
+		seq = &(*seq)->replacement;
+	}
+
+	*seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
+	if (!*seq) {
+		/* not enough memory for a new sequence object, but failing
+		   here isn't a good idea either cause the commands are already
+		   submitted to the hardware. So just block on the fence. */
+		int r = radeon_fence_wait(fence, false);
+		if (r)
+			DRM_ERROR("Error waiting for fence (%d)\n", r);
+		return 0;
+	}
+	return radeon_seq_add(*seq, fence);
+}
+
+/**
+ * radeon_seq_query - look up a fence by its ID
+ *
+ * @seq: sequence object
+ * @id: the generated ID
+ *
+ * Look up the fence associated with the given ID, walking the
+ * chain of replacement rings as needed.
+ * Returns the fence object or NULL if it couldn't be found.
+ * NOTE(review): no reference is taken on the returned fence;
+ * the caller must hold the lock protecting the sequence while
+ * using the result — confirm with callers.
+ */
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
+{
+	struct radeon_fence **ring;
+
+	/* seq->end is the next ID to be assigned, so valid IDs for
+	 * this object are in [start, end); an ID at or beyond end can
+	 * only live in a replacement. Using ">" here would return a
+	 * stale ring slot for id == end.
+	 */
+	while (seq && id >= seq->end)
+		seq = seq->replacement;
+
+	if (!seq || id < seq->start)
+		return NULL;
+
+	ring = radeon_seq_ring(seq);
+	return ring[id & seq->mask];
+}
+
+/**
+ * radeon_seq_destroy - destroy the sequence object
+ *
+ * @seq_ptr: pointer to sequence object
+ *
+ * Destroy the sequence objects and release all fence references
+ * taken, following the chain of replacement objects.
+ */
+void radeon_seq_destroy(struct radeon_seq **seq_ptr)
+{
+	struct radeon_seq *seq = *seq_ptr;
+	while (seq) {
+		struct radeon_seq *repl = seq->replacement;
+		struct radeon_fence **ring = radeon_seq_ring(seq);
+		uint64_t i;
+
+		/* iterate the unmasked IDs and mask each index
+		 * individually; masking start/end before the loop
+		 * would skip entries whenever the ring has wrapped
+		 * (start & mask >= end & mask, e.g. a replaced full
+		 * ring) and leak the fence references.
+		 */
+		for (i = seq->start; i != seq->end; ++i)
+			radeon_fence_unref(&ring[i & seq->mask]);
+
+		kfree(seq);
+		seq = repl;
+	}
+	*seq_ptr = NULL;
+}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 50d0fb4..6b2b2e7 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -959,6 +959,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE          4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords are a 64bit fence ID generated for this CS */
 
 struct drm_radeon_cs_chunk {
 	uint32_t		chunk_id;
-- 
1.9.1



More information about the dri-devel mailing list