[PATCH 04/13] drm/vmwgfx: Add kernel throttling support. Bump minor.

Tue Apr 27 10:45:36 PDT 2010

The throttle_us member in the execbuf argument is now honored.
If the member is 0, no waiting for lag will occur, which
guarantees backwards compatibility with well-behaved clients.

Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
---
 drivers/gpu/drm/vmwgfx/Makefile         |    2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     |   27 +++++-
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |    9 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  173 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c    |    5 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c     |   17 +++-
 6 files changed, 227 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 1a3cb68..4505e17 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -4,6 +4,6 @@ ccflags-y := -Iinclude/drm
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
-	    vmwgfx_overlay.o
+	    vmwgfx_overlay.o vmwgfx_fence.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 356dc93..9a0a82b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -41,7 +41,7 @@
 
 #define VMWGFX_DRIVER_DATE "20100209"
 #define VMWGFX_DRIVER_MAJOR 1
-#define VMWGFX_DRIVER_MINOR 0
+#define VMWGFX_DRIVER_MINOR 1
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
@@ -102,6 +102,13 @@ struct vmw_surface {
 	struct vmw_cursor_snooper snooper;
 };
 
+struct vmw_fence_queue {
+	struct list_head head;
+	struct timespec lag;
+	struct timespec lag_time;
+	spinlock_t lock;
+};
+
 struct vmw_fifo_state {
 	unsigned long reserved_size;
 	__le32 *dynamic_buffer;
@@ -115,6 +122,7 @@ struct vmw_fifo_state {
 	uint32_t capabilities;
 	struct mutex fifo_mutex;
 	struct rw_semaphore rwsem;
+	struct vmw_fence_queue fence_queue;
 };
 
 struct vmw_relocation {
@@ -441,6 +449,23 @@ extern int vmw_fallback_wait(struct vmw_private *dev_priv,
 			     uint32_t sequence,
 			     bool interruptible,
 			     unsigned long timeout);
+extern void vmw_update_sequence(struct vmw_private *dev_priv,
+				struct vmw_fifo_state *fifo_state);
+
+
+/**
+ * Rudimentary fence objects currently used only for throttling -
+ * vmwgfx_fence.c
+ */
+
+extern void vmw_fence_queue_init(struct vmw_fence_queue *queue);
+extern void vmw_fence_queue_takedown(struct vmw_fence_queue *queue);
+extern int vmw_fence_push(struct vmw_fence_queue *queue,
+			  uint32_t sequence);
+extern int vmw_fence_pull(struct vmw_fence_queue *queue,
+			  uint32_t signaled_sequence);
+extern int vmw_wait_lag(struct vmw_private *dev_priv,
+			struct vmw_fence_queue *queue, uint32_t us);
 
 /**
  * Kernel framebuffer - vmwgfx_fb.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0897359..0e425a4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -669,6 +669,15 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 		goto out_err;
 
 	vmw_apply_relocations(sw_context);
+
+	if (arg->throttle_us) {
+		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.fence_queue,
+				   arg->throttle_us);
+
+		if (unlikely(ret != 0))
+			goto out_err;
+	}
+
 	vmw_fifo_commit(dev_priv, arg->command_size);
 
 	ret = vmw_fifo_send_fence(dev_priv, &sequence);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
new file mode 100644
index 0000000..61eacc1
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -0,0 +1,173 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2010 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "vmwgfx_drv.h"
+
+struct vmw_fence {
+	struct list_head head;
+	uint32_t sequence;
+	struct timespec submitted;
+};
+
+void vmw_fence_queue_init(struct vmw_fence_queue *queue)
+{
+	INIT_LIST_HEAD(&queue->head);
+	queue->lag = ns_to_timespec(0);
+	getrawmonotonic(&queue->lag_time);
+	spin_lock_init(&queue->lock);
+}
+
+void vmw_fence_queue_takedown(struct vmw_fence_queue *queue)
+{
+	struct vmw_fence *fence, *next;
+
+	spin_lock(&queue->lock);
+	list_for_each_entry_safe(fence, next, &queue->head, head) {
+		kfree(fence);
+	}
+	spin_unlock(&queue->lock);
+}
+
+int vmw_fence_push(struct vmw_fence_queue *queue,
+		   uint32_t sequence)
+{
+	struct vmw_fence *fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+
+	if (unlikely(!fence))
+		return -ENOMEM;
+
+	fence->sequence = sequence;
+	getrawmonotonic(&fence->submitted);
+	spin_lock(&queue->lock);
+	list_add_tail(&fence->head, &queue->head);
+	spin_unlock(&queue->lock);
+
+	return 0;
+}
+
+int vmw_fence_pull(struct vmw_fence_queue *queue,
+		   uint32_t signaled_sequence)
+{
+	struct vmw_fence *fence, *next;
+	struct timespec now;
+	bool updated = false;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+
+	if (list_empty(&queue->head)) {
+		queue->lag = ns_to_timespec(0);
+		queue->lag_time = now;
+		updated = true;
+		goto out_unlock;
+	}
+
+	list_for_each_entry_safe(fence, next, &queue->head, head) {
+		if (signaled_sequence - fence->sequence > (1 << 30))
+			continue;
+
+		queue->lag = timespec_sub(now, fence->submitted);
+		queue->lag_time = now;
+		updated = true;
+		list_del(&fence->head);
+		kfree(fence);
+	}
+
+out_unlock:
+	spin_unlock(&queue->lock);
+
+	return (updated) ? 0 : -EBUSY;
+}
+
+static struct timespec vmw_timespec_add(struct timespec t1,
+					struct timespec t2)
+{
+	t1.tv_sec += t2.tv_sec;
+	t1.tv_nsec += t2.tv_nsec;
+	if (t1.tv_nsec >= 1000000000L) {
+		t1.tv_sec += 1;
+		t1.tv_nsec -= 1000000000L;
+	}
+
+	return t1;
+}
+
+static struct timespec vmw_fifo_lag(struct vmw_fence_queue *queue)
+{
+	struct timespec now;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+	queue->lag = vmw_timespec_add(queue->lag,
+				      timespec_sub(now, queue->lag_time));
+	queue->lag_time = now;
+	spin_unlock(&queue->lock);
+	return queue->lag;
+}
+
+
+static bool vmw_lag_lt(struct vmw_fence_queue *queue,
+		       uint32_t us)
+{
+	struct timespec lag, cond;
+
+	cond = ns_to_timespec((s64) us * 1000);
+	lag = vmw_fifo_lag(queue);
+	return (timespec_compare(&lag, &cond) < 1);
+}
+
+int vmw_wait_lag(struct vmw_private *dev_priv,
+		 struct vmw_fence_queue *queue, uint32_t us)
+{
+	struct vmw_fence *fence;
+	uint32_t sequence;
+	int ret;
+
+	while (!vmw_lag_lt(queue, us)) {
+		spin_lock(&queue->lock);
+		if (list_empty(&queue->head))
+			sequence = atomic_read(&dev_priv->fence_seq);
+		else {
+			fence = list_first_entry(&queue->head,
+						 struct vmw_fence, head);
+			sequence = fence->sequence;
+		}
+		spin_unlock(&queue->lock);
+
+		ret = vmw_wait_fence(dev_priv, false, sequence, true,
+				     3*HZ);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		(void) vmw_fence_pull(queue, sequence);
+	}
+	return 0;
+}
+
+
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 39d43a0..a8e5445 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -120,7 +120,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 
 	atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence);
 	iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE);
-
+	vmw_fence_queue_init(&fifo->fence_queue);
 	return vmw_fifo_send_fence(dev_priv, &dummy);
 out_err:
 	vfree(fifo->static_buffer);
@@ -159,6 +159,7 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		  dev_priv->enable_state);
 
 	mutex_unlock(&dev_priv->hw_mutex);
+	vmw_fence_queue_takedown(&fifo->fence_queue);
 
 	if (likely(fifo->last_buffer != NULL)) {
 		vfree(fifo->last_buffer);
@@ -484,6 +485,8 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 	fifo_state->last_buffer_add = true;
 	vmw_fifo_commit(dev_priv, bytes);
 	fifo_state->last_buffer_add = false;
+	(void) vmw_fence_push(&fifo_state->fence_queue, *sequence);
+	vmw_update_sequence(dev_priv, fifo_state);
 
 out_err:
 	return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 4d7cb53..e92298a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -64,22 +64,33 @@ static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t sequence)
 	return (busy == 0);
 }
 
+void vmw_update_sequence(struct vmw_private *dev_priv,
+			 struct vmw_fifo_state *fifo_state)
+{
+	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+
+	uint32_t sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+
+	if (dev_priv->last_read_sequence != sequence) {
+		dev_priv->last_read_sequence = sequence;
+		vmw_fence_pull(&fifo_state->fence_queue, sequence);
+	}
+}
 
 bool vmw_fence_signaled(struct vmw_private *dev_priv,
 			uint32_t sequence)
 {
-	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
 	struct vmw_fifo_state *fifo_state;
 	bool ret;
 
 	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
 		return true;
 
-	dev_priv->last_read_sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	fifo_state = &dev_priv->fifo;
+	vmw_update_sequence(dev_priv, fifo_state);
 	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
 		return true;
 
-	fifo_state = &dev_priv->fifo;
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE) &&
 	    vmw_fifo_idle(dev_priv, sequence))
 		return true;
-- 
1.6.2.5