[Intel-gfx] [RFC 2/6] drm/i915: cover ring access with rpm get/put

Wed Jan 22 13:04:18 CET 2014

From: Naresh Kumar Kachhi <naresh.kumar.kachhi at intel.com>

GPU idleness is tracked by checking the request queue. Whenever the
request queue is empty we assume that the GPU is idle. When a new
set of commands is scheduled on a ring we call i915_add_request to
make sure these commands are tracked properly. However, there are a
few places which are not being tracked currently.
This patch introduces a new function add_request_wo_flush to track
such requests. add_request_wo_flush is the same as add_request; the
only difference is that it will not cause a flush. This is to avoid
any extra overhead while adding a new request.

To make sure Gfx is in D0 while there are still commands pending
on the ring, the following is done:
- All the ioctls are already covered with get/put; this makes sure
  that at the time of scheduling commands on the GPU, Gfx is in D0.
- Once command scheduling is done, we call add_request to track
  ring activity.
- We call get_noresume if this is first request (ioctl is already
  covered with get_sync).
- put is called only when request_list becomes empty, i.e. the GPU is
  idle and there are no pending commands on the rings.

Note: Make sure we don't do multiple add_request calls in the same
ioctl/callback; only one at the end is enough.

Signed-off-by: Naresh Kumar Kachhi <naresh.kumar.kachhi at intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |  5 ++++
 drivers/gpu/drm/i915/i915_drv.h            | 10 +++++--
 drivers/gpu/drm/i915/i915_gem.c            | 27 ++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
 drivers/gpu/drm/i915/intel_display.c       |  1 +
 drivers/gpu/drm/i915/intel_drv.h           |  3 ++
 drivers/gpu/drm/i915/intel_pm.c            | 47 ++++++++++++++++++++++++++++++
 7 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ee9502b..b5af745 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -454,6 +454,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
 				   struct drm_clip_rect *cliprects,
 				   void *cmdbuf)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int nbox = cmd->num_cliprects;
 	int i = 0, count, ret;
 
@@ -480,6 +481,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
 	}
 
 	i915_emit_breadcrumb(dev);
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	return 0;
 }
 
@@ -542,6 +544,7 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
 	}
 
 	i915_emit_breadcrumb(dev);
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	return 0;
 }
 
@@ -595,6 +598,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
 		ADVANCE_LP_RING();
 	}
 
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	master_priv->sarea_priv->pf_current_page = dev_priv->dri1.current_page;
 	return 0;
 }
@@ -768,6 +772,7 @@ static int i915_emit_irq(struct drm_device * dev)
 		OUT_RING(dev_priv->dri1.counter);
 		OUT_RING(MI_USER_INTERRUPT);
 		ADVANCE_LP_RING();
+		i915_add_request_wo_flush(LP_RING(dev_priv));
 	}
 
 	return dev_priv->dri1.counter;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 56c720b..d1399f9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1324,6 +1324,7 @@ struct i915_package_c8 {
 
 struct i915_runtime_pm {
 	bool suspended;
+	bool gpu_idle;
 };
 
 enum intel_pipe_crc_source {
@@ -2063,7 +2064,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_ring_buffer *to);
+			 struct intel_ring_buffer *to, bool add_request);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct intel_ring_buffer *ring);
 int i915_gem_dumb_create(struct drm_file *file_priv,
@@ -2139,9 +2140,12 @@ int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_ring_buffer *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
+		       u32 *seqno,
+		       bool flush_caches);
 #define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
+	__i915_add_request(ring, NULL, NULL, seqno, true)
+#define i915_add_request_wo_flush(ring) \
+	__i915_add_request(ring, NULL, NULL, NULL, false)
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 024e454..3e8202e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2136,7 +2136,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 int __i915_add_request(struct intel_ring_buffer *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       u32 *out_seqno,
+		       bool flush_caches)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
@@ -2152,9 +2153,11 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	ret = intel_ring_flush_all_caches(ring);
-	if (ret)
-		return ret;
+	if (flush_caches) {
+		ret = intel_ring_flush_all_caches(ring);
+		if (ret)
+			return ret;
+	}
 
 	request = ring->preallocated_lazy_request;
 	if (WARN_ON(request == NULL))
@@ -2219,6 +2222,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 					   &dev_priv->mm.retire_work,
 					   round_jiffies_up_relative(HZ));
 			intel_mark_busy(dev_priv->dev);
+			intel_runtime_pm_gpu_busy(dev_priv);
 		}
 	}
 
@@ -2544,10 +2548,12 @@ i915_gem_retire_requests(struct drm_device *dev)
 		idle &= list_empty(&ring->request_list);
 	}
 
-	if (idle)
+	if (idle) {
 		mod_delayed_work(dev_priv->wq,
 				   &dev_priv->mm.idle_work,
 				   msecs_to_jiffies(100));
+		intel_runtime_pm_gpu_idle(dev_priv);
+	}
 
 	return idle;
 }
@@ -2691,6 +2697,8 @@ out:
  *
  * @obj: object which may be in use on another ring.
  * @to: ring we wish to use the object on. May be NULL.
+ * @add_request: do we need to add a request to track operations
+ *    submitted on ring with sync_to function
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -2700,7 +2708,7 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_ring_buffer *to)
+		     struct intel_ring_buffer *to, bool add_request)
 {
 	struct intel_ring_buffer *from = obj->ring;
 	u32 seqno;
@@ -2724,12 +2732,15 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
 	trace_i915_gem_ring_sync_to(from, to, seqno);
 	ret = to->sync_to(to, from, seqno);
-	if (!ret)
+	if (!ret) {
 		/* We use last_read_seqno because sync_to()
 		 * might have just caused seqno wrap under
 		 * the radar.
 		 */
 		from->sync_seqno[idx] = obj->last_read_seqno;
+		if (add_request)
+			i915_add_request_wo_flush(to);
+	}
 
 	return ret;
 }
@@ -3707,7 +3718,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	int ret;
 
 	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
+		ret = i915_gem_object_sync(obj, pipelined, true);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0c6bcff..bda7a06 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+		ret = i915_gem_object_sync(obj, ring, false);
 		if (ret)
 			return ret;
 
@@ -969,7 +969,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj, NULL, true);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ec96002..25eae03 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8624,6 +8624,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 
 	intel_mark_page_flip_active(intel_crtc);
 	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7b3c209..9061aa7 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -881,9 +881,12 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv);
 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
 void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
 void intel_init_runtime_pm(struct drm_i915_private *dev_priv);
 void intel_fini_runtime_pm(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b9b4fe4..991ff62 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5470,6 +5470,37 @@ void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
 	hsw_enable_package_c8(dev_priv);
 }
 
+void intel_runtime_pm_gpu_idle(struct drm_i915_private *dev_priv)
+{
+	if (!HAS_RUNTIME_PM(dev_priv->dev))
+		return;
+
+	/* don't need a separate mutex here as callers are
+	 * expected to already hold struct_mutex
+	 */
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+	if (!dev_priv->pm.gpu_idle) {
+		dev_priv->pm.gpu_idle = true;
+		/* matches the get_noresume taken in gpu_busy */
+		intel_runtime_pm_put(dev_priv);
+	}
+}
+
+void intel_runtime_pm_gpu_busy(struct drm_i915_private *dev_priv)
+{
+	if (!HAS_RUNTIME_PM(dev_priv->dev))
+		return;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+	if (dev_priv->pm.gpu_idle) {
+		dev_priv->pm.gpu_idle = false;
+		/* take a reference, dropped again in gpu_idle, so the
+		 * GPU stays on until the request list is empty
+		 */
+		intel_runtime_pm_get_noresume(dev_priv);
+	}
+}
+
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -5482,6 +5513,21 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 	WARN(dev_priv->pm.suspended, "Device still suspended.\n");
 }
 
+void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct device *device = &dev->pdev->dev;
+
+	if (!HAS_RUNTIME_PM(dev))
+		return;
+
+	/* The driver calls get_noresume when it is sure the device is
+	 * already active and just wants to increment the ref count.
+	 */
+	WARN(dev_priv->pm.suspended, "Device suspended. call get_sync?\n");
+	pm_runtime_get_noresume(device);
+}
+
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -5500,6 +5546,7 @@ void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
 	struct device *device = &dev->pdev->dev;
 
 	dev_priv->pm.suspended = false;
+	dev_priv->pm.gpu_idle = true;
 
 	if (!HAS_RUNTIME_PM(dev))
 		return;
-- 
1.8.1.2