[Intel-gfx] [PATCH 2/2] drm/i915/vlv: Replace blitter ring based flips with MMIO flips for VLV

akash.goel at intel.com
Thu Jan 9 12:26:39 CET 2014


From: Akash Goel <akash.goel at intel.com>

Use MMIO based flips on VLV for the Media power well residency
optimization. The blitter ring is currently used only for command
streamer based flip calls. With MMIO flips, pure 3D workloads will not
use the blitter ring at all, which ensures 100% D0i3 residency for the
Media well. The alternative of render ring based flip calls is not
used, as that option adversely affects the performance (FPS) of
certain 3D apps.

Signed-off-by: Akash Goel <akash.goel at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |   2 +
 drivers/gpu/drm/i915/i915_gem.c      |   4 +-
 drivers/gpu/drm/i915/intel_display.c | 147 +++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+), 2 deletions(-)
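
For reviewers, a condensed sketch of the flow this patch adds. This is
not part of the patch itself; it reuses only identifiers introduced in
the diff below, elides pinning/fencing, the olr check, the plane bounds
check and all error handling, and assumes dev_priv->flipwq is created
elsewhere in this series:

/* Caller context (the VLV .queue_flip hook): record what the worker
 * needs, then hand the flip over to the driver's workqueue. */
static int mmio_flip_sketch(struct drm_crtc *crtc,
			    struct drm_i915_gem_object *obj)
{
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
	struct i915_flip_work *work = &flip_works[intel_crtc->plane];

	work->flipdata.crtc = crtc;
	work->flipdata.seqno = obj->last_write_seqno; /* 0 if idle */
	work->flipdata.ring_id = obj->ring ? obj->ring->id : RCS;

	queue_work(dev_priv->flipwq, &work->work);
	return 0;
}

/* Worker context: sleep-wait until rendering to the new front buffer
 * has completed, then finish the flip with a plain MMIO plane update.
 * No blitter (or render) ring commands are emitted anywhere. */
static void mmio_flip_work_sketch(struct work_struct *w)
{
	struct i915_flip_work *work =
		container_of(w, struct i915_flip_work, work);
	struct drm_crtc *crtc = work->flipdata.crtc;
	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
	struct intel_ring_buffer *ring =
		&dev_priv->ring[work->flipdata.ring_id];

	if (work->flipdata.seqno)
		__wait_seqno(ring, work->flipdata.seqno,
			     atomic_read(&dev_priv->gpu_error.reset_counter),
			     true, NULL, NULL);

	intel_mark_page_flip_active(to_intel_crtc(crtc));
	i9xx_update_plane(crtc, crtc->fb, 0, 0);
}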

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2e22430..6d1e496 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -74,6 +74,7 @@ enum plane {
 	PLANE_A = 0,
 	PLANE_B,
 	PLANE_C,
+	I915_MAX_PLANES
 };
 #define plane_name(p) ((p) + 'A')
 
@@ -2114,6 +2115,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
+int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
 	return unlikely(atomic_read(&error->reset_counter)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 656406d..1a33501 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -957,7 +957,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
  * Compare seqno against outstanding lazy request. Emit a request if they are
  * equal.
  */
-static int
+int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
@@ -1008,7 +1008,7 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
  * Returns 0 if the seqno was found within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			unsigned reset_counter,
 			bool interruptible,
 			struct timespec *timeout,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4d1357a..25aa3a8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -52,6 +52,9 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc,
 static int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode,
 			  int x, int y, struct drm_framebuffer *old_fb);
 
+int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, unsigned reset_counter,
+		 bool interruptible, struct timespec *timeout,
+		 struct drm_i915_file_private *file_priv);
 
 typedef struct {
 	int	min, max;
@@ -68,6 +71,24 @@ struct intel_limit {
 	intel_p2_t	    p2;
 };
 
+struct i915_flip_data {
+	struct drm_crtc *crtc;
+	u32 seqno;
+	u32 ring_id;
+};
+
+struct i915_flip_work {
+	struct i915_flip_data flipdata;
+	struct work_struct  work;
+};
+
+/*
+ * Only one work item is needed per primary plane,
+ * as we support only one outstanding flip request
+ * on each plane at a time.
+ */
+static struct i915_flip_work flip_works[I915_MAX_PLANES];
+
 int
 intel_pch_rawclk(struct drm_device *dev)
 {
@@ -8588,6 +8609,123 @@ err:
 	return ret;
 }
 
+static void intel_gen7_queue_mmio_flip_work(struct work_struct *__work)
+{
+	struct i915_flip_work *flipwork =
+		container_of(__work, struct i915_flip_work, work);
+	int ret = 0;
+	unsigned int reset_counter;
+	struct drm_crtc *crtc = flipwork->flipdata.crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring =
+			&dev_priv->ring[flipwork->flipdata.ring_id];
+
+	if (dev_priv->ums.mm_suspended || (ring->obj == NULL)) {
+		DRM_ERROR("flip attempted while the ring is not ready\n");
+		return;
+	}
+
+	/*
+	 * The wait is needed only for a non-zero seqno; a zero seqno
+	 * indicates that rendering on the object (through the GPU) has
+	 * either already completed or was never initiated.
+	 */
+	if (flipwork->flipdata.seqno > 0) {
+		reset_counter =
+			atomic_read(&dev_priv->gpu_error.reset_counter);
+		/* sleep wait until the seqno has passed */
+		ret = __wait_seqno(ring, flipwork->flipdata.seqno,
+					reset_counter, true, NULL, NULL);
+		if (ret)
+			DRM_ERROR("wait_seqno failed on seqno 0x%x(%d)\n",
+				flipwork->flipdata.seqno,
+				flipwork->flipdata.ring_id);
+	}
+
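+	/*
+	 * Rendering has completed (or the wait failed and was logged
+	 * above); finish the flip with a plain MMIO plane update, so
+	 * no ring commands are needed.
+	 */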
+	intel_mark_page_flip_active(intel_crtc);
+	i9xx_update_plane(crtc, crtc->fb, 0, 0);
+}
+
+/*
+ * Use MMIO based flips starting from VLV, for the Media power well
+ * residency optimization. The alternative of render ring based
+ * flip calls is not used, as that option adversely affects the
+ * performance (FPS) of certain 3D apps.
+ */
+static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct i915_flip_work *work = &flip_works[intel_crtc->plane];
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	switch (intel_crtc->plane) {
+	case PLANE_A:
+	case PLANE_B:
+	case PLANE_C:
+		break;
+	default:
+		WARN_ONCE(1, "unknown plane in flip command\n");
+		ret = -ENODEV;
+		goto err_unpin;
+	}
+
+	work->flipdata.crtc  = crtc;
+	work->flipdata.seqno = obj->last_write_seqno;
+	work->flipdata.ring_id = RCS;
+
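+	/*
+	 * RCS above is only a default; if the object was last written
+	 * to by the GPU, record the actual ring so that the worker
+	 * waits on the right one.
+	 */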
+	if (obj->last_write_seqno > 0) {
+		if (obj->ring) {
+			work->flipdata.ring_id = obj->ring->id;
+			/*
+			 * Check whether a request still needs to be added
+			 * to the ring to emit the seqno for this fb obj
+			 */
+			ret = i915_gem_check_olr(obj->ring,
+						obj->last_write_seqno);
+			if (ret)
+				goto err_unpin;
+		} else {
+			DRM_ERROR("NULL ring for active obj with seqno %x\n",
+				obj->last_write_seqno);
+			ret = -EINVAL;
+			goto err_unpin;
+		}
+	}
+
+	/*
+	 * Flush the work item in case it is still running. This can
+	 * only happen in a very rare corner case, since we allow only
+	 * one outstanding flip at a time on each plane through the
+	 * 'unpin_work' variable.
+	 * TODO: check whether this flush is really needed.
+	 */
+	flush_work(&work->work);
+
+	/*
+	 * Queue the MMIO flip work in our private workqueue.
+	 */
+	queue_work(dev_priv->flipwq, &work->work);
+
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
@@ -10171,6 +10309,12 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
 
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
+
+	/*
+	 * Initialize the flip work item (one per primary plane)
+	 */
+	INIT_WORK(&flip_works[intel_crtc->plane].work,
+		  intel_gen7_queue_mmio_flip_work);
 }
 
 enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
@@ -10675,6 +10819,9 @@ static void intel_init_display(struct drm_device *dev)
 		break;
 	}
 
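+	/*
+	 * Override the ring based queue_flip hook chosen above: VLV
+	 * uses MMIO flips to keep the Media power well in D0i3.
+	 */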
+	if (IS_VALLEYVIEW(dev))
+		dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip;
+
 	intel_panel_init_backlight_funcs(dev);
 }
 
-- 
1.8.5.2