[Intel-gfx] [PATCH 14/15] drm/i915: Track frontbuffer invalidation/flushing

Daniel Vetter daniel.vetter at ffwll.ch
Mon Jun 16 19:51:34 CEST 2014


So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scaned out, and when it's flushed again and can be
compressed/one-shot-upload.

Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.

But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.

To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).

Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.

v2: Lots of improvements

Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
  from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
  Suggested by Chris.

Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the evenutal flush to avoid
races.

v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.

v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.

Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |  14 +++
 drivers/gpu/drm/i915/i915_gem.c            |  19 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   2 +-
 drivers/gpu/drm/i915/intel_display.c       | 178 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_drv.h           |  10 +-
 drivers/gpu/drm/i915/intel_fbdev.c         |  18 ++-
 drivers/gpu/drm/i915/intel_overlay.c       |   3 +
 drivers/gpu/drm/i915/intel_sprite.c        |   4 +-
 8 files changed, 225 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4c3787699cdf..30c2ae3cbab7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1332,6 +1332,17 @@ struct intel_pipe_crc {
 	wait_queue_head_t wq;
 };
 
+struct i915_frontbuffer_tracking {
+	struct mutex lock;
+
+	/*
+	 * Tracking bits for delayed frontbuffer flushing du to gpu activity or
+	 * scheduled flips.
+	 */
+	unsigned busy_bits;
+	unsigned flip_bits;
+};
+
 struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -1475,6 +1486,9 @@ struct drm_i915_private {
 	bool lvds_downclock_avail;
 	/* indicates the reduced downclock for LVDS*/
 	int lvds_downclock;
+
+	struct i915_frontbuffer_tracking fb_tracking;
+
 	u16 orig_clock;
 
 	bool mchbar_need_disable;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9722991a0d19..2ccdd9dcd253 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1395,8 +1395,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
-	intel_edp_psr_exit(dev);
-
 	/* Try to flush the object off the GPU without holding the lock.
 	 * We will repeat the flush holding the lock in the normal manner
 	 * to catch cases where we are gazumped.
@@ -1442,8 +1440,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	intel_edp_psr_exit(dev);
-
 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 	if (&obj->base == NULL) {
 		ret = -ENOENT;
@@ -2227,6 +2223,9 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 			list_move_tail(&vma->mm_list, &vm->inactive_list);
 	}
 
+	if (obj->frontbuffer_bits)
+		intel_fb_flush(obj, true);
+
 	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
 
@@ -3556,6 +3555,8 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
 
+	intel_fb_flush(obj, false);
+
 	trace_i915_gem_object_change_domain(obj,
 					    obj->base.read_domains,
 					    old_write_domain);
@@ -3577,6 +3578,8 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
 
+	intel_fb_flush(obj, false);
+
 	trace_i915_gem_object_change_domain(obj,
 					    obj->base.read_domains,
 					    old_write_domain);
@@ -3630,6 +3633,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->dirty = 1;
 	}
 
+	intel_fb_invalidate(obj, NULL);
+
 	trace_i915_gem_object_change_domain(obj,
 					    old_read_domains,
 					    old_write_domain);
@@ -3966,6 +3971,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
+	intel_fb_invalidate(obj, NULL);
+
 	trace_i915_gem_object_change_domain(obj,
 					    old_read_domains,
 					    old_write_domain);
@@ -4240,8 +4247,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	intel_edp_psr_exit(dev);
-
 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 	if (&obj->base == NULL) {
 		ret = -ENOENT;
@@ -4941,6 +4946,8 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
 	register_oom_notifier(&dev_priv->mm.oom_notifier);
+
+	mutex_init(&dev_priv->fb_tracking.lock);
 }
 
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 93d7f7246588..2d06aad4a954 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -978,7 +978,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 			/* check for potential scanout */
 			if (i915_gem_obj_ggtt_bound(obj) &&
 			    i915_gem_obj_to_ggtt(obj)->pin_count)
-				intel_mark_fb_busy(obj, ring);
+				intel_fb_invalidate(obj, ring);
 
 			/* update for the implicit flush after a batch */
 			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b8359f4a6dc4..dfdbf2a02844 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2755,6 +2755,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 
 	dev_priv->display.update_primary_plane(crtc, fb, x, y);
 
+	intel_frontbuffer_flush(dev, INTEL_FRONTBUFFER_PRIMARY(pipe));
+
 	crtc->primary->fb = fb;
 	crtc->x = x;
 	crtc->y = y;
@@ -3949,6 +3951,8 @@ static void intel_crtc_enable_planes(struct drm_crtc *crtc)
 	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
 	mutex_unlock(&dev->struct_mutex);
+
+	intel_frontbuffer_flush(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
 }
 
 static void intel_crtc_disable_planes(struct drm_crtc *crtc)
@@ -3971,6 +3975,8 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc)
 	intel_disable_planes(crtc);
 	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
 
+	intel_frontbuffer_flush(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
+
 	drm_vblank_off(dev, pipe);
 }
 
@@ -8211,6 +8217,8 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 		intel_crtc_update_cursor(crtc, intel_crtc->cursor_bo != NULL);
 	}
 
+	intel_frontbuffer_flush(dev, INTEL_FRONTBUFFER_CURSOR(pipe));
+
 	return 0;
 fail_unpin:
 	i915_gem_object_unpin_from_display_plane(obj);
@@ -8829,20 +8837,26 @@ out:
 }
 
 
-void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_engine_cs *ring)
+/**
+ * intel_mark_fb_busy - mark given planes as busy
+ * @dev: DRM device
+ * @frontbuffer_bits: bits for the affected planes
+ * @ring: optional ring for asynchronous commands
+ *
+ * This function gets called every time the screen contents change. It can be
+ * used to keep e.g. the update rate at the nominal refresh rate with DRRS.
+ */
+static void intel_mark_fb_busy(struct drm_device *dev,
+			       unsigned frontbuffer_bits,
+			       struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = obj->base.dev;
 	enum pipe pipe;
 
-	intel_edp_psr_exit(dev);
-
 	if (!i915.powersave)
 		return;
 
 	for_each_pipe(pipe) {
-		if (!(obj->frontbuffer_bits &
-		      INTEL_FRONTBUFFER_ALL_MASK(pipe)))
+		if (!(frontbuffer_bits & INTEL_FRONTBUFFER_ALL_MASK(pipe)))
 			continue;
 
 		intel_increase_pllclock(dev, pipe);
@@ -8851,6 +8865,148 @@ void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
 	}
 }
 
+/**
+ * intel_fb_invalidate - invalidate frontbuffer
+ * @obj: GEM object to invalidate
+ * @ring: set for asynchronous rendering
+ *
+ * This function gets called every time rendering on the given object starts and
+ * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must
+ * be invalidated. If @ring is non-NULL any subsequent invalidation will be delayed
+ * until the rendering completes or a flip on this frontbuffer plane is
+ * scheduled.
+ */
+void intel_fb_invalidate(struct drm_i915_gem_object *obj,
+			 struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	if (!obj->frontbuffer_bits)
+		return;
+
+	if (ring) {
+		mutex_lock(&dev_priv->fb_tracking.lock);
+		dev_priv->fb_tracking.busy_bits
+			|= obj->frontbuffer_bits;
+		dev_priv->fb_tracking.flip_bits
+			&= ~obj->frontbuffer_bits;
+		mutex_unlock(&dev_priv->fb_tracking.lock);
+	}
+
+	intel_mark_fb_busy(dev, obj->frontbuffer_bits, ring);
+
+	intel_edp_psr_exit(dev);
+}
+
+/**
+ * intel_frontbuffer_flush - flush frontbuffer
+ * @dev: DRM device
+ * @frontbuffer_bits: frontbuffer plane tracking bits
+ *
+ * This function gets called every time rendering on the given planes has
+ * completed and frontbuffer caching can be started again. Flushes will get
+ * delayed if they're blocked by some oustanding asynchronous rendering.
+ *
+ * Can be called without any locks held.
+ */
+void intel_frontbuffer_flush(struct drm_device *dev,
+			     unsigned frontbuffer_bits)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Delay flushing when rings are still busy.*/
+	mutex_lock(&dev_priv->fb_tracking.lock);
+	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
+	mutex_unlock(&dev_priv->fb_tracking.lock);
+
+	intel_mark_fb_busy(dev, frontbuffer_bits, NULL);
+
+	intel_edp_psr_exit(dev);
+}
+
+/**
+ * intel_fb_flush - flush frontbuffer
+ * @obj: GEM object to flush
+ * @retire: set when retiring asynchronous rendering
+ *
+ * This function gets called every time rendering on the given object has
+ * completed and frontbuffer caching can be started again. If @retire is true
+ * then any delayed flushes will be unblocked.
+ */
+void intel_fb_flush(struct drm_i915_gem_object *obj,
+		    bool retire)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	if (!obj->frontbuffer_bits)
+		return;
+
+	if (retire) {
+		mutex_lock(&dev_priv->fb_tracking.lock);
+		dev_priv->fb_tracking.busy_bits
+			&= ~obj->frontbuffer_bits;
+		mutex_unlock(&dev_priv->fb_tracking.lock);
+	}
+
+	intel_frontbuffer_flush(dev, obj->frontbuffer_bits);
+}
+
+/**
+ * intel_fb_flip_prepare - flip frontbuffer
+ * @obj: GEM object to flush
+ *
+ * This function gets called after scheduling a flip on @obj. The actual
+ * frontbuffer flushing will be delayed until completion is signalled with
+ * intel_frontbuffer_flip_complete. If an invalidate happens in between this
+ * flush will be cancelled.
+ *
+ * Note that the frontbuffer tracking bits in obj->frontbuffer_bits must have
+ * been updated already.
+ */
+void intel_fb_flip_prepare(struct drm_i915_gem_object *obj)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!obj->frontbuffer_bits);
+
+	mutex_lock(&dev_priv->fb_tracking.lock);
+	dev_priv->fb_tracking.flip_bits
+		|= obj->frontbuffer_bits;
+	mutex_unlock(&dev_priv->fb_tracking.lock);
+}
+
+/**
+ * intel_frontbuffer_flip_complete - complete frontbuffer flip
+ * @dev: DRM device
+ * @frontbuffer_bits: frontbuffer plane tracking bits
+ *
+ * This function gets called after the flip has been latched and will complete
+ * on the next vblank. It will execute the fush if it hasn't been cancalled yet.
+ *
+ * Can be called without any locks held.
+ */
+void intel_frontbuffer_flip_complete(struct drm_device *dev,
+				     unsigned frontbuffer_bits)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	mutex_lock(&dev_priv->fb_tracking.lock);
+	/* Mask any cancelled flips. */
+	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
+	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
+	mutex_unlock(&dev_priv->fb_tracking.lock);
+
+	intel_frontbuffer_flush(dev, frontbuffer_bits);
+}
+
 static void intel_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -8878,6 +9034,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	struct intel_unpin_work *work =
 		container_of(__work, struct intel_unpin_work, work);
 	struct drm_device *dev = work->crtc->dev;
+	enum pipe pipe = to_intel_crtc(work->crtc)->pipe;
 
 	mutex_lock(&dev->struct_mutex);
 	intel_unpin_fb_obj(work->old_fb_obj);
@@ -8887,6 +9044,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	intel_update_fbc(dev);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_frontbuffer_flip_complete(dev, INTEL_FRONTBUFFER_PRIMARY(pipe));
+
 	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
 	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
 
@@ -9302,9 +9461,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (work == NULL)
 		return -ENOMEM;
 
-	/* Exit PSR early in page flip */
-	intel_edp_psr_exit(dev);
-
 	work->event = event;
 	work->crtc = crtc;
 	work->old_fb_obj = to_intel_framebuffer(old_fb)->obj;
@@ -9375,7 +9531,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				INTEL_FRONTBUFFER_PRIMARY(pipe));
 
 	intel_disable_fbc(dev);
-	intel_mark_fb_busy(obj, NULL);
+	intel_fb_flip_prepare(obj);
 	mutex_unlock(&dev->struct_mutex);
 
 	trace_i915_flip_request(intel_crtc->plane, obj);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1d45629a6483..41776bfeddbd 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -718,8 +718,14 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev);
 int intel_pch_rawclk(struct drm_device *dev);
 int valleyview_cur_cdclk(struct drm_i915_private *dev_priv);
 void intel_mark_busy(struct drm_device *dev);
-void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
-			struct intel_engine_cs *ring);
+void intel_fb_invalidate(struct drm_i915_gem_object *obj,
+			 struct intel_engine_cs *ring);
+void intel_fb_flip_prepare(struct drm_i915_gem_object *obj);
+void intel_frontbuffer_flip_complete(struct drm_device *dev,
+				     unsigned frontbuffer_bits);
+void intel_frontbuffer_flush(struct drm_device *dev,
+			     unsigned frontbuffer_bits);
+void intel_fb_flush(struct drm_i915_gem_object *obj, bool retire);
 void intel_mark_idle(struct drm_device *dev);
 void intel_crtc_restore_mode(struct drm_crtc *crtc);
 void intel_crtc_update_dpms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 27975c3e21c5..12276c39d14d 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -43,10 +43,26 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
+static int intel_fbdev_set_par(struct fb_info *info)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+	struct intel_fbdev *ifbdev =
+		container_of(fb_helper, struct intel_fbdev, helper);
+	int ret;
+
+	ret = drm_fb_helper_set_par(info);
+
+	if (ret == 0)
+		ret = i915_gem_object_set_to_gtt_domain(ifbdev->fb->obj,
+							true);
+
+	return ret;
+}
+
 static struct fb_ops intelfb_ops = {
 	.owner = THIS_MODULE,
 	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
+	.fb_set_par = intel_fbdev_set_par,
 	.fb_fillrect = cfb_fillrect,
 	.fb_copyarea = cfb_copyarea,
 	.fb_imageblit = cfb_imageblit,
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 6aeda6a489a0..16e42071d56e 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -790,6 +790,9 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	overlay->old_vid_bo = overlay->vid_bo;
 	overlay->vid_bo = new_bo;
 
+	intel_frontbuffer_flush(dev,
+				INTEL_FRONTBUFFER_OVERLAY(pipe));
+
 	return 0;
 
 out_unpin:
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 3648594aeb7e..4d9c47368094 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -1034,6 +1034,8 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		else
 			intel_plane->disable_plane(plane, crtc);
 
+		intel_frontbuffer_flush(dev, INTEL_FRONTBUFFER_SPRITE(pipe));
+
 		if (!primary_was_enabled && primary_enabled)
 			intel_post_enable_primary(crtc);
 	}
@@ -1054,8 +1056,6 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		mutex_unlock(&dev->struct_mutex);
 	}
 
-	intel_edp_psr_exit(dev);
-
 	return 0;
 }
 
-- 
2.0.0




More information about the Intel-gfx mailing list