[Intel-gfx] [PATCH] drm/i915: Lazily apply the SNB+ seqno w/a

Daniel Vetter daniel at ffwll.ch
Thu Aug 9 12:29:16 CEST 2012


On Thu, Aug 09, 2012 at 10:58:30AM +0100, Chris Wilson wrote:
> Avoid the forcewake overhead when simply retiring requests, as often the
> last seen seqno is good enough to satisfy the retirment process and will
> be promptly re-run in any case. Only ensure that we force the coherent
> seqno read when we are explicitly waiting upon a completion event to be
> sure that none go missing, and also for when we are reporting seqno
> values in case of error or debugging.
> 
> This greatly reduces the load for userspace using the busy-ioctl to
> track active buffers, for instance halving the CPU used by X in pushing
> the pixels from a software render (flash). The effect will be even more
> magnified with userptr and so providing a zero-copy upload path in that
> instance, or in similar instances where X is simply compositing DRI
> buffers.
> 
> v2: Reverse the polarity of the tachyon stream. Daniel suggested that
> 'force' was too generic for the parameter name and that 'lazy_coherency'
> better encapsulated the semantics of it being an optimization and its
> purpose. Also notice that gen6_get_seqno() is only used by gen6/7
> chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that
> function.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

Yeah, I like the new color.

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>

I'll muse over this some more before picking it up, just in case I'll
notice a place this could blow up ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |    2 +-
>  drivers/gpu/drm/i915/i915_gem.c         |    6 +++---
>  drivers/gpu/drm/i915/i915_irq.c         |    9 +++++----
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   10 ++++------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    9 ++++++++-
>  5 files changed, 21 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 86444fe..544abf5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -426,7 +426,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
>  {
>  	if (ring->get_seqno) {
>  		seq_printf(m, "Current sequence (%s): %d\n",
> -			   ring->name, ring->get_seqno(ring));
> +			   ring->name, ring->get_seqno(ring, false));
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 153c533..0582e22 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1888,7 +1888,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>  
>  	WARN_ON(i915_verify_lists(ring->dev));
>  
> -	seqno = ring->get_seqno(ring);
> +	seqno = ring->get_seqno(ring, true);
>  
>  	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
>  		if (seqno >= ring->sync_seqno[i])
> @@ -2060,7 +2060,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  	bool wait_forever = true;
>  	int ret;
>  
> -	if (i915_seqno_passed(ring->get_seqno(ring), seqno))
> +	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
>  		return 0;
>  
>  	trace_i915_gem_request_wait_begin(ring, seqno);
> @@ -2079,7 +2079,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  	getrawmonotonic(&before);
>  
>  #define EXIT_COND \
> -	(i915_seqno_passed(ring->get_seqno(ring), seqno) || \
> +	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
>  	atomic_read(&dev_priv->mm.wedged))
>  	do {
>  		if (interruptible)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 57e4f2b..0ba15e8 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -335,7 +335,7 @@ static void notify_ring(struct drm_device *dev,
>  	if (ring->obj == NULL)
>  		return;
>  
> -	trace_i915_gem_request_complete(ring, ring->get_seqno(ring));
> +	trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
>  
>  	wake_up_all(&ring->irq_queue);
>  	if (i915_enable_hangcheck) {
> @@ -1052,7 +1052,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
>  	if (!ring->get_seqno)
>  		return NULL;
>  
> -	seqno = ring->get_seqno(ring);
> +	seqno = ring->get_seqno(ring, false);
>  	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
>  		if (obj->ring != ring)
>  			continue;
> @@ -1106,7 +1106,7 @@ static void i915_record_ring_state(struct drm_device *dev,
>  
>  	error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
>  	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
> -	error->seqno[ring->id] = ring->get_seqno(ring);
> +	error->seqno[ring->id] = ring->get_seqno(ring, false);
>  	error->acthd[ring->id] = intel_ring_get_active_head(ring);
>  	error->head[ring->id] = I915_READ_HEAD(ring);
>  	error->tail[ring->id] = I915_READ_TAIL(ring);
> @@ -1603,7 +1603,8 @@ ring_last_seqno(struct intel_ring_buffer *ring)
>  static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err)
>  {
>  	if (list_empty(&ring->request_list) ||
> -	    i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) {
> +	    i915_seqno_passed(ring->get_seqno(ring, false),
> +			      ring_last_seqno(ring))) {
>  		/* Issue a wake-up to catch stuck h/w. */
>  		if (waitqueue_active(&ring->irq_queue)) {
>  			DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 6c0f504..c77bcd8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -618,26 +618,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
>  }
>  
>  static u32
> -gen6_ring_get_seqno(struct intel_ring_buffer *ring)
> +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  {
> -	struct drm_device *dev = ring->dev;
> -
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
> -	if (IS_GEN6(dev) || IS_GEN7(dev))
> +	if (!lazy_coherency)
>  		intel_ring_get_active_head(ring);
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
>  static u32
> -ring_get_seqno(struct intel_ring_buffer *ring)
> +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
>  static u32
> -pc_render_get_seqno(struct intel_ring_buffer *ring)
> +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  {
>  	struct pipe_control *pc = ring->private;
>  	return pc->cpu_page[0];
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8b2b92e..2ea7a31 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -72,7 +72,14 @@ struct  intel_ring_buffer {
>  				  u32	flush_domains);
>  	int		(*add_request)(struct intel_ring_buffer *ring,
>  				       u32 *seqno);
> -	u32		(*get_seqno)(struct intel_ring_buffer *ring);
> +	/* Some chipsets are not quite as coherent as advertised and need
> +	 * an expensive kick to force a true read of the up-to-date seqno.
> +	 * However, the up-to-date seqno is not always required and the last
> +	 * seen value is good enough. Note that the seqno will always be
> +	 * monotonic, even if not coherent.
> +	 */
> +	u32		(*get_seqno)(struct intel_ring_buffer *ring,
> +				     bool lazy_coherency);
>  	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
>  					       u32 offset, u32 length);
>  	void		(*cleanup)(struct intel_ring_buffer *ring);
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48



More information about the Intel-gfx mailing list