[Intel-gfx] [PATCH 1/3] drm/i915: Avoid the branch in computing intel_ring_space()

Mika Kuoppala mika.kuoppala at linux.intel.com
Wed May 3 11:53:43 UTC 2017


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Exploit the power-of-two ring size to compute the space across the
> wraparound using a mask rather than a if. Convert to unsigned integers
> so the operation is well defined.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99671
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++-------------
>  2 files changed, 34 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 29b5afac7856..5947bba20c4a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -39,12 +39,16 @@
>   */
>  #define LEGACY_REQUEST_SIZE 200
>  
> -static int __intel_ring_space(int head, int tail, int size)
> +static unsigned int __intel_ring_space(unsigned int head,
> +				       unsigned int tail,
> +				       unsigned int size)

Why not u32 here too?

>  {
> -	int space = head - tail;
> -	if (space <= 0)
> -		space += size;
> -	return space - I915_RING_FREE_SPACE;
> +	/*
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 */
> +	return (head - tail - CACHELINE_BYTES) & (size - 1);

I almost went and complained about the incorrect calculation here
but rereading the commit message I regained my senses.

>  }
>  
>  void intel_ring_update_space(struct intel_ring *ring)
> @@ -1669,12 +1673,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)

Bytes should be converted also to unsigned at some point.

>  	GEM_BUG_ON(!req->reserved_space);
>  
>  	list_for_each_entry(target, &ring->request_list, ring_link) {
> -		unsigned space;
> -
>  		/* Would completion of this request free enough space? */
> -		space = __intel_ring_space(target->postfix, ring->emit,
> -					   ring->size);
> -		if (space >= bytes)
> +		if (bytes <= __intel_ring_space(target->postfix,
> +						ring->emit, ring->size))
>  			break;
>  	}
>  
> @@ -1743,11 +1744,11 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  	}
>  
>  	GEM_BUG_ON(ring->emit > ring->size - bytes);
> +	GEM_BUG_ON(ring->space < bytes);
>  	cs = ring->vaddr + ring->emit;
>  	GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
>  	ring->emit += bytes;
>  	ring->space -= bytes;
> -	GEM_BUG_ON(ring->space < 0);
>  
>  	return cs;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 02d741ef99ad..64584d134681 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -17,17 +17,6 @@
>  #define CACHELINE_BYTES 64
>  #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
>  
> -/*
> - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
> - * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
> - * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
> - *
> - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
> - * cacheline, the Head Pointer must not be greater than the Tail
> - * Pointer."
> - */
> -#define I915_RING_FREE_SPACE 64
> -
>  struct intel_hw_status_page {
>  	struct i915_vma *vma;
>  	u32 *page_addr;
> @@ -145,9 +134,9 @@ struct intel_ring {
>  	u32 tail;
>  	u32 emit;
>  
> -	int space;
> -	int size;
> -	int effective_size;
> +	u32 space;
> +	u32 size;
> +	u32 effective_size;
>  };
>  
>  struct i915_gem_context;
> @@ -548,6 +537,25 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
>  	 */
>  	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
>  	GEM_BUG_ON(tail >= ring->size);
> +
> +	/*
> +	 * "Ring Buffer Use"
> +	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
> +	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
> +	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 *
> +	 * We use ring->head as the last known location of the actual RING_HEAD,
> +	 * it may have advanced but in the worst case it is equally the same
> +	 * as ring->head and so we should never program RING_TAIL to advance
> +	 * into the same cacheline as ring->head.
> +	 */
> +#define cacheline(a) round_down(a, CACHELINE_BYTES)
> +	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
> +		   tail < ring->head);

Nitpick, ring->head > tail should have fit the quote above better.

Reviewed-by: Mika Kuoppala <mika.kuoppala at intel.com>

> +#undef cacheline
>  }
>  
>  static inline unsigned int
> -- 
> 2.11.0


More information about the Intel-gfx mailing list