[Intel-gfx] [PATCH 3/3] drm/i915: Micro-optimise hotpath through intel_ring_begin()

Thu May 4 12:11:45 UTC 2017

Chris Wilson <chris at chris-wilson.co.uk> writes:

> Typically, there is space available within the ring and if not we have
> to wait (by definition a slow path). Rearrange the code to reduce the
> number of branches and stack size for the hotpath, accomodating a slight
> growth for the wait.
>
> v2: Fix the new assert that packets are not larger than the actual ring.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 63 +++++++++++++++++----------------
>  1 file changed, 33 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c46e5439d379..53123c1cfcc5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1654,7 +1654,7 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
>  	return 0;
>  }
>  
> -static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
> +static noinline int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  {
>  	struct intel_ring *ring = req->ring;
>  	struct drm_i915_gem_request *target;
> @@ -1702,49 +1702,52 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  {
>  	struct intel_ring *ring = req->ring;
> -	int remain_actual = ring->size - ring->emit;
> -	int remain_usable = ring->effective_size - ring->emit;
> -	int bytes = num_dwords * sizeof(u32);
> -	int total_bytes, wait_bytes;
> -	bool need_wrap = false;
> +	const unsigned int remain_usable = ring->effective_size - ring->emit;
> +	const unsigned int bytes = num_dwords * sizeof(u32);
> +	unsigned int need_wrap = 0;
> +	unsigned int total_bytes;
>  	u32 *cs;
>  
>  	total_bytes = bytes + req->reserved_space;
> +	GEM_BUG_ON(total_bytes > ring->effective_size);
>  
> -	if (unlikely(bytes > remain_usable)) {
> -		/*
> -		 * Not enough space for the basic request. So need to flush
> -		 * out the remainder and then wait for base + reserved.
> -		 */
> -		wait_bytes = remain_actual + total_bytes;
> -		need_wrap = true;
> -	} else if (unlikely(total_bytes > remain_usable)) {
> -		/*
> -		 * The base request will fit but the reserved space
> -		 * falls off the end. So we don't need an immediate wrap
> -		 * and only need to effectively wait for the reserved
> -		 * size space from the start of ringbuffer.
> -		 */
> -		wait_bytes = remain_actual + req->reserved_space;
> -	} else {
> -		/* No wrapping required, just waiting. */
> -		wait_bytes = total_bytes;
> +	if (unlikely(total_bytes > remain_usable)) {
> +		const int remain_actual = ring->size - ring->emit;
> +
> +		if (bytes > remain_usable) {
> +			/*
> +			 * Not enough space for the basic request. So need to
> +			 * flush out the remainder and then wait for
> +			 * base + reserved.
> +			 */
> +			total_bytes += remain_actual;
> +			need_wrap = remain_actual | 1;

Your remain_actual should never reach zero. So in here
forcing the lowest bit on, and later off, seems superfluous.

-Mika

> +		} else  {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So we don't need an immediate
> +			 * wrap and only need to effectively wait for the
> +			 * reserved size from the start of ringbuffer.
> +			 */
> +			total_bytes = req->reserved_space + remain_actual;
> +		}
>  	}
>  
> -	if (wait_bytes > ring->space) {
> -		int ret = wait_for_space(req, wait_bytes);
> +	if (unlikely(total_bytes > ring->space)) {
> +		int ret = wait_for_space(req, total_bytes);
>  		if (unlikely(ret))
>  			return ERR_PTR(ret);
>  	}
>  
>  	if (unlikely(need_wrap)) {
> -		GEM_BUG_ON(remain_actual > ring->space);
> -		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
> +		need_wrap &= ~1;
> +		GEM_BUG_ON(need_wrap > ring->space);
> +		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
>  
>  		/* Fill the tail with MI_NOOP */
> -		memset(ring->vaddr + ring->emit, 0, remain_actual);
> +		memset(ring->vaddr + ring->emit, 0, need_wrap);
>  		ring->emit = 0;
> -		ring->space -= remain_actual;
> +		ring->space -= need_wrap;
>  	}
>  
>  	GEM_BUG_ON(ring->emit > ring->size - bytes);
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx