[Intel-gfx] [PATCH 2/2] drm/i915: Move the modulus for ring emission to the register write

Joonas Lahtinen joonas.lahtinen at linux.intel.com
Thu Jul 28 09:59:52 UTC 2016


On to, 2016-07-28 at 10:16 +0100, Chris Wilson wrote:
> Space reservation is already safe with respect to the ring->size
> modulus, but hardware only expects to see values in the range
> 0...ring->size-1 (inclusive) and so requires the modulus to prevent us
> writing the value ring->size instead of 0. As this is only required for
> the register itself, we can defer the modulus to the register update and
> not perform it after every command packet. We keep the
> intel_ring_advance() around in the code to provide demarcation for the
> end-of-packet (with then can be compared against intel_ring_begin() as
> the number of dwords emitted must match the reserved space).
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Dave Gordon <david.s.gordon at intel.com>

LGTM,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |  2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  6 ++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 17 +++++++++++++----
>  3 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index bf42a66d6624..824f7efe4e64 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -373,7 +373,7 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
>  	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
>  	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
>  
> -	reg_state[CTX_RING_TAIL+1] = rq->tail;
> +	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
>  
>  	/* True 32b PPGTT with dynamic page allocation: update PDP
>  	 * registers and point the unallocated PDPs to scratch page.
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3142085b5cc0..21d5e8209400 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1718,7 +1718,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
>  {
>  	struct drm_i915_private *dev_priv = request->i915;
>  
> -	I915_WRITE_TAIL(request->engine, request->tail);
> +	I915_WRITE_TAIL(request->engine,
> +			intel_ring_offset(request->ring, request->tail));
>  }
>  
>  static void
> @@ -2505,7 +2506,8 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
>  		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
>  
>  	/* Now that the ring is fully powered up, update the tail */
> -	I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail);
> +	I915_WRITE_FW(RING_TAIL(request->engine->mmio_base),
> +		      intel_ring_offset(request->ring, request->tail));
>  	POSTING_READ_FW(RING_TAIL(request->engine->mmio_base));
>  
>  	/* Let the ring send IDLE messages to the GT again,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 14d2ea36fb88..198b541f9b22 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -460,14 +460,23 @@ static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg)
>  
>  static inline void intel_ring_advance(struct intel_ring *ring)
>  {
> +	/* Dummy function.
> +	 *
> +	 * This serves as a placeholder in the code so that the reader
> +	 * can compare against the preceeding intel_ring_begin() and
> +	 * check that the number of dwords emitted matches the space
> +	 * reserved for the command packet (i.e. the value passed to
> +	 * intel_ring_begin()).
> +	 */
> +}
> +
> +static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
> +{
>  	/* The modulus is required so that we avoid writing
>  	 * request->tail == ring->size, rather than the expected 0,
>  	 * into the RING_TAIL register as that can cause a GPU hang.
> -	 * As this is only strictly required for the request->tail,
> -	 * and only then as we write the value into hardware, we can
> -	 * one day remove the modulus after every command packet.
>  	 */
> -	ring->tail &= ring->size - 1;
> +	return value & (ring->size - 1);
>  }
>  
>  int __intel_ring_space(int head, int tail, int size);
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation


More information about the Intel-gfx mailing list