[Intel-gfx] [PATCH 1/8] drm/i915: Get rid of the synchronous flag in switch_mm

Wed Jun 18 21:50:31 CEST 2014

On Wed, Jun 18, 2014 at 05:15:35PM +0100, oscar.mateo at intel.com wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
> 
> As a first step, split switch_mm into two different functions:
> sync_switch_mm (currently used by ppgtt_enable) and switch_mm
> (used by the outside world).
> 
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>

I guess that jira task description was a bit unclear - removing the bool
synchronous argument isn't the goal itself. The idea was to rework the gpu
reset sequence so that it matches the same sequence we use at driver load
and so can use the same code. This patch removes the bool synchronous, but
the logical split with two different ways to load ppgtt is still there.

The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions.

If for some really odd reason this is impossible (e.g. when the gpu needs
a special dance when coming out of reset) then we need a giant comment
explaining the magic. But without such a clear reason the goal should
always be to have just 1 single autorative piece of code for a given state
transition.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |   2 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 101 ++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h     |   5 +-
>  3 files changed, 73 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3ffe308..14cd7fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -624,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
>  	from = ring->last_context;
>  
>  	if (USES_FULL_PPGTT(ring->dev)) {
> -		ret = ppgtt->switch_mm(ppgtt, ring, false);
> +		ret = ppgtt->switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto unpin_out;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ee7af4e..ad5c85b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -242,9 +242,26 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
>  	return 0;
>  }
>  
> +static int gen8_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
> +{
> +	int i, ret;
> +
> +	/* bit of a hack to find the actual last used pd */
> +	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
> +
> +	for (i = used_pd - 1; i >= 0; i--) {
> +		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> +		ret = gen8_write_pdp(ring, i, addr, true);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	int i, ret;
>  
> @@ -253,7 +270,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  
>  	for (i = used_pd - 1; i >= 0; i--) {
>  		dma_addr_t addr = ppgtt->pd_dma_addr[i];
> -		ret = gen8_write_pdp(ring, i, addr, synchronous);
> +		ret = gen8_write_pdp(ring, i, addr, false);
>  		if (ret)
>  			return ret;
>  	}
> @@ -654,6 +671,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>  	}
>  
>  	ppgtt->enable = gen8_ppgtt_enable;
> +	ppgtt->sync_switch_mm = gen8_mm_sync_switch;
>  	ppgtt->switch_mm = gen8_mm_switch;
>  	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>  	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
> @@ -768,9 +786,22 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>  	return (ppgtt->pd_offset / 64) << 16;
>  }
>  
> +static int hsw_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous)
> +			 struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -783,14 +814,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> @@ -812,9 +837,22 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> +static int gen7_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> +	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> +	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> +	POSTING_READ(RING_PP_DIR_BASE(ring));
> +
> +	return 0;
> +}
> +
>  static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +			  struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -827,14 +865,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	 *
>  	 * FIXME: We should try not to special case reset
>  	 */
> -	if (synchronous ||
> -	    i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
> -		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
> -		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
> -		POSTING_READ(RING_PP_DIR_BASE(ring));
> -		return 0;
> -	}
> +	if (i915_reset_in_progress(&dev_priv->gpu_error))
> +		return ppgtt->sync_switch_mm(ppgtt, ring);
>  
>  	/* NB: TLBs must be flushed and invalidated before a switch */
>  	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
> @@ -863,16 +895,12 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct intel_engine_cs *ring,
> -			  bool synchronous)
> +static int gen6_mm_sync_switch(struct i915_hw_ppgtt *ppgtt,
> +			       struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	if (!synchronous)
> -		return 0;
> -
>  	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
>  	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
>  
> @@ -881,6 +909,12 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return 0;
>  }
>  
> +static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
> +			  struct intel_engine_cs *ring)
> +{
> +	return 0;
> +}
> +
>  static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
> @@ -897,7 +931,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
>  
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			goto err_out;
>  	}
> @@ -942,7 +976,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  		if (USES_FULL_PPGTT(dev))
>  			continue;
>  
> -		ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -971,7 +1005,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
>  	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		int ret = ppgtt->switch_mm(ppgtt, ring, true);
> +		int ret = ppgtt->sync_switch_mm(ppgtt, ring);
>  		if (ret)
>  			return ret;
>  	}
> @@ -1196,12 +1230,15 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
>  	if (IS_GEN6(dev)) {
>  		ppgtt->enable = gen6_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen6_mm_sync_switch;
>  		ppgtt->switch_mm = gen6_mm_switch;
>  	} else if (IS_HASWELL(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = hsw_mm_sync_switch;
>  		ppgtt->switch_mm = hsw_mm_switch;
>  	} else if (IS_GEN7(dev)) {
>  		ppgtt->enable = gen7_ppgtt_enable;
> +		ppgtt->sync_switch_mm = gen7_mm_sync_switch;
>  		ppgtt->switch_mm = gen7_mm_switch;
>  	} else
>  		BUG();
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 7ab8446..c260ada 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -262,9 +262,10 @@ struct i915_hw_ppgtt {
>  	struct intel_context *ctx;
>  
>  	int (*enable)(struct i915_hw_ppgtt *ppgtt);
> +	int (*sync_switch_mm)(struct i915_hw_ppgtt *ppgtt,
> +			      struct intel_engine_cs *ring);
>  	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> -			 struct intel_engine_cs *ring,
> -			 bool synchronous);
> +			 struct intel_engine_cs *ring);
>  	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
>  };
>  
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch