[Intel-gfx] [PATCH 4/9] drm/i915: Remove early l3-remap

Tue Apr 19 09:41:23 UTC 2016

On 19/04/16 07:49, Chris Wilson wrote:
> Since we do the l3-remap on context switch, and proceed to do a context
> switch immediately after manually doing the l3-remap, we can remove the
> redundant manual call.

Looks like the commit message should say it is removing the l3-remap on 
driver load, not the "manual" one.

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 -
>   drivers/gpu/drm/i915/i915_gem.c         | 39 +--------------------------------
>   drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++-
>   3 files changed, 31 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 368fd7090189..4c55f480f60b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2983,7 +2983,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>   int __must_check i915_gem_init(struct drm_device *dev);
>   int i915_gem_init_engines(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>   void i915_gem_init_swizzling(struct drm_device *dev);
>   void i915_gem_cleanup_engines(struct drm_device *dev);
>   int __must_check i915_gpu_idle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c9d7dfc8f80c..fd9a36badb45 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4077,35 +4077,6 @@ err:
>   	return ret;
>   }
>
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];

Wrong base.

> -	int i, ret;
> -
> -	if (!HAS_L3_DPF(req) || !remap_info)
> -		return 0;
> -
> -	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * Note: We do not worry about the concurrent register cacheline hang
> -	 * here because no other code should access these registers other than
> -	 * at initialization time.
> -	 */
> -	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
> -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
> -		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> -		intel_ring_emit(engine, remap_info[i]);
> -	}
> -
> -	intel_ring_advance(engine);
> -
> -	return ret;
> -}
> -
>   void i915_gem_init_swizzling(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -4210,7 +4181,7 @@ i915_gem_init_hw(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *engine;
> -	int ret, j;
> +	int ret;
>
>   	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>   		return -EIO;
> @@ -4284,14 +4255,6 @@ i915_gem_init_hw(struct drm_device *dev)
>   			break;
>   		}
>
> -		if (engine->id == RCS) {
> -			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
> -				ret = i915_gem_l3_remap(req, j);
> -				if (ret)
> -					goto err_request;
> -			}
> -		}
> -
>   		ret = i915_ppgtt_init_ring(req);
>   		if (ret)
>   			goto err_request;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index db53d811370d..6870556a180b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -691,6 +691,35 @@ needs_pd_load_post(struct intel_context *to, u32 hw_flags)
>   	return false;
>   }
>
> +static int remap_l3(struct drm_i915_gem_request *req, int slice)
> +{
> +	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];
> +	struct intel_engine_cs *engine = req->engine;
> +	int i, ret;
> +
> +	if (!remap_info)
> +		return 0;

The HAS_L3_DPF check is gone. OK, it looks correct from the call site, but 
it would be best not to change the code while moving it around.

> +
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);

More changes - should you perhaps add a patch which simplifies l3 remap 
to use a single LRI ahead of this one?

> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Note: We do not worry about the concurrent register cacheline hang
> +	 * here because no other code should access these registers other than
> +	 * at initialization time.
> +	 */
> +	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
> +	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> +		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> +		intel_ring_emit(engine, remap_info[i]);
> +	}
> +	intel_ring_emit(engine, MI_NOOP);
> +	intel_ring_advance(engine);
> +
> +	return 0;
> +}
> +
>   static int do_rcs_switch(struct drm_i915_gem_request *req)
>   {
>   	struct intel_context *to = req->ctx;
> @@ -810,7 +839,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   		if (!(to->remap_slice & (1<<i)))
>   			continue;
>
> -		ret = i915_gem_l3_remap(req, i);
> +		ret = remap_l3(req, i);
>   		if (ret)
>   			return ret;
>
>

Otherwise looks safe, although I am not sure why it belongs in this series.

Regards,

Tvrtko