[Intel-gfx] [PATCH] Revert "drm/i915: use a separate context for gpu relocs"

Tue Dec 3 22:19:07 UTC 2019

On 11/29/19 4:48 AM, Chris Wilson wrote:
> Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT
> invalidations"), we disable the advanced pre-parser on Tigerlake for the
> invalidation phase at the start of the batch, so we no longer need to emit
> the GPU relocations from a second context as they are now flushed inline.
> 

c45e788d95b4 only applies to the RCS, though, and IIRC I've seen issues
with the relocations on other engines as well, although they were much
rarer. Also, the comment left in intel_lrc.c still references reloc_gpu().

Daniele

> References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs")
> References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 30 +------------------
>   1 file changed, 1 insertion(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 7a87e8270460..459f4d40b69b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -253,7 +253,6 @@ struct i915_execbuffer {
>   		bool has_fence : 1;
>   		bool needs_unfenced : 1;
>   
> -		struct intel_context *ce;
>   		struct i915_request *rq;
>   		u32 *rq_cmd;
>   		unsigned int rq_size;
> @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb)
>   {
>   	GEM_BUG_ON(eb->reloc_cache.rq);
>   
> -	if (eb->reloc_cache.ce)
> -		intel_context_put(eb->reloc_cache.ce);
> -
>   	if (eb->lut_size > 0)
>   		kfree(eb->buckets);
>   }
> @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
>   	cache->has_fence = cache->gen < 4;
>   	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
>   	cache->node.flags = 0;
> -	cache->ce = NULL;
>   	cache->rq = NULL;
>   	cache->rq_size = 0;
>   }
> @@ -1182,7 +1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   	if (err)
>   		goto err_unmap;
>   
> -	rq = intel_context_create_request(cache->ce);
> +	rq = i915_request_create(eb->context);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_unpin;
> @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>   		if (!intel_engine_can_store_dword(eb->engine))
>   			return ERR_PTR(-ENODEV);
>   
> -		if (!cache->ce) {
> -			struct intel_context *ce;
> -
> -			/*
> -			 * The CS pre-parser can pre-fetch commands across
> -			 * memory sync points and starting gen12 it is able to
> -			 * pre-fetch across BB_START and BB_END boundaries
> -			 * (within the same context). We therefore use a
> -			 * separate context gen12+ to guarantee that the reloc
> -			 * writes land before the parser gets to the target
> -			 * memory location.
> -			 */
> -			if (cache->gen >= 12)
> -				ce = intel_context_create(eb->context->gem_context,
> -							  eb->engine);
> -			else
> -				ce = intel_context_get(eb->context);
> -			if (IS_ERR(ce))
> -				return ERR_CAST(ce);
> -
> -			cache->ce = ce;
> -		}
> -
>   		err = __reloc_gpu_alloc(eb, vma, len);
>   		if (unlikely(err))
>   			return ERR_PTR(err);
>