[Intel-gfx] [PATCH] drm/i915: use a separate context for gpu relocs

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Mon Aug 26 17:56:53 UTC 2019



On 8/24/2019 1:54 AM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2019-08-24 01:20:22)
>> The CS pre-parser can pre-fetch commands across memory sync points and
>> starting from gen12 it is able to pre-fetch across BB_START and BB_END
>> boundaries as well, so when we emit gpu relocs the pre-parser might
>> fetch the target location of the reloc before the memory write lands.
>>
>> The parser can't pre-fetch across the ctx switch, so we use a separate
>> context to guarantee that the memory is syncronized before the parser
>> can get to it.
>>
>> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 27 ++++++++++++++++++-
>>   1 file changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index b5f6937369ea..d27201c654e9 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -252,6 +252,7 @@ struct i915_execbuffer {
>>                  bool has_fence : 1;
>>                  bool needs_unfenced : 1;
>>   
>> +               struct intel_context *ce;
>>                  struct i915_request *rq;
>>                  u32 *rq_cmd;
>>                  unsigned int rq_size;
>> @@ -903,6 +904,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
>>          cache->has_fence = cache->gen < 4;
>>          cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
>>          cache->node.allocated = false;
>> +       cache->ce = NULL;
>>          cache->rq = NULL;
>>          cache->rq_size = 0;
>>   }
>> @@ -943,6 +945,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
>>   static void reloc_cache_reset(struct reloc_cache *cache)
>>   {
>>          void *vaddr;
>> +       struct intel_context *ce;
>>   
>>          if (cache->rq)
>>                  reloc_gpu_flush(cache);
>> @@ -973,6 +976,10 @@ static void reloc_cache_reset(struct reloc_cache *cache)
>>                  }
>>          }
>>   
>> +       ce = fetch_and_zero(&cache->ce);
>> +       if (ce)
>> +               intel_context_put(ce);
> We don't need to create a new context between every buffer, it can be
> released in eb_destroy.
>
>> +
>>          cache->vaddr = 0;
>>          cache->page = -1;
>>   }
>> @@ -1168,7 +1175,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>          if (err)
>>                  goto err_unmap;
>>   
>> -       rq = i915_request_create(eb->context);
>> +       rq = intel_context_create_request(cache->ce);
>>          if (IS_ERR(rq)) {
>>                  err = PTR_ERR(rq);
>>                  goto err_unpin;
>> @@ -1239,6 +1246,24 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>>                  if (!intel_engine_can_store_dword(eb->engine))
>>                          return ERR_PTR(-ENODEV);
>>   
>> +               /*
>> +                * The CS pre-parser can pre-fetch commands across memory sync
>> +                * points and starting gen12 it is able to pre-fetch across
>> +                * BB_START and BB_END boundaries (within the same context). We
>> +                * use a separate context to guarantee that the reloc writes
>> +                * land before the parser gets to the target memory location.
>> +                */
>> +               if (!cache->ce) {
>> +                       struct intel_context *ce;
>> +
> I was thinking of
>
> 	if (cache->gen >= 12)
>> +                       ce = intel_context_create(eb->context->gem_context,
>> +                                                 eb->engine);
> else
> 	ce = intel_context_get(eb->context);
>
> Note that this requires us to fix the tagging so that we don't perform a
> lite-restore from the reloc instance to the user instance.

What's wrong with lite-restoring in this case? It's not something we 
stop now AFAICS.

Daniele

>
> But yes, that's more or less the same as the sketch I did :)
> -Chris



More information about the Intel-gfx mailing list