[Intel-gfx] [PATCH 2/3] drm/i915/gem: Use a single chained reloc batches for a single execbuf
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri May 1 13:29:23 UTC 2020
On 01/05/2020 14:02, Chris Wilson wrote:
> As we can now keep chaining together a relocation batch to process any
> number of relocations, we can keep building that relocation batch for
> all of the target vma. This avoids emitting a new request into the
> ring for each target, which would consume precious ring space and
> risk a potential stall.
>
> v2: Propagate the failure from submitting the relocation batch.
>
> Testcase: igt/gem_exec_reloc/basic-wide-active
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com> #v1
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 31 ++++++++++++-------
> 1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 0874976b1cf7..4c4b9e0e75bc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -268,6 +268,7 @@ struct i915_execbuffer {
> bool has_fence : 1;
> bool needs_unfenced : 1;
>
> + struct i915_vma *target;
> struct i915_request *rq;
> u32 *rq_cmd;
> unsigned int rq_size;
> @@ -1051,14 +1052,14 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
> return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
> }
>
> -static void reloc_gpu_flush(struct reloc_cache *cache)
> +static int reloc_gpu_flush(struct reloc_cache *cache)
> {
> struct i915_request *rq;
> int err;
>
> rq = fetch_and_zero(&cache->rq);
> if (!rq)
> - return;
> + return 0;
>
> if (cache->rq_vma) {
> struct drm_i915_gem_object *obj = cache->rq_vma->obj;
> @@ -1084,15 +1085,14 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
>
> intel_gt_chipset_flush(rq->engine->gt);
> i915_request_add(rq);
> +
> + return err;
> }
>
> static void reloc_cache_reset(struct reloc_cache *cache)
> {
> void *vaddr;
>
> - if (cache->rq)
> - reloc_gpu_flush(cache);
> -
> if (!cache->vaddr)
> return;
>
> @@ -1285,7 +1285,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
> }
>
> static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> - struct i915_vma *vma,
> unsigned int len)
> {
> struct reloc_cache *cache = &eb->reloc_cache;
> @@ -1308,7 +1307,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> goto out_pool;
> }
>
> - batch = i915_vma_instance(pool->obj, vma->vm, NULL);
> + batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
> if (IS_ERR(batch)) {
> err = PTR_ERR(batch);
> goto err_unmap;
> @@ -1328,10 +1327,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> if (err)
> goto err_request;
>
> - err = reloc_move_to_gpu(rq, vma);
> - if (err)
> - goto err_request;
> -
> i915_vma_lock(batch);
> err = i915_request_await_object(rq, batch->obj, false);
> if (err == 0)
> @@ -1376,11 +1371,19 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
> if (!intel_engine_can_store_dword(eb->engine))
> return ERR_PTR(-ENODEV);
>
> - err = __reloc_gpu_alloc(eb, vma, len);
> + err = __reloc_gpu_alloc(eb, len);
> if (unlikely(err))
> return ERR_PTR(err);
> }
>
> + if (vma != cache->target) {
> + err = reloc_move_to_gpu(cache->rq, vma);
> + if (unlikely(err))
> + return ERR_PTR(err);
> +
> + cache->target = vma;
> + }
> +
> if (unlikely(cache->rq_size + len >
> PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
> err = reloc_gpu_chain(cache);
> @@ -1698,6 +1701,10 @@ static int eb_relocate(struct i915_execbuffer *eb)
> if (err)
> return err;
> }
> +
> + err = reloc_gpu_flush(&eb->reloc_cache);
> + if (err)
> + return err;
> }
>
> return 0;
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list