[Intel-gfx] [PATCH 1/3] drm/i915/gem: Use chained reloc batches
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri May 1 13:28:16 UTC 2020
On 01/05/2020 14:02, Chris Wilson wrote:
> The ring is a precious resource: we anticipate using only a few hundred
> bytes for a request, and only try to reserve that before we start. If we
> go beyond our guess in building the request, then instead of waiting at
> the start of execbuf before we hold any locks or other resources, we
> may trigger a wait inside a critical region. One example is in using gpu
> relocations, where currently we emit a new MI_BB_START from the ring
> every time we overflow a page of relocation entries. However, instead of
> inserting the command into the precious ring, we can chain the next page
> of relocation entries as an MI_BB_START from the end of the previous.
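For anyone skimming the diff below, the chaining tail written at the end of
a full relocation page boils down to this (a sketch only; chain_to_next()
and next_page_addr are illustrative names, not i915 API):

    /*
     * Instead of returning to the ring, a full page of relocation
     * entries ends with an arbitration point and a jump straight
     * into the next page of entries.
     */
    static u32 *chain_to_next(u32 *cmd, u64 next_page_addr, int gen)
    {
            *cmd++ = MI_ARB_CHECK;  /* allow preemption between pages */
            if (gen >= 8) {
                    *cmd++ = MI_BATCH_BUFFER_START_GEN8;
                    *cmd++ = lower_32_bits(next_page_addr);
                    *cmd++ = upper_32_bits(next_page_addr);
            } else {
                    *cmd++ = MI_BATCH_BUFFER_START;
                    *cmd++ = lower_32_bits(next_page_addr);
            }
            return cmd;
    }

so only the first MI_BB_START lives in the ring; every subsequent page is
reached from the tail of its predecessor.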
>
> v2: Delay the emit_bb_start until after all the chained vma
> synchronisation is complete. Since the buffer pool batches are idle, this
> _should_ be a no-op, but one day we may have some fancy async GPU bindings
> for new vma!
>
> v3: Use pool/batch consistently; once we start thinking in terms of the
> batch vma, use batch->obj.
> v4: Explain the magic number 4.
>
> Tvrtko spotted that we lose propagation of the error for failing to
> submit the relocation request; that's easier to fix up in the next
> patch.
>
> Testcase: igt/gem_exec_reloc/basic-many-active
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 134 +++++++++++++++---
> 1 file changed, 115 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 414859fa2673..0874976b1cf7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -271,6 +271,7 @@ struct i915_execbuffer {
> struct i915_request *rq;
> u32 *rq_cmd;
> unsigned int rq_size;
> + struct i915_vma *rq_vma;
> } reloc_cache;
>
> u64 invalid_flags; /** Set of execobj.flags that are invalid */
> @@ -975,20 +976,114 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
> return &i915->ggtt;
> }
>
> +#define RELOC_TAIL 4
> +
> +static int reloc_gpu_chain(struct reloc_cache *cache)
> +{
> + struct intel_gt_buffer_pool_node *pool;
> + struct i915_request *rq = cache->rq;
> + struct i915_vma *batch;
> + u32 *cmd;
> + int err;
> +
> + pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
> + if (IS_ERR(pool))
> + return PTR_ERR(pool);
> +
> + batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_pool;
> + }
> +
> + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
> + if (err)
> + goto out_pool;
> +
> + GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
> + cmd = cache->rq_cmd + cache->rq_size;
> + *cmd++ = MI_ARB_CHECK;
> + if (cache->gen >= 8) {
> + *cmd++ = MI_BATCH_BUFFER_START_GEN8;
> + *cmd++ = lower_32_bits(batch->node.start);
> + *cmd++ = upper_32_bits(batch->node.start);
> + } else {
> + *cmd++ = MI_BATCH_BUFFER_START;
> + *cmd++ = lower_32_bits(batch->node.start);
> + }
> + i915_gem_object_flush_map(cache->rq_vma->obj);
> + i915_gem_object_unpin_map(cache->rq_vma->obj);
> + cache->rq_vma = NULL;
> +
> + err = intel_gt_buffer_pool_mark_active(pool, rq);
> + if (err == 0) {
> + i915_vma_lock(batch);
> + err = i915_request_await_object(rq, batch->obj, false);
> + if (err == 0)
> + err = i915_vma_move_to_active(batch, rq, 0);
> + i915_vma_unlock(batch);
> + }
> + i915_vma_unpin(batch);
> + if (err)
> + goto out_pool;
> +
> + cmd = i915_gem_object_pin_map(batch->obj,
> + cache->has_llc ?
> + I915_MAP_FORCE_WB :
> + I915_MAP_FORCE_WC);
> + if (IS_ERR(cmd)) {
> + err = PTR_ERR(cmd);
> + goto out_pool;
> + }
> +
> + /* Return with batch mapping (cmd) still pinned */
> + cache->rq_cmd = cmd;
> + cache->rq_size = 0;
> + cache->rq_vma = batch;
> +
> +out_pool:
> + intel_gt_buffer_pool_put(pool);
> + return err;
> +}
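The magic number from v4 is then just the worst case for the tail emitted
above:

    /*
     * Worst-case dwords appended after the last relocation entry:
     *
     *   MI_ARB_CHECK                 1
     *   MI_BATCH_BUFFER_START_GEN8   1
     *   lower_32_bits(addr)          1
     *   upper_32_bits(addr)          1
     *                              ---
     *   RELOC_TAIL                   4
     *
     * Pre-gen8 needs one dword fewer (no upper address dword), and
     * the lone MI_BATCH_BUFFER_END written by reloc_gpu_flush()
     * needs only one, so reserving 4 covers every case.
     */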
> +
> +static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
> +{
> + return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
> +}
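(The I915_DISPATCH_SECURE for gen5 and earlier is, if I remember right,
because those gens only accept MI_STORE_DWORD from a privileged batch;
hence also the intel_engine_can_store_dword() check in reloc_gpu().)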
> +
> static void reloc_gpu_flush(struct reloc_cache *cache)
> {
> - struct drm_i915_gem_object *obj = cache->rq->batch->obj;
> + struct i915_request *rq;
> + int err;
>
> - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
> - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
> + rq = fetch_and_zero(&cache->rq);
> + if (!rq)
> + return;
>
> - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
> - i915_gem_object_unpin_map(obj);
> + if (cache->rq_vma) {
> + struct drm_i915_gem_object *obj = cache->rq_vma->obj;
>
> - intel_gt_chipset_flush(cache->rq->engine->gt);
> + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
> + cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
>
> - i915_request_add(cache->rq);
> - cache->rq = NULL;
> + __i915_gem_object_flush_map(obj,
> + 0, sizeof(u32) * cache->rq_size);
> + i915_gem_object_unpin_map(obj);
> + }
> +
> + err = 0;
> + if (rq->engine->emit_init_breadcrumb)
> + err = rq->engine->emit_init_breadcrumb(rq);
> + if (!err)
> + err = rq->engine->emit_bb_start(rq,
> + rq->batch->node.start,
> + PAGE_SIZE,
> + reloc_bb_flags(cache));
> + if (err)
> + i915_request_set_error_once(rq, err);
> +
> + intel_gt_chipset_flush(rq->engine->gt);
> + i915_request_add(rq);
> }
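The v2 point shows up here: the jump into the first relocation page is now
emitted only at flush time, once every chained batch has been awaited and
moved to active. A failure is recorded on the request rather than aborting
(propagation back to the execbuf caller is the next patch, as the commit
message says), so the request is still added and its fences still signal.
Condensed from the hunk above, the flush-time ordering is:

    err = emit_init_breadcrumb(rq);         /* if the engine has one */
    if (!err)
            err = emit_bb_start(rq, ...);   /* enter the first reloc page */
    if (err)
            i915_request_set_error_once(rq, err);
    intel_gt_chipset_flush(gt);
    i915_request_add(rq);                   /* always submitted */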
>
> static void reloc_cache_reset(struct reloc_cache *cache)
> @@ -1237,12 +1332,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> if (err)
> goto err_request;
>
> - err = eb->engine->emit_bb_start(rq,
> - batch->node.start, PAGE_SIZE,
> - cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
> - if (err)
> - goto skip_request;
> -
> i915_vma_lock(batch);
> err = i915_request_await_object(rq, batch->obj, false);
> if (err == 0)
> @@ -1257,6 +1346,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> cache->rq = rq;
> cache->rq_cmd = cmd;
> cache->rq_size = 0;
> + cache->rq_vma = batch;
>
> /* Return with batch mapping (cmd) still pinned */
> goto out_pool;
> @@ -1280,13 +1370,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
> {
> struct reloc_cache *cache = &eb->reloc_cache;
> u32 *cmd;
> -
> - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
> - reloc_gpu_flush(cache);
> + int err;
>
> if (unlikely(!cache->rq)) {
> - int err;
> -
> if (!intel_engine_can_store_dword(eb->engine))
> return ERR_PTR(-ENODEV);
>
> @@ -1295,6 +1381,16 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
> return ERR_PTR(err);
> }
>
> + if (unlikely(cache->rq_size + len >
> + PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
> + err = reloc_gpu_chain(cache);
> + if (unlikely(err)) {
> + i915_request_set_error_once(cache->rq, err);
> + return ERR_PTR(err);
> + }
> + }
> +
> + GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
> cmd = cache->rq_cmd + cache->rq_size;
> cache->rq_size += len;
>
>
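For concreteness, with 4KiB pages the capacity check in reloc_gpu() works
out to:

    PAGE_SIZE / sizeof(u32) = 4096 / 4 = 1024 dwords per batch page
    usable for relocations  = 1024 - RELOC_TAIL = 1020 dwords

so a request for len dwords chains to a fresh page as soon as
rq_size + len > 1020, which guarantees room for the MI_ARB_CHECK +
MI_BB_START tail (or the single MI_BATCH_BUFFER_END) and keeps the
GEM_BUG_ON above happy.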
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko