[Intel-gfx] [PATCH v2 7/9] drm/i915/gt: Pipelined page migration
Matthew Auld
matthew.auld at intel.com
Wed Jun 9 12:48:15 UTC 2021
On 09/06/2021 07:34, Thomas Hellström wrote:
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> If we pipeline the PTE updates and then do the copy of those pages
> within a single unpreemptible command packet, we can submit the copies
> and leave them to be scheduled without having to synchronously wait
> under a global lock. In order to manage migration, we need to
> preallocate the page tables (and keep them pinned and available for
> use at any time), which makes them a limited resource that every
> client would otherwise have to contend for. By inlining the ppGTT
> updates and performing the blit atomically, each client only owns the
> PTEs while they are in use, and so we can reschedule individual
> operations however we see fit. Most importantly, we do not need to
> take a global lock on the shared vm and hold it until the operation
> is complete before releasing it for others to claim the PTEs for
> themselves.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Co-developed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> v2:
> - Add a TODO for huge LMEM ptes (Pointed out by Matthew Auld)
> - Use intel_engine_destroy_pinned_context() to properly take the pinned
> context timeline off the engine list. (CI warning).
> ---
<snip>
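For anyone reading along without the full patch: the sgt_dma iterator
used just below is in the snipped portion. I assume it follows the same
pattern as the existing iterator in gen8_ppgtt.c, i.e. roughly:

/*
 * Sketch only, not part of the quoted hunk: walk the DMA segments of
 * a scatterlist, mirroring the existing iterator in gen8_ppgtt.c.
 */
struct sgt_dma {
        struct scatterlist *sg;
        dma_addr_t dma, max;
};

static struct sgt_dma sg_sgt(struct scatterlist *sg)
{
        dma_addr_t addr = sg_dma_address(sg);

        return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}
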
> +
> +int
> +intel_context_migrate_copy(struct intel_context *ce,
> +                           struct dma_fence *await,
> +                           struct scatterlist *src,
> +                           enum i915_cache_level src_cache_level,
> +                           bool src_is_lmem,
> +                           struct scatterlist *dst,
> +                           enum i915_cache_level dst_cache_level,
> +                           bool dst_is_lmem,
> +                           struct i915_request **out)
> +{
> +        struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
> +        struct i915_request *rq;
> +        int err;
> +
> +        *out = NULL;
> +
> +        /* GEM_BUG_ON(ce->vm != migrate_vm); */
Should we drop this?
> +
> +        GEM_BUG_ON(ce->ring->size < SZ_64K);
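As a sanity check on the SZ_64K requirement, assuming CHUNK_SZ is still
the SZ_8M defined in the snipped part: 8M / 4K = 2048 PTEs per side, at
8 bytes each that is 16K of PTE data for the source and another 16K for
the destination, so ~32K of emitted state per chunk, leaving the rest
of a 64K ring for the flush, the copy commands and the breadcrumbs.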
> +
> +        do {
> +                int len;
> +
> +                rq = i915_request_create(ce);
> +                if (IS_ERR(rq)) {
> +                        err = PTR_ERR(rq);
> +                        goto out_ce;
> +                }
> +
> +                if (await) {
> +                        err = i915_request_await_dma_fence(rq, await);
> +                        if (err)
> +                                goto out_rq;
> +
> +                        if (rq->engine->emit_init_breadcrumb) {
> +                                err = rq->engine->emit_init_breadcrumb(rq);
> +                                if (err)
> +                                        goto out_rq;
> +                        }
> +
> +                        await = NULL;
> +                }
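
Worth spelling out: @await is cleared after the first request, so only
the head of the chain waits on the external fence; every subsequent
chunk is ordered behind it anyway by being on the same context
timeline.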
> +
> +                /* The PTE updates + copy must not be interrupted. */
> +                err = emit_no_arbitration(rq);
> +                if (err)
> +                        goto out_rq;
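
Since emit_no_arbitration() is also snipped, a minimal sketch of what I
take it to be doing, i.e. turning off MI arbitration so the PTE writes
plus the copy below execute as one unpreemptible packet:

/* Sketch only; the real helper lives in the snipped hunk. */
static int emit_no_arbitration(struct i915_request *rq)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* MI_ARB_ON_OFF without MI_ARB_ENABLE disables arbitration. */
        *cs++ = MI_ARB_ON_OFF;
        *cs++ = MI_NOOP;
        intel_ring_advance(rq, cs);

        return 0;
}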
> +
> +                len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0,
> +                               CHUNK_SZ);
> +                if (len <= 0) {
> +                        err = len;
> +                        goto out_rq;
> +                }
> +
> +                err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
> +                               CHUNK_SZ, len);
> +                if (err < 0)
> +                        goto out_rq;
> +                if (err < len) {
> +                        err = -EINVAL;
> +                        goto out_rq;
> +                }
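
The contract I'm inferring from the two call sites: emit_pte() binds
the next piece of the scatterlist into the migrate vm at the given
offset and returns the number of bytes it actually bound, capped at
@length. So the source pass yields len <= CHUNK_SZ, and the destination
pass has to bind at least that same len or we bail with -EINVAL.
Presumably something like:

/* Assumed interface, reconstructed from the call sites above. */
static int emit_pte(struct i915_request *rq,
                    struct sgt_dma *it,
                    enum i915_cache_level cache_level,
                    bool is_lmem,
                    u64 offset,
                    int length); /* returns bytes bound, or -errno */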
> +
> +                err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
> +                if (err)
> +                        goto out_rq;
> +
> +                err = emit_copy(rq, len);
> +
> +                /* Arbitration is re-enabled between requests. */
> +out_rq:
> +                if (*out)
> +                        i915_request_put(*out);
> +                *out = i915_request_get(rq);
> +                i915_request_add(rq);
> +                if (err || !it_src.sg || !sg_dma_len(it_src.sg))
> +                        break;
> +
> +                cond_resched();
> +        } while (1);
> +
> +out_ce:
> +        return err;
> +}
> +
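
And since only the last request of the chain comes back via @out, a
caller that wants to block until the whole copy lands would do
something like this (hypothetical usage, variable names invented):

/* Hypothetical caller: kick off a migration copy and wait for it. */
struct i915_request *rq = NULL;
int err;

err = intel_context_migrate_copy(ce, deps,
                                 src_sg, src_cache_level, false,
                                 dst_sg, dst_cache_level, true,
                                 &rq);
if (rq) {
        /* The final chunk is ordered after all earlier ones. */
        if (!err && i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
                err = -ETIME;
        i915_request_put(rq);
}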