[PATCH v5] drm/i915/gvt: skip populate shadow context if guest context not changed

Fri Apr 17 02:20:08 UTC 2020

> From: Zhao, Yan Y <yan.y.zhao at intel.com>
> Sent: Thursday, April 16, 2020 3:14 PM
> 
> Software is not expected to populate engine context except when using
> restore inhibit bit or golden state to initialize it for the first time.
> 
> Therefore, if a newly submitted guest context is the same as the last
> shadowed one, no need to populate its engine context from guest again.
> 
> Currently using lrca + ring_context_gpa to identify whether two guest
> contexts are the same.
> 
> The reason of why context id is not included as an identifier is that
> i915 recently changed the code and context id is only unique for a
> context when OA is enabled. And when OA is on, context id is generated
> based on lrca. Therefore, in that case, if two contexts are of the same
> lrca, they have identical context ids as well.
> (This patch also works with old guest kernel like 4.20.)
> 
> for guest context, if its ggtt entry is modified after last context
> shadowing, it is also deemed as not the same context as last shadowed one.
> 
> v5:
> merge all 3 patches into one patch  (Zhenyu Wang)
> 
> v4:
> - split the series into 3 patches.
> - don't turn on optimization until last patch in this series (Kevin Tian)
> - define lrca to be atomic in this patch rather than update its type in
> the second patch (Kevin Tian)
> 
> v3: updated commit message to describe engine context and context id
> clearly (Kevin Tian)
> v2: rebased to 5.6.0-rc4+Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
> 
> Cc: Kevin Tian <kevin.tian at intel.com>
> Suggested-by: Zhenyu Wang <zhenyuw at linux.intel.com>
> Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/gtt.c       | 17 ++++++++++++++
>  drivers/gpu/drm/i915/gvt/gvt.h       |  5 ++++
>  drivers/gpu/drm/i915/gvt/scheduler.c | 35 ++++++++++++++++++++++++----
>  3 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 2a4b23f8aa74..128b24fe9a3b 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -2341,12 +2341,29 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct
> intel_vgpu *vgpu,
>  {
>  	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
>  	int ret;
> +	struct intel_vgpu_submission *s = &vgpu->submission;
> +	struct intel_engine_cs *engine;
> +	int i;
> 
>  	if (bytes != 4 && bytes != 8)
>  		return -EINVAL;
> 
>  	off -= info->gtt_start_offset;
>  	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
> +
> +	/* if ggtt of last submitted context is written,
> +	 * that context is probably got unpinned.
> +	 * Set last shadowed ctx to invalid.
> +	 */
> +	for_each_engine(engine, vgpu->gvt->gt, i) {
> +		if (!s->last_ctx[i].valid)
> +			continue;
> +
> +		if (atomic_read(&s->last_ctx[i].lrca) ==
> +				off >> info->gtt_entry_size_shift) {
> +			s->last_ctx[i].valid = false;
> +		}
> +	}
>  	return ret;
>  }
> 
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
> index 58c2c7932e3f..b342d7be741f 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -163,6 +163,11 @@ struct intel_vgpu_submission {
>  	const struct intel_vgpu_submission_ops *ops;
>  	int virtual_submission_interface;
>  	bool active;
> +	struct {
> +		atomic_t lrca;
> +		bool valid;
> +		u64 ring_context_gpa;

Is there a reason why lrca is atomic_t while gpa is u64? From the
code they are always checked/updated together...

> +	} last_ctx[I915_NUM_ENGINES];
>  };
> 
>  struct intel_vgpu {
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c
> b/drivers/gpu/drm/i915/gvt/scheduler.c
> index f939ec3be39e..1f0b15b9a846 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -135,7 +135,11 @@ static int populate_shadow_context(struct
> intel_vgpu_workload *workload)
>  	unsigned long context_gpa, context_page_num;
>  	unsigned long gpa_base; /* first gpa of consecutive GPAs */
>  	unsigned long gpa_size; /* size of consecutive GPAs */
> +	struct intel_vgpu_submission *s = &vgpu->submission;
>  	int i;
> +	bool skip = false;
> +	int ring_id = workload->engine->id;
> +	bool valid;
> 
>  	GEM_BUG_ON(!intel_context_is_pinned(ctx));
> 
> @@ -175,12 +179,29 @@ static int populate_shadow_context(struct
> intel_vgpu_workload *workload)
> 
>  	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
> 
> -	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val))
> -		return 0;
> +	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
> +			workload->engine->name, workload->ctx_desc.lrca,
> +			workload->ctx_desc.context_id,
> +			workload->ring_context_gpa);
> 
> -	gvt_dbg_sched("ring %s workload lrca %x",
> -		      workload->engine->name,
> -		      workload->ctx_desc.lrca);
> +	/* only need to ensure this context is not pinned/unpinned during
> the
> +	 * period from last submission to this this submission.
> +	 * Upon reaching this function, the currently submitted context is not
> +	 * supposed to get unpinned. If a misbehaving guest driver ever does
> +	 * this, it would corrupt itself.
> +	 */
> +	valid = s->last_ctx[ring_id].valid;
> +	if (valid && (atomic_read(&s->last_ctx[ring_id].lrca) ==
> +				workload->ctx_desc.lrca) &&
> +			(s->last_ctx[ring_id].ring_context_gpa ==
> +				workload->ring_context_gpa))
> +		skip = true;
> +
> +	atomic_set(&s->last_ctx[ring_id].lrca, workload->ctx_desc.lrca);
> +	s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
> +
> +	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
> +		return 0;
> 
>  	context_page_num = workload->engine->context_size;
>  	context_page_num = context_page_num >> PAGE_SHIFT;
> @@ -220,6 +241,8 @@ static int populate_shadow_context(struct
> intel_vgpu_workload *workload)
>  		gpa_size = I915_GTT_PAGE_SIZE;
>  		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
>  	}

You missed one thing. There could be error captured when copying the
guest context:

                if (context_gpa == INTEL_GVT_INVALID_ADDR) {
                        gvt_vgpu_err("Invalid guest context descriptor\n");
                        return -EFAULT;
                }

In that case we should clear the valid bit in case that it is already
valid recording a different lrca before this submission. 

> +	if (!valid)
> +		s->last_ctx[ring_id].valid = true;
>  	return 0;
>  }
> 
> @@ -1297,6 +1320,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu
> *vgpu)
>  	atomic_set(&s->running_workload_num, 0);
>  	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
> 
> +	memset(s->last_ctx, 0, sizeof(s->last_ctx));
> +
>  	i915_vm_put(&ppgtt->vm);
>  	return 0;
> 
> --
> 2.17.1