[PATCH v3 3/3] drm/i915/gvt: init mmio by lri command in vgpu inhibit context

Tian, Kevin kevin.tian at intel.com
Mon Feb 12 08:16:31 UTC 2018


> From: Weinan Li
> Sent: Monday, February 12, 2018 3:08 PM
> 
> There is one issue relates to Coarse Power Gating(CPG) on KBL NUC in GVT-g,
> vgpu can't get the correct default context by updating the registers before
> inhibit context submission. It always get back the hardware default value
> except the inhibit context submission happened before the 1st time

except -> unless

> forcewake put. With this wrong default context, vgpu will run with
> incorrect state and meet unknown issues.
> 
> The solution is initialize these mmios by adding lri command in ring buffer
> of the inhibit context, then gpu hardware has no chance to go down RC6,

To be accurate: the hardware has no chance to go down to RC6 >>while the
LRI commands are being executed<<.


> vgpu can get correct default context for further use.
> 
> v3: fix code fault, use 'for' to loop through mmio render list(Zhenyu)
> 
> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
> Signed-off-by: Weinan Li <weinan.z.li at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/mmio_context.c | 160 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/gvt/mmio_context.h |   3 +
>  drivers/gpu/drm/i915/gvt/scheduler.c    |  11 +++
>  3 files changed, 174 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index 62a43b0..8d441c4 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -188,6 +188,155 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
>  	gen9_render_mocs.initialized = true;
>  }
> 
> +static int intel_vgpu_emit_context_mmio(struct intel_vgpu *vgpu,
> +					struct drm_i915_gem_request *req)

As a static function it doesn't need the "intel_vgpu" prefix. Also the name
is too general, given that it's only used for the inhibit context.

What about restore_basic_context_for_inhibit? (Not sure whether 'basic' is a
good name here.)

> +{
> +	u32 *cs;
> +	int ret, ring_id, count = 0;
> +	struct engine_mmio *mmio;
> +	struct drm_i915_private *dev_priv = req->i915;
> +
> +	ring_id = req->engine->id;
> +
> +	if (!IS_KABYLAKE(dev_priv) ||
> +	    !is_inhibit_context(req->ctx, ring_id))
> +		return -EINVAL;
> +
> +	for (mmio = dev_priv->gvt->engine_mmio_list;
> +	     i915_mmio_reg_valid(mmio->reg); mmio++) {
> +		if (mmio->ring_id == ring_id && mmio->in_context)
> +			count++;
> +	}

The loop above computes fixed knowledge for a given SKU and ring_id; better
not to repeat it for every inhibit context.

It's cleaner to add a per-SKU callback pointer, set to the above function
only when the platform is KBL and the count is valid. Then at run time you
just check the callback pointer and call it, as sketched below.
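
A rough sketch of that idea (the mmio_hooks field and helper name are made
up for illustration, not existing GVT-g code):

/* hypothetical per-SKU hook, filled in once at GVT init time */
struct intel_gvt_mmio_hooks {
	int (*restore_inhibit)(struct intel_vgpu *vgpu,
			       struct drm_i915_gem_request *req);
	int in_ctx_count[I915_NUM_ENGINES];	/* counted once per ring */
};

static void setup_mmio_hooks(struct intel_gvt *gvt)
{
	struct engine_mmio *mmio;

	if (!IS_KABYLAKE(gvt->dev_priv))
		return;

	/* walk engine_mmio_list only once, not per inhibit context */
	for (mmio = gvt->engine_mmio_list;
	     i915_mmio_reg_valid(mmio->reg); mmio++) {
		if (mmio->in_context)
			gvt->mmio_hooks.in_ctx_count[mmio->ring_id]++;
	}

	/* callee can still early-return for rings with a zero count */
	gvt->mmio_hooks.restore_inhibit = restore_basic_context_for_inhibit;
}

Then the per-request path only needs something like:

	if (gvt->mmio_hooks.restore_inhibit &&
	    is_inhibit_context(req->ctx, req->engine->id))
		gvt->mmio_hooks.restore_inhibit(vgpu, req);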

> +
> +	if (!count)
> +		return 0;
> +
> +	ret = req->engine->emit_flush(req, EMIT_BARRIER);
> +	if (ret)
> +		return ret;
> +
> +	cs = intel_ring_begin(req, count * 2 + 2);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(count);
> +	for (mmio = dev_priv->gvt->engine_mmio_list;
> +	     i915_mmio_reg_valid(mmio->reg); mmio++) {
> +		if (mmio->ring_id != ring_id ||
> +		    !mmio->in_context)
> +			continue;
> +
> +		*cs++ = i915_mmio_reg_offset(mmio->reg);
> +		*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
> +				(mmio->mask << 16);
> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> +			      *(cs-2), *(cs-1), vgpu->id, ring_id);
> +	}
> +
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	ret = req->engine->emit_flush(req, EMIT_BARRIER);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int intel_vgpu_emit_render_mocs_control(struct intel_vgpu *vgpu,
> +					       struct drm_i915_gem_request *req)

restore_render_mocs_control_for_inhibit

> +{
> +	unsigned int index;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> +
> +	for (index = 0; index < GEN9_MOCS_SIZE; index++) {
> +		*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
> +		*cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> +			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
> +
> +	}
> +
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	return 0;
> +}
> +
> +static int intel_vgpu_emit_render_mocs_l3cc(struct intel_vgpu *vgpu,
> +					    struct drm_i915_gem_request *req)

restore_render_mocs_l3cc_for_inhibit

> +{
> +	unsigned int index;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> +
> +	for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
> +		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
> +		*cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> +			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
> +
> +	}
> +
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	return 0;
> +}
> +
> +int intel_vgpu_context_mmio_emit(struct intel_vgpu *vgpu,
> +				 struct drm_i915_gem_request *req)

intel_vgpu_restore_inhibit_context
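
Putting the rename suggestions from this review together, the prototypes
could look like the following (names are only proposals, not final API):

static int restore_basic_context_for_inhibit(struct intel_vgpu *vgpu,
					     struct drm_i915_gem_request *req);
static int restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
						   struct drm_i915_gem_request *req);
static int restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
						struct drm_i915_gem_request *req);
int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
				       struct drm_i915_gem_request *req);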

> +{
> +	int ret;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(req, 2);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	ret = intel_vgpu_emit_context_mmio(vgpu, req);
> +	if (ret)
> +		goto out;
> +
> +	/* no MOCS register in context except render engine */
> +	if (req->engine->id != RCS)
> +		goto out;
> +
> +	ret = intel_vgpu_emit_render_mocs_control(vgpu, req);
> +	if (ret)
> +		goto out;
> +
> +	ret = intel_vgpu_emit_render_mocs_l3cc(vgpu, req);
> +	if (ret)
> +		goto out;
> +
> +out:
> +	cs = intel_ring_begin(req, 2);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +	*cs++ = MI_NOOP;

A curious question: if a preemption happens at this point, will the correct
state be recovered when the context is re-scheduled in?

> +	intel_ring_advance(req, cs);
> +
> +	return ret;
> +}
> +
>  static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
>  {
>  	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
> @@ -254,6 +403,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
>  	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
>  		return;
> 
> +	if (ring_id == RCS)
> +		return;
> +
>  	if (!pre && !gen9_render_mocs.initialized)
>  		load_render_mocs(dev_priv);
> 
> @@ -324,6 +476,14 @@ static void switch_mmio(struct intel_vgpu *pre,
>  	     i915_mmio_reg_valid(mmio->reg); mmio++) {
>  		if (mmio->ring_id != ring_id)
>  			continue;
> +		/*
> +		 * Update the mmio which in context during inhibit context
> +		 * initialization on Kablake platform, here don't need to do
> +		 * save/restore.

The comment above is not clear; please reword it.

> +		 */
> +		if (IS_KABYLAKE(dev_priv) && mmio->in_context)
> +			continue;
> +
>  		// save
>  		if (pre) {
>  			vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
> index 4df87c7..e3fb82b 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.h
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
> @@ -51,4 +51,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
> 
>  bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
> 
> +int intel_vgpu_context_mmio_emit(struct intel_vgpu *vgpu,
> +				 struct drm_i915_gem_request *req);
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 9a64bd0..040d7a6 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -225,6 +225,17 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
>  	struct intel_vgpu *vgpu = workload->vgpu;
>  	void *shadow_ring_buffer_va;
>  	u32 *cs;
> +	struct drm_i915_gem_request *req = workload->req;
> +
> +	/*
> +	 * So far only emit init mmio pair in inhibit context on KBL
> +	 * platform, since there is CPG issue on KBL NUC, vgpu can't get
> +	 * correct default context by restoring mmio before inhibit context

Again, the comment above describes the callee's behavior rather than what
this caller does.

> +	 * submission. It should be also workable on other platforms.

"should also work" then do you plan to extend to all platforms? 

> +	 */
> +	if (IS_KABYLAKE(req->i915) &&
> +	    is_inhibit_context(req->ctx, req->engine->id))
> +		intel_vgpu_context_mmio_emit(vgpu, workload->req);
> 
>  	/* allocate shadow ring buffer */
>  	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
> --
> 1.9.1
> 