[PATCH v3 3/3] drm/i915/gvt: init mmio by lri command in vgpu inhibit context

Li, Weinan Z weinan.z.li at intel.com
Tue Feb 13 04:58:01 UTC 2018


On 2/12/2018 4:16 PM, Tian, Kevin wrote:
>> From: Weinan Li
>> Sent: Monday, February 12, 2018 3:08 PM
>>
>> There is one issue relates to Coarse Power Gating(CPG) on KBL NUC in GVT-g,
>> vgpu can't get the correct default context by updating the registers before
>> inhibit context submission. It always get back the hardware default value
>> except the inhibit context submission happened before the 1st time
> except -> unless
>
>> forcewake put. With this wrong default context, vgpu will run with
>> incorrect state and meet unknown issues.
>>
>> The solution is initialize these mmios by adding lri command in ring buffer
>> of the inhibit context, then gpu hardware has no chance to go down RC6,
> to be accurate - hardware has no chance to go down RC6 >>while the lri
> commands are being executed<<
>
>
>> vgpu can get correct default context for further use.
>>
>> v3: fix code fault, use 'for' to loop through mmio render list(Zhenyu)
>>
>> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
>> Signed-off-by: Weinan Li <weinan.z.li at intel.com>
>> ---
>>   drivers/gpu/drm/i915/gvt/mmio_context.c | 160 ++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/gvt/mmio_context.h |   3 +
>>   drivers/gpu/drm/i915/gvt/scheduler.c    |  11 +++
>>   3 files changed, 174 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
>> index 62a43b0..8d441c4 100644
>> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
>> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
>> @@ -188,6 +188,155 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
>>   	gen9_render_mocs.initialized = true;
>>   }
>>
>> +static int intel_vgpu_emit_context_mmio(struct intel_vgpu *vgpu,
>> +					struct drm_i915_gem_request *req)
> as a static function you don't need the "intel_vgpu" prefix. Also the name
> is too general while it's only used for the inhibit context.
>
> what about restore_basic_context_for_inhibit? (not sure whether 'basic'
> is a good name here)
What about restore_context_mmio_for_inhibit? The purpose is to restore the
mmio registers which are contained in the context state image.
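i.e. keep the arguments as in the patch; the function stays static and only
the name changes:

static int restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
					    struct drm_i915_gem_request *req);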
>> +{
>> +	u32 *cs;
>> +	int ret, ring_id, count = 0;
>> +	struct engine_mmio *mmio;
>> +	struct drm_i915_private *dev_priv = req->i915;
>> +
>> +	ring_id = req->engine->id;
>> +
>> +	if (!IS_KABYLAKE(dev_priv) ||
>> +	    !is_inhibit_context(req->ctx, ring_id))
>> +		return -EINVAL;
>> +
>> +	for (mmio = dev_priv->gvt->engine_mmio_list;
>> +	     i915_mmio_reg_valid(mmio->reg); mmio++) {
>> +		if (mmio->ring_id == ring_id && mmio->in_context)
>> +			count++;
>> +	}
> the above is fixed knowledge given a SKU and ring_id. better not to
> repeat it for every inhibit context.
>
> it's cleaner to add a per-SKU callback pointer, which is set to the above
> function only when the platform is KBL and the count is valid. Then at
> run-time you just check the callback pointer before calling it.
I will move it into intel_gvt_init_engine_mmio_context and save the count in
the gvt struct.
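Roughly like this (a sketch only; the per-ring counter below is a hypothetical
new field in the gvt struct, name and placement not final):

void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
{
	struct engine_mmio *mmio;

	/* ... existing per-SKU selection of gvt->engine_mmio_list ... */

	/* count the in-context mmio entries once, per ring */
	for (mmio = gvt->engine_mmio_list;
	     i915_mmio_reg_valid(mmio->reg); mmio++) {
		if (mmio->in_context)
			gvt->engine_mmio_count[mmio->ring_id]++; /* hypothetical field */
	}
}

Then the inhibit context path only reads the cached count and returns early
when it is zero, instead of walking the list just to count.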
>> +
>> +	if (!count)
>> +		return 0;
>> +
>> +	ret = req->engine->emit_flush(req, EMIT_BARRIER);
>> +	if (ret)
>> +		return ret;
>> +
>> +	cs = intel_ring_begin(req, count * 2 + 2);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_LOAD_REGISTER_IMM(count);
>> +	for (mmio = dev_priv->gvt->engine_mmio_list;
>> +	     i915_mmio_reg_valid(mmio->reg); mmio++) {
>> +		if (mmio->ring_id != ring_id ||
>> +		    !mmio->in_context)
>> +			continue;
>> +
>> +		*cs++ = i915_mmio_reg_offset(mmio->reg);
>> +		*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
>> +				(mmio->mask << 16);
>> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
>> +			      *(cs-2), *(cs-1), vgpu->id, ring_id);
>> +	}
>> +
>> +	*cs++ = MI_NOOP;
>> +	intel_ring_advance(req, cs);
>> +
>> +	ret = req->engine->emit_flush(req, EMIT_BARRIER);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return 0;
>> +}
>> +
>> +static int intel_vgpu_emit_render_mocs_control(struct intel_vgpu *vgpu,
>> +					       struct drm_i915_gem_request *req)
> restore_render_mocs_control_for_inhibit
>
>> +{
>> +	unsigned int index;
>> +	u32 *cs;
>> +
>> +	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>> +
>> +	for (index = 0; index < GEN9_MOCS_SIZE; index++) {
>> +		*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
>> +		*cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
>> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
>> +			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
>> +
>> +	}
>> +
>> +	*cs++ = MI_NOOP;
>> +	intel_ring_advance(req, cs);
>> +
>> +	return 0;
>> +}
>> +
>> +static int intel_vgpu_emit_render_mocs_l3cc(struct intel_vgpu *vgpu,
>> +					    struct drm_i915_gem_request *req)
> restore_render_mocs_l3cc_for_inhibit
>
>> +{
>> +	unsigned int index;
>> +	u32 *cs;
>> +
>> +	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>> +
>> +	for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
>> +		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
>> +		*cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
>> +		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
>> +			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
>> +
>> +	}
>> +
>> +	*cs++ = MI_NOOP;
>> +	intel_ring_advance(req, cs);
>> +
>> +	return 0;
>> +}
>> +
>> +int intel_vgpu_context_mmio_emit(struct intel_vgpu *vgpu,
>> +				 struct drm_i915_gem_request *req)
> intel_vgpu_restore_inhibit_context
>
>> +{
>> +	int ret;
>> +	u32 *cs;
>> +
>> +	cs = intel_ring_begin(req, 2);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
>> +	*cs++ = MI_NOOP;
>> +	intel_ring_advance(req, cs);
>> +
>> +	ret = intel_vgpu_emit_context_mmio(vgpu, req);
>> +	if (ret)
>> +		goto out;
>> +
>> +	/* no MOCS register in context except render engine */
>> +	if (req->engine->id != RCS)
>> +		goto out;
>> +
>> +	ret = intel_vgpu_emit_render_mocs_control(vgpu, req);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = intel_vgpu_emit_render_mocs_l3cc(vgpu, req);
>> +	if (ret)
>> +		goto out;
>> +
>> +out:
>> +	cs = intel_ring_begin(req, 2);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>> +	*cs++ = MI_NOOP;
> a curious question. If a preemption happens at this point, will we
> get correct state recovered when re-scheduled-in?
Hardware should have a policy to save and restore the state across preemption;
this still needs confirmation from the hardware team.
>> +	intel_ring_advance(req, cs);
>> +
>> +	return ret;
>> +}
>> +
>>   static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
>>   {
>>   	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
>> @@ -254,6 +403,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
>>   	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
>>   		return;
>>
>> +	if (ring_id == RCS)
>> +		return;
>> +
>>   	if (!pre && !gen9_render_mocs.initialized)
>>   		load_render_mocs(dev_priv);
>>
>> @@ -324,6 +476,14 @@ static void switch_mmio(struct intel_vgpu *pre,
>>   	     i915_mmio_reg_valid(mmio->reg); mmio++) {
>>   		if (mmio->ring_id != ring_id)
>>   			continue;
>> +		/*
>> +		 * Update the mmio which in context during inhibit context
>> +		 * initialization on Kablake platform, here don't need to do
>> +		 * save/restore.
> above is not clear.
What about: "No need to do save or restore of the mmio which is in context
state image on Kabylake, it's initialized by lri command and saved or
restored together with the context."?
>> +		 */
>> +		if (IS_KABYLAKE(dev_priv) && mmio->in_context)
>> +			continue;
>> +
>>   		// save
>>   		if (pre) {
>>   			vgpu_vreg_t(pre, mmio->reg) =
>> I915_READ_FW(mmio->reg);
>> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
>> index 4df87c7..e3fb82b 100644
>> --- a/drivers/gpu/drm/i915/gvt/mmio_context.h
>> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
>> @@ -51,4 +51,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
>>
>>   bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
>>
>> +int intel_vgpu_context_mmio_emit(struct intel_vgpu *vgpu,
>> +				 struct drm_i915_gem_request *req);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
>> index 9a64bd0..040d7a6 100644
>> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
>> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
>> @@ -225,6 +225,17 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
>>   	struct intel_vgpu *vgpu = workload->vgpu;
>>   	void *shadow_ring_buffer_va;
>>   	u32 *cs;
>> +	struct drm_i915_gem_request *req = workload->req;
>> +
>> +	/*
>> +	 * So far only emit init mmio pair in inhibit context on KBL
>> +	 * platform, since there is CPG issue on KBL NUC, vgpu can't get
>> +	 * correct default context by restoring mmio before inhibit context
> again above talks about callee behavior
I will remove this comment and add a description to the function instead.
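A draft of that description (wording still open, assuming the rename to
intel_vgpu_restore_inhibit_context):

/**
 * intel_vgpu_restore_inhibit_context - restore vGPU mmio state into the
 * ring buffer of an inhibit context request
 * @vgpu: the target vGPU
 * @req: the request which carries the inhibit context
 *
 * Emit MI_LOAD_REGISTER_IMM pairs for the in-context mmio registers (plus
 * the MOCS registers on the render ring) so that the context image holds
 * the vGPU values even if hardware enters RC6/CPG before the inhibit
 * context is submitted.
 *
 * Returns: 0 on success, negative error code on failure.
 */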
>> +	 * submission. It should be also workable on other platforms.
> "should also work" then do you plan to extend to all platforms?
I propose to extend it to all platforms if there is no regression on KBL.
>> +	 */
>> +	if (IS_KABYLAKE(req->i915) &&
>> +	    is_inhibit_context(req->ctx, req->engine->id))
>> +		intel_vgpu_context_mmio_emit(vgpu, workload->req);
>>
>>   	/* allocate shadow ring buffer */
>>   	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
>> --
>> 1.9.1
>>
>> _______________________________________________
>> intel-gvt-dev mailing list
>> intel-gvt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev


