[PATCH] i915/gvt: Stop tracking the pages of page table when failing to do shadow.

Wang, Zhi A zhi.a.wang at intel.com
Fri Dec 17 08:50:55 UTC 2021


On 12/17/2021 4:47 AM, Zhenyu Wang wrote:
> On 2021.12.16 15:29:00 -0500, Zhi Wang wrote:
>> The PPGTT page table inside a VM will be tracked when created. When a
>> tracked page is modified, GVT-g will update the shadow page table
>> accordingly.
>>
>> Sometimes, the guest will free a page that belongs to a PPGTT page table,
>> but doesn't unbind the page from its upper level. So the page is still
> That's bad, right? Is there a real case where the guest driver might do this?

Hi Zhenyu, Yes. This patch was prepared for a key customer and solves the
problem caused by the Windows driver. It is reported that the problem can
be easily reproduced in their environment and that it is fixed after
applying this patch.


Thanks,

Zhi.

>> tracked. Later that page might be allocated to other clients, which causes
>> a flood of garbage traps. As the page is now used for another purpose,
>> shadowing this page will always fail, which causes the error
>> "guest page write error".
>>
>> The patch identifies this case by counting the number of failed attempts
>> to shadow a tracked page. If the number of failures is larger than
>> 3, GVT-g will stop tracking the page and release its sub-level
>> shadow pages accordingly.
>>
>> Signed-off-by: Zhi Wang <zhi.a.wang at intel.com>
>> ---
>>   drivers/gpu/drm/i915/gvt/gtt.c | 62 ++++++++++++++++++++++------------
>>   drivers/gpu/drm/i915/gvt/gtt.h |  1 +
>>   2 files changed, 41 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
>> index d7054bfb3e7d..fbfa5b8f1544 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.c
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
>> @@ -778,6 +778,21 @@ static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
>>   		ppgtt_free_spt(spt);
>>   }
>>   
>> +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt, bool force);
>> +
>> +static void ppgtt_handle_invalid_spt(struct intel_vgpu_ppgtt_spt *spt)
>> +{
>> +	if (spt->fail_cnt > 3) {
>> +		ppgtt_invalidate_spt(spt, true);
>> +	} else
>> +		spt->fail_cnt++;
>> +}
>> +
>> +static void ppgtt_set_spt_valid(struct intel_vgpu_ppgtt_spt *spt)
>> +{
>> +	spt->fail_cnt = 0;
>> +}
>> +
>>   static int ppgtt_handle_guest_write_page_table_bytes(
>>   		struct intel_vgpu_ppgtt_spt *spt,
>>   		u64 pa, void *p_data, int bytes);
>> @@ -791,12 +806,18 @@ static int ppgtt_write_protection_handler(
>>   	int ret;
>>   
>>   	if (bytes != 4 && bytes != 8)
>> -		return -EINVAL;
>> +		goto invalid_spt;
>>   
>>   	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
>>   	if (ret)
>> -		return ret;
>> -	return ret;
>> +		goto invalid_spt;
>> +
>> +	ppgtt_set_spt_valid(spt);
>> +	return 0;
>> +
>> +invalid_spt:
>> +	ppgtt_handle_invalid_spt(spt);
>> +	return 0;
>>   }
>>   
>>   /* Find a spt by guest gfn. */
>> @@ -941,10 +962,8 @@ static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
>>   	return atomic_dec_return(&spt->refcount);
>>   }
>>   
>> -static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
>> -
>>   static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
>> -		struct intel_gvt_gtt_entry *e)
>> +		struct intel_gvt_gtt_entry *e, bool force)
>>   {
>>   	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
>>   	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
>> @@ -973,11 +992,11 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
>>   	}
>>   	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
>>   	if (!s) {
>> -		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
>> +		gvt_dbg_mm("fail to find shadow page: mfn: 0x%lx\n",
>>   				ops->get_pfn(e));
>> -		return -ENXIO;
>> +		return 0;
>>   	}
>> -	return ppgtt_invalidate_spt(s);
>> +	return ppgtt_invalidate_spt(s, force);
>>   }
>>   
>>   static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
>> @@ -998,9 +1017,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
>>   	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
>>   }
>>   
>> -static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
>> +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt, bool force)
>>   {
>> -	struct intel_vgpu *vgpu = spt->vgpu;
>>   	struct intel_gvt_gtt_entry e;
>>   	unsigned long index;
>>   	int ret;
>> @@ -1008,7 +1026,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
>>   	trace_spt_change(spt->vgpu->id, "die", spt,
>>   			spt->guest_page.gfn, spt->shadow_page.type);
>>   
>> -	if (ppgtt_put_spt(spt) > 0)
>> +	if (!force && ppgtt_put_spt(spt) > 0)
>>   		return 0;
>>   
>>   	for_each_present_shadow_entry(spt, &e, index) {
>> @@ -1032,7 +1050,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
>>   		case GTT_TYPE_PPGTT_PDE_ENTRY:
>>   			gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
>>   			ret = ppgtt_invalidate_spt_by_shadow_entry(
>> -					spt->vgpu, &e);
>> +					spt->vgpu, &e, force);
>>   			if (ret)
>>   				goto fail;
>>   			break;
>> @@ -1046,7 +1064,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
>>   	ppgtt_free_spt(spt);
>>   	return 0;
>>   fail:
>> -	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
>> +	gvt_dbg_mm("fail: shadow page %p shadow entry 0x%llx type %d\n",
>>   			spt, e.val64, e.type);
>>   	return ret;
>>   }
>> @@ -1196,7 +1214,7 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
>>   		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
>>   				start_gfn + sub_index, PAGE_SIZE, &dma_addr);
>>   		if (ret) {
>> -			ppgtt_invalidate_spt(spt);
>> +			ppgtt_invalidate_spt(spt, false);
>>   			return ret;
>>   		}
>>   		sub_se.val64 = se->val64;
>> @@ -1371,11 +1389,11 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
>>   		struct intel_vgpu_ppgtt_spt *s =
>>   			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
>>   		if (!s) {
>> -			gvt_vgpu_err("fail to find guest page\n");
>> -			ret = -ENXIO;
>> +			gvt_dbg_mm("fail to find guest page\n");
>> +			ret = 0;
>>   			goto fail;
>>   		}
>> -		ret = ppgtt_invalidate_spt(s);
>> +		ret = ppgtt_invalidate_spt(s, false);
>>   		if (ret)
>>   			goto fail;
>>   	} else {
>> @@ -1387,7 +1405,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
>>   
>>   	return 0;
>>   fail:
>> -	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
>> +	gvt_dbg_mm("fail: shadow page %p guest entry 0x%llx type %d\n",
>>   			spt, se->val64, se->type);
>>   	return ret;
>>   }
>> @@ -1422,7 +1440,7 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
>>   	}
>>   	return 0;
>>   fail:
>> -	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
>> +	gvt_dbg_mm("fail: spt %p guest entry 0x%llx type %d\n",
>>   		spt, we->val64, we->type);
>>   	return ret;
>>   }
>> @@ -1653,7 +1671,7 @@ static int ppgtt_handle_guest_write_page_table(
>>   
>>   	return 0;
>>   fail:
>> -	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
>> +	gvt_dbg_mm("fail: shadow page %p guest entry 0x%llx type %d.\n",
>>   			spt, we->val64, we->type);
>>   	return ret;
>>   }
>> @@ -1798,7 +1816,7 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
>>   		if (!ops->test_present(&se))
>>   			continue;
>>   
>> -		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
>> +		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se, false);
>>   		se.val64 = 0;
>>   		ppgtt_set_shadow_root_entry(mm, &se, index);
>>   
>> diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
>> index a3b0f59ec8bd..8b02ff5d9651 100644
>> --- a/drivers/gpu/drm/i915/gvt/gtt.h
>> +++ b/drivers/gpu/drm/i915/gvt/gtt.h
>> @@ -263,6 +263,7 @@ struct intel_vgpu_ppgtt_spt {
>>   	} guest_page;
>>   
>>   	DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE);
>> +	unsigned long fail_cnt;
>>   	struct list_head post_shadow_list;
>>   };
>>   
>> -- 
>> 2.17.1
>>



More information about the intel-gvt-dev mailing list