[PATCH] i915/gvt: Stop tracking the pages of page table when failing to do shadow.

Zhenyu Wang zhenyuw at linux.intel.com
Mon Dec 20 06:31:18 UTC 2021


On 2021.12.17 08:50:55 +0000, Wang, Zhi A wrote:
> On 12/17/2021 4:47 AM, Zhenyu Wang wrote:
> > On 2021.12.16 15:29:00 -0500, Zhi Wang wrote:
> >> The PPGTT page table inside a VM will be tracked when created. When a
> >> tracked page is modified, GVT-g will update the shadow page table
> >> accordingly.
> >>
> >> Sometimes, the guest will free a page belongs to a PPGTT page table, but
> >> doesn't unbind the page from its upper level. So the page is still
> > That's bad, right? Is there real case the guest driver might do this?
> 
> Hi Zhenyu, Yes. This patch is cooked for a key customer which solves the 
> prob caused by the windows driver. It is reported that the problem can 
> be easily re-produced in their environment and the problem will be fixed 
> after applying this patch.
> 

I think we may try best to do sanity check of shadow pt modification with
quick fail back to invalidate, three times strikes doesn't seem good to me..

btw, you may split the change to convert from error message to debug info.

thanks

> 
> >> tracked. Later that page might be allocated to other clients, which causes
> >> a flood of garbage traps. As the page has been used for other purpose,
> >> doing the shadow on this page will always fail, which causes the error
> >> "guest page write error".
> >>
> >> The patch will identify this case by counting the times of failure of
> >> doing shadow on a tracked page. If the times of failure is larger than
> >> 3, GVT-g will stop tracking the page and release the sub level of the
> >> shadow pages accordingly.
> >>
> >> Signed-off-by: Zhi Wang <zhi.a.wang at intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/gvt/gtt.c | 62 ++++++++++++++++++++++------------
> >>   drivers/gpu/drm/i915/gvt/gtt.h |  1 +
> >>   2 files changed, 41 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> >> index d7054bfb3e7d..fbfa5b8f1544 100644
> >> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> >> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> >> @@ -778,6 +778,21 @@ static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
> >>   		ppgtt_free_spt(spt);
> >>   }
> >>   
> >> +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt, bool force);
> >> +
> >> +static void ppgtt_handle_invalid_spt(struct intel_vgpu_ppgtt_spt *spt)
> >> +{
> >> +	if (spt->fail_cnt > 3) {
> >> +		ppgtt_invalidate_spt(spt, true);
> >> +	} else
> >> +		spt->fail_cnt++;
> >> +}
> >> +
> >> +static void ppgtt_set_spt_valid(struct intel_vgpu_ppgtt_spt *spt)
> >> +{
> >> +	spt->fail_cnt = 0;
> >> +}
> >> +
> >>   static int ppgtt_handle_guest_write_page_table_bytes(
> >>   		struct intel_vgpu_ppgtt_spt *spt,
> >>   		u64 pa, void *p_data, int bytes);
> >> @@ -791,12 +806,18 @@ static int ppgtt_write_protection_handler(
> >>   	int ret;
> >>   
> >>   	if (bytes != 4 && bytes != 8)
> >> -		return -EINVAL;
> >> +		goto invalid_spt;
> >>   
> >>   	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
> >>   	if (ret)
> >> -		return ret;
> >> -	return ret;
> >> +		goto invalid_spt;
> >> +
> >> +	ppgtt_set_spt_valid(spt);
> >> +	return 0;
> >> +
> >> +invalid_spt:
> >> +	ppgtt_handle_invalid_spt(spt);
> >> +	return 0;
> >>   }
> >>   
> >>   /* Find a spt by guest gfn. */
> >> @@ -941,10 +962,8 @@ static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
> >>   	return atomic_dec_return(&spt->refcount);
> >>   }
> >>   
> >> -static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
> >> -
> >>   static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
> >> -		struct intel_gvt_gtt_entry *e)
> >> +		struct intel_gvt_gtt_entry *e, bool force)
> >>   {
> >>   	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
> >>   	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
> >> @@ -973,11 +992,11 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
> >>   	}
> >>   	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
> >>   	if (!s) {
> >> -		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
> >> +		gvt_dbg_mm("fail to find shadow page: mfn: 0x%lx\n",
> >>   				ops->get_pfn(e));
> >> -		return -ENXIO;
> >> +		return 0;
> >>   	}
> >> -	return ppgtt_invalidate_spt(s);
> >> +	return ppgtt_invalidate_spt(s, force);
> >>   }
> >>   
> >>   static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
> >> @@ -998,9 +1017,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
> >>   	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
> >>   }
> >>   
> >> -static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
> >> +static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt, bool force)
> >>   {
> >> -	struct intel_vgpu *vgpu = spt->vgpu;
> >>   	struct intel_gvt_gtt_entry e;
> >>   	unsigned long index;
> >>   	int ret;
> >> @@ -1008,7 +1026,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
> >>   	trace_spt_change(spt->vgpu->id, "die", spt,
> >>   			spt->guest_page.gfn, spt->shadow_page.type);
> >>   
> >> -	if (ppgtt_put_spt(spt) > 0)
> >> +	if (!force && ppgtt_put_spt(spt) > 0)
> >>   		return 0;
> >>   
> >>   	for_each_present_shadow_entry(spt, &e, index) {
> >> @@ -1032,7 +1050,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
> >>   		case GTT_TYPE_PPGTT_PDE_ENTRY:
> >>   			gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
> >>   			ret = ppgtt_invalidate_spt_by_shadow_entry(
> >> -					spt->vgpu, &e);
> >> +					spt->vgpu, &e, force);
> >>   			if (ret)
> >>   				goto fail;
> >>   			break;
> >> @@ -1046,7 +1064,7 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
> >>   	ppgtt_free_spt(spt);
> >>   	return 0;
> >>   fail:
> >> -	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
> >> +	gvt_dbg_mm("fail: shadow page %p shadow entry 0x%llx type %d\n",
> >>   			spt, e.val64, e.type);
> >>   	return ret;
> >>   }
> >> @@ -1196,7 +1214,7 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
> >>   		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
> >>   				start_gfn + sub_index, PAGE_SIZE, &dma_addr);
> >>   		if (ret) {
> >> -			ppgtt_invalidate_spt(spt);
> >> +			ppgtt_invalidate_spt(spt, false);
> >>   			return ret;
> >>   		}
> >>   		sub_se.val64 = se->val64;
> >> @@ -1371,11 +1389,11 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
> >>   		struct intel_vgpu_ppgtt_spt *s =
> >>   			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
> >>   		if (!s) {
> >> -			gvt_vgpu_err("fail to find guest page\n");
> >> -			ret = -ENXIO;
> >> +			gvt_dbg_mm("fail to find guest page\n");
> >> +			ret = 0;
> >>   			goto fail;
> >>   		}
> >> -		ret = ppgtt_invalidate_spt(s);
> >> +		ret = ppgtt_invalidate_spt(s, false);
> >>   		if (ret)
> >>   			goto fail;
> >>   	} else {
> >> @@ -1387,7 +1405,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
> >>   
> >>   	return 0;
> >>   fail:
> >> -	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
> >> +	gvt_dbg_mm("fail: shadow page %p guest entry 0x%llx type %d\n",
> >>   			spt, se->val64, se->type);
> >>   	return ret;
> >>   }
> >> @@ -1422,7 +1440,7 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
> >>   	}
> >>   	return 0;
> >>   fail:
> >> -	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
> >> +	gvt_dbg_mm("fail: spt %p guest entry 0x%llx type %d\n",
> >>   		spt, we->val64, we->type);
> >>   	return ret;
> >>   }
> >> @@ -1653,7 +1671,7 @@ static int ppgtt_handle_guest_write_page_table(
> >>   
> >>   	return 0;
> >>   fail:
> >> -	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
> >> +	gvt_dbg_mm("fail: shadow page %p guest entry 0x%llx type %d.\n",
> >>   			spt, we->val64, we->type);
> >>   	return ret;
> >>   }
> >> @@ -1798,7 +1816,7 @@ static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
> >>   		if (!ops->test_present(&se))
> >>   			continue;
> >>   
> >> -		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
> >> +		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se, false);
> >>   		se.val64 = 0;
> >>   		ppgtt_set_shadow_root_entry(mm, &se, index);
> >>   
> >> diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
> >> index a3b0f59ec8bd..8b02ff5d9651 100644
> >> --- a/drivers/gpu/drm/i915/gvt/gtt.h
> >> +++ b/drivers/gpu/drm/i915/gvt/gtt.h
> >> @@ -263,6 +263,7 @@ struct intel_vgpu_ppgtt_spt {
> >>   	} guest_page;
> >>   
> >>   	DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE);
> >> +	unsigned long fail_cnt;
> >>   	struct list_head post_shadow_list;
> >>   };
> >>   
> >> -- 
> >> 2.17.1
> >>
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20211220/b8b42f03/attachment.sig>


More information about the intel-gvt-dev mailing list