[PATCH v3] drm/i915/gvt: introduced failsafe mode into vgpu

He, Min min.he at intel.com
Thu Feb 16 10:19:19 UTC 2017



在 2/16/2017 5:32 PM, Zhenyu Wang 写道:
> On 2017.02.16 16:35:04 +0800, Min He wrote:
>> New failsafe mode is introduced, when we detect guest not supporting
>> GVT-g.
>> In failsafe mode, we will ignore all the MMIO and cfg space read/write
>> from guest.
>>
>> This patch can fix the issue that when guest kernel or graphics driver
>> version is too low, there will be a lot of kernel traces in host.
>>
>> V3: modified coding style and error messages according to Zhenyu's comment
>> V2: 1) implemented MMIO/GTT/WP pages read/write logic; 2) used a unified
>> function to enter failsafe mode
>>
>> Signed-off-by: Min He <min.he at intel.com>
>> Signed-off-by: Pei Zhang <pei.zhang at intel.com>
>> ---
>>   drivers/gpu/drm/i915/gvt/cfg_space.c |  3 ++
>>   drivers/gpu/drm/i915/gvt/gvt.h       |  6 ++++
>>   drivers/gpu/drm/i915/gvt/handlers.c  | 36 +++++++++++++++++++++
>>   drivers/gpu/drm/i915/gvt/mmio.c      | 62 ++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/gvt/vgpu.c      |  6 +++-
>>   5 files changed, 112 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
>> index 4a6a2ed..a77e050 100644
>> --- a/drivers/gpu/drm/i915/gvt/cfg_space.c
>> +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
>> @@ -237,6 +237,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
>>   {
>>   	int ret;
>>   
>> +	if (vgpu->failsafe)
>> +		return 0;
>> +
>>   	if (WARN_ON(bytes > 4))
>>   		return -EINVAL;
>>   
>> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
>> index e227caf..2c07e8d 100644
>> --- a/drivers/gpu/drm/i915/gvt/gvt.h
>> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
>> @@ -143,6 +143,8 @@ struct intel_vgpu {
>>   	int id;
>>   	unsigned long handle; /* vGPU handle used by hypervisor MPT modules */
>>   	bool active;
>> +	bool pv_notified;
>> +	bool failsafe;
>>   	bool resetting;
>>   	void *sched_data;
>>   
>> @@ -449,6 +451,10 @@ struct intel_gvt_ops {
>>   };
>>   
>>   
>> +enum {
>> +	UNSUPPORTED_GUESTS,
>> +};
>> +
> GVT_FAILSAFE_UNSUPPORTED_GUEST, if not want to define specific enum type.
OK. will change it.

>
>>   #include "mpt.h"
>>   
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
>> index 1d45062..16449db 100644
>> --- a/drivers/gpu/drm/i915/gvt/handlers.c
>> +++ b/drivers/gpu/drm/i915/gvt/handlers.c
>> @@ -150,10 +150,34 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
>>   #define fence_num_to_offset(num) \
>>   	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
>>   
>> +
>> +static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
>> +{
>> +	switch (reason) {
>> +	case UNSUPPORTED_GUESTS:
>> +		pr_err("Detected your guest driver doesn't support GVT-g.\n");
>> +		pr_err("Please update your guest graphics driver.\n");
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +	pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id);
>> +	pr_err("You may need to kill your guest manually.\n");
> a little too chatty, can remove second line for both above messages.
If we don't output enough message, customer cannot know what's the next 
step and how to handle this kind of issue.
At least we need to document it in somewhere else, for example in the 
wiki/usre guide.

>
>> +	vgpu->failsafe = true;
>> +}
>> +
>>   static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
>>   		unsigned int fence_num, void *p_data, unsigned int bytes)
>>   {
>>   	if (fence_num >= vgpu_fence_sz(vgpu)) {
>> +		if (!vgpu->pv_notified) {
>> +			/* When guest access oob fence regs without access
>> +			 * pv_info first, we treat guest not supporting GVT,
>> +			 * and we will let vgpu enter failsafe mode.
>> +			 */
>> +			enter_failsafe_mode(vgpu, UNSUPPORTED_GUESTS);
>> +			return -EINVAL;
>> +		}
>>   		gvt_err("vgpu%d: found oob fence register access\n",
>>   				vgpu->id);
>>   		gvt_err("vgpu%d: total fence num %d access fence num %d\n",
>> @@ -1001,6 +1025,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
>>   	if (invalid_read)
>>   		gvt_err("invalid pvinfo read: [%x:%x] = %x\n",
>>   				offset, bytes, *(u32 *)p_data);
>> +	vgpu->pv_notified = true;
>>   	return 0;
>>   }
>>   
>> @@ -1318,6 +1343,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
>>   	bool enable_execlist;
>>   
>>   	write_vreg(vgpu, offset, p_data, bytes);
>> +
>> +	if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) ||
>> +			(data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)))
>> +			&& !vgpu->pv_notified) {
>> +		/* when PPGTT mode enabled, we will check if guest has called
>> +		 * pvinfo, if not, we will treat this guest as non-gvtg-aware
>> +		 * guest, and stop emulating its cfg space, mmio, gtt, etc.
>> +		 */
>> +		enter_failsafe_mode(vgpu, UNSUPPORTED_GUESTS);
>> +		return 0;
>> +	}
>>   	if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))
>>   			|| (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
>>   		enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
>> diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
>> index 4df078b..a31a53c 100644
>> --- a/drivers/gpu/drm/i915/gvt/mmio.c
>> +++ b/drivers/gpu/drm/i915/gvt/mmio.c
>> @@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
>>   	(reg >= gvt->device_info.gtt_start_offset \
>>   	 && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt))
>>   
>> +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa,
>> +		void *p_data, unsigned int bytes, bool read)
>> +{
>> +	struct intel_gvt *gvt = NULL;
>> +	void *pt = NULL;
>> +	unsigned int offset = 0;
>> +
>> +	if (!vgpu || !p_data)
>> +		return;
>> +
>> +	gvt = vgpu->gvt;
>> +	mutex_lock(&gvt->lock);
>> +	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
>> +	if (reg_is_mmio(gvt, offset)) {
>> +		if (read)
>> +			intel_vgpu_default_mmio_read(vgpu, offset, p_data,
>> +					bytes);
>> +		else
>> +			intel_vgpu_default_mmio_write(vgpu, offset, p_data,
>> +					bytes);
>> +	} else if (reg_is_gtt(gvt, offset) &&
>> +			vgpu->gtt.ggtt_mm->virtual_page_table) {
>> +		offset -= gvt->device_info.gtt_start_offset;
>> +		pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset;
>> +		if (read)
>> +			memcpy(p_data, pt, bytes);
>> +		else
>> +			memcpy(pt, p_data, bytes);
>> +
>> +	} else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
>> +		/* Since we enter the failsafe mode early during guest boot,
>> +		 * guest may not have chance to set up its ppgtt table, so
>> +		 * there should not be any wp pages for guest. Keep the wp
>> +		 * related code here in case we need to handle it in furture.
>> +		 */
>> +		struct intel_vgpu_guest_page *gp;
>> +
>> +		gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT);
>> +		if (gp) {
>> +			/* remove write protection to prevent furture traps */
>> +			intel_vgpu_clean_guest_page(vgpu, gp);
>> +			if (read)
>> +				intel_gvt_hypervisor_read_gpa(vgpu, pa,
>> +						p_data, bytes);
>> +			else
>> +				intel_gvt_hypervisor_write_gpa(vgpu, pa,
>> +						p_data, bytes);
>> +		}
>> +	}
>> +	mutex_unlock(&gvt->lock);
>> +}
>> +
>>   /**
>>    * intel_vgpu_emulate_mmio_read - emulate MMIO read
>>    * @vgpu: a vGPU
>> @@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
>>   	unsigned int offset = 0;
>>   	int ret = -EINVAL;
>>   
>> +
>> +	if (vgpu->failsafe) {
>> +		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true);
>> +		return 0;
>> +	}
>>   	mutex_lock(&gvt->lock);
>>   
>>   	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
>> @@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
>>   	u32 old_vreg = 0, old_sreg = 0;
>>   	int ret = -EINVAL;
>>   
>> +	if (vgpu->failsafe) {
>> +		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false);
>> +		return 0;
>> +	}
>> +
>>   	mutex_lock(&gvt->lock);
>>   
>>   	if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) {
>> diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
>> index 67d471c..6c9b440 100644
>> --- a/drivers/gpu/drm/i915/gvt/vgpu.c
>> +++ b/drivers/gpu/drm/i915/gvt/vgpu.c
>> @@ -386,8 +386,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
>>   		intel_vgpu_reset_mmio(vgpu);
>>   		populate_pvinfo_page(vgpu);
>>   
>> -		if (dmlr)
>> +		if (dmlr) {
>>   			intel_vgpu_reset_cfg_space(vgpu);
>> +			/* only reset the failsafe mode when dmlr reset */
>> +			vgpu->failsafe = false;
>> +			vgpu->pv_notified = false;
>> +		}
>>   	}
>>   
>>   	vgpu->resetting = false;
>> -- 
>> 1.9.1
>>
>> _______________________________________________
>> intel-gvt-dev mailing list
>> intel-gvt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev



More information about the intel-gvt-dev mailing list