[PATCH] drm/i915/gvt: Activate/de-activate vGPU in mdev ops.

Wed Mar 29 09:18:03 UTC 2017

Hi Zhenyu:
     I just got the oops picture from Terrence. I don't have actually 
text log. :( Terrence can show you the log.

于 03/29/17 17:21, Zhenyu Wang 写道:
> On 2017.03.29 16:52:10 +0800, Zhi Wang wrote:
>>
>>
>> 于 03/29/17 16:55, Zhenyu Wang 写道:
>>> On 2017.03.30 00:55:20 +0800, Zhi Wang wrote:
>>>> This patch introduces two functions for activating/de-activating vGPU in
>>>> mdev ops.
>>>>
>>>> A racing condition was found between virtual vblank emulation and KVGMT
>>>> mdev release path. To solve this problem, we factor out vGPU activation
>>>> /de-activation from vGPU creation/destruction path and let KVMGT mdev
>>>> release ops de-activate the vGPU before release hypervisor handle. Once
>>>> a vGPU is de-activated, GVT-g will not emulate vblank for it or touch
>>>> the hypervisor handle.
>>>>
>>>
>>> Could you elaborate what's the race?
>>>
>> vblank emulation will emulate and inject vblank for every active vGPU with
>> holding gvt->lock, while in mdev release path, it will directly release
>> hypervisor handle without change vGPU status or taking gvt->lock, so a
>> kernel panic is encountered when vblank emulation is injecting a interrupt
>> with a invalid hypervisor handle. (Reported by Terrence)
>>
>
> Pls add this into commit description and better with oops that this fixes.
> Should add a Fixes: line?
>
>> This patch is a part of refinement on KVM mdev release path. After a
>> discussion with Jike, there is another gap in KVM mdev release path which
>> also needs to be covered in vgpu_deactivate(). So better factor out
>> vgpu_deactive() in this patch.
>>
>
> yeah, we should set proper vgpu state, I'm fine with this approach.
>
>>>> Signed-off-by: Zhi Wang <zhi.a.wang at intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/gvt/gvt.c   |  2 ++
>>>>    drivers/gpu/drm/i915/gvt/gvt.h   |  5 ++++-
>>>>    drivers/gpu/drm/i915/gvt/kvmgt.c |  4 ++++
>>>>    drivers/gpu/drm/i915/gvt/vgpu.c  | 43 +++++++++++++++++++++++++++++++++++-----
>>>>    4 files changed, 48 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
>>>> index 3b9d59e..ef3baa0 100644
>>>> --- a/drivers/gpu/drm/i915/gvt/gvt.c
>>>> +++ b/drivers/gpu/drm/i915/gvt/gvt.c
>>>> @@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = {
>>>>    	.vgpu_create = intel_gvt_create_vgpu,
>>>>    	.vgpu_destroy = intel_gvt_destroy_vgpu,
>>>>    	.vgpu_reset = intel_gvt_reset_vgpu,
>>>> +	.vgpu_activate = intel_gvt_activate_vgpu,
>>>> +	.vgpu_deactivate = intel_gvt_deactivate_vgpu,
>>>>    };
>>>>
>>>>    /**
>>>> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
>>>> index 2379192..2387eac 100644
>>>> --- a/drivers/gpu/drm/i915/gvt/gvt.h
>>>> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
>>>> @@ -382,7 +382,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
>>>>    void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
>>>>    				 unsigned int engine_mask);
>>>>    void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
>>>> -
>>>> +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
>>>> +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
>>>>
>>>>    /* validating GM functions */
>>>>    #define vgpu_gmadr_is_aperture(vgpu, gmadr) \
>>>> @@ -449,6 +450,8 @@ struct intel_gvt_ops {
>>>>    				struct intel_vgpu_type *);
>>>>    	void (*vgpu_destroy)(struct intel_vgpu *);
>>>>    	void (*vgpu_reset)(struct intel_vgpu *);
>>>> +	void (*vgpu_activate)(struct intel_vgpu *);
>>>> +	void (*vgpu_deactivate)(struct intel_vgpu *);
>>>>    };
>>>>
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>> index 3c5682a..59c59e4 100644
>>>> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>>> @@ -544,6 +544,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
>>>>    	if (ret)
>>>>    		goto undo_group;
>>>>
>>>> +	intel_gvt_ops->vgpu_activate(vgpu);
>>>> +
>>>>    	atomic_set(&vgpu->vdev.released, 0);
>>>>    	return ret;
>>>>
>>>> @@ -569,6 +571,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
>>>>    	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
>>>>    		return;
>>>>
>>>> +	intel_gvt_ops->vgpu_deactivate(vgpu);
>>>> +
>>>>    	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
>>>>    					&vgpu->vdev.iommu_notifier);
>>>>    	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
>>>> diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
>>>> index dfd2eff..fcd3c27 100644
>>>> --- a/drivers/gpu/drm/i915/gvt/vgpu.c
>>>> +++ b/drivers/gpu/drm/i915/gvt/vgpu.c
>>>> @@ -179,20 +179,34 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
>>>>    }
>>>>
>>>>    /**
>>>> - * intel_gvt_destroy_vgpu - destroy a virtual GPU
>>>> + * intel_gvt_active_vgpu - activate a virtual GPU
>>>>     * @vgpu: virtual GPU
>>>>     *
>>>> - * This function is called when user wants to destroy a virtual GPU.
>>>> + * This function is called when user wants to activate a virtual GPU.
>>>>     *
>>>>     */
>>>> -void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
>>>> +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
>>>> +{
>>>> +	mutex_lock(&vgpu->gvt->lock);
>>>> +	vgpu->active = true;
>>>> +	mutex_unlock(&vgpu->gvt->lock);
>>>> +}
>>>> +
>>>> +/**
>>>> + * intel_gvt_deactive_vgpu - deactivate a virtual GPU
>>>> + * @vgpu: virtual GPU
>>>> + *
>>>> + * This function is called when user wants to deactivate a virtual GPU.
>>>> + * All virtual GPU runtime information will be destroyed.
>>>> + *
>>>> + */
>>>> +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
>>>>    {
>>>>    	struct intel_gvt *gvt = vgpu->gvt;
>>>>
>>>>    	mutex_lock(&gvt->lock);
>>>>
>>>>    	vgpu->active = false;
>>>> -	idr_remove(&gvt->vgpu_idr, vgpu->id);
>>>>
>>>>    	if (atomic_read(&vgpu->running_workload_num)) {
>>>>    		mutex_unlock(&gvt->lock);
>>>> @@ -201,6 +215,26 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
>>>>    	}
>>>>
>>>>    	intel_vgpu_stop_schedule(vgpu);
>>>> +
>>>> +	mutex_unlock(&gvt->lock);
>>>> +}
>>>> +
>>>> +/**
>>>> + * intel_gvt_destroy_vgpu - destroy a virtual GPU
>>>> + * @vgpu: virtual GPU
>>>> + *
>>>> + * This function is called when user wants to destroy a virtual GPU.
>>>> + *
>>>> + */
>>>> +void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
>>>> +{
>>>> +	struct intel_gvt *gvt = vgpu->gvt;
>>>> +
>>>> +	mutex_lock(&gvt->lock);
>>>> +
>>>> +	WARN(vgpu->active, "vGPU is still active!\n");
>>>> +
>>>> +	idr_remove(&gvt->vgpu_idr, vgpu->id);
>>>>    	intel_vgpu_clean_sched_policy(vgpu);
>>>>    	intel_vgpu_clean_gvt_context(vgpu);
>>>>    	intel_vgpu_clean_execlist(vgpu);
>>>> @@ -277,7 +311,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
>>>>    	if (ret)
>>>>    		goto out_clean_shadow_ctx;
>>>>
>>>> -	vgpu->active = true;
>>>>    	mutex_unlock(&gvt->lock);
>>>>
>>>>    	return vgpu;
>>>> --
>>>> 2.7.4
>>>>
>>>> _______________________________________________
>>>> intel-gvt-dev mailing list
>>>> intel-gvt-dev at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
>>>
>