[Intel-gfx] [PATCH v3] vfio: fix deadlock between group lock and kvm lock

Matthew Rosato mjrosato at linux.ibm.com
Fri Feb 3 21:19:10 UTC 2023


On 2/3/23 3:35 PM, Alex Williamson wrote:
> On Fri, 3 Feb 2023 12:29:01 -0500
> Matthew Rosato <mjrosato at linux.ibm.com> wrote:

...

> I'd probably go back to making this:
> 
> void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
> 
> so the vfio_main function would handle setting and clearing
> device->kvm.  That way we could also move the lockdep into the
> vfio_main functions.  Once we do that, there's no reason to have a
> group vs cdev put function and we end up with only a wrapper on the get
> function, which should really never be used directly, so we prefix it
> with an underscore.
> 
> At that point (see incremental diff below), it's about a wash.  Current v3:
> 
>  drivers/vfio/group.c     |   32 +++++++++++++----
>  drivers/vfio/vfio.h      |   14 +++++++
>  drivers/vfio/vfio_main.c |   70 +++++++++++++++++++++++++++++++++++----
>  include/linux/vfio.h     |    2 -
>  4 files changed, 103 insertions(+), 15 deletions(-)
> 
> Folding in below:
> 
>  drivers/vfio/group.c     |   44 ++++++++++++++++++++++-----
>  drivers/vfio/vfio.h      |   15 +++++++++
>  drivers/vfio/vfio_main.c |   63 ++++++++++++++++++++++++++++++++++-----
>  include/linux/vfio.h     |    2 -
>  4 files changed, 109 insertions(+), 15 deletions(-)
> 
> Unfortunately it seems I've talked myself into the answer that we
> should maybe just pre-enable cdev by not adding a group reference in
> vfio_main.  Thanks,
> 
> Alex
> 
> diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
> index 7fed4233ca23..98621ac082f0 100644
> --- a/drivers/vfio/group.c
> +++ b/drivers/vfio/group.c
> @@ -154,6 +154,18 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
>  	return ret;
>  }
>  
> +static void vfio_device_group_get_kvm_safe(struct vfio_device *device)
> +{
> +	spin_lock(&device->group->kvm_ref_lock);
> +	if (!device->group->kvm)
> +		goto unlock;
> +
> +	_vfio_device_get_kvm_safe(device, device->group->kvm);
> +
> +unlock:
> +	spin_unlock(&device->group->kvm_ref_lock);
> +}
> +
>  static int vfio_device_group_open(struct vfio_device *device)
>  {
>  	int ret;
> @@ -173,7 +185,7 @@ static int vfio_device_group_open(struct vfio_device *device)
>  	 * the pointer in the device for use by drivers.
>  	 */
>  	if (device->open_count == 0)
> -		vfio_device_get_kvm_safe(device);
> +		vfio_device_group_get_kvm_safe(device);
>  
>  	ret = vfio_device_open(device, device->group->iommufd, device->kvm);
>  
> diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
> index 20d715b0a3a8..24d6cd285945 100644
> --- a/drivers/vfio/vfio.h
> +++ b/drivers/vfio/vfio.h
> @@ -253,10 +253,11 @@ enum { vfio_noiommu = false };
>  #endif
>  
>  #ifdef CONFIG_HAVE_KVM
> -void vfio_device_get_kvm_safe(struct vfio_device *device);
> +void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
>  void vfio_device_put_kvm(struct vfio_device *device);
>  #else
> -static inline void vfio_device_get_kvm_safe(struct vfio_device *device)
> +static inline void _vfio_device_get_kvm_safe(struct vfio_device *device,
> +					     struct kvm *kvm)
>  {
>  }
>  
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 4762550e9f42..00d4d5167d6c 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -342,7 +342,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
>  EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
>  
>  #ifdef CONFIG_HAVE_KVM
> -void vfio_device_get_kvm_safe(struct vfio_device *device)
> +void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
>  {
>  	void (*pfn)(struct kvm *kvm);
>  	bool (*fn)(struct kvm *kvm);
> @@ -350,32 +350,25 @@ void vfio_device_get_kvm_safe(struct vfio_device *device)
>  
>  	lockdep_assert_held(&device->dev_set->lock);
>  
> -	spin_lock(&device->group->kvm_ref_lock);
> -	if (!device->group->kvm)
> -		goto unlock;
> -
>  	pfn = symbol_get(kvm_put_kvm);
>  	if (WARN_ON(!pfn))
> -		goto unlock;
> +		return;
>  
>  	fn = symbol_get(kvm_get_kvm_safe);
>  	if (WARN_ON(!fn)) {
>  		symbol_put(kvm_put_kvm);
> -		goto unlock;
> +		return;
>  	}
>  
>  	ret = fn(device->group->kvm);

s/device->group->kvm/kvm/

With that small change, this looks good to me as well (and testing looks good too).  Do you want me to send a v4 for one last round of review?

Thanks,
Matt


More information about the Intel-gfx mailing list