[PATCH v2 1/2] drm/i915/gvt: Fix guest vGPU hang caused by very high dma setup overhead
Du, Changbin
changbin.du at intel.com
Fri Feb 9 11:14:00 UTC 2018
On Fri, Feb 09, 2018 at 11:14:38AM +0800, Zhenyu Wang wrote:
> On 2018.02.07 10:55:41 +0800, changbin.du at intel.com wrote:
> > From: Changbin Du <changbin.du at intel.com>
[...]
> > +static void gvt_cache_init(struct intel_vgpu *vgpu)
> > +{
> > + vgpu->vdev.gfn_cache = RB_ROOT;
> > + vgpu->vdev.dma_addr_cache = RB_ROOT;
> > + mutex_init(&vgpu->vdev.cache_lock);
> > +}
> > +
> > static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
> > {
> > hash_init(info->ptable);
> > @@ -489,13 +490,19 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
> >
> > if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
> > struct vfio_iommu_type1_dma_unmap *unmap = data;
> > - unsigned long gfn, end_gfn;
> > + struct gvt_dma *entry;
> > + unsigned long size;
> >
> > - gfn = unmap->iova >> PAGE_SHIFT;
> > - end_gfn = gfn + unmap->size / PAGE_SIZE;
> > + mutex_lock(&vgpu->vdev.cache_lock);
> > + for (size = 0; size < unmap->size; size += PAGE_SIZE) {
> > + entry = __gvt_cache_find_dma_addr(vgpu, unmap->iova + size);
> > + if (!entry)
> > + continue;
>
> I don't think this vfio unmap iova is related to our real hw dma address;
> without vIOMMU it's a gpa, and we don't support vIOMMU now. Could you
> double check? The two cache trees look unnecessary, but splitting out the
> dma interface is fine.
>
vIOMMU? This notifier is called from the host vfio type1 iommu, not from the
guest side. Actually this also fixes a bug in kvmgt along the way.
Two caches are needed because we have to look entries up by both gfn (on the
map path) and dma addr (on the unmap path).
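
Roughly, the dual-index cache looks like below (a minimal sketch; the field
names follow this patch, and the dma-addr side lookup is symmetric to the
gfn one shown here):

/*
 * One entry per pinned guest page, linked into both rb-trees so it can
 * be found either by gfn or by dma addr.
 */
struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;	/* keyed by gfn in vdev.gfn_cache */
	struct rb_node dma_addr_node;	/* keyed by dma addr in vdev.dma_addr_cache */
	unsigned long gfn;
	dma_addr_t dma_addr;
	struct kref ref;		/* one reference per user of the mapping */
};

static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu,
					    unsigned long gfn)
{
	struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;

	while (node) {
		struct gvt_dma *itr =
			rb_entry(node, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

__gvt_cache_find_dma_addr() walks vdev.dma_addr_cache the same way, keyed
on dma_addr, which is what the unmap notifier above needs.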
> >
> > - while (gfn < end_gfn)
> > - gvt_cache_remove(vgpu, gfn++);
> > + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
> > + __gvt_cache_remove_entry(vgpu, entry);
> > + }
> > + mutex_unlock(&vgpu->vdev.cache_lock);
> > }
> >
> > return NOTIFY_OK;
> > @@ -1527,39 +1534,77 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
> >
> > static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
> > {
> > - unsigned long iova, pfn;
> > struct kvmgt_guest_info *info;
> > - struct device *dev;
> > - struct intel_vgpu *vgpu;
> > - int rc;
> > + kvm_pfn_t pfn;
> >
> > if (!handle_valid(handle))
> > return INTEL_GVT_INVALID_ADDR;
> >
> > info = (struct kvmgt_guest_info *)handle;
> > - vgpu = info->vgpu;
> > - iova = gvt_cache_find(info->vgpu, gfn);
> > - if (iova != INTEL_GVT_INVALID_ADDR)
> > - return iova;
> > -
> > - pfn = INTEL_GVT_INVALID_ADDR;
> > - dev = mdev_dev(info->vgpu->vdev.mdev);
> > - rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
> > - if (rc != 1) {
> > - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
> > - gfn, rc);
> > - return INTEL_GVT_INVALID_ADDR;
> > - }
> > - /* transfer to host iova for GFX to use DMA */
> > - rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
> > - if (rc) {
> > - gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
> > - vfio_unpin_pages(dev, &gfn, 1);
> > +
> > + pfn = gfn_to_pfn(info->kvm, gfn);
> > + if (is_error_noslot_pfn(pfn))
> > return INTEL_GVT_INVALID_ADDR;
> > +
> > + return pfn;
> > +}
> > +
> > +int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
> > + dma_addr_t *dma_addr)
> > +{
> > + struct kvmgt_guest_info *info;
> > + struct intel_vgpu *vgpu;
> > + struct gvt_dma *entry;
> > + int ret;
> > +
> > + if (!handle_valid(handle))
> > + return -EINVAL;
> > +
> > + info = (struct kvmgt_guest_info *)handle;
> > + vgpu = info->vgpu;
> > +
> > + mutex_lock(&info->vgpu->vdev.cache_lock);
> > +
> > + entry = __gvt_cache_find_gfn(info->vgpu, gfn);
> > + if (!entry) {
> > + ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
> > + if (ret) {
> > + mutex_unlock(&info->vgpu->vdev.cache_lock);
> > + return ret;
> > + }
> > + __gvt_cache_add(info->vgpu, gfn, *dma_addr);
> > + } else {
> > + kref_get(&entry->ref);
> > + *dma_addr = entry->dma_addr;
> > }
> >
> > - gvt_cache_add(info->vgpu, gfn, iova);
> > - return iova;
> > + mutex_unlock(&info->vgpu->vdev.cache_lock);
> > + return 0;
> > +}
> > +
> > +static void __gvt_dma_release(struct kref *ref)
> > +{
> > + struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
> > +
> > + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
> > + __gvt_cache_remove_entry(entry->vgpu, entry);
> > +}
> > +
> > +void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
> > +{
> > + struct kvmgt_guest_info *info;
> > + struct gvt_dma *entry;
> > +
> > + if (!handle_valid(handle))
> > + return;
> > +
> > + info = (struct kvmgt_guest_info *)handle;
> > +
> > + mutex_lock(&info->vgpu->vdev.cache_lock);
> > + entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
> > + if (entry)
> > + kref_put(&entry->ref, __gvt_dma_release);
> > + mutex_unlock(&info->vgpu->vdev.cache_lock);
> > }
> >
> > static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
> > @@ -1634,6 +1679,8 @@ struct intel_gvt_mpt kvmgt_mpt = {
> > .read_gpa = kvmgt_read_gpa,
> > .write_gpa = kvmgt_write_gpa,
> > .gfn_to_mfn = kvmgt_gfn_to_pfn,
> > + .dma_map_guest_page = kvmgt_dma_map_guest_page,
> > + .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
> > .set_opregion = kvmgt_set_opregion,
> > .get_vfio_device = kvmgt_get_vfio_device,
> > .put_vfio_device = kvmgt_put_vfio_device,
> > diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
> > index 78fada9..32ffcd5 100644
> > --- a/drivers/gpu/drm/i915/gvt/mpt.h
> > +++ b/drivers/gpu/drm/i915/gvt/mpt.h
> > @@ -228,6 +228,34 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
> > }
> >
> > /**
> > + * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page
> > + * @vgpu: a vGPU
> > + * @gfn: guest pfn
> > + * @dma_addr: retrieve allocated dma addr
> > + *
> > + * Returns:
> > + * 0 on success, negative error code if failed.
> > + */
> > +static inline int intel_gvt_hypervisor_dma_map_guest_page(
> > + struct intel_vgpu *vgpu, unsigned long gfn,
> > + dma_addr_t *dma_addr)
> > +{
> > + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn,
> > + dma_addr);
> > +}
> > +
> > +/**
> > + * intel_gvt_hypervisor_dma_unmap_guest_page - cancel dma map for guest page
> > + * @vgpu: a vGPU
> > + * @dma_addr: the mapped dma addr
> > + */
> > +static inline void intel_gvt_hypervisor_dma_unmap_guest_page(
> > + struct intel_vgpu *vgpu, dma_addr_t dma_addr)
> > +{
> > + intel_gvt_host.mpt->dma_unmap_guest_page(vgpu->handle, dma_addr);
> > +}
> > +
> > +/**
> > * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
> > * @vgpu: a vGPU
> > * @gfn: guest PFN
> > --
> > 2.7.4
> >
>
> --
> Open Source Technology Center, Intel ltd.
>
> $gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
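
A caller would pair the two new hooks roughly like below (illustrative
sketch only; the helper names here are hypothetical, not from this series):

/*
 * Map on shadow PTE setup: a cache hit just takes another reference,
 * a cache miss pins the guest page and creates the dma mapping.
 */
static int example_shadow_map(struct intel_vgpu *vgpu,
			      unsigned long gfn, dma_addr_t *dma_addr)
{
	int ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
							  dma_addr);
	if (ret)
		return ret;
	/* ... program *dma_addr into the shadow PTE ... */
	return 0;
}

/*
 * Unmap on shadow PTE teardown: drops one reference; the page is
 * unpinned and the dma mapping torn down only on the last put.
 */
static void example_shadow_unmap(struct intel_vgpu *vgpu,
				 dma_addr_t dma_addr)
{
	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr);
}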
--
Thanks,
Changbin Du