[Intel-gfx] [PATCH v6 10/14] drm/i915/kvmgt: Support setting dma map for huge pages
Matthew Auld
matthew.william.auld at gmail.com
Thu May 10 14:31:03 UTC 2018
On 8 May 2018 at 10:05, <changbin.du at intel.com> wrote:
> From: Changbin Du <changbin.du at intel.com>
>
> To support huge gtt, we need to support huge pages in kvmgt first.
> This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page
> API and implements it in kvmgt.
>
> v2: rebase.
>
> Signed-off-by: Changbin Du <changbin.du at intel.com>
> ---
> drivers/gpu/drm/i915/gvt/gtt.c | 6 +-
> drivers/gpu/drm/i915/gvt/hypercall.h | 2 +-
> drivers/gpu/drm/i915/gvt/kvmgt.c | 130 +++++++++++++++++++++++++----------
> drivers/gpu/drm/i915/gvt/mpt.h | 7 +-
> 4 files changed, 101 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 2f13464..ffeecda 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -1104,7 +1104,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
>
> for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
> ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
> - start_gfn + i, &dma_addr);
> + start_gfn + i, PAGE_SIZE, &dma_addr);
> if (ret)
> return ret;
>
> @@ -1150,7 +1150,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
> };
>
> /* direct shadow */
> - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
> + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
> if (ret)
> return -ENXIO;
>
> @@ -2078,7 +2078,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
> }
>
> ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
> - &dma_addr);
> + PAGE_SIZE, &dma_addr);
> if (ret) {
> gvt_vgpu_err("fail to populate guest ggtt entry\n");
> /* guest driver may read/write the entry when partial
> diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
> index f6dd9f7..5af11cf 100644
> --- a/drivers/gpu/drm/i915/gvt/hypercall.h
> +++ b/drivers/gpu/drm/i915/gvt/hypercall.h
> @@ -53,7 +53,7 @@ struct intel_gvt_mpt {
> unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
>
> int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn,
> - dma_addr_t *dma_addr);
> + unsigned long size, dma_addr_t *dma_addr);
> void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
>
> int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index df4e4a0..4d2f53a 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -94,6 +94,7 @@ struct gvt_dma {
> struct rb_node dma_addr_node;
> gfn_t gfn;
> dma_addr_t dma_addr;
> + unsigned long size;
> struct kref ref;
> };
>
> @@ -106,51 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
> static void intel_vgpu_release_work(struct work_struct *work);
> static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
>
> -static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
> - dma_addr_t *dma_addr)
> +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
> + unsigned long size)
> {
> - struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
> - struct page *page;
> - unsigned long pfn;
> + int total_pages;
> + int npage;
> int ret;
>
> - /* Pin the page first. */
> - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
> - IOMMU_READ | IOMMU_WRITE, &pfn);
> - if (ret != 1) {
> - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
> - gfn, ret);
> - return -EINVAL;
> + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
> +
> + for (npage = 0; npage < total_pages; npage++) {
> + unsigned long cur_gfn = gfn + npage;
> +
> + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
> + WARN_ON(ret != 1);
> }
> +}
>
> - if (!pfn_valid(pfn)) {
> - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
> - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
> - return -EINVAL;
> +/* Pin a normal or compound guest page for dma. */
> +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
> + unsigned long size, struct page **page)
> +{
> + unsigned long base_pfn = 0;
> + int total_pages;
> + int npage;
> + int ret;
> +
> + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
> + /*
> +        * We pin the pages one-by-one to avoid allocating a big array
> + * on stack to hold pfns.
> + */
> + for (npage = 0; npage < total_pages; npage++) {
> + unsigned long cur_gfn = gfn + npage;
> + unsigned long pfn;
> +
> + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
> + IOMMU_READ | IOMMU_WRITE, &pfn);
> + if (ret != 1) {
> + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
> + cur_gfn, ret);
> + goto err;
> + }
> +
> + if (!pfn_valid(pfn)) {
> + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
> + npage++;
> + ret = -EFAULT;
> + goto err;
> + }
> +
> + if (npage == 0)
> + base_pfn = pfn;
> + else if (base_pfn + npage != pfn) {
> + gvt_vgpu_err("The pages are not continuous\n");
> + ret = -EINVAL;
> + npage++;
> + goto err;
> + }
> }
>
> + *page = pfn_to_page(base_pfn);
> + return 0;
> +err:
> + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
> + return ret;
> +}
> +
> +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
> + dma_addr_t *dma_addr, unsigned long size)
> +{
> + struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
> + struct page *page = NULL;
> + int ret;
> +
> + ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
> + if (ret)
> + return ret;
> +
> /* Setup DMA mapping. */
> - page = pfn_to_page(pfn);
> - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
> - PCI_DMA_BIDIRECTIONAL);
> - if (dma_mapping_error(dev, *dma_addr)) {
> - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
> - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
> - return -ENOMEM;
> + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
Don't we need to check whether the dma addr we get back is aligned to
the requested page size, and fall back to splitting the 2M shadow
entry if it isn't?
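
Roughly what I had in mind, as an untested sketch on top of this hunk
(the -EIO choice and the caller-side fallback to 4K shadow entries are
assumptions on my part, not something this series already provides;
IS_ALIGNED and dma_unmap_page are the existing kernel helpers):

    /* Setup DMA mapping. */
    *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
    if (dma_mapping_error(dev, *dma_addr)) {
            gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
            gvt_unpin_guest_page(vgpu, gfn, size);
            return -ENOMEM;
    }

    /*
     * Sketch: if the IOMMU hands back an address that is not aligned
     * to the requested huge page size, the HW can't use it as a
     * 64K/2M PTE, so undo the mapping and let the caller shadow with
     * 4K entries instead.
     */
    if (!IS_ALIGNED(*dma_addr, size)) {
            dma_unmap_page(dev, *dma_addr, size, PCI_DMA_BIDIRECTIONAL);
            gvt_unpin_guest_page(vgpu, gfn, size);
            return -EIO;
    }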