[Intel-gfx] [PATCH 147/190] drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 03:00:48 PST 2016
Measured on an Ivybridge i7-3720qm with 1600MHz DDR3 and 32 fences (before -> after):
Upload rate for 2 linear surfaces: 8134MiB/s -> 8154MiB/s
Upload rate for 2 tiled surfaces: 8625MiB/s -> 8632MiB/s
Upload rate for 4 linear surfaces: 8127MiB/s -> 8134MiB/s
Upload rate for 4 tiled surfaces: 8602MiB/s -> 8629MiB/s
Upload rate for 8 linear surfaces: 8124MiB/s -> 8137MiB/s
Upload rate for 8 tiled surfaces: 8603MiB/s -> 8624MiB/s
Upload rate for 16 linear surfaces: 8123MiB/s -> 8128MiB/s
Upload rate for 16 tiled surfaces: 8606MiB/s -> 8618MiB/s
Upload rate for 32 linear surfaces: 8121MiB/s -> 8128MiB/s
Upload rate for 32 tiled surfaces: 8605MiB/s -> 8614MiB/s
Upload rate for 64 linear surfaces: 8121MiB/s -> 8127MiB/s
Upload rate for 64 tiled surfaces: 3017MiB/s -> 5202MiB/s
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
---
drivers/gpu/drm/Makefile | 2 +-
drivers/gpu/drm/i915/Makefile | 5 +-
drivers/gpu/drm/i915/i915_drv.h | 4 ++
drivers/gpu/drm/i915/i915_gem.c | 46 +++-----------
drivers/gpu/drm/i915/i915_memory.c | 122 +++++++++++++++++++++++++++++++++++++
5 files changed, 138 insertions(+), 41 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_memory.c
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index f858aa25fbb2..6834d0e33741 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
-obj-$(CONFIG_DRM_I915) += i915/
+obj-y += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 79d657f29241..a362425ef862 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -100,6 +100,9 @@ i915-y += i915_vgpu.o
# legacy horrors
i915-y += i915_dma.o
-obj-$(CONFIG_DRM_I915) += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
+ifdef CONFIG_DRM_I915
+obj-y += i915_memory.o
+endif
CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 45b8cbdfab55..e6f49175af1b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3447,4 +3447,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
return false;
}
+int remap_io_mapping(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn, unsigned long size,
+ struct io_mapping *iomap);
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e321fdd90d2..1fa4752682d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1449,7 +1449,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_vma *ggtt;
pgoff_t page_offset;
- unsigned long pfn;
int ret = 0;
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
@@ -1517,44 +1516,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto unpin;
/* Finally, remap it using the new GTT offset */
- pfn = dev_priv->gtt.mappable_base + ggtt->node.start;
- pfn >>= PAGE_SHIFT;
-
- if (ggtt->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
- if (!obj->fault_mappable) {
- unsigned long size = min_t(unsigned long,
- vma->vm_end - vma->vm_start,
- obj->base.size);
- int i;
-
- for (i = 0; i < size >> PAGE_SHIFT; i++) {
- ret = vm_insert_pfn(vma,
- (unsigned long)vma->vm_start + i * PAGE_SIZE,
- pfn + i);
- if (ret)
- break;
- }
- } else
- ret = vm_insert_pfn(vma,
- (unsigned long)vmf->virtual_address,
- pfn + page_offset);
- } else {
- /* Overriding existing pages in partial view does not cause
- * us any trouble as TLBs are still valid because the fault
- * is due to userspace losing part of the mapping or never
- * having accessed it before (at this partials' range).
- */
- const struct i915_ggtt_view *view = &ggtt->ggtt_view;
- unsigned long base = vma->vm_start +
- (view->params.partial.offset << PAGE_SHIFT);
- unsigned int i;
-
- for (i = 0; i < view->params.partial.size; i++) {
- ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
- if (ret)
- break;
- }
- }
+ ret = remap_io_mapping(vma,
+ vma->vm_start + (ggtt->ggtt_view.params.partial.offset << PAGE_SHIFT),
+ (dev_priv->gtt.mappable_base + ggtt->node.start) >> PAGE_SHIFT,
+ min_t(u64, ggtt->size, vma->vm_end - vma->vm_start),
+ &dev_priv->gtt.mappable);
+ if (ret)
+ goto unpin;
obj->fault_mappable = true;
unpin:
diff --git a/drivers/gpu/drm/i915/i915_memory.c b/drivers/gpu/drm/i915/i915_memory.c
new file mode 100644
index 000000000000..f684576022f3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memory.c
@@ -0,0 +1,122 @@
+#include <linux/mm.h>
+#include <linux/io-mapping.h>
+
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include "i915_drv.h"
+
+struct remap_pfn { /* cursor state shared by every level of the page-table walk below */
+ struct mm_struct *mm; /* address space whose page tables are populated */
+ unsigned long addr; /* next user virtual address to map; advanced one page per PTE */
+ unsigned long pfn; /* page frame number to install at addr; advanced one per PTE */
+ pgprot_t prot; /* protection bits used for every PTE written */
+};
+
+static inline void remap_pfn(struct remap_pfn *r, pte_t *pte) /* write one PTE, then step the cursor by a page */
+{
+ set_pte_at(r->mm, r->addr, pte,
+ pte_mkspecial(pfn_pte(r->pfn, r->prot))); /* PTE built from r->pfn and r->prot, passed through pte_mkspecial() */
+ r->pfn++; /* next page frame */
+ r->addr += PAGE_SIZE; /* next user page */
+}
+
+static inline int remap_pte_range(struct remap_pfn *r, pmd_t *pmd, unsigned long end) /* fill the PTEs under one pmd for [r->addr, end) */
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pte = pte_alloc_map_lock(r->mm, pmd, r->addr, &ptl); /* ptl is handed back for the unlock below */
+ if (!pte)
+ return -ENOMEM; /* no PTE pointer could be obtained for this pmd */
+
+ arch_enter_lazy_mmu_mode();
+ do
+ remap_pfn(r, pte++); /* also advances r->addr and r->pfn by one page */
+ while (r->addr < end);
+ arch_leave_lazy_mmu_mode();
+
+ pte_unmap_unlock(pte - 1, ptl); /* pte is one past the last entry written, so step back */
+ return 0;
+}
+
+static inline int remap_pmd_range(struct remap_pfn *r, pud_t *pud, unsigned long end) /* walk the pmds under one pud for [r->addr, end) */
+{
+ pmd_t *pmd;
+ int err;
+
+ pmd = pmd_alloc(r->mm, pud, r->addr);
+ if (!pmd)
+ return -ENOMEM;
+ VM_BUG_ON(pmd_trans_huge(*pmd)); /* a transparent-huge pmd is not expected in this range */
+
+ do
+ err = remap_pte_range(r, pmd++, pmd_addr_end(r->addr, end)); /* r->addr moves forward inside the callee */
+ while (err == 0 && r->addr < end); /* stop at the first error or once the range is covered */
+
+ return err;
+}
+
+static inline int remap_pud_range(struct remap_pfn *r, pgd_t *pgd, unsigned long end) /* walk the puds under one pgd for [r->addr, end) */
+{
+ pud_t *pud;
+ int err;
+
+ pud = pud_alloc(r->mm, pgd, r->addr);
+ if (!pud)
+ return -ENOMEM;
+
+ do
+ err = remap_pmd_range(r, pud++, pud_addr_end(r->addr, end)); /* r->addr moves forward inside the callee */
+ while (err == 0 && r->addr < end); /* stop at the first error or once the range is covered */
+
+ return err;
+}
+
+/**
+ * remap_io_mapping - remap an IO mapping to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: page frame number of the first page to map; later pages use consecutive frames
+ * @size: size of map area in bytes; rounded up to a whole number of pages
+ * @iomap: the source io_mapping; supplies the cache-attribute bits of the PTEs
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
+int remap_io_mapping(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn, unsigned long size,
+ struct io_mapping *iomap)
+{
+ unsigned long end = addr + PAGE_ALIGN(size);
+ struct remap_pfn r;
+ pgd_t *pgd;
+ int err;
+
+ if (WARN_ON(addr >= end)) /* empty range, or addr + size wrapped around */
+ return -EINVAL;
+
+#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+ BUG_ON((vma->vm_flags & MUST_SET) != MUST_SET); /* the vma must already carry all four flags */
+#undef MUST_SET
+
+ r.mm = vma->vm_mm;
+ r.addr = addr;
+ r.pfn = pfn;
+ r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
+ (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); /* cache bits from iomap, every other bit from the vma */
+
+ pgd = pgd_offset(r.mm, addr);
+ do
+ err = remap_pud_range(&r, pgd++, pgd_addr_end(r.addr, end)); /* r.addr moves forward inside the callee */
+ while (err == 0 && r.addr < end);
+
+ if (err)
+ zap_vma_ptes(vma, addr, r.addr - addr); /* on failure, zap the range walked so far: [addr, r.addr) */
+
+ return err; /* 0 on success, otherwise the error from the failing level */
+}
+EXPORT_SYMBOL_GPL(remap_io_mapping);
--
2.7.0.rc3
More information about the Intel-gfx
mailing list