[Intel-gfx] [PATCH] drm/i915/gtt: Make gen6 page directories evictable
Matthew Auld
matthew.william.auld at gmail.com
Wed Jun 6 23:07:21 UTC 2018
On 6 June 2018 at 07:51, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> Currently all page directories are bound at creation using an
> unevictable node in the GGTT. This severely limits us, as we cannot
> remove any inactive ppgtt for new contexts, or under aperture pressure.
> To fix this we need to make the page directory into a first-class vma
> that can be unbound like any other. Hence we create a custom vma to
> wrap the page directory, rather than wrapping a GEM object.
>
> In this patch, we leave the page directories pinned upon creation.
>
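The split between creating the pd vma and pinning it reads nicely. Just to
check my understanding of the intended lifecycle once a follow-up drops the
permanent pin: a caller would bracket use of the ppgtt with
gen6_ppgtt_pin()/i915_vma_unpin(), roughly like the sketch below (the wrapper
function is hypothetical on my part; only the pin/unpin calls are from this
patch):

    /* Hypothetical caller, once the permanent pin is dropped. */
    static int example_use_gen6_ppgtt(struct i915_hw_ppgtt *base)
    {
            struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
            int err;

            /* Binds the PD into the GGTT; pd_vma_bind() writes out the PDEs. */
            err = gen6_ppgtt_pin(base);
            if (err)
                    return err;

            /* ... submit work using this ppgtt ... */

            /* Drop the pin so the PD becomes evictable under GGTT pressure. */
            i915_vma_unpin(ppgtt->vma);
            return 0;
    }
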
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Matthew Auld <matthew.william.auld at gmail.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 261 ++++++++++++++++------------
> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +-
> drivers/gpu/drm/i915/i915_vma.h | 7 +
> 3 files changed, 159 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ea27f799101f..60b5966360b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1640,50 +1640,55 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
> {
> struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> struct i915_address_space *vm = &base->vm;
> - struct i915_page_table *unused;
> - gen6_pte_t scratch_pte;
> - u32 pd_entry, pte, pde;
> -
> - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> -
> - gen6_for_all_pdes(unused, &base->pd, pde) {
> - u32 expected;
> - gen6_pte_t *pt_vaddr;
> - const dma_addr_t pt_addr = px_dma(base->pd.page_table[pde]);
> - pd_entry = readl(ppgtt->pd_addr + pde);
> - expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
> -
> - if (pd_entry != expected)
> - seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> - pde,
> - pd_entry,
> - expected);
> - seq_printf(m, "\tPDE: %x\n", pd_entry);
> -
> - pt_vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> -
> - for (pte = 0; pte < GEN6_PTES; pte+=4) {
> - unsigned long va =
> - (pde * PAGE_SIZE * GEN6_PTES) +
> - (pte * PAGE_SIZE);
> + const gen6_pte_t scratch_pte =
> + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> + struct i915_page_table *pt;
> + u32 pte, pde;
> +
> + gen6_for_all_pdes(pt, &base->pd, pde) {
> + gen6_pte_t *vaddr;
> +
> + if (pt == base->vm.scratch_pt)
> + continue;
> +
> + if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
> + u32 expected =
> + GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
> + GEN6_PDE_VALID;
> + u32 pd_entry = readl(ppgtt->pd_addr + pde);
> +
> + if (pd_entry != expected)
> + seq_printf(m,
> + "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> + pde,
> + pd_entry,
> + expected);
> +
> + seq_printf(m, "\tPDE: %x\n", pd_entry);
> + }
> +
> + vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> + for (pte = 0; pte < GEN6_PTES; pte += 4) {
> int i;
> - bool found = false;
> +
> for (i = 0; i < 4; i++)
> - if (pt_vaddr[pte + i] != scratch_pte)
> - found = true;
> - if (!found)
> + if (vaddr[pte + i] != scratch_pte)
> + break;
> + if (i == 4)
> continue;
>
> - seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
> + seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
> + pde, pte,
> + (pde * GEN6_PTES + pte) * PAGE_SIZE);
> for (i = 0; i < 4; i++) {
> - if (pt_vaddr[pte + i] != scratch_pte)
> - seq_printf(m, " %08x", pt_vaddr[pte + i]);
> + if (vaddr[pte + i] != scratch_pte)
> + seq_printf(m, " %08x", vaddr[pte + i]);
> else
> - seq_puts(m, " SCRATCH ");
> + seq_puts(m, " SCRATCH");
> }
> seq_puts(m, "\n");
> }
> - kunmap_atomic(pt_vaddr);
> + kunmap_atomic(vaddr);
> }
> }
>
> @@ -1697,22 +1702,6 @@ static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
> ppgtt->pd_addr + pde);
> }
>
> -/* Write all the page tables found in the ppgtt structure to incrementing page
> - * directories. */
> -static void gen6_write_page_range(struct i915_hw_ppgtt *base,
> - u32 start, u32 length)
> -{
> - struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> - struct i915_page_table *pt;
> - unsigned int pde;
> -
> - gen6_for_each_pde(pt, &base->pd, start, length, pde)
> - gen6_write_pde(ppgtt, pde, pt);
> -
> - mark_tlbs_dirty(base);
> - gen6_ggtt_invalidate(base->vm.i915);
> -}
> -
> static inline u32 get_pd_offset(struct gen6_hw_ppgtt *ppgtt)
> {
> GEM_BUG_ON(ppgtt->base.pd.base.ggtt_offset & 0x3f);
> @@ -1919,8 +1908,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>
> gen6_initialize_pt(vm, pt);
> ppgtt->base.pd.page_table[pde] = pt;
> - gen6_write_pde(ppgtt, pde, pt);
> - flush = true;
> +
> + if (i915_vma_is_bound(ppgtt->vma,
> + I915_VMA_GLOBAL_BIND)) {
> + gen6_write_pde(ppgtt, pde, pt);
> + flush = true;
> + }
> }
> }
>
> @@ -1936,8 +1929,11 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
> return -ENOMEM;
> }
>
> -static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
> +static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
> {
> + struct i915_address_space * const vm = &ppgtt->base.vm;
> + struct i915_page_table *unused;
> + u32 pde;
> int ret;
>
> ret = setup_scratch_page(vm, __GFP_HIGHMEM);
> @@ -1951,6 +1947,8 @@ static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
> }
>
> gen6_initialize_pt(vm, vm->scratch_pt);
> + gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
> + ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
>
> return 0;
> }
> @@ -1975,52 +1973,110 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
> {
> struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
>
> - drm_mm_remove_node(&ppgtt->node);
> + i915_vma_unpin(ppgtt->vma);
> + i915_vma_destroy(ppgtt->vma);
>
> gen6_ppgtt_free_pd(ppgtt);
> gen6_ppgtt_free_scratch(vm);
> }
>
> -static int gen6_ppgtt_allocate_page_directories(struct gen6_hw_ppgtt *ppgtt)
> +static int pd_vma_set_pages(struct i915_vma *vma)
> {
> - struct drm_i915_private *dev_priv = ppgtt->base.vm.i915;
> - struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - int err;
> + vma->pages = ERR_PTR(-ENODEV);
> + return 0;
> +}
>
> - /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> - * allocator works in address space sizes, so it's multiplied by page
> - * size. We allocate at the top of the GTT to avoid fragmentation.
> - */
> - BUG_ON(!drm_mm_initialized(&ggtt->vm.mm));
> +static void pd_vma_clear_pages(struct i915_vma *vma)
> +{
> + GEM_BUG_ON(!vma->pages);
>
> - err = i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node,
> - GEN6_PD_SIZE, GEN6_PD_ALIGN,
> - I915_COLOR_UNEVICTABLE,
> - 0, ggtt->vm.total,
> - PIN_HIGH);
> - if (err)
> - return err;
> + vma->pages = NULL;
> +}
>
> - if (ppgtt->node.start < ggtt->mappable_end)
> - DRM_DEBUG("Forced to use aperture for PDEs\n");
> +static int pd_vma_bind(struct i915_vma *vma,
> + enum i915_cache_level cache_level,
> + u32 unused)
> +{
> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> + struct gen6_hw_ppgtt *ppgtt = vma->private;
> + u32 ggtt_offset = i915_ggtt_offset(vma) / PAGE_SIZE;
> + struct i915_page_table *pt;
> + unsigned int pde;
>
> - ppgtt->base.pd.base.ggtt_offset =
> - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
> + ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
> + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
>
> - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
> - ppgtt->base.pd.base.ggtt_offset / sizeof(gen6_pte_t);
> + gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
> + gen6_write_pde(ppgtt, pde, pt);
> +
> + mark_tlbs_dirty(&ppgtt->base);
> + gen6_ggtt_invalidate(ppgtt->base.vm.i915);
>
> return 0;
> }
>
> -static void gen6_scratch_va_range(struct gen6_hw_ppgtt *ppgtt,
> - u64 start, u64 length)
> +static void pd_vma_unbind(struct i915_vma *vma)
> {
> - struct i915_page_table *unused;
> - u32 pde;
> +}
> +
> +static const struct i915_vma_ops pd_vma_ops = {
> + .set_pages = pd_vma_set_pages,
> + .clear_pages = pd_vma_clear_pages,
> + .bind_vma = pd_vma_bind,
> + .unbind_vma = pd_vma_unbind,
> +};
> +
> +static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> +{
> + struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> + struct i915_ggtt *ggtt = &i915->ggtt;
> + struct i915_vma *vma;
> + int i;
>
> - gen6_for_each_pde(unused, &ppgtt->base.pd, start, length, pde)
> - ppgtt->base.pd.page_table[pde] = ppgtt->base.vm.scratch_pt;
> + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> + GEM_BUG_ON(size > ggtt->vm.total);
> +
> + vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
> + if (!vma)
> + return ERR_PTR(-ENOMEM);
> +
> + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> + init_request_active(&vma->last_read[i], NULL);
> + init_request_active(&vma->last_fence, NULL);
> +
> + vma->vm = &ggtt->vm;
> + vma->ops = &pd_vma_ops;
> + vma->private = ppgtt;
> +
> + vma->obj = NULL;
> + vma->resv = NULL;
> + vma->size = size;
> + vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> + vma->fence_size = size;
> + vma->fence_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> + vma->flags |= I915_VMA_GGTT;
> + vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
> +
> + INIT_LIST_HEAD(&vma->obj_link);
> + list_add(&vma->vm_link, &vma->vm->unbound_list);
> +
> + return vma;
> +}
> +
> +static int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
> +{
> + struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> +
> + /*
> + * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> + * allocator works in address space sizes, so it's multiplied by page
> + * size. We allocate at the top of the GTT to avoid fragmentation.
> + */
> + return i915_vma_pin(ppgtt->vma,
> + 0, GEN6_PD_ALIGN,
> + PIN_GLOBAL | PIN_HIGH);
> }
>
> static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
> @@ -2058,24 +2114,25 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
> else
> BUG();
>
> - err = gen6_ppgtt_init_scratch(&ppgtt->base.vm);
> + err = gen6_ppgtt_init_scratch(ppgtt);
> if (err)
> goto err_free;
>
> - err = gen6_ppgtt_allocate_page_directories(ppgtt);
> - if (err)
> + ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
> + if (IS_ERR(ppgtt->vma))
Missing err = PTR_ERR(ppgtt->vma) here, so the failure is actually propagated?
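i.e. something along these lines; just a sketch, and the unwind label is my
guess since the rest of the hunk is trimmed here:

    ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
    if (IS_ERR(ppgtt->vma)) {
            err = PTR_ERR(ppgtt->vma);
            goto err_free; /* or whichever label also unwinds the scratch setup */
    }
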
Reviewed-by: Matthew Auld <matthew.william.auld at gmail.com>