[Intel-gfx] [PATCH] drm/i915/gtt: Make gen6 page directories evictable

Matthew Auld matthew.william.auld at gmail.com
Wed Jun 6 23:07:21 UTC 2018


On 6 June 2018 at 07:51, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> Currently all page directories are bound at creation using an
> unevictable node in the GGTT. This severely limits us as we cannot
> evict the page directory of any inactive ppgtt to make room for new
> contexts, or to relieve aperture pressure. To fix this we need to
> make the page directory into a first-class, initially unbound vma,
> one that can be bound and evicted on demand. Hence the creation of a
> custom vma to wrap the page directory, as opposed to a GEM object.
>
> In this patch, we leave the page directories pinned upon creation.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Matthew Auld <matthew.william.auld at gmail.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 261 ++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
>  drivers/gpu/drm/i915/i915_vma.h     |   7 +
>  3 files changed, 159 insertions(+), 111 deletions(-)
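
As an aside for anyone following along, the PD lifecycle after this
patch becomes roughly the following (stitched together from the hunks
below; not every call site is visible in the quote, so treat the
placement as my reading rather than gospel):

        /* creation: the PD now gets its own GGTT vma, initially unbound */
        ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);

        /* pinning binds it into the GGTT; pd_vma_bind() then writes
         * all 512 PDEs at whatever offset the vma was given
         */
        err = gen6_ppgtt_pin(&ppgtt->base);

        /* teardown: unpin + destroy replaces drm_mm_remove_node() */
        i915_vma_unpin(ppgtt->vma);
        i915_vma_destroy(ppgtt->vma);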
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ea27f799101f..60b5966360b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1640,50 +1640,55 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
>  {
>         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
>         struct i915_address_space *vm = &base->vm;
> -       struct i915_page_table *unused;
> -       gen6_pte_t scratch_pte;
> -       u32 pd_entry, pte, pde;
> -
> -       scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> -
> -       gen6_for_all_pdes(unused, &base->pd, pde) {
> -               u32 expected;
> -               gen6_pte_t *pt_vaddr;
> -               const dma_addr_t pt_addr = px_dma(base->pd.page_table[pde]);
> -               pd_entry = readl(ppgtt->pd_addr + pde);
> -               expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
> -
> -               if (pd_entry != expected)
> -                       seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> -                                  pde,
> -                                  pd_entry,
> -                                  expected);
> -               seq_printf(m, "\tPDE: %x\n", pd_entry);
> -
> -               pt_vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> -
> -               for (pte = 0; pte < GEN6_PTES; pte+=4) {
> -                       unsigned long va =
> -                               (pde * PAGE_SIZE * GEN6_PTES) +
> -                               (pte * PAGE_SIZE);
> +       const gen6_pte_t scratch_pte =
> +               vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> +       struct i915_page_table *pt;
> +       u32 pte, pde;
> +
> +       gen6_for_all_pdes(pt, &base->pd, pde) {
> +               gen6_pte_t *vaddr;
> +
> +               if (pt == base->vm.scratch_pt)
> +                       continue;
> +
> +               if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
> +                       u32 expected =
> +                               GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
> +                               GEN6_PDE_VALID;
> +                       u32 pd_entry = readl(ppgtt->pd_addr + pde);
> +
> +                       if (pd_entry != expected)
> +                               seq_printf(m,
> +                                          "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> +                                          pde,
> +                                          pd_entry,
> +                                          expected);
> +
> +                       seq_printf(m, "\tPDE: %x\n", pd_entry);
> +               }
> +
> +               vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> +               for (pte = 0; pte < GEN6_PTES; pte += 4) {
>                         int i;
> -                       bool found = false;
> +
>                         for (i = 0; i < 4; i++)
> -                               if (pt_vaddr[pte + i] != scratch_pte)
> -                                       found = true;
> -                       if (!found)
> +                               if (vaddr[pte + i] != scratch_pte)
> +                                       break;
> +                       if (i == 4)
>                                 continue;
>
> -                       seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
> +                       seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
> +                                  pde, pte,
> +                                  (pde * GEN6_PTES + pte) * PAGE_SIZE);
>                         for (i = 0; i < 4; i++) {
> -                               if (pt_vaddr[pte + i] != scratch_pte)
> -                                       seq_printf(m, " %08x", pt_vaddr[pte + i]);
> +                               if (vaddr[pte + i] != scratch_pte)
> +                                       seq_printf(m, " %08x", vaddr[pte + i]);
>                                 else
> -                                       seq_puts(m, "  SCRATCH ");
> +                                       seq_puts(m, "  SCRATCH");
>                         }
>                         seq_puts(m, "\n");
>                 }
> -               kunmap_atomic(pt_vaddr);
> +               kunmap_atomic(vaddr);
>         }
>  }
>
> @@ -1697,22 +1702,6 @@ static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
>                   ppgtt->pd_addr + pde);
>  }
>
> -/* Write all the page tables found in the ppgtt structure to incrementing page
> - * directories. */
> -static void gen6_write_page_range(struct i915_hw_ppgtt *base,
> -                                 u32 start, u32 length)
> -{
> -       struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> -       struct i915_page_table *pt;
> -       unsigned int pde;
> -
> -       gen6_for_each_pde(pt, &base->pd, start, length, pde)
> -               gen6_write_pde(ppgtt, pde, pt);
> -
> -       mark_tlbs_dirty(base);
> -       gen6_ggtt_invalidate(base->vm.i915);
> -}
> -
>  static inline u32 get_pd_offset(struct gen6_hw_ppgtt *ppgtt)
>  {
>         GEM_BUG_ON(ppgtt->base.pd.base.ggtt_offset & 0x3f);
> @@ -1919,8 +1908,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>
>                         gen6_initialize_pt(vm, pt);
>                         ppgtt->base.pd.page_table[pde] = pt;
> -                       gen6_write_pde(ppgtt, pde, pt);
> -                       flush = true;
> +
> +                       if (i915_vma_is_bound(ppgtt->vma,
> +                                             I915_VMA_GLOBAL_BIND)) {
> +                               gen6_write_pde(ppgtt, pde, pt);
> +                               flush = true;
> +                       }
>                 }
>         }
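
Makes sense: while the vma is unbound there is nowhere to write the
PDE to, and nothing is lost by skipping it, since pd_vma_bind() below
rewrites every PDE once the vma is (re)bound:

        /* from pd_vma_bind(): all PDEs are (re)written at bind time */
        gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
                gen6_write_pde(ppgtt, pde, pt);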
>
> @@ -1936,8 +1929,11 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>         return -ENOMEM;
>  }
>
> -static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
> +static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
>  {
> +       struct i915_address_space * const vm = &ppgtt->base.vm;
> +       struct i915_page_table *unused;
> +       u32 pde;
>         int ret;
>
>         ret = setup_scratch_page(vm, __GFP_HIGHMEM);
> @@ -1951,6 +1947,8 @@ static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
>         }
>
>         gen6_initialize_pt(vm, vm->scratch_pt);
> +       gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
> +               ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
>
>         return 0;
>  }
> @@ -1975,52 +1973,110 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>         struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
>
> -       drm_mm_remove_node(&ppgtt->node);
> +       i915_vma_unpin(ppgtt->vma);
> +       i915_vma_destroy(ppgtt->vma);
>
>         gen6_ppgtt_free_pd(ppgtt);
>         gen6_ppgtt_free_scratch(vm);
>  }
>
> -static int gen6_ppgtt_allocate_page_directories(struct gen6_hw_ppgtt *ppgtt)
> +static int pd_vma_set_pages(struct i915_vma *vma)
>  {
> -       struct drm_i915_private *dev_priv = ppgtt->base.vm.i915;
> -       struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -       int err;
> +       vma->pages = ERR_PTR(-ENODEV);
> +       return 0;
> +}
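
If I'm reading this right, the ERR_PTR(-ENODEV) is a non-NULL
sentinel: the PD has no backing sg_table to point at, but vma->pages
must be set for the bind to proceed, and IS_ERR() poisons any attempt
to actually dereference it. Might deserve a comment, something like:

        /* The PD vma has no struct page backing, hence no sg_table;
         * a poisoned non-NULL value satisfies the vma->pages checks
         * while trapping any real use of the "pages".
         */
        vma->pages = ERR_PTR(-ENODEV);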
>
> -       /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> -        * allocator works in address space sizes, so it's multiplied by page
> -        * size. We allocate at the top of the GTT to avoid fragmentation.
> -        */
> -       BUG_ON(!drm_mm_initialized(&ggtt->vm.mm));
> +static void pd_vma_clear_pages(struct i915_vma *vma)
> +{
> +       GEM_BUG_ON(!vma->pages);
>
> -       err = i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node,
> -                                 GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -                                 I915_COLOR_UNEVICTABLE,
> -                                 0, ggtt->vm.total,
> -                                 PIN_HIGH);
> -       if (err)
> -               return err;
> +       vma->pages = NULL;
> +}
>
> -       if (ppgtt->node.start < ggtt->mappable_end)
> -               DRM_DEBUG("Forced to use aperture for PDEs\n");
> +static int pd_vma_bind(struct i915_vma *vma,
> +                      enum i915_cache_level cache_level,
> +                      u32 unused)
> +{
> +       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> +       struct gen6_hw_ppgtt *ppgtt = vma->private;
> +       u32 ggtt_offset = i915_ggtt_offset(vma) / PAGE_SIZE;
> +       struct i915_page_table *pt;
> +       unsigned int pde;
>
> -       ppgtt->base.pd.base.ggtt_offset =
> -               ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
> +       ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
> +       ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
>
> -       ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
> -               ppgtt->base.pd.base.ggtt_offset / sizeof(gen6_pte_t);
> +       gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
> +               gen6_write_pde(ppgtt, pde, pt);
> +
> +       mark_tlbs_dirty(&ppgtt->base);
> +       gen6_ggtt_invalidate(ppgtt->base.vm.i915);
>
>         return 0;
>  }
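
The offset arithmetic took me a second, so a worked example (address
picked arbitrarily): if the vma is bound at GGTT address 0x00800000,

        u32 ggtt_offset = 0x00800000 / PAGE_SIZE;    /* 0x800 */

        /* the PDEs start 0x800 gen6_pte_t slots into the GSM ... */
        ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 0x800;

        /* ... i.e. at byte offset 0x800 * sizeof(gen6_pte_t) */
        ppgtt->base.pd.base.ggtt_offset = 0x2000;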
>
> -static void gen6_scratch_va_range(struct gen6_hw_ppgtt *ppgtt,
> -                                 u64 start, u64 length)
> +static void pd_vma_unbind(struct i915_vma *vma)
>  {
> -       struct i915_page_table *unused;
> -       u32 pde;
> +}
> +
> +static const struct i915_vma_ops pd_vma_ops = {
> +       .set_pages = pd_vma_set_pages,
> +       .clear_pages = pd_vma_clear_pages,
> +       .bind_vma = pd_vma_bind,
> +       .unbind_vma = pd_vma_unbind,
> +};
> +
> +static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> +{
> +       struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> +       struct i915_ggtt *ggtt = &i915->ggtt;
> +       struct i915_vma *vma;
> +       int i;
>
> -       gen6_for_each_pde(unused, &ppgtt->base.pd, start, length, pde)
> -               ppgtt->base.pd.page_table[pde] = ppgtt->base.vm.scratch_pt;
> +       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> +       GEM_BUG_ON(size > ggtt->vm.total);
> +
> +       vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
> +       if (!vma)
> +               return ERR_PTR(-ENOMEM);
> +
> +       for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> +               init_request_active(&vma->last_read[i], NULL);
> +       init_request_active(&vma->last_fence, NULL);
> +
> +       vma->vm = &ggtt->vm;
> +       vma->ops = &pd_vma_ops;
> +       vma->private = ppgtt;
> +
> +       vma->obj = NULL;
> +       vma->resv = NULL;
> +       vma->size = size;
> +       vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> +       vma->fence_size = size;
> +       vma->fence_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> +       vma->flags |= I915_VMA_GGTT;
> +       vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
> +
> +       INIT_LIST_HEAD(&vma->obj_link);
> +       list_add(&vma->vm_link, &vma->vm->unbound_list);
> +
> +       return vma;
> +}
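
Constructing the vma by hand (no GEM object, no resv) looks fine to
me; borrowing I915_GGTT_VIEW_ROTATED purely as a "never fenceable"
marker is subtle though, and might deserve a longer comment. For my
own understanding, the result should satisfy (my sketch, not from the
patch):

        GEM_BUG_ON(!i915_vma_is_ggtt(vma));     /* GGTT-only vma */
        GEM_BUG_ON(vma->obj);                   /* no backing object */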
> +
> +static int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
> +{
> +       struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> +
> +       /*
> +        * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> +        * allocator works in address space sizes, so it's multiplied by page
> +        * size. We allocate at the top of the GTT to avoid fragmentation.
> +        */
> +       return i915_vma_pin(ppgtt->vma,
> +                           0, GEN6_PD_ALIGN,
> +                           PIN_GLOBAL | PIN_HIGH);
>  }
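
Note the pin flags take over the job of the removed allocation:
PIN_GLOBAL forces the GGTT binding (so pd_vma_bind() runs) and
PIN_HIGH keeps the PD out of the mappable aperture when possible,
replacing the old:

        i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node,
                            GEN6_PD_SIZE, GEN6_PD_ALIGN,
                            I915_COLOR_UNEVICTABLE,
                            0, ggtt->vm.total,
                            PIN_HIGH);

with dropping I915_COLOR_UNEVICTABLE being the whole point of the
patch. We do lose the DRM_DEBUG for the aperture fallback, though.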
>
>  static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
> @@ -2058,24 +2114,25 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>         else
>                 BUG();
>
> -       err = gen6_ppgtt_init_scratch(&ppgtt->base.vm);
> +       err = gen6_ppgtt_init_scratch(ppgtt);
>         if (err)
>                 goto err_free;
>
> -       err = gen6_ppgtt_allocate_page_directories(ppgtt);
> -       if (err)
> +       ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
> +       if (IS_ERR(ppgtt->vma))

Missing err = PTR_ERR(ppgtt->vma) here before bailing out, otherwise
err still holds the value from the previous check.
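
i.e. something like this, where err_scratch is whatever label unwinds
gen6_ppgtt_init_scratch (I'm guessing at the name):

        ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
        if (IS_ERR(ppgtt->vma)) {
                err = PTR_ERR(ppgtt->vma);
                goto err_scratch;
        }

With that fixed,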

Reviewed-by: Matthew Auld <matthew.william.auld at gmail.com>

