[Intel-gfx] [PATCH 10/12] drm/i915/bdw: Add 4 level support in insert_entries and clear_range

akash goel akash.goels at gmail.com
Tue Mar 3 08:39:36 PST 2015


On Fri, Feb 20, 2015 at 11:16 PM, Michel Thierry
<michel.thierry at intel.com> wrote:
> When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
> Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
> it will write to.
>
> Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.
>
> Also add a scratch page for PML4.
>
> This patch was inspired by Ben's "Depend exclusively on map and
> unmap_vma".
>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 66 ++++++++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_gtt.h | 12 +++++++
>  2 files changed, 67 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index a1396cb..0954827 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -676,24 +676,52 @@ static void gen8_ppgtt_clear_pte_range(struct i915_page_directory_pointer_entry
>         }
>  }
>
> +static void gen8_ppgtt_clear_range_4lvl(struct i915_hw_ppgtt *ppgtt,
> +                                       gen8_gtt_pte_t scratch_pte,
> +                                       uint64_t start,
> +                                       uint64_t length)
> +{
> +       struct i915_page_directory_pointer_entry *pdp;
> +       uint64_t templ4, templ3, pml4e, pdpe;
> +
> +       gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
> +               struct i915_page_directory_entry *pd;
> +               uint64_t pdp_len = gen8_clamp_pdp(start, length);
> +               uint64_t pdp_start = start;
> +
> +               gen8_for_each_pdpe(pd, pdp, pdp_start, pdp_len, templ3, pdpe) {

The 'gen8_ppgtt_clear_pte_range' function is equipped to switch to a
new page directory appropriately.
So having just an outer loop of pml4 entries should suffice, with the
use of  gen8_clamp_pdp.
The inner loop  'gen8_for_each_pdpe' is not really needed.

> +                       uint64_t pd_len = gen8_clamp_pd(pdp_start, pdp_len);
> +                       uint64_t pd_start = pdp_start;
> +
> +                       gen8_ppgtt_clear_pte_range(pdp, pd_start, pd_len,
> +                                                  scratch_pte, !HAS_LLC(ppgtt->base.dev));
> +               }
> +       }
> +}
> +
>  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> -                                  uint64_t start,
> -                                  uint64_t length,
> +                                  uint64_t start, uint64_t length,
>                                    bool use_scratch)
>  {
>         struct i915_hw_ppgtt *ppgtt =
> -               container_of(vm, struct i915_hw_ppgtt, base);
> -       struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
> -
> +                       container_of(vm, struct i915_hw_ppgtt, base);
>         gen8_gtt_pte_t scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
>                                                      I915_CACHE_LLC, use_scratch);
>
> -       gen8_ppgtt_clear_pte_range(pdp, start, length, scratch_pte, !HAS_LLC(vm->dev));
> +       if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +               struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp;
> +
> +               gen8_ppgtt_clear_pte_range(pdp, start, length, scratch_pte,
> +                                          !HAS_LLC(ppgtt->base.dev));
> +       } else {
> +               gen8_ppgtt_clear_range_4lvl(ppgtt, scratch_pte, start, length);
> +       }
>  }
>
>  static void gen8_ppgtt_insert_pte_entries(struct i915_page_directory_pointer_entry *pdp,
>                                           struct sg_page_iter *sg_iter,
>                                           uint64_t start,
> +                                         size_t pages,
>                                           enum i915_cache_level cache_level,
>                                           const bool flush)
>  {
> @@ -704,7 +732,7 @@ static void gen8_ppgtt_insert_pte_entries(struct i915_page_directory_pointer_ent
>
>         pt_vaddr = NULL;
>
> -       while (__sg_page_iter_next(sg_iter)) {
> +       while (pages-- && __sg_page_iter_next(sg_iter)) {
>                 if (pt_vaddr == NULL) {
>                         struct i915_page_directory_entry *pd = pdp->page_directory[pdpe];
>                         struct i915_page_table_entry *pt = pd->page_tables[pde];
> @@ -742,11 +770,26 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>                                       u32 unused)
>  {
>         struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base);
> -       struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
> +       struct i915_page_directory_pointer_entry *pdp;
>         struct sg_page_iter sg_iter;
>
>         __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
> -       gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, cache_level, !HAS_LLC(vm->dev));
> +
> +       if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +               pdp = &ppgtt->pdp;
> +               gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start,
> +                               sg_nents(pages->sgl),
> +                               cache_level, !HAS_LLC(vm->dev));
> +       } else {
> +               struct i915_pml4 *pml4;
> +               unsigned pml4e = gen8_pml4e_index(start);
> +
> +               pml4 = &ppgtt->pml4;
> +               pdp = pml4->pdps[pml4e];

Since an object could get mapped, such that it straddles the pdp
boundary & 'gen8_ppgtt_insert_pte_entries'
can't switch on its own to a new page directory pointer table (pdp), a
modification is required here to have a
loop of 'gen8_for_each_pml4e', with the use of  'gen8_clamp_pdp',
similar to how gen8_ppgtt_clear_range_4lvl
has been implemented.

> +               gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start,
> +                               sg_nents(pages->sgl),
> +                               cache_level, !HAS_LLC(vm->dev));
> +       }
>  }
>
>  static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde,
> @@ -1185,7 +1228,8 @@ static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm,
>                         if (sg_iter) {
>                                 BUG_ON(!sg_iter->__nents);
>                                 gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start,
> -                                                             flags, !HAS_LLC(vm->dev));
> +                                               gen8_pte_count(pd_start, pd_len),
> +                                               flags, !HAS_LLC(vm->dev));
>                         }
>                         set_bit(pde, pd->used_pdes);
>                 }
> @@ -1330,7 +1374,7 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
>                 int ret = pml4_init(ppgtt);
>                 if (ret) {
> -                       unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> +                       unmap_and_free_pt(ppgtt->scratch_pml4, ppgtt->base.dev);
>                         return ret;
>                 }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 1f4cdb1..602d446c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -332,6 +332,7 @@ struct i915_hw_ppgtt {
>         union {
>                 struct i915_page_table_entry *scratch_pt;
>                 struct i915_page_table_entry *scratch_pd; /* Just need the daddr */
> +               struct i915_page_table_entry *scratch_pml4;
>         };
>
>         struct drm_i915_file_private *file_priv;
> @@ -452,6 +453,17 @@ static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length)
>         return next_pd - start;
>  }
>
> +/* Clamp length to the next page_directory pointer boundary */
> +static inline uint64_t gen8_clamp_pdp(uint64_t start, uint64_t length)
> +{
> +       uint64_t next_pdp = ALIGN(start + 1, 1ULL << GEN8_PML4E_SHIFT);
> +
> +       if (next_pdp > (start + length))
> +               return length;
> +
> +       return next_pdp - start;
> +}
> +
>  static inline uint32_t gen8_pte_index(uint64_t address)
>  {
>         return i915_pte_index(address, GEN8_PDE_SHIFT);
> --
> 2.1.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list