[Intel-gfx] [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch

Mika Kuoppala mika.kuoppala at linux.intel.com
Tue Jul 9 12:41:37 UTC 2019


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-09 13:24:27)
>> Chris Wilson <chris at chris-wilson.co.uk> writes:
>> 
>> > We only use the dma pages for scratch, and so do not need to allocate
>> > the extra storage for the shadow page directory.
>> >
>> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> > Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++----------------
>> >  drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
>> >  2 files changed, 85 insertions(+), 113 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > index 236c964dd761..937236913e70 100644
>> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>> >  
>> >  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>> >  
>> > -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
>> > -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
>> > +#define fill_px(px, v) fill_page_dma(px_base(px), (v))
>> > +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
>> >  
>> > -static void fill_page_dma(struct i915_address_space *vm,
>> > -                       struct i915_page_dma *p,
>> > -                       const u64 val)
>> > +static void fill_page_dma(struct i915_page_dma *p, const u64 val)
>> >  {
>> > -     u64 * const vaddr = kmap_atomic(p->page);
>> > -
>> > -     memset64(vaddr, val, PAGE_SIZE / sizeof(val));
>> > -
>> > -     kunmap_atomic(vaddr);
>> > +     kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
>> 
>> Neat.
>> 
>> I would go for 512 instead of I915_PDES. There is no magic
>> and there never will be magic as it is as const as carved into stone.
>
> I was just going with I915_PDES and I915_PDE_MASK throughout. Later this
> one becomes count, fwiw.
>> 
>> >  }
>> >  
>> > -static void fill_page_dma_32(struct i915_address_space *vm,
>> > -                          struct i915_page_dma *p,
>> > -                          const u32 v)
>> > +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
>> >  {
>> > -     fill_page_dma(vm, p, (u64)v << 32 | v);
>> > +     fill_page_dma(p, (u64)v << 32 | v);
>> >  }
>> >  
>> >  static int
>> > @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
>> >       __free_pages(p->page, order);
>> >  }
>> >  
>> > +static void free_scratch(struct i915_address_space *vm)
>> > +{
>> > +     if (!vm->scratch_page.daddr) /* set to 0 on clones */
>> > +             return;
>> > +
>> > +     if (vm->scratch_pdp.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pdp);
>> > +     if (vm->scratch_pd.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pd);
>> > +     if (vm->scratch_pt.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pt);
>> > +
>> > +     cleanup_scratch_page(vm);
>> > +}
>> > +
>> >  static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>> >  {
>> >       struct i915_page_table *pt;
>> > @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>> >       kfree(pt);
>> >  }
>> >  
>> > -static void gen8_initialize_pt(struct i915_address_space *vm,
>> > -                            struct i915_page_table *pt)
>> > -{
>> > -     fill_px(vm, pt, vm->scratch_pte);
>> > -}
>> > -
>> > -static void gen6_initialize_pt(struct i915_address_space *vm,
>> > -                            struct i915_page_table *pt)
>> > -{
>> > -     fill32_px(vm, pt, vm->scratch_pte);
>> > -}
>> > -
>> >  static struct i915_page_directory *__alloc_pd(void)
>> >  {
>> >       struct i915_page_directory *pd;
>> > @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm,
>> >       kfree(pd);
>> >  }
>> >  
>> > -#define init_pd(vm, pd, to) {                                        \
>> > -     fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
>> > -     memset_p((pd)->entry, (to), 512);                               \
>> > +static void init_pd(struct i915_page_directory *pd,
>> > +                 struct i915_page_dma *scratch)
>> > +{
>> > +     fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
>> > +     memset_p(pd->entry, scratch, 512);
>> >  }
>> >  
>> >  static inline void
>> > @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>> >       u32 pde;
>> >  
>> >       gen8_for_each_pde(pt, pd, start, length, pde) {
>> > -             GEM_BUG_ON(pt == vm->scratch_pt);
>> > +             GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
>> >  
>> >               atomic_inc(&pt->used);
>> >               gen8_ppgtt_clear_pt(vm, pt, start, length);
>> > -             if (release_pd_entry(pd, pde, &pt->used,
>> > -                                  px_base(vm->scratch_pt)))
>> > +             if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
>> >                       free_pt(vm, pt);
>> >       }
>> >  }
>> > @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>> >       unsigned int pdpe;
>> >  
>> >       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>> > -             GEM_BUG_ON(pd == vm->scratch_pd);
>> > +             GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
>> 
>> Perhaps future will bring pd_points_scratch(pd).
>> 
>> Now the intriguing, bordering irritating, question in my mind is
>> that can we fold the scratch_pd and scratch_pdp to be the same thing.
>
> No, we can fold the scratch_pd to be the same (dma wise) as they do need
> to end up at the scratch_pte. And sadly we can't use the scratch_pte as
> the filler for scratch_pd.

Oh indeed. It needs to be a hierarchy even on the upper level to break
out. *blush*

>
>> Patch lgtm with some dislike towards I915_PDES,
>
> I'm not keen on it tbh. But the mix of alternating between 512/0x1ff
> does suggest to use some name.

512 everywhere.
-Mika


More information about the Intel-gfx mailing list