[Intel-gfx] [PATCH 3/3] drm/i915/gtt: Setup phys pages for 3lvl pdps
Chris Wilson
chris at chris-wilson.co.uk
Tue Jun 18 16:37:22 UTC 2019
Quoting Mika Kuoppala (2019-06-18 17:17:31)
> If we setup backing phys page for 3lvl pdps, even they
even though they
> are not used, we lose 5 pages per ppgtt.
>
> Trading this memory on bsw, we gain more common code paths for all
> gen8+ directory manipulation. And those paths are now void of checks
> for page directory type, making the hot paths faster.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 106 +++++++++++++++++-----------
> 1 file changed, 66 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b521b1ddd19b..ea78302c6348 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -715,22 +715,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
> return pd;
> }
>
> -static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
> -{
> - return pd->base.page;
> -}
> -
> static void free_pd(struct i915_address_space *vm,
> struct i915_page_directory *pd)
> {
> - if (likely(pd_has_phys_page(pd)))
> - cleanup_page_dma(vm, &pd->base);
> -
> + cleanup_page_dma(vm, &pd->base);
> kfree(pd);
> }
>
> #define init_pd(vm, pd, to) { \
> - GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd)); \
> fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
> memset_p((pd)->entry, (to), 512); \
> }
> @@ -1539,6 +1531,50 @@ static void ppgtt_init(struct drm_i915_private *i915,
> ppgtt->vm.vma_ops.clear_pages = clear_pages;
> }
>
> +static void init_pd_n(struct i915_address_space *vm,
> + struct i915_page_directory *pd,
> + struct i915_page_directory *to,
> + const unsigned int entries)
> +{
> + const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
> + u64 * const vaddr = kmap_atomic(pd->base.page);
> +
> + memset64(vaddr, daddr, entries);
> + kunmap_atomic(vaddr);
> +
> + memset_p(pd->entry, to, entries);
> +}
> +
> +static struct i915_page_directory *
> +gen8_alloc_top_pd(struct i915_address_space *vm)
> +{
> + struct i915_page_directory *pd;
> +
> + if (i915_vm_is_4lvl(vm)) {
> + pd = alloc_pd(vm);
> + if (!IS_ERR(pd))
> + init_pd(vm, pd, vm->scratch_pdp);
> +
> + return pd;
> + }
> +
> + /* 3lvl */
> + pd = __alloc_pd();
> + if (!pd)
> + return ERR_PTR(-ENOMEM);
> +
> + pd->entry[GEN8_3LVL_PDPES] = NULL;
> +
> + if (unlikely(setup_page_dma(vm, &pd->base))) {
> + kfree(pd);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
> +
> + return pd;
> +}
> +
> /*
> * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
> * with a net effect resembling a 2-level page table in normal x86 terms. Each
> @@ -1548,6 +1584,7 @@ static void ppgtt_init(struct drm_i915_private *i915,
> */
> static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
> {
> + struct i915_address_space *vm;
> struct i915_ppgtt *ppgtt;
> int err;
>
> @@ -1557,70 +1594,59 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>
> ppgtt_init(i915, ppgtt);
>
> + vm = &ppgtt->vm;
Been having this debate with Tursulin, whether or not it is more
confusing to have a local alias here. I think on reading it, it is much
clearer that we are setting up one object if we use ppgtt->vm.foo than
it is to alternate between ppgtt->foo and vm->bar.
I'd suggest leaving it as ppgtt->vm.foo in this patch.
-Chris
More information about the Intel-gfx
mailing list