[Intel-gfx] [PATCH 11/21] drm/i915: support 1G pages for the 48b PPGTT

Matthew Auld matthew.william.auld at gmail.com
Wed Jul 26 15:32:09 UTC 2017


On 25 July 2017 at 20:53, Chris Wilson <chris at chris-wilson.co.uk> wrote:
> Quoting Matthew Auld (2017-07-25 20:21:23)
>> Support inserting 1G gtt pages into the 48b PPGTT.
>>
>> v2: sanity check sg->length against page_size
>>
>> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 73 +++++++++++++++++++++++++++++++++++--
>>  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +
>>  2 files changed, 71 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 385cd85f47bb..acd0c0d1ba8d 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -945,6 +945,66 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
>>                                       cache_level);
>>  }
>>
>> +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>> +                                          struct i915_page_directory_pointer **pdps,
>> +                                          struct sgt_dma *iter,
>> +                                          enum i915_cache_level cache_level)
>> +{
>> +       const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
>> +       u64 start = vma->node.start;
>> +
>> +       do {
>> +               struct gen8_insert_pte idx = gen8_insert_pte(start);
>> +               struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
>> +               struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
>> +               struct i915_page_table *pt = pd->page_table[idx.pde];
>> +               dma_addr_t rem = iter->max - iter->dma;
>
> We don't need to recalculate rem each loop, it dwindles until we advance
> the iter and then we reset rem = iter->sg->length.
>
>> +               unsigned int page_size;
>> +               gen8_pte_t encode = pte_encode;
>> +               gen8_pte_t *vaddr;
>> +               u16 index, max;
>> +
>> +               if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) &&
>> +                   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
>> +                   rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) {
>> +                       vaddr = kmap_atomic_px(pdp);
>> +                       index = idx.pdpe;
>> +                       max = GEN8_PML4ES_PER_PML4;
>> +                       page_size = I915_GTT_PAGE_SIZE_1G;
>> +                       encode |= GEN8_PDPE_PS_1G;
>> +               } else {
>> +                       vaddr = kmap_atomic_px(pt);
>> +                       index = idx.pte;
>> +                       max = GEN8_PTES;
>> +                       page_size = I915_GTT_PAGE_SIZE;
>> +               }
>> +
>> +               do {
>> +                       GEM_BUG_ON(iter->sg->length < page_size);
>> +                       vaddr[index++] = encode | iter->dma;
>> +
>> +                       start += page_size;
>> +                       iter->dma += page_size;
>
> rem -= page_size;
>
>> +                       if (iter->dma >= iter->max) {
>> +                               iter->sg = __sg_next(iter->sg);
>> +                               if (!iter->sg)
>> +                                       break;
>> +
> rem = iter->sg->length;
>> +                               iter->dma = sg_dma_address(iter->sg);
>> +                               iter->max = iter->dma + iter->sg->length;
> iter->max = iter->dma + rem;
>
> if (rem < page_size)
>         break;
>> +
>> +                               if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
>> +                                       break;
>> +                       }
>> +                       rem = iter->max - iter->dma;
>> +
>> +               } while (rem >= page_size && index < max);
>
> Check against rem is now redundant.

What about an sg entry which spans multiple page sizes? Maybe I'm
being overly paranoid, but my thinking was that shmem could
potentially give us a 2M page followed by a physically contiguous 4K
page, for an object of size 2M + 4K, which after coalescing would
give us a single sg entry.
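
To spell out the case I have in mind (hypothetical numbers, just for
illustration):

        /* object size = 2M + 4K; shmem hands back physically
         * contiguous pages, so coalescing leaves a single sg entry
         */
        sg->length = SZ_2M + SZ_4K; /* dma address 2M aligned */

The first pass inserts the 2M page, leaving rem = 4K, which is <
page_size (2M), so without the rem check in the loop condition we
would never fall back to writing 4K PTEs for the tail.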

I'll try to address the other comments, thanks.
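
For reference, folding in the rem suggestions I end up with something
like the below (untested sketch; I've kept the rem check in the loop
condition for the mixed page-size case above, which also makes the
extra rem < page_size break after advancing the sg unnecessary):

        do {
                GEM_BUG_ON(iter->sg->length < page_size);
                vaddr[index++] = encode | iter->dma;

                start += page_size;
                iter->dma += page_size;
                rem -= page_size;

                if (iter->dma >= iter->max) {
                        iter->sg = __sg_next(iter->sg);
                        if (!iter->sg)
                                break;

                        /* rem now tracks the new sg entry */
                        rem = iter->sg->length;
                        iter->dma = sg_dma_address(iter->sg);
                        iter->max = iter->dma + rem;

                        if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                break;
                }
        } while (rem >= page_size && index < max);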

>
> Then to review the impact upon later patches.
> -Chris
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

