[PATCH 4/5] drm/panfrost: Add support for GPU heap allocations
Rob Herring
robh at kernel.org
Fri Jul 19 14:27:34 UTC 2019
On Thu, Jul 18, 2019 at 9:03 AM Steven Price <steven.price at arm.com> wrote:
>
> On 17/07/2019 19:33, Rob Herring wrote:
> > The midgard/bifrost GPUs need to allocate GPU heap memory which is
> > allocated on GPU page faults and not pinned in memory. The vendor driver
> > calls this functionality GROW_ON_GPF.
> >
> > This implementation assumes that BOs allocated with the
> > PANFROST_BO_NOEXEC flag are never mmapped or exported. Both of those may
> > actually work, but I'm unsure if there's some interaction there. It
> > would cause the whole object to be pinned in memory which would defeat
> > the point of this.
> >
> > On faults, we map in 2MB at a time in order to utilize huge pages (if
> > enabled). Currently, once we've mapped pages in, they are only unmapped
> > if the BO is freed. Once we add shrinker support, we can unmap pages
> > with the shrinker.
> >
> > Cc: Tomeu Vizoso <tomeu.vizoso at collabora.com>
> > Cc: Boris Brezillon <boris.brezillon at collabora.com>
> > Cc: Robin Murphy <robin.murphy at arm.com>
> > Cc: Steven Price <steven.price at arm.com>
> > Cc: Alyssa Rosenzweig <alyssa at rosenzweig.io>
> > Signed-off-by: Rob Herring <robh at kernel.org>
> > ---
> >  drivers/gpu/drm/panfrost/TODO           |   2 -
> >  drivers/gpu/drm/panfrost/panfrost_drv.c |   2 +-
> >  drivers/gpu/drm/panfrost/panfrost_gem.c |  14 ++-
> >  drivers/gpu/drm/panfrost/panfrost_gem.h |   8 ++
> >  drivers/gpu/drm/panfrost/panfrost_mmu.c | 114 +++++++++++++++++++++---
> >  include/uapi/drm/panfrost_drm.h         |   1 +
> >  6 files changed, 125 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO
> > index c2e44add37d8..64129bf73933 100644
> > --- a/drivers/gpu/drm/panfrost/TODO
> > +++ b/drivers/gpu/drm/panfrost/TODO
> > @@ -14,8 +14,6 @@
> >    The hard part is handling when more address spaces are needed than what
> >    the h/w provides.
> >
> > -- Support pinning pages on demand (GPU page faults).
> > -
> >  - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
> >
> >  - Support for madvise and a shrinker.
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index b91e991bc6a3..9e87d0060202 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -50,7 +50,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
> >          struct drm_panfrost_create_bo *args = data;
> >
> >          if (!args->size || args->pad ||
> > -            (args->flags & ~PANFROST_BO_NOEXEC))
> > +            (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
> >                  return -EINVAL;
> >
> >          bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > index 37ffec8391da..528396000038 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > @@ -87,7 +87,10 @@ static int panfrost_gem_map(struct panfrost_device *pfdev, struct panfrost_gem_o
> >          if (ret)
> >                  return ret;
> >
> > -        return panfrost_mmu_map(bo);
> > +        if (!bo->is_heap)
> > +                ret = panfrost_mmu_map(bo);
> > +
> > +        return ret;
> >  }
> >
> > struct panfrost_gem_object *
> > @@ -101,7 +104,11 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
> >          struct drm_gem_shmem_object *shmem;
> >          struct panfrost_gem_object *bo;
> >
> > -        size = roundup(size, PAGE_SIZE);
> > +        /* Round up heap allocations to 2MB to keep fault handling simple */
> > +        if (flags & PANFROST_BO_HEAP)
> > +                size = roundup(size, SZ_2M);
> > +        else
> > +                size = roundup(size, PAGE_SIZE);
> >
> >          shmem = drm_gem_shmem_create_with_handle(file_priv, dev, size, handle);
> >          if (IS_ERR(shmem))
> > @@ -109,6 +116,9 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
> >
> >          bo = to_panfrost_bo(&shmem->base);
> >          bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
> > +        bo->is_heap = !!(flags & PANFROST_BO_HEAP);
> > +        if (bo->is_heap)
> > +                bo->noexec = true;
>
> While I agree an executable heap is pretty weird, I'd prefer making this
> explicit - i.e. failing the allocation if the flags don't make sense.
It also seems a bit strange to have to always set NOEXEC when HEAP is
set. There's not really any reason to reject setting just HEAP.
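For reference, making it explicit would just mean a check like this in
panfrost_ioctl_create_bo() (untested sketch):

  if ((args->flags & PANFROST_BO_HEAP) &&
      !(args->flags & PANFROST_BO_NOEXEC))
          return -EINVAL;

which only forces userspace to always pass both flags for no real gain.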
[...]
> > +int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
> > +{
> > +        int ret, i;
> > +        struct drm_mm_node *node;
> > +        struct panfrost_gem_object *bo;
> > +        struct address_space *mapping;
> > +        pgoff_t page_offset;
> > +        struct sg_table sgt = {};
> > +        struct page **pages;
> > +
> > +        node = addr_to_drm_mm_node(pfdev, as, addr);
> > +        if (!node)
> > +                return -ENOENT;
> > +
> > +        bo = drm_mm_node_to_panfrost_bo(node);
> > +        if (!bo->is_heap) {
> > +                dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
> > +                         node->start << PAGE_SHIFT);
> > +                return -EINVAL;
> > +        }
> > +        /* Assume 2MB alignment and size multiple */
> > +        addr &= ~((u64)SZ_2M - 1);
> > +        page_offset = addr >> PAGE_SHIFT;
> > +        page_offset -= node->start;
> > +
> > +        pages = kvmalloc_array(NUM_FAULT_PAGES, sizeof(struct page *), GFP_KERNEL);
> > +        if (!pages)
> > +                return -ENOMEM;
> > +
> > +        mapping = bo->base.base.filp->f_mapping;
> > +        mapping_set_unevictable(mapping);
> > +
> > +        for (i = 0; i < NUM_FAULT_PAGES; i++) {
> > +                pages[i] = shmem_read_mapping_page(mapping, page_offset + i);
> > +                if (IS_ERR(pages[i])) {
> > +                        ret = PTR_ERR(pages[i]);
> > +                        goto err_pages;
> > +                }
> > +        }
> > +
> > +        ret = sg_alloc_table_from_pages(&sgt, pages, NUM_FAULT_PAGES, 0,
> > +                                        SZ_2M, GFP_KERNEL);
> > +        if (ret)
> > +                goto err_pages;
> > +
> > +        if (dma_map_sg(pfdev->dev, sgt.sgl, sgt.nents, DMA_BIDIRECTIONAL) == 0) {
> > +                ret = -EINVAL;
> > +                goto err_map;
> > +        }
> > +
> > +        mmu_map_sg(pfdev, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, &sgt);
> > +
> > +        mmu_write(pfdev, MMU_INT_CLEAR, BIT(as));
> > +        bo->is_mapped = true;
> > +
> > +        dev_dbg(pfdev->dev, "mapped page fault @ %llx", addr);
> > +
> > +        return 0;
>
> You still need to free sgt and pages - so this should be:
>
> ret = 0;
>
> to fall through to the clean up below:
I think I had that, then thought I'd forgotten the return...
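So the tail of the function would end up something like this (sketch):

          mmu_write(pfdev, MMU_INT_CLEAR, BIT(as));
          bo->is_mapped = true;

          dev_dbg(pfdev->dev, "mapped page fault @ %llx", addr);

          /* Success: fall through so the sg_table and pages array get
           * cleaned up below as well. */
          ret = 0;

  err_map:
          sg_free_table(&sgt);
  err_pages:
          kvfree(pages);
          return ret;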
>
> > +
> > +err_map:
> > +        sg_free_table(&sgt);
> > +err_pages:
> > +        kvfree(pages);
> > +        return ret;
> > +}
> > +
>
> But actually, you need to store the pages allocated in the buffer object
> so that they can be freed later. At the moment you have a big memory leak.
Ah yes, now I see the memory ends up in the 'Unevictable' bucket. I'd
been looking at the shmem counts and those seemed fine. I have it
working now, but still need to figure out how to do the dma_unmap.
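One way the teardown at free time could look (rough, untested sketch --
the per-2MB-chunk 'sgts' array stashed in the BO at fault time is a
hypothetical field):

  /* Undo the dma_map_sg() done at fault time for each 2MB chunk. */
  for (i = 0; i < bo->base.base.size / SZ_2M; i++) {
          struct sg_table *sgt = &bo->sgts[i];    /* hypothetical */

          if (!sgt->sgl)
                  continue;

          dma_unmap_sg(pfdev->dev, sgt->sgl, sgt->nents,
                       DMA_BIDIRECTIONAL);
          sg_free_table(sgt);
  }

plus dropping the page references taken by shmem_read_mapping_page().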
> >  static const char *access_type_name(struct panfrost_device *pfdev,
> >                                      u32 fault_status)
> >  {
> > @@ -323,13 +405,11 @@ static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
> >  {
> >          struct panfrost_device *pfdev = data;
> >          u32 status = mmu_read(pfdev, MMU_INT_STAT);
> > -        int i;
> > +        int i, ret;
> >
> >          if (!status)
> >                  return IRQ_NONE;
> >
> > -        dev_err(pfdev->dev, "mmu irq status=%x\n", status);
> > -
> >          for (i = 0; status; i++) {
> >                  u32 mask = BIT(i) | BIT(i + 16);
> >                  u64 addr;
> > @@ -350,6 +430,17 @@ static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data)
> >                  access_type = (fault_status >> 8) & 0x3;
> >                  source_id = (fault_status >> 16);
> >
> > +                /* Page fault only */
> > +                if ((status & mask) == BIT(i)) {
> > +                        WARN_ON(exception_type < 0xC1 || exception_type > 0xC4);
> > +
> > +                        ret = panfrost_mmu_map_fault_addr(pfdev, i, addr);
> > +                        if (!ret) {
> > +                                status &= ~mask;
> > +                                continue;
>
> In this case the IRQ isn't handled and will remain asserted, which
> probably isn't going to end particularly well.
This is the success condition. We've already cleared the interrupt in
panfrost_mmu_map_fault_addr. On failure, we fall through, print the
fault message below, and clear the interrupt there. Maybe it would be
better to clear the interrupt in one place...
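Something like this in the handler, say (untested), with the
MMU_INT_CLEAR dropped from panfrost_mmu_map_fault_addr():

          /* Page fault only */
          if ((status & mask) == BIT(i)) {
                  WARN_ON(exception_type < 0xC1 || exception_type > 0xC4);

                  ret = panfrost_mmu_map_fault_addr(pfdev, i, addr);
                  if (!ret) {
                          /* Handled: ack the page fault IRQ here instead. */
                          mmu_write(pfdev, MMU_INT_CLEAR, BIT(i));
                          status &= ~mask;
                          continue;
                  }
          }

          /* Otherwise fall through to the error print and a single
           * mmu_write(pfdev, MMU_INT_CLEAR, mask) to ack the fault. */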
>
> Ideally you would switch the address space to UNMAPPED to kill off the
> job, but at the very least we should acknowledge the interrupt and let
> the GPU timeout reset the GPU to recover (which is equivalent while we
> still only use the one AS on the GPU).
I'll hopefully remember this detail when doing multiple AS.
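For the record, killing the job would presumably mean switching the
faulting AS to UNMAPPED mode before acking, roughly (untested sketch,
there's no helper for this yet):

  mmu_write(pfdev, AS_TRANSTAB_LO(as), 0);
  mmu_write(pfdev, AS_TRANSTAB_HI(as), 0);
  mmu_write(pfdev, AS_MEMATTR_LO(as), 0);
  mmu_write(pfdev, AS_MEMATTR_HI(as), 0);
  write_cmd(pfdev, as, AS_COMMAND_UPDATE);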
Thanks for the review.
Rob