[PATCH] drm/panfrost: Implement per FD address spaces

Rob Herring robh at kernel.org
Fri Aug 9 14:46:36 UTC 2019


On Fri, Aug 9, 2019 at 6:36 AM Steven Price <steven.price at arm.com> wrote:
>
> On 08/08/2019 23:29, Rob Herring wrote:
> > Up until now, a single shared GPU address space was used. This is not
> > ideal as there's no protection between processes and doesn't work for
> > supporting the same GPU/CPU VA feature. Most importantly, this will
> > hopefully mitigate Alyssa's fear of WebGL, whatever that is.
> >
> > Most of the changes here are moving struct drm_mm and struct
> > panfrost_mmu objects from the per device struct to the per FD struct.
> > The critical function is panfrost_mmu_as_get() which handles allocating
> > and switching the h/w address spaces.
> >
> > Cc: Tomeu Vizoso <tomeu.vizoso at collabora.com>
> > Cc: David Airlie <airlied at linux.ie>
> > Cc: Daniel Vetter <daniel at ffwll.ch>
> > Cc: Robin Murphy <robin.murphy at arm.com>
> > Cc: Steven Price <steven.price at arm.com>
> > Cc: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
> > Signed-off-by: Rob Herring <robh at kernel.org>
> > ---
> > This depends on madvise support (now in drm-misc) and the heap/no-exec
> > series (just the rework). Seems to be working pretty well for me, but
> > more testing would be helpful. I've run multiple 'glmark2-es2-drm
> > --off-screen' instances and Gnome Shell. Running more than 8 clients (at
> > least for T860) will hit the address space switch code paths.
> >
> > Rob
> >
> >  drivers/gpu/drm/panfrost/TODO              |   4 -
> >  drivers/gpu/drm/panfrost/panfrost_device.c |   2 +
> >  drivers/gpu/drm/panfrost/panfrost_device.h |  24 ++-
> >  drivers/gpu/drm/panfrost/panfrost_drv.c    |  31 ++-
> >  drivers/gpu/drm/panfrost/panfrost_gem.c    |  15 +-
> >  drivers/gpu/drm/panfrost/panfrost_gem.h    |   3 +
> >  drivers/gpu/drm/panfrost/panfrost_job.c    |  12 +-
> >  drivers/gpu/drm/panfrost/panfrost_mmu.c    | 220 +++++++++++++++------
> >  drivers/gpu/drm/panfrost/panfrost_mmu.h    |   8 +
> >  9 files changed, 239 insertions(+), 80 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO
> > index e7727b292355..536a0d4f8d29 100644
> > --- a/drivers/gpu/drm/panfrost/TODO
> > +++ b/drivers/gpu/drm/panfrost/TODO
> > @@ -6,10 +6,6 @@
> >    - Bifrost specific feature and issue handling
> >    - Coherent DMA support
> >
> > -- Per FD address space support. The h/w supports multiple addresses spaces.
> > -  The hard part is handling when more address spaces are needed than what
> > -  the h/w provides.
> > -
> >  - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
> >
> >  - Compute job support. So called 'compute only' jobs need to be plumbed up to
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> > index 9814f4ccbd26..4da71bb56c20 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> > @@ -123,8 +123,10 @@ int panfrost_device_init(struct panfrost_device *pfdev)
> >       mutex_init(&pfdev->sched_lock);
> >       mutex_init(&pfdev->reset_lock);
> >       INIT_LIST_HEAD(&pfdev->scheduled_jobs);
> > +     INIT_LIST_HEAD(&pfdev->as_lru_list);
> >
> >       spin_lock_init(&pfdev->hwaccess_lock);
> > +     spin_lock_init(&pfdev->as_lock);
> >
> >       err = panfrost_clk_init(pfdev);
> >       if (err) {
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index 4e5641db9c7e..f503c566e99f 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -5,6 +5,8 @@
> >  #ifndef __PANFROST_DEVICE_H__
> >  #define __PANFROST_DEVICE_H__
> >
> > +#include <linux/atomic.h>
> > +#include <linux/io-pgtable.h>
> >  #include <linux/spinlock.h>
> >  #include <drm/drm_device.h>
> >  #include <drm/drm_mm.h>
> > @@ -63,9 +65,6 @@ struct panfrost_device {
> >
> >       spinlock_t hwaccess_lock;
> >
> > -     struct drm_mm mm;
> > -     spinlock_t mm_lock;
> > -
> >       void __iomem *iomem;
> >       struct clk *clock;
> >       struct clk *bus_clock;
> > @@ -74,7 +73,11 @@ struct panfrost_device {
> >
> >       struct panfrost_features features;
> >
> > -     struct panfrost_mmu *mmu;
> > +     spinlock_t as_lock;
> > +     unsigned long as_in_use_mask;
> > +     unsigned long as_alloc_mask;
> > +     struct list_head as_lru_list;
> > +
> >       struct panfrost_job_slot *js;
> >
> >       struct panfrost_job *jobs[NUM_JOB_SLOTS];
> > @@ -98,10 +101,23 @@ struct panfrost_device {
> >       } devfreq;
> >  };
> >
> > +struct panfrost_mmu {
> > +     struct io_pgtable_cfg pgtbl_cfg;
> > +     struct io_pgtable_ops *pgtbl_ops;
> > +     struct mutex lock;
> > +     int as;
> > +     atomic_t as_count;
> > +     struct list_head list;
> > +};
> > +
> >  struct panfrost_file_priv {
> >       struct panfrost_device *pfdev;
> >
> >       struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
> > +
> > +     struct panfrost_mmu mmu;
> > +     struct drm_mm mm;
> > +     spinlock_t mm_lock;
> >  };
> >
> >  static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index a1352750984c..7c8aa1a8054f 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -403,6 +403,7 @@ static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node,
> >  static int
> >  panfrost_open(struct drm_device *dev, struct drm_file *file)
> >  {
> > +     int ret;
> >       struct panfrost_device *pfdev = dev->dev_private;
> >       struct panfrost_file_priv *panfrost_priv;
> >
> > @@ -413,7 +414,28 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
> >       panfrost_priv->pfdev = pfdev;
> >       file->driver_priv = panfrost_priv;
> >
> > -     return panfrost_job_open(panfrost_priv);
> > +     spin_lock_init(&panfrost_priv->mm_lock);
> > +
> > +     /* 4G enough for now. can be 48-bit */
> > +     panfrost_priv->mm.color_adjust = panfrost_drm_mm_color_adjust;
> > +     drm_mm_init(&panfrost_priv->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
> > +
> > +     ret = panfrost_mmu_pgtable_alloc(panfrost_priv);
> > +     if (ret)
> > +             goto err_pgtable;
> > +
> > +     ret = panfrost_job_open(panfrost_priv);
> > +     if (ret)
> > +             goto err_job;
> > +
> > +     return 0;
> > +
> > +err_job:
> > +     panfrost_mmu_pgtable_free(panfrost_priv);
> > +err_pgtable:
> > +     drm_mm_takedown(&panfrost_priv->mm);
> > +     kfree(panfrost_priv);
>
> Looks like this kfree was missing before... :)

Yes, if panfrost_job_open() can fail. Probably warrants its own (stable) patch.

>
> > +     return ret;
> >  }
> >
> >  static void
> > @@ -424,6 +446,8 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
> >       panfrost_perfcnt_close(panfrost_priv);
> >       panfrost_job_close(panfrost_priv);
> >
> > +     panfrost_mmu_pgtable_free(panfrost_priv);
> > +     drm_mm_takedown(&panfrost_priv->mm);
> >       kfree(panfrost_priv);
> >  }
> >
> > @@ -496,14 +520,9 @@ static int panfrost_probe(struct platform_device *pdev)
> >       ddev->dev_private = pfdev;
> >       pfdev->ddev = ddev;
> >
> > -     spin_lock_init(&pfdev->mm_lock);
> >       mutex_init(&pfdev->shrinker_lock);
> >       INIT_LIST_HEAD(&pfdev->shrinker_list);
> >
> > -     /* 4G enough for now. can be 48-bit */
> > -     drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT);
> > -     pfdev->mm.color_adjust = panfrost_drm_mm_color_adjust;
> > -
> >       pm_runtime_use_autosuspend(pfdev->dev);
> >       pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
> >       pm_runtime_enable(pfdev->dev);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > index 37a3a6ed4617..93204b44ef90 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > @@ -47,8 +47,8 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
> >       size_t size = obj->size;
> >       u64 align;
> >       struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> > -     struct panfrost_device *pfdev = obj->dev->dev_private;
> >       unsigned long color = bo->noexec ? PANFROST_BO_NOEXEC : 0;
> > +     struct panfrost_file_priv *priv = file_priv->driver_priv;
> >
> >       /*
> >        * Executable buffers cannot cross a 16MB boundary as the program
> > @@ -61,8 +61,9 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
> >       else
> >               align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0;
> >
> > -     spin_lock(&pfdev->mm_lock);
> > -     ret = drm_mm_insert_node_generic(&pfdev->mm, &bo->node,
> > +     bo->mmu = &priv->mmu;
> > +     spin_lock(&priv->mm_lock);
> > +     ret = drm_mm_insert_node_generic(&priv->mm, &bo->node,
> >                                        size >> PAGE_SHIFT, align, color, 0);
> >       if (ret)
> >               goto out;
> > @@ -73,22 +74,22 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
> >                       drm_mm_remove_node(&bo->node);
> >       }
> >  out:
> > -     spin_unlock(&pfdev->mm_lock);
> > +     spin_unlock(&priv->mm_lock);
> >       return ret;
> >  }
> >
> >  static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv)
> >  {
> >       struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> > -     struct panfrost_device *pfdev = obj->dev->dev_private;
> > +     struct panfrost_file_priv *priv = file_priv->driver_priv;
> >
> >       if (bo->is_mapped)
> >               panfrost_mmu_unmap(bo);
> >
> > -     spin_lock(&pfdev->mm_lock);
> > +     spin_lock(&priv->mm_lock);
> >       if (drm_mm_node_allocated(&bo->node))
> >               drm_mm_remove_node(&bo->node);
> > -     spin_unlock(&pfdev->mm_lock);
> > +     spin_unlock(&priv->mm_lock);
> >  }
> >
> >  static int panfrost_gem_pin(struct drm_gem_object *obj)
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> > index e10f58316915..50920819cc16 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> > @@ -7,10 +7,13 @@
> >  #include <drm/drm_gem_shmem_helper.h>
> >  #include <drm/drm_mm.h>
> >
> > +struct panfrost_mmu;
> > +
> >  struct panfrost_gem_object {
> >       struct drm_gem_shmem_object base;
> >       struct sg_table *sgts;
> >
> > +     struct panfrost_mmu *mmu;
> >       struct drm_mm_node node;
> >       bool is_mapped          :1;
> >       bool noexec             :1;
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index d567ce98494c..aea84610b8cd 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -153,6 +153,8 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >       if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
> >               goto end;
> >
> > +     cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu);
> > +
> >       panfrost_devfreq_record_transition(pfdev, js);
> >       spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
> >
> > @@ -163,8 +165,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >
> >       /* start MMU, medium priority, cache clean/flush on end, clean/flush on
> >        * start */
> > -     /* TODO: different address spaces */
> > -     cfg = JS_CONFIG_THREAD_PRI(8) |
> > +     cfg |= JS_CONFIG_THREAD_PRI(8) |
> >               JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
> >               JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
> >
> > @@ -280,6 +281,9 @@ static void panfrost_job_cleanup(struct kref *ref)
> >               kvfree(job->bos);
> >       }
> >
> > +     for (i = 0; i < NUM_JOB_SLOTS; i++)
> > +             if (job == job->pfdev->jobs[i])
> > +                     job->pfdev->jobs[i] = NULL;
>
> I'm a bit worried about this. As far as I can see previously the jobs
> array would just contain dangling pointers - which admittedly wasn't
> ideal. However do we have sufficient locks here to ensure that this
> could be racing with a panfrost_job_run() on the same slot? I'm worried
> that between the check and the assignment to NULL it is possible
> (admittedly very unlikely) for panfrost_job_run() to assign a new
> pointer which would then be clobbered.

Yeah, it's ugly. The only place we need this is the job ISR. Looking
at it again, I could just clear this pointer in the ISR instead before
the done_fence. I think the fence should be enough to prevent any
races.

We should probably also clear the array in the timeout/reset handling,
but I guess it doesn't really matter given an ISR should not happen
before a job run.

>
> >       kfree(job);
> >  }
> >
> > @@ -377,8 +381,9 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
> >       if (dma_fence_is_signaled(job->done_fence))
> >               return;
> >
> > -     dev_err(pfdev->dev, "gpu sched timeout, js=%d, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
> > +     dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
> >               js,
> > +             job_read(pfdev, JS_CONFIG(js)),
> >               job_read(pfdev, JS_STATUS(js)),
> >               job_read(pfdev, JS_HEAD_LO(js)),
> >               job_read(pfdev, JS_TAIL_LO(js)),
> > @@ -448,6 +453,7 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
> >               }
> >
> >               if (status & JOB_INT_MASK_DONE(j)) {
> > +                     panfrost_mmu_as_put(pfdev, &pfdev->jobs[j]->file_priv->mmu);
> >                       panfrost_devfreq_record_transition(pfdev, j);
> >                       dma_fence_signal(pfdev->jobs[j]->done_fence);
> >               }
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > index 2ed411f09d80..f8da7557d1be 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > @@ -1,5 +1,6 @@
> >  // SPDX-License-Identifier:  GPL-2.0
> >  /* Copyright 2019 Linaro, Ltd, Rob Herring <robh at kernel.org> */
> > +#include <linux/atomic.h>
> >  #include <linux/bitfield.h>
> >  #include <linux/delay.h>
> >  #include <linux/dma-mapping.h>
> > @@ -22,12 +23,6 @@
> >  #define mmu_write(dev, reg, data) writel(data, dev->iomem + reg)
> >  #define mmu_read(dev, reg) readl(dev->iomem + reg)
> >
> > -struct panfrost_mmu {
> > -     struct io_pgtable_cfg pgtbl_cfg;
> > -     struct io_pgtable_ops *pgtbl_ops;
> > -     struct mutex lock;
> > -};
> > -
> >  static int wait_ready(struct panfrost_device *pfdev, u32 as_nr)
> >  {
> >       int ret;
> > @@ -91,6 +86,9 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr,
> >       unsigned long flags;
> >       int ret;
> >
> > +     if (as_nr < 0)
> > +             return 0;
> > +
>
> Do we need any synchronisation with panfrost_mmu_as_get() for this to be
> reliable? As far as I can see this check can race with the assignment of
> an address space. Perhaps panfrost_mmu_as_get() (or maybe
> panfrost_mmu_enable) should hold mmu->lock?

Yes, I think you are right.

>
> >       spin_lock_irqsave(&pfdev->hwaccess_lock, flags);
> >
> >       if (op != AS_COMMAND_UNLOCK)
> > @@ -107,9 +105,10 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr,
> >       return ret;
> >  }
> >
> > -static void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr)
> > +static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
> >  {
> > -     struct io_pgtable_cfg *cfg = &pfdev->mmu->pgtbl_cfg;
> > +     int as_nr = mmu->as;
> > +     struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg;
> >       u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
> >       u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
> >
> > @@ -136,9 +135,87 @@ static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
> >       write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE);
> >  }
> >
> > +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
> > +{
> > +     int as = mmu->as;
> > +
> > +     spin_lock(&pfdev->as_lock);
> > +
> > +     as = mmu->as;
> > +     if (as >= 0) {
> > +             int en = atomic_fetch_inc(&mmu->as_count);
> > +             WARN_ON(en > 2);
> > +
> > +             list_move(&mmu->list, &pfdev->as_lru_list);
> > +             spin_unlock(&pfdev->as_lock);
> > +             if (!en)
> > +                     panfrost_mmu_enable(pfdev, mmu);
>
> I don't see why we need to re-enable here - if (as >= 0) then the AS
> should be present on the GPU and mmu_hw_do_operation() will have been
> doing any necessary MMU commands.

I may be able to drop it. I needed it at one point when GPU reset
occurred, but after I reworked that, I think it is no longer needed.

>
> > +             return as;
> > +     }
> > +
> > +     /* Check for a free AS */
> > +     as = ffz(pfdev->as_alloc_mask);
> > +     if (!(BIT(as) & pfdev->features.as_present)) {
> > +             struct panfrost_mmu *lru_mmu;
> > +
> > +             list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) {
> > +                     if (!atomic_read(&lru_mmu->as_count))
> > +                             break;
> > +             }
> > +             WARN_ON(!lru_mmu);
> > +
> > +             list_del_init(&lru_mmu->list);
> > +             as = lru_mmu->as;
> > +
> > +             WARN_ON(as < 0);
> > +             lru_mmu->as = -1;
>
> This should be holding a lock to synchronise with the mmu_hw_do_operation().

At least for the old AS assignee, I think it shouldn't matter. Whether
we do flush operations or not won't matter because it will all get
sync'ed when the lru_mmu gets a new AS and this AS get assigned new
page tables.

I think it's more when we get to the panfrost_mmu_enable call that
doing flushes for lru_mmu could be problematic (though do we do any op
that's harmful?).

In any case, I think it will be better to add a lock rather than think
thru whether we'll be okay.

>
> > +     }
> > +
> > +     /* Assign the free or reclaimed AS to the  */
> > +     mmu->as = as;
> > +     set_bit(as, &pfdev->as_alloc_mask);
> > +     atomic_set(&mmu->as_count, 1);
> > +     list_add(&mmu->list, &pfdev->as_lru_list);
> > +
> > +     dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask);
> > +
> > +     spin_unlock(&pfdev->as_lock);
> > +
> > +     panfrost_mmu_enable(pfdev, mmu);
> > +     return as;
> > +}
> > +
> > +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
> > +{
> > +     atomic_dec(&mmu->as_count);
> > +     WARN_ON(atomic_read(&mmu->as_count) < 0);
> > +}
> > +
> > +static void panfrost_mmu_as_reset(struct panfrost_device *pfdev, struct panfrost_mmu *mmu)
> > +{
> > +     spin_lock(&pfdev->as_lock);
> > +
> > +     atomic_set(&mmu->as_count, 0);
> > +     if (mmu->as >= 0) {
> > +             clear_bit(mmu->as, &pfdev->as_alloc_mask);
> > +             list_del_init(&mmu->list);
> > +             mmu->as = -1;
> > +     }
> > +
> > +     spin_unlock(&pfdev->as_lock);
> > +}
> > +
> >  void panfrost_mmu_reset(struct panfrost_device *pfdev)
> >  {
> > -     panfrost_mmu_enable(pfdev, 0);
> > +     struct drm_file *file;
> > +
> > +     mutex_lock(&pfdev->ddev->filelist_mutex);
> > +     list_for_each_entry(file, &pfdev->ddev->filelist, lhead) {
> > +             struct panfrost_file_priv *priv = file->driver_priv;
> > +
> > +             panfrost_mmu_as_reset(pfdev, &priv->mmu);
> > +     }
> > +     mutex_unlock(&pfdev->ddev->filelist_mutex);
> >
> >       mmu_write(pfdev, MMU_INT_CLEAR, ~0);
> >       mmu_write(pfdev, MMU_INT_MASK, ~0);
> > @@ -152,21 +229,21 @@ static size_t get_pgsize(u64 addr, size_t size)
> >       return SZ_2M;
> >  }
> >
> > -static int mmu_map_sg(struct panfrost_device *pfdev, u64 iova,
> > -                   int prot, struct sg_table *sgt)
> > +static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
> > +                   u64 iova, int prot, struct sg_table *sgt)
> >  {
> >       unsigned int count;
> >       struct scatterlist *sgl;
> > -     struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops;
> > +     struct io_pgtable_ops *ops = mmu->pgtbl_ops;
> >       u64 start_iova = iova;
> >
> > -     mutex_lock(&pfdev->mmu->lock);
> > +     mutex_lock(&mmu->lock);
> >
> >       for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
> >               unsigned long paddr = sg_dma_address(sgl);
> >               size_t len = sg_dma_len(sgl);
> >
> > -             dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len);
> > +             dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len);
> >
> >               while (len) {
> >                       size_t pgsize = get_pgsize(iova | paddr, len);
> > @@ -178,10 +255,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, u64 iova,
> >               }
> >       }
> >
> > -     mmu_hw_do_operation(pfdev, 0, start_iova, iova - start_iova,
> > +     mmu_hw_do_operation(pfdev, mmu->as, start_iova, iova - start_iova,
> >                           AS_COMMAND_FLUSH_PT);
> >
> > -     mutex_unlock(&pfdev->mmu->lock);
> > +     mutex_unlock(&mmu->lock);
> >
> >       return 0;
> >  }
> > @@ -208,7 +285,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
> >       if (ret < 0)
> >               return ret;
> >
> > -     mmu_map_sg(pfdev, bo->node.start << PAGE_SHIFT, prot, sgt);
> > +     mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
> >
> >       pm_runtime_mark_last_busy(pfdev->dev);
> >       pm_runtime_put_autosuspend(pfdev->dev);
> > @@ -221,7 +298,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
> >  {
> >       struct drm_gem_object *obj = &bo->base.base;
> >       struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
> > -     struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops;
> > +     struct io_pgtable_ops *ops = bo->mmu->pgtbl_ops;
> >       u64 iova = bo->node.start << PAGE_SHIFT;
> >       size_t len = bo->node.size << PAGE_SHIFT;
> >       size_t unmapped_len = 0;
> > @@ -230,13 +307,13 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
> >       if (WARN_ON(!bo->is_mapped))
> >               return;
> >
> > -     dev_dbg(pfdev->dev, "unmap: iova=%llx, len=%zx", iova, len);
> > +     dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len);
> >
> >       ret = pm_runtime_get_sync(pfdev->dev);
> >       if (ret < 0)
> >               return;
> >
> > -     mutex_lock(&pfdev->mmu->lock);
> > +     mutex_lock(&bo->mmu->lock);
> >
> >       while (unmapped_len < len) {
> >               size_t unmapped_page;
> > @@ -250,10 +327,10 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
> >               unmapped_len += pgsize;
> >       }
> >
> > -     mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT,
> > +     mmu_hw_do_operation(pfdev, bo->mmu->as, bo->node.start << PAGE_SHIFT,
> >                           bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
> >
> > -     mutex_unlock(&pfdev->mmu->lock);
> > +     mutex_unlock(&bo->mmu->lock);
> >
> >       pm_runtime_mark_last_busy(pfdev->dev);
> >       pm_runtime_put_autosuspend(pfdev->dev);
> > @@ -262,9 +339,10 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
> >
> >  static void mmu_tlb_inv_context_s1(void *cookie)
> >  {
> > -     struct panfrost_device *pfdev = cookie;
> > +     struct panfrost_file_priv *priv = cookie;
> >
> > -     mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
> > +     /* Only flush if we have an assigned AS */
>
> I'm not sure if this comment makes sense here - seems like it belongs in
> mmu_hw_do_operation()

Left over from when I had the check here...

>
> > +     mmu_hw_do_operation(priv->pfdev, priv->mmu.as, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
> >  }
> >
> >  static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
> > @@ -283,16 +361,69 @@ static const struct iommu_gather_ops mmu_tlb_ops = {
> >       .tlb_sync       = mmu_tlb_sync_context,
> >  };
> >
> > +int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv)
> > +{
> > +     struct panfrost_mmu *mmu = &priv->mmu;
> > +     struct panfrost_device *pfdev = priv->pfdev;
> > +
> > +     mutex_init(&mmu->lock);
> > +     INIT_LIST_HEAD(&mmu->list);
> > +     mmu->as = -1;
> > +
> > +     mmu->pgtbl_cfg = (struct io_pgtable_cfg) {
> > +             .pgsize_bitmap  = SZ_4K | SZ_2M,
> > +             .ias            = FIELD_GET(0xff, pfdev->features.mmu_features),
> > +             .oas            = FIELD_GET(0xff00, pfdev->features.mmu_features),
> > +             .tlb            = &mmu_tlb_ops,
> > +             .iommu_dev      = pfdev->dev,
> > +     };
> > +
> > +     mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg,
> > +                                           priv);
> > +     if (!mmu->pgtbl_ops)
> > +             return -EINVAL;
> > +
> > +     return 0;
> > +}
> > +
> > +void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv)
> > +{
> > +     struct panfrost_device *pfdev = priv->pfdev;
> > +     struct panfrost_mmu *mmu = &priv->mmu;
> > +
> > +     spin_lock(&pfdev->as_lock);
> > +     if (mmu->as >= 0) {
> > +             clear_bit(mmu->as, &pfdev->as_alloc_mask);
> > +             clear_bit(mmu->as, &pfdev->as_in_use_mask);
> > +             list_del(&mmu->list);
> > +     }
> > +     spin_unlock(&pfdev->as_lock);
> > +
> > +     free_io_pgtable_ops(mmu->pgtbl_ops);
> > +}
> > +
> >  static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
> >  {
> > -     struct drm_mm_node *node;
> > +     struct drm_mm_node *node = NULL;
> >       u64 offset = addr >> PAGE_SHIFT;
> > +     struct drm_file *file;
> >
> > -     drm_mm_for_each_node(node, &pfdev->mm) {
> > -             if (offset >= node->start && offset < (node->start + node->size))
> > -                     return node;
> > +     mutex_lock(&pfdev->ddev->filelist_mutex);
> > +     list_for_each_entry(file, &pfdev->ddev->filelist, lhead) {
>
> You could walk as_lru_list rather than every file as you know you are
> looking for a fd which is assigned an address space.

Yes, good point.

Thanks for the review,
Rob


More information about the dri-devel mailing list