[Lima] [PATCH v3 2/2] drm/lima: driver for ARM Mali4xx GPUs
Qiang Yu
yuq825 at gmail.com
Sat Mar 2 02:32:16 UTC 2019
On Thu, Feb 28, 2019 at 5:41 AM Rob Herring <robh at kernel.org> wrote:
>
> On Wed, Feb 27, 2019 at 7:42 AM Qiang Yu <yuq825 at gmail.com> wrote:
> >
>
> Looks pretty good. A few small things and some questions hopefully
> some others can answer.
>
> > - Mali 4xx GPUs have two kinds of processors GP and PP. GP is for
> > OpenGL vertex shader processing and PP is for fragment shader
> > processing. Each processor has its own MMU so processors work in
> > virtual address space.
> > - There's only one GP but multiple PP (max 4 for mali 400 and 8
> > for mali 450) in the same mali 4xx GPU. All PPs are grouped
> > together to handle a single fragment shader task divided by
> > FB output tiled pixels. Mali 400 user space driver is
> > responsible for assigning target tiled pixels to each PP, but mali
> > 450 has a HW module called DLBU to dynamically balance each
> > PP's load.
> > - User space driver allocate buffer object and map into GPU
> > virtual address space, upload command stream and draw data with
> > CPU mmap of the buffer object, then submit task to GP/PP with
> > a register frame indicating where is the command stream and misc
> > settings.
> > - There's no command stream validation/relocation due to each user
> > process has its own GPU virtual address space. GP/PP's MMU switch
> > virtual address space before running two tasks from different
> > user process. Error or evil user space code just get MMU fault
> > or GP/PP error IRQ, then the HW/SW will be recovered.
> > - Use GEM+shmem for MM. Currently just alloc and pin memory when
> > gem object creation. GPU vm map of the buffer is also done in
> > the alloc stage in kernel space. We may delay the memory
> > allocation and real GPU vm map to command submission stage in the
> > future as an improvement.
> > - Use drm_sched for GPU task schedule. Each OpenGL context should
> > have a lima context object in the kernel to distinguish tasks
> > from different user. drm_sched gets task from each lima context
> > in a fair way.
> >
> > v3:
> > - fix comments from kbuild robot
> > - restrict supported arch to tested ones
> >
> > v2:
> > - fix syscall argument check
> > - fix job finish fence leak since kernel 5.0
> > - use drm syncobj to replace native fence
> > - move buffer object GPU va map into kernel
> > - reserve syscall argument space for future info
> > - remove kernel gem modifier
> > - switch TTM back to GEM+shmem MM
> > - use time based io poll
> > - use whole register name
> > - adopt gem reservation obj integration
> > - use drm_timeout_abs_to_jiffies
> >
> > Cc: Eric Anholt <eric at anholt.net>
> > Cc: Rob Herring <robh at kernel.org>
> > Cc: Christian König <ckoenig.leichtzumerken at gmail.com>
> > Cc: Daniel Vetter <daniel at ffwll.ch>
> > Cc: Alex Deucher <alexdeucher at gmail.com>
> > Signed-off-by: Andreas Baierl <ichgeh at imkreisrum.de>
> > Signed-off-by: Erico Nunes <nunes.erico at gmail.com>
> > Signed-off-by: Heiko Stuebner <heiko at sntech.de>
> > Signed-off-by: Marek Vasut <marex at denx.de>
> > Signed-off-by: Neil Armstrong <narmstrong at baylibre.com>
> > Signed-off-by: Simon Shields <simon at lineageos.org>
> > Signed-off-by: Vasily Khoruzhick <anarsoul at gmail.com>
> > Signed-off-by: Qiang Yu <yuq825 at gmail.com>
> > ---
>
> > diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
> > new file mode 100644
> > index 000000000000..398e6d604426
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_bcast.c
> > @@ -0,0 +1,46 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
>
> It's 2019 now.
>
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_bcast.h"
> > +#include "lima_regs.h"
> > +
> > +#define bcast_write(reg, data) writel(data, ip->iomem + reg)
> > +#define bcast_read(reg) readl(ip->iomem + reg)
> > +
> > +void lima_bcast_enable(struct lima_device *dev, int num_pp)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + struct lima_ip *ip = dev->ip + lima_ip_bcast;
> > + int i, mask = bcast_read(LIMA_BCAST_BROADCAST_MASK) & 0xffff0000;
> > +
> > + for (i = 0; i < num_pp; i++) {
> > + struct lima_ip *pp = pipe->processor[i];
> > + mask |= 1 << (pp->id - lima_ip_pp0);
> > + }
> > +
> > + bcast_write(LIMA_BCAST_BROADCAST_MASK, mask);
> > +}
> > +
> > +int lima_bcast_init(struct lima_ip *ip)
> > +{
> > + int i, mask = 0;
> > +
> > + for (i = lima_ip_pp0; i <= lima_ip_pp7; i++) {
> > + if (ip->dev->ip[i].present)
> > + mask |= 1 << (i - lima_ip_pp0);
> > + }
> > +
> > + bcast_write(LIMA_BCAST_BROADCAST_MASK, mask << 16);
> > + bcast_write(LIMA_BCAST_INTERRUPT_MASK, mask);
> > + return 0;
> > +}
> > +
> > +void lima_bcast_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > diff --git a/drivers/gpu/drm/lima/lima_bcast.h b/drivers/gpu/drm/lima/lima_bcast.h
> > new file mode 100644
> > index 000000000000..345e3e809860
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_bcast.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_BCAST_H__
> > +#define __LIMA_BCAST_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_bcast_init(struct lima_ip *ip);
> > +void lima_bcast_fini(struct lima_ip *ip);
> > +
> > +void lima_bcast_enable(struct lima_device *dev, int num_pp);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
> > new file mode 100644
> > index 000000000000..439cb44d7a0d
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_ctx.c
> > @@ -0,0 +1,105 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/slab.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_ctx.h"
> > +
> > +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id)
> > +{
> > + struct lima_ctx *ctx;
> > + int i, err;
> > +
> > + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> > + if (!ctx)
> > + return -ENOMEM;
> > + ctx->dev = dev;
> > + kref_init(&ctx->refcnt);
> > +
> > + for (i = 0; i < lima_pipe_num; i++) {
> > + err = lima_sched_context_init(dev->pipe + i, ctx->context + i, &ctx->guilty);
> > + if (err)
> > + goto err_out0;
> > + }
> > +
> > + idr_preload(GFP_KERNEL);
> > + spin_lock(&mgr->lock);
> > + err = idr_alloc(&mgr->handles, ctx, 1, 0, GFP_ATOMIC);
> > + spin_unlock(&mgr->lock);
> > + idr_preload_end();
> > + if (err < 0)
> > + goto err_out0;
> > +
> > + *id = err;
> > + return 0;
> > +
> > +err_out0:
> > + for (i--; i >= 0; i--)
> > + lima_sched_context_fini(dev->pipe + i, ctx->context + i);
> > + kfree(ctx);
> > + return err;
> > +}
> > +
> > +static void lima_ctx_do_release(struct kref *ref)
> > +{
> > + struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
> > + int i;
> > +
> > + for (i = 0; i < lima_pipe_num; i++)
> > + lima_sched_context_fini(ctx->dev->pipe + i, ctx->context + i);
> > + kfree(ctx);
> > +}
> > +
> > +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id)
> > +{
> > + struct lima_ctx *ctx;
> > +
> > + spin_lock(&mgr->lock);
> > + ctx = idr_remove(&mgr->handles, id);
> > + spin_unlock(&mgr->lock);
> > +
> > + if (ctx) {
> > + kref_put(&ctx->refcnt, lima_ctx_do_release);
> > + return 0;
> > + }
> > + return -EINVAL;
> > +}
> > +
> > +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id)
> > +{
> > + struct lima_ctx *ctx;
> > +
> > + spin_lock(&mgr->lock);
> > + ctx = idr_find(&mgr->handles, id);
> > + if (ctx)
> > + kref_get(&ctx->refcnt);
> > + spin_unlock(&mgr->lock);
> > + return ctx;
> > +}
> > +
> > +void lima_ctx_put(struct lima_ctx *ctx)
> > +{
> > + kref_put(&ctx->refcnt, lima_ctx_do_release);
> > +}
> > +
> > +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr)
> > +{
> > + spin_lock_init(&mgr->lock);
> > + idr_init(&mgr->handles);
> > +}
> > +
> > +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr)
> > +{
> > + struct lima_ctx *ctx;
> > + struct idr *idp;
> > + uint32_t id;
> > +
> > + idp = &mgr->handles;
> > +
> > + idr_for_each_entry(idp, ctx, id) {
> > + kref_put(&ctx->refcnt, lima_ctx_do_release);
> > + }
> > +
> > + idr_destroy(&mgr->handles);
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_ctx.h b/drivers/gpu/drm/lima/lima_ctx.h
> > new file mode 100644
> > index 000000000000..2d32ff9b30ad
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_ctx.h
> > @@ -0,0 +1,30 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_CTX_H__
> > +#define __LIMA_CTX_H__
> > +
> > +#include <linux/idr.h>
> > +
> > +#include "lima_device.h"
> > +
> > +struct lima_ctx {
> > + struct kref refcnt;
> > + struct lima_device *dev;
> > + struct lima_sched_context context[lima_pipe_num];
> > + atomic_t guilty;
> > +};
> > +
> > +struct lima_ctx_mgr {
> > + spinlock_t lock;
> > + struct idr handles;
> > +};
> > +
> > +int lima_ctx_create(struct lima_device *dev, struct lima_ctx_mgr *mgr, u32 *id);
> > +int lima_ctx_free(struct lima_ctx_mgr *mgr, u32 id);
> > +struct lima_ctx *lima_ctx_get(struct lima_ctx_mgr *mgr, u32 id);
> > +void lima_ctx_put(struct lima_ctx *ctx);
> > +void lima_ctx_mgr_init(struct lima_ctx_mgr *mgr);
> > +void lima_ctx_mgr_fini(struct lima_ctx_mgr *mgr);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
> > new file mode 100644
> > index 000000000000..2e137a0baddb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_device.c
> > @@ -0,0 +1,376 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/regulator/consumer.h>
> > +#include <linux/reset.h>
> > +#include <linux/clk.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/platform_device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_gp.h"
> > +#include "lima_pp.h"
> > +#include "lima_mmu.h"
> > +#include "lima_pmu.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_bcast.h"
> > +#include "lima_vm.h"
> > +
> > +struct lima_ip_desc {
> > + char *name;
> > + char *irq_name;
> > + bool must_have[lima_gpu_num];
> > + int offset[lima_gpu_num];
> > +
> > + int (*init)(struct lima_ip *);
> > + void (*fini)(struct lima_ip *);
> > +};
> > +
> > +#define LIMA_IP_DESC(ipname, mst0, mst1, off0, off1, func, irq) \
> > + [lima_ip_##ipname] = { \
> > + .name = #ipname, \
> > + .irq_name = irq, \
> > + .must_have = { \
> > + [lima_gpu_mali400] = mst0, \
> > + [lima_gpu_mali450] = mst1, \
> > + }, \
> > + .offset = { \
> > + [lima_gpu_mali400] = off0, \
> > + [lima_gpu_mali450] = off1, \
> > + }, \
> > + .init = lima_##func##_init, \
> > + .fini = lima_##func##_fini, \
> > + }
> > +
> > +static struct lima_ip_desc lima_ip_desc[lima_ip_num] = {
> > + LIMA_IP_DESC(pmu, false, false, 0x02000, 0x02000, pmu, "pmu"),
> > + LIMA_IP_DESC(l2_cache0, true, true, 0x01000, 0x10000, l2_cache, NULL),
> > + LIMA_IP_DESC(l2_cache1, false, true, -1, 0x01000, l2_cache, NULL),
> > + LIMA_IP_DESC(l2_cache2, false, false, -1, 0x11000, l2_cache, NULL),
> > + LIMA_IP_DESC(gp, true, true, 0x00000, 0x00000, gp, "gp"),
> > + LIMA_IP_DESC(pp0, true, true, 0x08000, 0x08000, pp, "pp0"),
> > + LIMA_IP_DESC(pp1, false, false, 0x0A000, 0x0A000, pp, "pp1"),
> > + LIMA_IP_DESC(pp2, false, false, 0x0C000, 0x0C000, pp, "pp2"),
> > + LIMA_IP_DESC(pp3, false, false, 0x0E000, 0x0E000, pp, "pp3"),
> > + LIMA_IP_DESC(pp4, false, false, -1, 0x28000, pp, "pp4"),
> > + LIMA_IP_DESC(pp5, false, false, -1, 0x2A000, pp, "pp5"),
> > + LIMA_IP_DESC(pp6, false, false, -1, 0x2C000, pp, "pp6"),
> > + LIMA_IP_DESC(pp7, false, false, -1, 0x2E000, pp, "pp7"),
> > + LIMA_IP_DESC(gpmmu, true, true, 0x03000, 0x03000, mmu, "gpmmu"),
> > + LIMA_IP_DESC(ppmmu0, true, true, 0x04000, 0x04000, mmu, "ppmmu0"),
> > + LIMA_IP_DESC(ppmmu1, false, false, 0x05000, 0x05000, mmu, "ppmmu1"),
> > + LIMA_IP_DESC(ppmmu2, false, false, 0x06000, 0x06000, mmu, "ppmmu2"),
> > + LIMA_IP_DESC(ppmmu3, false, false, 0x07000, 0x07000, mmu, "ppmmu3"),
> > + LIMA_IP_DESC(ppmmu4, false, false, -1, 0x1C000, mmu, "ppmmu4"),
> > + LIMA_IP_DESC(ppmmu5, false, false, -1, 0x1D000, mmu, "ppmmu5"),
> > + LIMA_IP_DESC(ppmmu6, false, false, -1, 0x1E000, mmu, "ppmmu6"),
> > + LIMA_IP_DESC(ppmmu7, false, false, -1, 0x1F000, mmu, "ppmmu7"),
> > + LIMA_IP_DESC(dlbu, false, true, -1, 0x14000, dlbu, NULL),
> > + LIMA_IP_DESC(bcast, false, true, -1, 0x13000, bcast, NULL),
> > + LIMA_IP_DESC(pp_bcast, false, true, -1, 0x16000, pp_bcast, "pp"),
> > + LIMA_IP_DESC(ppmmu_bcast, false, true, -1, 0x15000, mmu, NULL),
> > +};
> > +
> > +const char *lima_ip_name(struct lima_ip *ip)
> > +{
> > + return lima_ip_desc[ip->id].name;
> > +}
> > +
> > +static int lima_clk_init(struct lima_device *dev)
> > +{
> > + int err;
> > + unsigned long bus_rate, gpu_rate;
> > +
> > + dev->clk_bus = devm_clk_get(dev->dev, "bus");
> > + if (IS_ERR(dev->clk_bus)) {
> > + dev_err(dev->dev, "get bus clk failed %ld\n", PTR_ERR(dev->clk_bus));
> > + return PTR_ERR(dev->clk_bus);
> > + }
> > +
> > + dev->clk_gpu = devm_clk_get(dev->dev, "core");
> > + if (IS_ERR(dev->clk_gpu)) {
> > + dev_err(dev->dev, "get core clk failed %ld\n", PTR_ERR(dev->clk_gpu));
> > + return PTR_ERR(dev->clk_gpu);
> > + }
> > +
> > + bus_rate = clk_get_rate(dev->clk_bus);
> > + dev_info(dev->dev, "bus rate = %lu\n", bus_rate);
> > +
> > + gpu_rate = clk_get_rate(dev->clk_gpu);
> > + dev_info(dev->dev, "mod rate = %lu", gpu_rate);
> > +
> > + if ((err = clk_prepare_enable(dev->clk_bus)))
> > + return err;
> > + if ((err = clk_prepare_enable(dev->clk_gpu)))
> > + goto error_out0;
> > +
> > + dev->reset = devm_reset_control_get_optional(dev->dev, NULL);
> > + if (IS_ERR(dev->reset)) {
> > + err = PTR_ERR(dev->reset);
> > + goto error_out1;
> > + } else if (dev->reset != NULL) {
> > + if ((err = reset_control_deassert(dev->reset)))
> > + goto error_out1;
> > + }
> > +
> > + return 0;
> > +
> > +error_out1:
> > + clk_disable_unprepare(dev->clk_gpu);
> > +error_out0:
> > + clk_disable_unprepare(dev->clk_bus);
> > + return err;
> > +}
> > +
> > +static void lima_clk_fini(struct lima_device *dev)
> > +{
> > + if (dev->reset != NULL)
> > + reset_control_assert(dev->reset);
> > + clk_disable_unprepare(dev->clk_gpu);
> > + clk_disable_unprepare(dev->clk_bus);
> > +}
> > +
> > +static int lima_regulator_init(struct lima_device *dev)
> > +{
> > + int ret;
> > + dev->regulator = devm_regulator_get_optional(dev->dev, "mali");
> > + if (IS_ERR(dev->regulator)) {
> > + ret = PTR_ERR(dev->regulator);
> > + dev->regulator = NULL;
> > + if (ret == -ENODEV)
> > + return 0;
> > + dev_err(dev->dev, "failed to get regulator: %d\n", ret);
> > + return ret;
> > + }
> > +
> > + ret = regulator_enable(dev->regulator);
> > + if (ret < 0) {
> > + dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
> > + return ret;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void lima_regulator_fini(struct lima_device *dev)
> > +{
> > + if (dev->regulator)
> > + regulator_disable(dev->regulator);
> > +}
> > +
> > +static int lima_init_ip(struct lima_device *dev, int index)
> > +{
> > + struct lima_ip_desc *desc = lima_ip_desc + index;
> > + struct lima_ip *ip = dev->ip + index;
> > + int offset = desc->offset[dev->id];
> > + bool must = desc->must_have[dev->id];
> > + int err;
> > +
> > + if (offset < 0)
> > + return 0;
> > +
> > + ip->dev = dev;
> > + ip->id = index;
> > + ip->iomem = dev->iomem + offset;
> > + if (desc->irq_name) {
> > + err = platform_get_irq_byname(dev->pdev, desc->irq_name);
> > + if (err < 0)
> > + goto out;
> > + ip->irq = err;
> > + }
> > +
> > + err = desc->init(ip);
> > + if (!err) {
> > + ip->present = true;
> > + return 0;
> > + }
> > +
> > +out:
> > + return must ? err : 0;
> > +}
> > +
> > +static void lima_fini_ip(struct lima_device *ldev, int index)
> > +{
> > + struct lima_ip_desc *desc = lima_ip_desc + index;
> > + struct lima_ip *ip = ldev->ip + index;
> > +
> > + if (ip->present)
> > + desc->fini(ip);
> > +}
> > +
> > +static int lima_init_gp_pipe(struct lima_device *dev)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > + int err;
> > +
> > + if ((err = lima_sched_pipe_init(pipe, "gp")))
> > + return err;
> > +
> > + pipe->l2_cache[pipe->num_l2_cache++] = dev->ip + lima_ip_l2_cache0;
> > + pipe->mmu[pipe->num_mmu++] = dev->ip + lima_ip_gpmmu;
> > + pipe->processor[pipe->num_processor++] = dev->ip + lima_ip_gp;
> > +
> > + if ((err = lima_gp_pipe_init(dev))) {
> > + lima_sched_pipe_fini(pipe);
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void lima_fini_gp_pipe(struct lima_device *dev)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > +
> > + lima_gp_pipe_fini(dev);
> > + lima_sched_pipe_fini(pipe);
> > +}
> > +
> > +static int lima_init_pp_pipe(struct lima_device *dev)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + int err, i;
> > +
> > + if ((err = lima_sched_pipe_init(pipe, "pp")))
> > + return err;
> > +
> > + for (i = 0; i < LIMA_SCHED_PIPE_MAX_PROCESSOR; i++) {
> > + struct lima_ip *pp = dev->ip + lima_ip_pp0 + i;
> > + struct lima_ip *ppmmu = dev->ip + lima_ip_ppmmu0 + i;
> > + struct lima_ip *l2_cache;
> > +
> > + if (dev->id == lima_gpu_mali400)
> > + l2_cache = dev->ip + lima_ip_l2_cache0;
> > + else
> > + l2_cache = dev->ip + lima_ip_l2_cache1 + (i >> 2);
> > +
> > + if (pp->present && ppmmu->present && l2_cache->present) {
> > + pipe->mmu[pipe->num_mmu++] = ppmmu;
> > + pipe->processor[pipe->num_processor++] = pp;
> > + if (!pipe->l2_cache[i >> 2])
> > + pipe->l2_cache[pipe->num_l2_cache++] = l2_cache;
> > + }
> > + }
> > +
> > + if (dev->ip[lima_ip_bcast].present) {
> > + pipe->bcast_processor = dev->ip + lima_ip_pp_bcast;
> > + pipe->bcast_mmu = dev->ip + lima_ip_ppmmu_bcast;
> > + }
> > +
> > + if ((err = lima_pp_pipe_init(dev))) {
> > + lima_sched_pipe_fini(pipe);
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static void lima_fini_pp_pipe(struct lima_device *dev)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > + lima_pp_pipe_fini(dev);
> > + lima_sched_pipe_fini(pipe);
> > +}
> > +
> > +int lima_device_init(struct lima_device *ldev)
> > +{
> > + int err, i;
> > + struct resource *res;
> > +
> > + dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32));
> > +
> > + err = lima_clk_init(ldev);
> > + if (err) {
> > + dev_err(ldev->dev, "clk init fail %d\n", err);
> > + return err;
> > + }
> > +
> > + if ((err = lima_regulator_init(ldev))) {
> > + dev_err(ldev->dev, "regulator init fail %d\n", err);
> > + goto err_out0;
> > + }
> > +
> > + ldev->empty_vm = lima_vm_create(ldev);
> > + if (!ldev->empty_vm) {
> > + err = -ENOMEM;
> > + goto err_out1;
> > + }
> > +
> > + ldev->va_start = 0;
> > + if (ldev->id == lima_gpu_mali450) {
> > + ldev->va_end = LIMA_VA_RESERVE_START;
> > + ldev->dlbu_cpu = dma_alloc_wc(
> > + ldev->dev, LIMA_PAGE_SIZE,
> > + &ldev->dlbu_dma, GFP_KERNEL);
> > + if (!ldev->dlbu_cpu) {
> > + err = -ENOMEM;
> > + goto err_out2;
> > + }
> > + }
> > + else
> > + ldev->va_end = LIMA_VA_RESERVE_END;
> > +
> > + res = platform_get_resource(ldev->pdev, IORESOURCE_MEM, 0);
> > + ldev->iomem = devm_ioremap_resource(ldev->dev, res);
> > + if (IS_ERR(ldev->iomem)) {
> > + dev_err(ldev->dev, "fail to ioremap iomem\n");
> > + err = PTR_ERR(ldev->iomem);
> > + goto err_out3;
> > + }
> > +
> > + for (i = 0; i < lima_ip_num; i++) {
> > + err = lima_init_ip(ldev, i);
> > + if (err)
> > + goto err_out4;
> > + }
> > +
> > + err = lima_init_gp_pipe(ldev);
> > + if (err)
> > + goto err_out4;
> > +
> > + err = lima_init_pp_pipe(ldev);
> > + if (err)
> > + goto err_out5;
> > +
> > + return 0;
> > +
> > +err_out5:
> > + lima_fini_gp_pipe(ldev);
> > +err_out4:
> > + while (--i >= 0)
> > + lima_fini_ip(ldev, i);
> > +err_out3:
> > + if (ldev->dlbu_cpu)
> > + dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> > + ldev->dlbu_cpu, ldev->dlbu_dma);
> > +err_out2:
> > + lima_vm_put(ldev->empty_vm);
> > +err_out1:
> > + lima_regulator_fini(ldev);
> > +err_out0:
> > + lima_clk_fini(ldev);
> > + return err;
> > +}
> > +
> > +void lima_device_fini(struct lima_device *ldev)
> > +{
> > + int i;
> > +
> > + lima_fini_pp_pipe(ldev);
> > + lima_fini_gp_pipe(ldev);
> > +
> > + for (i = lima_ip_num - 1; i >= 0; i--)
> > + lima_fini_ip(ldev, i);
> > +
> > + if (ldev->dlbu_cpu)
> > + dma_free_wc(ldev->dev, LIMA_PAGE_SIZE,
> > + ldev->dlbu_cpu, ldev->dlbu_dma);
> > +
> > + lima_vm_put(ldev->empty_vm);
> > +
> > + lima_regulator_fini(ldev);
> > +
> > + lima_clk_fini(ldev);
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
> > new file mode 100644
> > index 000000000000..41499f28ae13
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_device.h
> > @@ -0,0 +1,129 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_DEVICE_H__
> > +#define __LIMA_DEVICE_H__
> > +
> > +#include <drm/drm_device.h>
> > +#include <linux/delay.h>
> > +
> > +#include "lima_sched.h"
> > +
> > +enum lima_gpu_id {
> > + lima_gpu_mali400 = 0,
> > + lima_gpu_mali450,
> > + lima_gpu_num,
> > +};
> > +
> > +enum lima_ip_id {
> > + lima_ip_pmu,
> > + lima_ip_gpmmu,
> > + lima_ip_ppmmu0,
> > + lima_ip_ppmmu1,
> > + lima_ip_ppmmu2,
> > + lima_ip_ppmmu3,
> > + lima_ip_ppmmu4,
> > + lima_ip_ppmmu5,
> > + lima_ip_ppmmu6,
> > + lima_ip_ppmmu7,
> > + lima_ip_gp,
> > + lima_ip_pp0,
> > + lima_ip_pp1,
> > + lima_ip_pp2,
> > + lima_ip_pp3,
> > + lima_ip_pp4,
> > + lima_ip_pp5,
> > + lima_ip_pp6,
> > + lima_ip_pp7,
> > + lima_ip_l2_cache0,
> > + lima_ip_l2_cache1,
> > + lima_ip_l2_cache2,
> > + lima_ip_dlbu,
> > + lima_ip_bcast,
> > + lima_ip_pp_bcast,
> > + lima_ip_ppmmu_bcast,
> > + lima_ip_num,
> > +};
> > +
> > +struct lima_device;
> > +
> > +struct lima_ip {
> > + struct lima_device *dev;
> > + enum lima_ip_id id;
> > + bool present;
> > +
> > + void __iomem *iomem;
> > + int irq;
> > +
> > + union {
> > + /* gp/pp */
> > + bool async_reset;
> > + /* l2 cache */
> > + spinlock_t lock;
> > + } data;
> > +};
> > +
> > +enum lima_pipe_id {
> > + lima_pipe_gp,
> > + lima_pipe_pp,
> > + lima_pipe_num,
> > +};
> > +
> > +struct lima_device {
> > + struct device *dev;
> > + struct drm_device *ddev;
> > + struct platform_device *pdev;
> > +
> > + enum lima_gpu_id id;
> > + int num_pp;
> > +
> > + void __iomem *iomem;
> > + struct clk *clk_bus;
> > + struct clk *clk_gpu;
> > + struct reset_control *reset;
> > + struct regulator *regulator;
> > +
> > + struct lima_ip ip[lima_ip_num];
> > + struct lima_sched_pipe pipe[lima_pipe_num];
> > +
> > + struct lima_vm *empty_vm;
> > + uint64_t va_start;
> > + uint64_t va_end;
> > +
> > + u32 *dlbu_cpu;
> > + dma_addr_t dlbu_dma;
> > +};
> > +
> > +static inline struct lima_device *
> > +to_lima_dev(struct drm_device *dev)
> > +{
> > + return dev->dev_private;
> > +}
> > +
> > +int lima_device_init(struct lima_device *ldev);
> > +void lima_device_fini(struct lima_device *ldev);
> > +
> > +const char *lima_ip_name(struct lima_ip *ip);
> > +
> > +typedef int (*lima_poll_func_t)(struct lima_ip *);
> > +
> > +static inline int lima_poll_timeout(struct lima_ip *ip, lima_poll_func_t func,
> > + int sleep_us, int timeout_us)
> > +{
> > + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
> > +
> > + might_sleep_if(sleep_us);
> > + while (1) {
> > + if (func(ip))
> > + return 0;
> > +
> > + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0)
> > + return -ETIMEDOUT;
> > +
> > + if (sleep_us)
> > + usleep_range((sleep_us >> 2) + 1, sleep_us);
> > + }
> > + return 0;
> > +}
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
> > new file mode 100644
> > index 000000000000..b7739712f235
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.c
> > @@ -0,0 +1,56 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_vm.h"
> > +#include "lima_regs.h"
> > +
> > +#define dlbu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define dlbu_read(reg) readl(ip->iomem + reg)
> > +
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp)
> > +{
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> > + int i, mask = 0;
> > +
> > + for (i = 0; i < num_pp; i++) {
> > + struct lima_ip *pp = pipe->processor[i];
> > + mask |= 1 << (pp->id - lima_ip_pp0);
> > + }
> > +
> > + dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, mask);
> > +}
> > +
> > +void lima_dlbu_disable(struct lima_device *dev)
> > +{
> > + struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> > + dlbu_write(LIMA_DLBU_PP_ENABLE_MASK, 0);
> > +}
> > +
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
> > +{
> > + dlbu_write(LIMA_DLBU_TLLIST_VBASEADDR, reg[0]);
> > + dlbu_write(LIMA_DLBU_FB_DIM, reg[1]);
> > + dlbu_write(LIMA_DLBU_TLLIST_CONF, reg[2]);
> > + dlbu_write(LIMA_DLBU_START_TILE_POS, reg[3]);
> > +}
> > +
> > +int lima_dlbu_init(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > +
> > + dlbu_write(LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
> > + dlbu_write(LIMA_DLBU_MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
> > +
> > + return 0;
> > +}
> > +
> > +void lima_dlbu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
> > new file mode 100644
> > index 000000000000..60cba387cf30
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.h
> > @@ -0,0 +1,18 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_DLBU_H__
> > +#define __LIMA_DLBU_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp);
> > +void lima_dlbu_disable(struct lima_device *dev);
> > +
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
> > +
> > +int lima_dlbu_init(struct lima_ip *ip);
> > +void lima_dlbu_fini(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> > new file mode 100644
> > index 000000000000..e93bce16ee10
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > @@ -0,0 +1,353 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/module.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/log2.h>
> > +#include <drm/drm_prime.h>
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +#include "lima_vm.h"
> > +
> > +int lima_sched_timeout_ms = 0;
> > +int lima_sched_max_tasks = 32;
> > +
> > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > +
> > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > +
> > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
>
> For panfrost, we generalized this to "get param" like other drivers.
> Looks like you can only add 7 more items.
>
> What about GPU revisions?
Currently I don't know of any programming difference between GPUs
with different revisions. I would appreciate it if anyone could tell me
before some hard reverse engineering effort.
Regards,
Qiang
>
> > + struct drm_lima_info *info = data;
> > + struct lima_device *ldev = to_lima_dev(dev);
> > +
> > + switch (ldev->id) {
> > + case lima_gpu_mali400:
> > + info->gpu_id = LIMA_INFO_GPU_MALI400;
>
> Personally, I'd return the either the raw model or something like
> '400' rather than making up some enumeration.
>
> > + break;
> > + case lima_gpu_mali450:
> > + info->gpu_id = LIMA_INFO_GPU_MALI450;
> > + break;
> > + default:
> > + return -ENODEV;
> > + }
> > + info->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
> > + info->valid = 0;
> > + return 0;
> > +}
> > +
> > +static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > + struct drm_lima_gem_create *args = data;
> > +
> > + if (args->flags)
> > + return -EINVAL;
> > +
> > + if (args->size == 0)
> > + return -EINVAL;
> > +
> > + return lima_gem_create_handle(dev, file, args->size, args->flags, &args->handle);
> > +}
> > +
> > +static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > + struct drm_lima_gem_info *args = data;
> > +
> > + return lima_gem_get_info(file, args->handle, &args->va, &args->offset);
> > +}
> > +
> > +static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > + struct drm_lima_gem_submit *args = data;
> > + struct lima_device *ldev = to_lima_dev(dev);
> > + struct lima_drm_priv *priv = file->driver_priv;
> > + struct drm_lima_gem_submit_bo *bos;
> > + struct lima_sched_pipe *pipe;
> > + struct lima_sched_task *task;
> > + struct lima_ctx *ctx;
> > + struct lima_submit submit = {0};
> > + size_t size;
> > + int err = 0;
> > +
> > + if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
> > + return -EINVAL;
> > +
> > + if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE))
> > + return -EINVAL;
> > +
> > + pipe = ldev->pipe + args->pipe;
> > + if (args->frame_size != pipe->frame_size)
> > + return -EINVAL;
> > +
> > + bos = kvcalloc(args->nr_bos, sizeof(*submit.bos) + sizeof(*submit.lbos), GFP_KERNEL);
> > + if (!bos)
> > + return -ENOMEM;
> > +
> > + size = args->nr_bos * sizeof(*submit.bos);
> > + if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
> > + err = -EFAULT;
> > + goto out0;
> > + }
> > +
> > + task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
> > + if (!task) {
> > + err = -ENOMEM;
> > + goto out0;
> > + }
> > +
> > + task->frame = task + 1;
> > + if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
> > + err = -EFAULT;
> > + goto out1;
> > + }
> > +
> > + err = pipe->task_validate(pipe, task);
> > + if (err)
> > + goto out1;
> > +
> > + ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
> > + if (!ctx) {
> > + err = -ENOENT;
> > + goto out1;
> > + }
> > +
> > + submit.pipe = args->pipe;
> > + submit.bos = bos;
> > + submit.lbos = (void *)bos + size;
> > + submit.nr_bos = args->nr_bos;
> > + submit.task = task;
> > + submit.ctx = ctx;
> > + submit.flags = args->flags;
> > + submit.in_sync[0] = args->in_sync[0];
> > + submit.in_sync[1] = args->in_sync[1];
> > + submit.out_sync = args->out_sync;
> > +
> > + err = lima_gem_submit(file, &submit);
> > +
> > + lima_ctx_put(ctx);
> > +out1:
> > + if (err)
> > + kmem_cache_free(pipe->task_slab, task);
> > +out0:
> > + kvfree(bos);
> > + return err;
> > +}
> > +
> > +static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > + struct drm_lima_gem_wait *args = data;
> > +
> > + if (args->op & ~(LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE))
> > + return -EINVAL;
> > +
> > + return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
> > +}
> > +
> > +static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > + struct drm_lima_ctx *args = data;
> > + struct lima_drm_priv *priv = file->driver_priv;
> > + struct lima_device *ldev = to_lima_dev(dev);
> > +
> > + if (args->op == LIMA_CTX_OP_CREATE)
> > + return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
> > + else if (args->op == LIMA_CTX_OP_FREE)
> > + return lima_ctx_free(&priv->ctx_mgr, args->id);
>
> Wasn't it suggested in the prior version to not mux these? Make them 2 ioctls.
>
> > +
> > + return -EINVAL;
> > +}
> > +
> > +static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
> > +{
> > + int err;
> > + struct lima_drm_priv *priv;
> > + struct lima_device *ldev = to_lima_dev(dev);
> > +
> > + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > + if (!priv)
> > + return -ENOMEM;
> > +
> > + priv->vm = lima_vm_create(ldev);
> > + if (!priv->vm) {
> > + err = -ENOMEM;
> > + goto err_out0;
> > + }
> > +
> > + lima_ctx_mgr_init(&priv->ctx_mgr);
> > +
> > + file->driver_priv = priv;
> > + return 0;
> > +
> > +err_out0:
> > + kfree(priv);
> > + return err;
> > +}
> > +
> > +static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
> > +{
> > + struct lima_drm_priv *priv = file->driver_priv;
> > +
> > + lima_ctx_mgr_fini(&priv->ctx_mgr);
> > + lima_vm_put(priv->vm);
> > + kfree(priv);
> > +}
> > +
> > +static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
> > + DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
> > +};
> > +
> > +static const struct file_operations lima_drm_driver_fops = {
> > + .owner = THIS_MODULE,
> > + .open = drm_open,
> > + .release = drm_release,
> > + .unlocked_ioctl = drm_ioctl,
> > +#ifdef CONFIG_COMPAT
> > + .compat_ioctl = drm_compat_ioctl,
> > +#endif
> > + .mmap = lima_gem_mmap,
> > +};
> > +
> > +static struct drm_driver lima_drm_driver = {
> > + .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | DRIVER_SYNCOBJ,
> > + .open = lima_drm_driver_open,
> > + .postclose = lima_drm_driver_postclose,
> > + .ioctls = lima_drm_driver_ioctls,
> > + .num_ioctls = ARRAY_SIZE(lima_drm_driver_ioctls),
> > + .fops = &lima_drm_driver_fops,
> > + .gem_free_object_unlocked = lima_gem_free_object,
> > + .gem_open_object = lima_gem_object_open,
> > + .gem_close_object = lima_gem_object_close,
> > + .gem_vm_ops = &lima_gem_vm_ops,
> > + .name = "lima",
> > + .desc = "lima DRM",
> > + .date = "20190217",
> > + .major = 1,
> > + .minor = 0,
> > + .patchlevel = 0,
> > +
> > + .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> > + .gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
> > + .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> > + .gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
> > + .gem_prime_mmap = lima_gem_prime_mmap,
> > +};
> > +
> > +static int lima_pdev_probe(struct platform_device *pdev)
> > +{
> > + struct lima_device *ldev;
> > + struct drm_device *ddev;
> > + int err;
> > +
> > + ldev = devm_kzalloc(&pdev->dev, sizeof(*ldev), GFP_KERNEL);
> > + if (!ldev)
> > + return -ENOMEM;
> > +
> > + ldev->pdev = pdev;
> > + ldev->dev = &pdev->dev;
> > + ldev->id = (enum lima_gpu_id)of_device_get_match_data(&pdev->dev);
> > +
> > + platform_set_drvdata(pdev, ldev);
> > +
> > + /* Allocate and initialize the DRM device. */
> > + ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
> > + if (IS_ERR(ddev))
> > + return PTR_ERR(ddev);
> > +
> > + ddev->dev_private = ldev;
> > + ldev->ddev = ddev;
> > +
> > + err = lima_device_init(ldev);
> > + if (err) {
> > + dev_err(&pdev->dev, "Fatal error during GPU init\n");
> > + goto err_out0;
> > + }
> > +
> > + /*
> > + * Register the DRM device with the core and the connectors with
> > + * sysfs.
> > + */
> > + err = drm_dev_register(ddev, 0);
> > + if (err < 0)
> > + goto err_out1;
> > +
> > + return 0;
> > +
> > +err_out1:
> > + lima_device_fini(ldev);
> > +err_out0:
> > + drm_dev_put(ddev);
> > + return err;
> > +}
> > +
> > +static int lima_pdev_remove(struct platform_device *pdev)
> > +{
> > + struct lima_device *ldev = platform_get_drvdata(pdev);
> > + struct drm_device *ddev = ldev->ddev;
> > +
> > + drm_dev_unregister(ddev);
> > + lima_device_fini(ldev);
> > + drm_dev_put(ddev);
> > + return 0;
> > +}
> > +
> > +static const struct of_device_id dt_match[] = {
> > + { .compatible = "arm,mali-400", .data = (void *)lima_gpu_mali400 },
> > + { .compatible = "arm,mali-450", .data = (void *)lima_gpu_mali450 },
> > + {}
> > +};
> > +MODULE_DEVICE_TABLE(of, dt_match);
> > +
> > +static struct platform_driver lima_platform_driver = {
> > + .probe = lima_pdev_probe,
> > + .remove = lima_pdev_remove,
> > + .driver = {
> > + .name = "lima",
> > + .of_match_table = dt_match,
> > + },
> > +};
> > +
> > +static void lima_check_module_param(void)
> > +{
> > + if (lima_sched_max_tasks < 4)
> > + lima_sched_max_tasks = 4;
> > + else
> > + lima_sched_max_tasks = roundup_pow_of_two(lima_sched_max_tasks);
> > +}
> > +
> > +static int __init lima_init(void)
> > +{
> > + int ret;
> > +
> > + lima_check_module_param();
> > + ret = lima_sched_slab_init();
> > + if (ret)
> > + return ret;
> > +
> > + ret = platform_driver_register(&lima_platform_driver);
> > + if (ret)
> > + lima_sched_slab_fini();
> > +
> > + return ret;
> > +}
> > +module_init(lima_init);
> > +
> > +static void __exit lima_exit(void)
> > +{
> > + platform_driver_unregister(&lima_platform_driver);
> > + lima_sched_slab_fini();
> > +}
> > +module_exit(lima_exit);
> > +
> > +MODULE_AUTHOR("Lima Project Developers");
> > +MODULE_DESCRIPTION("Lima DRM Driver");
> > +MODULE_LICENSE("GPL v2");
> > diff --git a/drivers/gpu/drm/lima/lima_drv.h b/drivers/gpu/drm/lima/lima_drv.h
> > new file mode 100644
> > index 000000000000..640a548cd617
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_drv.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_DRV_H__
> > +#define __LIMA_DRV_H__
> > +
> > +#include <drm/drmP.h>
> > +
> > +#include "lima_ctx.h"
> > +
> > +extern int lima_sched_timeout_ms;
> > +extern int lima_sched_max_tasks;
> > +
> > +struct lima_vm;
> > +struct lima_bo;
> > +struct lima_sched_task;
> > +
> > +struct drm_lima_gem_submit_bo;
> > +
> > +struct lima_drm_priv {
> > + struct lima_vm *vm;
> > + struct lima_ctx_mgr ctx_mgr;
> > +};
> > +
> > +struct lima_submit {
> > + struct lima_ctx *ctx;
> > + int pipe;
> > + u32 flags;
> > +
> > + struct drm_lima_gem_submit_bo *bos;
> > + struct lima_bo **lbos;
> > + u32 nr_bos;
> > +
> > + u32 in_sync[2];
> > + u32 out_sync;
> > +
> > + struct lima_sched_task *task;
> > +};
> > +
> > +static inline struct lima_drm_priv *
> > +to_lima_drm_priv(struct drm_file *file)
> > +{
> > + return file->driver_priv;
> > +}
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> > new file mode 100644
> > index 000000000000..666960345566
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem.c
> > @@ -0,0 +1,379 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <drm/drmP.h>
> > +#include <drm/drm_syncobj.h>
> > +#include <drm/drm_utils.h>
> > +#include <linux/sync_file.h>
> > +#include <linux/pfn_t.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +
> > +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> > + u32 size, u32 flags, u32 *handle)
> > +{
> > + int err;
> > + struct lima_bo *bo;
> > + struct lima_device *ldev = to_lima_dev(dev);
> > +
> > + bo = lima_bo_create(ldev, size, flags, NULL, NULL);
> > + if (IS_ERR(bo))
> > + return PTR_ERR(bo);
> > +
> > + err = drm_gem_handle_create(file, &bo->gem, handle);
> > +
> > + /* drop reference from allocate - handle holds it now */
> > + drm_gem_object_put_unlocked(&bo->gem);
> > +
> > + return err;
> > +}
> > +
> > +void lima_gem_free_object(struct drm_gem_object *obj)
> > +{
> > + struct lima_bo *bo = to_lima_bo(obj);
> > +
> > + if (!list_empty(&bo->va))
> > + dev_err(obj->dev->dev, "lima gem free bo still has va\n");
> > +
> > + lima_bo_destroy(bo);
> > +}
> > +
> > +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
> > +{
> > + struct lima_bo *bo = to_lima_bo(obj);
> > + struct lima_drm_priv *priv = to_lima_drm_priv(file);
> > + struct lima_vm *vm = priv->vm;
> > +
> > + return lima_vm_bo_add(vm, bo, true);
> > +}
> > +
> > +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file)
> > +{
> > + struct lima_bo *bo = to_lima_bo(obj);
> > + struct lima_drm_priv *priv = to_lima_drm_priv(file);
> > + struct lima_vm *vm = priv->vm;
> > +
> > + lima_vm_bo_del(vm, bo);
> > +}
> > +
> > +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
> > +{
> > + struct drm_gem_object *obj;
> > + struct lima_bo *bo;
> > + struct lima_drm_priv *priv = to_lima_drm_priv(file);
> > + struct lima_vm *vm = priv->vm;
> > + int err;
> > +
> > + obj = drm_gem_object_lookup(file, handle);
> > + if (!obj)
> > + return -ENOENT;
> > +
> > + bo = to_lima_bo(obj);
> > +
> > + *va = lima_vm_get_va(vm, bo);
> > +
> > + err = drm_gem_create_mmap_offset(obj);
> > + if (!err)
> > + *offset = drm_vma_node_offset_addr(&obj->vma_node);
> > +
> > + drm_gem_object_put_unlocked(obj);
> > + return err;
> > +}
> > +
> > +static vm_fault_t lima_gem_fault(struct vm_fault *vmf)
> > +{
> > + struct vm_area_struct *vma = vmf->vma;
> > + struct drm_gem_object *obj = vma->vm_private_data;
> > + struct lima_bo *bo = to_lima_bo(obj);
> > + pfn_t pfn;
> > + pgoff_t pgoff;
> > +
> > + /* We don't use vmf->pgoff since that has the fake offset: */
> > + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
> > + pfn = __pfn_to_pfn_t(page_to_pfn(bo->pages[pgoff]), PFN_DEV);
> > +
> > + return vmf_insert_mixed(vma, vmf->address, pfn);
> > +}
> > +
> > +const struct vm_operations_struct lima_gem_vm_ops = {
> > + .fault = lima_gem_fault,
> > + .open = drm_gem_vm_open,
> > + .close = drm_gem_vm_close,
> > +};
> > +
> > +void lima_set_vma_flags(struct vm_area_struct *vma)
> > +{
> > + pgprot_t prot = vm_get_page_prot(vma->vm_flags);
> > +
> > + vma->vm_flags |= VM_MIXEDMAP;
> > + vma->vm_flags &= ~VM_PFNMAP;
> > + vma->vm_page_prot = pgprot_writecombine(prot);
> > +}
> > +
> > +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> > +{
> > + int ret;
> > +
> > + ret = drm_gem_mmap(filp, vma);
> > + if (ret)
> > + return ret;
> > +
> > + lima_set_vma_flags(vma);
> > + return 0;
> > +}
> > +
> > +static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
> > + bool write, bool explicit)
> > +{
> > + int err = 0;
> > +
> > + if (!write) {
> > + err = reservation_object_reserve_shared(bo->gem.resv, 1);
> > + if (err)
> > + return err;
> > + }
> > +
> > + /* explicit sync uses the user-passed dep fences */
> > + if (explicit)
> > + return 0;
> > +
> > + /* implicit sync uses the bo fences in the resv obj */
> > + if (write) {
> > + unsigned nr_fences;
> > + struct dma_fence **fences;
> > + int i;
> > +
> > + err = reservation_object_get_fences_rcu(
> > + bo->gem.resv, NULL, &nr_fences, &fences);
> > + if (err || !nr_fences)
> > + return err;
> > +
> > + for (i = 0; i < nr_fences; i++) {
> > + err = lima_sched_task_add_dep(task, fences[i]);
> > + if (err)
> > + break;
> > + }
> > +
> > + /* for error case free remaining fences */
> > + for ( ; i < nr_fences; i++)
> > + dma_fence_put(fences[i]);
> > +
> > + kfree(fences);
> > + }
> > + else {
> > + struct dma_fence *fence;
> > + fence = reservation_object_get_excl_rcu(bo->gem.resv);
> > + if (fence) {
> > + err = lima_sched_task_add_dep(task, fence);
> > + if (err)
> > + dma_fence_put(fence);
> > + }
> > + }
> > +
> > + return err;
> > +}
> > +
> > +static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos,
> > + struct ww_acquire_ctx *ctx)
> > +{
> > + int i, ret = 0, contended, slow_locked = -1;
> > +
> > + ww_acquire_init(ctx, &reservation_ww_class);
> > +
> > +retry:
> > + for (i = 0; i < nr_bos; i++) {
> > + if (i == slow_locked) {
> > + slow_locked = -1;
> > + continue;
> > + }
> > +
> > + ret = ww_mutex_lock_interruptible(&bos[i]->gem.resv->lock, ctx);
> > + if (ret < 0) {
> > + contended = i;
> > + goto err;
> > + }
> > + }
> > +
> > + ww_acquire_done(ctx);
> > + return 0;
> > +
> > +err:
> > + for (i--; i >= 0; i--)
> > + ww_mutex_unlock(&bos[i]->gem.resv->lock);
> > +
> > + if (slow_locked >= 0)
> > + ww_mutex_unlock(&bos[slow_locked]->gem.resv->lock);
> > +
> > + if (ret == -EDEADLK) {
> > + /* we lost out in a seqno race, lock and retry.. */
> > + ret = ww_mutex_lock_slow_interruptible(
> > + &bos[contended]->gem.resv->lock, ctx);
> > + if (!ret) {
> > + slow_locked = contended;
> > + goto retry;
> > + }
> > + }
> > + ww_acquire_fini(ctx);
> > +
> > + return ret;
> > +}
> > +
> > +static void lima_gem_unlock_bos(struct lima_bo **bos, u32 nr_bos,
> > + struct ww_acquire_ctx *ctx)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < nr_bos; i++)
> > + ww_mutex_unlock(&bos[i]->gem.resv->lock);
> > + ww_acquire_fini(ctx);
> > +}
> > +
> > +static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit)
> > +{
> > + int i, err;
> > +
> > + for (i = 0; i < ARRAY_SIZE(submit->in_sync); i++) {
> > + struct dma_fence *fence = NULL;
> > +
> > + if (!submit->in_sync[i])
> > + continue;
> > +
> > + err = drm_syncobj_find_fence(file, submit->in_sync[i],
> > + 0, 0, &fence);
> > + if (err)
> > + return err;
> > +
> > + err = lima_sched_task_add_dep(submit->task, fence);
> > + if (err) {
> > + dma_fence_put(fence);
> > + return err;
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
> > +{
> > + int i, err = 0;
> > + struct ww_acquire_ctx ctx;
> > + struct lima_drm_priv *priv = to_lima_drm_priv(file);
> > + struct lima_vm *vm = priv->vm;
> > + struct drm_syncobj *out_sync = NULL;
> > + struct dma_fence *fence;
> > + struct lima_bo **bos = submit->lbos;
> > +
> > + if (submit->out_sync) {
> > + out_sync = drm_syncobj_find(file, submit->out_sync);
> > + if (!out_sync)
> > + return -ENOENT;
> > + }
> > +
> > + for (i = 0; i < submit->nr_bos; i++) {
> > + struct drm_gem_object *obj;
> > + struct lima_bo *bo;
> > +
> > + obj = drm_gem_object_lookup(file, submit->bos[i].handle);
> > + if (!obj) {
> > + err = -ENOENT;
> > + goto err_out0;
> > + }
> > +
> > + bo = to_lima_bo(obj);
> > +
> > + /* increase the refcnt of the gpu va map to prevent it from being
> > + * unmapped while executing; it will be decreased when the task is done */
> > + err = lima_vm_bo_add(vm, bo, false);
> > + if (err) {
> > + drm_gem_object_put_unlocked(obj);
> > + goto err_out0;
> > + }
> > +
> > + bos[i] = bo;
> > + }
> > +
> > + err = lima_gem_lock_bos(bos, submit->nr_bos, &ctx);
> > + if (err)
> > + goto err_out0;
> > +
> > + err = lima_sched_task_init(
> > + submit->task, submit->ctx->context + submit->pipe,
> > + bos, submit->nr_bos, vm);
> > + if (err)
> > + goto err_out1;
> > +
> > + err = lima_gem_add_deps(file, submit);
> > + if (err)
> > + goto err_out2;
> > +
> > + for (i = 0; i < submit->nr_bos; i++) {
> > + err = lima_gem_sync_bo(
> > + submit->task, bos[i],
> > + submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE,
> > + submit->flags & LIMA_SUBMIT_FLAG_EXPLICIT_FENCE);
> > + if (err)
> > + goto err_out2;
> > + }
> > +
> > + fence = lima_sched_context_queue_task(
> > + submit->ctx->context + submit->pipe, submit->task);
> > +
> > + for (i = 0; i < submit->nr_bos; i++) {
> > + if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
> > + reservation_object_add_excl_fence(bos[i]->gem.resv, fence);
> > + else
> > + reservation_object_add_shared_fence(bos[i]->gem.resv, fence);
> > + }
> > +
> > + lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> > +
> > + for (i = 0; i < submit->nr_bos; i++)
> > + drm_gem_object_put_unlocked(&bos[i]->gem);
> > +
> > + if (out_sync) {
> > + drm_syncobj_replace_fence(out_sync, fence);
> > + drm_syncobj_put(out_sync);
> > + }
> > +
> > + dma_fence_put(fence);
> > +
> > + return 0;
> > +
> > +err_out2:
> > + lima_sched_task_fini(submit->task);
> > +err_out1:
> > + lima_gem_unlock_bos(bos, submit->nr_bos, &ctx);
> > +err_out0:
> > + for (i = 0; i < submit->nr_bos; i++) {
> > + if (!bos[i])
> > + break;
> > + lima_vm_bo_del(vm, bos[i]);
> > + drm_gem_object_put_unlocked(&bos[i]->gem);
> > + }
> > + if (out_sync)
> > + drm_syncobj_put(out_sync);
> > + return err;
> > +}
> > +
> > +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns)
> > +{
> > + bool write = op & LIMA_GEM_WAIT_WRITE;
> > + long ret, timeout;
> > +
> > + if (!op)
> > + return 0;
> > +
> > + timeout = drm_timeout_abs_to_jiffies(timeout_ns);
> > +
> > + ret = drm_gem_reservation_object_wait(file, handle, write, timeout);
> > + if (ret == 0)
> > + ret = timeout ? -ETIMEDOUT : -EBUSY;
> > +
> > + return ret;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gem.h b/drivers/gpu/drm/lima/lima_gem.h
> > new file mode 100644
> > index 000000000000..f1c4658100a8
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem.h
> > @@ -0,0 +1,25 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_GEM_H__
> > +#define __LIMA_GEM_H__
> > +
> > +struct lima_bo;
> > +struct lima_submit;
> > +
> > +extern const struct vm_operations_struct lima_gem_vm_ops;
> > +
> > +struct lima_bo *lima_gem_create_bo(struct drm_device *dev, u32 size, u32 flags);
> > +int lima_gem_create_handle(struct drm_device *dev, struct drm_file *file,
> > + u32 size, u32 flags, u32 *handle);
> > +void lima_gem_free_object(struct drm_gem_object *obj);
> > +int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file);
> > +void lima_gem_object_close(struct drm_gem_object *obj, struct drm_file *file);
> > +int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset);
> > +int lima_gem_mmap(struct file *filp, struct vm_area_struct *vma);
> > +int lima_gem_submit(struct drm_file *file, struct lima_submit *submit);
> > +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns);
> > +
> > +void lima_set_vma_flags(struct vm_area_struct *vma);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gem_prime.c b/drivers/gpu/drm/lima/lima_gem_prime.c
> > new file mode 100644
> > index 000000000000..fe8348a055f6
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem_prime.c
> > @@ -0,0 +1,47 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/dma-buf.h>
> > +#include <drm/drm_prime.h>
> > +#include <drm/drm_drv.h>
> > +#include <drm/drm_file.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_object.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +
> > +struct drm_gem_object *lima_gem_prime_import_sg_table(
> > + struct drm_device *dev, struct dma_buf_attachment *attach,
> > + struct sg_table *sgt)
> > +{
> > + struct lima_device *ldev = to_lima_dev(dev);
> > + struct lima_bo *bo;
> > +
> > + bo = lima_bo_create(ldev, attach->dmabuf->size, 0, sgt,
> > + attach->dmabuf->resv);
> > + if (IS_ERR(bo))
> > + return ERR_CAST(bo);
> > +
> > + return &bo->gem;
> > +}
> > +
> > +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj)
> > +{
> > + struct lima_bo *bo = to_lima_bo(obj);
> > + int npages = obj->size >> PAGE_SHIFT;
> > +
> > + return drm_prime_pages_to_sg(bo->pages, npages);
> > +}
> > +
> > +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> > +{
> > + int ret;
> > +
> > + ret = drm_gem_mmap_obj(obj, obj->size, vma);
> > + if (ret)
> > + return ret;
> > +
> > + lima_set_vma_flags(vma);
> > + return 0;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gem_prime.h b/drivers/gpu/drm/lima/lima_gem_prime.h
> > new file mode 100644
> > index 000000000000..ceb1be9840a5
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gem_prime.h
> > @@ -0,0 +1,13 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_GEM_PRIME_H__
> > +#define __LIMA_GEM_PRIME_H__
> > +
> > +struct drm_gem_object *lima_gem_prime_import_sg_table(
> > + struct drm_device *dev, struct dma_buf_attachment *attach,
> > + struct sg_table *sgt);
> > +struct sg_table *lima_gem_prime_get_sg_table(struct drm_gem_object *obj);
> > +int lima_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_gp.c b/drivers/gpu/drm/lima/lima_gp.c
> > new file mode 100644
> > index 000000000000..4f4e9f5f7e19
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gp.c
> > @@ -0,0 +1,282 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_gp.h"
> > +#include "lima_regs.h"
> > +
> > +#define gp_write(reg, data) writel(data, ip->iomem + reg)
> > +#define gp_read(reg) readl(ip->iomem + reg)
> > +
> > +static irqreturn_t lima_gp_irq_handler(int irq, void *data)
> > +{
> > + struct lima_ip *ip = data;
> > + struct lima_device *dev = ip->dev;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > + u32 state = gp_read(LIMA_GP_INT_STAT);
> > + u32 status = gp_read(LIMA_GP_STATUS);
> > + bool done = false;
> > +
> > + /* for shared irq case */
> > + if (!state)
> > + return IRQ_NONE;
> > +
> > + if (state & LIMA_GP_IRQ_MASK_ERROR) {
> > + dev_err(dev->dev, "gp error irq state=%x status=%x\n",
> > + state, status);
> > +
> > + /* mask all interrupts before hard reset */
> > + gp_write(LIMA_GP_INT_MASK, 0);
> > +
> > + pipe->error = true;
> > + done = true;
> > + }
> > + else {
> > + bool valid = state & (LIMA_GP_IRQ_VS_END_CMD_LST |
> > + LIMA_GP_IRQ_PLBU_END_CMD_LST);
> > + bool active = status & (LIMA_GP_STATUS_VS_ACTIVE |
> > + LIMA_GP_STATUS_PLBU_ACTIVE);
> > + done = valid && !active;
> > + }
> > +
> > + gp_write(LIMA_GP_INT_CLEAR, state);
> > +
> > + if (done)
> > + lima_sched_pipe_task_done(pipe);
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static void lima_gp_soft_reset_async(struct lima_ip *ip)
> > +{
> > + if (ip->data.async_reset)
> > + return;
> > +
> > + gp_write(LIMA_GP_INT_MASK, 0);
> > + gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_RESET_COMPLETED);
> > + gp_write(LIMA_GP_CMD, LIMA_GP_CMD_SOFT_RESET);
> > + ip->data.async_reset = true;
> > +}
> > +
> > +static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > + u32 v;
> > +
> > + if (!ip->data.async_reset)
> > + return 0;
> > +
> > + err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, v,
> > + v & LIMA_GP_IRQ_RESET_COMPLETED,
> > + 0, 100);
> > + if (err) {
> > + dev_err(dev->dev, "gp soft reset time out\n");
> > + return err;
> > + }
> > +
> > + gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> > + gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> > +
> > + ip->data.async_reset = false;
> > + return 0;
> > +}
> > +
> > +static int lima_gp_task_validate(struct lima_sched_pipe *pipe,
> > + struct lima_sched_task *task)
> > +{
> > + struct drm_lima_gp_frame *frame = task->frame;
> > + u32 *f = frame->frame;
> > + (void)pipe;
> > +
> > + if (f[LIMA_GP_VSCL_START_ADDR >> 2] >
> > + f[LIMA_GP_VSCL_END_ADDR >> 2] ||
> > + f[LIMA_GP_PLBUCL_START_ADDR >> 2] >
> > + f[LIMA_GP_PLBUCL_END_ADDR >> 2] ||
> > + f[LIMA_GP_PLBU_ALLOC_START_ADDR >> 2] >
> > + f[LIMA_GP_PLBU_ALLOC_END_ADDR >> 2])
> > + return -EINVAL;
> > +
> > + if (f[LIMA_GP_VSCL_START_ADDR >> 2] ==
> > + f[LIMA_GP_VSCL_END_ADDR >> 2] &&
> > + f[LIMA_GP_PLBUCL_START_ADDR >> 2] ==
> > + f[LIMA_GP_PLBUCL_END_ADDR >> 2])
> > + return -EINVAL;
> > +
> > + return 0;
> > +}
> > +
> > +static void lima_gp_task_run(struct lima_sched_pipe *pipe,
> > + struct lima_sched_task *task)
> > +{
> > + struct lima_ip *ip = pipe->processor[0];
> > + struct drm_lima_gp_frame *frame = task->frame;
> > + u32 *f = frame->frame;
> > + u32 cmd = 0;
> > + int i;
> > +
> > + if (f[LIMA_GP_VSCL_START_ADDR >> 2] !=
> > + f[LIMA_GP_VSCL_END_ADDR >> 2])
> > + cmd |= LIMA_GP_CMD_START_VS;
> > + if (f[LIMA_GP_PLBUCL_START_ADDR >> 2] !=
> > + f[LIMA_GP_PLBUCL_END_ADDR >> 2])
> > + cmd |= LIMA_GP_CMD_START_PLBU;
> > +
> > + /* before any hw ops, wait for the last successful task's async soft reset */
> > + lima_gp_soft_reset_async_wait(ip);
> > +
> > + for (i = 0; i < LIMA_GP_FRAME_REG_NUM; i++)
> > + writel(f[i], ip->iomem + LIMA_GP_VSCL_START_ADDR + i * 4);
> > +
> > + gp_write(LIMA_GP_CMD, LIMA_GP_CMD_UPDATE_PLBU_ALLOC);
> > + gp_write(LIMA_GP_CMD, cmd);
> > +}
> > +
> > +static int lima_gp_hard_reset_poll(struct lima_ip *ip)
> > +{
> > + gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC01A0000);
> > + return gp_read(LIMA_GP_PERF_CNT_0_LIMIT) == 0xC01A0000;
> > +}
> > +
> > +static int lima_gp_hard_reset(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int ret;
> > +
> > + gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> > + gp_write(LIMA_GP_INT_MASK, 0);
> > + gp_write(LIMA_GP_CMD, LIMA_GP_CMD_RESET);
> > + ret = lima_poll_timeout(ip, lima_gp_hard_reset_poll, 10, 100);
> > + if (ret) {
> > + dev_err(dev->dev, "gp hard reset timeout\n");
> > + return ret;
> > + }
> > +
> > + gp_write(LIMA_GP_PERF_CNT_0_LIMIT, 0);
> > + gp_write(LIMA_GP_INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> > + gp_write(LIMA_GP_INT_MASK, LIMA_GP_IRQ_MASK_USED);
> > + return 0;
> > +}
> > +
> > +static void lima_gp_task_fini(struct lima_sched_pipe *pipe)
> > +{
> > + lima_gp_soft_reset_async(pipe->processor[0]);
> > +}
> > +
> > +static void lima_gp_task_error(struct lima_sched_pipe *pipe)
> > +{
> > + struct lima_ip *ip = pipe->processor[0];
> > +
> > + dev_err(ip->dev->dev, "gp task error int_state=%x status=%x\n",
> > + gp_read(LIMA_GP_INT_STAT), gp_read(LIMA_GP_STATUS));
> > +
> > + lima_gp_hard_reset(ip);
> > +}
> > +
> > +static void lima_gp_task_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > + lima_sched_pipe_task_done(pipe);
> > +}
> > +
> > +static void lima_gp_print_version(struct lima_ip *ip)
> > +{
> > + u32 version, major, minor;
> > + char *name;
> > +
> > + version = gp_read(LIMA_GP_VERSION);
> > + major = (version >> 8) & 0xFF;
> > + minor = version & 0xFF;
> > + switch (version >> 16) {
> > + case 0xA07:
> > + name = "mali200";
> > + break;
> > + case 0xC07:
> > + name = "mali300";
> > + break;
> > + case 0xB07:
> > + name = "mali400";
> > + break;
> > + case 0xD07:
> > + name = "mali450";
> > + break;
> > + default:
> > + name = "unknow";
> > + break;
> > + }
> > + dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
> > + lima_ip_name(ip), name, major, minor);
> > +}
> > +
> > +static struct kmem_cache *lima_gp_task_slab = NULL;
> > +static int lima_gp_task_slab_refcnt = 0;
> > +
> > +int lima_gp_init(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > +
> > + lima_gp_print_version(ip);
> > +
> > + ip->data.async_reset = false;
> > + lima_gp_soft_reset_async(ip);
> > + err = lima_gp_soft_reset_async_wait(ip);
> > + if (err)
> > + return err;
> > +
> > + err = devm_request_irq(dev->dev, ip->irq, lima_gp_irq_handler,
> > + IRQF_SHARED, lima_ip_name(ip), ip);
> > + if (err) {
> > + dev_err(dev->dev, "gp %s fail to request irq\n",
> > + lima_ip_name(ip));
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +void lima_gp_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +int lima_gp_pipe_init(struct lima_device *dev)
> > +{
> > + int frame_size = sizeof(struct drm_lima_gp_frame);
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_gp;
> > +
> > + if (!lima_gp_task_slab) {
> > + lima_gp_task_slab = kmem_cache_create_usercopy(
> > + "lima_gp_task", sizeof(struct lima_sched_task) + frame_size,
> > + 0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> > + frame_size, NULL);
> > + if (!lima_gp_task_slab)
> > + return -ENOMEM;
> > + }
> > + lima_gp_task_slab_refcnt++;
> > +
> > + pipe->frame_size = frame_size;
> > + pipe->task_slab = lima_gp_task_slab;
> > +
> > + pipe->task_validate = lima_gp_task_validate;
> > + pipe->task_run = lima_gp_task_run;
> > + pipe->task_fini = lima_gp_task_fini;
> > + pipe->task_error = lima_gp_task_error;
> > + pipe->task_mmu_error = lima_gp_task_mmu_error;
> > +
> > + return 0;
> > +}
> > +
> > +void lima_gp_pipe_fini(struct lima_device *dev)
> > +{
> > + if (!--lima_gp_task_slab_refcnt) {
> > + kmem_cache_destroy(lima_gp_task_slab);
> > + lima_gp_task_slab = NULL;
> > + }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_gp.h b/drivers/gpu/drm/lima/lima_gp.h
> > new file mode 100644
> > index 000000000000..55bc48ec7603
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_gp.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_GP_H__
> > +#define __LIMA_GP_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +int lima_gp_init(struct lima_ip *ip);
> > +void lima_gp_fini(struct lima_ip *ip);
> > +
> > +int lima_gp_pipe_init(struct lima_device *dev);
> > +void lima_gp_pipe_fini(struct lima_device *dev);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
> > new file mode 100644
> > index 000000000000..2ba4786f9ec7
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.c
> > @@ -0,0 +1,80 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_regs.h"
> > +
> > +#define l2_cache_write(reg, data) writel(data, ip->iomem + reg)
> > +#define l2_cache_read(reg) readl(ip->iomem + reg)
> > +
> > +static int lima_l2_cache_wait_idle(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > + u32 v;
> > +
> > + err = readl_poll_timeout(ip->iomem + LIMA_L2_CACHE_STATUS, v,
> > + !(v & LIMA_L2_CACHE_STATUS_COMMAND_BUSY),
> > + 0, 1000);
> > + if (err) {
> > + dev_err(dev->dev, "l2 cache wait command timeout\n");
> > + return err;
> > + }
> > + return 0;
> > +}
> > +
> > +int lima_l2_cache_flush(struct lima_ip *ip)
> > +{
> > + int ret;
> > +
> > + spin_lock(&ip->data.lock);
> > + l2_cache_write(LIMA_L2_CACHE_COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
> > + ret = lima_l2_cache_wait_idle(ip);
> > + spin_unlock(&ip->data.lock);
> > + return ret;
> > +}
> > +
> > +int lima_l2_cache_init(struct lima_ip *ip)
> > +{
> > + int i, err;
> > + u32 size;
> > + struct lima_device *dev = ip->dev;
> > +
> > + /* l2_cache2 only exists when one of PP4-7 is present */
> > + if (ip->id == lima_ip_l2_cache2) {
> > + for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
> > + if (dev->ip[i].present)
> > + break;
> > + }
> > + if (i > lima_ip_pp7)
> > + return -ENODEV;
> > + }
> > +
> > + spin_lock_init(&ip->data.lock);
> > +
> > + size = l2_cache_read(LIMA_L2_CACHE_SIZE);
> > + dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
> > + 1 << (((size >> 16) & 0xff) - 10),
> > + 1 << ((size >> 8) & 0xff),
> > + 1 << (size & 0xff),
> > + 1 << ((size >> 24) & 0xff));
> > +
> > + err = lima_l2_cache_flush(ip);
> > + if (err)
> > + return err;
> > +
> > + l2_cache_write(LIMA_L2_CACHE_ENABLE,
> > + LIMA_L2_CACHE_ENABLE_ACCESS|LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
> > + l2_cache_write(LIMA_L2_CACHE_MAX_READS, 0x1c);
> > +
> > + return 0;
> > +}
> > +
> > +void lima_l2_cache_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
> > new file mode 100644
> > index 000000000000..2ff91eafefbe
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_L2_CACHE_H__
> > +#define __LIMA_L2_CACHE_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_l2_cache_init(struct lima_ip *ip);
> > +void lima_l2_cache_fini(struct lima_ip *ip);
> > +
> > +int lima_l2_cache_flush(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
> > new file mode 100644
> > index 000000000000..c6c151d33cf8
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_mmu.c
> > @@ -0,0 +1,142 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_mmu.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +#define mmu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define mmu_read(reg) readl(ip->iomem + reg)
> > +
> > +#define lima_mmu_send_command(cmd, addr, val, cond) \
> > +({ \
> > + int __ret; \
> > + \
> > + mmu_write(LIMA_MMU_COMMAND, cmd); \
> > + __ret = readl_poll_timeout(ip->iomem + (addr), val, \
> > + cond, 0, 100); \
> > + if (__ret) \
> > + dev_err(dev->dev, \
> > + "mmu command %x timeout\n", cmd); \
> > + __ret; \
> > +})
> > +
> > +static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
> > +{
> > + struct lima_ip *ip = data;
> > + struct lima_device *dev = ip->dev;
> > + u32 status = mmu_read(LIMA_MMU_INT_STATUS);
> > + struct lima_sched_pipe *pipe;
> > +
> > + /* for shared irq case */
> > + if (!status)
> > + return IRQ_NONE;
> > +
> > + if (status & LIMA_MMU_INT_PAGE_FAULT) {
> > + u32 fault = mmu_read(LIMA_MMU_PAGE_FAULT_ADDR);
> > + dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
> > + fault, LIMA_MMU_STATUS_BUS_ID(status),
> > + status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
> > + lima_ip_name(ip));
> > + }
> > +
> > + if (status & LIMA_MMU_INT_READ_BUS_ERROR) {
> > + dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
> > + }
> > +
> > + /* mask all interrupts before resume */
> > + mmu_write(LIMA_MMU_INT_MASK, 0);
> > + mmu_write(LIMA_MMU_INT_CLEAR, status);
> > +
> > + pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
> > + lima_sched_pipe_mmu_error(pipe);
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +int lima_mmu_init(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > + u32 v;
> > +
> > + if (ip->id == lima_ip_ppmmu_bcast)
> > + return 0;
> > +
> > + mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> > + if (mmu_read(LIMA_MMU_DTE_ADDR) != 0xCAFEB000) {
> > + dev_err(dev->dev, "mmu %s dte write test fail\n", lima_ip_name(ip));
> > + return -EIO;
> > + }
> > +
> > + mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_HARD_RESET);
> > + err = lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> > + LIMA_MMU_DTE_ADDR, v, v == 0);
> > + if (err)
> > + return err;
> > +
> > + err = devm_request_irq(dev->dev, ip->irq, lima_mmu_irq_handler,
> > + IRQF_SHARED, lima_ip_name(ip), ip);
> > + if (err) {
> > + dev_err(dev->dev, "mmu %s fail to request irq\n", lima_ip_name(ip));
> > + return err;
> > + }
> > +
> > + mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> > + mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> > + return lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> > + LIMA_MMU_STATUS, v,
> > + v & LIMA_MMU_STATUS_PAGING_ENABLED);
> > +}
> > +
> > +void lima_mmu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + u32 v;
> > +
> > + lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_STALL,
> > + LIMA_MMU_STATUS, v,
> > + v & LIMA_MMU_STATUS_STALL_ACTIVE);
> > +
> > + if (vm)
> > + mmu_write(LIMA_MMU_DTE_ADDR, vm->pd.dma);
> > +
> > + /* flush the TLB */
> > + mmu_write(LIMA_MMU_COMMAND, LIMA_MMU_COMMAND_ZAP_CACHE);
> > +
> > + lima_mmu_send_command(LIMA_MMU_COMMAND_DISABLE_STALL,
> > + LIMA_MMU_STATUS, v,
> > + !(v & LIMA_MMU_STATUS_STALL_ACTIVE));
> > +}
> > +
> > +void lima_mmu_page_fault_resume(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + u32 status = mmu_read(LIMA_MMU_STATUS);
> > + u32 v;
> > +
> > + if (status & LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE) {
> > + dev_info(dev->dev, "mmu resume\n");
> > +
> > + mmu_write(LIMA_MMU_INT_MASK, 0);
> > + mmu_write(LIMA_MMU_DTE_ADDR, 0xCAFEBABE);
> > + lima_mmu_send_command(LIMA_MMU_COMMAND_HARD_RESET,
> > + LIMA_MMU_DTE_ADDR, v, v == 0);
> > + mmu_write(LIMA_MMU_INT_MASK, LIMA_MMU_INT_PAGE_FAULT | LIMA_MMU_INT_READ_BUS_ERROR);
> > + mmu_write(LIMA_MMU_DTE_ADDR, dev->empty_vm->pd.dma);
> > + lima_mmu_send_command(LIMA_MMU_COMMAND_ENABLE_PAGING,
> > + LIMA_MMU_STATUS, v,
> > + v & LIMA_MMU_STATUS_PAGING_ENABLED);
> > + }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_mmu.h b/drivers/gpu/drm/lima/lima_mmu.h
> > new file mode 100644
> > index 000000000000..ca173b60fc73
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_mmu.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_MMU_H__
> > +#define __LIMA_MMU_H__
> > +
> > +struct lima_ip;
> > +struct lima_vm;
> > +
> > +int lima_mmu_init(struct lima_ip *ip);
> > +void lima_mmu_fini(struct lima_ip *ip);
> > +
> > +void lima_mmu_switch_vm(struct lima_ip *ip, struct lima_vm *vm);
> > +void lima_mmu_page_fault_resume(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_object.c b/drivers/gpu/drm/lima/lima_object.c
> > new file mode 100644
> > index 000000000000..28ff1b8e1dca
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_object.c
> > @@ -0,0 +1,124 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <drm/drm_prime.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/dma-mapping.h>
> > +
> > +#include "lima_object.h"
> > +
> > +void lima_bo_destroy(struct lima_bo *bo)
> > +{
> > + if (bo->sgt) {
> > + kfree(bo->pages);
> > + drm_prime_gem_destroy(&bo->gem, bo->sgt);
> > + }
> > + else {
> > + if (bo->pages_dma_addr) {
> > + int i, npages = bo->gem.size >> PAGE_SHIFT;
> > +
> > + for (i = 0; i < npages; i++) {
> > + if (bo->pages_dma_addr[i])
> > + dma_unmap_page(bo->gem.dev->dev,
> > + bo->pages_dma_addr[i],
> > + PAGE_SIZE, DMA_BIDIRECTIONAL);
> > + }
> > + }
> > +
> > + if (bo->pages)
> > + drm_gem_put_pages(&bo->gem, bo->pages, true, true);
> > + }
> > +
> > + kfree(bo->pages_dma_addr);
> > + drm_gem_object_release(&bo->gem);
> > + kfree(bo);
> > +}
> > +
> > +static struct lima_bo *lima_bo_create_struct(struct lima_device *dev, u32 size, u32 flags,
> > + struct reservation_object *resv)
> > +{
> > + struct lima_bo *bo;
> > + int err;
> > +
> > + size = PAGE_ALIGN(size);
> > +
> > + bo = kzalloc(sizeof(*bo), GFP_KERNEL);
> > + if (!bo)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + mutex_init(&bo->lock);
> > + INIT_LIST_HEAD(&bo->va);
> > + bo->gem.resv = resv;
> > +
> > + err = drm_gem_object_init(dev->ddev, &bo->gem, size);
> > + if (err) {
> > + kfree(bo);
> > + return ERR_PTR(err);
> > + }
> > +
> > + return bo;
> > +}
> > +
> > +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> > + u32 flags, struct sg_table *sgt,
> > + struct reservation_object *resv)
> > +{
> > + int i, err;
> > + size_t npages;
> > + struct lima_bo *bo, *ret;
> > +
> > + bo = lima_bo_create_struct(dev, size, flags, resv);
> > + if (IS_ERR(bo))
> > + return bo;
> > +
> > + npages = bo->gem.size >> PAGE_SHIFT;
> > +
> > + bo->pages_dma_addr = kzalloc(npages * sizeof(dma_addr_t), GFP_KERNEL);
> > + if (!bo->pages_dma_addr) {
> > + ret = ERR_PTR(-ENOMEM);
> > + goto err_out;
> > + }
> > +
> > + if (sgt) {
> > + bo->sgt = sgt;
> > +
> > + bo->pages = kzalloc(npages * sizeof(*bo->pages), GFP_KERNEL);
> > + if (!bo->pages) {
> > + ret = ERR_PTR(-ENOMEM);
> > + goto err_out;
> > + }
> > +
> > + err = drm_prime_sg_to_page_addr_arrays(
> > + sgt, bo->pages, bo->pages_dma_addr, npages);
> > + if (err) {
> > + ret = ERR_PTR(err);
> > + goto err_out;
> > + }
> > + }
> > + else {
> > + mapping_set_gfp_mask(bo->gem.filp->f_mapping, GFP_DMA32);
> > + bo->pages = drm_gem_get_pages(&bo->gem);
> > + if (IS_ERR(bo->pages)) {
> > + ret = ERR_CAST(bo->pages);
> > + bo->pages = NULL;
> > + goto err_out;
> > + }
> > +
> > + for (i = 0; i < npages; i++) {
> > + dma_addr_t addr = dma_map_page(dev->dev, bo->pages[i], 0,
> > + PAGE_SIZE, DMA_BIDIRECTIONAL);
> > + if (dma_mapping_error(dev->dev, addr)) {
> > + ret = ERR_PTR(-EFAULT);
> > + goto err_out;
> > + }
> > + bo->pages_dma_addr[i] = addr;
> > + }
> > +
> > + }
> > +
> > + return bo;
> > +
> > +err_out:
> > + lima_bo_destroy(bo);
> > + return ret;
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_object.h b/drivers/gpu/drm/lima/lima_object.h
> > new file mode 100644
> > index 000000000000..70099f1045ac
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_object.h
> > @@ -0,0 +1,36 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_OBJECT_H__
> > +#define __LIMA_OBJECT_H__
> > +
> > +#include <drm/drm_gem.h>
> > +
> > +#include "lima_device.h"
> > +
> > +struct lima_bo {
> > + struct drm_gem_object gem;
> > +
> > + struct page **pages;
> > + dma_addr_t *pages_dma_addr;
> > + struct sg_table *sgt;
> > + void *vaddr;
> > +
> > + struct mutex lock;
> > + struct list_head va;
> > +};
> > +
> > +static inline struct lima_bo *
> > +to_lima_bo(struct drm_gem_object *obj)
> > +{
> > + return container_of(obj, struct lima_bo, gem);
> > +}
> > +
> > +struct lima_bo *lima_bo_create(struct lima_device *dev, u32 size,
> > + u32 flags, struct sg_table *sgt,
> > + struct reservation_object *resv);
> > +void lima_bo_destroy(struct lima_bo *bo);
> > +void *lima_bo_vmap(struct lima_bo *bo);
> > +void lima_bo_vunmap(struct lima_bo *bo);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_pmu.c b/drivers/gpu/drm/lima/lima_pmu.c
> > new file mode 100644
> > index 000000000000..3c50524b70a7
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pmu.c
> > @@ -0,0 +1,59 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/iopoll.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_pmu.h"
> > +#include "lima_regs.h"
> > +
> > +#define pmu_write(reg, data) writel(data, ip->iomem + reg)
> > +#define pmu_read(reg) readl(ip->iomem + reg)
> > +
> > +static int lima_pmu_wait_cmd(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > + u32 v;
> > +
> > + err = readl_poll_timeout(ip->iomem + LIMA_PMU_INT_RAWSTAT,
> > + v, v & LIMA_PMU_INT_CMD_MASK,
> > + 100, 100000);
> > + if (err) {
> > + dev_err(dev->dev, "timeout wait pmd cmd\n");
> > + return err;
> > + }
> > +
> > + pmu_write(LIMA_PMU_INT_CLEAR, LIMA_PMU_INT_CMD_MASK);
> > + return 0;
> > +}
> > +
> > +int lima_pmu_init(struct lima_ip *ip)
> > +{
> > + int err;
> > + u32 stat;
> > +
> > + pmu_write(LIMA_PMU_INT_MASK, 0);
> > +
> > + /* If this value is too low, the GPU will be unstable when
> > + * running at a high GPU clock frequency. */
> > + pmu_write(LIMA_PMU_SW_DELAY, 0xffff);
> > +
> > + /* status reg 1=off 0=on */
> > + stat = pmu_read(LIMA_PMU_STATUS);
> > +
> > + /* power up all ip */
> > + if (stat) {
> > + pmu_write(LIMA_PMU_POWER_UP, stat);
> > + err = lima_pmu_wait_cmd(ip);
> > + if (err)
> > + return err;
> > + }
> > + return 0;
> > +}
> > +
> > +void lima_pmu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_pmu.h b/drivers/gpu/drm/lima/lima_pmu.h
> > new file mode 100644
> > index 000000000000..1cf94a35bdf9
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pmu.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_PMU_H__
> > +#define __LIMA_PMU_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_pmu_init(struct lima_ip *ip);
> > +void lima_pmu_fini(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
> > new file mode 100644
> > index 000000000000..7b36c29eee89
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pp.c
> > @@ -0,0 +1,423 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +#include <linux/slab.h>
> > +
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_pp.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_bcast.h"
> > +#include "lima_vm.h"
> > +#include "lima_regs.h"
> > +
> > +#define pp_write(reg, data) writel(data, ip->iomem + reg)
> > +#define pp_read(reg) readl(ip->iomem + reg)
> > +
> > +static void lima_pp_handle_irq(struct lima_ip *ip, u32 state)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > + if (state & LIMA_PP_IRQ_MASK_ERROR) {
> > + u32 status = pp_read(LIMA_PP_STATUS);
> > +
> > + dev_err(dev->dev, "pp error irq state=%x status=%x\n",
> > + state, status);
> > +
> > + pipe->error = true;
> > +
> > + /* mask all interrupts before hard reset */
> > + pp_write(LIMA_PP_INT_MASK, 0);
> > + }
> > +
> > + pp_write(LIMA_PP_INT_CLEAR, state);
> > +}
> > +
> > +static irqreturn_t lima_pp_irq_handler(int irq, void *data)
> > +{
> > + struct lima_ip *ip = data;
> > + struct lima_device *dev = ip->dev;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + u32 state = pp_read(LIMA_PP_INT_STATUS);
> > +
> > + /* for shared irq case */
> > + if (!state)
> > + return IRQ_NONE;
> > +
> > + lima_pp_handle_irq(ip, state);
> > +
> > + if (atomic_dec_and_test(&pipe->task))
> > + lima_sched_pipe_task_done(pipe);
> > +
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
> > +{
> > + int i;
> > + irqreturn_t ret = IRQ_NONE;
> > + struct lima_ip *pp_bcast = data;
> > + struct lima_device *dev = pp_bcast->dev;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
> > +
> > + for (i = 0; i < frame->num_pp; i++) {
> > + struct lima_ip *ip = pipe->processor[i];
> > + u32 status, state;
> > +
> > + if (pipe->done & (1 << i))
> > + continue;
> > +
> > + /* read status first in case the interrupt state changes
> > + * in between, which could cause the interrupt handling
> > + * to be missed */
> > + status = pp_read(LIMA_PP_STATUS);
> > + state = pp_read(LIMA_PP_INT_STATUS);
> > +
> > + if (state) {
> > + lima_pp_handle_irq(ip, state);
> > + ret = IRQ_HANDLED;
> > + }
> > + else {
> > + if (status & LIMA_PP_STATUS_RENDERING_ACTIVE)
> > + continue;
> > + }
> > +
> > + pipe->done |= (1 << i);
> > + if (atomic_dec_and_test(&pipe->task))
> > + lima_sched_pipe_task_done(pipe);
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static void lima_pp_soft_reset_async(struct lima_ip *ip)
> > +{
> > + if (ip->data.async_reset)
> > + return;
> > +
> > + pp_write(LIMA_PP_INT_MASK, 0);
> > + pp_write(LIMA_PP_INT_RAWSTAT, LIMA_PP_IRQ_MASK_ALL);
> > + pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_SOFT_RESET);
> > + ip->data.async_reset = true;
> > +}
> > +
> > +static int lima_pp_soft_reset_poll(struct lima_ip *ip)
> > +{
> > + return !(pp_read(LIMA_PP_STATUS) & LIMA_PP_STATUS_RENDERING_ACTIVE) &&
> > + pp_read(LIMA_PP_INT_RAWSTAT) == LIMA_PP_IRQ_RESET_COMPLETED;
> > +}
> > +
> > +static int lima_pp_soft_reset_async_wait_one(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int ret;
> > +
> > + ret = lima_poll_timeout(ip, lima_pp_soft_reset_poll, 0, 100);
> > + if (ret) {
> > + dev_err(dev->dev, "pp %s reset time out\n", lima_ip_name(ip));
> > + return ret;
> > + }
> > +
> > + pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> > + pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> > + return 0;
> > +}
> > +
> > +static int lima_pp_soft_reset_async_wait(struct lima_ip *ip)
> > +{
> > + int i, err = 0;
> > +
> > + if (!ip->data.async_reset)
> > + return 0;
> > +
> > + if (ip->id == lima_ip_pp_bcast) {
> > + struct lima_device *dev = ip->dev;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > + struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
> > +
> > + for (i = 0; i < frame->num_pp; i++)
> > + err |= lima_pp_soft_reset_async_wait_one(pipe->processor[i]);
> > + }
> > + else
> > + err = lima_pp_soft_reset_async_wait_one(ip);
> > +
> > + ip->data.async_reset = false;
> > + return err;
> > +}
> > +
> > +static void lima_pp_write_frame(struct lima_ip *ip, u32 *frame, u32 *wb)
> > +{
> > + int i, j, n = 0;
> > +
> > + for (i = 0; i < LIMA_PP_FRAME_REG_NUM; i++)
> > + writel(frame[i], ip->iomem + LIMA_PP_FRAME + i * 4);
> > +
> > + for (i = 0; i < 3; i++) {
> > + for (j = 0; j < LIMA_PP_WB_REG_NUM; j++)
> > + writel(wb[n++], ip->iomem + LIMA_PP_WB(i) + j * 4);
> > + }
> > +}
> > +
> > +static int lima_pp_hard_reset_poll(struct lima_ip *ip)
> > +{
> > + pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC01A0000);
> > + return pp_read(LIMA_PP_PERF_CNT_0_LIMIT) == 0xC01A0000;
> > +}
> > +
> > +static int lima_pp_hard_reset(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int ret;
> > +
> > + pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0xC0FFE000);
> > + pp_write(LIMA_PP_INT_MASK, 0);
> > + pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_FORCE_RESET);
> > + ret = lima_poll_timeout(ip, lima_pp_hard_reset_poll, 10, 100);
> > + if (ret) {
> > + dev_err(dev->dev, "pp hard reset timeout\n");
> > + return ret;
> > + }
> > +
> > + pp_write(LIMA_PP_PERF_CNT_0_LIMIT, 0);
> > + pp_write(LIMA_PP_INT_CLEAR, LIMA_PP_IRQ_MASK_ALL);
> > + pp_write(LIMA_PP_INT_MASK, LIMA_PP_IRQ_MASK_USED);
> > + return 0;
> > +}
> > +
> > +static void lima_pp_print_version(struct lima_ip *ip)
> > +{
> > + u32 version, major, minor;
> > + char *name;
> > +
> > + version = pp_read(LIMA_PP_VERSION);
> > + major = (version >> 8) & 0xFF;
> > + minor = version & 0xFF;
> > + switch (version >> 16) {
> > + case 0xC807:
> > + name = "mali200";
> > + break;
> > + case 0xCE07:
> > + name = "mali300";
> > + break;
> > + case 0xCD07:
> > + name = "mali400";
> > + break;
> > + case 0xCF07:
> > + name = "mali450";
> > + break;
> > + default:
> > + name = "unknow";
> > + break;
> > + }
> > + dev_info(ip->dev->dev, "%s - %s version major %d minor %d\n",
> > + lima_ip_name(ip), name, major, minor);
> > +}
> > +
> > +int lima_pp_init(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > +
> > + lima_pp_print_version(ip);
> > +
> > + ip->data.async_reset = false;
> > + lima_pp_soft_reset_async(ip);
> > + err = lima_pp_soft_reset_async_wait(ip);
> > + if (err)
> > + return err;
> > +
> > + err = devm_request_irq(dev->dev, ip->irq, lima_pp_irq_handler,
> > + IRQF_SHARED, lima_ip_name(ip), ip);
> > + if (err) {
> > + dev_err(dev->dev, "pp %s fail to request irq\n",
> > + lima_ip_name(ip));
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +void lima_pp_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +int lima_pp_bcast_init(struct lima_ip *ip)
> > +{
> > + struct lima_device *dev = ip->dev;
> > + int err;
> > +
> > + err = devm_request_irq(dev->dev, ip->irq, lima_pp_bcast_irq_handler,
> > + IRQF_SHARED, lima_ip_name(ip), ip);
> > + if (err) {
> > + dev_err(dev->dev, "pp %s fail to request irq\n",
> > + lima_ip_name(ip));
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +void lima_pp_bcast_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > +
> > +static int lima_pp_task_validate(struct lima_sched_pipe *pipe,
> > + struct lima_sched_task *task)
> > +{
> > + u32 num_pp;
> > +
> > + if (pipe->bcast_processor) {
> > + struct drm_lima_m450_pp_frame *f = task->frame;
> > + num_pp = f->num_pp;
> > + }
> > + else {
> > + struct drm_lima_m400_pp_frame *f = task->frame;
> > + num_pp = f->num_pp;
> > + }
> > +
> > + if (num_pp == 0 || num_pp > pipe->num_processor)
> > + return -EINVAL;
> > +
> > + return 0;
> > +}
> > +
> > +static void lima_pp_task_run(struct lima_sched_pipe *pipe,
> > + struct lima_sched_task *task)
> > +{
> > + if (pipe->bcast_processor) {
> > + struct drm_lima_m450_pp_frame *frame = task->frame;
> > + struct lima_device *dev = pipe->bcast_processor->dev;
> > + struct lima_ip *ip = pipe->bcast_processor;
> > + int i;
> > +
> > + pipe->done = 0;
> > + atomic_set(&pipe->task, frame->num_pp);
> > +
> > + if (frame->use_dlbu) {
> > + lima_dlbu_enable(dev, frame->num_pp);
> > +
> > + frame->frame[LIMA_PP_FRAME >> 2] = LIMA_VA_RESERVE_DLBU;
> > + lima_dlbu_set_reg(dev->ip + lima_ip_dlbu, frame->dlbu_regs);
> > + }
> > + else
> > + lima_dlbu_disable(dev);
> > +
> > + lima_bcast_enable(dev, frame->num_pp);
> > +
> > + lima_pp_soft_reset_async_wait(ip);
> > +
> > + lima_pp_write_frame(ip, frame->frame, frame->wb);
> > +
> > + for (i = 0; i < frame->num_pp; i++) {
> > + struct lima_ip *ip = pipe->processor[i];
> > +
> > + pp_write(LIMA_PP_STACK, frame->fragment_stack_address[i]);
> > + if (!frame->use_dlbu)
> > + pp_write(LIMA_PP_FRAME, frame->plbu_array_address[i]);
> > + }
> > +
> > + pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> > + }
> > + else {
> > + struct drm_lima_m400_pp_frame *frame = task->frame;
> > + int i;
> > +
> > + atomic_set(&pipe->task, frame->num_pp);
> > +
> > + for (i = 0; i < frame->num_pp; i++) {
> > + struct lima_ip *ip = pipe->processor[i];
> > +
> > + frame->frame[LIMA_PP_FRAME >> 2] =
> > + frame->plbu_array_address[i];
> > + frame->frame[LIMA_PP_STACK >> 2] =
> > + frame->fragment_stack_address[i];
> > +
> > + lima_pp_soft_reset_async_wait(ip);
> > +
> > + lima_pp_write_frame(ip, frame->frame, frame->wb);
> > +
> > + pp_write(LIMA_PP_CTRL, LIMA_PP_CTRL_START_RENDERING);
> > + }
> > + }
> > +}
> > +
> > +static void lima_pp_task_fini(struct lima_sched_pipe *pipe)
> > +{
> > + if (pipe->bcast_processor)
> > + lima_pp_soft_reset_async(pipe->bcast_processor);
> > + else {
> > + int i;
> > + for (i = 0; i < pipe->num_processor; i++)
> > + lima_pp_soft_reset_async(pipe->processor[i]);
> > + }
> > +}
> > +
> > +static void lima_pp_task_error(struct lima_sched_pipe *pipe)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < pipe->num_processor; i++) {
> > + struct lima_ip *ip = pipe->processor[i];
> > +
> > + dev_err(ip->dev->dev, "pp task error %d int_state=%x status=%x\n",
> > + i, pp_read(LIMA_PP_INT_STATUS), pp_read(LIMA_PP_STATUS));
> > +
> > + lima_pp_hard_reset(ip);
> > + }
> > +}
> > +
> > +static void lima_pp_task_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > + if (atomic_dec_and_test(&pipe->task))
> > + lima_sched_pipe_task_done(pipe);
> > +}
> > +
> > +static struct kmem_cache *lima_pp_task_slab = NULL;
> > +static int lima_pp_task_slab_refcnt = 0;
> > +
> > +int lima_pp_pipe_init(struct lima_device *dev)
> > +{
> > + int frame_size;
> > + struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +
> > + if (dev->id == lima_gpu_mali400)
> > + frame_size = sizeof(struct drm_lima_m400_pp_frame);
> > + else
> > + frame_size = sizeof(struct drm_lima_m450_pp_frame);
> > +
> > + if (!lima_pp_task_slab) {
> > + lima_pp_task_slab = kmem_cache_create_usercopy(
> > + "lima_pp_task", sizeof(struct lima_sched_task) + frame_size,
> > + 0, SLAB_HWCACHE_ALIGN, sizeof(struct lima_sched_task),
> > + frame_size, NULL);
> > + if (!lima_pp_task_slab)
> > + return -ENOMEM;
> > + }
> > + lima_pp_task_slab_refcnt++;
> > +
> > + pipe->frame_size = frame_size;
> > + pipe->task_slab = lima_pp_task_slab;
> > +
> > + pipe->task_validate = lima_pp_task_validate;
> > + pipe->task_run = lima_pp_task_run;
> > + pipe->task_fini = lima_pp_task_fini;
> > + pipe->task_error = lima_pp_task_error;
> > + pipe->task_mmu_error = lima_pp_task_mmu_error;
> > +
> > + return 0;
> > +}
> > +
> > +void lima_pp_pipe_fini(struct lima_device *dev)
> > +{
> > + if (!--lima_pp_task_slab_refcnt) {
> > + kmem_cache_destroy(lima_pp_task_slab);
> > + lima_pp_task_slab = NULL;
> > + }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_pp.h b/drivers/gpu/drm/lima/lima_pp.h
> > new file mode 100644
> > index 000000000000..f83f8cb4d30a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_pp.h
> > @@ -0,0 +1,19 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_PP_H__
> > +#define __LIMA_PP_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +int lima_pp_init(struct lima_ip *ip);
> > +void lima_pp_fini(struct lima_ip *ip);
> > +
> > +int lima_pp_bcast_init(struct lima_ip *ip);
> > +void lima_pp_bcast_fini(struct lima_ip *ip);
> > +
> > +int lima_pp_pipe_init(struct lima_device *dev);
> > +void lima_pp_pipe_fini(struct lima_device *dev);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_regs.h b/drivers/gpu/drm/lima/lima_regs.h
> > new file mode 100644
> > index 000000000000..d5ade8fc8901
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_regs.h
> > @@ -0,0 +1,298 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright 2010-2017 ARM Limited. All rights reserved.
> > + * Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com>
> > + */
> > +
> > +#ifndef __LIMA_REGS_H__
> > +#define __LIMA_REGS_H__
> > +
> > +/* The register definitions in this file are collected from the
> > + * official ARM Mali Utgard GPU kernel driver source code
> > + */
> > +
> > +/* PMU regs */
> > +#define LIMA_PMU_POWER_UP 0x00
> > +#define LIMA_PMU_POWER_DOWN 0x04
> > +#define LIMA_PMU_POWER_GP0_MASK BIT(0)
> > +#define LIMA_PMU_POWER_L2_MASK BIT(1)
> > +#define LIMA_PMU_POWER_PP_MASK(i) BIT(2 + i)
> > +
> > +/*
> > + * On Mali450 each block automatically starts up its corresponding L2
> > + * and the PPs are not fully independently controllable.
> > + * Instead PP0, PP1-3 and PP4-7 can be turned on or off.
> > + */
> > +#define LIMA450_PMU_POWER_PP0_MASK BIT(1)
> > +#define LIMA450_PMU_POWER_PP13_MASK BIT(2)
> > +#define LIMA450_PMU_POWER_PP47_MASK BIT(3)
> > +
> > +#define LIMA_PMU_STATUS 0x08
> > +#define LIMA_PMU_INT_MASK 0x0C
> > +#define LIMA_PMU_INT_RAWSTAT 0x10
> > +#define LIMA_PMU_INT_CLEAR 0x18
> > +#define LIMA_PMU_INT_CMD_MASK BIT(0)
> > +#define LIMA_PMU_SW_DELAY 0x1C
> > +
> > +/* L2 cache regs */
> > +#define LIMA_L2_CACHE_SIZE 0x0004
> > +#define LIMA_L2_CACHE_STATUS 0x0008
> > +#define LIMA_L2_CACHE_STATUS_COMMAND_BUSY BIT(0)
> > +#define LIMA_L2_CACHE_STATUS_DATA_BUSY BIT(1)
> > +#define LIMA_L2_CACHE_COMMAND 0x0010
> > +#define LIMA_L2_CACHE_COMMAND_CLEAR_ALL BIT(0)
> > +#define LIMA_L2_CACHE_CLEAR_PAGE 0x0014
> > +#define LIMA_L2_CACHE_MAX_READS 0x0018
> > +#define LIMA_L2_CACHE_ENABLE 0x001C
> > +#define LIMA_L2_CACHE_ENABLE_ACCESS BIT(0)
> > +#define LIMA_L2_CACHE_ENABLE_READ_ALLOCATE BIT(1)
> > +#define LIMA_L2_CACHE_PERFCNT_SRC0 0x0020
> > +#define LIMA_L2_CACHE_PERFCNT_VAL0 0x0024
> > +#define LIMA_L2_CACHE_PERFCNT_SRC1 0x0028
> > +#define LIMA_L2_CACHE_ERFCNT_VAL1 0x002C
> > +
> > +/* GP regs */
> > +#define LIMA_GP_VSCL_START_ADDR 0x00
> > +#define LIMA_GP_VSCL_END_ADDR 0x04
> > +#define LIMA_GP_PLBUCL_START_ADDR 0x08
> > +#define LIMA_GP_PLBUCL_END_ADDR 0x0c
> > +#define LIMA_GP_PLBU_ALLOC_START_ADDR 0x10
> > +#define LIMA_GP_PLBU_ALLOC_END_ADDR 0x14
> > +#define LIMA_GP_CMD 0x20
> > +#define LIMA_GP_CMD_START_VS BIT(0)
> > +#define LIMA_GP_CMD_START_PLBU BIT(1)
> > +#define LIMA_GP_CMD_UPDATE_PLBU_ALLOC BIT(4)
> > +#define LIMA_GP_CMD_RESET BIT(5)
> > +#define LIMA_GP_CMD_FORCE_HANG BIT(6)
> > +#define LIMA_GP_CMD_STOP_BUS BIT(9)
> > +#define LIMA_GP_CMD_SOFT_RESET BIT(10)
> > +#define LIMA_GP_INT_RAWSTAT 0x24
> > +#define LIMA_GP_INT_CLEAR 0x28
> > +#define LIMA_GP_INT_MASK 0x2C
> > +#define LIMA_GP_INT_STAT 0x30
> > +#define LIMA_GP_IRQ_VS_END_CMD_LST BIT(0)
> > +#define LIMA_GP_IRQ_PLBU_END_CMD_LST BIT(1)
> > +#define LIMA_GP_IRQ_PLBU_OUT_OF_MEM BIT(2)
> > +#define LIMA_GP_IRQ_VS_SEM_IRQ BIT(3)
> > +#define LIMA_GP_IRQ_PLBU_SEM_IRQ BIT(4)
> > +#define LIMA_GP_IRQ_HANG BIT(5)
> > +#define LIMA_GP_IRQ_FORCE_HANG BIT(6)
> > +#define LIMA_GP_IRQ_PERF_CNT_0_LIMIT BIT(7)
> > +#define LIMA_GP_IRQ_PERF_CNT_1_LIMIT BIT(8)
> > +#define LIMA_GP_IRQ_WRITE_BOUND_ERR BIT(9)
> > +#define LIMA_GP_IRQ_SYNC_ERROR BIT(10)
> > +#define LIMA_GP_IRQ_AXI_BUS_ERROR BIT(11)
> > +#define LIMA_GP_IRQ_AXI_BUS_STOPPED BIT(12)
> > +#define LIMA_GP_IRQ_VS_INVALID_CMD BIT(13)
> > +#define LIMA_GP_IRQ_PLB_INVALID_CMD BIT(14)
> > +#define LIMA_GP_IRQ_RESET_COMPLETED BIT(19)
> > +#define LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW BIT(20)
> > +#define LIMA_GP_IRQ_SEMAPHORE_OVERFLOW BIT(21)
> > +#define LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS BIT(22)
> > +#define LIMA_GP_WRITE_BOUND_LOW 0x34
> > +#define LIMA_GP_PERF_CNT_0_ENABLE 0x3C
> > +#define LIMA_GP_PERF_CNT_1_ENABLE 0x40
> > +#define LIMA_GP_PERF_CNT_0_SRC 0x44
> > +#define LIMA_GP_PERF_CNT_1_SRC 0x48
> > +#define LIMA_GP_PERF_CNT_0_VALUE 0x4C
> > +#define LIMA_GP_PERF_CNT_1_VALUE 0x50
> > +#define LIMA_GP_PERF_CNT_0_LIMIT 0x54
> > +#define LIMA_GP_STATUS 0x68
> > +#define LIMA_GP_STATUS_VS_ACTIVE BIT(1)
> > +#define LIMA_GP_STATUS_BUS_STOPPED BIT(2)
> > +#define LIMA_GP_STATUS_PLBU_ACTIVE BIT(3)
> > +#define LIMA_GP_STATUS_BUS_ERROR BIT(6)
> > +#define LIMA_GP_STATUS_WRITE_BOUND_ERR BIT(8)
> > +#define LIMA_GP_VERSION 0x6C
> > +#define LIMA_GP_VSCL_START_ADDR_READ 0x80
> > +#define LIMA_GP_PLBCL_START_ADDR_READ 0x84
> > +#define LIMA_GP_CONTR_AXI_BUS_ERROR_STAT 0x94
> > +
> > +#define LIMA_GP_IRQ_MASK_ALL \
> > + ( \
> > + LIMA_GP_IRQ_VS_END_CMD_LST | \
> > + LIMA_GP_IRQ_PLBU_END_CMD_LST | \
> > + LIMA_GP_IRQ_PLBU_OUT_OF_MEM | \
> > + LIMA_GP_IRQ_VS_SEM_IRQ | \
> > + LIMA_GP_IRQ_PLBU_SEM_IRQ | \
> > + LIMA_GP_IRQ_HANG | \
> > + LIMA_GP_IRQ_FORCE_HANG | \
> > + LIMA_GP_IRQ_PERF_CNT_0_LIMIT | \
> > + LIMA_GP_IRQ_PERF_CNT_1_LIMIT | \
> > + LIMA_GP_IRQ_WRITE_BOUND_ERR | \
> > + LIMA_GP_IRQ_SYNC_ERROR | \
> > + LIMA_GP_IRQ_AXI_BUS_ERROR | \
> > + LIMA_GP_IRQ_AXI_BUS_STOPPED | \
> > + LIMA_GP_IRQ_VS_INVALID_CMD | \
> > + LIMA_GP_IRQ_PLB_INVALID_CMD | \
> > + LIMA_GP_IRQ_RESET_COMPLETED | \
> > + LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> > + LIMA_GP_IRQ_SEMAPHORE_OVERFLOW | \
> > + LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> > +
> > +#define LIMA_GP_IRQ_MASK_ERROR \
> > + ( \
> > + LIMA_GP_IRQ_PLBU_OUT_OF_MEM | \
> > + LIMA_GP_IRQ_FORCE_HANG | \
> > + LIMA_GP_IRQ_WRITE_BOUND_ERR | \
> > + LIMA_GP_IRQ_SYNC_ERROR | \
> > + LIMA_GP_IRQ_AXI_BUS_ERROR | \
> > + LIMA_GP_IRQ_VS_INVALID_CMD | \
> > + LIMA_GP_IRQ_PLB_INVALID_CMD | \
> > + LIMA_GP_IRQ_SEMAPHORE_UNDERFLOW | \
> > + LIMA_GP_IRQ_SEMAPHORE_OVERFLOW | \
> > + LIMA_GP_IRQ_PTR_ARRAY_OUT_OF_BOUNDS)
> > +
> > +#define LIMA_GP_IRQ_MASK_USED \
> > + ( \
> > + LIMA_GP_IRQ_VS_END_CMD_LST | \
> > + LIMA_GP_IRQ_PLBU_END_CMD_LST | \
> > + LIMA_GP_IRQ_MASK_ERROR)
> > +
> > +/* PP regs */
> > +#define LIMA_PP_FRAME 0x0000
> > +#define LIMA_PP_RSW 0x0004
> > +#define LIMA_PP_STACK 0x0030
> > +#define LIMA_PP_STACK_SIZE 0x0034
> > +#define LIMA_PP_ORIGIN_OFFSET_X 0x0040
> > +#define LIMA_PP_WB(i) (0x0100 * (i + 1))
> > +#define LIMA_PP_WB_SOURCE_SELECT 0x0000
> > +#define LIMA_PP_WB_SOURCE_ADDR 0x0004
> > +
> > +#define LIMA_PP_VERSION 0x1000
> > +#define LIMA_PP_CURRENT_REND_LIST_ADDR 0x1004
> > +#define LIMA_PP_STATUS 0x1008
> > +#define LIMA_PP_STATUS_RENDERING_ACTIVE BIT(0)
> > +#define LIMA_PP_STATUS_BUS_STOPPED BIT(4)
> > +#define LIMA_PP_CTRL 0x100c
> > +#define LIMA_PP_CTRL_STOP_BUS BIT(0)
> > +#define LIMA_PP_CTRL_FLUSH_CACHES BIT(3)
> > +#define LIMA_PP_CTRL_FORCE_RESET BIT(5)
> > +#define LIMA_PP_CTRL_START_RENDERING BIT(6)
> > +#define LIMA_PP_CTRL_SOFT_RESET BIT(7)
> > +#define LIMA_PP_INT_RAWSTAT 0x1020
> > +#define LIMA_PP_INT_CLEAR 0x1024
> > +#define LIMA_PP_INT_MASK 0x1028
> > +#define LIMA_PP_INT_STATUS 0x102c
> > +#define LIMA_PP_IRQ_END_OF_FRAME BIT(0)
> > +#define LIMA_PP_IRQ_END_OF_TILE BIT(1)
> > +#define LIMA_PP_IRQ_HANG BIT(2)
> > +#define LIMA_PP_IRQ_FORCE_HANG BIT(3)
> > +#define LIMA_PP_IRQ_BUS_ERROR BIT(4)
> > +#define LIMA_PP_IRQ_BUS_STOP BIT(5)
> > +#define LIMA_PP_IRQ_CNT_0_LIMIT BIT(6)
> > +#define LIMA_PP_IRQ_CNT_1_LIMIT BIT(7)
> > +#define LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR BIT(8)
> > +#define LIMA_PP_IRQ_INVALID_PLIST_COMMAND BIT(9)
> > +#define LIMA_PP_IRQ_CALL_STACK_UNDERFLOW BIT(10)
> > +#define LIMA_PP_IRQ_CALL_STACK_OVERFLOW BIT(11)
> > +#define LIMA_PP_IRQ_RESET_COMPLETED BIT(12)
> > +#define LIMA_PP_WRITE_BOUNDARY_LOW 0x1044
> > +#define LIMA_PP_BUS_ERROR_STATUS 0x1050
> > +#define LIMA_PP_PERF_CNT_0_ENABLE 0x1080
> > +#define LIMA_PP_PERF_CNT_0_SRC 0x1084
> > +#define LIMA_PP_PERF_CNT_0_LIMIT 0x1088
> > +#define LIMA_PP_PERF_CNT_0_VALUE 0x108c
> > +#define LIMA_PP_PERF_CNT_1_ENABLE 0x10a0
> > +#define LIMA_PP_PERF_CNT_1_SRC 0x10a4
> > +#define LIMA_PP_PERF_CNT_1_LIMIT 0x10a8
> > +#define LIMA_PP_PERF_CNT_1_VALUE 0x10ac
> > +#define LIMA_PP_PERFMON_CONTR 0x10b0
> > +#define LIMA_PP_PERFMON_BASE 0x10b4
> > +
> > +#define LIMA_PP_IRQ_MASK_ALL \
> > + ( \
> > + LIMA_PP_IRQ_END_OF_FRAME | \
> > + LIMA_PP_IRQ_END_OF_TILE | \
> > + LIMA_PP_IRQ_HANG | \
> > + LIMA_PP_IRQ_FORCE_HANG | \
> > + LIMA_PP_IRQ_BUS_ERROR | \
> > + LIMA_PP_IRQ_BUS_STOP | \
> > + LIMA_PP_IRQ_CNT_0_LIMIT | \
> > + LIMA_PP_IRQ_CNT_1_LIMIT | \
> > + LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR | \
> > + LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> > + LIMA_PP_IRQ_CALL_STACK_UNDERFLOW | \
> > + LIMA_PP_IRQ_CALL_STACK_OVERFLOW | \
> > + LIMA_PP_IRQ_RESET_COMPLETED)
> > +
> > +#define LIMA_PP_IRQ_MASK_ERROR \
> > + ( \
> > + LIMA_PP_IRQ_FORCE_HANG | \
> > + LIMA_PP_IRQ_BUS_ERROR | \
> > + LIMA_PP_IRQ_WRITE_BOUNDARY_ERROR | \
> > + LIMA_PP_IRQ_INVALID_PLIST_COMMAND | \
> > + LIMA_PP_IRQ_CALL_STACK_UNDERFLOW | \
> > + LIMA_PP_IRQ_CALL_STACK_OVERFLOW)
> > +
> > +#define LIMA_PP_IRQ_MASK_USED \
> > + ( \
> > + LIMA_PP_IRQ_END_OF_FRAME | \
> > + LIMA_PP_IRQ_MASK_ERROR)
> > +
> > +/* MMU regs */
> > +#define LIMA_MMU_DTE_ADDR 0x0000
> > +#define LIMA_MMU_STATUS 0x0004
> > +#define LIMA_MMU_STATUS_PAGING_ENABLED BIT(0)
> > +#define LIMA_MMU_STATUS_PAGE_FAULT_ACTIVE BIT(1)
> > +#define LIMA_MMU_STATUS_STALL_ACTIVE BIT(2)
> > +#define LIMA_MMU_STATUS_IDLE BIT(3)
> > +#define LIMA_MMU_STATUS_REPLAY_BUFFER_EMPTY BIT(4)
> > +#define LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE BIT(5)
> > +#define LIMA_MMU_STATUS_BUS_ID(x) ((x >> 6) & 0x1F)
> > +#define LIMA_MMU_COMMAND 0x0008
> > +#define LIMA_MMU_COMMAND_ENABLE_PAGING 0x00
> > +#define LIMA_MMU_COMMAND_DISABLE_PAGING 0x01
> > +#define LIMA_MMU_COMMAND_ENABLE_STALL 0x02
> > +#define LIMA_MMU_COMMAND_DISABLE_STALL 0x03
> > +#define LIMA_MMU_COMMAND_ZAP_CACHE 0x04
> > +#define LIMA_MMU_COMMAND_PAGE_FAULT_DONE 0x05
> > +#define LIMA_MMU_COMMAND_HARD_RESET 0x06
> > +#define LIMA_MMU_PAGE_FAULT_ADDR 0x000C
> > +#define LIMA_MMU_ZAP_ONE_LINE 0x0010
> > +#define LIMA_MMU_INT_RAWSTAT 0x0014
> > +#define LIMA_MMU_INT_CLEAR 0x0018
> > +#define LIMA_MMU_INT_MASK 0x001C
> > +#define LIMA_MMU_INT_PAGE_FAULT BIT(0)
> > +#define LIMA_MMU_INT_READ_BUS_ERROR BIT(1)
> > +#define LIMA_MMU_INT_STATUS 0x0020
> > +
> > +#define LIMA_VM_FLAG_PRESENT BIT(0)
> > +#define LIMA_VM_FLAG_READ_PERMISSION BIT(1)
> > +#define LIMA_VM_FLAG_WRITE_PERMISSION BIT(2)
> > +#define LIMA_VM_FLAG_OVERRIDE_CACHE BIT(3)
> > +#define LIMA_VM_FLAG_WRITE_CACHEABLE BIT(4)
> > +#define LIMA_VM_FLAG_WRITE_ALLOCATE BIT(5)
> > +#define LIMA_VM_FLAG_WRITE_BUFFERABLE BIT(6)
> > +#define LIMA_VM_FLAG_READ_CACHEABLE BIT(7)
> > +#define LIMA_VM_FLAG_READ_ALLOCATE BIT(8)
> > +#define LIMA_VM_FLAG_MASK 0x1FF
> > +
> > +#define LIMA_VM_FLAGS_CACHE ( \
> > + LIMA_VM_FLAG_PRESENT | \
> > + LIMA_VM_FLAG_READ_PERMISSION | \
> > + LIMA_VM_FLAG_WRITE_PERMISSION | \
> > + LIMA_VM_FLAG_OVERRIDE_CACHE | \
> > + LIMA_VM_FLAG_WRITE_CACHEABLE | \
> > + LIMA_VM_FLAG_WRITE_BUFFERABLE | \
> > + LIMA_VM_FLAG_READ_CACHEABLE | \
> > + LIMA_VM_FLAG_READ_ALLOCATE )
> > +
> > +#define LIMA_VM_FLAGS_UNCACHE ( \
> > + LIMA_VM_FLAG_PRESENT | \
> > + LIMA_VM_FLAG_READ_PERMISSION | \
> > + LIMA_VM_FLAG_WRITE_PERMISSION )
> > +
> > +/* DLBU regs */
> > +#define LIMA_DLBU_MASTER_TLLIST_PHYS_ADDR 0x0000
> > +#define LIMA_DLBU_MASTER_TLLIST_VADDR 0x0004
> > +#define LIMA_DLBU_TLLIST_VBASEADDR 0x0008
> > +#define LIMA_DLBU_FB_DIM 0x000C
> > +#define LIMA_DLBU_TLLIST_CONF 0x0010
> > +#define LIMA_DLBU_START_TILE_POS 0x0014
> > +#define LIMA_DLBU_PP_ENABLE_MASK 0x0018
> > +
> > +/* BCAST regs */
> > +#define LIMA_BCAST_BROADCAST_MASK 0x0
> > +#define LIMA_BCAST_INTERRUPT_MASK 0x4
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> > new file mode 100644
> > index 000000000000..539b29ce5e9a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_sched.c
> > @@ -0,0 +1,398 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/kthread.h>
> > +#include <linux/slab.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_sched.h"
> > +#include "lima_vm.h"
> > +#include "lima_mmu.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_object.h"
> > +
> > +struct lima_fence {
> > + struct dma_fence base;
> > + struct lima_sched_pipe *pipe;
> > +};
> > +
> > +static struct kmem_cache *lima_fence_slab = NULL;
> > +
> > +int lima_sched_slab_init(void)
> > +{
> > + lima_fence_slab = kmem_cache_create(
> > + "lima_fence", sizeof(struct lima_fence), 0,
> > + SLAB_HWCACHE_ALIGN, NULL);
> > + if (!lima_fence_slab)
> > + return -ENOMEM;
> > +
> > + return 0;
> > +}
> > +
> > +void lima_sched_slab_fini(void)
> > +{
> > + kmem_cache_destroy(lima_fence_slab);
> > +}
> > +
> > +static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
> > +{
> > + return container_of(fence, struct lima_fence, base);
> > +}
> > +
> > +static const char *lima_fence_get_driver_name(struct dma_fence *fence)
> > +{
> > + return "lima";
> > +}
> > +
> > +static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
> > +{
> > + struct lima_fence *f = to_lima_fence(fence);
> > +
> > + return f->pipe->base.name;
> > +}
> > +
> > +static bool lima_fence_enable_signaling(struct dma_fence *fence)
> > +{
> > + return true;
> > +}
> > +
> > +static void lima_fence_release_rcu(struct rcu_head *rcu)
> > +{
> > + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
> > + struct lima_fence *fence = to_lima_fence(f);
> > +
> > + kmem_cache_free(lima_fence_slab, fence);
> > +}
> > +
> > +static void lima_fence_release(struct dma_fence *fence)
> > +{
> > + struct lima_fence *f = to_lima_fence(fence);
> > +
> > + call_rcu(&f->base.rcu, lima_fence_release_rcu);
> > +}
> > +
> > +static const struct dma_fence_ops lima_fence_ops = {
> > + .get_driver_name = lima_fence_get_driver_name,
> > + .get_timeline_name = lima_fence_get_timeline_name,
> > + .enable_signaling = lima_fence_enable_signaling,
> > + .wait = dma_fence_default_wait,
> > + .release = lima_fence_release,
> > +};
> > +
> > +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
> > +{
> > + struct lima_fence *fence;
> > +
> > + fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
>
> Out of curiosity, what is the benefit of using a separate slab here?
> If this is beneficial, then other drivers should do this too and it
> should be common. Otherwise, it adds some complexity.
>
> And maybe the slab should be initialized in probe rather than module_init.
>
> > + if (!fence)
> > + return NULL;
> > +
> > + fence->pipe = pipe;
> > + dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
> > + pipe->fence_context, ++pipe->fence_seqno);
> > +
> > + return fence;
> > +}
> > +
> > +static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
> > +{
> > + return container_of(job, struct lima_sched_task, base);
> > +}
> > +
> > +static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
> > +{
> > + return container_of(sched, struct lima_sched_pipe, base);
> > +}
> > +
> > +int lima_sched_task_init(struct lima_sched_task *task,
> > + struct lima_sched_context *context,
> > + struct lima_bo **bos, int num_bos,
> > + struct lima_vm *vm)
> > +{
> > + int err, i;
> > +
> > + task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
> > + if (!task->bos)
> > + return -ENOMEM;
> > +
> > + for (i = 0; i < num_bos; i++)
> > + drm_gem_object_get(&bos[i]->gem);
> > +
> > + err = drm_sched_job_init(&task->base, &context->base, vm);
> > + if (err) {
> > + kfree(task->bos);
> > + return err;
> > + }
> > +
> > + task->num_bos = num_bos;
> > + task->vm = lima_vm_get(vm);
> > + return 0;
> > +}
> > +
> > +void lima_sched_task_fini(struct lima_sched_task *task)
> > +{
> > + int i;
> > +
> > + drm_sched_job_cleanup(&task->base);
> > +
> > + for (i = 0; i < task->num_dep; i++)
> > + dma_fence_put(task->dep[i]);
> > +
> > + kfree(task->dep);
> > +
> > + if (task->bos) {
> > + for (i = 0; i < task->num_bos; i++)
> > + drm_gem_object_put_unlocked(&task->bos[i]->gem);
> > + kfree(task->bos);
> > + }
> > +
> > + lima_vm_put(task->vm);
> > +}
> > +
> > +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
> > +{
> > + int i, new_dep = 4;
> > +
> > + /* same context's fence is definitely earlier than this task */
> > + if (fence->context == task->base.s_fence->finished.context) {
> > + dma_fence_put(fence);
> > + return 0;
> > + }
> > +
> > + if (task->dep && task->num_dep == task->max_dep)
> > + new_dep = task->max_dep * 2;
> > +
> > + if (task->max_dep < new_dep) {
> > + void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
> > + if (!dep)
> > + return -ENOMEM;
> > + task->max_dep = new_dep;
> > + task->dep = dep;
> > + }
> > +
> > + for (i = 0; i < task->num_dep; i++) {
> > + if (task->dep[i]->context == fence->context &&
> > + dma_fence_is_later(fence, task->dep[i])) {
> > + dma_fence_put(task->dep[i]);
> > + task->dep[i] = fence;
> > + return 0;
> > + }
> > + }
> > +
> > + task->dep[task->num_dep++] = fence;
> > + return 0;
> > +}
> > +
> > +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> > + struct lima_sched_context *context,
> > + atomic_t *guilty)
> > +{
> > + struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
> > +
> > + return drm_sched_entity_init(&context->base, &rq, 1, guilty);
> > +}
> > +
> > +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> > + struct lima_sched_context *context)
> > +{
> > + drm_sched_entity_fini(&context->base);
> > +}
> > +
> > +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> > + struct lima_sched_task *task)
> > +{
> > + struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);
> > +
> > + drm_sched_entity_push_job(&task->base, &context->base);
> > + return fence;
> > +}
> > +
> > +static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
> > + struct drm_sched_entity *entity)
> > +{
> > + struct lima_sched_task *task = to_lima_task(job);
> > + int i;
> > +
> > + for (i = 0; i < task->num_dep; i++) {
> > + struct dma_fence *fence = task->dep[i];
> > +
> > + if (!task->dep[i])
> > + continue;
> > +
> > + task->dep[i] = NULL;
> > +
> > + if (!dma_fence_is_signaled(fence))
> > + return fence;
> > +
> > + dma_fence_put(fence);
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
> > +{
> > + struct lima_sched_task *task = to_lima_task(job);
> > + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > + struct lima_fence *fence;
> > + struct dma_fence *ret;
> > + struct lima_vm *vm = NULL, *last_vm = NULL;
> > + int i;
> > +
> > + /* after GPU reset */
> > + if (job->s_fence->finished.error < 0)
> > + return NULL;
> > +
> > + fence = lima_fence_create(pipe);
> > + if (!fence)
> > + return NULL;
> > + task->fence = &fence->base;
> > +
> > + /* for caller usage of the fence, otherwise irq handler
> > + * may consume the fence before the caller uses it */
> > + ret = dma_fence_get(task->fence);
> > +
> > + pipe->current_task = task;
> > +
> > + /* this is needed for MMU to work correctly, otherwise GP/PP
> > + * will hang or page fault for an unknown reason after running for
> > + * a while.
> > + *
> > + * Need to investigate:
> > + * 1. is it related to TLB
> > + * 2. how much performance will be affected by L2 cache flush
> > + * 3. can we reduce the calling of this function because all
> > + * GP/PP use the same L2 cache on mali400
> > + *
> > + * TODO:
> > + * 1. move this to task fini to save some wait time?
> > + * 2. when GP/PP use different l2 cache, need PP wait GP l2
> > + * cache flush?
> > + */
> > + for (i = 0; i < pipe->num_l2_cache; i++)
> > + lima_l2_cache_flush(pipe->l2_cache[i]);
> > +
> > + if (task->vm != pipe->current_vm) {
> > + vm = lima_vm_get(task->vm);
> > + last_vm = pipe->current_vm;
> > + pipe->current_vm = task->vm;
> > + }
> > +
> > + if (pipe->bcast_mmu)
> > + lima_mmu_switch_vm(pipe->bcast_mmu, vm);
> > + else {
> > + for (i = 0; i < pipe->num_mmu; i++)
> > + lima_mmu_switch_vm(pipe->mmu[i], vm);
> > + }
> > +
> > + if (last_vm)
> > + lima_vm_put(last_vm);
> > +
> > + pipe->error = false;
> > + pipe->task_run(pipe, task);
> > +
> > + return task->fence;
> > +}
> > +
> > +static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
> > + struct lima_sched_task *task)
> > +{
> > + kthread_park(pipe->base.thread);
> > + drm_sched_hw_job_reset(&pipe->base, &task->base);
> > +
> > + pipe->task_error(pipe);
> > +
> > + if (pipe->bcast_mmu)
> > + lima_mmu_page_fault_resume(pipe->bcast_mmu);
> > + else {
> > + int i;
> > + for (i = 0; i < pipe->num_mmu; i++)
> > + lima_mmu_page_fault_resume(pipe->mmu[i]);
> > + }
> > +
> > + if (pipe->current_vm)
> > + lima_vm_put(pipe->current_vm);
> > +
> > + pipe->current_vm = NULL;
> > + pipe->current_task = NULL;
> > +
> > + drm_sched_job_recovery(&pipe->base);
> > + kthread_unpark(pipe->base.thread);
> > +}
> > +
> > +static void lima_sched_timedout_job(struct drm_sched_job *job)
> > +{
> > + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > + struct lima_sched_task *task = to_lima_task(job);
> > +
> > + DRM_ERROR("lima job timeout\n");
> > +
> > + lima_sched_handle_error_task(pipe, task);
> > +}
> > +
> > +static void lima_sched_free_job(struct drm_sched_job *job)
> > +{
> > + struct lima_sched_task *task = to_lima_task(job);
> > + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
> > + struct lima_vm *vm = task->vm;
> > + struct lima_bo **bos = task->bos;
> > + int i;
> > +
> > + dma_fence_put(task->fence);
> > +
> > + for (i = 0; i < task->num_bos; i++)
> > + lima_vm_bo_del(vm, bos[i]);
> > +
> > + lima_sched_task_fini(task);
> > + kmem_cache_free(pipe->task_slab, task);
> > +}
> > +
> > +const struct drm_sched_backend_ops lima_sched_ops = {
> > + .dependency = lima_sched_dependency,
> > + .run_job = lima_sched_run_job,
> > + .timedout_job = lima_sched_timedout_job,
> > + .free_job = lima_sched_free_job,
> > +};
> > +
> > +static void lima_sched_error_work(struct work_struct *work)
> > +{
> > + struct lima_sched_pipe *pipe =
> > + container_of(work, struct lima_sched_pipe, error_work);
> > + struct lima_sched_task *task = pipe->current_task;
> > +
> > + lima_sched_handle_error_task(pipe, task);
> > +}
> > +
> > +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
> > +{
> > + long timeout;
> > +
> > + if (lima_sched_timeout_ms <= 0)
> > + timeout = MAX_SCHEDULE_TIMEOUT;
> > + else
> > + timeout = msecs_to_jiffies(lima_sched_timeout_ms);
> > +
> > + pipe->fence_context = dma_fence_context_alloc(1);
> > + spin_lock_init(&pipe->fence_lock);
> > +
> > + INIT_WORK(&pipe->error_work, lima_sched_error_work);
> > +
> > + return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
> > +}
> > +
> > +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
> > +{
> > + drm_sched_fini(&pipe->base);
> > +}
> > +
> > +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
> > +{
> > + if (pipe->error)
> > + schedule_work(&pipe->error_work);
> > + else {
> > + struct lima_sched_task *task = pipe->current_task;
> > +
> > + pipe->task_fini(pipe);
> > + dma_fence_signal(task->fence);
> > + }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h
> > new file mode 100644
> > index 000000000000..44985e4da3fb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_sched.h
> > @@ -0,0 +1,104 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_SCHED_H__
> > +#define __LIMA_SCHED_H__
> > +
> > +#include <drm/gpu_scheduler.h>
> > +
> > +struct lima_vm;
> > +
> > +struct lima_sched_task {
> > + struct drm_sched_job base;
> > +
> > + struct lima_vm *vm;
> > + void *frame;
> > +
> > + struct dma_fence **dep;
> > + int num_dep;
> > + int max_dep;
> > +
> > + struct lima_bo **bos;
> > + int num_bos;
> > +
> > + /* pipe fence */
> > + struct dma_fence *fence;
> > +};
> > +
> > +struct lima_sched_context {
> > + struct drm_sched_entity base;
> > +};
> > +
> > +#define LIMA_SCHED_PIPE_MAX_MMU 8
> > +#define LIMA_SCHED_PIPE_MAX_L2_CACHE 2
> > +#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
> > +
> > +struct lima_ip;
> > +
> > +struct lima_sched_pipe {
> > + struct drm_gpu_scheduler base;
> > +
> > + u64 fence_context;
> > + u32 fence_seqno;
> > + spinlock_t fence_lock;
> > +
> > + struct lima_sched_task *current_task;
> > + struct lima_vm *current_vm;
> > +
> > + struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
> > + int num_mmu;
> > +
> > + struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
> > + int num_l2_cache;
> > +
> > + struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
> > + int num_processor;
> > +
> > + struct lima_ip *bcast_processor;
> > + struct lima_ip *bcast_mmu;
> > +
> > + u32 done;
> > + bool error;
> > + atomic_t task;
> > +
> > + int frame_size;
> > + struct kmem_cache *task_slab;
> > +
> > + int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> > + void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
> > + void (*task_fini)(struct lima_sched_pipe *pipe);
> > + void (*task_error)(struct lima_sched_pipe *pipe);
> > + void (*task_mmu_error)(struct lima_sched_pipe *pipe);
> > +
> > + struct work_struct error_work;
> > +};
> > +
> > +int lima_sched_task_init(struct lima_sched_task *task,
> > + struct lima_sched_context *context,
> > + struct lima_bo **bos, int num_bos,
> > + struct lima_vm *vm);
> > +void lima_sched_task_fini(struct lima_sched_task *task);
> > +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
> > +
> > +int lima_sched_context_init(struct lima_sched_pipe *pipe,
> > + struct lima_sched_context *context,
> > + atomic_t *guilty);
> > +void lima_sched_context_fini(struct lima_sched_pipe *pipe,
> > + struct lima_sched_context *context);
> > +struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
> > + struct lima_sched_task *task);
> > +
> > +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
> > +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
> > +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
> > +
> > +static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
> > +{
> > + pipe->error = true;
> > + pipe->task_mmu_error(pipe);
> > +}
> > +
> > +int lima_sched_slab_init(void);
> > +void lima_sched_slab_fini(void);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
> > new file mode 100644
> > index 000000000000..39eba3fae019
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_vm.c
> > @@ -0,0 +1,280 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/slab.h>
> > +#include <linux/dma-mapping.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +struct lima_bo_va {
> > + struct list_head list;
> > + unsigned ref_count;
> > +
> > + struct drm_mm_node node;
> > +
> > + struct lima_vm *vm;
> > +};
> > +
> > +#define LIMA_VM_PD_SHIFT 22
> > +#define LIMA_VM_PT_SHIFT 12
> > +#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
> > +
> > +#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
> > +#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
> > +
> > +#define LIMA_PDE(va) (va >> LIMA_VM_PD_SHIFT)
> > +#define LIMA_PTE(va) ((va & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
> > +#define LIMA_PBE(va) (va >> LIMA_VM_PB_SHIFT)
> > +#define LIMA_BTE(va) ((va & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
> > +
> > +
> > +static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
> > +{
> > + u32 addr;
> > +
> > + for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > + u32 pbe = LIMA_PBE(addr);
> > + u32 bte = LIMA_BTE(addr);
> > +
> > + vm->bts[pbe].cpu[bte] = 0;
> > + }
> > +}
> > +
> > +static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
> > + u32 start, u32 end)
> > +{
> > + u64 addr;
> > + int i = 0;
> > +
> > + for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > + u32 pbe = LIMA_PBE(addr);
> > + u32 bte = LIMA_BTE(addr);
> > +
> > + if (!vm->bts[pbe].cpu) {
> > + dma_addr_t pts;
> > + u32 *pd;
> > + int j;
> > +
> > + vm->bts[pbe].cpu = dma_alloc_wc(
> > + vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> > + &vm->bts[pbe].dma, GFP_KERNEL | __GFP_ZERO);
> > + if (!vm->bts[pbe].cpu) {
> > + if (addr != start)
> > + lima_vm_unmap_page_table(vm, start, addr - 1);
> > + return -ENOMEM;
> > + }
> > +
> > + pts = vm->bts[pbe].dma;
> > + pd = vm->pd.cpu + (pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT);
> > + for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> > + pd[j] = pts | LIMA_VM_FLAG_PRESENT;
> > + pts += LIMA_PAGE_SIZE;
> > + }
> > + }
> > +
> > + vm->bts[pbe].cpu[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static struct lima_bo_va *
> > +lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > + struct lima_bo_va *bo_va, *ret = NULL;
> > +
> > + list_for_each_entry(bo_va, &bo->va, list) {
> > + if (bo_va->vm == vm) {
> > + ret = bo_va;
> > + break;
> > + }
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create)
> > +{
> > + struct lima_bo_va *bo_va;
> > + int err;
> > +
> > + mutex_lock(&bo->lock);
> > +
> > + bo_va = lima_vm_bo_find(vm, bo);
> > + if (bo_va) {
> > + bo_va->ref_count++;
> > + mutex_unlock(&bo->lock);
> > + return 0;
> > + }
> > +
> > + /* should not create new bo_va if not asked by caller */
> > + if (!create) {
> > + mutex_unlock(&bo->lock);
> > + return -ENOENT;
> > + }
> > +
> > + bo_va = kzalloc(sizeof(*bo_va), GFP_KERNEL);
> > + if (!bo_va) {
> > + err = -ENOMEM;
> > + goto err_out0;
> > + }
> > +
> > + bo_va->vm = vm;
> > + bo_va->ref_count = 1;
> > +
> > + mutex_lock(&vm->lock);
> > +
> > + err = drm_mm_insert_node(&vm->mm, &bo_va->node, bo->gem.size);
> > + if (err)
> > + goto err_out1;
> > +
> > + err = lima_vm_map_page_table(vm, bo->pages_dma_addr, bo_va->node.start,
> > + bo_va->node.start + bo_va->node.size - 1);
> > + if (err)
> > + goto err_out2;
> > +
> > + mutex_unlock(&vm->lock);
> > +
> > + list_add_tail(&bo_va->list, &bo->va);
>
> So you can have 1 BO at multiple VAs? Is that really needed?
>
> > +
> > + mutex_unlock(&bo->lock);
> > + return 0;
> > +
> > +err_out2:
> > + drm_mm_remove_node(&bo_va->node);
> > +err_out1:
> > + mutex_unlock(&vm->lock);
> > + kfree(bo_va);
> > +err_out0:
> > + mutex_unlock(&bo->lock);
> > + return err;
> > +}
> > +
> > +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > + struct lima_bo_va *bo_va;
> > +
> > + mutex_lock(&bo->lock);
> > +
> > + bo_va = lima_vm_bo_find(vm, bo);
> > + if (--bo_va->ref_count > 0) {
> > + mutex_unlock(&bo->lock);
> > + return;
> > + }
> > +
> > + mutex_lock(&vm->lock);
> > +
> > + lima_vm_unmap_page_table(vm, bo_va->node.start,
> > + bo_va->node.start + bo_va->node.size - 1);
> > +
> > + drm_mm_remove_node(&bo_va->node);
> > +
> > + mutex_unlock(&vm->lock);
> > +
> > + list_del(&bo_va->list);
> > +
> > + mutex_unlock(&bo->lock);
> > +
> > + kfree(bo_va);
> > +}
> > +
> > +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > + struct lima_bo_va *bo_va;
> > + u32 ret;
> > +
> > + mutex_lock(&bo->lock);
> > +
> > + bo_va = lima_vm_bo_find(vm, bo);
> > + ret = bo_va->node.start;
> > +
> > + mutex_unlock(&bo->lock);
> > +
> > + return ret;
> > +}
> > +
> > +struct lima_vm *lima_vm_create(struct lima_device *dev)
> > +{
> > + struct lima_vm *vm;
> > +
> > + vm = kzalloc(sizeof(*vm), GFP_KERNEL);
> > + if (!vm)
> > + return NULL;
> > +
> > + vm->dev = dev;
> > + mutex_init(&vm->lock);
> > + kref_init(&vm->refcount);
> > +
> > + vm->pd.cpu = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE, &vm->pd.dma,
> > + GFP_KERNEL | __GFP_ZERO);
> > + if (!vm->pd.cpu)
> > + goto err_out0;
> > +
> > + if (dev->dlbu_cpu) {
> > + int err = lima_vm_map_page_table(
> > + vm, &dev->dlbu_dma, LIMA_VA_RESERVE_DLBU,
> > + LIMA_VA_RESERVE_DLBU + LIMA_PAGE_SIZE - 1);
> > + if (err)
> > + goto err_out1;
> > + }
> > +
> > + drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);
> > +
> > + return vm;
> > +
> > +err_out1:
> > + dma_free_wc(dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> > +err_out0:
> > + kfree(vm);
> > + return NULL;
> > +}
> > +
> > +void lima_vm_release(struct kref *kref)
> > +{
> > + struct lima_vm *vm = container_of(kref, struct lima_vm, refcount);
> > + int i;
> > +
> > + drm_mm_takedown(&vm->mm);
> > +
> > + for (i = 0; i < LIMA_VM_NUM_BT; i++) {
> > + if (vm->bts[i].cpu)
> > + dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> > + vm->bts[i].cpu, vm->bts[i].dma);
> > + }
> > +
> > + if (vm->pd.cpu)
> > + dma_free_wc(vm->dev->dev, LIMA_PAGE_SIZE, vm->pd.cpu, vm->pd.dma);
> > +
> > + kfree(vm);
> > +}
> > +
> > +void lima_vm_print(struct lima_vm *vm)
> > +{
> > + int i, j, k;
> > + u32 *pd, *pt;
> > +
> > + if (!vm->pd.cpu)
> > + return;
> > +
> > + pd = vm->pd.cpu;
> > + for (i = 0; i < LIMA_VM_NUM_BT; i++) {
> > + if (!vm->bts[i].cpu)
> > + continue;
> > +
> > + pt = vm->bts[i].cpu;
> > + for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++) {
> > + int idx = (i << LIMA_VM_NUM_PT_PER_BT_SHIFT) + j;
> > + printk(KERN_INFO "lima vm pd %03x:%08x\n", idx, pd[idx]);
> > +
> > + for (k = 0; k < LIMA_PAGE_ENT_NUM; k++) {
> > + u32 pte = *pt++;
> > + if (pte)
> > + printk(KERN_INFO " pt %03x:%08x\n", k, pte);
> > + }
> > + }
> > + }
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_vm.h b/drivers/gpu/drm/lima/lima_vm.h
> > new file mode 100644
> > index 000000000000..a135e2f05315
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_vm.h
> > @@ -0,0 +1,62 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_VM_H__
> > +#define __LIMA_VM_H__
> > +
> > +#include <drm/drm_mm.h>
> > +#include <linux/kref.h>
> > +
> > +#define LIMA_PAGE_SIZE 4096
> > +#define LIMA_PAGE_MASK (LIMA_PAGE_SIZE - 1)
> > +#define LIMA_PAGE_ENT_NUM (LIMA_PAGE_SIZE / sizeof(u32))
> > +
> > +#define LIMA_VM_NUM_PT_PER_BT_SHIFT 3
> > +#define LIMA_VM_NUM_PT_PER_BT (1 << LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +#define LIMA_VM_NUM_BT (LIMA_PAGE_ENT_NUM >> LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +
> > +#define LIMA_VA_RESERVE_START 0xFFF00000
> > +#define LIMA_VA_RESERVE_DLBU LIMA_VA_RESERVE_START
> > +#define LIMA_VA_RESERVE_END 0x100000000
> > +
> > +struct lima_device;
> > +
> > +struct lima_vm_page {
> > + u32 *cpu;
> > + dma_addr_t dma;
> > +};
> > +
> > +struct lima_vm {
> > + struct mutex lock;
> > + struct kref refcount;
> > +
> > + struct drm_mm mm;
> > +
> > + struct lima_device *dev;
> > +
> > + struct lima_vm_page pd;
> > + struct lima_vm_page bts[LIMA_VM_NUM_BT];
> > +};
> > +
> > +int lima_vm_bo_add(struct lima_vm *vm, struct lima_bo *bo, bool create);
> > +void lima_vm_bo_del(struct lima_vm *vm, struct lima_bo *bo);
> > +
> > +u32 lima_vm_get_va(struct lima_vm *vm, struct lima_bo *bo);
> > +
> > +struct lima_vm *lima_vm_create(struct lima_device *dev);
> > +void lima_vm_release(struct kref *kref);
> > +
> > +static inline struct lima_vm *lima_vm_get(struct lima_vm *vm)
> > +{
> > + kref_get(&vm->refcount);
> > + return vm;
> > +}
> > +
> > +static inline void lima_vm_put(struct lima_vm *vm)
> > +{
> > + kref_put(&vm->refcount, lima_vm_release);
> > +}
> > +
> > +void lima_vm_print(struct lima_vm *vm);
> > +
> > +#endif
> > diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
> > new file mode 100644
> > index 000000000000..64fb4807958d
> > --- /dev/null
> > +++ b/include/uapi/drm/lima_drm.h
> > @@ -0,0 +1,126 @@
> > +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_DRM_H__
> > +#define __LIMA_DRM_H__
> > +
> > +#include "drm.h"
> > +
> > +#if defined(__cplusplus)
> > +extern "C" {
> > +#endif
> > +
> > +#define LIMA_INFO_GPU_MALI400 0x00
> > +#define LIMA_INFO_GPU_MALI450 0x01
> > +
> > +struct drm_lima_info {
> > + __u32 gpu_id; /* out */
> > + __u32 num_pp; /* out */
> > + __u32 valid; /* out */
> > + __u32 _resv[7];
> > +};
> > +
> > +struct drm_lima_gem_create {
> > + __u32 size; /* in */
> > + __u32 flags; /* in */
> > + __u32 handle; /* out */
> > + __u32 pad;
> > +};
> > +
> > +struct drm_lima_gem_info {
> > + __u32 handle; /* in */
> > + __u32 va; /* out */
> > + __u64 offset; /* out */
> > +};
> > +
> > +#define LIMA_SUBMIT_BO_READ 0x01
> > +#define LIMA_SUBMIT_BO_WRITE 0x02
> > +
> > +struct drm_lima_gem_submit_bo {
> > + __u32 handle; /* in */
> > + __u32 flags; /* in */
> > +};
> > +
> > +#define LIMA_GP_FRAME_REG_NUM 6
> > +
> > +struct drm_lima_gp_frame {
> > + __u32 frame[LIMA_GP_FRAME_REG_NUM];
> > +};
> > +
> > +#define LIMA_PP_FRAME_REG_NUM 23
> > +#define LIMA_PP_WB_REG_NUM 12
> > +
> > +struct drm_lima_m400_pp_frame {
> > + __u32 frame[LIMA_PP_FRAME_REG_NUM];
> > + __u32 num_pp;
> > + __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> > + __u32 plbu_array_address[4];
> > + __u32 fragment_stack_address[4];
> > +};
> > +
> > +struct drm_lima_m450_pp_frame {
> > + __u32 frame[LIMA_PP_FRAME_REG_NUM];
> > + __u32 num_pp;
> > + __u32 wb[3 * LIMA_PP_WB_REG_NUM];
> > + __u32 use_dlbu;
> > + __u32 _pad;
> > + union {
> > + __u32 plbu_array_address[8];
> > + __u32 dlbu_regs[4];
> > + };
> > + __u32 fragment_stack_address[8];
> > +};
> > +
> > +#define LIMA_PIPE_GP 0x00
> > +#define LIMA_PIPE_PP 0x01
> > +
> > +#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
> > +
> > +struct drm_lima_gem_submit {
> > + __u32 ctx; /* in */
> > + __u32 pipe; /* in */
> > + __u32 nr_bos; /* in */
> > + __u32 frame_size; /* in */
> > + __u64 bos; /* in */
> > + __u64 frame; /* in */
> > + __u32 flags; /* in */
> > + __u32 out_sync; /* in */
> > + __u32 in_sync[2]; /* in */
> > +};
> > +
> > +#define LIMA_GEM_WAIT_READ 0x01
> > +#define LIMA_GEM_WAIT_WRITE 0x02
> > +
> > +struct drm_lima_gem_wait {
> > + __u32 handle; /* in */
> > + __u32 op; /* in */
> > + __s64 timeout_ns; /* in */
> > +};
> > +
> > +#define LIMA_CTX_OP_CREATE 1
> > +#define LIMA_CTX_OP_FREE 2
> > +
> > +struct drm_lima_ctx {
> > + __u32 op; /* in */
> > + __u32 id; /* in/out */
> > +};
> > +
> > +#define DRM_LIMA_INFO 0x00
> > +#define DRM_LIMA_GEM_CREATE 0x01
> > +#define DRM_LIMA_GEM_INFO 0x02
> > +#define DRM_LIMA_GEM_SUBMIT 0x03
> > +#define DRM_LIMA_GEM_WAIT 0x04
> > +#define DRM_LIMA_CTX 0x05
> > +
> > +#define DRM_IOCTL_LIMA_INFO DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_INFO, struct drm_lima_info)
> > +#define DRM_IOCTL_LIMA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, struct drm_lima_gem_create)
> > +#define DRM_IOCTL_LIMA_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info)
> > +#define DRM_IOCTL_LIMA_GEM_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, struct drm_lima_gem_submit)
> > +#define DRM_IOCTL_LIMA_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait)
> > +#define DRM_IOCTL_LIMA_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_CTX, struct drm_lima_ctx)
> > +
> > +#if defined(__cplusplus)
> > +}
> > +#endif
> > +
> > +#endif /* __LIMA_DRM_H__ */
> > --
> > 2.17.1
> >
More information about the lima
mailing list