[Lima] [PATCH 2/2] drm/lima: driver for ARM Mali4xx GPUs

Qiang Yu yuq825 at gmail.com
Wed Feb 13 00:56:21 UTC 2019


On Tue, Feb 12, 2019 at 11:47 PM Rob Herring <robh at kernel.org> wrote:
>
> On Wed, Feb 6, 2019 at 7:16 AM Qiang Yu <yuq825 at gmail.com> wrote:
> >
> > From: Lima Project Developers <lima at lists.freedesktop.org>
>
> This should be a person (you).
>
> > Signed-off-by: Andreas Baierl <ichgeh at imkreisrum.de>
> > Signed-off-by: Erico Nunes <nunes.erico at gmail.com>
> > Signed-off-by: Heiko Stuebner <heiko at sntech.de>
> > Signed-off-by: Marek Vasut <marex at denx.de>
> > Signed-off-by: Neil Armstrong <narmstrong at baylibre.com>
> > Signed-off-by: Qiang Yu <yuq825 at gmail.com>
>
> Being the submitter, your S-o-b should be last.
>
> > Signed-off-by: Simon Shields <simon at lineageos.org>
> > Signed-off-by: Vasily Khoruzhick <anarsoul at gmail.com>
> > ---
>
> > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> > index 4385f00e1d05..dfefcb393858 100644
> > --- a/drivers/gpu/drm/Kconfig
> > +++ b/drivers/gpu/drm/Kconfig
> > @@ -333,6 +333,8 @@ source "drivers/gpu/drm/tve200/Kconfig"
> >
> >  source "drivers/gpu/drm/xen/Kconfig"
> >
> > +source "drivers/gpu/drm/lima/Kconfig"
> > +
> >  # Keep legacy drivers last
> >
> >  menuconfig DRM_LEGACY
> > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > index ce8d1d384319..8d024b729902 100644
> > --- a/drivers/gpu/drm/Makefile
> > +++ b/drivers/gpu/drm/Makefile
> > @@ -109,3 +109,4 @@ obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
> >  obj-$(CONFIG_DRM_PL111) += pl111/
> >  obj-$(CONFIG_DRM_TVE200) += tve200/
> >  obj-$(CONFIG_DRM_XEN) += xen/
> > +obj-$(CONFIG_DRM_LIMA)  += lima/
>
> Not sure about this file, but normally these should be kept sorted.
>
> > diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
> > new file mode 100644
> > index 000000000000..63754f6465ea
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_bcast.c
> > @@ -0,0 +1,46 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_bcast.h"
> > +#include "lima_regs.h"
> > +
> > +#define bcast_write(reg, data) writel(data, ip->iomem + LIMA_BCAST_##reg)
> > +#define bcast_read(reg) readl(ip->iomem + LIMA_BCAST_##reg)
>
> There are 2 things about this I would change. Just pass in 'ip' to the
> macro so it is clear in calling functions that ip is actually used.
> Second, don't do token pasting. It is generally avoided in the kernel.
> It makes grepping the source code harder and is a pointless
> indirection.
>
> If you do both of those, then these can be static inline functions
> instead which are preferred because you get type checking.
>
> Same comment applies to all the other register accessors.
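For reference, the static-inline form being suggested could look roughly like
this (a sketch; the full register name LIMA_BCAST_BROADCAST_MASK is assumed
from the existing LIMA_BCAST_ prefix):

static inline void lima_bcast_write(struct lima_ip *ip, u32 reg, u32 data)
{
        writel(data, ip->iomem + reg);
}

static inline u32 lima_bcast_read(struct lima_ip *ip, u32 reg)
{
        return readl(ip->iomem + reg);
}

A call site would then spell out the full register name, which also keeps
grep working:

        lima_bcast_write(ip, LIMA_BCAST_BROADCAST_MASK, mask);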
>
>
> > +struct lima_ip {
> > +       struct lima_device *dev;
> > +       enum lima_ip_id id;
> > +       bool present;
> > +
> > +       void __iomem *iomem;
> > +       int irq;
> > +
> > +       union {
> > +               /* gp/pp */
> > +               bool async_reset;
> > +               /* l2 cache */
> > +               spinlock_t lock;
>
> What happens when you need 2 elements for a sub-block. I'd make this a
> struct pointer for each IP sub-block.
Let's switch to a per-sub-block struct later if a second element shows up.
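For illustration, the per-sub-block struct Rob describes might look like this
(a sketch; the struct and field names are made up for the example):

struct lima_ip_l2_cache {
        spinlock_t lock;
        /* room for a second element later */
};

struct lima_ip {
        struct lima_device *dev;
        enum lima_ip_id id;
        bool present;

        void __iomem *iomem;
        int irq;

        union {
                bool async_reset;                  /* gp/pp */
                struct lima_ip_l2_cache *l2_cache; /* l2 cache */
        } data;
};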

>
> > +       } data;
> > +};
> > +
> > +enum lima_pipe_id {
> > +       lima_pipe_gp,
> > +       lima_pipe_pp,
> > +       lima_pipe_num,
> > +};
> > +
> > +struct lima_device {
> > +       struct device *dev;
> > +       struct drm_device *ddev;
> > +       struct platform_device *pdev;
> > +
> > +       enum lima_gpu_id id;
> > +       int num_pp;
> > +
> > +       void __iomem *iomem;
> > +       struct clk *clk_bus;
> > +       struct clk *clk_gpu;
> > +       struct reset_control *reset;
> > +       struct regulator *regulator;
> > +
> > +       struct lima_ip ip[lima_ip_num];
> > +       struct lima_sched_pipe pipe[lima_pipe_num];
> > +
> > +       struct lima_mman mman;
> > +
> > +       struct lima_vm *empty_vm;
> > +       uint64_t va_start;
> > +       uint64_t va_end;
> > +
> > +       u32 *dlbu_cpu;
> > +       dma_addr_t dlbu_dma;
> > +};
> > +
> > +static inline struct lima_device *
> > +to_lima_dev(struct drm_device *dev)
> > +{
> > +       return dev->dev_private;
> > +}
> > +
> > +static inline struct lima_device *
> > +ttm_to_lima_dev(struct ttm_bo_device *dev)
> > +{
> > +       return container_of(dev, struct lima_device, mman.bdev);
> > +}
> > +
> > +int lima_device_init(struct lima_device *ldev);
> > +void lima_device_fini(struct lima_device *ldev);
> > +
> > +const char *lima_ip_name(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
> > new file mode 100644
> > index 000000000000..6697d4ddd887
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.c
> > @@ -0,0 +1,56 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_dlbu.h"
> > +#include "lima_vm.h"
> > +#include "lima_regs.h"
> > +
> > +#define dlbu_write(reg, data) writel(data, ip->iomem + LIMA_DLBU_##reg)
> > +#define dlbu_read(reg) readl(ip->iomem + LIMA_DLBU_##reg)
> > +
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp)
> > +{
> > +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> > +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> > +       int i, mask = 0;
> > +
> > +       for (i = 0; i < num_pp; i++) {
> > +               struct lima_ip *pp = pipe->processor[i];
> > +               mask |= 1 << (pp->id - lima_ip_pp0);
> > +       }
> > +
> > +       dlbu_write(PP_ENABLE_MASK, mask);
> > +}
> > +
> > +void lima_dlbu_disable(struct lima_device *dev)
> > +{
> > +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> > +       dlbu_write(PP_ENABLE_MASK, 0);
> > +}
> > +
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
> > +{
> > +       dlbu_write(TLLIST_VBASEADDR, reg[0]);
> > +       dlbu_write(FB_DIM, reg[1]);
> > +       dlbu_write(TLLIST_CONF, reg[2]);
> > +       dlbu_write(START_TILE_POS, reg[3]);
> > +}
> > +
> > +int lima_dlbu_init(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +
> > +       dlbu_write(MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
> > +       dlbu_write(MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
> > +
> > +       return 0;
> > +}
> > +
> > +void lima_dlbu_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
> > new file mode 100644
> > index 000000000000..60cba387cf30
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_dlbu.h
> > @@ -0,0 +1,18 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_DLBU_H__
> > +#define __LIMA_DLBU_H__
> > +
> > +struct lima_ip;
> > +struct lima_device;
> > +
> > +void lima_dlbu_enable(struct lima_device *dev, int num_pp);
> > +void lima_dlbu_disable(struct lima_device *dev);
> > +
> > +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
> > +
> > +int lima_dlbu_init(struct lima_ip *ip);
> > +void lima_dlbu_fini(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> > new file mode 100644
> > index 000000000000..132071b9be9b
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_drv.c
> > @@ -0,0 +1,459 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/module.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/log2.h>
> > +#include <drm/drm_prime.h>
> > +#include <drm/lima_drm.h>
> > +
> > +#include "lima_drv.h"
> > +#include "lima_gem.h"
> > +#include "lima_gem_prime.h"
> > +#include "lima_vm.h"
> > +
> > +int lima_sched_timeout_ms = 0;
> > +int lima_sched_max_tasks = 32;
> > +int lima_max_mem = -1;
> > +
> > +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> > +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> > +
> > +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> > +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> > +
> > +MODULE_PARM_DESC(max_mem, "Max memory size in MB can be used (<0 = auto)");
> > +module_param_named(max_mem, lima_max_mem, int, 0444);
> > +
> > +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_info *info = data;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +
> > +       switch (ldev->id) {
> > +       case lima_gpu_mali400:
> > +               info->gpu_id = LIMA_INFO_GPU_MALI400;
> > +               break;
> > +       case lima_gpu_mali450:
> > +               info->gpu_id = LIMA_INFO_GPU_MALI450;
> > +               break;
> > +       default:
> > +               return -ENODEV;
> > +       }
> > +       info->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
> > +       info->va_start = ldev->va_start;
> > +       info->va_end = ldev->va_end;
> > +       return 0;
> > +}
> > +
> > +static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_create *args = data;
> > +
> > +       if (args->flags)
> > +               return -EINVAL;
> > +
> > +       if (args->size == 0)
> > +               return -EINVAL;
> > +
> > +       return lima_gem_create_handle(dev, file, args->size, args->flags, &args->handle);
> > +}
> > +
> > +static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_info *args = data;
> > +
> > +       return lima_gem_mmap_offset(file, args->handle, &args->offset);
> > +}
> > +
> > +static int lima_ioctl_gem_va(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_va *args = data;
> > +
> > +       switch (args->op) {
> > +       case LIMA_VA_OP_MAP:
> > +               return lima_gem_va_map(file, args->handle, args->flags, args->va);
> > +       case LIMA_VA_OP_UNMAP:
> > +               return lima_gem_va_unmap(file, args->handle, args->va);
>
> These are mapping to GPU VA. Why not do that on GEM object creation or
> import or when the objects are submitted with cmd queue as other
> drivers do?
>
> To put it another way, these ioctls look different from what other
> drivers do. Why do you need to do things differently? My understanding
> is best practice is to map and return the GPU offset when the GEM
> object is created. This is what v3d does. I think Intel is moving to
> that. And panfrost will do that.
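As a rough illustration of that model (a sketch only; lima_vm_bo_alloc_va() is
a hypothetical helper standing in for however the VA gets picked, e.g. from a
drm_mm range allocator), the existing gem_open hook could do the mapping
itself and the chosen VA would be reported back through LIMA_GEM_INFO instead
of a separate VA ioctl:

static int lima_gem_object_open(struct drm_gem_object *obj, struct drm_file *file)
{
        struct lima_drm_priv *priv = file->driver_priv;
        struct lima_bo *bo = to_lima_bo(obj);
        u32 va;
        int err;

        /* pick a free GPU VA in this file's VM (hypothetical helper) */
        err = lima_vm_bo_alloc_va(priv->vm, bo, &va);
        if (err)
                return err;

        return lima_vm_bo_map(priv->vm, bo, va);
}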
>
> > +       default:
> > +               return -EINVAL;
> > +       }
> > +}
> > +
> > +static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_submit_in *args = data;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +       struct drm_lima_gem_submit_bo *bos;
> > +       struct ttm_validate_buffer *vbs;
> > +       union drm_lima_gem_submit_dep *deps = NULL;
> > +       struct lima_sched_pipe *pipe;
> > +       struct lima_sched_task *task;
> > +       struct lima_ctx *ctx;
> > +       struct lima_submit submit = {0};
> > +       int err = 0, size;
> > +
> > +       if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
> > +               return -EINVAL;
> > +
> > +       if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE |
> > +                           LIMA_SUBMIT_FLAG_SYNC_FD_OUT))
> > +               return -EINVAL;
> > +
> > +       pipe = ldev->pipe + args->pipe;
> > +       if (args->frame_size != pipe->frame_size)
> > +               return -EINVAL;
> > +
> > +       size = args->nr_bos * (sizeof(*submit.bos) + sizeof(*submit.vbs)) +
> > +               args->nr_deps * sizeof(*submit.deps);
> > +       bos = kzalloc(size, GFP_KERNEL);
> > +       if (!bos)
> > +               return -ENOMEM;
> > +
> > +       size = args->nr_bos * sizeof(*submit.bos);
> > +       if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
> > +               err = -EFAULT;
> > +               goto out0;
> > +       }
> > +
> > +       vbs = (void *)bos + size;
> > +
> > +       if (args->nr_deps) {
> > +               deps = (void *)vbs + args->nr_bos * sizeof(*submit.vbs);
> > +               size = args->nr_deps * sizeof(*submit.deps);
> > +               if (copy_from_user(deps, u64_to_user_ptr(args->deps), size)) {
> > +                       err = -EFAULT;
> > +                       goto out0;
> > +               }
> > +       }
> > +
> > +       task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
> > +       if (!task) {
> > +               err = -ENOMEM;
> > +               goto out0;
> > +       }
> > +
> > +       task->frame = task + 1;
> > +       if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
> > +               err = -EFAULT;
> > +               goto out1;
> > +       }
> > +
> > +       err = pipe->task_validate(pipe, task);
> > +       if (err)
> > +               goto out1;
> > +
> > +       ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
> > +       if (!ctx) {
> > +               err = -ENOENT;
> > +               goto out1;
> > +       }
> > +
> > +       submit.pipe = args->pipe;
> > +       submit.bos = bos;
> > +       submit.vbs = vbs;
> > +       submit.nr_bos = args->nr_bos;
> > +       submit.task = task;
> > +       submit.ctx = ctx;
> > +       submit.deps = deps;
> > +       submit.nr_deps = args->nr_deps;
> > +       submit.flags = args->flags;
> > +
> > +       err = lima_gem_submit(file, &submit);
> > +       if (!err) {
> > +               struct drm_lima_gem_submit_out *out = data;
> > +               out->fence = submit.fence;
> > +               out->done = submit.done;
> > +               out->sync_fd = submit.sync_fd;
> > +       }
> > +
> > +       lima_ctx_put(ctx);
> > +out1:
> > +       if (err)
> > +               kmem_cache_free(pipe->task_slab, task);
> > +out0:
> > +       kfree(bos);
> > +       return err;
> > +}
> > +
> > +static int lima_wait_fence(struct dma_fence *fence, u64 timeout_ns)
> > +{
> > +       signed long ret;
> > +
> > +       if (!timeout_ns)
> > +               ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
>
> I think you can just call dma_fence_wait_timeout with a 0 timeout
> below and remove this clause.
>
> > +       else {
> > +               unsigned long timeout = lima_timeout_to_jiffies(timeout_ns);
> > +
> > +               /* must use long for result check because in 64bit arch int
> > +                * will overflow if timeout is too large and get <0 result
> > +                */
> > +               ret = dma_fence_wait_timeout(fence, true, timeout);
> > +               if (ret == 0)
> > +                       ret = timeout ? -ETIMEDOUT : -EBUSY;
> > +               else if (ret > 0)
> > +                       ret = 0;
> > +       }
>
> I suspect this could be common like reservation object waits. However,
> I'm curious why lima needs this ioctl in the first place when I don't
> see the same for other drivers.
I've prepared a v2 patch set which removes this ioctl, adopting Eric's
suggestion to use drm_syncobj.
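(For reference, the fold Rob suggests above would look roughly like this
sketch; dma_fence_wait_timeout() with a zero timeout just polls the fence:)

static int lima_wait_fence(struct dma_fence *fence, u64 timeout_ns)
{
        unsigned long timeout = timeout_ns ?
                lima_timeout_to_jiffies(timeout_ns) : 0;
        signed long ret;

        ret = dma_fence_wait_timeout(fence, true, timeout);
        if (ret == 0)
                return timeout ? -ETIMEDOUT : -EBUSY;
        return ret < 0 ? ret : 0;
}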

>
> > +
> > +       return ret;
> > +}
> > +
> > +static int lima_ioctl_wait_fence(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_wait_fence *args = data;
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +       struct dma_fence *fence;
> > +       int err = 0;
> > +
> > +       fence = lima_ctx_get_native_fence(&priv->ctx_mgr, args->ctx,
> > +                                         args->pipe, args->seq);
> > +       if (IS_ERR(fence))
> > +               return PTR_ERR(fence);
> > +
> > +       if (fence) {
> > +               err = lima_wait_fence(fence, args->timeout_ns);
> > +               args->error = fence->error;
> > +               dma_fence_put(fence);
> > +       }
> > +       else
> > +               args->error = 0;
> > +
> > +       return err;
> > +}
> > +
> > +static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_wait *args = data;
> > +
> > +       if (!(args->op & (LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE)))
> > +           return -EINVAL;
> > +
> > +       return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
> > +}
> > +
> > +static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_ctx *args = data;
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +
> > +       if (args->op == LIMA_CTX_OP_CREATE)
> > +               return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
> > +       else if (args->op == LIMA_CTX_OP_FREE)
> > +               return lima_ctx_free(&priv->ctx_mgr, args->id);
> > +
> > +       return -EINVAL;
> > +}
> > +
> > +static int lima_ioctl_gem_mod(struct drm_device *dev, void *data, struct drm_file *file)
> > +{
> > +       struct drm_lima_gem_mod *args = data;
> > +
> > +       if (args->op == LIMA_GEM_MOD_OP_GET)
> > +               return lima_gem_get_modifier(file, args->handle, &args->modifier);
> > +       else if (args->op == LIMA_GEM_MOD_OP_SET)
> > +               return lima_gem_set_modifier(file, args->handle, args->modifier);
> > +
> > +       return -EINVAL;
> > +}
> > +
> > +static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
> > +{
> > +       int err;
> > +       struct lima_drm_priv *priv;
> > +       struct lima_device *ldev = to_lima_dev(dev);
> > +
> > +       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > +       if (!priv)
> > +               return -ENOMEM;
> > +
> > +       priv->vm = lima_vm_create(ldev);
> > +       if (!priv->vm) {
> > +               err = -ENOMEM;
> > +               goto err_out0;
> > +       }
> > +
> > +        lima_ctx_mgr_init(&priv->ctx_mgr);
> > +
> > +       file->driver_priv = priv;
> > +       return 0;
> > +
> > +err_out0:
> > +       kfree(priv);
> > +       return err;
> > +}
> > +
> > +static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
> > +{
> > +       struct lima_drm_priv *priv = file->driver_priv;
> > +
> > +       lima_ctx_mgr_fini(&priv->ctx_mgr);
> > +       lima_vm_put(priv->vm);
> > +       kfree(priv);
> > +}
> > +
> > +static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
> > +       DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_VA, lima_ioctl_gem_va, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_WAIT_FENCE, lima_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
> > +       DRM_IOCTL_DEF_DRV(LIMA_GEM_MOD, lima_ioctl_gem_mod, DRM_AUTH|DRM_RENDER_ALLOW),
> > +};
> > +
> > +static const struct file_operations lima_drm_driver_fops = {
> > +       .owner              = THIS_MODULE,
> > +       .open               = drm_open,
> > +       .release            = drm_release,
> > +       .unlocked_ioctl     = drm_ioctl,
> > +#ifdef CONFIG_COMPAT
> > +       .compat_ioctl       = drm_compat_ioctl,
> > +#endif
> > +       .mmap               = lima_gem_mmap,
> > +};
> > +
> > +static struct drm_driver lima_drm_driver = {
> > +       .driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME,
> > +       .open               = lima_drm_driver_open,
> > +       .postclose          = lima_drm_driver_postclose,
> > +       .ioctls             = lima_drm_driver_ioctls,
> > +       .num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
> > +       .fops               = &lima_drm_driver_fops,
> > +       .gem_free_object_unlocked = lima_gem_free_object,
> > +       .gem_open_object    = lima_gem_object_open,
> > +       .gem_close_object   = lima_gem_object_close,
> > +       .name               = "lima",
> > +       .desc               = "lima DRM",
> > +       .date               = "20170325",
>
> Perhaps this should be updated? TBH, I don't know why this is even useful.
>
> > +       .major              = 1,
> > +       .minor              = 0,
> > +       .patchlevel         = 0,
> > +
> > +       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> > +       .gem_prime_import   = drm_gem_prime_import,
> > +       .gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
> > +       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> > +       .gem_prime_export   = drm_gem_prime_export,
>
> import and export don't have to be set if you use the defaults.
>
> > +       .gem_prime_res_obj  = lima_gem_prime_res_obj,
> > +       .gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
> > +       .gem_prime_vmap = lima_gem_prime_vmap,
> > +       .gem_prime_vunmap = lima_gem_prime_vunmap,
> > +       .gem_prime_mmap = lima_gem_prime_mmap,
> > +};
> > +
>
> > +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, u64 timeout_ns)
> > +{
> > +       bool write = op & LIMA_GEM_WAIT_WRITE;
> > +       struct drm_gem_object *obj;
> > +       struct lima_bo *bo;
> > +       signed long ret;
> > +       unsigned long timeout;
> > +
> > +       obj = drm_gem_object_lookup(file, handle);
> > +       if (!obj)
> > +               return -ENOENT;
> > +
> > +       bo = to_lima_bo(obj);
> > +
> > +       timeout = timeout_ns ? lima_timeout_to_jiffies(timeout_ns) : 0;
> > +
> > +       ret = lima_bo_reserve(bo, true);
> > +       if (ret)
> > +               goto out;
> > +
> > +       /* must use long for result check because in 64bit arch int
> > +        * will overflow if timeout is too large and get <0 result
> > +        */
> > +       ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, write, true, timeout);
> > +       if (ret == 0)
> > +               ret = timeout ? -ETIMEDOUT : -EBUSY;
> > +       else if (ret > 0)
> > +               ret = 0;
>
> There's a helper I added for all this that should land in 5.1.
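(Presumably drm_gem_reservation_object_wait(); assuming that is the helper
meant, the function above would collapse to roughly this sketch:)

int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, u64 timeout_ns)
{
        bool write = op & LIMA_GEM_WAIT_WRITE;
        long timeout = timeout_ns ? lima_timeout_to_jiffies(timeout_ns) : 0;

        /* the helper does the lookup, the wait and the 0/-ETIME mapping */
        return drm_gem_reservation_object_wait(file, handle, write, timeout);
}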
>
> > +
> > +       lima_bo_unreserve(bo);
> > +out:
> > +       drm_gem_object_put_unlocked(obj);
> > +       return ret;
> > +}
> > +
>
> > +static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
> > +{
> > +       struct lima_device *dev = ip->dev;
> > +       int timeout;
> > +
> > +       if (!ip->data.async_reset)
> > +               return 0;
> > +
> > +       for (timeout = 1000; timeout > 0; timeout--) {
> > +               if (gp_read(INT_RAWSTAT) & LIMA_GP_IRQ_RESET_COMPLETED)
> > +                       break;
>
> Use readl_poll_timeout instead of writing your own. At least add a
> udelay to the loop so the timing is fixed and not dependent on how
> fast the code can run.
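For reference, the readl_poll_timeout() form (from <linux/iopoll.h>) would be
something like this sketch, assuming the gp accessors expand to a LIMA_GP_
prefix like the other blocks; the 100us poll interval and 100ms timeout are
placeholders:

u32 stat;
int err;

err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, stat,
                         stat & LIMA_GP_IRQ_RESET_COMPLETED,
                         100, 100 * 1000);
if (err) {
        dev_err(dev->dev, "gp soft reset time out\n");
        return err;
}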
>
> > +       }
> > +       if (!timeout) {
> > +               dev_err(dev->dev, "gp soft reset time out\n");
> > +               return -ETIMEDOUT;
> > +       }
> > +
> > +       gp_write(INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> > +       gp_write(INT_MASK, LIMA_GP_IRQ_MASK_USED);
> > +
> > +       ip->data.async_reset = false;
> > +       return 0;
> > +}
>
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
> > new file mode 100644
> > index 000000000000..e7cdec720e5d
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.c
> > @@ -0,0 +1,79 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_l2_cache.h"
> > +#include "lima_regs.h"
> > +
> > +#define l2_cache_write(reg, data) writel(data, ip->iomem + LIMA_L2_CACHE_##reg)
> > +#define l2_cache_read(reg) readl(ip->iomem + LIMA_L2_CACHE_##reg)
> > +
> > +static int lima_l2_cache_wait_idle(struct lima_ip *ip)
> > +{
> > +       int timeout;
> > +       struct lima_device *dev = ip->dev;
> > +
> > +       for (timeout = 100000; timeout > 0; timeout--) {
> > +           if (!(l2_cache_read(STATUS) & LIMA_L2_CACHE_STATUS_COMMAND_BUSY))
> > +               break;
>
> Use readl_poll_timeout or variant.
>
> > +       }
> > +       if (!timeout) {
> > +           dev_err(dev->dev, "l2 cache wait command timeout\n");
> > +           return -ETIMEDOUT;
> > +       }
> > +       return 0;
> > +}
> > +
> > +int lima_l2_cache_flush(struct lima_ip *ip)
> > +{
> > +       int ret;
> > +
> > +       spin_lock(&ip->data.lock);
> > +       l2_cache_write(COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
> > +       ret = lima_l2_cache_wait_idle(ip);
> > +       spin_unlock(&ip->data.lock);
> > +       return ret;
> > +}
> > +
> > +int lima_l2_cache_init(struct lima_ip *ip)
> > +{
> > +       int i, err;
> > +       u32 size;
> > +       struct lima_device *dev = ip->dev;
> > +
> > +       /* l2_cache2 only exists when one of PP4-7 present */
> > +       if (ip->id == lima_ip_l2_cache2) {
> > +               for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
> > +                       if (dev->ip[i].present)
> > +                               break;
> > +               }
> > +               if (i > lima_ip_pp7)
> > +                       return -ENODEV;
> > +       }
> > +
> > +       spin_lock_init(&ip->data.lock);
> > +
> > +       size = l2_cache_read(SIZE);
> > +       dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
> > +                1 << (((size >> 16) & 0xff) - 10),
> > +                1 << ((size >> 8) & 0xff),
> > +                1 << (size & 0xff),
> > +                1 << ((size >> 24) & 0xff));
> > +
> > +       err = lima_l2_cache_flush(ip);
> > +       if (err)
> > +               return err;
> > +
> > +       l2_cache_write(ENABLE, LIMA_L2_CACHE_ENABLE_ACCESS | LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
> > +       l2_cache_write(MAX_READS, 0x1c);
> > +
> > +       return 0;
> > +}
> > +
> > +void lima_l2_cache_fini(struct lima_ip *ip)
> > +{
> > +
> > +}
> > diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
> > new file mode 100644
> > index 000000000000..2ff91eafefbe
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_l2_cache.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#ifndef __LIMA_L2_CACHE_H__
> > +#define __LIMA_L2_CACHE_H__
> > +
> > +struct lima_ip;
> > +
> > +int lima_l2_cache_init(struct lima_ip *ip);
> > +void lima_l2_cache_fini(struct lima_ip *ip);
> > +
> > +int lima_l2_cache_flush(struct lima_ip *ip);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
> > new file mode 100644
> > index 000000000000..234fb90a4285
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_mmu.c
> > @@ -0,0 +1,135 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/device.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_mmu.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +#define mmu_write(reg, data) writel(data, ip->iomem + LIMA_MMU_##reg)
> > +#define mmu_read(reg) readl(ip->iomem + LIMA_MMU_##reg)
> > +
> > +#define lima_mmu_send_command(command, condition)           \
> > +({                                                          \
> > +       int __timeout, __ret = 0;                            \
> > +                                                            \
> > +       mmu_write(COMMAND, command);                         \
> > +       for (__timeout = 1000; __timeout > 0; __timeout--) { \
> > +               if (condition)                               \
> > +                       break;                               \
> > +       }                                                    \
> > +       if (!__timeout) {                                    \
> > +               dev_err(dev->dev, "mmu command %x timeout\n", command); \
> > +               __ret = -ETIMEDOUT;                          \
> > +       }                                                    \
> > +       __ret;                                               \
> > +})
> > +
> > +static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
> > +{
> > +       struct lima_ip *ip = data;
> > +       struct lima_device *dev = ip->dev;
> > +       u32 status = mmu_read(INT_STATUS);
> > +       struct lima_sched_pipe *pipe;
> > +
> > +       /* for shared irq case */
> > +       if (!status)
>
> Can status have masked irq's? If so, you should be masking out the
> disabled irq bits.
>
> > +               return IRQ_NONE;
> > +
> > +       if (status & LIMA_MMU_INT_PAGE_FAULT) {
> > +               u32 fault = mmu_read(PAGE_FAULT_ADDR);
> > +               dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
> > +                       fault, LIMA_MMU_STATUS_BUS_ID(status),
> > +                       status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
> > +                       lima_ip_name(ip));
> > +       }
> > +
> > +       if (status & LIMA_MMU_INT_READ_BUS_ERROR) {
> > +               dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
> > +       }
> > +
> > +       /* mask all interrupts before resume */
> > +       mmu_write(INT_MASK, 0);
> > +       mmu_write(INT_CLEAR, status);
> > +
> > +       pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
> > +       lima_sched_pipe_mmu_error(pipe);
> > +
> > +       return IRQ_HANDLED;
> > +}
>
>
> > +
> > +unsigned long lima_timeout_to_jiffies(u64 timeout_ns)
>
> Create a common helper instead of copy-n-pasting this from other
> drivers (etnaviv).
There's an existing drm_timeout_abs_to_jiffies(), but it's not exported.
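One way to avoid the copy (a sketch; the exact file placement is an
assumption) would be to export the existing helper and give it a shared
prototype:

/* drivers/gpu/drm/drm_syncobj.c: drop the static and export it */
EXPORT_SYMBOL(drm_timeout_abs_to_jiffies);

/* include/drm/drm_utils.h (placement assumed) */
signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec);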

>
> > +{
> > +       unsigned long timeout_jiffies;
> > +       ktime_t timeout;
> > +
> > +       /* clamp timeout if it's to large */
> > +       if (((s64)timeout_ns) < 0)
> > +               return MAX_SCHEDULE_TIMEOUT;
> > +
> > +       timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
> > +       if (ktime_to_ns(timeout) < 0)
> > +               return 0;
> > +
> > +       timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
> > +       /*  clamp timeout to avoid unsigned-> signed overflow */
> > +       if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
> > +               return MAX_SCHEDULE_TIMEOUT;
> > +
> > +       return timeout_jiffies;
> > +}
> > +
> > +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
> > +{
> > +       if (pipe->error)
> > +               schedule_work(&pipe->error_work);
> > +       else {
> > +               struct lima_sched_task *task = pipe->current_task;
> > +
> > +               pipe->task_fini(pipe);
> > +               dma_fence_signal(task->fence);
> > +       }
> > +}
>
> > diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
> > new file mode 100644
> > index 000000000000..a264f3ae83fe
> > --- /dev/null
> > +++ b/drivers/gpu/drm/lima/lima_vm.c
> > @@ -0,0 +1,354 @@
> > +// SPDX-License-Identifier: GPL-2.0 OR MIT
> > +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> > +
> > +#include <linux/slab.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/interval_tree_generic.h>
> > +
> > +#include "lima_device.h"
> > +#include "lima_vm.h"
> > +#include "lima_object.h"
> > +#include "lima_regs.h"
> > +
> > +struct lima_bo_va_mapping {
> > +       struct list_head list;
> > +       struct rb_node rb;
> > +       uint32_t start;
> > +       uint32_t last;
> > +       uint32_t __subtree_last;
> > +};
> > +
> > +struct lima_bo_va {
> > +       struct list_head list;
> > +       unsigned ref_count;
> > +
> > +       struct list_head mapping;
> > +
> > +       struct lima_vm *vm;
> > +};
> > +
> > +#define LIMA_VM_PD_SHIFT 22
> > +#define LIMA_VM_PT_SHIFT 12
> > +#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
> > +#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
> > +
> > +#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
> > +#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
> > +
> > +#define LIMA_PDE(va) (va >> LIMA_VM_PD_SHIFT)
> > +#define LIMA_PTE(va) ((va & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
> > +#define LIMA_PBE(va) (va >> LIMA_VM_PB_SHIFT)
> > +#define LIMA_BTE(va) ((va & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
> > +
> > +#define START(node) ((node)->start)
> > +#define LAST(node) ((node)->last)
> > +
> > +INTERVAL_TREE_DEFINE(struct lima_bo_va_mapping, rb, uint32_t, __subtree_last,
> > +                    START, LAST, static, lima_vm_it)
> > +
> > +#undef START
> > +#undef LAST
> > +
> > +static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
> > +{
> > +       u32 addr;
> > +
> > +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > +               u32 pbe = LIMA_PBE(addr);
> > +               u32 bte = LIMA_BTE(addr);
> > +               u32 *bt;
> > +
> > +               bt = lima_bo_kmap(vm->bts[pbe]);
> > +               bt[bte] = 0;
> > +       }
> > +}
> > +
> > +static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
> > +                                 u32 start, u32 end)
> > +{
> > +       u64 addr;
> > +       int err, i = 0;
> > +
> > +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> > +               u32 pbe = LIMA_PBE(addr);
> > +               u32 bte = LIMA_BTE(addr);
> > +               u32 *bt;
> > +
> > +               if (vm->bts[pbe])
> > +                       bt = lima_bo_kmap(vm->bts[pbe]);
> > +               else {
> > +                       struct lima_bo *bt_bo;
> > +                       dma_addr_t *pts;
> > +                       u32 *pd;
> > +                       int j;
> > +
> > +                       bt_bo = lima_bo_create(
> > +                               vm->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> > +                               0, ttm_bo_type_kernel,
> > +                               NULL, vm->pd->tbo.resv);
>
> I don't think using BOs for page tables buys you anything. You could
> just use the kernel DMA API directly. See io-pgtable-arm-v7s.c for
> inspiration. For panfrost, it's standard ARM format page tables so we
> can just use the io-pgtable library.
Right, v2 will use the DMA API directly.
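A minimal sketch of the DMA-API version of the page-table block allocation
(using a write-combined mapping via dma_alloc_wc() is an assumption about
what the GPU MMU needs):

u32 *bt;
dma_addr_t bt_dma;

bt = dma_alloc_wc(dev->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
                  &bt_dma, GFP_KERNEL | __GFP_ZERO);
if (!bt)
        return -ENOMEM;

/* bt is the CPU pointer used to fill PTEs, bt_dma goes into the PD entries */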

>
> > +                       if (IS_ERR(bt_bo)) {
> > +                               err = PTR_ERR(bt_bo);
> > +                               goto err_out;
> > +                       }
> > +
> > +                       bt = lima_bo_kmap(bt_bo);
> > +                       if (IS_ERR(bt)) {
> > +                               lima_bo_unref(bt_bo);
> > +                               err = PTR_ERR(bt);
> > +                               goto err_out;
> > +                       }
> > +                       memset(bt, 0, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT);
> > +
> > +                       vm->bts[pbe] = bt_bo;
> > +                       pd = lima_bo_kmap(vm->pd);
> > +                       pd += pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT;
> > +                       pts = lima_bo_get_pages(bt_bo);
> > +                       for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++)
> > +                               *pd++ = *pts++ | LIMA_VM_FLAG_PRESENT;
> > +               }
> > +
> > +               bt[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
> > +       }
> > +
> > +       return 0;
> > +
> > +err_out:
> > +       if (addr != start)
> > +               lima_vm_unmap_page_table(vm, start, addr - 1);
> > +       return err;
> > +}
> > +
> > +static struct lima_bo_va *
> > +lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
> > +{
> > +       struct lima_bo_va *bo_va, *ret = NULL;
> > +
> > +       list_for_each_entry(bo_va, &bo->va, list) {
> > +               if (bo_va->vm == vm) {
> > +                       ret = bo_va;
> > +                       break;
> > +               }
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +int lima_vm_bo_map(struct lima_vm *vm, struct lima_bo *bo, u32 start)
> > +{
> > +       int err;
> > +       struct lima_bo_va_mapping *it, *mapping;
> > +       u32 end = start + bo->gem.size - 1;
> > +       dma_addr_t *pages_dma = lima_bo_get_pages(bo);
> > +       struct lima_bo_va *bo_va;
> > +
> > +       it = lima_vm_it_iter_first(&vm->va, start, end);
> > +       if (it) {
> > +               dev_dbg(bo->gem.dev->dev, "lima vm map va overlap %x-%x %x-%x\n",
> > +                       start, end, it->start, it->last);
> > +               return -EINVAL;
> > +       }
> > +
> > +       mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
> > +       if (!mapping)
> > +               return -ENOMEM;
> > +       mapping->start = start;
> > +       mapping->last = end;
>
> Why don't you use the drm_mm_XX APIs instead of writing your own?
Will use it in v2.
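A rough sketch of the drm_mm version (vm->mm and a drm_mm_node embedded in
the mapping, assumed zero-initialised, are assumptions) that keeps the
user-specified VA semantics:

/* at VM creation */
drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);

/* replacing the interval-tree overlap check + insert */
mapping->node.start = start;
mapping->node.size = bo->gem.size;
err = drm_mm_reserve_node(&vm->mm, &mapping->node);
if (err)        /* -ENOSPC if the range overlaps an existing mapping */
        return err;

(drm_mm_insert_node_generic() would be the variant to use if the kernel picks
the VA instead of userspace.)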

>
> > +
> > +       err = lima_vm_map_page_table(vm, pages_dma, start, end);
> > +       if (err) {
> > +               kfree(mapping);
> > +               return err;
> > +       }
> > +
> > +       lima_vm_it_insert(mapping, &vm->va);
> > +
> > +       bo_va = lima_vm_bo_find(vm, bo);
> > +       list_add_tail(&mapping->list, &bo_va->mapping);
> > +
> > +       return 0;
> > +}

