[Lima] [PATCH 2/2] drm/lima: driver for ARM Mali4xx GPUs

Rob Herring robh at kernel.org
Tue Feb 12 15:46:48 UTC 2019


On Wed, Feb 6, 2019 at 7:16 AM Qiang Yu <yuq825 at gmail.com> wrote:
>
> From: Lima Project Developers <lima at lists.freedesktop.org>

This should be a person (you).

> Signed-off-by: Andreas Baierl <ichgeh at imkreisrum.de>
> Signed-off-by: Erico Nunes <nunes.erico at gmail.com>
> Signed-off-by: Heiko Stuebner <heiko at sntech.de>
> Signed-off-by: Marek Vasut <marex at denx.de>
> Signed-off-by: Neil Armstrong <narmstrong at baylibre.com>
> Signed-off-by: Qiang Yu <yuq825 at gmail.com>

Since you are the submitter, your S-o-b should be last.

> Signed-off-by: Simon Shields <simon at lineageos.org>
> Signed-off-by: Vasily Khoruzhick <anarsoul at gmail.com>
> ---

> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 4385f00e1d05..dfefcb393858 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -333,6 +333,8 @@ source "drivers/gpu/drm/tve200/Kconfig"
>
>  source "drivers/gpu/drm/xen/Kconfig"
>
> +source "drivers/gpu/drm/lima/Kconfig"
> +
>  # Keep legacy drivers last
>
>  menuconfig DRM_LEGACY
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index ce8d1d384319..8d024b729902 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -109,3 +109,4 @@ obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
>  obj-$(CONFIG_DRM_PL111) += pl111/
>  obj-$(CONFIG_DRM_TVE200) += tve200/
>  obj-$(CONFIG_DRM_XEN) += xen/
> +obj-$(CONFIG_DRM_LIMA)  += lima/

Not sure about this file, but normally these should be kept sorted.

> diff --git a/drivers/gpu/drm/lima/lima_bcast.c b/drivers/gpu/drm/lima/lima_bcast.c
> new file mode 100644
> index 000000000000..63754f6465ea
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_bcast.c
> @@ -0,0 +1,46 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_bcast.h"
> +#include "lima_regs.h"
> +
> +#define bcast_write(reg, data) writel(data, ip->iomem + LIMA_BCAST_##reg)
> +#define bcast_read(reg) readl(ip->iomem + LIMA_BCAST_##reg)

There are two things about this I would change. First, pass 'ip' in to
the macro so it is clear in the calling functions that ip is actually
used. Second, don't do token pasting. It is generally avoided in the
kernel: it makes grepping the source code harder and is a pointless
indirection.

If you do both of those, then these can be static inline functions
instead, which are preferred because you get type checking.

Same comment applies to all the other register accessors.
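
Roughly what I have in mind (just a sketch reusing the writel/readl the
file already gets from <linux/io.h>; the register name at the call site
is only illustrative, not from your patch):

static inline void bcast_write(struct lima_ip *ip, u32 reg, u32 data)
{
        writel(data, ip->iomem + reg);
}

static inline u32 bcast_read(struct lima_ip *ip, u32 reg)
{
        return readl(ip->iomem + reg);
}

        /* call sites spell out the full register name, so grep still
         * finds it, and 'ip' is visibly used */
        bcast_write(ip, LIMA_BCAST_BROADCAST_MASK, mask);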


> +struct lima_ip {
> +       struct lima_device *dev;
> +       enum lima_ip_id id;
> +       bool present;
> +
> +       void __iomem *iomem;
> +       int irq;
> +
> +       union {
> +               /* gp/pp */
> +               bool async_reset;
> +               /* l2 cache */
> +               spinlock_t lock;

What happens when you need two elements for a sub-block? I'd make this a
struct pointer for each IP sub-block.
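
Something like this, for example (a sketch; the data struct name is made
up):

struct lima_l2_cache_data {
        spinlock_t lock;
};

struct lima_ip {
        struct lima_device *dev;
        enum lima_ip_id id;
        bool present;

        void __iomem *iomem;
        int irq;

        /* private state owned by the sub-block, allocated in its init() */
        void *data;
};

Then each sub-block's init() can allocate however many elements it needs
and hang them off ip->data.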

> +       } data;
> +};
> +
> +enum lima_pipe_id {
> +       lima_pipe_gp,
> +       lima_pipe_pp,
> +       lima_pipe_num,
> +};
> +
> +struct lima_device {
> +       struct device *dev;
> +       struct drm_device *ddev;
> +       struct platform_device *pdev;
> +
> +       enum lima_gpu_id id;
> +       int num_pp;
> +
> +       void __iomem *iomem;
> +       struct clk *clk_bus;
> +       struct clk *clk_gpu;
> +       struct reset_control *reset;
> +       struct regulator *regulator;
> +
> +       struct lima_ip ip[lima_ip_num];
> +       struct lima_sched_pipe pipe[lima_pipe_num];
> +
> +       struct lima_mman mman;
> +
> +       struct lima_vm *empty_vm;
> +       uint64_t va_start;
> +       uint64_t va_end;
> +
> +       u32 *dlbu_cpu;
> +       dma_addr_t dlbu_dma;
> +};
> +
> +static inline struct lima_device *
> +to_lima_dev(struct drm_device *dev)
> +{
> +       return dev->dev_private;
> +}
> +
> +static inline struct lima_device *
> +ttm_to_lima_dev(struct ttm_bo_device *dev)
> +{
> +       return container_of(dev, struct lima_device, mman.bdev);
> +}
> +
> +int lima_device_init(struct lima_device *ldev);
> +void lima_device_fini(struct lima_device *ldev);
> +
> +const char *lima_ip_name(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_dlbu.c b/drivers/gpu/drm/lima/lima_dlbu.c
> new file mode 100644
> index 000000000000..6697d4ddd887
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_dlbu.c
> @@ -0,0 +1,56 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_dlbu.h"
> +#include "lima_vm.h"
> +#include "lima_regs.h"
> +
> +#define dlbu_write(reg, data) writel(data, ip->iomem + LIMA_DLBU_##reg)
> +#define dlbu_read(reg) readl(ip->iomem + LIMA_DLBU_##reg)
> +
> +void lima_dlbu_enable(struct lima_device *dev, int num_pp)
> +{
> +       struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
> +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> +       int i, mask = 0;
> +
> +       for (i = 0; i < num_pp; i++) {
> +               struct lima_ip *pp = pipe->processor[i];
> +               mask |= 1 << (pp->id - lima_ip_pp0);
> +       }
> +
> +       dlbu_write(PP_ENABLE_MASK, mask);
> +}
> +
> +void lima_dlbu_disable(struct lima_device *dev)
> +{
> +       struct lima_ip *ip = dev->ip + lima_ip_dlbu;
> +       dlbu_write(PP_ENABLE_MASK, 0);
> +}
> +
> +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg)
> +{
> +       dlbu_write(TLLIST_VBASEADDR, reg[0]);
> +       dlbu_write(FB_DIM, reg[1]);
> +       dlbu_write(TLLIST_CONF, reg[2]);
> +       dlbu_write(START_TILE_POS, reg[3]);
> +}
> +
> +int lima_dlbu_init(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +
> +       dlbu_write(MASTER_TLLIST_PHYS_ADDR, dev->dlbu_dma | 1);
> +       dlbu_write(MASTER_TLLIST_VADDR, LIMA_VA_RESERVE_DLBU);
> +
> +       return 0;
> +}
> +
> +void lima_dlbu_fini(struct lima_ip *ip)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/lima/lima_dlbu.h b/drivers/gpu/drm/lima/lima_dlbu.h
> new file mode 100644
> index 000000000000..60cba387cf30
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_dlbu.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#ifndef __LIMA_DLBU_H__
> +#define __LIMA_DLBU_H__
> +
> +struct lima_ip;
> +struct lima_device;
> +
> +void lima_dlbu_enable(struct lima_device *dev, int num_pp);
> +void lima_dlbu_disable(struct lima_device *dev);
> +
> +void lima_dlbu_set_reg(struct lima_ip *ip, u32 *reg);
> +
> +int lima_dlbu_init(struct lima_ip *ip);
> +void lima_dlbu_fini(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> new file mode 100644
> index 000000000000..132071b9be9b
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_drv.c
> @@ -0,0 +1,459 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/module.h>
> +#include <linux/of_platform.h>
> +#include <linux/log2.h>
> +#include <drm/drm_prime.h>
> +#include <drm/lima_drm.h>
> +
> +#include "lima_drv.h"
> +#include "lima_gem.h"
> +#include "lima_gem_prime.h"
> +#include "lima_vm.h"
> +
> +int lima_sched_timeout_ms = 0;
> +int lima_sched_max_tasks = 32;
> +int lima_max_mem = -1;
> +
> +MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
> +module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
> +
> +MODULE_PARM_DESC(sched_max_tasks, "max queued task num in a context (default 32)");
> +module_param_named(sched_max_tasks, lima_sched_max_tasks, int, 0444);
> +
> +MODULE_PARM_DESC(max_mem, "Max memory size in MB can be used (<0 = auto)");
> +module_param_named(max_mem, lima_max_mem, int, 0444);
> +
> +static int lima_ioctl_info(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_info *info = data;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       switch (ldev->id) {
> +       case lima_gpu_mali400:
> +               info->gpu_id = LIMA_INFO_GPU_MALI400;
> +               break;
> +       case lima_gpu_mali450:
> +               info->gpu_id = LIMA_INFO_GPU_MALI450;
> +               break;
> +       default:
> +               return -ENODEV;
> +       }
> +       info->num_pp = ldev->pipe[lima_pipe_pp].num_processor;
> +       info->va_start = ldev->va_start;
> +       info->va_end = ldev->va_end;
> +       return 0;
> +}
> +
> +static int lima_ioctl_gem_create(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_create *args = data;
> +
> +       if (args->flags)
> +               return -EINVAL;
> +
> +       if (args->size == 0)
> +               return -EINVAL;
> +
> +       return lima_gem_create_handle(dev, file, args->size, args->flags, &args->handle);
> +}
> +
> +static int lima_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_info *args = data;
> +
> +       return lima_gem_mmap_offset(file, args->handle, &args->offset);
> +}
> +
> +static int lima_ioctl_gem_va(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_va *args = data;
> +
> +       switch (args->op) {
> +       case LIMA_VA_OP_MAP:
> +               return lima_gem_va_map(file, args->handle, args->flags, args->va);
> +       case LIMA_VA_OP_UNMAP:
> +               return lima_gem_va_unmap(file, args->handle, args->va);

These ioctls are mapping to a GPU VA. Why not do that on GEM object
creation or import, or when the objects are submitted with the cmd
queue, as other drivers do?

To put it another way, these ioctls look different from what other
drivers do. Why do you need to do things differently? My understanding
is that best practice is to map and return the GPU offset when the GEM
object is created. This is what v3d does. I think Intel is moving to
that. And panfrost will do that.
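
For example (only a sketch, reusing lima_vm_bo_map() from this patch;
lima_vm_alloc_va() is a hypothetical helper that picks a free range
from the per-file VM):

static int lima_gem_object_open(struct drm_gem_object *obj,
                                struct drm_file *file)
{
        struct lima_drm_priv *priv = file->driver_priv;
        struct lima_bo *bo = to_lima_bo(obj);
        u32 va;
        int err;

        /* let the kernel pick the GPU VA instead of taking one from
         * userspace, then report it back via GEM_INFO */
        err = lima_vm_alloc_va(priv->vm, bo->gem.size, &va);
        if (err)
                return err;

        return lima_vm_bo_map(priv->vm, bo, va);
}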

> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
> +static int lima_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_submit_in *args = data;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +       struct lima_drm_priv *priv = file->driver_priv;
> +       struct drm_lima_gem_submit_bo *bos;
> +       struct ttm_validate_buffer *vbs;
> +       union drm_lima_gem_submit_dep *deps = NULL;
> +       struct lima_sched_pipe *pipe;
> +       struct lima_sched_task *task;
> +       struct lima_ctx *ctx;
> +       struct lima_submit submit = {0};
> +       int err = 0, size;
> +
> +       if (args->pipe >= lima_pipe_num || args->nr_bos == 0)
> +               return -EINVAL;
> +
> +       if (args->flags & ~(LIMA_SUBMIT_FLAG_EXPLICIT_FENCE |
> +                           LIMA_SUBMIT_FLAG_SYNC_FD_OUT))
> +               return -EINVAL;
> +
> +       pipe = ldev->pipe + args->pipe;
> +       if (args->frame_size != pipe->frame_size)
> +               return -EINVAL;
> +
> +       size = args->nr_bos * (sizeof(*submit.bos) + sizeof(*submit.vbs)) +
> +               args->nr_deps * sizeof(*submit.deps);
> +       bos = kzalloc(size, GFP_KERNEL);
> +       if (!bos)
> +               return -ENOMEM;
> +
> +       size = args->nr_bos * sizeof(*submit.bos);
> +       if (copy_from_user(bos, u64_to_user_ptr(args->bos), size)) {
> +               err = -EFAULT;
> +               goto out0;
> +       }
> +
> +       vbs = (void *)bos + size;
> +
> +       if (args->nr_deps) {
> +               deps = (void *)vbs + args->nr_bos * sizeof(*submit.vbs);
> +               size = args->nr_deps * sizeof(*submit.deps);
> +               if (copy_from_user(deps, u64_to_user_ptr(args->deps), size)) {
> +                       err = -EFAULT;
> +                       goto out0;
> +               }
> +       }
> +
> +       task = kmem_cache_zalloc(pipe->task_slab, GFP_KERNEL);
> +       if (!task) {
> +               err = -ENOMEM;
> +               goto out0;
> +       }
> +
> +       task->frame = task + 1;
> +       if (copy_from_user(task->frame, u64_to_user_ptr(args->frame), args->frame_size)) {
> +               err = -EFAULT;
> +               goto out1;
> +       }
> +
> +       err = pipe->task_validate(pipe, task);
> +       if (err)
> +               goto out1;
> +
> +       ctx = lima_ctx_get(&priv->ctx_mgr, args->ctx);
> +       if (!ctx) {
> +               err = -ENOENT;
> +               goto out1;
> +       }
> +
> +       submit.pipe = args->pipe;
> +       submit.bos = bos;
> +       submit.vbs = vbs;
> +       submit.nr_bos = args->nr_bos;
> +       submit.task = task;
> +       submit.ctx = ctx;
> +       submit.deps = deps;
> +       submit.nr_deps = args->nr_deps;
> +       submit.flags = args->flags;
> +
> +       err = lima_gem_submit(file, &submit);
> +       if (!err) {
> +               struct drm_lima_gem_submit_out *out = data;
> +               out->fence = submit.fence;
> +               out->done = submit.done;
> +               out->sync_fd = submit.sync_fd;
> +       }
> +
> +       lima_ctx_put(ctx);
> +out1:
> +       if (err)
> +               kmem_cache_free(pipe->task_slab, task);
> +out0:
> +       kfree(bos);
> +       return err;
> +}
> +
> +static int lima_wait_fence(struct dma_fence *fence, u64 timeout_ns)
> +{
> +       signed long ret;
> +
> +       if (!timeout_ns)
> +               ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;

I think you can just call dma_fence_wait_timeout with a 0 timeout
below and remove this clause.
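
i.e. something like this (a sketch; it assumes the 0-timeout behaviour
of dma_fence_wait_timeout() is "return whether the fence is already
signaled", which I believe it is):

static int lima_wait_fence(struct dma_fence *fence, u64 timeout_ns)
{
        unsigned long timeout = timeout_ns ?
                lima_timeout_to_jiffies(timeout_ns) : 0;
        signed long ret;

        ret = dma_fence_wait_timeout(fence, true, timeout);
        if (ret == 0)
                return timeout ? -ETIMEDOUT : -EBUSY;

        /* ret is a long so a large remaining timeout can't overflow */
        return ret < 0 ? ret : 0;
}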

> +       else {
> +               unsigned long timeout = lima_timeout_to_jiffies(timeout_ns);
> +
> +               /* must use long for result check because in 64bit arch int
> +                * will overflow if timeout is too large and get <0 result
> +                */
> +               ret = dma_fence_wait_timeout(fence, true, timeout);
> +               if (ret == 0)
> +                       ret = timeout ? -ETIMEDOUT : -EBUSY;
> +               else if (ret > 0)
> +                       ret = 0;
> +       }

I suspect this could be made common, like the reservation object waits.
However, I'm curious why lima needs this ioctl in the first place when I
don't see the same in other drivers.

> +
> +       return ret;
> +}
> +
> +static int lima_ioctl_wait_fence(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_wait_fence *args = data;
> +       struct lima_drm_priv *priv = file->driver_priv;
> +       struct dma_fence *fence;
> +       int err = 0;
> +
> +       fence = lima_ctx_get_native_fence(&priv->ctx_mgr, args->ctx,
> +                                         args->pipe, args->seq);
> +       if (IS_ERR(fence))
> +               return PTR_ERR(fence);
> +
> +       if (fence) {
> +               err = lima_wait_fence(fence, args->timeout_ns);
> +               args->error = fence->error;
> +               dma_fence_put(fence);
> +       }
> +       else
> +               args->error = 0;
> +
> +       return err;
> +}
> +
> +static int lima_ioctl_gem_wait(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_wait *args = data;
> +
> +       if (!(args->op & (LIMA_GEM_WAIT_READ|LIMA_GEM_WAIT_WRITE)))
> +           return -EINVAL;
> +
> +       return lima_gem_wait(file, args->handle, args->op, args->timeout_ns);
> +}
> +
> +static int lima_ioctl_ctx(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_ctx *args = data;
> +       struct lima_drm_priv *priv = file->driver_priv;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       if (args->op == LIMA_CTX_OP_CREATE)
> +               return lima_ctx_create(ldev, &priv->ctx_mgr, &args->id);
> +       else if (args->op == LIMA_CTX_OP_FREE)
> +               return lima_ctx_free(&priv->ctx_mgr, args->id);
> +
> +       return -EINVAL;
> +}
> +
> +static int lima_ioctl_gem_mod(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +       struct drm_lima_gem_mod *args = data;
> +
> +       if (args->op == LIMA_GEM_MOD_OP_GET)
> +               return lima_gem_get_modifier(file, args->handle, &args->modifier);
> +       else if (args->op == LIMA_GEM_MOD_OP_SET)
> +               return lima_gem_set_modifier(file, args->handle, args->modifier);
> +
> +       return -EINVAL;
> +}
> +
> +static int lima_drm_driver_open(struct drm_device *dev, struct drm_file *file)
> +{
> +       int err;
> +       struct lima_drm_priv *priv;
> +       struct lima_device *ldev = to_lima_dev(dev);
> +
> +       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +       if (!priv)
> +               return -ENOMEM;
> +
> +       priv->vm = lima_vm_create(ldev);
> +       if (!priv->vm) {
> +               err = -ENOMEM;
> +               goto err_out0;
> +       }
> +
> +        lima_ctx_mgr_init(&priv->ctx_mgr);
> +
> +       file->driver_priv = priv;
> +       return 0;
> +
> +err_out0:
> +       kfree(priv);
> +       return err;
> +}
> +
> +static void lima_drm_driver_postclose(struct drm_device *dev, struct drm_file *file)
> +{
> +       struct lima_drm_priv *priv = file->driver_priv;
> +
> +       lima_ctx_mgr_fini(&priv->ctx_mgr);
> +       lima_vm_put(priv->vm);
> +       kfree(priv);
> +}
> +
> +static const struct drm_ioctl_desc lima_drm_driver_ioctls[] = {
> +       DRM_IOCTL_DEF_DRV(LIMA_INFO, lima_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_CREATE, lima_ioctl_gem_create, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_INFO, lima_ioctl_gem_info, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_VA, lima_ioctl_gem_va, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_SUBMIT, lima_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_WAIT_FENCE, lima_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_WAIT, lima_ioctl_gem_wait, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_CTX, lima_ioctl_ctx, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(LIMA_GEM_MOD, lima_ioctl_gem_mod, DRM_AUTH|DRM_RENDER_ALLOW),
> +};
> +
> +static const struct file_operations lima_drm_driver_fops = {
> +       .owner              = THIS_MODULE,
> +       .open               = drm_open,
> +       .release            = drm_release,
> +       .unlocked_ioctl     = drm_ioctl,
> +#ifdef CONFIG_COMPAT
> +       .compat_ioctl       = drm_compat_ioctl,
> +#endif
> +       .mmap               = lima_gem_mmap,
> +};
> +
> +static struct drm_driver lima_drm_driver = {
> +       .driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME,
> +       .open               = lima_drm_driver_open,
> +       .postclose          = lima_drm_driver_postclose,
> +       .ioctls             = lima_drm_driver_ioctls,
> +       .num_ioctls         = ARRAY_SIZE(lima_drm_driver_ioctls),
> +       .fops               = &lima_drm_driver_fops,
> +       .gem_free_object_unlocked = lima_gem_free_object,
> +       .gem_open_object    = lima_gem_object_open,
> +       .gem_close_object   = lima_gem_object_close,
> +       .name               = "lima",
> +       .desc               = "lima DRM",
> +       .date               = "20170325",

Perhaps this should be updated? TBH, I don't know why this is even useful.

> +       .major              = 1,
> +       .minor              = 0,
> +       .patchlevel         = 0,
> +
> +       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> +       .gem_prime_import   = drm_gem_prime_import,
> +       .gem_prime_import_sg_table = lima_gem_prime_import_sg_table,
> +       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> +       .gem_prime_export   = drm_gem_prime_export,

The .gem_prime_import and .gem_prime_export hooks don't have to be set
if you use the defaults.

> +       .gem_prime_res_obj  = lima_gem_prime_res_obj,
> +       .gem_prime_get_sg_table = lima_gem_prime_get_sg_table,
> +       .gem_prime_vmap = lima_gem_prime_vmap,
> +       .gem_prime_vunmap = lima_gem_prime_vunmap,
> +       .gem_prime_mmap = lima_gem_prime_mmap,
> +};
> +

> +int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, u64 timeout_ns)
> +{
> +       bool write = op & LIMA_GEM_WAIT_WRITE;
> +       struct drm_gem_object *obj;
> +       struct lima_bo *bo;
> +       signed long ret;
> +       unsigned long timeout;
> +
> +       obj = drm_gem_object_lookup(file, handle);
> +       if (!obj)
> +               return -ENOENT;
> +
> +       bo = to_lima_bo(obj);
> +
> +       timeout = timeout_ns ? lima_timeout_to_jiffies(timeout_ns) : 0;
> +
> +       ret = lima_bo_reserve(bo, true);
> +       if (ret)
> +               goto out;
> +
> +       /* must use long for result check because in 64bit arch int
> +        * will overflow if timeout is too large and get <0 result
> +        */
> +       ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, write, true, timeout);
> +       if (ret == 0)
> +               ret = timeout ? -ETIMEDOUT : -EBUSY;
> +       else if (ret > 0)
> +               ret = 0;

There's a helper I added for all this that should land in 5.1.
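
(It's drm_gem_reservation_object_wait(), if I'm remembering the name
and signature right.) With it, modulo the timeout_ns == 0 / -EBUSY
special case, this whole function could shrink to roughly:

int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, u64 timeout_ns)
{
        bool write = op & LIMA_GEM_WAIT_WRITE;
        unsigned long timeout = lima_timeout_to_jiffies(timeout_ns);

        /* the helper does the handle lookup, the reservation wait and
         * the 0/-ETIME mapping */
        return drm_gem_reservation_object_wait(file, handle, write, timeout);
}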

> +
> +       lima_bo_unreserve(bo);
> +out:
> +       drm_gem_object_put_unlocked(obj);
> +       return ret;
> +}
> +

> +static int lima_gp_soft_reset_async_wait(struct lima_ip *ip)
> +{
> +       struct lima_device *dev = ip->dev;
> +       int timeout;
> +
> +       if (!ip->data.async_reset)
> +               return 0;
> +
> +       for (timeout = 1000; timeout > 0; timeout--) {
> +               if (gp_read(INT_RAWSTAT) & LIMA_GP_IRQ_RESET_COMPLETED)
> +                       break;

Use readl_poll_timeout instead of writing your own. At least add a
udelay to the loop so the timing is fixed and not dependent on how
fast the code can run.
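
With <linux/iopoll.h> included, the loop becomes something like this
(the 10us poll interval and 100ms timeout are made-up numbers, pick
whatever the hardware actually needs):

        u32 stat;
        int err;

        /* poll INT_RAWSTAT every 10us, give up after 100ms */
        err = readl_poll_timeout(ip->iomem + LIMA_GP_INT_RAWSTAT, stat,
                                 stat & LIMA_GP_IRQ_RESET_COMPLETED,
                                 10, 100000);
        if (err) {
                dev_err(dev->dev, "gp soft reset time out\n");
                return err;
        }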

> +       }
> +       if (!timeout) {
> +               dev_err(dev->dev, "gp soft reset time out\n");
> +               return -ETIMEDOUT;
> +       }
> +
> +       gp_write(INT_CLEAR, LIMA_GP_IRQ_MASK_ALL);
> +       gp_write(INT_MASK, LIMA_GP_IRQ_MASK_USED);
> +
> +       ip->data.async_reset = false;
> +       return 0;
> +}

> diff --git a/drivers/gpu/drm/lima/lima_l2_cache.c b/drivers/gpu/drm/lima/lima_l2_cache.c
> new file mode 100644
> index 000000000000..e7cdec720e5d
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_l2_cache.c
> @@ -0,0 +1,79 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_l2_cache.h"
> +#include "lima_regs.h"
> +
> +#define l2_cache_write(reg, data) writel(data, ip->iomem + LIMA_L2_CACHE_##reg)
> +#define l2_cache_read(reg) readl(ip->iomem + LIMA_L2_CACHE_##reg)
> +
> +static int lima_l2_cache_wait_idle(struct lima_ip *ip)
> +{
> +       int timeout;
> +       struct lima_device *dev = ip->dev;
> +
> +       for (timeout = 100000; timeout > 0; timeout--) {
> +           if (!(l2_cache_read(STATUS) & LIMA_L2_CACHE_STATUS_COMMAND_BUSY))
> +               break;

Use readl_poll_timeout or a variant here too, as above.

> +       }
> +       if (!timeout) {
> +           dev_err(dev->dev, "l2 cache wait command timeout\n");
> +           return -ETIMEDOUT;
> +       }
> +       return 0;
> +}
> +
> +int lima_l2_cache_flush(struct lima_ip *ip)
> +{
> +       int ret;
> +
> +       spin_lock(&ip->data.lock);
> +       l2_cache_write(COMMAND, LIMA_L2_CACHE_COMMAND_CLEAR_ALL);
> +       ret = lima_l2_cache_wait_idle(ip);
> +       spin_unlock(&ip->data.lock);
> +       return ret;
> +}
> +
> +int lima_l2_cache_init(struct lima_ip *ip)
> +{
> +       int i, err;
> +       u32 size;
> +       struct lima_device *dev = ip->dev;
> +
> +       /* l2_cache2 only exists when one of PP4-7 present */
> +       if (ip->id == lima_ip_l2_cache2) {
> +               for (i = lima_ip_pp4; i <= lima_ip_pp7; i++) {
> +                       if (dev->ip[i].present)
> +                               break;
> +               }
> +               if (i > lima_ip_pp7)
> +                       return -ENODEV;
> +       }
> +
> +       spin_lock_init(&ip->data.lock);
> +
> +       size = l2_cache_read(SIZE);
> +       dev_info(dev->dev, "l2 cache %uK, %u-way, %ubyte cache line, %ubit external bus\n",
> +                1 << (((size >> 16) & 0xff) - 10),
> +                1 << ((size >> 8) & 0xff),
> +                1 << (size & 0xff),
> +                1 << ((size >> 24) & 0xff));
> +
> +       err = lima_l2_cache_flush(ip);
> +       if (err)
> +               return err;
> +
> +       l2_cache_write(ENABLE, LIMA_L2_CACHE_ENABLE_ACCESS | LIMA_L2_CACHE_ENABLE_READ_ALLOCATE);
> +       l2_cache_write(MAX_READS, 0x1c);
> +
> +       return 0;
> +}
> +
> +void lima_l2_cache_fini(struct lima_ip *ip)
> +{
> +
> +}
> diff --git a/drivers/gpu/drm/lima/lima_l2_cache.h b/drivers/gpu/drm/lima/lima_l2_cache.h
> new file mode 100644
> index 000000000000..2ff91eafefbe
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_l2_cache.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#ifndef __LIMA_L2_CACHE_H__
> +#define __LIMA_L2_CACHE_H__
> +
> +struct lima_ip;
> +
> +int lima_l2_cache_init(struct lima_ip *ip);
> +void lima_l2_cache_fini(struct lima_ip *ip);
> +
> +int lima_l2_cache_flush(struct lima_ip *ip);
> +
> +#endif
> diff --git a/drivers/gpu/drm/lima/lima_mmu.c b/drivers/gpu/drm/lima/lima_mmu.c
> new file mode 100644
> index 000000000000..234fb90a4285
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_mmu.c
> @@ -0,0 +1,135 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/device.h>
> +
> +#include "lima_device.h"
> +#include "lima_mmu.h"
> +#include "lima_vm.h"
> +#include "lima_object.h"
> +#include "lima_regs.h"
> +
> +#define mmu_write(reg, data) writel(data, ip->iomem + LIMA_MMU_##reg)
> +#define mmu_read(reg) readl(ip->iomem + LIMA_MMU_##reg)
> +
> +#define lima_mmu_send_command(command, condition)           \
> +({                                                          \
> +       int __timeout, __ret = 0;                            \
> +                                                            \
> +       mmu_write(COMMAND, command);                         \
> +       for (__timeout = 1000; __timeout > 0; __timeout--) { \
> +               if (condition)                               \
> +                       break;                               \
> +       }                                                    \
> +       if (!__timeout) {                                    \
> +               dev_err(dev->dev, "mmu command %x timeout\n", command); \
> +               __ret = -ETIMEDOUT;                          \
> +       }                                                    \
> +       __ret;                                               \
> +})
> +
> +static irqreturn_t lima_mmu_irq_handler(int irq, void *data)
> +{
> +       struct lima_ip *ip = data;
> +       struct lima_device *dev = ip->dev;
> +       u32 status = mmu_read(INT_STATUS);
> +       struct lima_sched_pipe *pipe;
> +
> +       /* for shared irq case */
> +       if (!status)

Can status contain masked IRQs? If so, you should be masking out the
disabled IRQ bits.
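
i.e., assuming INT_MASK reads back the currently enabled bits, something
like:

        u32 status = mmu_read(INT_STATUS) & mmu_read(INT_MASK);

        /* for shared irq case: nothing we enabled is pending */
        if (!status)
                return IRQ_NONE;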

> +               return IRQ_NONE;
> +
> +       if (status & LIMA_MMU_INT_PAGE_FAULT) {
> +               u32 fault = mmu_read(PAGE_FAULT_ADDR);
> +               dev_err(dev->dev, "mmu page fault at 0x%x from bus id %d of type %s on %s\n",
> +                       fault, LIMA_MMU_STATUS_BUS_ID(status),
> +                       status & LIMA_MMU_STATUS_PAGE_FAULT_IS_WRITE ? "write" : "read",
> +                       lima_ip_name(ip));
> +       }
> +
> +       if (status & LIMA_MMU_INT_READ_BUS_ERROR) {
> +               dev_err(dev->dev, "mmu %s irq bus error\n", lima_ip_name(ip));
> +       }
> +
> +       /* mask all interrupts before resume */
> +       mmu_write(INT_MASK, 0);
> +       mmu_write(INT_CLEAR, status);
> +
> +       pipe = dev->pipe + (ip->id == lima_ip_gpmmu ? lima_pipe_gp : lima_pipe_pp);
> +       lima_sched_pipe_mmu_error(pipe);
> +
> +       return IRQ_HANDLED;
> +}


> +
> +unsigned long lima_timeout_to_jiffies(u64 timeout_ns)

Create a common helper instead of copy-n-pasting this from other
drivers (etnaviv).

> +{
> +       unsigned long timeout_jiffies;
> +       ktime_t timeout;
> +
> +       /* clamp timeout if it's to large */
> +       if (((s64)timeout_ns) < 0)
> +               return MAX_SCHEDULE_TIMEOUT;
> +
> +       timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
> +       if (ktime_to_ns(timeout) < 0)
> +               return 0;
> +
> +       timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
> +       /*  clamp timeout to avoid unsigned-> signed overflow */
> +       if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
> +               return MAX_SCHEDULE_TIMEOUT;
> +
> +       return timeout_jiffies;
> +}
> +
> +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
> +{
> +       if (pipe->error)
> +               schedule_work(&pipe->error_work);
> +       else {
> +               struct lima_sched_task *task = pipe->current_task;
> +
> +               pipe->task_fini(pipe);
> +               dma_fence_signal(task->fence);
> +       }
> +}

> diff --git a/drivers/gpu/drm/lima/lima_vm.c b/drivers/gpu/drm/lima/lima_vm.c
> new file mode 100644
> index 000000000000..a264f3ae83fe
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_vm.c
> @@ -0,0 +1,354 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/slab.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/interval_tree_generic.h>
> +
> +#include "lima_device.h"
> +#include "lima_vm.h"
> +#include "lima_object.h"
> +#include "lima_regs.h"
> +
> +struct lima_bo_va_mapping {
> +       struct list_head list;
> +       struct rb_node rb;
> +       uint32_t start;
> +       uint32_t last;
> +       uint32_t __subtree_last;
> +};
> +
> +struct lima_bo_va {
> +       struct list_head list;
> +       unsigned ref_count;
> +
> +       struct list_head mapping;
> +
> +       struct lima_vm *vm;
> +};
> +
> +#define LIMA_VM_PD_SHIFT 22
> +#define LIMA_VM_PT_SHIFT 12
> +#define LIMA_VM_PB_SHIFT (LIMA_VM_PD_SHIFT + LIMA_VM_NUM_PT_PER_BT_SHIFT)
> +#define LIMA_VM_BT_SHIFT LIMA_VM_PT_SHIFT
> +
> +#define LIMA_VM_PT_MASK ((1 << LIMA_VM_PD_SHIFT) - 1)
> +#define LIMA_VM_BT_MASK ((1 << LIMA_VM_PB_SHIFT) - 1)
> +
> +#define LIMA_PDE(va) (va >> LIMA_VM_PD_SHIFT)
> +#define LIMA_PTE(va) ((va & LIMA_VM_PT_MASK) >> LIMA_VM_PT_SHIFT)
> +#define LIMA_PBE(va) (va >> LIMA_VM_PB_SHIFT)
> +#define LIMA_BTE(va) ((va & LIMA_VM_BT_MASK) >> LIMA_VM_BT_SHIFT)
> +
> +#define START(node) ((node)->start)
> +#define LAST(node) ((node)->last)
> +
> +INTERVAL_TREE_DEFINE(struct lima_bo_va_mapping, rb, uint32_t, __subtree_last,
> +                    START, LAST, static, lima_vm_it)
> +
> +#undef START
> +#undef LAST
> +
> +static void lima_vm_unmap_page_table(struct lima_vm *vm, u32 start, u32 end)
> +{
> +       u32 addr;
> +
> +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> +               u32 pbe = LIMA_PBE(addr);
> +               u32 bte = LIMA_BTE(addr);
> +               u32 *bt;
> +
> +               bt = lima_bo_kmap(vm->bts[pbe]);
> +               bt[bte] = 0;
> +       }
> +}
> +
> +static int lima_vm_map_page_table(struct lima_vm *vm, dma_addr_t *dma,
> +                                 u32 start, u32 end)
> +{
> +       u64 addr;
> +       int err, i = 0;
> +
> +       for (addr = start; addr <= end; addr += LIMA_PAGE_SIZE) {
> +               u32 pbe = LIMA_PBE(addr);
> +               u32 bte = LIMA_BTE(addr);
> +               u32 *bt;
> +
> +               if (vm->bts[pbe])
> +                       bt = lima_bo_kmap(vm->bts[pbe]);
> +               else {
> +                       struct lima_bo *bt_bo;
> +                       dma_addr_t *pts;
> +                       u32 *pd;
> +                       int j;
> +
> +                       bt_bo = lima_bo_create(
> +                               vm->dev, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
> +                               0, ttm_bo_type_kernel,
> +                               NULL, vm->pd->tbo.resv);

I don't think using BOs for page tables buys you anything. You could
just use the kernel DMA API directly; see io-pgtable-arm-v7s.c for
inspiration. For panfrost, it's standard ARM-format page tables, so we
can just use the io-pgtable library.
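
i.e. a block of page tables can just be a coherent DMA allocation,
something like this (a sketch, reusing the size defines from this
patch):

        u32 *bt;
        dma_addr_t bt_dma;

        bt = dma_alloc_coherent(dev->dev,
                                LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT,
                                &bt_dma, GFP_KERNEL);
        if (!bt)
                return -ENOMEM;

        /* bt is the CPU pointer for filling in PTEs; bt_dma (plus the
         * present flag) is what goes into the page directory entries */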

> +                       if (IS_ERR(bt_bo)) {
> +                               err = PTR_ERR(bt_bo);
> +                               goto err_out;
> +                       }
> +
> +                       bt = lima_bo_kmap(bt_bo);
> +                       if (IS_ERR(bt)) {
> +                               lima_bo_unref(bt_bo);
> +                               err = PTR_ERR(bt);
> +                               goto err_out;
> +                       }
> +                       memset(bt, 0, LIMA_PAGE_SIZE << LIMA_VM_NUM_PT_PER_BT_SHIFT);
> +
> +                       vm->bts[pbe] = bt_bo;
> +                       pd = lima_bo_kmap(vm->pd);
> +                       pd += pbe << LIMA_VM_NUM_PT_PER_BT_SHIFT;
> +                       pts = lima_bo_get_pages(bt_bo);
> +                       for (j = 0; j < LIMA_VM_NUM_PT_PER_BT; j++)
> +                               *pd++ = *pts++ | LIMA_VM_FLAG_PRESENT;
> +               }
> +
> +               bt[bte] = dma[i++] | LIMA_VM_FLAGS_CACHE;
> +       }
> +
> +       return 0;
> +
> +err_out:
> +       if (addr != start)
> +               lima_vm_unmap_page_table(vm, start, addr - 1);
> +       return err;
> +}
> +
> +static struct lima_bo_va *
> +lima_vm_bo_find(struct lima_vm *vm, struct lima_bo *bo)
> +{
> +       struct lima_bo_va *bo_va, *ret = NULL;
> +
> +       list_for_each_entry(bo_va, &bo->va, list) {
> +               if (bo_va->vm == vm) {
> +                       ret = bo_va;
> +                       break;
> +               }
> +       }
> +
> +       return ret;
> +}
> +
> +int lima_vm_bo_map(struct lima_vm *vm, struct lima_bo *bo, u32 start)
> +{
> +       int err;
> +       struct lima_bo_va_mapping *it, *mapping;
> +       u32 end = start + bo->gem.size - 1;
> +       dma_addr_t *pages_dma = lima_bo_get_pages(bo);
> +       struct lima_bo_va *bo_va;
> +
> +       it = lima_vm_it_iter_first(&vm->va, start, end);
> +       if (it) {
> +               dev_dbg(bo->gem.dev->dev, "lima vm map va overlap %x-%x %x-%x\n",
> +                       start, end, it->start, it->last);
> +               return -EINVAL;
> +       }
> +
> +       mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
> +       if (!mapping)
> +               return -ENOMEM;
> +       mapping->start = start;
> +       mapping->last = end;

Why don't you use the drm_mm_XX APIs instead of writing your own?
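
drm_mm would give you the overlap checking (and allocation, if you ever
let the kernel pick addresses) for free. A sketch, assuming a struct
drm_mm embedded in lima_vm and a drm_mm_node per mapping (both
hypothetical fields):

        /* lima_vm_create(): manage the per-process GPU VA space */
        drm_mm_init(&vm->mm, dev->va_start, dev->va_end - dev->va_start);

        /* lima_vm_bo_map(): reserve the exact range userspace asked
         * for; drm_mm_reserve_node() fails if it overlaps an existing
         * node */
        bo_va->node.start = start;
        bo_va->node.size = bo->gem.size;
        err = drm_mm_reserve_node(&vm->mm, &bo_va->node);
        if (err)
                return err;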

> +
> +       err = lima_vm_map_page_table(vm, pages_dma, start, end);
> +       if (err) {
> +               kfree(mapping);
> +               return err;
> +       }
> +
> +       lima_vm_it_insert(mapping, &vm->va);
> +
> +       bo_va = lima_vm_bo_find(vm, bo);
> +       list_add_tail(&mapping->list, &bo_va->mapping);
> +
> +       return 0;
> +}

