[PATCH v6 2/2] drm/lima: driver for ARM Mali4xx GPUs

Eric Anholt eric at anholt.net
Tue Mar 5 20:16:21 UTC 2019


Qiang Yu <yuq825 at gmail.com> writes:

> - Mali 4xx GPUs have two kinds of processors GP and PP. GP is for
>   OpenGL vertex shader processing and PP is for fragment shader
>   processing. Each processor has its own MMU so prcessors work in
>   virtual address space.
> - There's only one GP but multiple PP (max 4 for mali 400 and 8
>   for mali 450) in the same mali 4xx GPU. All PPs are grouped
>   togather to handle a single fragment shader task divided by
>   FB output tiled pixels. Mali 400 user space driver is
>   responsible for assign target tiled pixels to each PP, but mali
>   450 has a HW module called DLBU to dynamically balance each
>   PP's load.
> - User space driver allocate buffer object and map into GPU
>   virtual address space, upload command stream and draw data with
>   CPU mmap of the buffer object, then submit task to GP/PP with
>   a register frame indicating where is the command stream and misc
>   settings.
> - There's no command stream validation/relocation due to each user
>   process has its own GPU virtual address space. GP/PP's MMU switch
>   virtual address space before running two tasks from different
>   user process. Error or evil user space code just get MMU fault
>   or GP/PP error IRQ, then the HW/SW will be recovered.
> - Use GEM+shmem for MM. Currently just alloc and pin memory when
>   gem object creation. GPU vm map of the buffer is also done in
>   the alloc stage in kernel space. We may delay the memory
>   allocation and real GPU vm map to command submission stage in the
>   furture as improvement.
> - Use drm_sched for GPU task schedule. Each OpenGL context should
>   have a lima context object in the kernel to distinguish tasks
>   from different user. drm_sched gets task from each lima context
>   in a fair way.

Given the requirement for open source userspace for new DRM kernel
drivers, it would be nice to see the link to your open source userspace
with the submission so we can see how the interfaces get used.  (I know
I found it once before, but I don't remember now)

However, other than a concern about the _pad fields in the UABI, I'm
ready to add a reviewed-b.

> --- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 +
> drivers/gpu/drm/lima/Kconfig | 10 + drivers/gpu/drm/lima/Makefile | 21
> ++ drivers/gpu/drm/lima/lima_bcast.c | 47 +++
> drivers/gpu/drm/lima/lima_bcast.h | 14 +
> drivers/gpu/drm/lima/lima_ctx.c | 97 ++++++
> drivers/gpu/drm/lima/lima_ctx.h | 30 ++
> drivers/gpu/drm/lima/lima_device.c | 385 +++++++++++++++++++++++
> drivers/gpu/drm/lima/lima_device.h | 131 ++++++++
> drivers/gpu/drm/lima/lima_dlbu.c | 58 ++++
> drivers/gpu/drm/lima/lima_dlbu.h | 18 ++
> drivers/gpu/drm/lima/lima_drv.c | 366 ++++++++++++++++++++++
> drivers/gpu/drm/lima/lima_drv.h | 45 +++
> drivers/gpu/drm/lima/lima_gem.c | 381 +++++++++++++++++++++++
> drivers/gpu/drm/lima/lima_gem.h | 25 ++
> drivers/gpu/drm/lima/lima_gem_prime.c | 47 +++
> drivers/gpu/drm/lima/lima_gem_prime.h | 13 +
> drivers/gpu/drm/lima/lima_gp.c | 283 +++++++++++++++++
> drivers/gpu/drm/lima/lima_gp.h | 16 +
> drivers/gpu/drm/lima/lima_l2_cache.c | 80 +++++
> drivers/gpu/drm/lima/lima_l2_cache.h | 14 +
> drivers/gpu/drm/lima/lima_mmu.c | 142 +++++++++
> drivers/gpu/drm/lima/lima_mmu.h | 16 +
> drivers/gpu/drm/lima/lima_object.c | 122 ++++++++
> drivers/gpu/drm/lima/lima_object.h | 36 +++
> drivers/gpu/drm/lima/lima_pmu.c | 60 ++++
> drivers/gpu/drm/lima/lima_pmu.h | 12 + drivers/gpu/drm/lima/lima_pp.c
> | 424 ++++++++++++++++++++++++++ drivers/gpu/drm/lima/lima_pp.h | 19
> ++ drivers/gpu/drm/lima/lima_regs.h | 298 ++++++++++++++++++
> drivers/gpu/drm/lima/lima_sched.c | 404 ++++++++++++++++++++++++
> drivers/gpu/drm/lima/lima_sched.h | 104 +++++++
> drivers/gpu/drm/lima/lima_vm.c | 282 +++++++++++++++++
> drivers/gpu/drm/lima/lima_vm.h | 62 ++++ include/uapi/drm/lima_drm.h |
> 139 +++++++++ 36 files changed, 4204 insertions(+) create mode 100644
> drivers/gpu/drm/lima/Kconfig create mode 100644
> drivers/gpu/drm/lima/Makefile create mode 100644
> drivers/gpu/drm/lima/lima_bcast.c create mode 100644
> drivers/gpu/drm/lima/lima_bcast.h create mode 100644
> drivers/gpu/drm/lima/lima_ctx.c create mode 100644
> drivers/gpu/drm/lima/lima_ctx.h create mode 100644
> drivers/gpu/drm/lima/lima_device.c create mode 100644
> drivers/gpu/drm/lima/lima_device.h create mode 100644
> drivers/gpu/drm/lima/lima_dlbu.c create mode 100644
> drivers/gpu/drm/lima/lima_dlbu.h create mode 100644
> drivers/gpu/drm/lima/lima_drv.c create mode 100644
> drivers/gpu/drm/lima/lima_drv.h create mode 100644
> drivers/gpu/drm/lima/lima_gem.c create mode 100644
> drivers/gpu/drm/lima/lima_gem.h create mode 100644
> drivers/gpu/drm/lima/lima_gem_prime.c create mode 100644
> drivers/gpu/drm/lima/lima_gem_prime.h create mode 100644
> drivers/gpu/drm/lima/lima_gp.c create mode 100644
> drivers/gpu/drm/lima/lima_gp.h create mode 100644
> drivers/gpu/drm/lima/lima_l2_cache.c create mode 100644
> drivers/gpu/drm/lima/lima_l2_cache.h create mode 100644
> drivers/gpu/drm/lima/lima_mmu.c create mode 100644
> drivers/gpu/drm/lima/lima_mmu.h create mode 100644
> drivers/gpu/drm/lima/lima_object.c create mode 100644
> drivers/gpu/drm/lima/lima_object.h create mode 100644
> drivers/gpu/drm/lima/lima_pmu.c create mode 100644
> drivers/gpu/drm/lima/lima_pmu.h create mode 100644
> drivers/gpu/drm/lima/lima_pp.c create mode 100644
> drivers/gpu/drm/lima/lima_pp.h create mode 100644
> drivers/gpu/drm/lima/lima_regs.h create mode 100644
> drivers/gpu/drm/lima/lima_sched.c create mode 100644
> drivers/gpu/drm/lima/lima_sched.h create mode 100644
> drivers/gpu/drm/lima/lima_vm.c create mode 100644
> drivers/gpu/drm/lima/lima_vm.h create mode 100644
> include/uapi/drm/lima_drm.h

> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> new file mode 100644
> index 000000000000..606e8aad2a82
> --- /dev/null
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -0,0 +1,404 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright 2017-2019 Qiang Yu <yuq825 at gmail.com> */
> +
> +#include <linux/kthread.h>
> +#include <linux/slab.h>
> +
> +#include "lima_drv.h"
> +#include "lima_sched.h"
> +#include "lima_vm.h"
> +#include "lima_mmu.h"
> +#include "lima_l2_cache.h"
> +#include "lima_object.h"
> +
> +struct lima_fence {
> +	struct dma_fence base;
> +	struct lima_sched_pipe *pipe;
> +};
> +
> +static struct kmem_cache *lima_fence_slab;
> +
> +int lima_sched_slab_init(void)
> +{
> +	lima_fence_slab = kmem_cache_create(
> +		"lima_fence", sizeof(struct lima_fence), 0,
> +		SLAB_HWCACHE_ALIGN, NULL);
> +	if (!lima_fence_slab)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +void lima_sched_slab_fini(void)
> +{
> +	kmem_cache_destroy(lima_fence_slab);
> +}
> +
> +static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
> +{
> +	return container_of(fence, struct lima_fence, base);
> +}
> +
> +static const char *lima_fence_get_driver_name(struct dma_fence *fence)
> +{
> +	return "lima";
> +}
> +
> +static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
> +{
> +	struct lima_fence *f = to_lima_fence(fence);
> +
> +	return f->pipe->base.name;
> +}
> +
> +static bool lima_fence_enable_signaling(struct dma_fence *fence)
> +{
> +	return true;
> +}
> +
> +static void lima_fence_release_rcu(struct rcu_head *rcu)
> +{
> +	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
> +	struct lima_fence *fence = to_lima_fence(f);
> +
> +	kmem_cache_free(lima_fence_slab, fence);
> +}
> +
> +static void lima_fence_release(struct dma_fence *fence)
> +{
> +	struct lima_fence *f = to_lima_fence(fence);
> +
> +	call_rcu(&f->base.rcu, lima_fence_release_rcu);
> +}
> +
> +static const struct dma_fence_ops lima_fence_ops = {
> +	.get_driver_name = lima_fence_get_driver_name,
> +	.get_timeline_name = lima_fence_get_timeline_name,
> +	.enable_signaling = lima_fence_enable_signaling,
> +	.wait = dma_fence_default_wait,
> +	.release = lima_fence_release,
> +};

You can delete .enable_signaling and .wait now (See 418cc6ca0607
("dma-fence: Make ->wait callback optional"))

> diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
> new file mode 100644
> index 000000000000..705723a64b5f
> --- /dev/null
> +++ b/include/uapi/drm/lima_drm.h
> @@ -0,0 +1,139 @@
> +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
> +/* Copyright 2017-2018 Qiang Yu <yuq825 at gmail.com> */
> +
> +#ifndef __LIMA_DRM_H__
> +#define __LIMA_DRM_H__
> +
> +#include "drm.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif
> +
> +enum drm_lima_param_gpu_id {
> +	DRM_LIMA_PARAM_GPU_ID_UNKNOWN,
> +	DRM_LIMA_PARAM_GPU_ID_MALI400,
> +	DRM_LIMA_PARAM_GPU_ID_MALI450,
> +};
> +
> +enum drm_lima_param {
> +	DRM_LIMA_PARAM_GPU_ID,
> +	DRM_LIMA_PARAM_NUM_PP,
> +	DRM_LIMA_PARAM_GP_VERSION,
> +	DRM_LIMA_PARAM_PP_VERSION,
> +};
> +
> +struct drm_lima_get_param {
> +	__u32 param; /* in */
> +	__u32 pad;
> +	__u64 value; /* out */
> +};
> +
> +struct drm_lima_gem_create {
> +	__u32 size;    /* in */
> +	__u32 flags;   /* in */
> +	__u32 handle;  /* out */
> +	__u32 pad;
> +};

Might be nice to pass the offset back out from create like I did in v3d.
It's convenient to not need an immediate GEM_INFO.  Totally optional
suggestion, though.

> +
> +struct drm_lima_gem_info {
> +	__u32 handle;  /* in */
> +	__u32 va;      /* out */
> +	__u64 offset;  /* out */
> +};
> +
> +#define LIMA_SUBMIT_BO_READ   0x01
> +#define LIMA_SUBMIT_BO_WRITE  0x02
> +
> +struct drm_lima_gem_submit_bo {
> +	__u32 handle;  /* in */
> +	__u32 flags;   /* in */
> +};
> +
> +#define LIMA_GP_FRAME_REG_NUM 6
> +
> +struct drm_lima_gp_frame {
> +	__u32 frame[LIMA_GP_FRAME_REG_NUM];
> +};
> +
> +#define LIMA_PP_FRAME_REG_NUM 23
> +#define LIMA_PP_WB_REG_NUM 12
> +
> +struct drm_lima_m400_pp_frame {
> +	__u32 frame[LIMA_PP_FRAME_REG_NUM];
> +	__u32 num_pp;
> +	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
> +	__u32 plbu_array_address[4];
> +	__u32 fragment_stack_address[4];
> +};
> +
> +struct drm_lima_m450_pp_frame {
> +	__u32 frame[LIMA_PP_FRAME_REG_NUM];
> +	__u32 num_pp;
> +	__u32 wb[3 * LIMA_PP_WB_REG_NUM];
> +	__u32 use_dlbu;
> +	__u32 _pad;
> +	union {
> +		__u32 plbu_array_address[8];
> +		__u32 dlbu_regs[4];
> +	};
> +	__u32 fragment_stack_address[8];
> +};
> +
> +#define LIMA_PIPE_GP  0x00
> +#define LIMA_PIPE_PP  0x01
> +
> +#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
> +
> +struct drm_lima_gem_submit {
> +	__u32 ctx;         /* in */
> +	__u32 pipe;        /* in */
> +	__u32 nr_bos;      /* in */
> +	__u32 frame_size;  /* in */
> +	__u64 bos;         /* in */
> +	__u64 frame;       /* in */
> +	__u32 flags;       /* in */
> +	__u32 out_sync;    /* in */
> +	__u32 in_sync[2];  /* in */
> +};
> +
> +#define LIMA_GEM_WAIT_READ   0x01
> +#define LIMA_GEM_WAIT_WRITE  0x02
> +
> +struct drm_lima_gem_wait {
> +	__u32 handle;      /* in */
> +	__u32 op;          /* in */
> +	__s64 timeout_ns;  /* in */

Add a comment that it's an absolute ns?

> +};
> +
> +struct drm_lima_ctx_create {
> +	__u32 id;          /* out */
> +	__u32 _pad;
> +};
> +
> +struct drm_lima_ctx_free {
> +	__u32 id;          /* in */
> +	__u32 _pad;
> +};

I don't think you need the _pad fields here, and they're actually a bad
idea because the lack of checking in your ioctls means you can't trust
that userspace has initialized them to 0 when you want to redefine them
as a flags field later.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20190305/fb929d01/attachment-0001.sig>


More information about the dri-devel mailing list