Re: [PATCH v5 20/21] drm/tegra: Implement job submission part of new UAPI

23 Mar 2021

On Mon, Jan 11, 2021 at 03:00:18PM +0200, Mikko Perttunen wrote:
...
Implement the job submission IOCTL with a minimum feature set.
Signed-off-by: Mikko Perttunen mperttunen@nvidia.com
v5:

Add 16K size limit to copies from userspace.
Guard RELOC_BLOCKLINEAR flag handling to only exist in ARM64
to prevent oversized shift on 32-bit platforms.

v4:

Remove all features that are not strictly necessary.
Split into two patches.

v3:

Remove WRITE_RELOC. Relocations are now patched implicitly
when patching is needed.
Directly call PM runtime APIs on devices instead of using
power_on/power_off callbacks.
Remove incorrect mutex unlock in tegra_drm_ioctl_channel_open
Use XA_FLAGS_ALLOC1 instead of XA_FLAGS_ALLOC
Accommodate for removal of timeout field and inlining of
syncpt_incrs array.
Copy entire user arrays at a time instead of going through
elements one-by-one.
Implement waiting of DMA reservations.
Split out gather_bo implementation into a separate file.
Fix length parameter passed to sg_init_one in gather_bo
Cosmetic cleanup.


drivers/gpu/drm/tegra/Makefile         |   2 +
 drivers/gpu/drm/tegra/drm.c            |   2 +
 drivers/gpu/drm/tegra/uapi/gather_bo.c |  86 +++++
 drivers/gpu/drm/tegra/uapi/gather_bo.h |  22 ++
 drivers/gpu/drm/tegra/uapi/submit.c    | 428 +++++++++++++++++++++++++
 drivers/gpu/drm/tegra/uapi/submit.h    |  17 +
 6 files changed, 557 insertions(+)
 create mode 100644 drivers/gpu/drm/tegra/uapi/gather_bo.c
 create mode 100644 drivers/gpu/drm/tegra/uapi/gather_bo.h
 create mode 100644 drivers/gpu/drm/tegra/uapi/submit.c
 create mode 100644 drivers/gpu/drm/tegra/uapi/submit.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 0abdb21b38b9..059322e88943 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -4,6 +4,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 tegra-drm-y := \
   drm.o \
   uapi/uapi.o \

uapi/submit.o \
uapi/gather_bo.o \
gem.o \
fb.o \
dp.o \

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6a51035ce33f..60eab403ae9b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -737,6 +737,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
   		  DRM_RENDER_ALLOW),
   DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap,
   		  DRM_RENDER_ALLOW),

DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
	  DRM_RENDER_ALLOW),

DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_drm_ioctl_gem_create,
	  DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_drm_ioctl_gem_mmap,

diff --git a/drivers/gpu/drm/tegra/uapi/gather_bo.c b/drivers/gpu/drm/tegra/uapi/gather_bo.c
new file mode 100644
index 000000000000..b487a0d44648
--- /dev/null
+++ b/drivers/gpu/drm/tegra/uapi/gather_bo.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */



+#include <linux/scatterlist.h>
+#include <linux/slab.h>



+#include "gather_bo.h"



+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
+{

struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);

kref_get(&bo->ref);

return host_bo;

+}



+static void gather_bo_release(struct kref *ref)
+{

struct gather_bo *bo = container_of(ref, struct gather_bo, ref);

kfree(bo->gather_data);
kfree(bo);

+}



+void gather_bo_put(struct host1x_bo *host_bo)
+{

struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);

kref_put(&bo->ref, gather_bo_release);

+}



+static struct sg_table *
+gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys)
+{

struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
struct sg_table *sgt;
int err;

if (phys) {
*phys = virt_to_phys(bo->gather_data);


return NULL;


}

sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt)
return ERR_PTR(-ENOMEM);



err = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (err) {
kfree(sgt);


return ERR_PTR(err);


}

sg_init_one(sgt->sgl, bo->gather_data, bo->gather_data_words*4);

return sgt;

+}



+static void gather_bo_unpin(struct device *dev, struct sg_table *sgt)
+{

if (sgt) {
sg_free_table(sgt);


kfree(sgt);


}

+}



+static void *gather_bo_mmap(struct host1x_bo *host_bo)
+{

struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);

return bo->gather_data;

+}



+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
+{
+}



+const struct host1x_bo_ops gather_bo_ops = {

.get = gather_bo_get,
.put = gather_bo_put,
.pin = gather_bo_pin,
.unpin = gather_bo_unpin,
.mmap = gather_bo_mmap,
.munmap = gather_bo_munmap,

+};
diff --git a/drivers/gpu/drm/tegra/uapi/gather_bo.h b/drivers/gpu/drm/tegra/uapi/gather_bo.h
new file mode 100644
index 000000000000..6b4c9d83ac91
--- /dev/null
+++ b/drivers/gpu/drm/tegra/uapi/gather_bo.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */



+#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H
+#define _TEGRA_DRM_SUBMIT_GATHER_BO_H



+#include <linux/host1x.h>
+#include <linux/kref.h>



+struct gather_bo {

struct host1x_bo base;

struct kref ref;

u32 *gather_data;
size_t gather_data_words;

+};



+extern const struct host1x_bo_ops gather_bo_ops;
+void gather_bo_put(struct host1x_bo *host_bo);



+#endif
diff --git a/drivers/gpu/drm/tegra/uapi/submit.c b/drivers/gpu/drm/tegra/uapi/submit.c
new file mode 100644
index 000000000000..398be3065e21
--- /dev/null
+++ b/drivers/gpu/drm/tegra/uapi/submit.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */



+#include <linux/dma-fence-array.h>
+#include <linux/file.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
+#include <linux/sync_file.h>



+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>



+#include "../uapi.h"
+#include "../drm.h"
+#include "../gem.h"



+#include "gather_bo.h"
+#include "submit.h"



+static struct tegra_drm_mapping *
+tegra_drm_mapping_get(struct tegra_drm_channel_ctx *ctx, u32 id)
+{

struct tegra_drm_mapping *mapping;

xa_lock(&ctx->mappings);
mapping = xa_load(&ctx->mappings, id);
if (mapping)
kref_get(&mapping->ref);


xa_unlock(&ctx->mappings);

return mapping;

+}



+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
+{

unsigned long copy_err;
size_t copy_len;
void *data;

if (check_mul_overflow(count, size, &copy_len))
return ERR_PTR(-EINVAL);



if (copy_len > 0x4000)
return ERR_PTR(-E2BIG);



data = kvmalloc(copy_len, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);



copy_err = copy_from_user(data, from, copy_len);
if (copy_err) {
kvfree(data);


return ERR_PTR(-EFAULT);


}

return data;

+}



+static int submit_copy_gather_data(struct drm_device *drm,

		   struct gather_bo **pbo,


		   struct drm_tegra_channel_submit *args)



+{

unsigned long copy_err;
struct gather_bo *bo;
size_t copy_len;

if (args->gather_data_words == 0) {
drm_info(drm, "gather_data_words can't be 0");


return -EINVAL;


}

if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len))
return -EINVAL;



bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return -ENOMEM;



kref_init(&bo->ref);
host1x_bo_init(&bo->base, &gather_bo_ops);

bo->gather_data = kmalloc(copy_len, GFP_KERNEL | __GFP_NOWARN);
if (!bo->gather_data) {
kfree(bo);


return -ENOMEM;


}

copy_err = copy_from_user(bo->gather_data,
		  u64_to_user_ptr(args->gather_data_ptr),


		  copy_len);


if (copy_err) {
kfree(bo->gather_data);


kfree(bo);


return -EFAULT;


}

bo->gather_data_words = args->gather_data_words;

*pbo = bo;

return 0;

+}



+static int submit_write_reloc(struct gather_bo *bo,

	      struct drm_tegra_submit_buf *buf,


	      struct tegra_drm_mapping *mapping)



+{

/* TODO check that target_offset is within bounds */
dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
u32 written_ptr = (u32)(iova >> buf->reloc.shift);


+#ifdef CONFIG_ARM64

if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
written_ptr |= BIT(39);



+#endif
Sorry, but this still isn't correct. written_ptr is still only 32-bits
wide, so your BIT(39) is going to get discarded even on 64-bit ARM. The
idiomatic way to do this is to make written_ptr dma_addr_t and use a
CONFIG_ARCH_DMA_ADDR_T_64BIT guard.
But even with that this looks wrong because you're OR'ing this in after
shifting by buf->reloc.shift. Doesn't that OR it in at the wrong offset?
Should you perhaps be doing this instead:
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
    	if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
    		iova |= BIT(39);
    #endif
written_ptr = (u32)(iova >> buf->reloc_shift);
?
Also, on a side-note: BLOCKLINEAR really isn't the right term here. I
recently dealt with this for display (though I haven't sent out that
patch yet) and this is actually a bit that selects which sector layout
swizzling is being applied. That's independent of block linear format
and I think you can have different sector layouts irrespective of the
block linear format (though I don't think that's usually done).
That said, I wonder if a better interface here would be to reuse format
modifiers here. That would allow us to more fully describe the format of
a surface in case we ever need it, and it already includes the sector
layout information as well.
Thierry

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH v5 20/21] drm/tegra: Implement job submission part of new UAPI

Signed-off-by: Mikko Perttunen mperttunen@nvidia.com