On Mon, Jan 11, 2021 at 03:00:18PM +0200, Mikko Perttunen wrote:
Implement the job submission IOCTL with a minimum feature set.
Signed-off-by: Mikko Perttunen mperttunen@nvidia.com
v5:
- Add 16K size limit to copies from userspace.
- Guard RELOC_BLOCKLINEAR flag handling to only exist in ARM64 to prevent oversized shift on 32-bit platforms.
v4:
- Remove all features that are not strictly necessary.
- Split into two patches.
v3:
- Remove WRITE_RELOC. Relocations are now patched implicitly when patching is needed.
- Directly call PM runtime APIs on devices instead of using power_on/power_off callbacks.
- Remove incorrect mutex unlock in tegra_drm_ioctl_channel_open
- Use XA_FLAGS_ALLOC1 instead of XA_FLAGS_ALLOC
- Accommodate for removal of timeout field and inlining of syncpt_incrs array.
- Copy entire user arrays at a time instead of going through elements one-by-one.
- Implement waiting of DMA reservations.
- Split out gather_bo implementation into a separate file.
- Fix length parameter passed to sg_init_one in gather_bo
- Cosmetic cleanup.
drivers/gpu/drm/tegra/Makefile | 2 + drivers/gpu/drm/tegra/drm.c | 2 + drivers/gpu/drm/tegra/uapi/gather_bo.c | 86 +++++ drivers/gpu/drm/tegra/uapi/gather_bo.h | 22 ++ drivers/gpu/drm/tegra/uapi/submit.c | 428 +++++++++++++++++++++++++ drivers/gpu/drm/tegra/uapi/submit.h | 17 + 6 files changed, 557 insertions(+) create mode 100644 drivers/gpu/drm/tegra/uapi/gather_bo.c create mode 100644 drivers/gpu/drm/tegra/uapi/gather_bo.h create mode 100644 drivers/gpu/drm/tegra/uapi/submit.c create mode 100644 drivers/gpu/drm/tegra/uapi/submit.h
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index 0abdb21b38b9..059322e88943 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -4,6 +4,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG tegra-drm-y := \ drm.o \ uapi/uapi.o \
- uapi/submit.o \
- uapi/gather_bo.o \ gem.o \ fb.o \ dp.o \
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 6a51035ce33f..60eab403ae9b 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -737,6 +737,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_drm_ioctl_gem_create, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_drm_ioctl_gem_mmap,DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/tegra/uapi/gather_bo.c b/drivers/gpu/drm/tegra/uapi/gather_bo.c new file mode 100644 index 000000000000..b487a0d44648 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi/gather_bo.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */
+#include <linux/scatterlist.h> +#include <linux/slab.h>
+#include "gather_bo.h"
+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo) +{
- struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
- kref_get(&bo->ref);
- return host_bo;
+}
+static void gather_bo_release(struct kref *ref) +{
- struct gather_bo *bo = container_of(ref, struct gather_bo, ref);
- kfree(bo->gather_data);
- kfree(bo);
+}
+void gather_bo_put(struct host1x_bo *host_bo) +{
- struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
- kref_put(&bo->ref, gather_bo_release);
+}
+static struct sg_table * +gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) +{
- struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
- struct sg_table *sgt;
- int err;
- if (phys) {
*phys = virt_to_phys(bo->gather_data);
return NULL;
- }
- sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
- if (!sgt)
return ERR_PTR(-ENOMEM);
- err = sg_alloc_table(sgt, 1, GFP_KERNEL);
- if (err) {
kfree(sgt);
return ERR_PTR(err);
- }
- sg_init_one(sgt->sgl, bo->gather_data, bo->gather_data_words*4);
- return sgt;
+}
+static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) +{
- if (sgt) {
sg_free_table(sgt);
kfree(sgt);
- }
+}
+static void *gather_bo_mmap(struct host1x_bo *host_bo) +{
- struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
- return bo->gather_data;
+}
+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr) +{ +}
+const struct host1x_bo_ops gather_bo_ops = {
- .get = gather_bo_get,
- .put = gather_bo_put,
- .pin = gather_bo_pin,
- .unpin = gather_bo_unpin,
- .mmap = gather_bo_mmap,
- .munmap = gather_bo_munmap,
+}; diff --git a/drivers/gpu/drm/tegra/uapi/gather_bo.h b/drivers/gpu/drm/tegra/uapi/gather_bo.h new file mode 100644 index 000000000000..6b4c9d83ac91 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi/gather_bo.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */
+#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H +#define _TEGRA_DRM_SUBMIT_GATHER_BO_H
+#include <linux/host1x.h> +#include <linux/kref.h>
+struct gather_bo {
- struct host1x_bo base;
- struct kref ref;
- u32 *gather_data;
- size_t gather_data_words;
+};
+extern const struct host1x_bo_ops gather_bo_ops; +void gather_bo_put(struct host1x_bo *host_bo);
+#endif diff --git a/drivers/gpu/drm/tegra/uapi/submit.c b/drivers/gpu/drm/tegra/uapi/submit.c new file mode 100644 index 000000000000..398be3065e21 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi/submit.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */
+#include <linux/dma-fence-array.h> +#include <linux/file.h> +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/nospec.h> +#include <linux/pm_runtime.h> +#include <linux/sync_file.h>
+#include <drm/drm_drv.h> +#include <drm/drm_file.h>
+#include "../uapi.h" +#include "../drm.h" +#include "../gem.h"
+#include "gather_bo.h" +#include "submit.h"
+static struct tegra_drm_mapping * +tegra_drm_mapping_get(struct tegra_drm_channel_ctx *ctx, u32 id) +{
- struct tegra_drm_mapping *mapping;
- xa_lock(&ctx->mappings);
- mapping = xa_load(&ctx->mappings, id);
- if (mapping)
kref_get(&mapping->ref);
- xa_unlock(&ctx->mappings);
- return mapping;
+}
+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size) +{
- unsigned long copy_err;
- size_t copy_len;
- void *data;
- if (check_mul_overflow(count, size, ©_len))
return ERR_PTR(-EINVAL);
- if (copy_len > 0x4000)
return ERR_PTR(-E2BIG);
- data = kvmalloc(copy_len, GFP_KERNEL);
- if (!data)
return ERR_PTR(-ENOMEM);
- copy_err = copy_from_user(data, from, copy_len);
- if (copy_err) {
kvfree(data);
return ERR_PTR(-EFAULT);
- }
- return data;
+}
+static int submit_copy_gather_data(struct drm_device *drm,
struct gather_bo **pbo,
struct drm_tegra_channel_submit *args)
+{
- unsigned long copy_err;
- struct gather_bo *bo;
- size_t copy_len;
- if (args->gather_data_words == 0) {
drm_info(drm, "gather_data_words can't be 0");
return -EINVAL;
- }
- if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, ©_len))
return -EINVAL;
- bo = kzalloc(sizeof(*bo), GFP_KERNEL);
- if (!bo)
return -ENOMEM;
- kref_init(&bo->ref);
- host1x_bo_init(&bo->base, &gather_bo_ops);
- bo->gather_data = kmalloc(copy_len, GFP_KERNEL | __GFP_NOWARN);
- if (!bo->gather_data) {
kfree(bo);
return -ENOMEM;
- }
- copy_err = copy_from_user(bo->gather_data,
u64_to_user_ptr(args->gather_data_ptr),
copy_len);
- if (copy_err) {
kfree(bo->gather_data);
kfree(bo);
return -EFAULT;
- }
- bo->gather_data_words = args->gather_data_words;
- *pbo = bo;
- return 0;
+}
+static int submit_write_reloc(struct gather_bo *bo,
struct drm_tegra_submit_buf *buf,
struct tegra_drm_mapping *mapping)
+{
- /* TODO check that target_offset is within bounds */
- dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
- u32 written_ptr = (u32)(iova >> buf->reloc.shift);
+#ifdef CONFIG_ARM64
- if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
written_ptr |= BIT(39);
+#endif
Sorry, but this still isn't correct. written_ptr is still only 32-bits wide, so your BIT(39) is going to get discarded even on 64-bit ARM. The idiomatic way to do this is to make written_ptr dma_addr_t and use a CONFIG_ARCH_DMA_ADDR_T_64BIT guard.
But even with that this looks wrong because you're OR'ing this in after shifting by buf->reloc.shift. Doesn't that OR it in at the wrong offset? Should you perhaps be doing this instead:
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR) iova |= BIT(39); #endif
written_ptr = (u32)(iova >> buf->reloc_shift);
?
Also, on a side-note: BLOCKLINEAR really isn't the right term here. I recently dealt with this for display (though I haven't sent out that patch yet) and this is actually a bit that selects which sector layout swizzling is being applied. That's independent of block linear format and I think you can have different sector layouts irrespective of the block linear format (though I don't think that's usually done).
That said, I wonder if a better interface here would be to reuse format modifiers here. That would allow us to more fully describe the format of a surface in case we ever need it, and it already includes the sector layout information as well.
Thierry