[Beignet] [RFC PATCH 3/3] drm/i915: add SVM execbuf ioctl v13
Mika Kuoppala
mika.kuoppala at linux.intel.com
Mon Jan 9 16:52:54 UTC 2017
From: Jesse Barnes <jbarnes at virtuousgeek.org>
We just need to pass in an address to execute and some flags, since we
don't have to worry about buffer relocation or any of the other usual
stuff. Takes in a fance and returns a fence to be used for
synchronization.
v2: add a request after batch submission (Jesse)
v3: add a flag for fence creation (Chris)
v4: add CLOEXEC flag (Kristian)
add non-RCS ring support (Jesse)
v5: update for request alloc change (Jesse)
v6: new sync file interface, error paths, request breadcrumbs
v7: always CLOEXEC for sync_file_install
v8: rebase on new sync file api
v9: rework on top of fence requests and sync_file
v10: take fence ref for sync_file (Chris)
use correct flush (Chris)
limit exec on rcs
v11: allow exec on all engines
v12: dma_fence_put/get, fix error path on getting sync_file
v13: add in fences, removed superfluous dma_fence_put/get
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org> (v5)
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/Kconfig | 1 +
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 175 +++++++++++++++++++++++++++++
include/uapi/drm/i915_drm.h | 44 ++++++++
5 files changed, 223 insertions(+)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 183f5dc..387990d 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -8,6 +8,7 @@ config DRM_I915
# the shmem_readpage() which depends upon tmpfs
select SHMEM
select TMPFS
+ select SYNC_FILE
select DRM_KMS_HELPER
select DRM_PANEL
select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4d22b4b..2276ba6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2567,6 +2567,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(I915_GEM_EXEC_MM, i915_gem_exec_mm, DRM_RENDER_ALLOW),
};
static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ca1eaaf..8436ea3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3094,6 +3094,8 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_execbuffer2(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int i915_gem_exec_mm(struct drm_device *dev, void *data,
+ struct drm_file *file);
int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a5fe299..bb51ce4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -29,6 +29,7 @@
#include <linux/dma_remapping.h>
#include <linux/reservation.h>
#include <linux/uaccess.h>
+#include <linux/sync_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
@@ -1984,3 +1985,177 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
drm_free_large(exec2_list);
return ret;
}
+
+static struct intel_engine_cs *
+exec_mm_select_engine(const struct drm_i915_private *dev_priv,
+ const struct drm_i915_exec_mm *exec_mm)
+{
+ const unsigned int user_ring_id = exec_mm->ring_id;
+ struct intel_engine_cs *engine;
+
+ if (user_ring_id > I915_USER_RINGS) {
+ DRM_DEBUG("exec_mm with unknown ring: %u\n", user_ring_id);
+ return NULL;
+ }
+
+ engine = dev_priv->engine[user_ring_map[user_ring_id]];
+
+ if (!engine) {
+ DRM_DEBUG("exec_mm with invalid engine: %u\n", user_ring_id);
+ return NULL;
+ }
+
+ return engine;
+}
+
+static int do_exec_mm(struct drm_i915_exec_mm *exec_mm,
+ struct drm_i915_gem_request *req,
+ const u32 flags)
+{
+ struct sync_file *out_fence = NULL;
+ struct dma_fence *in_fence = NULL;
+ int out_fence_fd = -1;
+ int ret;
+
+ if (flags & I915_EXEC_MM_FENCE_IN) {
+ in_fence = sync_file_get_fence(exec_mm->in_fence_fd);
+ if (!in_fence)
+ return -EINVAL;
+
+ ret = i915_gem_request_await_dma_fence(req, in_fence);
+ if (ret < 0)
+ goto err_out;
+ }
+
+ if (flags & I915_EXEC_MM_FENCE_OUT) {
+ out_fence = sync_file_create(&req->fence);
+ if (!out_fence) {
+ DRM_DEBUG_DRIVER("sync_file_create failed\n");
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (out_fence_fd < 0) {
+ ret = out_fence_fd;
+ out_fence_fd = -1;
+ goto err_out;
+ }
+
+ fd_install(out_fence_fd, out_fence->file);
+ exec_mm->out_fence_fd = out_fence_fd;
+ }
+
+ ret = req->engine->emit_flush(req, EMIT_INVALIDATE);
+ if (ret) {
+ DRM_DEBUG_DRIVER("engine flush failed: %d\n", ret);
+ goto err_out;
+ }
+
+ ret = req->engine->emit_bb_start(req, exec_mm->batch_ptr, 0, 0);
+ if (ret) {
+ DRM_DEBUG_DRIVER("engine dispatch execbuf failed: %d\n", ret);
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ if (out_fence_fd != -1)
+ put_unused_fd(out_fence_fd);
+
+ if (out_fence)
+ fput(out_fence->file);
+
+ if (in_fence)
+ dma_fence_put(in_fence);
+
+ return ret;
+}
+
+int i915_gem_exec_mm(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_exec_mm *exec_mm = data;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ struct drm_i915_gem_request *req;
+ const u32 ctx_id = exec_mm->ctx_id;
+ const u32 flags = exec_mm->flags;
+ int ret;
+
+ if (exec_mm->batch_ptr & 3) {
+ DRM_DEBUG_DRIVER("ptr not 4-byt aligned %llu\n",
+ exec_mm->batch_ptr);
+ return -EINVAL;
+ }
+
+ if (flags & ~(I915_EXEC_MM_FENCE_OUT | I915_EXEC_MM_FENCE_IN)) {
+ DRM_DEBUG_DRIVER("bad flags: 0x%08x\n", flags);
+ return -EINVAL;
+ }
+
+ if (exec_mm->pad != 0) {
+ DRM_DEBUG_DRIVER("bad pad: 0x%08x\n", exec_mm->pad);
+ return -EINVAL;
+ }
+
+ if (file == NULL)
+ return -EINVAL;
+
+ engine = exec_mm_select_engine(dev_priv, exec_mm);
+ if (!engine)
+ return -EINVAL;
+
+ intel_runtime_pm_get(dev_priv);
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret) {
+ DRM_ERROR("mutex interrupted\n");
+ goto pm_put;
+ }
+
+ ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ DRM_DEBUG_DRIVER("couldn't get context\n");
+ goto unlock;
+ }
+
+ if (!i915_gem_context_is_svm(ctx)) {
+ ret = -EINVAL;
+ DRM_DEBUG_DRIVER("context is not SVM enabled\n");
+ goto unlock;
+ }
+
+ i915_gem_context_get(ctx);
+
+ req = i915_gem_request_alloc(engine, ctx);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ goto ctx_put;
+ }
+
+ ret = i915_gem_request_add_to_client(req, file);
+ if (ret) {
+ DRM_DEBUG_DRIVER("failed to add request to client, %d\n", ret);
+ goto add_request;
+ }
+
+ /* If we fail here, we still need to submit the req */
+ ret = do_exec_mm(exec_mm, req, flags);
+
+add_request:
+ i915_add_request(req);
+
+ctx_put:
+ i915_gem_context_put(ctx);
+
+unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+pm_put:
+ intel_runtime_pm_put(dev_priv);
+
+ return ret;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5b06ab1..843f943 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -259,6 +259,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
#define DRM_I915_PERF_OPEN 0x36
+#define DRM_I915_GEM_EXEC_MM 0x37
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -313,6 +314,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_GEM_EXEC_MM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_EXEC_MM, struct drm_i915_exec_mm)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -1363,6 +1365,48 @@ enum drm_i915_perf_record_type {
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
+/**
+ * drm_i915_exec_mm - shared address space execbuf
+ * @batch_ptr: address of batch buffer (in context's CPU address space)
+ * @ctx_id: context to use for execution, must be svm enabled
+ * @ring_id: ring to which this context will be submitted, see execbuffer2
+ * @flags: used to signal if in/out fences should be used
+ * @out_fence_fd: returned fence handle of out fence you can wait on
+ * @in_fence_fd: given fence handle of fence the gpu will wait for
+ * @pad: padding, must be zero
+ *
+ * This simplified execbuf just executes an MI_BATCH_BUFFER_START at
+ * @batch_ptr using @ctx_id as the context. The context will indicate
+ * which address space the @batch_ptr will use.
+ *
+ * Note @batch_ptr must be dword aligned.
+ *
+ * By default, the kernel will simply execute the address given on the GPU.
+ *
+ * If the %I915_EXEC_MM_FENCE_OUT flag is passed in the @flags field however,
+ * the kernel will return a sync_file (dma_fence) object in @out_fence_fd for
+ * the caller to use to synchronize execution of the passed batch.
+ *
+ * If the %I915_EXEC_MM_FENCE_IN flag is passed in the @flags field,
+ * the kernel will wait until the fence (dma_fence) object passed in
+ * @in_fence_fd to complete before submitting batch to gpu.
+ *
+ */
+struct drm_i915_exec_mm {
+ __u64 batch_ptr;
+ __u32 ctx_id;
+ __u32 ring_id; /* see execbuffer2 ring flags */
+
+#define I915_EXEC_MM_FENCE_OUT (1<<0)
+#define I915_EXEC_MM_FENCE_IN (1<<1)
+ __u32 flags;
+
+ __u32 out_fence_fd;
+ __u32 in_fence_fd;
+
+ __u32 pad;
+};
+
#if defined(__cplusplus)
}
#endif
--
2.7.4
More information about the Beignet
mailing list