[Intel-gfx] [RFC PATCH 3/3] drm/i915: add SVM execbuf ioctl v13

Tue Feb 7 12:11:41 UTC 2017

Chris Wilson <chris at chris-wilson.co.uk> writes:

> On Mon, Jan 09, 2017 at 06:52:54PM +0200, Mika Kuoppala wrote:
>> From: Jesse Barnes <jbarnes at virtuousgeek.org>
>> 
>> We just need to pass in an address to execute and some flags, since we
>> don't have to worry about buffer relocation or any of the other usual
>> stuff.  Takes in a fance and returns a fence to be used for
>> synchronization.
>> 
>> v2: add a request after batch submission (Jesse)
>> v3: add a flag for fence creation (Chris)
>> v4: add CLOEXEC flag (Kristian)
>>     add non-RCS ring support (Jesse)
>> v5: update for request alloc change (Jesse)
>> v6: new sync file interface, error paths, request breadcrumbs
>> v7: always CLOEXEC for sync_file_install
>> v8: rebase on new sync file api
>> v9: rework on top of fence requests and sync_file
>> v10: take fence ref for sync_file (Chris)
>>      use correct flush (Chris)
>>      limit exec on rcs
>> v11: allow exec on all engines
>> v12: dma_fence_put/get, fix error path on getting sync_file
>> v13: add in fences, removed superfluous dma_fence_put/get
>> 
>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org> (v5)
>> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
>> ---
>>  drivers/gpu/drm/i915/Kconfig               |   1 +
>>  drivers/gpu/drm/i915/i915_drv.c            |   1 +
>>  drivers/gpu/drm/i915/i915_drv.h            |   2 +
>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 175 +++++++++++++++++++++++++++++
>>  include/uapi/drm/i915_drm.h                |  44 ++++++++
>>  5 files changed, 223 insertions(+)
>> 
>> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> index 183f5dc..387990d 100644
>> --- a/drivers/gpu/drm/i915/Kconfig
>> +++ b/drivers/gpu/drm/i915/Kconfig
>> @@ -8,6 +8,7 @@ config DRM_I915
>>  	# the shmem_readpage() which depends upon tmpfs
>>  	select SHMEM
>>  	select TMPFS
>> +	select SYNC_FILE
>
> Duplicate.
>
>>  	select DRM_KMS_HELPER
>>  	select DRM_PANEL
>>  	select DRM_MIPI_DSI
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 4d22b4b..2276ba6 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -2567,6 +2567,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
>>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
>>  	DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF_DRV(I915_GEM_EXEC_MM, i915_gem_exec_mm, DRM_RENDER_ALLOW),
>>  };
>>  
>>  static struct drm_driver driver = {
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index ca1eaaf..8436ea3 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -3094,6 +3094,8 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data,
>>  			struct drm_file *file_priv);
>>  int i915_gem_execbuffer2(struct drm_device *dev, void *data,
>>  			 struct drm_file *file_priv);
>> +int i915_gem_exec_mm(struct drm_device *dev, void *data,
>> +		     struct drm_file *file);
>>  int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>>  			struct drm_file *file_priv);
>>  int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index a5fe299..bb51ce4 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -29,6 +29,7 @@
>>  #include <linux/dma_remapping.h>
>>  #include <linux/reservation.h>
>>  #include <linux/uaccess.h>
>> +#include <linux/sync_file.h>
>
> Out of date.

Looking at the current code, it is duplicate as the explicit fencing
prolly brought it in.

>>  #include <drm/drmP.h>
>>  #include <drm/i915_drm.h>
>> @@ -1984,3 +1985,177 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
>>  	drm_free_large(exec2_list);
>>  	return ret;
>>  }
>> +
>> +static struct intel_engine_cs *
>> +exec_mm_select_engine(const struct drm_i915_private *dev_priv,
>> +		      const struct drm_i915_exec_mm *exec_mm)
>> +{
>> +	const unsigned int user_ring_id = exec_mm->ring_id;
>> +	struct intel_engine_cs *engine;
>> +
>> +	if (user_ring_id > I915_USER_RINGS) {
>> +		DRM_DEBUG("exec_mm with unknown ring: %u\n", user_ring_id);
>> +		return NULL;
>> +	}
>> +
>> +	engine = dev_priv->engine[user_ring_map[user_ring_id]];
>> +
>> +	if (!engine) {
>> +		DRM_DEBUG("exec_mm with invalid engine: %u\n", user_ring_id);
>> +		return NULL;
>> +	}
>> +
>> +	return engine;
>> +}
>
> I would rather we reuse eb_select_engine(). We know we have to fix the
> ABI anyway, so might as well kill 2 birds with one stone.
>

I am still hesistant about this. Tried but this would expose the exec
svm flags to similar kind of trickery for BSD ring.

>> +static int do_exec_mm(struct drm_i915_exec_mm *exec_mm,
>> +		      struct drm_i915_gem_request *req,
>> +		      const u32 flags)
>> +{
>> +	struct sync_file *out_fence = NULL;
>> +	struct dma_fence *in_fence = NULL;
>> +	int out_fence_fd = -1;
>> +	int ret;
>> +
>> +	if (flags & I915_EXEC_MM_FENCE_IN) {
>> +		in_fence = sync_file_get_fence(exec_mm->in_fence_fd);
>> +		if (!in_fence)
>> +			return -EINVAL;
>> +
>> +		ret = i915_gem_request_await_dma_fence(req, in_fence);
>
> You can drop the fence ref immediately.

So closely mimiced from explicit fences that execbuf path
might be able to do the same :)

>
> 		dma_fence_put(in_fence);
>> +		if (ret < 0)
>> +			goto err_out;
>> +	}
>> +
>> +	if (flags & I915_EXEC_MM_FENCE_OUT) {
>> +		out_fence = sync_file_create(&req->fence);
>> +		if (!out_fence) {
>> +			DRM_DEBUG_DRIVER("sync_file_create failed\n");
>> +			ret = -ENOMEM;
>> +			goto err_out;
>> +		}
>> +
>> +		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
>> +		if (out_fence_fd < 0) {
>> +			ret = out_fence_fd;
>> +			out_fence_fd = -1;
>> +			goto err_out;
>> +		}
>> +
>> +		fd_install(out_fence_fd, out_fence->file);
>> +		exec_mm->out_fence_fd = out_fence_fd;
>
> Try not to set before success.
>
>> +	ret = req->engine->emit_flush(req, EMIT_INVALIDATE);
>> +	if (ret) {
>> +		DRM_DEBUG_DRIVER("engine flush failed: %d\n", ret);
>> +		goto err_out;
>> +	}
>> +
>> +	ret = req->engine->emit_bb_start(req, exec_mm->batch_ptr, 0, 0);
>> +	if (ret) {
>> +		DRM_DEBUG_DRIVER("engine dispatch execbuf failed: %d\n", ret);
>> +		goto err_out;
>> +	}
>> +
>> +	return 0;
>> +
>> +err_out:
>> +	if (out_fence_fd != -1)
>> +		put_unused_fd(out_fence_fd);
>> +
>> +	if (out_fence)
>> +		fput(out_fence->file);
>> +
>> +	if (in_fence)
>> +		dma_fence_put(in_fence);
>> +
>> +	return ret;
>> +}
>> +
>> +int i915_gem_exec_mm(struct drm_device *dev, void *data, struct drm_file *file)
>
> Might as call this exec_svm. Unless you have immediate plans to
> generalise.
>

I renamed everywhere s/mm/svm

>> +{
>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	struct drm_i915_exec_mm *exec_mm = data;
>> +	struct intel_engine_cs *engine;
>> +	struct i915_gem_context *ctx;
>> +	struct drm_i915_gem_request *req;
>> +	const u32 ctx_id = exec_mm->ctx_id;
>> +	const u32 flags = exec_mm->flags;
>> +	int ret;
>> +
>> +	if (exec_mm->batch_ptr & 3) {
>> +		DRM_DEBUG_DRIVER("ptr not 4-byt aligned %llu\n",
>> +				 exec_mm->batch_ptr);
>> +		return -EINVAL;
>> +	}
>
> Where does access_ok() get performed? A little spiel on the mm validation
> would help here if it is all done for us by svm.
>

Passing in length wouldn't help much as the refs inside the bb
would also need to be checked?

>> +	if (flags & ~(I915_EXEC_MM_FENCE_OUT | I915_EXEC_MM_FENCE_IN)) {
>> +		DRM_DEBUG_DRIVER("bad flags: 0x%08x\n", flags);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (exec_mm->pad != 0) {
>> +		DRM_DEBUG_DRIVER("bad pad: 0x%08x\n", exec_mm->pad);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (file == NULL)
>> +		return -EINVAL;
>
> ? That's not a user error (EINVAL), that's a kaboom!
>
>> +
>> +	engine = exec_mm_select_engine(dev_priv, exec_mm);
>> +	if (!engine)
>> +		return -EINVAL;
>> +
>> +	intel_runtime_pm_get(dev_priv);
>> +
>> +	ret = i915_mutex_lock_interruptible(dev);
>> +	if (ret) {
>> +		DRM_ERROR("mutex interrupted\n");
>> +		goto pm_put;
>> +	}
>> +
>> +	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
>> +	if (IS_ERR(ctx)) {
>> +		ret = PTR_ERR(ctx);
>> +		DRM_DEBUG_DRIVER("couldn't get context\n");
>> +		goto unlock;
>> +	}
>> +
>> +	if (!i915_gem_context_is_svm(ctx)) {
>> +		ret = -EINVAL;
>> +		DRM_DEBUG_DRIVER("context is not SVM enabled\n");
>> +		goto unlock;
>> +	}
>> +
>> +	i915_gem_context_get(ctx);
>> +
>> +	req = i915_gem_request_alloc(engine, ctx);
>> +	if (IS_ERR(req)) {
>> +		ret = PTR_ERR(req);
>> +		goto ctx_put;
>> +	}
>> +
>> +	ret = i915_gem_request_add_to_client(req, file);
>> +	if (ret) {
>> +		DRM_DEBUG_DRIVER("failed to add request to client, %d\n", ret);
>> +		goto add_request;
>> +	}
>> +
>> +	/* If we fail here, we still need to submit the req */
>> +	ret = do_exec_mm(exec_mm, req, flags);
>> +
>> +add_request:
>> +	i915_add_request(req);
>
> Do the same as execbuf just for consistency (i.e.
> __i915_add_request(req, ret == 0)).
>
> What's the story for i915_gpu_error.c?
>

Work in progress.

>> +
>> +ctx_put:
>> +	i915_gem_context_put(ctx);
>> +
>> +unlock:
>> +	mutex_unlock(&dev->struct_mutex);
>> +
>> +pm_put:
>> +	intel_runtime_pm_put(dev_priv);
>> +
>> +	return ret;
>> +}
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 5b06ab1..843f943 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -259,6 +259,7 @@ typedef struct _drm_i915_sarea {
>>  #define DRM_I915_GEM_CONTEXT_GETPARAM	0x34
>>  #define DRM_I915_GEM_CONTEXT_SETPARAM	0x35
>>  #define DRM_I915_PERF_OPEN		0x36
>> +#define DRM_I915_GEM_EXEC_MM		0x37
>>  
>>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>> @@ -313,6 +314,7 @@ typedef struct _drm_i915_sarea {
>>  #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
>>  #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
>>  #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
>> +#define DRM_IOCTL_I915_GEM_EXEC_MM			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_EXEC_MM, struct drm_i915_exec_mm)
>>  
>>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>>   * on the security mechanisms provided by hardware.
>> @@ -1363,6 +1365,48 @@ enum drm_i915_perf_record_type {
>>  	DRM_I915_PERF_RECORD_MAX /* non-ABI */
>>  };
>>  
>> +/**
>> + * drm_i915_exec_mm - shared address space execbuf
>> + * @batch_ptr: address of batch buffer (in context's CPU address space)
>> + * @ctx_id: context to use for execution, must be svm enabled
>> + * @ring_id: ring to which this context will be submitted, see execbuffer2
>> + * @flags: used to signal if in/out fences should be used
>> + * @out_fence_fd: returned fence handle of out fence you can wait on
>> + * @in_fence_fd: given fence handle of fence the gpu will wait for
>> + * @pad: padding, must be zero
>> + *
>> + * This simplified execbuf just executes an MI_BATCH_BUFFER_START at
>> + * @batch_ptr using @ctx_id as the context.  The context will indicate
>> + * which address space the @batch_ptr will use.
>> + *
>> + * Note @batch_ptr must be dword aligned.
>> + *
>> + * By default, the kernel will simply execute the address given on the GPU.
>> + *
>> + * If the %I915_EXEC_MM_FENCE_OUT flag is passed in the @flags field however,
>> + * the kernel will return a sync_file (dma_fence) object in @out_fence_fd for
>> + * the caller to use to synchronize execution of the passed batch.
>> + *
>> + * If the %I915_EXEC_MM_FENCE_IN flag is passed in the @flags field,
>> + * the kernel will wait until the fence (dma_fence) object passed in
>> + * @in_fence_fd to complete before submitting batch to gpu.
>> + *
>> + */
>> +struct drm_i915_exec_mm {
>> +	__u64 batch_ptr;
>> +	__u32 ctx_id;
>> +	__u32 ring_id; /* see execbuffer2 ring flags */
>> +
>> +#define I915_EXEC_MM_FENCE_OUT (1<<0)
>> +#define I915_EXEC_MM_FENCE_IN  (1<<1)
>> +	__u32 flags;
>> +
>> +	__u32 out_fence_fd;
>> +	__u32 in_fence_fd;
>
> In then out just makes more sense to mo!
>> +
>> +	__u32 pad;
>
> If you use u64 flags (as the second parameter) we can put this pad to
> good use.
>

I have addressed everything with the exception where I have
stated otherwise.

Thank you for the feedback,
-Mika

>> +};
>> +
>>  #if defined(__cplusplus)
>>  }
>>  #endif
>> -- 
>> 2.7.4
>> 
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre