[Intel-gfx] [PATCH 13/13] drm/i915: Support explicit fencing for execbuf

John Harrison John.C.Harrison at Intel.com
Fri Aug 26 15:29:57 UTC 2016


On 26/08/2016 16:08, John Harrison wrote:
> On 25/08/2016 10:08, Chris Wilson wrote:
>> Now that the user can opt-out of implicit fencing, we need to give them
>> back control over the fencing. We employ sync_file to wrap our
>> drm_i915_gem_request and provide an fd that userspace can merge with
>> other sync_file fds and pass back to the kernel to wait upon before
>> future execution.
It is worth mentioning in the description that this isn't just useful 
for pushing the synchronisation down to user land. It also allows 
synchronisation with non-rendering operations. E.g. submit a batch 
buffer for delayed execution when some external process (e.g. video 
capture card, other CPU thread, ...) has finished supplying the source 
data. Android also uses it for generating flip chains - submit rendering 
that waits on SurfaceFlinger page flip -> submit page flip that waits on 
render -> submit render that waits on page flip -> ...

Note also that with in order only request execution, it also allows the 
driver to be completely blocked on events that are outside of its control.


>>
>> Testcase: igt/gem_exec_fence
The new IGT is not included as part of this patch set?

>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/Kconfig               |  1 +
>>   drivers/gpu/drm/i915/i915_drv.c            |  5 +-
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 79 
>> ++++++++++++++++++++++++++++--
>>   include/uapi/drm/i915_drm.h                |  8 ++-
>>   4 files changed, 86 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> index 7769e469118f..319ca27ea719 100644
>> --- a/drivers/gpu/drm/i915/Kconfig
>> +++ b/drivers/gpu/drm/i915/Kconfig
>> @@ -18,6 +18,7 @@ config DRM_I915
>>       select INPUT if ACPI
>>       select ACPI_VIDEO if ACPI
>>       select ACPI_BUTTON if ACPI
>> +    select SYNC_FILE
>>       help
>>         Choose this option if you have a system that has "Intel Graphics
>>         Media Accelerator" or "HD Graphics" integrated graphics,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 3eecfef8bcca..9460bfff360f 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -358,6 +358,9 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>       case I915_PARAM_HAS_EXEC_ASYNC:
>>           value = 1;
>>           break;
>> +    case I915_PARAM_HAS_EXEC_FENCE:
>> +        value = 1;
>> +        break;
>>       default:
>>           DRM_DEBUG("Unknown parameter %d\n", param->param);
>>           return -EINVAL;
>> @@ -2547,7 +2550,7 @@ static const struct drm_ioctl_desc 
>> i915_ioctls[] = {
>>       DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, 
>> DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
>>       DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, 
>> DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
>>       DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, 
>> DRM_AUTH),
>> -    DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, 
>> DRM_AUTH|DRM_RENDER_ALLOW),
>> +    DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, 
>> DRM_AUTH|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, 
>> DRM_AUTH|DRM_ROOT_ONLY),
>>       DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, 
>> DRM_AUTH|DRM_ROOT_ONLY),
>>       DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, 
>> DRM_AUTH|DRM_RENDER_ALLOW),
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index a0f46293e492..8701b8bb46fc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -28,6 +28,7 @@
>>     #include <linux/dma_remapping.h>
>>   #include <linux/reservation.h>
>> +#include <linux/sync_file.h>
>>   #include <linux/uaccess.h>
>>     #include <drm/drmP.h>
>> @@ -1686,6 +1687,33 @@ eb_select_engine(struct drm_i915_private 
>> *dev_priv,
>>       return engine;
>>   }
>>   +static int eb_add_fence(struct drm_i915_gem_request *req, struct 
>> fence *fence)
>> +{
>> +    struct fence_array *array;
>> +    int ret, i;
>> +
>> +    if (fence_is_i915(fence))
>> +        return __eb_sync(req, to_request(fence));
>> +
>> +    if (!fence_is_array(fence))
>> +        return i915_sw_fence_await_dma_fence(&req->submit, fence, 
>> GFP_KERNEL);
>> +
>> +    array = to_fence_array(fence);
>> +    for (i = 0; i < array->num_fences; i++) {
>> +        struct fence *child = array->fences[i];
>> +
>> +        if (fence_is_i915(child))
>> +            ret = __eb_sync(req, to_request(child));
>> +        else
>> +            ret = i915_sw_fence_await_dma_fence(&req->submit,
>> +                                child, GFP_KERNEL);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static int
>>   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>                  struct drm_file *file,
>> @@ -1703,6 +1731,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, 
>> void *data,
>>       struct i915_execbuffer_params *params = &params_master;
>>       const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
>>       u32 dispatch_flags;
>> +    struct fence *in_fence = NULL;
>> +    struct sync_file *out_fence = NULL;
>> +    int out_fence_fd = -1;
>>       int ret;
>>       bool need_relocs;
>>   @@ -1746,6 +1777,23 @@ i915_gem_do_execbuffer(struct drm_device 
>> *dev, void *data,
>>           dispatch_flags |= I915_DISPATCH_RS;
>>       }
>>   +    if (args->flags & I915_EXEC_FENCE_IN) {
>> +        in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
>> +        if (!in_fence) {
>> +            ret = -EINVAL;
>> +            goto pre_mutex_err;
>> +        }
>> +    }
>> +
>> +    if (args->flags & I915_EXEC_FENCE_OUT) {
>> +        out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
>> +        if (out_fence_fd < 0) {
>> +            ret = out_fence_fd;
>> +            out_fence_fd = -1;
>> +            goto pre_mutex_err;
>> +        }
>> +    }
>> +
>>       /* Take a local wakeref for preparing to dispatch the execbuf as
>>        * we expect to access the hardware fairly frequently in the
>>        * process. Upon first dispatch, we acquire another prolonged
>> @@ -1890,6 +1938,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, 
>> void *data,
>>           goto err_batch_unpin;
>>       }
>>   +    if (in_fence) {
>> +        ret = eb_add_fence(params->request, in_fence);
>> +        if (ret < 0)
>> +            goto err_request;
>> +    }
>> +
>> +    if (out_fence_fd != -1) {
>> +        out_fence = 
>> sync_file_create(fence_get(&params->request->fence));
>> +        if (!out_fence) {
>> +            ret = -ENOMEM;
>> +            goto err_request;
>> +        }
>> +    }
>> +
>>       /* Whilst this request exists, batch_obj will be on the
>>        * active_list, and so will hold the active reference. Only 
>> when this
>>        * request is retired will the the batch_obj be moved onto the
>> @@ -1917,6 +1979,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, 
>> void *data,
>>       ret = execbuf_submit(params, args, &eb->vmas);
>>   err_request:
>>       __i915_add_request(params->request, ret == 0);
>> +    if (out_fence) {
>> +        if (ret == 0) {
>> +            fd_install(out_fence_fd, out_fence->file);
>> +            args->rsvd2 &= ~(u64)0xffffffff << 32;
>> +            args->rsvd2 |= (u64)out_fence_fd << 32;
>> +            out_fence_fd = -1;
>> +        } else
>> +            fput(out_fence->file);
>> +    }
>>     err_batch_unpin:
>>       /*
>> @@ -1938,6 +2009,9 @@ pre_mutex_err:
>>       /* intel_gpu_busy should also get a ref, so it will free when 
>> the device
>>        * is really idle. */
>>       intel_runtime_pm_put(dev_priv);
>> +    if (out_fence_fd != -1)
>> +        put_unused_fd(out_fence_fd);
>> +    fence_put(in_fence);
>>       return ret;
>>   }
>>   @@ -2045,11 +2119,6 @@ i915_gem_execbuffer2(struct drm_device *dev, 
>> void *data,
>>           return -EINVAL;
>>       }
>>   -    if (args->rsvd2 != 0) {
>> -        DRM_DEBUG("dirty rvsd2 field\n");
>> -        return -EINVAL;
>> -    }
>> -
>>       exec2_list = drm_malloc_gfp(args->buffer_count,
>>                       sizeof(*exec2_list),
>>                       GFP_TEMPORARY);
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 75f9cab23249..9fe670cc7bf3 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_I915_OVERLAY_PUT_IMAGE    0x27
>>   #define DRM_I915_OVERLAY_ATTRS    0x28
>>   #define DRM_I915_GEM_EXECBUFFER2    0x29
>> +#define DRM_I915_GEM_EXECBUFFER2_WR    DRM_I915_GEM_EXECBUFFER2
>>   #define DRM_I915_GET_SPRITE_COLORKEY    0x2a
>>   #define DRM_I915_SET_SPRITE_COLORKEY    0x2b
>>   #define DRM_I915_GEM_WAIT    0x2c
>> @@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>>   #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
>>   #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE 
>> + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
>>   #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>>   #define DRM_IOCTL_I915_GEM_UNPIN    DRM_IOW(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>>   #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + 
>> DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
>> @@ -388,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>>   #define I915_PARAM_HAS_POOLED_EU     38
>>   #define I915_PARAM_MIN_EU_IN_POOL     39
>>   #define I915_PARAM_HAS_EXEC_ASYNC     40
>> +#define I915_PARAM_HAS_EXEC_FENCE     41
>>     typedef struct drm_i915_getparam {
>>       __s32 param;
>> @@ -821,7 +824,10 @@ struct drm_i915_gem_execbuffer2 {
>>    */
>>   #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
>>   -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
>> +#define I915_EXEC_FENCE_IN        (1<<16)
>> +#define I915_EXEC_FENCE_OUT        (1<<17)
>> +
>> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
>>     #define I915_EXEC_CONTEXT_ID_MASK    (0xffffffff)
>>   #define i915_execbuffer2_set_context_id(eb2, context) \
>
> I think it is also worth adding something like:
> @@ -705,7 +705,7 @@ struct drm_i915_gem_exec_object2 {
>         __u64 flags;
>
>         __u64 rsvd1;
> -       __u64 rsvd2;
> +       __u64 rsvd2;    /* Used for fence fd */
>  };
>
> Or even renaming 'rsvd2' to 'fence_io' or some such? Maybe split it 
> into two u32 entries to avoid all the messy bitmasking and shifting.
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx



More information about the Intel-gfx mailing list