<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Wed, Jul 5, 2017 at 10:37 AM, Chris Wilson <span dir="ltr"><<a href="mailto:chris@chris-wilson.co.uk" target="_blank">chris@chris-wilson.co.uk</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Quoting Jason Ekstrand (2017-07-05 18:21:22)<br>
<div><div class="gmail-h5">> This commit adds support for waiting on or signaling DRM syncobjs as<br>
> part of execbuf.  It does so by hijacking the currently unused cliprects<br>
> pointer to instead point to an array of i915_gem_exec_fence structs<br>
> which containe a DRM syncobj and a flags parameter which specifies<br>
> whether to wait on it or to signal it.  This implementation<br>
> theoretically allows for both flags to be set in which case it waits on<br>
> the dma_fence that was in the syncobj and then immediately replaces it<br>
> with the dma_fence from the current execbuf.<br>
><br>
> Cc: Chris Wilson <<a href="mailto:chris@chris-wilson.co.uk">chris@chris-wilson.co.uk</a>><br>
> ---<br>
><br>
> Ideally, I'd like to get whatever kernel reviewing and/or merging is<br>
> required to land the userspace bits ASAP.  That said, I understand that<br>
> this is my first kernel patch and it's liable to have a few rounds of<br>
> review before going in.  :-)<br>
><br>
> The mesa branch for using this in Vulkan can be found here:<br>
><br>
> <a href="https://cgit.freedesktop.org/%7Ejekstrand/mesa/log/?h=review/anv-syncobj" rel="noreferrer" target="_blank">https://cgit.freedesktop.org/~<wbr>jekstrand/mesa/log/?h=review/<wbr>anv-syncobj</a><br>
><br>
>  drivers/gpu/drm/i915/i915_drv.<wbr>c            |  3 +-<br>
>  drivers/gpu/drm/i915/i915_gem_<wbr>execbuffer.c | 97 ++++++++++++++++++++++++++++--<br>
>  include/uapi/drm/i915_drm.h                | 30 ++++++++-<br>
>  3 files changed, 121 insertions(+), 9 deletions(-)<br>
><br>
> diff --git a/drivers/gpu/drm/i915/i915_<wbr>drv.c b/drivers/gpu/drm/i915/i915_<wbr>drv.c<br>
> index 9167a73..e6f7f49 100644<br>
> --- a/drivers/gpu/drm/i915/i915_<wbr>drv.c<br>
> +++ b/drivers/gpu/drm/i915/i915_<wbr>drv.c<br>
> @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device *dev, void *data,<br>
>         case I915_PARAM_HAS_EXEC_FENCE:<br>
>         case I915_PARAM_HAS_EXEC_CAPTURE:<br>
>         case I915_PARAM_HAS_EXEC_BATCH_<wbr>FIRST:<br>
> +       case I915_PARAM_HAS_EXEC_FENCE_<wbr>ARRAY:<br>
>                 /* For the time being all of these are always true;<br>
>                  * if some supported hardware does not have one of these<br>
>                  * features this value needs to be provided from<br>
> @@ -2734,7 +2735,7 @@ static struct drm_driver driver = {<br>
>          */<br>
>         .driver_features =<br>
>             DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |<br>
> -           DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC,<br>
> +           DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,<br>
>         .release = i915_driver_release,<br>
>         .open = i915_driver_open,<br>
>         .lastclose = i915_driver_lastclose,<br>
> diff --git a/drivers/gpu/drm/i915/i915_<wbr>gem_execbuffer.c b/drivers/gpu/drm/i915/i915_<wbr>gem_execbuffer.c<br>
> index 929f275..81b7b7b 100644<br>
> --- a/drivers/gpu/drm/i915/i915_<wbr>gem_execbuffer.c<br>
> +++ b/drivers/gpu/drm/i915/i915_<wbr>gem_execbuffer.c<br>
> @@ -33,6 +33,7 @@<br>
><br>
>  #include <drm/drmP.h><br>
>  #include <drm/i915_drm.h><br>
> +#include <drm/drm_syncobj.h><br>
><br>
>  #include "i915_drv.h"<br>
>  #include "i915_gem_clflush.h"<br>
> @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer(<wbr>struct drm_i915_gem_execbuffer2 *exec)<br>
>                 return false;<br>
><br>
>         /* Kernel clipping was a DRI1 misfeature */<br>
> -       if (exec->num_cliprects || exec->cliprects_ptr)<br>
> -               return false;<br>
> +       if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {<br>
> +               if (exec->num_cliprects || exec->cliprects_ptr)<br>
> +                       return false;<br>
> +       }<br>
><br>
>         if (exec->DR4 == 0xffffffff) {<br>
>                 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");<br>
> @@ -2118,12 +2121,15 @@ static int<br>
>  i915_gem_do_execbuffer(struct drm_device *dev,<br>
>                        struct drm_file *file,<br>
>                        struct drm_i915_gem_execbuffer2 *args,<br>
> -                      struct drm_i915_gem_exec_object2 *exec)<br>
> +                      struct drm_i915_gem_exec_object2 *exec,<br>
> +                      struct drm_i915_gem_exec_fence *fences)<br>
>  {<br>
>         struct i915_execbuffer eb;<br>
>         struct dma_fence *in_fence = NULL;<br>
>         struct sync_file *out_fence = NULL;<br>
> +       struct drm_syncobj **syncobjs = NULL;<br>
>         int out_fence_fd = -1;<br>
> +       unsigned int i;<br>
>         int err;<br>
><br>
>         BUILD_BUG_ON(__EXEC_OBJECT_<wbr>INTERNAL_FLAGS &<br>
> @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device *dev,<br>
>                 }<br>
>         }<br>
><br>
> +       if (args->flags & I915_EXEC_FENCE_ARRAY) {<br>
> +               syncobjs = kvmalloc_array(args->num_<wbr>cliprects,<br>
> +                                         sizeof(*syncobjs),<br>
> +                                         __GFP_NOWARN | GFP_TEMPORARY |<br>
> +                                         __GFP_ZERO);<br>
> +               if (syncobjs == NULL) {<br>
> +                       DRM_DEBUG("Failed to allocate array of %d syncobjs\n",<br>
> +                                 args->num_cliprects);<br>
> +                       err = -ENOMEM;<br>
> +                       goto err_out_fence;<br>
> +               }<br>
> +               for (i = 0; i < args->num_cliprects; i++) {<br>
> +                       syncobjs[i] = drm_syncobj_find(file, fences[i].handle);<br>
> +                       if (!syncobjs[i]) {<br>
> +                               DRM_DEBUG("Invalid syncobj handle provided\n");<br>
> +                               err = -EINVAL;<br>
> +                               goto err_syncobjs;<br>
> +                       }<br>
> +               }<br>
<br>
</div></div>I did this in the caller so that we only allocated and passed in a single<br>
array of drm_syncobj (with the flags packed into the low bits).<br></blockquote><div><br></div><div>Packing things into the low bits seems a bit sketchy but it works so long as we only have the two flag bits.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div class="gmail-h5">
> +       }<br>
> +<br>
>         err = eb_create(&eb);<br>
>         if (err)<br>
> -               goto err_out_fence;<br>
> +               goto err_syncobjs;<br>
><br>
>         GEM_BUG_ON(!eb.lut_size);<br>
><br>
> @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,<br>
>                         goto err_request;<br>
>         }<br>
><br>
> +       if (args->flags & I915_EXEC_FENCE_ARRAY) {<br>
> +               for (i = 0; i < args->num_cliprects; i++) {<br>
> +                       if (fences[i].flags & I915_EXEC_FENCE_WAIT) {<br>
> +                               if (i915_gem_request_await_dma_<wbr>fence(eb.request,<br>
> +                                                                    syncobjs[i]->fence))<br>
> +                                       goto err_request;<br>
> +                       }<br>
> +               }<br>
> +       }<br>
> +<br>
>         if (out_fence_fd != -1) {<br>
>                 out_fence = sync_file_create(&eb.request-><wbr>fence);<br>
>                 if (!out_fence) {<br>
> @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,<br>
>                 }<br>
>         }<br>
><br>
> +    /* We need to add fences to syncobjs last because doing so mutates the<br>
> +     * syncobj and we have no good way to recover if the execbuf fails<br>
> +     * after this point.  Fortunately, drm_syncobj_replace_fence can never<br>
> +     * fail.<br>
> +     */<br>
> +       if (args->flags & I915_EXEC_FENCE_ARRAY) {<br>
> +               for (i = 0; i < args->num_cliprects; i++) {<br>
> +                       if (fences[i].flags & I915_EXEC_FENCE_SIGNAL) {<br>
> +                               drm_syncobj_replace_fence(<wbr>file, syncobjs[i],<br>
> +                                                         &eb.request->fence);<br>
> +                       }<br>
> +               }<br>
> +       }<br>
> +<br>
<br>
</div></div>This has to be after the execbuf is submitted, next to where out_fence<br>
is attached is a good spot, and should only be updated on success. (The<br>
kernel API allows combining of FENCE_WAIT | FENCE_SIGNAL so we can only<br>
replace a FENCE_WAIT once we know we have committed the execbuf.)<br></blockquote><div><br></div><div>It's currently right after we call<br><br>        out_fence = sync_file_create(&eb.request->fence);<br><br></div><div>which is before eb_submit().  Are sync files wrong?  Note that I was careful to ensure that there are no error paths after syncobj_replace_fence so we know for 100% sure that it will be committed, hence the comment.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
I moved all of these to little functions: get_fence_array,<br>
await_fence_array, signal_fence_array and put_fence_array.<span class="gmail-HOEnZb"><font color="#888888"><br></font></span></blockquote><div><br></div><div>You clearly have a version of this patch somewhere.  Where can I find it?<br></div></div></div></div>