<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Mon, Jul 10, 2017 at 8:45 AM, Christian König <span dir="ltr"><<a href="mailto:deathsimple@vodafone.de" target="_blank">deathsimple@vodafone.de</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
  
    
  
  <div bgcolor="#FFFFFF"><div><div class="gmail-h5">
    <div class="gmail-m_-226855681158848481moz-cite-prefix">Am 10.07.2017 um 17:28 schrieb Jason
      Ekstrand:<br>
    </div>
    <blockquote type="cite">
      <div dir="ltr">
        <div class="gmail_extra">
          <div class="gmail_quote">On Wed, Jul 5, 2017 at 6:04 PM, Dave
            Airlie <span dir="ltr"><<a href="mailto:airlied@gmail.com" target="_blank">airlied@gmail.com</a>></span>
            wrote:<br>
            <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From:
              Dave Airlie <<a href="mailto:airlied@redhat.com" target="_blank">airlied@redhat.com</a>><br>
              <br>
              This interface will allow sync object to be used to back<br>
              Vulkan fences. This API is pretty much the vulkan fence
              waiting<br>
              API, and I've ported the code from amdgpu.<br>
              <br>
              v2: accept relative timeout, pass remaining time back<br>
              to userspace.<br>
              v3: return to absolute timeouts.<br>
              v4: absolute zero = poll,<br>
                  rewrite any/all code to have same operation for arrays<br>
                  return -EINVAL for 0 fences.<br>
              v4.1: fixup fences allocation check, use u64_to_user_ptr<br>
              v5: move to sec/nsec, and use timespec64 for calcs.<br>
              v6: use -ETIME and drop the out status flag. (-ETIME<br>
              is suggested by ickle, I can feel a shed painting)<br>
              <br>
              Signed-off-by: Dave Airlie <<a href="mailto:airlied@redhat.com" target="_blank">airlied@redhat.com</a>><br>
              ---<br>
               drivers/gpu/drm/drm_internal.<wbr>h |   2 +<br>
               drivers/gpu/drm/drm_ioctl.c    |   2 +<br>
               drivers/gpu/drm/drm_syncobj.<wbr>c  | 142
              ++++++++++++++++++++++++++++++<wbr>+++++++++++<br>
               include/uapi/drm/drm.h         |  13 ++++<br>
               4 files changed, 159 insertions(+)<br>
              <br>
              diff --git a/drivers/gpu/drm/drm_internal<wbr>.h
              b/drivers/gpu/drm/drm_internal<wbr>.h<br>
              index 5cecc97..d71b50d 100644<br>
              --- a/drivers/gpu/drm/drm_internal<wbr>.h<br>
              +++ b/drivers/gpu/drm/drm_internal<wbr>.h<br>
              @@ -157,3 +157,5 @@ int drm_syncobj_handle_to_fd_ioctl<wbr>(struct
              drm_device *dev, void *data,<br>
                                                 struct drm_file
              *file_private);<br>
               int drm_syncobj_fd_to_handle_ioctl<wbr>(struct drm_device
              *dev, void *data,<br>
                                                 struct drm_file
              *file_private);<br>
              +int drm_syncobj_wait_ioctl(struct drm_device *dev, void
              *data,<br>
              +                          struct drm_file *file_private);<br>
              diff --git a/drivers/gpu/drm/drm_ioctl.c
              b/drivers/gpu/drm/drm_ioctl.c<br>
              index f1e5681..385ce74 100644<br>
              --- a/drivers/gpu/drm/drm_ioctl.c<br>
              +++ b/drivers/gpu/drm/drm_ioctl.c<br>
              @@ -657,6 +657,8 @@ static const struct drm_ioctl_desc
              drm_ioctls[] = {<br>
                                    DRM_UNLOCKED|DRM_RENDER_ALLOW)<wbr>,<br>
                      DRM_IOCTL_DEF(DRM_IOCTL_SYNCOB<wbr>J_FD_TO_HANDLE,
              drm_syncobj_fd_to_handle_ioctl<wbr>,<br>
                                    DRM_UNLOCKED|DRM_RENDER_ALLOW)<wbr>,<br>
              +       DRM_IOCTL_DEF(DRM_IOCTL_SYNCO<wbr>BJ_WAIT,
              drm_syncobj_wait_ioctl,<br>
              +                     DRM_UNLOCKED|DRM_RENDER_<wbr>ALLOW),<br>
               };<br>
              <br>
               #define DRM_CORE_IOCTL_COUNT   ARRAY_SIZE( drm_ioctls )<br>
              diff --git a/drivers/gpu/drm/drm_syncobj.<wbr>c
              b/drivers/gpu/drm/drm_syncobj.<wbr>c<br>
              index 89441bc..2d5a7a1 100644<br>
              --- a/drivers/gpu/drm/drm_syncobj.<wbr>c<br>
              +++ b/drivers/gpu/drm/drm_syncobj.<wbr>c<br>
              @@ -1,5 +1,7 @@<br>
               /*<br>
                * Copyright 2017 Red Hat<br>
              + * Parts ported from amdgpu (fence wait code).<br>
              + * Copyright 2016 Advanced Micro Devices, Inc.<br>
                *<br>
                * Permission is hereby granted, free of charge, to any
              person obtaining a<br>
                * copy of this software and associated documentation
              files (the "Software"),<br>
              @@ -31,6 +33,9 @@<br>
                * that contain an optional fence. The fence can be
              updated with a new<br>
                * fence, or be NULL.<br>
                *<br>
              + * syncobj's can be waited upon, where it will wait for
              the underlying<br>
              + * fence.<br>
              + *<br>
                * syncobj's can be export to fd's and back, these fd's
              are opaque and<br>
                * have no other use case, except passing the syncobj
              between processes.<br>
                *<br>
              @@ -451,3 +456,140 @@ drm_syncobj_fd_to_handle_ioctl<wbr>(struct
              drm_device *dev, void *data,<br>
                      return drm_syncobj_fd_to_handle(file_<wbr>private,
              args->fd,<br>
                                                     
              &args->handle);<br>
               }<br>
              +<br>
              +/**<br>
              + * drm_timeout_abs_to_jiffies - calculate jiffies timeout
              from absolute value<br>
              + *<br>
              + * @timeout_sec: timeout sec component, 0 for poll<br>
              + * @timeout_nsec: timeout nsec component in ns, 0 for
              poll<br>
              + * both must be 0 for poll.<br>
              + *<br>
              + * Calculate the timeout in jiffies from an absolute time
              in sec/nsec.<br>
              + */<br>
              +static unsigned long drm_timeout_abs_to_jiffies(int<wbr>64_t
              timeout_sec, uint64_t timeout_nsec)<br>
              +{<br>
              +       struct timespec64 abs_timeout, timeout,
              max_jiffy_timespec;<br>
              +       unsigned long timeout_jiffies;<br>
              +<br>
              +       /* make 0 timeout means poll - absolute 0 doesn't
              seem valid */<br>
              +       if (timeout_sec == 0 && timeout_nsec == 0)<br>
              +               return 0;<br>
              +<br>
              +       abs_timeout.tv_sec = timeout_sec;<br>
              +       abs_timeout.tv_nsec = timeout_nsec;<br>
              +<br>
              +       /* clamp timeout if it's to large */<br>
              +       if (!timespec64_valid_strict(&abs<wbr>_timeout))<br>
              +               return MAX_SCHEDULE_TIMEOUT - 1;<br>
              +<br>
              +       timeout = timespec64_sub(abs_timeout,
              ktime_to_timespec64(ktime_get(<wbr>)));<br>
              +       if (!timespec64_valid(&timeout))<br>
              +               return 0;<br>
              +<br>
              +       jiffies_to_timespec64(MAX_JIF<wbr>FY_OFFSET,
              &max_jiffy_timespec);<br>
              +       if (timespec64_compare(&timeout,
              &max_jiffy_timespec) >= 0)<br>
              +               return MAX_SCHEDULE_TIMEOUT - 1;<br>
              +<br>
              +       timeout_jiffies = timespec64_to_jiffies(&timeout<wbr>);<br>
              +       /*  clamp timeout to avoid infinite timeout */<br>
              +       if (timeout_jiffies >= MAX_SCHEDULE_TIMEOUT)<br>
              +               return MAX_SCHEDULE_TIMEOUT - 1;<br>
              +<br>
              +       return timeout_jiffies + 1;<br>
              +}<br>
              +<br>
              +static int drm_syncobj_wait_fences(struct drm_device
              *dev,<br>
              +                                  struct drm_file
              *file_private,<br>
              +                                  struct drm_syncobj_wait
              *wait,<br>
              +                                  struct dma_fence
              **fences)<br>
              +{<br>
              +       unsigned long timeout =
              drm_timeout_abs_to_jiffies(wai<wbr>t->timeout_sec,
              wait->timeout_nsec);<br>
              +       int ret = 0;<br>
              +       uint32_t first = ~0;<br>
              +<br>
              +       if (wait->flags &
              DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AL<wbr>L) {<br>
              +               int i;<br>
              +               for (i = 0; i < wait->count_handles;
              i++) {<br>
              +                       ret =
              dma_fence_wait_timeout(fences[<wbr>i], true, timeout);<br>
              +<br>
              +                       if (ret < 0)<br>
              +                               return ret;<br>
              +                       if (ret == 0)<br>
              +                               break;<br>
              +                       timeout = ret;<br>
              +               }<br>
              +               first = 0;<br>
              +       } else {<br>
              +               ret = dma_fence_wait_any_timeout(fen<wbr>ces,<br>
              +                                               
              wait->count_handles,<br>
              +                                                true,
              timeout,<br>
              +                                               
              &first);<br>
              +       }<br>
              +<br>
              +       if (ret < 0)<br>
              +               return ret;<br>
              +<br>
              +       wait->first_signaled = first;<br>
              +       if (ret == 0)<br>
              +               return -ETIME;<br>
              +       return 0;<br>
              +}<br>
              +<br>
              +int<br>
              +drm_syncobj_wait_ioctl(struct drm_device *dev, void
              *data,<br>
              +                      struct drm_file *file_private)<br>
              +{<br>
              +       struct drm_syncobj_wait *args = data;<br>
              +       uint32_t *handles;<br>
              +       struct dma_fence **fences;<br>
              +       int ret = 0;<br>
              +       int i;<br>
              +<br>
              +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))<br>
              +               return -ENODEV;<br>
              +<br>
              +       if (args->flags != 0 && args->flags
              != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AL<wbr>L)<br>
              +               return -EINVAL;<br>
              +<br>
              +       if (args->count_handles == 0)<br>
              +               return -EINVAL;<br>
              +<br>
              +       /* Get the handles from userspace */<br>
              +       handles = kmalloc_array(args->count_hand<wbr>les,
              sizeof(uint32_t),<br>
              +                               GFP_KERNEL);<br>
              +       if (handles == NULL)<br>
              +               return -ENOMEM;<br>
              +<br>
              +       if (copy_from_user(handles,<br>
              +                         
              u64_to_user_ptr(args->handles)<wbr>,<br>
              +                          sizeof(uint32_t) *
              args->count_handles)) {<br>
              +               ret = -EFAULT;<br>
              +               goto err_free_handles;<br>
              +       }<br>
              +<br>
              +       fences = kcalloc(args->count_handles,<br>
              +                        sizeof(struct dma_fence *),
              GFP_KERNEL);<br>
              +       if (!fences) {<br>
              +               ret = -ENOMEM;<br>
              +               goto err_free_handles;<br>
              +       }<br>
              +<br>
              +       for (i = 0; i < args->count_handles; i++) {<br>
              +               ret = drm_syncobj_fence_get(file_pri<wbr>vate,
              handles[i],<br>
              +                                         
               &fences[i]);<br>
              +               if (ret)<br>
              +                       goto err_free_fence_array;<br>
              +       }<br>
              +<br>
              +       ret = drm_syncobj_wait_fences(dev, file_private,<br>
              +                                     args, fences);<br>
            </blockquote>
            <div><br>
            </div>
            <div>So, reading some CTS tests again, and I think we have a
              problem here.  The Vulkan spec allows you to wait on a
              fence that is in the unsignaled state.</div>
          </div>
        </div>
      </div>
    </blockquote>
    <br></div></div>
    At least on the closed source driver that would be illegal as far as
    I know.<br></div></blockquote><div><br></div><div>Then they are doing workarounds in userspace.  There are definitely CTS tests for this:<br><br><a href="https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74">https://github.com/KhronosGroup/VK-GL-CTS/blob/master/external/vulkancts/modules/vulkan/synchronization/vktSynchronizationBasicFenceTests.cpp#L74</a><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div>
    You can't wait on a semaphore before the signal operation is send
    down to the kerel.</div></blockquote><div><br></div><div>We (Intel) deal with this today by tracking whether or not the fence has been submitted and using a condition variable in userspace to sort it all out.  If we ever want to share fences across processes (which we do), then this needs to be sorted in the kernel. <br><br></div><div>--Jason<br></div><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div bgcolor="#FFFFFF">
    Regards,<br>
    Christian.<div><div class="gmail-h5"><br>
    <br>
    <blockquote type="cite">
      <div dir="ltr">
        <div class="gmail_extra">
          <div class="gmail_quote">
            <div>  In theory, you could have thread A start waiting on a
              fence before thread B submits the work which triggers that
              fence.  This means that the dma_fence may not exist yet
              when vkWaitForFences gets called.  If we really want to
              support the full Vulkan usage, we need to somehow support
              missing dma_fences by waiting for the dma_fence to show
              up.  Unfortunately, I don't know enough about the internal
              kernel APIs to know what that would look like.<br>
            </div>
          </div>
        </div>
      </div>
    </blockquote>
    </div></div><blockquote type="cite"><div><div class="gmail-h5">
      <div dir="ltr">
        <div class="gmail_extra">
          <div class="gmail_quote">
            <div> </div>
            <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
              +<br>
              +err_free_fence_array:<br>
              +       for (i = 0; i < args->count_handles; i++)<br>
              +               dma_fence_put(fences[i]);<br>
              +       kfree(fences);<br>
              +err_free_handles:<br>
              +       kfree(handles);<br>
              +<br>
              +       return ret;<br>
              +}<br>
              diff --git a/include/uapi/drm/drm.h
              b/include/uapi/drm/drm.h<br>
              index 101593a..91746a7 100644<br>
              --- a/include/uapi/drm/drm.h<br>
              +++ b/include/uapi/drm/drm.h<br>
              @@ -718,6 +718,18 @@ struct drm_syncobj_handle {<br>
                      __u32 pad;<br>
               };<br>
              <br>
              +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AL<wbr>L (1 <<
              0)<br>
              +struct drm_syncobj_wait {<br>
              +       __u64 handles;<br>
              +       /* absolute timeout */<br>
              +       __s64 timeout_sec;<br>
              +       __s64 timeout_nsec;<br>
              +       __u32 count_handles;<br>
              +       __u32 flags;<br>
              +       __u32 first_signaled; /* only valid when not
              waiting all */<br>
              +       __u32 pad;<br>
              +};<br>
              +<br>
               #if defined(__cplusplus)<br>
               }<br>
               #endif<br>
              @@ -840,6 +852,7 @@ extern "C" {<br>
               #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0,
              struct drm_syncobj_destroy)<br>
               #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1,
              struct drm_syncobj_handle)<br>
               #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2,
              struct drm_syncobj_handle)<br>
              +#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3,
              struct drm_syncobj_wait)<br>
              <br>
               /**<br>
                * Device specific ioctls should only be in their
              respective headers<br>
              <span class="gmail-m_-226855681158848481HOEnZb"><font color="#888888">--<br>
                  2.9.4<br>
                  <br>
                  ______________________________<wbr>_________________<br>
                  dri-devel mailing list<br>
                  <a href="mailto:dri-devel@lists.freedesktop.org" target="_blank">dri-devel@lists.freedesktop.or<wbr>g</a><br>
                  <a href="https://lists.freedesktop.org/mailman/listinfo/dri-devel" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/dri-devel</a><br>
                </font></span></blockquote>
          </div>
          <br>
        </div>
      </div>
      <br>
      <fieldset class="gmail-m_-226855681158848481mimeAttachmentHeader"></fieldset>
      <br>
      </div></div><pre>______________________________<wbr>_________________
amd-gfx mailing list
<a class="gmail-m_-226855681158848481moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org" target="_blank">amd-gfx@lists.freedesktop.org</a>
<a class="gmail-m_-226855681158848481moz-txt-link-freetext" href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/amd-gfx</a>
</pre>
    </blockquote>
    <p><br>
    </p>
  </div>

</blockquote></div><br></div></div>