<div dir="ltr"><div>I like it</div><div><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div></div><br><div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 11:49 AM Keith Packard <<a href="mailto:keithp@keithp.com">keithp@keithp.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Offers three clocks, device, clock monotonic and clock monotonic<br>
raw. Could use some kernel support to reduce the deviation between<br>
clock values.<br>
<br>
v2:<br>
        Ensure deviation is at least as big as the GPU time interval.<br>
<br>
v3:<br>
        Set device->lost when returning DEVICE_LOST.<br>
        Use MAX2 and DIV_ROUND_UP instead of open coding these.<br>
        Delete spurious TIMESTAMP in radv version.<br>
<br>
        Suggested-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>><br>
        Suggested-by: Lionel Landwerlin <<a href="mailto:lionel.g.landwerlin@intel.com" target="_blank">lionel.g.landwerlin@intel.com</a>><br>
<br>
v4:<br>
        Add anv_gem_reg_read to anv_gem_stubs.c<br>
<br>
        Suggested-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>><br>
<br>
v5:<br>
        Adjust maxDeviation computation to max(sampled_clock_period) +<br>
        sample_interval.<br>
<br>
        Suggested-by: Bas Nieuwenhuizen <<a href="mailto:bas@basnieuwenhuizen.nl" target="_blank">bas@basnieuwenhuizen.nl</a>><br>
        Suggested-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>><br>
<br>
Signed-off-by: Keith Packard <<a href="mailto:keithp@keithp.com" target="_blank">keithp@keithp.com</a>><br>
---<br>
 src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++<br>
 src/amd/vulkan/radv_extensions.py  |   1 +<br>
 src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++<br>
 src/intel/vulkan/anv_extensions.py |   1 +<br>
 src/intel/vulkan/anv_gem.c         |  13 +++<br>
 src/intel/vulkan/anv_gem_stubs.c   |   7 ++<br>
 src/intel/vulkan/anv_private.h     |   2 +<br>
 7 files changed, 270 insertions(+)<br>
<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index 174922780fc..4a705a724ef 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(<br>
                               VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |<br>
                               VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;<br>
 }<br>
+<br>
+static const VkTimeDomainEXT radv_time_domains[] = {<br>
+       VK_TIME_DOMAIN_DEVICE_EXT,<br>
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,<br>
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,<br>
+};<br>
+<br>
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(<br>
+       VkPhysicalDevice                             physicalDevice,<br>
+       uint32_t                                     *pTimeDomainCount,<br>
+       VkTimeDomainEXT                              *pTimeDomains)<br>
+{<br>
+       int d;<br>
+       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);<br>
+<br>
+       for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {<br>
+               vk_outarray_append(&out, i) {<br>
+                       *i = radv_time_domains[d];<br>
+               }<br>
+       }<br>
+<br>
+       return vk_outarray_status(&out);<br>
+}<br>
+<br>
+static uint64_t<br>
+radv_clock_gettime(clockid_t clock_id)<br>
+{<br>
+       struct timespec current;<br>
+       int ret;<br>
+<br>
+       ret = clock_gettime(clock_id, &current);<br>
+       if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)<br>
+               ret = clock_gettime(CLOCK_MONOTONIC, &current);<br>
+       if (ret < 0)<br>
+               return 0;<br>
+<br>
+       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;<br>
+}<br>
+<br>
+VkResult radv_GetCalibratedTimestampsEXT(<br>
+       VkDevice                                     _device,<br>
+       uint32_t                                     timestampCount,<br>
+       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,<br>
+       uint64_t                                     *pTimestamps,<br>
+       uint64_t                                     *pMaxDeviation)<br>
+{<br>
+       RADV_FROM_HANDLE(radv_device, device, _device);<br>
+       uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;<br>
+       int d;<br>
+       uint64_t begin, end;<br>
+        uint64_t max_clock_period = 0;<br>
+<br>
+       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+       for (d = 0; d < timestampCount; d++) {<br>
+               switch (pTimestampInfos[d].timeDomain) {<br>
+               case VK_TIME_DOMAIN_DEVICE_EXT:<br>
+                       pTimestamps[d] = device->ws->query_value(device->ws,<br>
+                                                                RADEON_TIMESTAMP);<br>
+                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);<br>
+                        max_clock_period = MAX2(max_clock_period, device_period);<br>
+                       break;<br>
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:<br>
+                       pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);<br>
+                        max_clock_period = MAX2(max_clock_period, 1);<br>
+                       break;<br>
+<br>
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:<br>
+                       pTimestamps[d] = begin;<br>
+                       break;<br>
+               default:<br>
+                       pTimestamps[d] = 0;<br>
+                       break;<br>
+               }<br>
+       }<br>
+<br>
+       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+        /*<br>
+         * The maximum deviation is the sum of the interval over which we<br>
+         * perform the sampling and the maximum period of any sampled<br>
+         * clock. That's because the maximum skew between any two sampled<br>
+         * clock edges is when the sampled clock with the largest period is<br>
+         * sampled at the end of that period but right at the beginning of the<br>
+         * sampling interval and some other clock is sampled right at the<br>
+         * beginning of its sampling period and right at the end of the<br>
+         * sampling interval. Let's assume the GPU has the longest clock<br>
+         * period and that the application is sampling GPU and monotonic:<br>
+         *<br>
+         *                               s                 e<br>
+         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f<br>
+         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+         *<br>
+         *                               g<br>
+         *               0         1         2         3<br>
+         *     GPU       -----_____-----_____-----_____-----_____<br>
+         *<br>
+         *                                                m<br>
+         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a b c<br>
+         *     Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+         *<br>
+         *     Interval                     <-----------------><br>
+         *     Deviation           <--------------------------><br>
+         *<br>
+         *             s  = read(raw)       2<br>
+         *             g  = read(GPU)       1<br>
+         *             m  = read(monotonic) 2<br>
+         *             e  = read(raw)       b<br>
+         *<br>
+         * We round the sample interval up by one tick to cover sampling error<br>
+         * in the interval clock.<br>
+         */<br>
+<br>
+        uint64_t sample_interval = end - begin + 1;<br>
+<br>
+        *pMaxDeviation = sample_interval + max_clock_period;<br>
+<br>
+       return VK_SUCCESS;<br>
+}<br>
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py<br>
index 5dcedae1c63..4c81d3f0068 100644<br>
--- a/src/amd/vulkan/radv_extensions.py<br>
+++ b/src/amd/vulkan/radv_extensions.py<br>
@@ -92,6 +92,7 @@ EXTENSIONS = [<br>
     Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),<br>
     Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),<br>
     Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),<br>
+    Extension('VK_EXT_calibrated_timestamps',             1, True),<br>
     Extension('VK_EXT_conditional_rendering',             1, True),<br>
     Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),<br>
     Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),<br>
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c<br>
index a2551452eb1..076ff3a57f6 100644<br>
--- a/src/intel/vulkan/anv_device.c<br>
+++ b/src/intel/vulkan/anv_device.c<br>
@@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(<br>
    vk_free2(&device->alloc, pAllocator, fb);<br>
 }<br>
<br>
+static const VkTimeDomainEXT anv_time_domains[] = {<br>
+   VK_TIME_DOMAIN_DEVICE_EXT,<br>
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,<br>
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,<br>
+};<br>
+<br>
+VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(<br>
+   VkPhysicalDevice                             physicalDevice,<br>
+   uint32_t                                     *pTimeDomainCount,<br>
+   VkTimeDomainEXT                              *pTimeDomains)<br>
+{<br>
+   int d;<br>
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);<br>
+<br>
+   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {<br>
+      vk_outarray_append(&out, i) {<br>
+         *i = anv_time_domains[d];<br>
+      }<br>
+   }<br>
+<br>
+   return vk_outarray_status(&out);<br>
+}<br>
+<br>
+static uint64_t<br>
+anv_clock_gettime(clockid_t clock_id)<br>
+{<br>
+   struct timespec current;<br>
+   int ret;<br>
+<br>
+   ret = clock_gettime(clock_id, &current);<br>
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)<br>
+      ret = clock_gettime(CLOCK_MONOTONIC, &current);<br>
+   if (ret < 0)<br>
+      return 0;<br>
+<br>
+   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;<br>
+}<br>
+<br>
+#define TIMESTAMP 0x2358<br>
+<br>
+VkResult anv_GetCalibratedTimestampsEXT(<br>
+   VkDevice                                     _device,<br>
+   uint32_t                                     timestampCount,<br>
+   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,<br>
+   uint64_t                                     *pTimestamps,<br>
+   uint64_t                                     *pMaxDeviation)<br>
+{<br>
+   ANV_FROM_HANDLE(anv_device, device, _device);<br>
+   uint64_t timestamp_frequency = device->info.timestamp_frequency;<br>
+   int  ret;<br>
+   int d;<br>
+   uint64_t begin, end;<br>
+   uint64_t max_clock_period = 0;<br>
+<br>
+   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+   for (d = 0; d < timestampCount; d++) {<br>
+      switch (pTimestampInfos[d].timeDomain) {<br>
+      case VK_TIME_DOMAIN_DEVICE_EXT:<br>
+         ret = anv_gem_reg_read(device, TIMESTAMP | 1,<br>
+                                &pTimestamps[d]);<br>
+<br>
+         if (ret != 0) {<br>
+            device->lost = TRUE;<br>
+            return VK_ERROR_DEVICE_LOST;<br>
+         }<br>
+         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);<br>
+         max_clock_period = MAX2(max_clock_period, device_period);<br>
+         break;<br>
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:<br>
+         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);<br>
+         max_clock_period = MAX2(max_clock_period, 1);<br>
+         break;<br>
+<br>
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:<br>
+         pTimestamps[d] = begin;<br>
+         break;<br>
+      default:<br>
+         pTimestamps[d] = 0;<br>
+         break;<br>
+      }<br>
+   }<br>
+<br>
+   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+    /*<br>
+     * The maximum deviation is the sum of the interval over which we<br>
+     * perform the sampling and the maximum period of any sampled<br>
+     * clock. That's because the maximum skew between any two sampled<br>
+     * clock edges is when the sampled clock with the largest period is<br>
+     * sampled at the end of that period but right at the beginning of the<br>
+     * sampling interval and some other clock is sampled right at the<br>
+     * beginning of its sampling period and right at the end of the<br>
+     * sampling interval. Let's assume the GPU has the longest clock<br>
+     * period and that the application is sampling GPU and monotonic:<br>
+     *<br>
+     *                               s                 e<br>
+     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f<br>
+     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+     *<br>
+     *                               g<br>
+     *           0         1         2         3<br>
+     * GPU       -----_____-----_____-----_____-----_____<br>
+     *<br>
+     *                                                m<br>
+     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c<br>
+     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+     *<br>
+     * Interval                     <-----------------><br>
+     * Deviation           <--------------------------><br>
+     *<br>
+     *         s  = read(raw)       2<br>
+     *         g  = read(GPU)       1<br>
+     *         m  = read(monotonic) 2<br>
+     *         e  = read(raw)       b<br>
+     *<br>
+     * We round the sample interval up by one tick to cover sampling error<br>
+     * in the interval clock.<br>
+     */<br>
+<br>
+   uint64_t sample_interval = end - begin + 1;<br>
+<br>
+   *pMaxDeviation = sample_interval + max_clock_period;<br>
+<br>
+   return VK_SUCCESS;<br>
+}<br>
+<br>
 /* vk_icd.h does not declare this function, so we declare it here to<br>
  * suppress Wmissing-prototypes.<br>
  */<br>
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py<br>
index d4915c95013..a8535964da7 100644<br>
--- a/src/intel/vulkan/anv_extensions.py<br>
+++ b/src/intel/vulkan/anv_extensions.py<br>
@@ -126,6 +126,7 @@ EXTENSIONS = [<br>
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),<br>
     Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),<br>
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen >= 9'),<br>
+    Extension('VK_EXT_calibrated_timestamps',             1, True),<br>
 ]<br>
<br>
 class VkVersion:<br>
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c<br>
index c43b5ef9e06..1bdf040c1a3 100644<br>
--- a/src/intel/vulkan/anv_gem.c<br>
+++ b/src/intel/vulkan/anv_gem.c<br>
@@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)<br>
    return args.handle;<br>
 }<br>
<br>
+int<br>
+anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)<br>
+{<br>
+   struct drm_i915_reg_read args = {<br>
+      .offset = offset<br>
+   };<br>
+<br>
+   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);<br>
+<br>
+   *result = args.val;<br>
+   return ret;<br>
+}<br>
+<br>
 #ifndef SYNC_IOC_MAGIC<br>
 /* duplicated from linux/sync_file.h to avoid build-time dependency<br>
  * on new (v4.7) kernel headers.  Once distro's are mostly using<br>
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c<br>
index 5093bd5db1a..8cc3ad1f22e 100644<br>
--- a/src/intel/vulkan/anv_gem_stubs.c<br>
+++ b/src/intel/vulkan/anv_gem_stubs.c<br>
@@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,<br>
 {<br>
    unreachable("Unused");<br>
 }<br>
+<br>
+int<br>
+anv_gem_reg_read(struct anv_device *device,<br>
+                 uint32_t offset, uint64_t *result)<br>
+{<br>
+   unreachable("Unused");<br>
+}<br>
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h<br>
index 599b903f25c..08376b00c8e 100644<br>
--- a/src/intel/vulkan/anv_private.h<br>
+++ b/src/intel/vulkan/anv_private.h<br>
@@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);<br>
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,<br>
                                 uint32_t *active, uint32_t *pending);<br>
 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);<br>
+int anv_gem_reg_read(struct anv_device *device,<br>
+                     uint32_t offset, uint64_t *result);<br>
 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);<br>
 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);<br>
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>