[Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]
Jason Ekstrand
jason at jlekstrand.net
Wed Oct 17 17:06:41 UTC 2018
I like it
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
On Wed, Oct 17, 2018 at 11:49 AM Keith Packard <keithp at keithp.com> wrote:
> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
> Ensure deviation is at least as big as the GPU time interval.
>
> v3:
> Set device->lost when returning DEVICE_LOST.
> Use MAX2 and DIV_ROUND_UP instead of open coding these.
> Delete spurious TIMESTAMP in radv version.
>
> Suggested-by: Jason Ekstrand <jason at jlekstrand.net>
> Suggested-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>
> v4:
> Add anv_gem_reg_read to anv_gem_stubs.c
>
> Suggested-by: Jason Ekstrand <jason at jlekstrand.net>
>
> v5:
> Adjust maxDeviation computation to max(sampled_clock_period) +
> sample_interval.
>
> Suggested-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> Suggested-by: Jason Ekstrand <jason at jlekstrand.net>
>
> Signed-off-by: Keith Packard <keithp at keithp.com>
> ---
> src/amd/vulkan/radv_device.c | 119 +++++++++++++++++++++++++++
> src/amd/vulkan/radv_extensions.py | 1 +
> src/intel/vulkan/anv_device.c | 127 +++++++++++++++++++++++++++++
> src/intel/vulkan/anv_extensions.py | 1 +
> src/intel/vulkan/anv_gem.c | 13 +++
> src/intel/vulkan/anv_gem_stubs.c | 7 ++
> src/intel/vulkan/anv_private.h | 2 +
> 7 files changed, 270 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..4a705a724ef 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
> VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
> VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
> }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> + VK_TIME_DOMAIN_DEVICE_EXT,
> + VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> + VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> + VkPhysicalDevice physicalDevice,
> + uint32_t *pTimeDomainCount,
> + VkTimeDomainEXT *pTimeDomains)
> +{
> + int d;
> + VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> + for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> + vk_outarray_append(&out, i) {
> + *i = radv_time_domains[d];
> + }
> + }
> +
> + return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> + struct timespec current;
> + int ret;
> +
> + ret = clock_gettime(clock_id, &current);
> + if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> + ret = clock_gettime(CLOCK_MONOTONIC, &current);
> + if (ret < 0)
> + return 0;
> +
> + return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> + VkDevice _device,
> + uint32_t timestampCount,
> + const VkCalibratedTimestampInfoEXT *pTimestampInfos,
> + uint64_t *pTimestamps,
> + uint64_t *pMaxDeviation)
> +{
> + RADV_FROM_HANDLE(radv_device, device, _device);
> + uint32_t clock_crystal_freq =
> device->physical_device->rad_info.clock_crystal_freq;
> + int d;
> + uint64_t begin, end;
> + uint64_t max_clock_period = 0;
> +
> + begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> + for (d = 0; d < timestampCount; d++) {
> + switch (pTimestampInfos[d].timeDomain) {
> + case VK_TIME_DOMAIN_DEVICE_EXT:
> + pTimestamps[d] =
> device->ws->query_value(device->ws,
> +
> RADEON_TIMESTAMP);
> + uint64_t device_period = DIV_ROUND_UP(1000000,
> clock_crystal_freq);
> + max_clock_period = MAX2(max_clock_period,
> device_period);
> + break;
> + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> + pTimestamps[d] =
> radv_clock_gettime(CLOCK_MONOTONIC);
> + max_clock_period = MAX2(max_clock_period, 1);
> + break;
> +
> + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> + pTimestamps[d] = begin;
> + break;
> + default:
> + pTimestamps[d] = 0;
> + break;
> + }
> + }
> +
> + end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> + /*
> + * The maximum deviation is the sum of the interval over which we
> + * perform the sampling and the maximum period of any sampled
> + * clock. That's because the maximum skew between any two sampled
> + * clock edges is when the sampled clock with the largest period
> is
> + * sampled at the end of that period but right at the beginning
> of the
> + * sampling interval and some other clock is sampled right at the
> + * beginning of its sampling period and right at the end of the
> + * sampling interval. Let's assume the GPU has the longest clock
> + * period and that the application is sampling GPU and monotonic:
> + *
> + * s e
> + * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> + * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> + *
> + * g
> + * 0 1 2 3
> + * GPU -----_____-----_____-----_____-----_____
> + *
> + * m
> + * x y z 0 1 2 3 4 5 6 7
> 8 9 a b c
> + * Monotonic
> -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> + *
> + * Interval <----------------->
> + * Deviation <-------------------------->
> + *
> + * s = read(raw) 2
> + * g = read(GPU) 1
> + * m = read(monotonic) 2
> + * e = read(raw) b
> + *
> + * We round the sample interval up by one tick to cover sampling
> error
> + * in the interval clock
> + */
> +
> + uint64_t sample_interval = end - begin + 1;
> +
> + *pMaxDeviation = sample_interval + max_clock_period;
> +
> + return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
> Extension('VK_KHR_display', 23,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
> Extension('VK_EXT_direct_mode_display', 1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
> Extension('VK_EXT_acquire_xlib_display', 1,
> 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> + Extension('VK_EXT_calibrated_timestamps', 1, True),
> Extension('VK_EXT_conditional_rendering', 1, True),
> Extension('VK_EXT_conservative_rasterization', 1,
> 'device->rad_info.chip_class >= GFX9'),
> Extension('VK_EXT_display_surface_counter', 1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index a2551452eb1..076ff3a57f6 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(
> vk_free2(&device->alloc, pAllocator, fb);
> }
>
> +static const VkTimeDomainEXT anv_time_domains[] = {
> + VK_TIME_DOMAIN_DEVICE_EXT,
> + VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> + VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> + VkPhysicalDevice physicalDevice,
> + uint32_t *pTimeDomainCount,
> + VkTimeDomainEXT *pTimeDomains)
> +{
> + int d;
> + VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> + for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
> + vk_outarray_append(&out, i) {
> + *i = anv_time_domains[d];
> + }
> + }
> +
> + return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +anv_clock_gettime(clockid_t clock_id)
> +{
> + struct timespec current;
> + int ret;
> +
> + ret = clock_gettime(clock_id, &current);
> + if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> + ret = clock_gettime(CLOCK_MONOTONIC, &current);
> + if (ret < 0)
> + return 0;
> +
> + return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +#define TIMESTAMP 0x2358
> +
> +VkResult anv_GetCalibratedTimestampsEXT(
> + VkDevice _device,
> + uint32_t timestampCount,
> + const VkCalibratedTimestampInfoEXT *pTimestampInfos,
> + uint64_t *pTimestamps,
> + uint64_t *pMaxDeviation)
> +{
> + ANV_FROM_HANDLE(anv_device, device, _device);
> + uint64_t timestamp_frequency = device->info.timestamp_frequency;
> + int ret;
> + int d;
> + uint64_t begin, end;
> + uint64_t max_clock_period = 0;
> +
> + begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> + for (d = 0; d < timestampCount; d++) {
> + switch (pTimestampInfos[d].timeDomain) {
> + case VK_TIME_DOMAIN_DEVICE_EXT:
> + ret = anv_gem_reg_read(device, TIMESTAMP | 1,
> + &pTimestamps[d]);
> +
> + if (ret != 0) {
> + device->lost = TRUE;
> + return VK_ERROR_DEVICE_LOST;
> + }
> + uint64_t device_period = DIV_ROUND_UP(1000000000,
> timestamp_frequency);
> + max_clock_period = MAX2(max_clock_period, device_period);
> + break;
> + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> + pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
> + max_clock_period = MAX2(max_clock_period, 1);
> + break;
> +
> + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> + pTimestamps[d] = begin;
> + break;
> + default:
> + pTimestamps[d] = 0;
> + break;
> + }
> + }
> +
> + end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> + /*
> + * The maximum deviation is the sum of the interval over which we
> + * perform the sampling and the maximum period of any sampled
> + * clock. That's because the maximum skew between any two sampled
> + * clock edges is when the sampled clock with the largest period is
> + * sampled at the end of that period but right at the beginning of the
> + * sampling interval and some other clock is sampled right at the
> + * beginning of its sampling period and right at the end of the
> + * sampling interval. Let's assume the GPU has the longest clock
> + * period and that the application is sampling GPU and monotonic:
> + *
> + * s e
> + * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> + * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> + *
> + * g
> + * 0 1 2 3
> + * GPU -----_____-----_____-----_____-----_____
> + *
> + * m
> + * x y z 0 1 2 3 4 5 6 7 8 9 a b c
> + * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> + *
> + * Interval <----------------->
> + * Deviation <-------------------------->
> + *
> + * s = read(raw) 2
> + * g = read(GPU) 1
> + * m = read(monotonic) 2
> + * e = read(raw) b
> + *
> + * We round the sample interval up by one tick to cover sampling error
> + * in the interval clock
> + */
> +
> + uint64_t sample_interval = end - begin + 1;
> +
> + *pMaxDeviation = sample_interval + max_clock_period;
> +
> + return VK_SUCCESS;
> +}
> +
> /* vk_icd.h does not declare this function, so we declare it here to
> * suppress Wmissing-prototypes.
> */
> diff --git a/src/intel/vulkan/anv_extensions.py
> b/src/intel/vulkan/anv_extensions.py
> index d4915c95013..a8535964da7 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -126,6 +126,7 @@ EXTENSIONS = [
> Extension('VK_EXT_vertex_attribute_divisor', 3, True),
> Extension('VK_EXT_post_depth_coverage', 1,
> 'device->info.gen >= 9'),
> Extension('VK_EXT_sampler_filter_minmax', 1,
> 'device->info.gen >= 9'),
> + Extension('VK_EXT_calibrated_timestamps', 1, True),
> ]
>
> class VkVersion:
> diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
> index c43b5ef9e06..1bdf040c1a3 100644
> --- a/src/intel/vulkan/anv_gem.c
> +++ b/src/intel/vulkan/anv_gem.c
> @@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int
> fd)
> return args.handle;
> }
>
> +int
> +anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t
> *result)
> +{
> + struct drm_i915_reg_read args = {
> + .offset = offset
> + };
> +
> + int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
> +
> + *result = args.val;
> + return ret;
> +}
> +
> #ifndef SYNC_IOC_MAGIC
> /* duplicated from linux/sync_file.h to avoid build-time dependency
> * on new (v4.7) kernel headers. Once distro's are mostly using
> diff --git a/src/intel/vulkan/anv_gem_stubs.c
> b/src/intel/vulkan/anv_gem_stubs.c
> index 5093bd5db1a..8cc3ad1f22e 100644
> --- a/src/intel/vulkan/anv_gem_stubs.c
> +++ b/src/intel/vulkan/anv_gem_stubs.c
> @@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
> {
> unreachable("Unused");
> }
> +
> +int
> +anv_gem_reg_read(struct anv_device *device,
> + uint32_t offset, uint64_t *result)
> +{
> + unreachable("Unused");
> +}
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index 599b903f25c..08376b00c8e 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
> int anv_gem_gpu_get_reset_stats(struct anv_device *device,
> uint32_t *active, uint32_t *pending);
> int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
> +int anv_gem_reg_read(struct anv_device *device,
> + uint32_t offset, uint64_t *result);
> uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
> int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
> uint32_t caching);
> int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20181017/d387ef44/attachment-0001.html>
More information about the dri-devel
mailing list