<div dir="ltr">While you're at it, would you mind making Vulkan use this as well? It should be a 2-line change to GetPhysicalDeviceProperties.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Jan 6, 2017 at 1:17 PM, Kenneth Graunke <span dir="ltr"><<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Robert Bragg <<a href="mailto:robert@sixbynine.org">robert@sixbynine.org</a>><br>
<br>
v2: (Ken) Update timebase_scale for platforms past Skylake/Broxton too.<br>
---<br>
src/intel/common/gen_device_<wbr>info.c | 13 ++++++--<br>
src/intel/common/gen_device_<wbr>info.h | 24 ++++++++++++++<br>
src/mesa/drivers/dri/i965/brw_<wbr>context.c | 15 +++++++++<br>
src/mesa/drivers/dri/i965/brw_<wbr>context.h | 3 ++<br>
src/mesa/drivers/dri/i965/brw_<wbr>queryobj.c | 53 ++++++++++++++++++++++++++++--<wbr>-<br>
src/mesa/drivers/dri/i965/<wbr>gen6_queryobj.c | 28 +++++-----------<br>
6 files changed, 109 insertions(+), 27 deletions(-)<br>
<br>
diff --git a/src/intel/common/gen_device_<wbr>info.c b/src/intel/common/gen_device_<wbr>info.c<br>
index 9bf3cd5cc42..209b293e510 100644<br>
--- a/src/intel/common/gen_device_<wbr>info.c<br>
+++ b/src/intel/common/gen_device_<wbr>info.c<br>
@@ -36,6 +36,7 @@ static const struct gen_device_info gen_device_info_i965 = {<br>
.urb = {<br>
.size = 256,<br>
},<br>
+ .timebase_scale = 80,<br>
};<br>
<br>
static const struct gen_device_info gen_device_info_g4x = {<br>
@@ -51,6 +52,7 @@ static const struct gen_device_info gen_device_info_g4x = {<br>
.urb = {<br>
.size = 384,<br>
},<br>
+ .timebase_scale = 80,<br>
};<br>
<br>
static const struct gen_device_info gen_device_info_ilk = {<br>
@@ -65,6 +67,7 @@ static const struct gen_device_info gen_device_info_ilk = {<br>
.urb = {<br>
.size = 1024,<br>
},<br>
+ .timebase_scale = 80,<br>
};<br>
<br>
static const struct gen_device_info gen_device_info_snb_gt1 = {<br>
@@ -89,6 +92,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 = {<br>
[MESA_SHADER_GEOMETRY] = 256,<br>
},<br>
},<br>
+ .timebase_scale = 80,<br>
};<br>
<br>
static const struct gen_device_info gen_device_info_snb_gt2 = {<br>
@@ -113,6 +117,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {<br>
[MESA_SHADER_GEOMETRY] = 256,<br>
},<br>
},<br>
+ .timebase_scale = 80,<br>
};<br>
<br>
#define GEN7_FEATURES \<br>
@@ -121,7 +126,8 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {<br>
.must_use_separate_stencil = true, \<br>
.has_llc = true, \<br>
.has_pln = true, \<br>
- .has_surface_tile_offset = true<br>
+ .has_surface_tile_offset = true, \<br>
+ .timebase_scale = 80<br>
<br>
static const struct gen_device_info gen_device_info_ivb_gt1 = {<br>
GEN7_FEATURES, .is_ivybridge = true, .gt = 1,<br>
@@ -287,7 +293,8 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = {<br>
.max_tcs_threads = 504, \<br>
.max_tes_threads = 504, \<br>
.max_gs_threads = 504, \<br>
- .max_wm_threads = 384<br>
+ .max_wm_threads = 384, \<br>
+ .timebase_scale = 80<br>
<br>
static const struct gen_device_info gen_device_info_bdw_gt1 = {<br>
GEN8_FEATURES, .gt = 1,<br>
@@ -385,6 +392,7 @@ static const struct gen_device_info gen_device_info_chv = {<br>
.max_tcs_threads = 336, \<br>
.max_tes_threads = 336, \<br>
.max_cs_threads = 56, \<br>
+ .timebase_scale = 1000000000.0 / 12000000.0, \<br>
.urb = { \<br>
.size = 384, \<br>
.min_entries = { \<br>
@@ -410,6 +418,7 @@ static const struct gen_device_info gen_device_info_chv = {<br>
.max_tes_threads = 112, \<br>
.max_gs_threads = 112, \<br>
.max_cs_threads = 6 * 6, \<br>
+ .timebase_scale = 1000000000.0 / 19200123.0, \<br>
.urb = { \<br>
.size = 192, \<br>
.min_entries = { \<br>
diff --git a/src/intel/common/gen_device_<wbr>info.h b/src/intel/common/gen_device_<wbr>info.h<br>
index f0e8750d0ea..80676d0e003 100644<br>
--- a/src/intel/common/gen_device_<wbr>info.h<br>
+++ b/src/intel/common/gen_device_<wbr>info.h<br>
@@ -147,6 +147,30 @@ struct gen_device_info<br>
*/<br>
unsigned max_entries[4];<br>
} urb;<br>
+<br>
+ /**<br>
+ * For the longest time the timestamp frequency for Gen's timestamp counter<br>
+ * could be assumed to be 12.5MHz, where the least significant bit neatly<br>
+ * corresponded to 80 nanoseconds.<br>
+ *<br>
+ * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for<br>
+ * SKL (or scale factor of 83.33333333) and a frequency of 19200123Hz for<br>
+ * BXT.<br>
+ *<br>
+ * For simplicty to fit with the current code scaling by a single constant<br>
+ * to map from raw timestamps to nanoseconds we now do the conversion in<br>
+ * floating point instead of integer arithmetic.<br>
+ *<br>
+ * In general it's probably worth noting that the documented constants we<br>
+ * have for the per-platform timestamp frequencies aren't perfect and<br>
+ * shouldn't be trusted for scaling and comparing timestamps with a large<br>
+ * delta.<br>
+ *<br>
+ * E.g. with crude testing on my system using the 'correct' scale factor I'm<br>
+ * seeing a drift of ~2 milliseconds per second.<br>
+ */<br>
+ double timebase_scale;<br>
+<br>
/** @} */<br>
};<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_context.c b/src/mesa/drivers/dri/i965/<wbr>brw_context.c<br>
index f939463539c..b071d08e95d 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_context.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_context.c<br>
@@ -524,6 +524,21 @@ brw_initialize_context_<wbr>constants(struct brw_context *brw)<br>
ctx->Const.<wbr>MaxCombinedShaderOutputResourc<wbr>es =<br>
MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;<br>
<br>
+ /* The timestamp register we can read for glGetTimestamp() is<br>
+ * sometimes only 32 bits, before scaling to nanoseconds (depending<br>
+ * on kernel).<br>
+ *<br>
+ * Once scaled to nanoseconds the timestamp would roll over at a<br>
+ * non-power-of-two, so an application couldn't use<br>
+ * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we<br>
+ * report 36 bits and truncate at that (rolling over 5 times as<br>
+ * often as the HW counter), and when the 32-bit counter rolls<br>
+ * over, it happens to also be at a rollover in the reported value<br>
+ * from near (1<<36) to 0.<br>
+ *<br>
+ * The low 32 bits rolls over in ~343 seconds. Our 36-bit result<br>
+ * rolls over every ~69 seconds.<br>
+ */<br>
ctx->Const.QueryCounterBits.<wbr>Timestamp = 36;<br>
<br>
ctx->Const.<wbr>MaxTextureCoordUnits = 8; /* Mesa limit */<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_context.h b/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
index ff3f861a147..b6ac1a2e67f 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_context.h<br>
@@ -1300,6 +1300,9 @@ void brw_emit_query_begin(struct brw_context *brw);<br>
void brw_emit_query_end(struct brw_context *brw);<br>
void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q);<br>
bool brw_is_query_pipelined(struct brw_query_object *query);<br>
+uint64_t brw_timebase_scale(struct brw_context *brw, uint64_t gpu_timestamp);<br>
+uint64_t brw_raw_timestamp_delta(struct brw_context *brw,<br>
+ uint64_t time0, uint64_t time1);<br>
<br>
/** gen6_queryobj.c */<br>
void gen6_init_queryobj_functions(<wbr>struct dd_function_table *functions);<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_queryobj.c b/src/mesa/drivers/dri/i965/<wbr>brw_queryobj.c<br>
index dda17de7157..ebd5776d0aa 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_queryobj.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_queryobj.c<br>
@@ -42,6 +42,37 @@<br>
#include "brw_state.h"<br>
#include "intel_batchbuffer.h"<br>
<br>
+uint64_t<br>
+brw_timebase_scale(struct brw_context *brw, uint64_t gpu_timestamp)<br>
+{<br>
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;<br>
+<br>
+ return (double)gpu_timestamp * devinfo->timebase_scale;<br>
+}<br>
+<br>
+/* As best we know currently, the Gen HW timestamps are 36bits across<br>
+ * all platforms, which we need to account for when calculating a<br>
+ * delta to measure elapsed time.<br>
+ *<br>
+ * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes<br>
+ * only have 32bits due to a kernel bug and so in that case we make sure to<br>
+ * treat all raw timestamps as 32bits so they overflow consistently and remain<br>
+ * comparable.<br>
+ */<br>
+uint64_t<br>
+brw_raw_timestamp_delta(<wbr>struct brw_context *brw, uint64_t time0, uint64_t time1)<br>
+{<br>
+ if (brw->screen->hw_has_timestamp == 2) {<br>
+ /* Kernel clips timestamps to 32bits in this case */<br>
+ return (uint32_t)time1 - (uint32_t)time0;<br>
+ } else {<br>
+ if (time0 > time1)<br>
+ return (1ULL << 36) + time1 - time0;<br>
+ else<br>
+ return time1 - time0;<br>
+ }<br>
+}<br>
+<br>
/**<br>
* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.<br>
*/<br>
@@ -117,12 +148,18 @@ brw_queryobj_get_results(<wbr>struct gl_context *ctx,<br>
/* The query BO contains the starting and ending timestamps.<br>
* Subtract the two and convert to nanoseconds.<br>
*/<br>
- query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));<br>
+ query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);<br>
+ query->Base.Result = brw_timebase_scale(brw, query->Base.Result);<br>
break;<br>
<br>
case GL_TIMESTAMP:<br>
/* The query BO contains a single timestamp value in results[0]. */<br>
- query->Base.Result = 1000 * (results[0] >> 32);<br>
+ query->Base.Result = brw_timebase_scale(brw, results[0]);<br>
+<br>
+ /* Ensure the scaled timestamp overflows according to<br>
+ * GL_QUERY_COUNTER_BITS<br>
+ */<br>
+ query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.<wbr>Timestamp) - 1;<br>
break;<br>
<br>
case GL_SAMPLES_PASSED_ARB:<br>
@@ -508,9 +545,15 @@ brw_get_timestamp(struct gl_context *ctx)<br>
break;<br>
}<br>
<br>
- /* See logic in brw_queryobj_get_results() */<br>
- result *= 80;<br>
- result &= (1ull << 36) - 1;<br>
+ /* Scale to nanosecond units */<br>
+ result = brw_timebase_scale(brw, result);<br>
+<br>
+ /* Ensure the scaled timestamp overflows according to<br>
+ * GL_QUERY_COUNTER_BITS. Technically this isn't required if<br>
+ * querying GL_TIMESTAMP via glGetInteger but it seems best to keep<br>
+ * QueryObject and GetInteger timestamps consistent.<br>
+ */<br>
+ result &= (1ull << ctx->Const.QueryCounterBits.<wbr>Timestamp) - 1;<br>
return result;<br>
}<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>gen6_queryobj.c b/src/mesa/drivers/dri/i965/<wbr>gen6_queryobj.c<br>
index bbd3c44fb0f..3f6edf1da1a 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>gen6_queryobj.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>gen6_queryobj.c<br>
@@ -171,30 +171,18 @@ gen6_queryobj_get_results(<wbr>struct gl_context *ctx,<br>
/* The query BO contains the starting and ending timestamps.<br>
* Subtract the two and convert to nanoseconds.<br>
*/<br>
- query->Base.Result += 80 * (results[1] - results[0]);<br>
+ query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);<br>
+ query->Base.Result = brw_timebase_scale(brw, query->Base.Result);<br>
break;<br>
<br>
case GL_TIMESTAMP:<br>
- /* Our timer is a clock that increments every 80ns (regardless of<br>
- * other clock scaling in the system). The timestamp register we can<br>
- * read for glGetTimestamp() masks out the top 32 bits, so we do that<br>
- * here too to let the two counters be compared against each other.<br>
- *<br>
- * If we just multiplied that 32 bits of data by 80, it would roll<br>
- * over at a non-power-of-two, so an application couldn't use<br>
- * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we<br>
- * report 36 bits and truncate at that (rolling over 5 times as often<br>
- * as the HW counter), and when the 32-bit counter rolls over, it<br>
- * happens to also be at a rollover in the reported value from near<br>
- * (1<<36) to 0.<br>
- *<br>
- * The low 32 bits rolls over in ~343 seconds. Our 36-bit result<br>
- * rolls over every ~69 seconds.<br>
- *<br>
- * The query BO contains a single timestamp value in results[0].<br>
+ /* The query BO contains a single timestamp value in results[0]. */<br>
+ query->Base.Result = brw_timebase_scale(brw, results[0]);<br>
+<br>
+ /* Ensure the scaled timestamp overflows according to<br>
+ * GL_QUERY_COUNTER_BITS<br>
*/<br>
- query->Base.Result = 80 * (results[0] & 0xffffffff);<br>
- query->Base.Result &= (1ull << 36) - 1;<br>
+ query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.<wbr>Timestamp) - 1;<br>
break;<br>
<br>
case GL_SAMPLES_PASSED_ARB:<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.11.0<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>