[Intel-gfx] [PATCH v4 14/15] drm/i915/perf: per-gen timebase for checking sample freq
Robert Bragg
robert at sixbynine.org
Wed Apr 12 15:55:55 UTC 2017
An oa_exponent_to_ns() utility and per-gen timebase constants where
recently removed when updating the tail pointer race condition WA, and
this restores those so we can update the _PROP_OA_EXPONENT validation
done in read_properties_unlocked() to not assume we have a 12.5MHz
timebase as we did for Haswell.
Accordingly the oa_sample_rate_hard_limit value that's referenced by
proc_dointvec_minmax defining the absolute limit for the OA sampling
frequency is now initialized to (timestamp_frequency / 2) instead of the
6.25MHz constant for Haswell.
v2:
Specify frequency of 19.2MHz for BXT (Ville)
Initialize oa_sample_rate_hard_limit per-gen too (Lionel)
Signed-off-by: Robert Bragg <robert at sixbynine.org>
Cc: Lionel Landwerlin <lionel.g.landwerlin at linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
Acked-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_perf.c | 37 ++++++++++++++++++++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b8dcf281db53..59dcce3b40a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2457,6 +2457,7 @@ struct drm_i915_private {
bool periodic;
int period_exponent;
+ int timestamp_frequency;
int metrics_set;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 611f996bece7..5de8d57e0b77 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -288,10 +288,12 @@ static u32 i915_perf_stream_paranoid = true;
/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
*
- * 160ns is the smallest sampling period we can theoretically program the OA
- * unit with on Haswell, corresponding to 6.25MHz.
+ * The highest sampling frequency we can theoretically program the OA unit
+ * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
+ *
+ * Initialized just before we register the sysctl parameter.
*/
-static int oa_sample_rate_hard_limit = 6250000;
+static int oa_sample_rate_hard_limit;
/* Theoretically we can program the OA unit to sample every 160ns but don't
* allow that by default unless root...
@@ -2560,6 +2562,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
return ret;
}
+static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
+{
+ return div_u64(1000000000ULL * (2ULL << exponent),
+ dev_priv->perf.oa.timestamp_frequency);
+}
+
/**
* read_properties_unlocked - validate + copy userspace stream open properties
* @dev_priv: i915 device instance
@@ -2656,16 +2664,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
}
/* Theoretically we can program the OA unit to sample
- * every 160ns but don't allow that by default unless
- * root.
- *
- * On Haswell the period is derived from the exponent
- * as:
- *
- * period = 80ns * 2^(exponent + 1)
+ * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
+ * for BXT. We don't allow such high sampling
+ * frequencies by default unless root.
*/
+
BUILD_BUG_ON(sizeof(oa_period) != 8);
- oa_period = 80ull * (2ull << value);
+ oa_period = oa_exponent_to_ns(dev_priv, value);
/* This check is primarily to ensure that oa_period <=
* UINT32_MAX (before passing to do_div which only
@@ -2921,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.oa_hw_tail_read =
gen7_oa_hw_tail_read;
+ dev_priv->perf.oa.timestamp_frequency = 12500000;
+
dev_priv->perf.oa.oa_formats = hsw_oa_formats;
dev_priv->perf.oa.n_builtin_sets =
@@ -2934,6 +2941,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
*/
if (IS_GEN8(dev_priv)) {
+ dev_priv->perf.oa.timestamp_frequency = 12500000;
+
dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
@@ -2950,6 +2959,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
i915_oa_select_metric_set_chv;
}
} else if (IS_GEN9(dev_priv)) {
+ dev_priv->perf.oa.timestamp_frequency = 12000000;
+
dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
@@ -2970,6 +2981,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.select_metric_set =
i915_oa_select_metric_set_sklgt4;
} else if (IS_BROXTON(dev_priv)) {
+ dev_priv->perf.oa.timestamp_frequency = 19200000;
+
dev_priv->perf.oa.n_builtin_sets =
i915_oa_n_builtin_metric_sets_bxt;
dev_priv->perf.oa.ops.select_metric_set =
@@ -3004,6 +3017,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
spin_lock_init(&dev_priv->perf.hook_lock);
spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
+ oa_sample_rate_hard_limit =
+ dev_priv->perf.oa.timestamp_frequency / 2;
dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
dev_priv->perf.initialized = true;
--
2.12.0
More information about the Intel-gfx
mailing list