[Intel-gfx] [PATCH v16 15/17] drm/i915/perf: allow NOA muxes reprogramming before workloads
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Mon Jun 5 14:48:56 UTC 2017
Dynamic slice/subslice shutdown will effectively lose the NOA
configuration uploaded into the slices/subslices.

Here we introduce a new parameter to the i915 perf driver for the case
where userspace wants to monitor parts of the GPU located within
slices/subslices and knows that some of the workloads running on the
system will disable slices/subslices.

This new configuration option forces the i915 driver to re-emit the NOA
configurations between context switches.
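As an illustration, a userspace client could request this behavior when
opening an OA stream roughly as follows (a minimal sketch, not part of
this patch; drm_fd, metrics_set_id, the chosen OA format and the
exponent are placeholders the application would pick itself):

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  uint64_t properties[] = {
          DRM_I915_PERF_PROP_SAMPLE_OA, 1,
          DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
          DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
          DRM_I915_PERF_PROP_OA_EXPONENT, 16,
          /* Ask the driver to re-emit the NOA config between contexts. */
          DRM_I915_PERF_PROP_NOA_RESTORE, 1,
  };
  struct drm_i915_perf_open_param param = {
          .flags = I915_PERF_FLAG_FD_CLOEXEC,
          .num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
          .properties_ptr = (uintptr_t)properties,
  };
  int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);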
v2: Make sure we handle configs with more register writes than a single
MI_LOAD_REGISTER_IMM can carry (Lionel)

v3: Introduce new parameter to the perf driver to make reprogramming
optional (Lionel)
Fix off-by-one calculation of the number of required
MI_LOAD_REGISTER_IMM packets; see the sketch below (Lionel)
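For reference, the ring-space reservation assumes one
MI_LOAD_REGISTER_IMM header per batch of at most 125 register writes.
A minimal sketch of that count, using a hypothetical helper name that
is not part of the patch:

  /* Number of MI_LOAD_REGISTER_IMM packets needed to program
   * n_registers writes, with at most max_load writes per packet.
   */
  static int noa_lri_count(int n_registers, int max_load)
  {
          return n_registers / max_load +
                 (n_registers % max_load != 0 ? 1 : 0);
  }

  /* e.g. noa_lri_count(125, 125) == 1, noa_lri_count(126, 125) == 2 */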
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 15 +++++
drivers/gpu/drm/i915/i915_perf.c | 100 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +
include/uapi/drm/i915_drm.h | 10 ++++
4 files changed, 128 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d53419e9bfa2..7ef1908e3a98 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1964,6 +1964,12 @@ struct i915_perf_stream {
struct i915_gem_context *ctx;
/**
+ * @noa_restore: Whether the user who opened the stream requested NOA
+ * config to be restored between context switches.
+ */
+ bool noa_restore;
+
+ /**
* @enabled: Whether the stream is currently enabled, considering
* whether the stream was opened in a disabled state and based
* on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
@@ -2401,6 +2407,7 @@ struct drm_i915_private {
const struct i915_oa_reg *mux_regs[6];
int mux_regs_lens[6];
int n_mux_configs;
+ int total_n_mux_regs;
const struct i915_oa_reg *b_counter_regs;
int b_counter_regs_len;
@@ -2493,6 +2500,13 @@ struct drm_i915_private {
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
int n_builtin_sets;
+
+ /**
+ * Whether the user has requested the NOA
+ * configuration to be reprogrammed between context
+ * switches.
+ */
+ atomic_t noa_restore;
} oa;
} perf;
@@ -3543,6 +3557,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct i915_gem_context *ctx,
uint32_t *reg_state);
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5222dac1ee5b..3f49ce69641b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -350,6 +350,8 @@ struct perf_open_properties {
u64 single_context:1;
u64 ctx_handle;
+ bool noa_restore;
+
/* OA sampling state */
int metrics_set;
int oa_format;
@@ -1451,11 +1453,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
}
}
+static void count_total_mux_regs(struct drm_i915_private *dev_priv)
+{
+ int i;
+
+ dev_priv->perf.oa.total_n_mux_regs = 0;
+ for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
+ dev_priv->perf.oa.total_n_mux_regs +=
+ dev_priv->perf.oa.mux_regs_lens[i];
+ }
+}
+
static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
{
int ret = i915_oa_select_metric_set_hsw(dev_priv);
int i;
+ count_total_mux_regs(dev_priv);
+
if (ret)
return ret;
@@ -1777,6 +1792,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
int i;
+ count_total_mux_regs(dev_priv);
+
if (ret)
return ret;
@@ -2065,6 +2082,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return ret;
}
+ if (props->noa_restore) {
+ stream->noa_restore = true;
+ atomic_inc(&dev_priv->perf.oa.noa_restore);
+ }
+
ret = alloc_oa_buffer(dev_priv);
if (ret)
goto err_oa_buf_alloc;
@@ -2121,6 +2143,74 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
gen8_update_reg_state_unlocked(ctx, reg_state);
}
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
+{
+ struct drm_i915_private *dev_priv = req->i915;
+ int max_load = 125;
+ int n_lri, n_registers, n_loaded_register;
+ int i, j;
+ u32 *cs;
+
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+ /* Perf not supported. */
+ if (!dev_priv->perf.initialized)
+ return 0;
+
+ /*
+ * We do not expect dynamic slice/subslice configuration to change
+ * across contexts prior to Gen8.
+ */
+ if (INTEL_GEN(dev_priv) < 8)
+ return 0;
+
+ /* Has the user requested that NOA configuration be restored? */
+ if (!atomic_read(&dev_priv->perf.oa.noa_restore))
+ return 0;
+
+ n_registers = dev_priv->perf.oa.total_n_mux_regs;
+ n_lri = (n_registers / max_load) + ((n_registers % max_load) != 0);
+
+ cs = intel_ring_begin(req,
+ 3 * 2 + /* MI_LOAD_REGISTER_IMM for chicken registers */
+ n_lri + /* MI_LOAD_REGISTER_IMM for mux registers */
+ n_registers * 2 + /* offset & value for mux registers */
+ 1 /* NOOP */);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+ *cs++ = 0xA0;
+
+ n_loaded_register = 0;
+ for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
+ const struct i915_oa_reg *mux_regs =
+ dev_priv->perf.oa.mux_regs[i];
+ const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i];
+
+ for (j = 0; j < mux_regs_len; j++) {
+ if ((n_loaded_register % max_load) == 0) {
+ n_lri = min(n_registers - n_loaded_register, max_load);
+ *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+ }
+
+ *cs++ = i915_mmio_reg_offset(mux_regs[j].addr);
+ *cs++ = mux_regs[j].value;
+ n_loaded_register++;
+ }
+ }
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+ *cs++ = 0x80;
+
+ *cs++ = MI_NOOP;
+ intel_ring_advance(req, cs);
+
+ return 0;
+}
+
/**
* i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
* @stream: An i915 perf stream
@@ -2433,9 +2523,14 @@ static long i915_perf_ioctl(struct file *file,
*/
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+
if (stream->enabled)
i915_perf_disable_locked(stream);
+ if (stream->noa_restore)
+ atomic_dec(&dev_priv->perf.oa.noa_restore);
+
if (stream->ops->destroy)
stream->ops->destroy(stream);
@@ -2769,6 +2864,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
props->oa_periodic = true;
props->oa_period_exponent = value;
break;
+ case DRM_I915_PERF_PROP_NOA_RESTORE:
+ props->noa_restore = true;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -3129,6 +3227,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
mutex_init(&dev_priv->perf.lock);
spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
+ atomic_set(&dev_priv->perf.oa.noa_restore, 0);
+
oa_sample_rate_hard_limit =
dev_priv->perf.oa.timestamp_frequency / 2;
dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index acd1da9b62a3..67aaaebb194b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
!(dispatch_flags & I915_DISPATCH_SECURE);
u32 *cs;
+ /* Emit NOA config */
+ i915_oa_emit_noa_config_locked(req);
+
cs = intel_ring_begin(req, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 15bc9f78ba4d..27cc72a85d07 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1366,6 +1366,16 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_OA_EXPONENT,
+ /**
+ * Specifying this property alters the behavior of the i915 driver:
+ * it forces the driver to restore the NOA configuration of the OA
+ * unit between context switches. You should only use this flag if
+ * you expect that at least one application running on the system
+ * is using an SSEU configuration with disabled slices/subslices.
+ */
+ DRM_I915_PERF_PROP_NOA_RESTORE,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
--
2.11.0