[Intel-gfx] [PATCH v15 13/14] drm/i915/perf: reprogram NOA muxes at the beginning of each workload
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Wed May 31 12:33:54 UTC 2017
Dynamic slices/subslices shutdown will effectively lose the NOA
configuration uploaded in the slices/subslices. When i915 perf is in
use, we therefore need to reprogram it.
v2: Make sure we handle configs with more register writes than the max
MI_LOAD_REGISTER_IMM can do (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_perf.c | 77 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++
3 files changed, 82 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3acb0e9eb5d..499b2f9aa4be 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2400,6 +2400,7 @@ struct drm_i915_private {
const struct i915_oa_reg *mux_regs[6];
int mux_regs_lens[6];
int n_mux_configs;
+ int total_n_mux_regs;
const struct i915_oa_reg *b_counter_regs;
int b_counter_regs_len;
@@ -3535,6 +3536,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct i915_gem_context *ctx,
uint32_t *reg_state);
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c281847eb56b..4229c74baa22 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1438,11 +1438,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
}
}
+/*
+ * Cache the combined length of all currently selected mux register lists
+ * in perf.oa.total_n_mux_regs, by summing mux_regs_lens[] over the first
+ * n_mux_configs entries.
+ */
+static void count_total_mux_regs(struct drm_i915_private *dev_priv)
+{
+	int n_configs = dev_priv->perf.oa.n_mux_configs;
+	int total = 0;
+	int idx;
+
+	for (idx = 0; idx < n_configs; idx++)
+		total += dev_priv->perf.oa.mux_regs_lens[idx];
+
+	dev_priv->perf.oa.total_n_mux_regs = total;
+}
+
static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
{
int ret = i915_oa_select_metric_set_hsw(dev_priv);
int i;
+ count_total_mux_regs(dev_priv);
+
if (ret)
return ret;
@@ -1756,6 +1769,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
int i;
+ count_total_mux_regs(dev_priv);
+
if (ret)
return ret;
@@ -2094,6 +2109,68 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
gen8_update_reg_state_unlocked(ctx, reg_state);
}
+/**
+ * i915_oa_emit_noa_config_locked - emit the NOA mux configuration into a request
+ * @req: request whose ring receives the register writes
+ *
+ * Writes every register of the currently selected mux configurations into
+ * the ring using MI_LOAD_REGISTER_IMM packets, bracketed by a write of 0xA0
+ * to GDT_CHICKEN_BITS before and 0x80 after. Each packet carries at most
+ * max_loads register writes, so larger configurations are split across
+ * several packets.
+ *
+ * Must be called with struct_mutex held. Does nothing on hardware other
+ * than gen8/gen9, or when perf is not initialized or has no exclusive
+ * stream open.
+ *
+ * Returns: 0 on success or when there is nothing to emit, otherwise the
+ * negative error code reported by intel_ring_begin().
+ */
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	int max_loads = 125;
+	int n_load, n_registers, n_loaded_register;
+	int i, j;
+	u32 *cs;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (!IS_GEN(dev_priv, 8, 9))
+		return 0;
+
+	/* Perf not supported or not enabled. */
+	if (!dev_priv->perf.initialized ||
+	    !dev_priv->perf.oa.exclusive_stream)
+		return 0;
+
+	n_registers = dev_priv->perf.oa.total_n_mux_regs;
+
+	/*
+	 * Number of MI_LOAD_REGISTER_IMM headers needed for the mux
+	 * registers: one per (possibly partial) group of max_loads writes.
+	 * This must match the number of headers emitted by the loop below,
+	 * otherwise we write past the space reserved in the ring.
+	 */
+	n_load = (n_registers + max_loads - 1) / max_loads;
+
+	/*
+	 * NOTE(review): the total below is odd whenever n_load is even;
+	 * confirm whether intel_ring_begin() requires an even dword count.
+	 */
+	cs = intel_ring_begin(req,
+			      3 * 2 + /* MI_LOAD_REGISTER_IMM for chicken registers */
+			      n_load + /* MI_LOAD_REGISTER_IMM for mux registers */
+			      n_registers * 2 + /* offset & value for mux registers */
+			      1 /* NOOP */);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+	*cs++ = 0xA0;
+
+	n_loaded_register = 0;
+	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
+		const struct i915_oa_reg *mux_regs =
+			dev_priv->perf.oa.mux_regs[i];
+		const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i];
+
+		for (j = 0; j < mux_regs_len; j++) {
+			/* Start a new packet every max_loads registers. */
+			if ((n_loaded_register % max_loads) == 0) {
+				int n_in_packet =
+					min(n_registers - n_loaded_register,
+					    max_loads);
+
+				*cs++ = MI_LOAD_REGISTER_IMM(n_in_packet);
+			}
+
+			*cs++ = i915_mmio_reg_offset(mux_regs[j].addr);
+			*cs++ = mux_regs[j].value;
+			n_loaded_register++;
+		}
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+	*cs++ = 0x80;
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
/**
* i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
* @stream: An i915 perf stream
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index acd1da9b62a3..67aaaebb194b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
!(dispatch_flags & I915_DISPATCH_SECURE);
u32 *cs;
+ /* Emit NOA config */
+ i915_oa_emit_noa_config_locked(req);
+
cs = intel_ring_begin(req, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
--
2.11.0
More information about the Intel-gfx
mailing list