[Intel-gfx] [PATCH v3 1/2] drm/i915/perf: prune OA configs
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Jul 14 14:32:57 UTC 2017
On 14/07/17 13:20, Matthew Auld wrote:
> On 07/14, Lionel Landwerlin wrote:
>> In the following commit we'll introduce loadable userspace
>> configs. This change reworks how configurations are handled in the
>> perf driver and retains only the test configurations in kernel space.
>>
>> We now store the test config in dev_priv and resolve the id only once
>> when opening the perf stream. The OA config is then handled through a
>> pointer to the structure holding the configuration details.
>>
>> v2: Rework how test configs are handled (Lionel)
>>
>> v3: Use u32 to hold number of register (Matthew)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 44 +-
>> drivers/gpu/drm/i915/i915_oa_bdw.c | 5360 +--------------------------------
>> drivers/gpu/drm/i915/i915_oa_bdw.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_bxt.c | 2623 +---------------
>> drivers/gpu/drm/i915/i915_oa_bxt.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_chv.c | 2806 +----------------
>> drivers/gpu/drm/i915/i915_oa_chv.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_glk.c | 2535 +---------------
>> drivers/gpu/drm/i915/i915_oa_glk.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_hsw.c | 764 +----
>> drivers/gpu/drm/i915/i915_oa_hsw.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_kblgt2.c | 2971 +-----------------
>> drivers/gpu/drm/i915/i915_oa_kblgt2.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_kblgt3.c | 3020 +------------------
>> drivers/gpu/drm/i915/i915_oa_kblgt3.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_sklgt2.c | 3458 +--------------------
>> drivers/gpu/drm/i915/i915_oa_sklgt2.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_sklgt3.c | 3019 +------------------
>> drivers/gpu/drm/i915/i915_oa_sklgt3.h | 8 +-
>> drivers/gpu/drm/i915/i915_oa_sklgt4.c | 3073 +------------------
>> drivers/gpu/drm/i915/i915_oa_sklgt4.h | 8 +-
>> drivers/gpu/drm/i915/i915_perf.c | 309 +-
>> 22 files changed, 504 insertions(+), 29558 deletions(-)
> <SNIP>
>
>> -static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>> +static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
>> + const struct i915_oa_config *oa_config)
>> {
>> struct drm_i915_private *dev_priv = req->i915;
>> - const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs;
>> - int n_flex_regs = dev_priv->perf.oa.flex_regs_len;
>> /* The MMIO offsets for Flex EU registers aren't contiguous */
>> u32 flex_mmio[] = {
>> i915_mmio_reg_offset(EU_PERF_CNTL0),
>> @@ -1601,11 +1594,14 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>> u32 *cs;
>> int i;
>>
>> - cs = intel_ring_begin(req, n_flex_regs * 2 + 4);
>> + if (WARN_ON(!oa_config))
>> + return -EINVAL;
>> +
>> + cs = intel_ring_begin(req, oa_config->flex_regs_len * 2 + 4);
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1);
>> + *cs++ = MI_LOAD_REGISTER_IMM(oa_config->flex_regs_len + 1);
> Something doesn't add up here, if we don't supply any flex registers, which
> seems to be the case for the oa test configs, or potentially any user provided
> config then we only reserve ring_begin(4)/IMM(1), but we still go off and write
> out the default flex values as per flex_mmio. Also we could potentially over
> reserve since we can only ever write out at most flex_mmio registers.
>
> My first thought is perhaps:
>
> int flex_max = ARRAY_SIZE(flex_mmio);
>
> GEM_BUG_ON(flex_regs_len > flex_max);
>
> cs = intel_ring_begin(req, flex_max * 2 + 4);
>
> *cs++ = MI_LOAD_REGISTER_IMM(flex_max + 1);
>
> for (i = 0; i < flex_max; i++) {
>
> for (...) {
> if (addr == mmio) {
> value = ...
> ++count;
> }
> }
> }
>
> GEM_BUG_ON(count != flex_regs_len);
Thanks for pointing this out, there is indeed a problem.
>
>>
>> *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>> *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>> @@ -1624,9 +1620,9 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>> u32 value = 0;
>> int j;
>>
>> - for (j = 0; j < n_flex_regs; j++) {
>> - if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) {
>> - value = flex_regs[j].value;
>> + for (j = 0; j < oa_config->flex_regs_len; j++) {
>> + if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
>> + value = oa_config->flex_regs[j].value;
>> break;
>> }
>> }
>> @@ -1641,7 +1637,8 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
>> return 0;
>> }
>>
>> -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv)
>> +static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
>> + const struct i915_oa_config *oa_config)
>> {
>> struct intel_engine_cs *engine = dev_priv->engine[RCS];
>> struct i915_gem_timeline *timeline;
>> @@ -1656,10 +1653,12 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>> if (IS_ERR(req))
>> return PTR_ERR(req);
>>
>> - ret = gen8_emit_oa_config(req);
>> - if (ret) {
>> - i915_add_request(req);
>> - return ret;
>> + if (oa_config) {
>> + ret = gen8_emit_oa_config(req, oa_config);
>> + if (ret) {
>> + i915_add_request(req);
>> + return ret;
>> + }
>> }
>>
>> /* Queue this switch after all other activity */
>> @@ -1707,6 +1706,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>> * Note: it's only the RCS/Render context that has any OA state.
>> */
>> static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> + const struct i915_oa_config *oa_config,
>> bool interruptible)
>> {
>> struct i915_gem_context *ctx;
>> @@ -1724,7 +1724,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> }
>>
>> /* Switch away from any user context. */
>> - ret = gen8_switch_to_updated_kernel_context(dev_priv);
>> + ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
>> if (ret)
>> goto out;
>>
>> @@ -1763,7 +1763,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> ce->state->obj->mm.dirty = true;
>> regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>>
>> - gen8_update_reg_state_unlocked(ctx, regs);
>> + if (oa_config)
>> + gen8_update_reg_state_unlocked(ctx, regs, oa_config);
>>
>> i915_gem_object_unpin_map(ce->state->obj);
>> }
>> @@ -1774,13 +1775,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>> return ret;
>> }
>>
>> -static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>> +static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>> + const struct i915_oa_config *oa_config)
>> {
>> - int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
>> - int i;
>> -
>> - if (ret)
>> - return ret;
>> + int ret;
>>
>> /*
>> * We disable slice/unslice clock ratio change reports on SKL since
>> @@ -1817,19 +1815,18 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>> * to make sure all slices/subslices are ON before writing to NOA
>> * registers.
>> */
>> - ret = gen8_configure_all_contexts(dev_priv, true);
>> + ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
>> if (ret)
>> return ret;
>>
>> I915_WRITE(GDT_CHICKEN_BITS, 0xA0);
>> - for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
>> - config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i],
>> - dev_priv->perf.oa.mux_regs_lens[i]);
>> - }
>> +
>> + config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
>> +
>> I915_WRITE(GDT_CHICKEN_BITS, 0x80);
>>
>> - config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs,
>> - dev_priv->perf.oa.b_counter_regs_len);
>> + config_oa_regs(dev_priv, oa_config->b_counter_regs,
>> + oa_config->b_counter_regs_len);
>>
>> return 0;
>> }
>> @@ -1837,7 +1834,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>> static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>> {
>> /* Reset all contexts' slices/subslices configurations. */
>> - gen8_configure_all_contexts(dev_priv, false);
>> + gen8_configure_all_contexts(dev_priv, NULL, false);
>> }
>>
>> static void gen7_oa_enable(struct drm_i915_private *dev_priv)
>> @@ -2011,8 +2008,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> return -EBUSY;
>> }
>>
>> - if (!props->metrics_set) {
>> - DRM_DEBUG("OA metric set not specified\n");
>> + if (!props->oa_config) {
>> + DRM_DEBUG("OA configuration set not found/specified\n");
>> return -EINVAL;
>> }
>>
>> @@ -2055,7 +2052,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> dev_priv->perf.oa.oa_buffer.format =
>> dev_priv->perf.oa.oa_formats[props->oa_format].format;
>>
>> - dev_priv->perf.oa.metrics_set = props->metrics_set;
>> + stream->oa_config = props->oa_config;
>>
>> dev_priv->perf.oa.periodic = props->oa_periodic;
>> if (dev_priv->perf.oa.periodic)
>> @@ -2086,7 +2083,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> if (ret)
>> goto err_oa_buf_alloc;
>>
>> - ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
>> + ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
>> + stream->oa_config);
>> if (ret)
>> goto err_enable;
>>
>> @@ -2113,6 +2111,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>> u32 *reg_state)
>> {
>> struct drm_i915_private *dev_priv = engine->i915;
>> + struct i915_oa_config *oa_config = dev_priv->perf.oa.current_config;
> I'm confused, isn't current_config always NULL, where do we set it exactly?
> Maybe I'm missing something. Also why not just perf.exclusive_stream->oa_config,
> is that not the current config?
>
>
Oh dear, screwed up my rebase...
current_config should have been removed.
More information about the Intel-gfx
mailing list