[Intel-gfx] [PATCH v5 1/4] drm/i915/perf: break OA config buffer object in 2
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Tue Apr 14 09:21:00 UTC 2020
Yeah it's only filtering.
We need to pin the context to a particular HW tag so that filtering can
work properly.
-Lionel
On 14/04/2020 02:24, Umesh Nerlige Ramappa wrote:
> Hi Lionel,
>
> What's the implication of using separate contexts for 3d and compute
> on perf OA? Is it only context-filtering? If so, have you considered
> disabling context filtering with a parameter instead of actually
> filtering for specific contexts? Is this privileged use case?
>
> Thanks,
> Umesh
>
> On Thu, Apr 09, 2020 at 12:17:03PM +0300, Lionel Landwerlin wrote:
>> We want to enable performance monitoring on multiple contexts to cover
>> the Iris use case of using 2 GEM contexts (3D & compute).
>>
>> So start by breaking the OA configuration BO which contains global &
>> per context register writes.
>>
>> NOA muxes & OA configurations are global, while FLEXEU register
>> configurations are per context.
>>
>> v2: Use an offset into the same VMA (Chris)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
>> 1 file changed, 116 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 5cde3e4e7be6..d2183fd701a3 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -372,6 +372,7 @@ struct i915_oa_config_bo {
>>
>> struct i915_oa_config *oa_config;
>> struct i915_vma *vma;
>> + u32 per_context_offset;
>> };
>>
>> static struct ctl_table_header *sysctl_header;
>> @@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo *
>> alloc_oa_config_buffer(struct i915_perf_stream *stream,
>> struct i915_oa_config *oa_config)
>> {
>> - struct drm_i915_gem_object *obj;
>> struct i915_oa_config_bo *oa_bo;
>> + struct drm_i915_gem_object *obj;
>> size_t config_length = 0;
>> - u32 *cs;
>> + u32 *cs_start, *cs;
>> int err;
>>
>> oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
>> if (!oa_bo)
>> return ERR_PTR(-ENOMEM);
>>
>> + /*
>> + * Global configuration requires a jump into the NOA wait BO for it to
>> + * apply.
>> + */
>> config_length += num_lri_dwords(oa_config->mux_regs_len);
>> config_length += num_lri_dwords(oa_config->b_counter_regs_len);
>> - config_length += num_lri_dwords(oa_config->flex_regs_len);
>> config_length += 3; /* MI_BATCH_BUFFER_START */
>> +
>> + config_length += num_lri_dwords(oa_config->flex_regs_len);
>> + config_length += 1 /* MI_BATCH_BUFFER_END */;
>> +
>> config_length = ALIGN(sizeof(u32) * config_length,
>> I915_GTT_PAGE_SIZE);
>>
>> - obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
>> + obj = i915_gem_object_create_shmem(stream->perf->i915,
>> + config_length);
>> if (IS_ERR(obj)) {
>> err = PTR_ERR(obj);
>> goto err_free;
>> }
>>
>> - cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
>> - if (IS_ERR(cs)) {
>> - err = PTR_ERR(cs);
>> - goto err_oa_bo;
>> + cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
>> + if (IS_ERR(cs_start)) {
>> + err = PTR_ERR(cs_start);
>> + goto err_bo;
>> }
>>
>> - cs = write_cs_mi_lri(cs,
>> - oa_config->mux_regs,
>> - oa_config->mux_regs_len);
>> + cs = cs_start;
>> cs = write_cs_mi_lri(cs,
>> oa_config->b_counter_regs,
>> oa_config->b_counter_regs_len);
>> @@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
>> *cs++ = i915_ggtt_offset(stream->noa_wait);
>> *cs++ = 0;
>>
>> + oa_bo->per_context_offset = 4 * (cs - cs_start);
>> +
>> + cs = write_cs_mi_lri(cs,
>> + oa_config->mux_regs,
>> + oa_config->mux_regs_len);
>> +
>> + *cs++ = MI_BATCH_BUFFER_END;
>> +
>> i915_gem_object_flush_map(obj);
>> i915_gem_object_unpin_map(obj);
>>
>> @@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
>> NULL);
>> if (IS_ERR(oa_bo->vma)) {
>> err = PTR_ERR(oa_bo->vma);
>> - goto err_oa_bo;
>> + goto err_bo;
>> }
>>
>> oa_bo->oa_config = i915_oa_config_get(oa_config);
>> @@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
>>
>> return oa_bo;
>>
>> -err_oa_bo:
>> +err_bo:
>> i915_gem_object_put(obj);
>> err_free:
>> kfree(oa_bo);
>> return ERR_PTR(err);
>> }
>>
>> -static struct i915_vma *
>> -get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>> +static struct i915_oa_config_bo *
>> +get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>> {
>> struct i915_oa_config_bo *oa_bo;
>>
>> @@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>> memcmp(oa_bo->oa_config->uuid,
>> oa_config->uuid,
>> sizeof(oa_config->uuid)) == 0)
>> - goto out;
>> + return oa_bo;
>> }
>>
>> - oa_bo = alloc_oa_config_buffer(stream, oa_config);
>> - if (IS_ERR(oa_bo))
>> - return ERR_CAST(oa_bo);
>> -
>> -out:
>> - return i915_vma_get(oa_bo->vma);
>> + return alloc_oa_config_buffer(stream, oa_config);
>> }
>>
>> static int
>> emit_oa_config(struct i915_perf_stream *stream,
>> struct i915_oa_config *oa_config,
>> struct intel_context *ce,
>> - struct i915_active *active)
>> + struct i915_active *active,
>> + bool global)
>> {
>> + struct i915_oa_config_bo *oa_bo;
>> struct i915_request *rq;
>> - struct i915_vma *vma;
>> + u64 vma_offset;
>> int err;
>>
>> - vma = get_oa_vma(stream, oa_config);
>> - if (IS_ERR(vma))
>> - return PTR_ERR(vma);
>> + oa_bo = get_oa_bo(stream, oa_config);
>> + if (IS_ERR(oa_bo))
>> + return PTR_ERR(oa_bo);
>>
>> - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>> + err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>> if (err)
>> - goto err_vma_put;
>> + return err;
>>
>> intel_engine_pm_get(ce->engine);
>> rq = i915_request_create(ce);
>> @@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream,
>> goto err_add_request;
>> }
>>
>> - i915_vma_lock(vma);
>> - err = i915_request_await_object(rq, vma->obj, 0);
>> + i915_vma_lock(oa_bo->vma);
>> + err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
>> if (!err)
>> - err = i915_vma_move_to_active(vma, rq, 0);
>> - i915_vma_unlock(vma);
>> + err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
>> + i915_vma_unlock(oa_bo->vma);
>> if (err)
>> goto err_add_request;
>>
>> - err = rq->engine->emit_bb_start(rq,
>> - vma->node.start, 0,
>> + vma_offset = oa_bo->vma->node.start;
>> + if (!global)
>> + vma_offset += oa_bo->per_context_offset;
>> +
>> + err = rq->engine->emit_bb_start(rq, vma_offset, 0,
>> I915_DISPATCH_SECURE);
>> if (err)
>> goto err_add_request;
>> @@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream,
>> err_add_request:
>> i915_request_add(rq);
>> err_vma_unpin:
>> - i915_vma_unpin(vma);
>> -err_vma_put:
>> - i915_vma_put(vma);
>> + i915_vma_unpin(oa_bo->vma);
>> return err;
>> }
>>
>> @@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
>> struct i915_active *active)
>> {
>> struct intel_uncore *uncore = stream->uncore;
>> + int err;
>>
>> /*
>> * PRM:
>> @@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
>> intel_uncore_rmw(uncore, GEN6_UCGCTL1,
>> 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>>
>> - return emit_oa_config(stream,
>> - stream->oa_config, oa_context(stream),
>> - active);
>> + err = emit_oa_config(stream, stream->oa_config,
>> + oa_context(stream),
>> + active,
>> + false /* global */);
>> + if (err)
>> + return err;
>> +
>> + return emit_oa_config(stream, stream->oa_config,
>> + oa_context(stream),
>> + active,
>> + true /* global */);
>> }
>>
>> static void hsw_disable_metric_set(struct i915_perf_stream *stream)
>> @@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
>> {
>> struct intel_uncore *uncore = stream->uncore;
>> struct i915_oa_config *oa_config = stream->oa_config;
>> - int ret;
>> + int err;
>>
>> /*
>> * We disable slice/unslice clock ratio change reports on SKL since
>> @@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
>> * to make sure all slices/subslices are ON before writing to NOA
>> * registers.
>> */
>> - ret = lrc_configure_all_contexts(stream, oa_config, active);
>> - if (ret)
>> - return ret;
>> + err = lrc_configure_all_contexts(stream, oa_config, active);
>> + if (err)
>> + return err;
>>
>> - return emit_oa_config(stream,
>> - stream->oa_config, oa_context(stream),
>> - active);
>> + err = emit_oa_config(stream, oa_config,
>> + oa_context(stream),
>> + active,
>> + false /* global */);
>> + if (err)
>> + return err;
>> +
>> + return emit_oa_config(stream, stream->oa_config,
>> + oa_context(stream),
>> + active,
>> + true /* global */);
>> }
>>
>> static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
>> @@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>> return ret;
>>
>> /*
>> - * For Gen12, performance counters are context
>> - * saved/restored. Only enable it for the context that
>> - * requested this.
>> + * For Gen12, performance counters are also context saved/restored on
>> + * another set of performance registers. Configure the unit dealing
>> + * with those.
>> */
>> if (stream->ctx) {
>> ret = gen12_configure_oar_context(stream, active);
>> @@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>> return ret;
>> }
>>
>> - return emit_oa_config(stream,
>> - stream->oa_config, oa_context(stream),
>> - active);
>> + ret = emit_oa_config(stream, oa_config,
>> + oa_context(stream),
>> + active,
>> + false /* global */);
>> + if (ret)
>> + return ret;
>> +
>> + return emit_oa_config(stream, stream->oa_config,
>> + oa_context(stream),
>> + active,
>> + true /* global */);
>> }
>>
>> static void gen8_disable_metric_set(struct i915_perf_stream *stream)
>> @@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
>> unsigned long metrics_set)
>> {
>> struct i915_oa_config *config;
>> + struct i915_active *active = NULL;
>> long ret = stream->oa_config->id;
>>
>> config = i915_perf_get_oa_config(stream->perf, metrics_set);
>> @@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
>> return -EINVAL;
>>
>> if (config != stream->oa_config) {
>> - int err;
>> + active = i915_active_create();
>> + if (!active) {
>> + ret = -ENOMEM;
>> + goto err_config;
>> + }
>>
>> /*
>> * If OA is bound to a specific context, emit the
>> @@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
>> * When set globally, we use a low priority kernel context,
>> * so it will effectively take effect when idle.
>> */
>> - err = emit_oa_config(stream, config, oa_context(stream), NULL);
>> - if (!err)
>> - config = xchg(&stream->oa_config, config);
>> - else
>> - ret = err;
>> + ret = emit_oa_config(stream, config,
>> + oa_context(stream),
>> + active,
>> + false /* global */);
>> + if (ret)
>> + goto err_active;
>> +
>> + ret = emit_oa_config(stream, config,
>> + oa_context(stream),
>> + active,
>> + true /* global */);
>> + if (ret)
>> + goto err_active;
>> +
>> + config = xchg(&stream->oa_config, config);
>> }
>>
>> +err_active:
>> + i915_active_put(active);
>> +err_config:
>> i915_oa_config_put(config);
>>
>> return ret;
>> --
>> 2.26.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list