[Intel-gfx] [PATCH 2/2] drm/i915/perf: fix ctx_id read with GuC & ICL
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu May 31 21:11:07 UTC 2018
On 31/05/18 21:46, Michel Thierry wrote:
> On 5/31/2018 12:56 PM, Lionel Landwerlin wrote:
>> One thing we didn't really understand about the OA report is that the
>> ContextID field (dword 2) is copy of the context descriptor (dword 1).
>>
>> On Gen8->10 and without using GuC we didn't notice the issue because
>> we only checked the 21bits of the ContextID field in the OA reports
>> which matches exactly the hw_id stored into the context descriptor.
>>
>> When using GuC submission we have an issue of a non matching hw_id
>> because GuC uses bit 20 of the hw_id to signal proxy submission. This
>> change makes introduces a mask to compare only the relevant bits.
> Choose one: makes or introduces ;)
>
>>
>> On ICL the context descriptor format has changed and we failed to
>> address this. On top of using a mask we also need to shift the bits
>> properly.
>>
>
> Someone is going to complain this should be two patches (one to
> address the GuC-ness and one for Gen11), but not me.
>
> Reviewed-by: Michel Thierry <michel.thierry at intel.com>
>
Kind of agree, but that's a pain.
We can put it as a fix of the first commit that enabled gen8 and I'll do
the backport in stable versions.
Cheers,
-
Lionel
>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Fixes: 1de401c08fa805 ("drm/i915/perf: enable perf support on ICL")
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104252
>> BSpec: 1237
>> Testcase: igt/perf/gen8-unprivileged-single-ctx-counters
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 1 +
>> drivers/gpu/drm/i915/i915_perf.c | 123 ++++++++++++++++++++++++-------
>> drivers/gpu/drm/i915/intel_lrc.c | 5 ++
>> 3 files changed, 101 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 58ab9259fb73..0ccda488a8db 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1953,6 +1953,7 @@ struct drm_i915_private {
>> struct intel_context *pinned_ctx;
>> u32 specific_ctx_id;
>> + u32 specific_ctx_id_mask;
>> struct hrtimer poll_check_timer;
>> wait_queue_head_t poll_wq;
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c
>> b/drivers/gpu/drm/i915/i915_perf.c
>> index 4a62024cbf85..d5e5d4635f1f 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -738,12 +738,7 @@ static int gen8_append_oa_reports(struct
>> i915_perf_stream *stream,
>> continue;
>> }
>> - /*
>> - * XXX: Just keep the lower 21 bits for now since I'm not
>> - * entirely sure if the HW touches any of the higher bits in
>> - * this field
>> - */
>> - ctx_id = report32[2] & 0x1fffff;
>> + ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
>> /*
>> * Squash whatever is in the CTX_ID field if it's marked as
>> @@ -1204,6 +1199,36 @@ static int i915_oa_read(struct
>> i915_perf_stream *stream,
>> return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
>> }
>> +static int oa_get_render_lrca(struct drm_i915_private *i915,
>> + struct i915_gem_context *ctx,
>> + u32 *lrca)
>> +{
>> + struct intel_engine_cs *engine = i915->engine[RCS];
>> + struct intel_context *ce;
>> + int ret;
>> +
>> + ret = i915_mutex_lock_interruptible(&i915->drm);
>> + if (ret)
>> + return ret;
>> +
>> + /*
>> + * As the ID is the gtt offset of the context's vma we
>> + * pin the vma to ensure the ID remains fixed.
>> + *
>> + * NB: implied RCS engine...
>> + */
>> + ce = intel_context_pin(ctx, engine);
>> + mutex_unlock(&i915->drm.struct_mutex);
>> + if (IS_ERR(ce))
>> + return PTR_ERR(ce);
>> +
>> + i915->perf.oa.pinned_ctx = ce;
>> +
>> + *lrca = i915_ggtt_offset(ce->state);
>> +
>> + return 0;
>> +}
>> +
>> /**
>> * oa_get_render_ctx_id - determine and hold ctx hw id
>> * @stream: An i915-perf stream opened for OA metrics
>> @@ -1216,40 +1241,81 @@ static int i915_oa_read(struct
>> i915_perf_stream *stream,
>> */
>> static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>> {
>> - struct drm_i915_private *dev_priv = stream->dev_priv;
>> + struct drm_i915_private *i915 = stream->dev_priv;
>> - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
>> - dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
>> - } else {
>> - struct intel_engine_cs *engine = dev_priv->engine[RCS];
>> - struct intel_context *ce;
>> + switch (INTEL_GEN(i915)) {
>> + case 7: {
>> int ret;
>> - ret = i915_mutex_lock_interruptible(&dev_priv->drm);
>> + ret = oa_get_render_lrca(i915, stream->ctx,
>> + &i915->perf.oa.specific_ctx_id);
>> if (ret)
>> return ret;
>> /*
>> - * As the ID is the gtt offset of the context's vma we
>> - * pin the vma to ensure the ID remains fixed.
>> - *
>> - * NB: implied RCS engine...
>> + * On Haswell we don't do any post processing of the reports
>> + * and don't need to use the mask.
>> */
>> - ce = intel_context_pin(stream->ctx, engine);
>> - mutex_unlock(&dev_priv->drm.struct_mutex);
>> - if (IS_ERR(ce))
>> - return PTR_ERR(ce);
>> + i915->perf.oa.specific_ctx_id_mask = 0;
>> + break;
>> + }
>> - dev_priv->perf.oa.pinned_ctx = ce;
>> + case 8:
>> + case 9:
>> + case 10:
>> + if (USES_GUC_SUBMISSION(i915)) {
>> + u32 lrca;
>> + int ret;
>> - /*
>> - * Explicitly track the ID (instead of calling
>> - * i915_ggtt_offset() on the fly) considering the difference
>> - * with gen8+ and execlists
>> - */
>> - dev_priv->perf.oa.specific_ctx_id =
>> i915_ggtt_offset(ce->state);
>> + ret = oa_get_render_lrca(i915, stream->ctx, &lrca);
>> + if (ret)
>> + return ret;
>> +
>> + /*
>> + * The LRCA is aligned to a page. As a result the
>> + * lower 12bits are always at 0 and reused in the
>> + * context descriptor for some flags. They won't be
>> + * part of the context ID in the OA reports, so squash
>> + * those lower bits.
>> + */
>> + i915->perf.oa.specific_ctx_id =
>> + (lrca + LRC_HEADER_PAGES * PAGE_SIZE) >> 12;
>> +
>> + /*
>> + * GuC uses the top bit to signal proxy submission, so
>> + * ignore that bit if using GuC.
>> + */
>> + i915->perf.oa.specific_ctx_id_mask =
>> + (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
>> + } else {
>> + i915->perf.oa.specific_ctx_id = stream->ctx->hw_id;
>> + i915->perf.oa.specific_ctx_id_mask =
>> + (1U << GEN8_CTX_ID_WIDTH) - 1;
>> + }
>> + break;
>> +
>> + case 11: {
>> + struct intel_engine_cs *engine = i915->engine[RCS];
>> +
>> + i915->perf.oa.specific_ctx_id =
>> + stream->ctx->hw_id << (GEN11_SW_CTX_ID_SHIFT - 32) |
>> + engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
>> + engine->class << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
>> + i915->perf.oa.specific_ctx_id_mask =
>> + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>> (GEN11_SW_CTX_ID_SHIFT - 32) |
>> + ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) <<
>> (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
>> + ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) <<
>> (GEN11_ENGINE_CLASS_SHIFT - 32);
>> + break;
>> }
>> + default:
>> + MISSING_CASE(INTEL_GEN(i915));
>> + }
>> +
>> + DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
>> + i915->perf.oa.specific_ctx_id,
>> + i915->perf.oa.specific_ctx_id_mask);
>> +
>> return 0;
>> }
>> @@ -1266,6 +1332,7 @@ static void oa_put_render_ctx_id(struct
>> i915_perf_stream *stream)
>> struct intel_context *ce;
>> dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
>> + dev_priv->perf.oa.specific_ctx_id_mask = 0;
>> ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
>> if (ce) {
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 94f9c4795190..7d63c6d2f687 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -233,6 +233,11 @@ intel_lr_context_descriptor_update(struct
>> i915_gem_context *ctx,
>> /* bits 12-31 */
>> GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
>> + /*
>> + * The following 32bits are copied into the OA reports (dword 2).
>> + * Consider updating oa_get_render_ctx_id in i915_perf.c when
>> changing
>> + * anything below.
>> + */
>> if (INTEL_GEN(ctx->i915) >= 11) {
>> GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
>> desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
>>
>
More information about the Intel-gfx
mailing list