[Intel-gfx] [PATCH 06/12] drm/i915: Populate ctx ID for periodic OA reports
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Mon Jul 31 09:27:17 UTC 2017
Hi Sagar,
I'm curious as to what happens if 2 contexts submit requests within a time
period smaller than the OA sampling period on Gen7.5.
My understanding is that with this change you'll only retain the last
submission and then the ctx_id reported in the SAMPLE_CTX_ID field will
be incorrect for the first workload.
Am I missing something?
-
Lionel
On 31/07/17 08:59, Sagar Arun Kamble wrote:
> From: Sourab Gupta <sourab.gupta at intel.com>
>
> This adds support for populating the ctx id for the periodic OA reports
> when requested through the corresponding property.
>
> For Gen8, the OA reports themselves contain the ctx ID, and it is the one
> programmed into HW while submitting workloads. Thus it's retrieved from
> the reports themselves.
> For Gen7, the OA reports don't have any such field, and we can populate
> this field with the last seen ctx ID while sending CS reports.
>
> Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 8 ++++++
> drivers/gpu/drm/i915/i915_perf.c | 58 +++++++++++++++++++++++++++++++---------
> 2 files changed, 54 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fb81315..6c011f3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2077,6 +2077,8 @@ struct i915_perf_stream {
>
> wait_queue_head_t poll_wq;
> bool pollin;
> +
> + u32 last_ctx_id;
> };
>
> /**
> @@ -2151,6 +2153,12 @@ struct i915_oa_ops {
> * generations.
> */
> u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv);
> +
> + /**
> + * @get_ctx_id: Retrieve the ctx_id associated with the (periodic) OA
> + * report.
> + */
> + u32 (*get_ctx_id)(struct i915_perf_stream *stream, const u8 *report);
> };
>
> /*
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 905c5bb..1f5ebdb 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -790,6 +790,45 @@ static u32 oa_buffer_num_reports_unlocked(
> return aged_tail == INVALID_TAIL_PTR ? 0 : num_reports;
> }
>
> +static u32 gen7_oa_buffer_get_ctx_id(struct i915_perf_stream *stream,
> + const u8 *report)
> +{
> + if (!stream->cs_mode)
> + WARN_ONCE(1,
> + "CTX ID can't be retrieved if command stream mode not enabled");
> +
> + /*
> + * OA reports generated in Gen7 don't have the ctx ID information.
> + * Therefore, just rely on the ctx ID information from the last CS
> + * sample forwarded
> + */
> + return stream->last_ctx_id;
> +}
> +
> +static u32 gen8_oa_buffer_get_ctx_id(struct i915_perf_stream *stream,
> + const u8 *report)
> +{
> + u32 ctx_id;
> +
> + /* The ctx ID present in the OA reports have intel_context::hw_id
> + * present, since this is programmed into the ELSP in execlist mode.
> + * In non-execlist mode, fall back to retrieving the ctx ID from the
> + * last saved ctx ID from command stream mode.
> + */
> + if (i915.enable_execlists) {
> + u32 *report32 = (void *)report;
> +
> + ctx_id = report32[2] & 0x1fffff;
> + } else {
> + if (!stream->cs_mode)
> + WARN_ONCE(1,
> + "CTX ID can't be retrieved if command stream mode not enabled");
> +
> + ctx_id = stream->last_ctx_id;
> + }
> + return ctx_id;
> +}
> +
> /**
> * append_oa_status - Appends a status record to a userspace read() buffer.
> * @stream: An i915-perf stream opened for OA metrics
> @@ -914,22 +953,12 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
> struct drm_i915_private *dev_priv = stream->dev_priv;
> u32 sample_flags = stream->sample_flags;
> struct i915_perf_sample_data data = { 0 };
> - u32 *report32 = (u32 *)report;
>
> if (sample_flags & SAMPLE_OA_SOURCE)
> data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER;
>
> if (sample_flags & SAMPLE_CTX_ID) {
> - if (INTEL_INFO(dev_priv)->gen < 8)
> - data.ctx_id = 0;
> - else {
> - /*
> - * XXX: Just keep the lower 21 bits for now since I'm
> - * not entirely sure if the HW touches any of the higher
> - * bits in this field
> - */
> - data.ctx_id = report32[2] & 0x1fffff;
> - }
> + data.ctx_id = dev_priv->perf.oa.ops.get_ctx_id(stream, report);
> }
>
> if (sample_flags & SAMPLE_OA_REPORT)
> @@ -1524,8 +1553,10 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
> if (sample_flags & SAMPLE_OA_SOURCE)
> data.source = I915_PERF_SAMPLE_OA_SOURCE_CS;
>
> - if (sample_flags & SAMPLE_CTX_ID)
> + if (sample_flags & SAMPLE_CTX_ID) {
> data.ctx_id = node->ctx_id;
> + stream->last_ctx_id = data.ctx_id;
> + }
>
> return append_perf_sample(stream, buf, count, offset, &data);
> }
> @@ -3838,6 +3869,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> dev_priv->perf.oa.ops.read = gen7_oa_read;
> dev_priv->perf.oa.ops.oa_hw_tail_read =
> gen7_oa_hw_tail_read;
> + dev_priv->perf.oa.ops.get_ctx_id = gen7_oa_buffer_get_ctx_id;
>
> dev_priv->perf.oa.timestamp_frequency = 12500000;
>
> @@ -3933,6 +3965,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> dev_priv->perf.oa.ops.read = gen8_oa_read;
> dev_priv->perf.oa.ops.oa_hw_tail_read =
> gen8_oa_hw_tail_read;
> + dev_priv->perf.oa.ops.get_ctx_id =
> + gen8_oa_buffer_get_ctx_id;
>
> dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
> }
More information about the Intel-gfx
mailing list