[Intel-gfx] [PATCH 2/2] drm/i915/pmu: Add queued counter
Rogozhkin, Dmitry V
dmitry.v.rogozhkin at intel.com
Wed Nov 22 21:15:24 UTC 2017
On Wed, 2017-11-22 at 12:46 +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> We add a PMU counter to expose the number of requests currently submitted
> to the GPU, plus the number of runnable requests waiting on GPU time.
>
> This is useful to analyze the overall load of the system.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_pmu.c | 30 +++++++++++++++++++++++++-----
> include/uapi/drm/i915_drm.h | 6 ++++++
> 2 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 112243720ff3..b2b4b32af35f 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -36,7 +36,8 @@
> #define ENGINE_SAMPLE_MASK \
> (BIT(I915_SAMPLE_BUSY) | \
> BIT(I915_SAMPLE_WAIT) | \
> - BIT(I915_SAMPLE_SEMA))
> + BIT(I915_SAMPLE_SEMA) | \
> + BIT(I915_SAMPLE_QUEUED))
>
> #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>
> @@ -223,6 +224,12 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>
> update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> +
> + if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
> + update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
> + 1 / I915_SAMPLE_QUEUED_SCALE,
> + engine->queued +
> + (last_seqno - current_seqno));
> }
>
> if (fw)
> @@ -310,6 +317,10 @@ static int engine_event_init(struct perf_event *event)
> if (INTEL_GEN(i915) < 6)
> return -ENODEV;
> break;
> + case I915_SAMPLE_QUEUED:
> + if (INTEL_GEN(i915) < 8)
> + return -ENODEV;
> + break;
> default:
> return -ENOENT;
> }
> @@ -399,6 +410,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> } else if (sample == I915_SAMPLE_BUSY &&
> engine->pmu.busy_stats) {
> val = ktime_to_ns(intel_engine_get_busy_time(engine));
> + } else if (sample == I915_SAMPLE_QUEUED) {
> + val =
> + div_u64(engine->pmu.sample[I915_SAMPLE_QUEUED].cur,
> + FREQUENCY);
> } else {
> val = engine->pmu.sample[sample].cur;
> }
> @@ -679,13 +694,18 @@ static ssize_t i915_pmu_event_show(struct device *dev,
> I915_EVENT_STR(_name.unit, _unit)
>
> #define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
> - I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
> + I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample))
> +
> +#define I915_ENGINE_EVENT_NS(_name, _class, _instance, _sample) \
> + I915_ENGINE_EVENT(_name, _class, _instance, _sample), \
> I915_EVENT_STR(_name.unit, "ns")
>
> #define I915_ENGINE_EVENTS(_name, _class, _instance) \
> - I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
> - I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
> - I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
> + I915_ENGINE_EVENT_NS(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
> + I915_ENGINE_EVENT_NS(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
> + I915_ENGINE_EVENT_NS(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT), \
> + I915_ENGINE_EVENT(_name##_instance-queued, _class, _instance, I915_SAMPLE_QUEUED), \
> + I915_EVENT_STR(_name##_instance-queued.scale, __stringify(I915_SAMPLE_QUEUED_SCALE))
We expose queued as an "instant" metric, i.e. the number of requests
at the very moment we query the metric, rather than an ever-growing
counter - is that right? I doubt such a metric will make sense for
perf-stat. Can we somehow restrict it so that it can be queried via
the uAPI only, and avoid exposing it to perf-stat?
>
> static struct attribute *i915_pmu_events_attrs[] = {
> I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 915a6e85a855..20ee668d1428 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -111,9 +111,12 @@ enum drm_i915_pmu_engine_sample {
> I915_SAMPLE_BUSY = 0,
> I915_SAMPLE_WAIT = 1,
> I915_SAMPLE_SEMA = 2,
> + I915_SAMPLE_QUEUED = 3,
> I915_ENGINE_SAMPLE_MAX /* non-ABI */
> };
>
> +#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */
> +
> #define I915_PMU_SAMPLE_BITS (4)
> #define I915_PMU_SAMPLE_MASK (0xf)
> #define I915_PMU_SAMPLE_INSTANCE_BITS (8)
> @@ -134,6 +137,9 @@ enum drm_i915_pmu_engine_sample {
> #define I915_PMU_ENGINE_SEMA(class, instance) \
> __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
>
> +#define I915_PMU_ENGINE_QUEUED(class, instance) \
> + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
> +
> #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
>
> #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
More information about the Intel-gfx
mailing list