[Intel-gfx] [PATCH 5/9] drm/i915/perf: Group engines into respective OA groups

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Thu Feb 16 23:44:07 UTC 2023


On Thu, Feb 16, 2023 at 12:51:34PM +0200, Jani Nikula wrote:
>On Tue, 14 Feb 2023, Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com> wrote:
>> Now that we may have multiple OA units in a single GT as well as on
>> separate GTs, create an engine group that maps to a single OA unit.
>>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
>>  drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
>>  drivers/gpu/drm/i915/i915_perf.c             | 126 +++++++++++++++++--
>>  drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
>>  4 files changed, 171 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index 4fd54fb8810f..8a8b0dce241b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -53,6 +53,8 @@ struct intel_gt;
>>  struct intel_ring;
>>  struct intel_uncore;
>>  struct intel_breadcrumbs;
>> +struct intel_engine_cs;
>> +struct i915_perf_group;
>>
>>  typedef u32 intel_engine_mask_t;
>>  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
>> @@ -603,6 +605,8 @@ struct intel_engine_cs {
>>  	} props, defaults;
>>
>>  	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
>> +
>> +	struct i915_perf_group *oa_group;
>>  };
>>
>>  static inline bool
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> index 6c6198a257ac..1141f875f5bd 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> @@ -6,6 +6,7 @@
>>  #include <linux/string_helpers.h>
>>
>>  #include "i915_drv.h"
>> +#include "i915_perf_types.h"
>>  #include "intel_engine_regs.h"
>>  #include "intel_gt_regs.h"
>>  #include "intel_sseu.h"
>> @@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
>>  	 * If i915/perf is active, we want a stable powergating configuration
>>  	 * on the system. Use the configuration pinned by i915/perf.
>>  	 */
>> -	if (gt->perf.exclusive_stream)
>> +	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
>>  		req_sseu = &gt->perf.sseu;
>>
>>  	slices = hweight8(req_sseu->slice_mask);
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index e134523576f8..fda779b2c16f 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1587,8 +1587,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>>  {
>>  	struct i915_perf *perf = stream->perf;
>>  	struct intel_gt *gt = stream->engine->gt;
>> +	struct i915_perf_group *g = stream->engine->oa_group;
>>
>> -	if (WARN_ON(stream != gt->perf.exclusive_stream))
>> +	if (WARN_ON(stream != g->exclusive_stream))
>>  		return;
>>
>>  	/*
>> @@ -1597,7 +1598,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>>  	 *
>>  	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
>>  	 */
>> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
>> +	WRITE_ONCE(g->exclusive_stream, NULL);
>>  	perf->ops.disable_metric_set(stream);
>>
>>  	free_oa_buffer(stream);
>> @@ -3195,6 +3196,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  {
>>  	struct drm_i915_private *i915 = stream->perf->i915;
>>  	struct i915_perf *perf = stream->perf;
>> +	struct i915_perf_group *g;
>>  	struct intel_gt *gt;
>>  	int ret;
>>
>> @@ -3205,6 +3207,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  	}
>>  	gt = props->engine->gt;
>>
>> +	g = props->engine->oa_group;
>> +	if (!g) {
>> +		DRM_DEBUG("Perf group invalid\n");
>> +		return -EINVAL;
>> +	}
>> +
>>  	/*
>>  	 * If the sysfs metrics/ directory wasn't registered for some
>>  	 * reason then don't let userspace try their luck with config
>> @@ -3234,7 +3242,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  	 * counter reports and marshal to the appropriate client
>>  	 * we currently only allow exclusive access
>>  	 */
>> -	if (gt->perf.exclusive_stream) {
>> +	if (g->exclusive_stream) {
>>  		drm_dbg(&stream->perf->i915->drm,
>>  			"OA unit already in use\n");
>>  		return -EBUSY;
>> @@ -3329,7 +3337,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  	stream->ops = &i915_oa_stream_ops;
>>
>>  	stream->engine->gt->perf.sseu = props->sseu;
>> -	WRITE_ONCE(gt->perf.exclusive_stream, stream);
>> +	WRITE_ONCE(g->exclusive_stream, stream);
>>
>>  	ret = i915_perf_stream_enable_sync(stream);
>>  	if (ret) {
>> @@ -3352,7 +3360,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  	return 0;
>>
>>  err_enable:
>> -	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
>> +	WRITE_ONCE(g->exclusive_stream, NULL);
>>  	perf->ops.disable_metric_set(stream);
>>
>>  	free_oa_buffer(stream);
>> @@ -3381,12 +3389,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>>  			    const struct intel_engine_cs *engine)
>>  {
>>  	struct i915_perf_stream *stream;
>> +	struct i915_perf_group *g = engine->oa_group;
>>
>> -	if (!engine_supports_oa(engine))
>> +	if (!g)
>>  		return;
>>
>>  	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
>> -	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
>> +	stream = READ_ONCE(g->exclusive_stream);
>>  	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
>>  		gen8_update_reg_state_unlocked(ce, stream);
>>  }
>> @@ -4755,6 +4764,95 @@ static struct ctl_table oa_table[] = {
>>  	{}
>>  };
>>
>> +static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
>> +{
>> +	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
>> +
>> +	switch (platform) {
>> +	default:
>> +		return 1;
>> +	}
>> +}
>> +
>> +static u32 __oa_engine_group(struct intel_engine_cs *engine)
>> +{
>> +	if (!engine_supports_oa(engine))
>> +		return PERF_GROUP_INVALID;
>> +
>> +	switch (engine->class) {
>> +	case RENDER_CLASS:
>> +		return PERF_GROUP_OAG;
>> +
>> +	default:
>> +		return PERF_GROUP_INVALID;
>> +	}
>> +}
>> +
>> +static void oa_init_groups(struct intel_gt *gt)
>> +{
>> +	int i, num_groups = gt->perf.num_perf_groups;
>> +	struct i915_perf *perf = &gt->i915->perf;
>> +
>> +	for (i = 0; i < num_groups; i++) {
>> +		struct i915_perf_group *g = &gt->perf.group[i];
>> +
>> +		/* Fused off engines can result in a group with num_engines == 0 */
>> +		if (g->num_engines == 0)
>> +			continue;
>> +
>> +		/* Set oa_unit_ids now to ensure ids remain contiguous. */
>> +		g->oa_unit_id = perf->oa_unit_ids++;
>> +
>> +		g->gt = gt;
>> +	}
>> +}
>> +
>> +static int oa_init_gt(struct intel_gt *gt)
>> +{
>> +	u32 num_groups = __num_perf_groups_per_gt(gt);
>> +	struct intel_engine_cs *engine;
>> +	struct i915_perf_group *g;
>> +	intel_engine_mask_t tmp;
>> +
>> +	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
>> +	if (drm_WARN_ON(&gt->i915->drm, !g))
>> +		return -ENOMEM;
>
>No warnings or messages on -ENOMEM is standard policy.

Will remove the WARN_ON.
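i.e. the allocation would become just (untested sketch of the revised
hunk):

	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
	if (!g)
		return -ENOMEM;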

>
>> +
>> +	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
>> +		u32 index;
>> +
>> +		index = __oa_engine_group(engine);
>> +		if (index < num_groups) {
>> +			g[index].engine_mask |= BIT(engine->id);
>> +			g[index].num_engines++;
>> +			engine->oa_group = &g[index];
>> +		} else {
>> +			engine->oa_group = NULL;
>> +		}
>> +	}
>> +
>> +	gt->perf.num_perf_groups = num_groups;
>> +	gt->perf.group = g;
>> +
>> +	oa_init_groups(gt);
>> +
>> +	return 0;
>> +}
>> +
>> +static int oa_init_engine_groups(struct i915_perf *perf)
>> +{
>> +	struct intel_gt *gt;
>> +	int i, ret;
>> +
>> +	for_each_gt(gt, perf->i915, i) {
>> +		ret = oa_init_gt(gt);
>> +		if (ret)
>> +			return ret;
>
>If this fails in the middle, you'll leave things in half-initialized
>state when returning, and expect the caller to clean it up. But that's a
>surprising interface design. If i915_perf_init() returns an error, it's
>*not* customary to have to call the corresponding cleanup function. On
>the contrary, you only call it on success. Any init failures need to be
>cleaned up internally before returning.

ENOMEM is the only error I am expecting here, so no other cleanup is 
needed if this fails.
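That said, if we do want the unwind to be internal as you suggest, or if
oa_init_gt() ever grows failure modes beyond the allocation, something
along these lines would keep the init path self-contained
(oa_fini_engine_groups is a hypothetical helper, untested):

	static void oa_fini_engine_groups(struct i915_perf *perf)
	{
		struct intel_gt *gt;
		int i;

		for_each_gt(gt, perf->i915, i) {
			kfree(gt->perf.group);
			gt->perf.group = NULL;
			gt->perf.num_perf_groups = 0;
		}
	}

	static int oa_init_engine_groups(struct i915_perf *perf)
	{
		struct intel_gt *gt;
		int i, ret;

		for_each_gt(gt, perf->i915, i) {
			ret = oa_init_gt(gt);
			if (ret) {
				/*
				 * Free the group arrays allocated for any
				 * earlier GTs. engine->oa_group pointers go
				 * stale here, which should be fine assuming
				 * nothing dereferences them after a failed
				 * i915_perf_init().
				 */
				oa_fini_engine_groups(perf);
				return ret;
			}
		}

		return 0;
	}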

Thanks,
Umesh


