[igt-dev] [PATCH i-g-t 15/31] i915/perf: Add OAM support

Kamil Konieczny kamil.konieczny at linux.intel.com
Mon Mar 13 15:21:21 UTC 2023


Hi Umesh,

On 2023-02-14 at 16:46:32 -0800, Umesh Nerlige Ramappa wrote:
> Add OAM formats and support for media engines in perf tests
> 
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> ---
>  include/drm-uapi/i915_drm.h |   4 ++
>  lib/intel_chipset.h         |   3 +
>  lib/intel_device_info.c     |   1 +
>  tests/i915/perf.c           | 138 +++++++++++++++++++++++++-----------
>  4 files changed, 105 insertions(+), 41 deletions(-)
> 
> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
> index 5fab3066..ab244346 100644
> --- a/include/drm-uapi/i915_drm.h
> +++ b/include/drm-uapi/i915_drm.h
> @@ -2545,6 +2545,10 @@ enum drm_i915_oa_format {
>  	I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
>  	I915_OA_FORMAT_A24u40_A14u32_B8_C8,
>  
> +	/* MTL OAM */
> +	I915_OAM_FORMAT_MPEC8u64_B8_C8,
> +	I915_OAM_FORMAT_MPEC8u32_B8_C8,
> +
>  	I915_OA_FORMAT_MAX	    /* non-ABI */
>  };
>  

I would prefer to have drm-uapi changes in separate patch.

> diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h
> index c9762ae6..c2c8998d 100644
> --- a/lib/intel_chipset.h
> +++ b/lib/intel_chipset.h
> @@ -45,6 +45,7 @@ struct intel_device_info {
>  	unsigned gt; /* 0 if unknown */
>  	bool has_4tile : 1;
>  	bool has_flatccs : 1;
> +	bool has_oam : 1;
>  	bool is_mobile : 1;
>  	bool is_whitney : 1;
>  	bool is_almador : 1;
> @@ -231,4 +232,6 @@ void intel_check_pch(void);
>  
>  #define HAS_FLATCCS(devid)	(intel_get_device_info(devid)->has_flatccs)
>  
> +#define HAS_OAM(devid)		(intel_get_device_info(devid)->has_oam)
> +
>  #endif /* _INTEL_CHIPSET_H */
> diff --git a/lib/intel_device_info.c b/lib/intel_device_info.c
> index 12b81d48..0b11dfce 100644
> --- a/lib/intel_device_info.c
> +++ b/lib/intel_device_info.c
> @@ -472,6 +472,7 @@ static const struct intel_device_info intel_meteorlake_info = {
>  	.graphics_rel = 70,
>  	.display_ver = 14,
>  	.has_4tile = true,
> +	.has_oam = true,
>  	.is_meteorlake = true,
>  	.codename = "meteorlake",
>  	.cmds_info = &gen12_mtl_cmds_info,

imho here also lib changes for lib/intel_device_info should be
in separate patch, they can be merged separatly before.

> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index 9c926fd2..9c546cc1 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -212,6 +212,40 @@ static struct oa_format dg2_oa_formats[I915_OA_FORMAT_MAX] = {
>  		.c_off = 224, .n_c = 8, .oa_type = OAG, },
>  };
>  
> +static struct oa_format mtl_oa_formats[I915_OA_FORMAT_MAX] = {
> +	[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = {
> +		"A32u40_A4u32_B8_C8", .size = 256,
> +		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
> +		.a_off = 144, .n_a = 4, .first_a = 32,
> +		.b_off = 192, .n_b = 8,
> +		.c_off = 224, .n_c = 8, .oa_type = OAR, },
> +	/* This format has A36 and A37 interleaved with high bytes of some A
> +	 * counters, so we will accumulate only subset of counters.
> +	 */
> +	[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = {
> +		"A24u40_A14u32_B8_C8", .size = 256,
> +		/* u40: A4 - A23 */
> +		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 20, .first_a40 = 4,
> +		/* u32: A0 - A3 */
> +		.a_off = 16, .n_a = 4,
> +		.b_off = 192, .n_b = 8,
> +		.c_off = 224, .n_c = 8, .oa_type = OAG, },
> +
> +	/* Treat MPEC countes as A counters for now */
> +	[I915_OAM_FORMAT_MPEC8u64_B8_C8] = {
> +		"MPEC8u64_B8_C8", .size = 192,
> +		.a64_off = 32, .n_a64 = 8,
> +		.b_off = 96, .n_b = 8,
> +		.c_off = 128, .n_c = 8, .oa_type = OAM,
> +		.report_hdr_64bit = true, },
> +	[I915_OAM_FORMAT_MPEC8u32_B8_C8] = {
> +		"MPEC8u32_B8_C8", .size = 128,
> +		.a_off = 32, .n_a = 8,
> +		.b_off = 64, .n_b = 8,
> +		.c_off = 96, .n_c = 8, .oa_type = OAM,
> +		.report_hdr_64bit = true, },
> +};
> +
>  static bool hsw_undefined_a_counters[45] = {
>  	[4] = true,
>  	[6] = true,
> @@ -273,8 +307,10 @@ get_oa_format(enum drm_i915_oa_format format)
>  {
>  	if (IS_HASWELL(devid))
>  		return hsw_oa_formats[format];
> -	else if (IS_DG2(devid) || IS_METEORLAKE(devid))
> +	else if (IS_DG2(devid))
>  		return dg2_oa_formats[format];
> +	else if (IS_METEORLAKE(devid))
> +		return mtl_oa_formats[format];

After some recent discussion it is prefered to have order
from new ones to oldest one.

>  	else if (IS_GEN12(devid))
>  		return gen12_oa_formats[format];
>  	else
> @@ -356,21 +392,6 @@ static int i915_perf_revision(int fd)
>  	return value;
>  }
>  
> -/*
> - * perf_supports_engine is used in the for loop that iterates over engines and
> - * determines if perf test can be run on a particular engine. For perf revisions
> - * below 10, we only need to run the test once, so we return true only for rcs0.
> - * Note that the test itself ignores the class instance parameters if they are
> - * not supported by the perf interface. This enables us to use a single for-loop
> - * construct to run the same test on all platforms and all perf revisions.
> - */
> -static bool
> -perf_supports_engine(const struct intel_execution_engine2 *e)
> -{
> -	return e->class == I915_ENGINE_CLASS_RENDER &&
> -	       e->instance == 0;
> -}
> -
>  static bool
>  has_param_class_instance(void)
>  {
> @@ -674,8 +695,12 @@ oar_unit_default_format(void)
>  }
>  
>  static int
> -oa_unit_default_format(void)
> +oa_unit_default_format(const struct intel_execution_engine2 *e)
>  {
> +	if (e->class == I915_ENGINE_CLASS_VIDEO ||
> +	    e->class == I915_ENGINE_CLASS_VIDEO_ENHANCE)
> +		return I915_OAM_FORMAT_MPEC8u32_B8_C8;
> +
>  	return test_set->perf_oa_format;
>  }
>  
> @@ -1752,6 +1777,20 @@ print_report(uint32_t *report, int fmt)
>  }
>  #endif
>  
> +static bool
> +oa_unit_supports_engine(int oa_unit, const struct intel_execution_engine2 *e)
> +{
> +	switch (oa_unit) {
> +	case OAM:
> +		return e->class == I915_ENGINE_CLASS_VIDEO ||
> +		       e->class == I915_ENGINE_CLASS_VIDEO_ENHANCE;
> +	case OAG:
> +		return e->class == I915_ENGINE_CLASS_RENDER;
> +	}
> +
> +	return false;
> +}
> +
>  static void
>  test_oa_formats(const struct intel_execution_engine2 *e)
>  {
> @@ -1763,7 +1802,7 @@ test_oa_formats(const struct intel_execution_engine2 *e)
>  		if (!format.name) /* sparse, indexed by ID */
>  			continue;
>  
> -		if (format.oa_type != OAG) /* sparse, indexed by ID */
> +		if (!oa_unit_supports_engine(format.oa_type, e))
>  			continue;
>  
>  		igt_debug("Checking OA format %s\n", format.name);
> @@ -1922,7 +1961,7 @@ static bool expected_report_timing_delta(uint32_t delta, uint32_t expected_delta
>  static void
>  test_oa_exponents(const struct intel_execution_engine2 *e)
>  {
> -	uint64_t fmt = oa_unit_default_format();
> +	uint64_t fmt = oa_unit_default_format(e);
>  
>  	load_helper_init();
>  	load_helper_run(HIGH);
> @@ -2266,7 +2305,7 @@ test_blocking(uint64_t requested_oa_period,
>  
>  	ADD_PROPS(props, idx, SAMPLE_OA, true);
>  	ADD_PROPS(props, idx, OA_METRICS_SET, test_set->perf_oa_metrics_set);
> -	ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format());
> +	ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format(e));
>  	ADD_PROPS(props, idx, OA_EXPONENT, oa_exponent);
>  
>  	if (has_param_poll_period() && set_kernel_hrtimer)
> @@ -2429,7 +2468,7 @@ test_polling(uint64_t requested_oa_period,
>  
>  	ADD_PROPS(props, idx, SAMPLE_OA, true);
>  	ADD_PROPS(props, idx, OA_METRICS_SET, test_set->perf_oa_metrics_set);
> -	ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format());
> +	ADD_PROPS(props, idx, OA_FORMAT, oa_unit_default_format(e));
>  	ADD_PROPS(props, idx, OA_EXPONENT, oa_exponent);
>  
>  	if (has_param_poll_period() && set_kernel_hrtimer)
> @@ -2703,7 +2742,7 @@ gen12_test_oa_tlb_invalidate(const struct intel_execution_engine2 *e)
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>  
>  		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> -		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(),
> +		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e),
>  		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
>  		DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class,
>  		DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, e->instance,
> @@ -2746,7 +2785,7 @@ test_buffer_fill(const struct intel_execution_engine2 *e)
>  	/* ~5 micro second period */
>  	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> -	uint64_t fmt = oa_unit_default_format();
> +	uint64_t fmt = oa_unit_default_format(e);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> @@ -2982,7 +3021,7 @@ test_enable_disable(const struct intel_execution_engine2 *e)
>  	/* ~5 micro second period */
>  	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> -	uint64_t fmt = oa_unit_default_format();
> +	uint64_t fmt = oa_unit_default_format(e);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> @@ -4577,7 +4616,7 @@ test_stress_open_close(const struct intel_execution_engine2 *e)
>  
>  			/* OA unit configuration */
>  			DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> -			DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(),
> +			DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e),
>  			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
>  			DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class,
>  			DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, e->instance,
> @@ -4680,7 +4719,7 @@ test_global_sseu_config_invalid(const struct intel_execution_engine2 *e)
>  
>  		/* OA unit configuration */
>  		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> -		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(),
> +		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e),
>  		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
>  		DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&sseu_param),
>  		DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class,
> @@ -4769,7 +4808,7 @@ test_global_sseu_config(const struct intel_execution_engine2 *e)
>  
>  		/* OA unit configuration */
>  		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> -		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(),
> +		DRM_I915_PERF_PROP_OA_FORMAT, oa_unit_default_format(e),
>  		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
>  		DRM_I915_PERF_PROP_GLOBAL_SSEU, to_user_pointer(&sseu_param),
>  		DRM_I915_PERF_PROP_OA_ENGINE_CLASS, e->class,
> @@ -5306,10 +5345,27 @@ test_sysctl_defaults(void)
>  	igt_assert_eq(max_freq, 100000);
>  }
>  
> -#define __for_each_perf_enabled_engine(fd__, e__) \
> -	for_each_physical_engine(fd__, e__) \
> -		if (perf_supports_engine(e__)) \
> -			igt_dynamic_f("%s", e__->name)
> +static struct intel_execution_engine2 *
> +__ci_to_e2(const intel_ctx_t *ctx, struct i915_engine_class_instance *ci)
> +{
> +	static struct intel_execution_engine2 e2;

e2 is on stack, see below, so it should be *e2

> +	struct intel_execution_engine2 *e;
> +
> +	for_each_ctx_engine(drm_fd, ctx, e) {
> +		if (e->class == ci->engine_class && e->instance == ci->engine_instance) {
> +			e2 = *e;
> +			break;
> +		}
> +	}
> +
> +	return &e2;
--------------- ^
imho you should allocate memory for this and return NULL when
not found.

> +}
> +
> +#define __for_random_engine_in_each_group(groups_, ctx_, e_) \
> +	for (int i_ = 0; \
> +	     i_ < num_perf_oa_groups && !!(e_ = __ci_to_e2(ctx_, random_engine(&groups_[i_]))); \
> +	     i_++) \
> +		igt_dynamic_f("%d-%s", i_, e_->name)
>  
>  #define __for_each_render_engine(fd__, e__) \
>  	for_each_physical_engine(fd__, e__) \
> @@ -5684,7 +5740,7 @@ igt_main
>  		test_missing_sample_flags();
>  
>  	igt_subtest_with_dynamic("oa-formats")
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_oa_formats(e);

Do you free 'e' before exit from test ?

Regards,
Kamil

>  
>  	igt_subtest("invalid-oa-exponent")
> @@ -5692,7 +5748,7 @@ igt_main
>  	igt_subtest("low-oa-exponent-permissions")
>  		test_low_oa_exponent_permissions();
>  	igt_subtest_with_dynamic("oa-exponents")
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_oa_exponents(e);
>  
>  	igt_subtest("per-context-mode-unprivileged") {
> @@ -5701,7 +5757,7 @@ igt_main
>  	}
>  
>  	igt_subtest_with_dynamic("buffer-fill")
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_buffer_fill(e);
>  
>  	igt_describe("Test that reason field in OA reports is never 0 on Gen8+");
> @@ -5717,12 +5773,12 @@ igt_main
>  		test_non_sampling_read_error();
>  
>  	igt_subtest_with_dynamic("enable-disable")
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_enable_disable(e);
>  
>  	igt_describe("Test blocking read with default hrtimer frequency");
>  	igt_subtest_with_dynamic("blocking") {
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_blocking(40 * 1000 * 1000 /* 40ms oa period */,
>  				      false /* set_kernel_hrtimer */,
>  				      5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
> @@ -5750,7 +5806,7 @@ igt_main
>  
>  	igt_describe("Test polled read with default hrtimer frequency");
>  	igt_subtest_with_dynamic("polling") {
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_polling(40 * 1000 * 1000 /* 40ms oa period */,
>  				     false /* set_kernel_hrtimer */,
>  				     5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
> @@ -5820,7 +5876,7 @@ igt_main
>  
>  		igt_describe("Test OA TLB invalidate");
>  		igt_subtest_with_dynamic("gen12-oa-tlb-invalidate")
> -			__for_each_perf_enabled_engine(drm_fd, e)
> +			__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  				gen12_test_oa_tlb_invalidate(e);
>  
>  		igt_describe("Measure performance for a specific context using OAR in Gen 12");
> @@ -5857,7 +5913,7 @@ igt_main
>  
>  	igt_describe("Stress tests opening & closing the i915-perf stream in a busy loop");
>  	igt_subtest_with_dynamic("stress-open-close")
> -		__for_each_perf_enabled_engine(drm_fd, e)
> +		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  			test_stress_open_close(e);
>  
>  	igt_subtest_group {
> @@ -5868,12 +5924,12 @@ igt_main
>  
>  		igt_describe("Verify invalid SSEU opening parameters");
>  		igt_subtest_with_dynamic("global-sseu-config-invalid")
> -			__for_each_perf_enabled_engine(drm_fd, e)
> +			__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  				test_global_sseu_config_invalid(e);
>  
>  		igt_describe("Verify specifying SSEU opening parameters");
>  		igt_subtest_with_dynamic("global-sseu-config")
> -			__for_each_perf_enabled_engine(drm_fd, e)
> +			__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
>  				test_global_sseu_config(e);
>  	}
>  
> -- 
> 2.36.1
> 


More information about the igt-dev mailing list