[PATCH i-g-t] perf: initial changes for testing XE OA

Ashutosh Dixit ashutosh.dixit at intel.com
Sat Jul 1 00:24:48 UTC 2023


Currently the tests will run only on RPLP/ADL; see intel_perf_for_devinfo.
By obtaining values from i915 and hardcoding them in intel_perf_for_devinfo,
the tests can be made to run on other platforms.

The following tests can be made to run (for now only on the render engine):

"i915-ref-count"
"oa-formats"
"oa-exponents"
"buffer-fill"
"non-zero-reason"
"enable-disable"
"blocking"
"blocking-parameterized"
"polling"

Current issues seen with the above tests:
* unlanded report(s) head=0x0 tail=0x0 hw_tail=0x764100
* "blocking": Failed assertion: kernel_ns <= (test_duration_ns / 100ull)

Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>

Enable gen12-mi-rpc

sudo ./build/tests/perf --r "i915-ref-count"
sudo ./build/tests/perf --r "oa-formats"
sudo ./build/tests/perf --r "oa-exponents"
sudo ./build/tests/perf --r "buffer-fill"
sudo ./build/tests/perf --r "non-zero-reason"
sudo ./build/tests/perf --r "enable-disable"
sudo ./build/tests/perf --r "blocking"
sudo ./build/tests/perf --r "blocking-parameterized"
sudo ./build/tests/perf --r "polling"
sudo ./build/tests/perf --r "gen12-mi-rpc"

Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
 lib/i915/perf.c     |  30 ++++++++++---
 lib/intel_chipset.h |   2 +-
 tests/i915/perf.c   | 102 ++++++++++++++++++++++++++------------------
 3 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index ddadb53b61c8..9aa66e080048 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -249,6 +249,25 @@ intel_perf_for_devinfo(uint32_t device_id,
 		       uint64_t gt_min_freq,
 		       uint64_t gt_max_freq,
 		       const struct drm_i915_query_topology_info *topology)
+#if 1
+{
+	struct intel_perf *perf;
+
+	perf = calloc(1, sizeof(*perf));
+	perf->root_group = intel_perf_logical_counter_group_new(perf, NULL, "");
+	perf->devinfo.timestamp_frequency = timestamp_frequency;
+
+	IGT_INIT_LIST_HEAD(&perf->metric_sets);
+
+	// FIXME: Below are hardcoded for RPLP/ADL
+	perf->devinfo.n_eus = 96;
+	perf->devinfo.n_eu_slices = 1;
+	perf->devinfo.n_eu_sub_slices = 6;
+	intel_perf_load_metrics_adl(perf);
+
+	return perf;
+}
+#else
 {
 	const struct intel_device_info *devinfo = intel_get_device_info(device_id);
 	struct intel_perf *perf;
@@ -452,6 +471,7 @@ intel_perf_for_devinfo(uint32_t device_id,
 
 	return perf;
 }
+#endif
 
 static int
 getparam(int drm_fd, uint32_t param, uint32_t *val)
@@ -614,7 +634,7 @@ intel_perf_for_fd(int drm_fd, int gt)
 {
 	uint32_t device_id;
 	uint32_t device_revision;
-	uint32_t timestamp_frequency;
+	uint32_t timestamp_frequency = 19200000;
 	uint64_t gt_min_freq;
 	uint64_t gt_max_freq;
 	struct drm_i915_query_topology_info *topology;
@@ -623,7 +643,7 @@ intel_perf_for_fd(int drm_fd, int gt)
 
 	if (sysfs_dir_fd < 0)
 		return NULL;
-
+#if 0
 #define read_sysfs_rps(fd, id, value) \
 	read_sysfs(fd, intel_sysfs_attr_id_to_name(fd, id, gt), value)
 
@@ -646,14 +666,14 @@ intel_perf_for_fd(int drm_fd, int gt)
 	topology = query_topology(drm_fd);
 	if (!topology)
 		return NULL;
-
+#endif
 	ret = intel_perf_for_devinfo(device_id,
 				     device_revision,
 				     timestamp_frequency,
 				     gt_min_freq * 1000000,
 				     gt_max_freq * 1000000,
 				     topology);
-	free(topology);
+	// free(topology);
 
 	return ret;
 }
@@ -735,7 +755,7 @@ load_metric_set_config(struct intel_perf_metric_set *metric_set, int drm_fd)
 
 	config.n_flex_regs = metric_set->n_flex_regs;
 	config.flex_regs_ptr = (uintptr_t) metric_set->flex_regs;
-
+	printf("before DRM_IOCTL_I915_PERF_ADD_CONFIG\n");
 	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
 	if (ret >= 0)
 		metric_set->perf_oa_metrics_set = ret;
diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h
index accfa90ef046..ee79b8b25a18 100644
--- a/lib/intel_chipset.h
+++ b/lib/intel_chipset.h
@@ -182,7 +182,7 @@ void intel_check_pch(void);
 #define IS_SANDYBRIDGE(devid)	(intel_get_device_info(devid)->is_sandybridge)
 #define IS_IVYBRIDGE(devid)	(intel_get_device_info(devid)->is_ivybridge)
 #define IS_VALLEYVIEW(devid)	(intel_get_device_info(devid)->is_valleyview)
-#define IS_HASWELL(devid)	(intel_get_device_info(devid)->is_haswell)
+#define IS_HASWELL(devid)	(0) // intel_get_device_info(devid)->is_haswell)
 #define IS_BROADWELL(devid)	(intel_get_device_info(devid)->is_broadwell)
 #define IS_CHERRYVIEW(devid)	(intel_get_device_info(devid)->is_cherryview)
 #define IS_SKYLAKE(devid)	(intel_get_device_info(devid)->is_skylake)
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 069ab8c0218c..5d014f7dc097 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -45,6 +45,9 @@
 #include "igt_perf.h"
 #include "igt_sysfs.h"
 #include "drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
 /**
  * TEST: perf
  * Description: Test the i915 perf metrics streaming interface
@@ -556,6 +559,8 @@ static int i915_perf_revision(int fd)
 	drm_i915_getparam_t gp;
 	int value = 1, ret;
 
+	return 99;
+
 	gp.param = I915_PARAM_PERF_REVISION;
 	gp.value = &value;
 	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
@@ -874,15 +879,17 @@ oar_unit_default_format(void)
  * Temporary wrapper to distinguish mappings on !llc platforms,
  * where it seems cache over GEM_MMAP_OFFSET is not flushed before execution.
  */
-static void *buf_map(int i915, struct intel_buf *buf, bool write)
+static void *buf_map(int fd, struct intel_buf *buf, bool write)
 {
 	void *p;
-
+#if 1
+	p = buf->ptr = xe_bo_map(fd, buf->handle, buf->surface[0].size);
+#else
 	if (gem_has_llc(i915))
 		p = intel_buf_cpu_map(buf, write);
 	else
 		p = intel_buf_device_map(buf, write);
-
+#endif
 	return p;
 }
 
@@ -3561,20 +3568,20 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
 	struct intel_bb *ibb;
 	struct intel_buf *buf;
 #define INVALID_CTX_ID 0xffffffff
-	uint32_t ctx_id = INVALID_CTX_ID;
+	uint32_t vm, engine;
 	uint32_t *report32;
 	size_t format_size_32;
 	struct oa_format format = get_oa_format(fmt);
 
 	/* Ensure perf_stream_paranoid is set to 1 by default */
-	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
+	// write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
 
 	bops = buf_ops_create(drm_fd);
-	ctx_id = gem_context_create(drm_fd);
-	igt_assert_neq(ctx_id, INVALID_CTX_ID);
-	properties[1] = ctx_id;
+	vm = xe_vm_create(drm_fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+	engine = xe_engine_create(drm_fd, vm, xe_hw_engine(drm_fd, 0), 0);
+	properties[1] = engine;
 
-	ibb = intel_bb_create_with_context(drm_fd, ctx_id, 0, NULL, BATCH_SZ);
+	ibb = intel_bb_create_with_context(drm_fd, engine, vm, NULL, BATCH_SZ);
 	buf = intel_buf_create(bops, 4096, 1, 8, 64,
 			       I915_TILING_NONE, I915_COMPRESSION_NONE);
 
@@ -3584,7 +3591,7 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
 
 	stream_fd = __perf_open(drm_fd, &param, false);
 
-#define REPORT_ID 0xdeadbeef
+#define REPORT_ID 0xa5c0ffee
 #define REPORT_OFFSET 0
 	emit_report_perf_count(ibb,
 			       buf,
@@ -3593,7 +3600,7 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
 	intel_bb_flush_render(ibb);
 	intel_bb_sync(ibb);
 
-	intel_buf_cpu_map(buf, false);
+	buf_map(drm_fd, buf, false);
 	report32 = buf->ptr;
 	format_size_32 = format.size >> 2;
 	dump_report(report32, format_size_32, "mi-rpc");
@@ -3619,7 +3626,8 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
 	intel_buf_unmap(buf);
 	intel_buf_destroy(buf);
 	intel_bb_destroy(ibb);
-	gem_context_destroy(drm_fd, ctx_id);
+	xe_engine_destroy(drm_fd, engine);
+	xe_vm_destroy(drm_fd, vm);
 	buf_ops_destroy(bops);
 	__perf_close(stream_fd);
 }
@@ -5402,7 +5410,7 @@ read_i915_module_ref(void)
 	igt_assert(fp);
 
 	while ((len = getline(&line, &line_buf_size, fp)) > 0) {
-		if (strncmp(line, "i915 ", 5) == 0) {
+		if (strncmp(line, "xe ", 3) == 0) {
 			unsigned long mem;
 			int ret = sscanf(line + 5, "%lu %u", &mem, &ref_count);
 			igt_assert(ret == 2);
@@ -5459,12 +5467,11 @@ test_i915_ref_count(void)
 	 * should have been opened so far...
 	 */
 	igt_assert_eq(drm_fd, -1);
-
 	baseline = read_i915_module_ref();
 	igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
 
-	drm_fd = __drm_open_driver(DRIVER_INTEL);
-	igt_require_i915(drm_fd);
+	drm_fd = __drm_open_driver(DRIVER_XE);
+	// igt_require_i915(drm_fd);
 	devid = intel_get_drm_devid(drm_fd);
 	sysfs = perf_sysfs_open(drm_fd);
 
@@ -5900,9 +5907,11 @@ test_group_concurrent_oa_buffer_read(void)
 igt_main
 {
 	const intel_ctx_t *ctx;
-	const struct intel_execution_engine2 *e;
+	struct intel_execution_engine2 __e22 = {};
+	const struct intel_execution_engine2 *e = &__e22;
 
 	igt_fixture {
+#if 0
 		struct stat sb;
 
 		/*
@@ -5916,14 +5925,19 @@ igt_main
 			    == 0);
 		igt_require(stat("/proc/sys/dev/i915/oa_max_sample_rate", &sb)
 			    == 0);
+#endif
 	}
 
+	// Try this
 	igt_subtest("i915-ref-count")
 		test_i915_ref_count();
 
+	// igt_exit();
+
+#if 0
 	igt_subtest("sysctl-defaults")
 		test_sysctl_defaults();
-
+#endif
 	igt_fixture {
 		/* We expect that the ref count test before these fixtures
 		 * should have closed drm_fd...
@@ -5931,14 +5945,15 @@ igt_main
 		igt_assert_eq(drm_fd, -1);
 
 		/* Avoid the normal exithandler, our perf-fd interferes */
-		drm_fd = __drm_open_driver(DRIVER_INTEL);
-		igt_require_gem(drm_fd);
+		drm_fd = __drm_open_driver(DRIVER_XE);
+		// igt_require_gem(drm_fd);
+		xe_device_get(drm_fd);
 
 		devid = intel_get_drm_devid(drm_fd);
-		sysfs = perf_sysfs_open(drm_fd);
+		// sysfs = perf_sysfs_open(drm_fd);
 
 		igt_require(init_sys_info());
-
+#if 0
 		ctx = intel_ctx_create_all_physical(drm_fd);
 		set_default_engine(ctx);
 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
@@ -5950,6 +5965,7 @@ igt_main
 
 		if (has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0))
 			render_copy = igt_get_render_copyfunc(devid);
+#endif
 	}
 
 	igt_subtest("non-system-wide-paranoid")
@@ -5967,16 +5983,16 @@ igt_main
 	igt_subtest("missing-sample-flags")
 		test_missing_sample_flags();
 
-	igt_subtest_with_dynamic("oa-formats")
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("oa-formats")
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_oa_formats(e);
 
 	igt_subtest("invalid-oa-exponent")
 		test_invalid_oa_exponent();
 	igt_subtest("low-oa-exponent-permissions")
 		test_low_oa_exponent_permissions();
-	igt_subtest_with_dynamic("oa-exponents")
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("oa-exponents")
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_oa_exponents(e);
 
 	igt_subtest("per-context-mode-unprivileged") {
@@ -5984,15 +6000,15 @@ igt_main
 		test_per_context_mode_unprivileged();
 	}
 
-	igt_subtest_with_dynamic("buffer-fill")
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("buffer-fill")
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_buffer_fill(e);
 
 	igt_describe("Test that reason field in OA reports is never 0 on Gen8+");
-	igt_subtest_with_dynamic("non-zero-reason") {
+	igt_subtest("non-zero-reason") {
 		/* Reason field is only available on Gen8+ */
-		igt_require(intel_gen(devid) >= 8);
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+		// igt_require(intel_gen(devid) >= 8);
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_non_zero_reason(e);
 	}
 
@@ -6001,19 +6017,20 @@ igt_main
 	igt_subtest("non-sampling-read-error")
 		test_non_sampling_read_error();
 
-	igt_subtest_with_dynamic("enable-disable")
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("enable-disable")
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_enable_disable(e);
 
 	igt_describe("Test blocking read with default hrtimer frequency");
-	igt_subtest_with_dynamic("blocking") {
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("blocking") {
+		//__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_blocking(40 * 1000 * 1000 /* 40ms oa period */,
 				      false /* set_kernel_hrtimer */,
 				      5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
 				      e);
 	}
 
+	// Try this
 	igt_describe("Test blocking read with different hrtimer frequencies");
 	igt_subtest("blocking-parameterized") {
 		const struct intel_execution_engine2 _e = {
@@ -6021,7 +6038,7 @@ igt_main
 		      .instance = default_e2.instance,
 		};
 
-		igt_require(i915_perf_revision(drm_fd) >= 5);
+		// igt_require(i915_perf_revision(drm_fd) >= 5);
 
 		test_blocking(10 * 1000 * 1000 /* 10ms oa period */,
 			      true /* set_kernel_hrtimer */,
@@ -6034,8 +6051,8 @@ igt_main
 	}
 
 	igt_describe("Test polled read with default hrtimer frequency");
-	igt_subtest_with_dynamic("polling") {
-		__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+	igt_subtest("polling") {
+		// __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
 			test_polling(40 * 1000 * 1000 /* 40ms oa period */,
 				     false /* set_kernel_hrtimer */,
 				     5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
@@ -6097,9 +6114,9 @@ igt_main
 		igt_fixture igt_require(intel_gen(devid) >= 12);
 
 		igt_describe("Test MI REPORT PERF COUNT for Gen 12");
-		igt_subtest_with_dynamic("gen12-mi-rpc") {
-			igt_require(has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0));
-			__for_each_render_engine(drm_fd, e)
+		igt_subtest("gen12-mi-rpc") {
+			// igt_require(has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0));
+			// __for_each_render_engine(drm_fd, e)
 				gen12_test_mi_rpc(e);
 		}
 
@@ -6181,7 +6198,7 @@ igt_main
 
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
-
+#if 0
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
@@ -6196,4 +6213,5 @@ igt_main
 		intel_ctx_destroy(drm_fd, ctx);
 		close(drm_fd);
 	}
+#endif
 }
-- 
2.38.0



More information about the Intel-gfx-trybot mailing list