[PATCH i-g-t] perf: initial changes for testing XE OA
Ashutosh Dixit
ashutosh.dixit at intel.com
Sat Jul 1 00:24:48 UTC 2023
Currently the tests will run only on RPLP/ADL; see intel_perf_for_devinfo. By
obtaining values from i915 and hardcoding them in intel_perf_for_devinfo, the
tests can be made to run on other platforms.
The following tests can be made to run (for now only on render engine):
"i915-ref-count"
"oa-formats"
"oa-exponents"
"buffer-fill"
"non-zero-reason"
"enable-disable"
"blocking"
"blocking-parameterized"
"polling"
Current issues from above tests:
* unlanded report(s) head=0x0 tail=0x0 hw_tail=0x764100
* "blocking": Failed assertion: kernel_ns <= (test_duration_ns / 100ull)
Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
Enable gen12-mi-rpc
sudo ./build/tests/perf --r "i915-ref-count"
sudo ./build/tests/perf --r "oa-formats"
sudo ./build/tests/perf --r "oa-exponents"
sudo ./build/tests/perf --r "buffer-fill"
sudo ./build/tests/perf --r "non-zero-reason"
sudo ./build/tests/perf --r "enable-disable"
sudo ./build/tests/perf --r "blocking"
sudo ./build/tests/perf --r "blocking-parameterized"
sudo ./build/tests/perf --r "polling"
sudo ./build/tests/perf --r "gen12-mi-rpc"
Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
lib/i915/perf.c | 30 ++++++++++---
lib/intel_chipset.h | 2 +-
tests/i915/perf.c | 102 ++++++++++++++++++++++++++------------------
3 files changed, 86 insertions(+), 48 deletions(-)
diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index ddadb53b61c8..9aa66e080048 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -249,6 +249,25 @@ intel_perf_for_devinfo(uint32_t device_id,
uint64_t gt_min_freq,
uint64_t gt_max_freq,
const struct drm_i915_query_topology_info *topology)
+#if 1
+{
+ struct intel_perf *perf;
+
+ perf = calloc(1, sizeof(*perf));
+ perf->root_group = intel_perf_logical_counter_group_new(perf, NULL, "");
+ perf->devinfo.timestamp_frequency = timestamp_frequency;
+
+ IGT_INIT_LIST_HEAD(&perf->metric_sets);
+
+ // FIXME: Below are hardcoded for RPLP/ADL
+ perf->devinfo.n_eus = 96;
+ perf->devinfo.n_eu_slices = 1;
+ perf->devinfo.n_eu_sub_slices = 6;
+ intel_perf_load_metrics_adl(perf);
+
+ return perf;
+}
+#else
{
const struct intel_device_info *devinfo = intel_get_device_info(device_id);
struct intel_perf *perf;
@@ -452,6 +471,7 @@ intel_perf_for_devinfo(uint32_t device_id,
return perf;
}
+#endif
static int
getparam(int drm_fd, uint32_t param, uint32_t *val)
@@ -614,7 +634,7 @@ intel_perf_for_fd(int drm_fd, int gt)
{
uint32_t device_id;
uint32_t device_revision;
- uint32_t timestamp_frequency;
+ uint32_t timestamp_frequency = 19200000;
uint64_t gt_min_freq;
uint64_t gt_max_freq;
struct drm_i915_query_topology_info *topology;
@@ -623,7 +643,7 @@ intel_perf_for_fd(int drm_fd, int gt)
if (sysfs_dir_fd < 0)
return NULL;
-
+#if 0
#define read_sysfs_rps(fd, id, value) \
read_sysfs(fd, intel_sysfs_attr_id_to_name(fd, id, gt), value)
@@ -646,14 +666,14 @@ intel_perf_for_fd(int drm_fd, int gt)
topology = query_topology(drm_fd);
if (!topology)
return NULL;
-
+#endif
ret = intel_perf_for_devinfo(device_id,
device_revision,
timestamp_frequency,
gt_min_freq * 1000000,
gt_max_freq * 1000000,
topology);
- free(topology);
+ // free(topology);
return ret;
}
@@ -735,7 +755,7 @@ load_metric_set_config(struct intel_perf_metric_set *metric_set, int drm_fd)
config.n_flex_regs = metric_set->n_flex_regs;
config.flex_regs_ptr = (uintptr_t) metric_set->flex_regs;
-
+ printf("before DRM_IOCTL_I915_PERF_ADD_CONFIG\n");
ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
if (ret >= 0)
metric_set->perf_oa_metrics_set = ret;
diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h
index accfa90ef046..ee79b8b25a18 100644
--- a/lib/intel_chipset.h
+++ b/lib/intel_chipset.h
@@ -182,7 +182,7 @@ void intel_check_pch(void);
#define IS_SANDYBRIDGE(devid) (intel_get_device_info(devid)->is_sandybridge)
#define IS_IVYBRIDGE(devid) (intel_get_device_info(devid)->is_ivybridge)
#define IS_VALLEYVIEW(devid) (intel_get_device_info(devid)->is_valleyview)
-#define IS_HASWELL(devid) (intel_get_device_info(devid)->is_haswell)
+#define IS_HASWELL(devid) (0) // intel_get_device_info(devid)->is_haswell)
#define IS_BROADWELL(devid) (intel_get_device_info(devid)->is_broadwell)
#define IS_CHERRYVIEW(devid) (intel_get_device_info(devid)->is_cherryview)
#define IS_SKYLAKE(devid) (intel_get_device_info(devid)->is_skylake)
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 069ab8c0218c..5d014f7dc097 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -45,6 +45,9 @@
#include "igt_perf.h"
#include "igt_sysfs.h"
#include "drm.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+
/**
* TEST: perf
* Description: Test the i915 perf metrics streaming interface
@@ -556,6 +559,8 @@ static int i915_perf_revision(int fd)
drm_i915_getparam_t gp;
int value = 1, ret;
+ return 99;
+
gp.param = I915_PARAM_PERF_REVISION;
gp.value = &value;
ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
@@ -874,15 +879,17 @@ oar_unit_default_format(void)
* Temporary wrapper to distinguish mappings on !llc platforms,
* where it seems cache over GEM_MMAP_OFFSET is not flushed before execution.
*/
-static void *buf_map(int i915, struct intel_buf *buf, bool write)
+static void *buf_map(int fd, struct intel_buf *buf, bool write)
{
void *p;
-
+#if 1
+ p = buf->ptr = xe_bo_map(fd, buf->handle, buf->surface[0].size);
+#else
if (gem_has_llc(i915))
p = intel_buf_cpu_map(buf, write);
else
p = intel_buf_device_map(buf, write);
-
+#endif
return p;
}
@@ -3561,20 +3568,20 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
struct intel_bb *ibb;
struct intel_buf *buf;
#define INVALID_CTX_ID 0xffffffff
- uint32_t ctx_id = INVALID_CTX_ID;
+ uint32_t vm, engine;
uint32_t *report32;
size_t format_size_32;
struct oa_format format = get_oa_format(fmt);
/* Ensure perf_stream_paranoid is set to 1 by default */
- write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
+ // write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
bops = buf_ops_create(drm_fd);
- ctx_id = gem_context_create(drm_fd);
- igt_assert_neq(ctx_id, INVALID_CTX_ID);
- properties[1] = ctx_id;
+ vm = xe_vm_create(drm_fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+ engine = xe_engine_create(drm_fd, vm, xe_hw_engine(drm_fd, 0), 0);
+ properties[1] = engine;
- ibb = intel_bb_create_with_context(drm_fd, ctx_id, 0, NULL, BATCH_SZ);
+ ibb = intel_bb_create_with_context(drm_fd, engine, vm, NULL, BATCH_SZ);
buf = intel_buf_create(bops, 4096, 1, 8, 64,
I915_TILING_NONE, I915_COMPRESSION_NONE);
@@ -3584,7 +3591,7 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
stream_fd = __perf_open(drm_fd, &param, false);
-#define REPORT_ID 0xdeadbeef
+#define REPORT_ID 0xa5c0ffee
#define REPORT_OFFSET 0
emit_report_perf_count(ibb,
buf,
@@ -3593,7 +3600,7 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
intel_bb_flush_render(ibb);
intel_bb_sync(ibb);
- intel_buf_cpu_map(buf, false);
+ buf_map(drm_fd, buf, false);
report32 = buf->ptr;
format_size_32 = format.size >> 2;
dump_report(report32, format_size_32, "mi-rpc");
@@ -3619,7 +3626,8 @@ gen12_test_mi_rpc(const struct intel_execution_engine2 *e)
intel_buf_unmap(buf);
intel_buf_destroy(buf);
intel_bb_destroy(ibb);
- gem_context_destroy(drm_fd, ctx_id);
+ xe_engine_destroy(drm_fd, engine);
+ xe_vm_destroy(drm_fd, vm);
buf_ops_destroy(bops);
__perf_close(stream_fd);
}
@@ -5402,7 +5410,7 @@ read_i915_module_ref(void)
igt_assert(fp);
while ((len = getline(&line, &line_buf_size, fp)) > 0) {
- if (strncmp(line, "i915 ", 5) == 0) {
+ if (strncmp(line, "xe ", 3) == 0) {
unsigned long mem;
int ret = sscanf(line + 5, "%lu %u", &mem, &ref_count);
igt_assert(ret == 2);
@@ -5459,12 +5467,11 @@ test_i915_ref_count(void)
* should have been opened so far...
*/
igt_assert_eq(drm_fd, -1);
-
baseline = read_i915_module_ref();
igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
- drm_fd = __drm_open_driver(DRIVER_INTEL);
- igt_require_i915(drm_fd);
+ drm_fd = __drm_open_driver(DRIVER_XE);
+ // igt_require_i915(drm_fd);
devid = intel_get_drm_devid(drm_fd);
sysfs = perf_sysfs_open(drm_fd);
@@ -5900,9 +5907,11 @@ test_group_concurrent_oa_buffer_read(void)
igt_main
{
const intel_ctx_t *ctx;
- const struct intel_execution_engine2 *e;
+ struct intel_execution_engine2 __e22 = {};
+ const struct intel_execution_engine2 *e = &__e22;
igt_fixture {
+#if 0
struct stat sb;
/*
@@ -5916,14 +5925,19 @@ igt_main
== 0);
igt_require(stat("/proc/sys/dev/i915/oa_max_sample_rate", &sb)
== 0);
+#endif
}
+ // Try this
igt_subtest("i915-ref-count")
test_i915_ref_count();
+ // igt_exit();
+
+#if 0
igt_subtest("sysctl-defaults")
test_sysctl_defaults();
-
+#endif
igt_fixture {
/* We expect that the ref count test before these fixtures
* should have closed drm_fd...
@@ -5931,14 +5945,15 @@ igt_main
igt_assert_eq(drm_fd, -1);
/* Avoid the normal exithandler, our perf-fd interferes */
- drm_fd = __drm_open_driver(DRIVER_INTEL);
- igt_require_gem(drm_fd);
+ drm_fd = __drm_open_driver(DRIVER_XE);
+ // igt_require_gem(drm_fd);
+ xe_device_get(drm_fd);
devid = intel_get_drm_devid(drm_fd);
- sysfs = perf_sysfs_open(drm_fd);
+ // sysfs = perf_sysfs_open(drm_fd);
igt_require(init_sys_info());
-
+#if 0
ctx = intel_ctx_create_all_physical(drm_fd);
set_default_engine(ctx);
write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
@@ -5950,6 +5965,7 @@ igt_main
if (has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0))
render_copy = igt_get_render_copyfunc(devid);
+#endif
}
igt_subtest("non-system-wide-paranoid")
@@ -5967,16 +5983,16 @@ igt_main
igt_subtest("missing-sample-flags")
test_missing_sample_flags();
- igt_subtest_with_dynamic("oa-formats")
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("oa-formats")
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_oa_formats(e);
igt_subtest("invalid-oa-exponent")
test_invalid_oa_exponent();
igt_subtest("low-oa-exponent-permissions")
test_low_oa_exponent_permissions();
- igt_subtest_with_dynamic("oa-exponents")
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("oa-exponents")
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_oa_exponents(e);
igt_subtest("per-context-mode-unprivileged") {
@@ -5984,15 +6000,15 @@ igt_main
test_per_context_mode_unprivileged();
}
- igt_subtest_with_dynamic("buffer-fill")
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("buffer-fill")
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_buffer_fill(e);
igt_describe("Test that reason field in OA reports is never 0 on Gen8+");
- igt_subtest_with_dynamic("non-zero-reason") {
+ igt_subtest("non-zero-reason") {
/* Reason field is only available on Gen8+ */
- igt_require(intel_gen(devid) >= 8);
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ // igt_require(intel_gen(devid) >= 8);
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_non_zero_reason(e);
}
@@ -6001,19 +6017,20 @@ igt_main
igt_subtest("non-sampling-read-error")
test_non_sampling_read_error();
- igt_subtest_with_dynamic("enable-disable")
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("enable-disable")
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_enable_disable(e);
igt_describe("Test blocking read with default hrtimer frequency");
- igt_subtest_with_dynamic("blocking") {
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("blocking") {
+ //__for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_blocking(40 * 1000 * 1000 /* 40ms oa period */,
false /* set_kernel_hrtimer */,
5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
e);
}
+ // Try this
igt_describe("Test blocking read with different hrtimer frequencies");
igt_subtest("blocking-parameterized") {
const struct intel_execution_engine2 _e = {
@@ -6021,7 +6038,7 @@ igt_main
.instance = default_e2.instance,
};
- igt_require(i915_perf_revision(drm_fd) >= 5);
+ // igt_require(i915_perf_revision(drm_fd) >= 5);
test_blocking(10 * 1000 * 1000 /* 10ms oa period */,
true /* set_kernel_hrtimer */,
@@ -6034,8 +6051,8 @@ igt_main
}
igt_describe("Test polled read with default hrtimer frequency");
- igt_subtest_with_dynamic("polling") {
- __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
+ igt_subtest("polling") {
+ // __for_random_engine_in_each_group(perf_oa_groups, ctx, e)
test_polling(40 * 1000 * 1000 /* 40ms oa period */,
false /* set_kernel_hrtimer */,
5 * 1000 * 1000 /* default 5ms/200Hz hrtimer */,
@@ -6097,9 +6114,9 @@ igt_main
igt_fixture igt_require(intel_gen(devid) >= 12);
igt_describe("Test MI REPORT PERF COUNT for Gen 12");
- igt_subtest_with_dynamic("gen12-mi-rpc") {
- igt_require(has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0));
- __for_each_render_engine(drm_fd, e)
+ igt_subtest("gen12-mi-rpc") {
+ // igt_require(has_class_instance(drm_fd, I915_ENGINE_CLASS_RENDER, 0));
+ // __for_each_render_engine(drm_fd, e)
gen12_test_mi_rpc(e);
}
@@ -6181,7 +6198,7 @@ igt_main
igt_subtest("whitelisted-registers-userspace-config")
test_whitelisted_registers_userspace_config();
-
+#if 0
igt_fixture {
/* leave sysctl options in their default state... */
write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
@@ -6196,4 +6213,5 @@ igt_main
intel_ctx_destroy(drm_fd, ctx);
close(drm_fd);
}
+#endif
}
--
2.38.0
More information about the Intel-gfx-trybot
mailing list