[PATCH i-g-t 2/2] tests/intel/xe_eu_stall: Add tests for EU stall sampling

Dixit, Ashutosh ashutosh.dixit at intel.com
Tue Feb 4 02:11:10 UTC 2025


On Tue, 31 Dec 2024 01:46:29 -0800, Harish Chegondi wrote:
>

Hi Harish,

> diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
> index 56163eb91..d4aff5d01 100644
> --- a/include/drm-uapi/xe_drm.h
> +++ b/include/drm-uapi/xe_drm.h
> @@ -700,6 +700,7 @@ struct drm_xe_device_query {
>  #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
>  #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION	7
>  #define DRM_XE_DEVICE_QUERY_OA_UNITS		8
> +#define DRM_XE_DEVICE_QUERY_EU_STALL		9
>	/** @query: The type of data to query */
>	__u32 query;
>
> @@ -1397,6 +1398,8 @@ struct drm_xe_wait_user_fence {
>  enum drm_xe_observation_type {
>	/** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */
>	DRM_XE_OBSERVATION_TYPE_OA,
> +	/** @DRM_XE_OBSERVATION_TYPE_EU_STALL: EU stall sampling observation stream type */
> +	DRM_XE_OBSERVATION_TYPE_EU_STALL,
>  };
>
>  /**
> @@ -1713,6 +1716,77 @@ struct drm_xe_oa_stream_info {
>	__u64 reserved[3];
>  };
>
> +/**
> + * enum drm_xe_eu_stall_property_id - EU stall sampling input property ids.
> + *
> + * These properties are passed to the driver at open as a chain of
> + * @drm_xe_ext_set_property structures with @property set to these
> + * properties' enums and @value set to the corresponding values of these
> + * properties. @drm_xe_user_extension base.name should be set to
> + * @DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY.
> + *
> + * With the file descriptor obtained from open, user space must enable
> + * the EU stall stream fd with @DRM_XE_OBSERVATION_IOCTL_ENABLE before
> + * calling read(). EIO errno from read() indicates HW dropped data
> + * due to full buffer.
> + */
> +enum drm_xe_eu_stall_property_id {
> +#define DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY		0
> +	/**
> +	 * @DRM_XE_EU_STALL_PROP_GT_ID: @gt_id of the GT on which
> +	 * EU stall data will be captured.
> +	 */
> +	DRM_XE_EU_STALL_PROP_GT_ID = 1,
> +
> +	/**
> +	 * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in
> +	 * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall
> +	 */
> +	DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
> +
> +	/**
> +	 * @DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS: Minimum number of
> +	 * EU stall data reports to be present in the kernel buffer
> +	 * before unblocking poll or read that is blocked.
> +	 */
> +	DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS,
> +};
> +
> +/**
> + * struct drm_xe_query_eu_stall - Information about EU stall sampling.
> + *
> + * If a query is made with a struct @drm_xe_device_query where .query
> + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses
> + * struct @drm_xe_query_eu_stall in .data.
> + */
> +struct drm_xe_query_eu_stall {
> +	/** @extensions: Pointer to the first extension struct, if any */
> +	__u64 extensions;
> +
> +	/** @capabilities: EU stall capabilities bit-mask */
> +	__u64 capabilities;
> +#define DRM_XE_EU_STALL_CAPS_BASE		(1 << 0)
> +
> +	/** @record_size: size of each EU stall data record */
> +	__u64 record_size;
> +
> +	/** @per_xecore_buf_size: Per XeCore buffer size */
> +	__u64 per_xecore_buf_size;
> +
> +	/** @num_sampling_rates: Number of sampling rates supported */
> +	__u64 num_sampling_rates;
> +
> +	/** @reserved: Reserved */
> +	__u64 reserved[5];
> +
> +	/**
> +	 * @sampling_rates: Flexible array of sampling rates
> +	 * sorted in the fastest to slowest order.
> +	 * Sampling rates are specified in GPU clock cycles.
> +	 */
> +	__u64 sampling_rates[];
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif

For the final merge, the uapi changes will be merged by syncing the kernel and IGT
uapi headers. So maybe separate the uapi changes out into their own patch, so we
can R-b the real EU stall test patch.

> diff --git a/tests/intel/xe_eu_stall.c b/tests/intel/xe_eu_stall.c
> new file mode 100644
> index 000000000..754d2c379
> --- /dev/null
> +++ b/tests/intel/xe_eu_stall.c
> @@ -0,0 +1,579 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright(c) 2024 Intel Corporation. All rights reserved.
> + */
> +
> +/**
> + * TEST: Basic tests for EU stall sampling functionality
> + * Category: Core
> + * Functionality: EU stall sampling
> + * Mega feature: Performance interface
> + * Test category: xe
> + * Sub-category: Performance
> + * Run type: FULL
> + *
> + * SUBTEST: non-blocking-read
> + * Description: Verify non-blocking read of EU stall data during a workload run
> + *
> + * SUBTEST: blocking-read
> + * Description: Verify blocking read of EU stall data during a workload run
> + *
> + * SUBTEST: unprivileged-access
> + * Description: Verify unprivileged open of a EU stall data stream fd
> + */
> +
> +#include <poll.h>
> +#include <fcntl.h>
> +#include <sys/wait.h>
> +#include <sys/ioctl.h>
> +
> +#include "igt.h"
> +#include "igt_core.h"
> +#include "xe_drm.h"
> +#include "xe/xe_oa.h"
> +#include "xe/xe_ioctl.h"
> +
> +#define OBSERVATION_PARANOID	"/proc/sys/dev/xe/observation_paranoid"
> +
> +#define NUM_ITERS_GPGPU_FILL	100
> +#define DEFAULT_GT_ID		0
> +#define DEFAULT_NUM_REPORTS	1
> +#define DEFAULT_SAMPLE_RATE	(251 * 4)
> +#define DEFAULT_USER_BUF_SIZE	(64 * 512 * 1024)
> +
> +#define WIDTH		64
> +#define HEIGHT		64
> +#define COLOR_88	0x88
> +#define COLOR_4C	0x4c
> +
> +static char *p_args[8];
> +static uint8_t p_gt_id = DEFAULT_GT_ID;
> +static uint32_t p_rate = DEFAULT_SAMPLE_RATE;
> +static uint32_t p_user = DEFAULT_USER_BUF_SIZE;
> +static uint32_t p_num_reports = DEFAULT_NUM_REPORTS;
> +
> +static volatile bool child_is_running = true;
> +
> +/**
> + * struct xe_eu_stall_data_pvc - EU stall data format for PVC
> + *
> + * Bits		Field
> + * 0  to 28	IP (addr)
> + * 29 to 36	active count
> + * 37 to 44	other count
> + * 45 to 52	control count
> + * 53 to 60	pipestall count
> + * 61 to 68	send count
> + * 69 to 76	dist_acc count
> + * 77 to 84	sbid count
> + * 85 to 92	sync count
> + * 93 to 100	inst_fetch count
> + */
> +struct xe_eu_stall_data_pvc {
> +	__u64 ip_addr:29;
> +	__u64 active_count:8;
> +	__u64 other_count:8;
> +	__u64 control_count:8;
> +	__u64 pipestall_count:8;
> +	__u64 send_count:8;
> +	__u64 dist_acc_count:8;
> +	__u64 sbid_count:8;
> +	__u64 sync_count:8;
> +	__u64 inst_fetch_count:8;
> +	__u64 unused_bits:27;
> +	__u64 unused[6];
> +} __attribute__((packed));
> +
> +/**
> + * struct xe_eu_stall_data_xe2 - EU stall data format for LNL, BMG
> + *
> + * Bits		Field
> + * 0  to 28	IP (addr)
> + * 29 to 36	Tdr count
> + * 37 to 44	other count
> + * 45 to 52	control count
> + * 53 to 60	pipestall count
> + * 61 to 68	send count
> + * 69 to 76	dist_acc count
> + * 77 to 84	sbid count
> + * 85 to 92	sync count
> + * 93 to 100	inst_fetch count
> + * 101 to 108	Active count
> + * 109 to 111	Exid
> + * 112		EndFlag (is always 1)
> + */
> +struct xe_eu_stall_data_xe2 {
> +	__u64 ip_addr:29;
> +	__u64 tdr_count:8;
> +	__u64 other_count:8;
> +	__u64 control_count:8;
> +	__u64 pipestall_count:8;
> +	__u64 send_count:8;
> +	__u64 dist_acc_count:8;
> +	__u64 sbid_count:8;
> +	__u64 sync_count:8;
> +	__u64 inst_fetch_count:8;
> +	__u64 active_count:8;
> +	__u64 ex_id:3;
> +	__u64 end_flag:1;
> +	__u64 unused_bits:15;
> +	__u64 unused[6];
> +} __attribute__((packed));
> +
> +union xe_eu_stall_data {
> +	struct xe_eu_stall_data_pvc pvc;
> +	struct xe_eu_stall_data_xe2 xe2;
> +};
> +
> +typedef struct {
> +	int drm_fd;
> +	uint32_t devid;
> +	struct buf_ops *bops;
> +} data_t;
> +
> +static struct intel_buf *
> +create_buf(data_t *data, int width, int height, uint8_t color, uint64_t region)
> +{
> +	struct intel_buf *buf;
> +	uint8_t *ptr;
> +	int i;
> +
> +	buf = calloc(1, sizeof(*buf));
> +	igt_assert(buf);
> +
> +	buf = intel_buf_create(data->bops, width/4, height, 32, 0,
> +			       I915_TILING_NONE, 0);
> +
> +	ptr = xe_bo_map(data->drm_fd, buf->handle, buf->surface[0].size);

For the size, maybe buf->bo_size is sufficient — see commit 7812065f4aeb. If so,
replace it everywhere. If you think the current code is fine, it is OK to leave as is too.

> +
> +	for (i = 0; i < buf->surface[0].size; i++)
> +		ptr[i] = color;
> +
> +	munmap(ptr, buf->surface[0].size);

> +
> +	return buf;
> +}
> +
> +static void buf_check(uint8_t *ptr, int width, int x, int y, uint8_t color)
> +{
> +	uint8_t val;
> +
> +	val = ptr[y * width + x];
> +	igt_assert_f(val == color,
> +		     "Expected 0x%02x, found 0x%02x at (%d,%d)\n",
> +		     color, val, x, y);
> +}
> +
> +static void gpgpu_fill(data_t *data, igt_fillfunc_t fill, uint32_t region,
> +		       uint32_t surf_width, uint32_t surf_height,
> +		       uint32_t x, uint32_t y,
> +		       uint32_t width, uint32_t height)
> +{
> +	struct intel_buf *buf;
> +	uint8_t *ptr;
> +	int i, j;
> +
> +	buf = create_buf(data, surf_width, surf_height, COLOR_88, region);
> +	ptr = xe_bo_map(data->drm_fd, buf->handle, buf->surface[0].size);
> +
> +	for (i = 0; i < surf_width; i++)
> +		for (j = 0; j < surf_height; j++)
> +			buf_check(ptr, surf_width, i, j, COLOR_88);
> +
> +	fill(data->drm_fd, buf, x, y, width, height, COLOR_4C);
> +
> +	for (i = 0; i < surf_width; i++)
> +		for (j = 0; j < surf_height; j++)
> +			if (i >= x && i < width + x &&
> +			    j >= y && j < height + y)
> +				buf_check(ptr, surf_width, i, j, COLOR_4C);
> +			else
> +				buf_check(ptr, surf_height, i, j, COLOR_88);
> +
> +	munmap(ptr, buf->surface[0].size);
> +}
> +
> +static int run_gpgpu_fill(int drm_fd, uint32_t devid)
> +{
> +	data_t data = {drm_fd, devid, NULL};
> +	igt_fillfunc_t fill_fn = NULL;
> +	unsigned i;
> +
> +	data.bops = buf_ops_create(drm_fd);
> +	fill_fn = igt_get_gpgpu_fillfunc(devid);
> +
> +	for (i = 0; i < NUM_ITERS_GPGPU_FILL; i++)
> +		gpgpu_fill(&data, fill_fn, 0, WIDTH, HEIGHT, 16, 16, WIDTH / 2, HEIGHT / 2);

How long does this test run for? Instead of a fixed NUM_ITERS_GPGPU_FILL, determine
the run time of the test. Another idea is to pre-select a time, say 5 seconds,
and kill the child running the workload after those 5 seconds. That way you have
a more deterministic run time. But let's first see how long we're running this
for; execution time is a concern in CI.

> +
> +	buf_ops_destroy(data.bops);
> +
> +	return EXIT_SUCCESS;
> +}
> +
> +static uint64_t
> +read_u64_file(const char *path)
> +{
> +	FILE *f;
> +	uint64_t val;
> +
> +	f = fopen(path, "r");
> +	igt_assert(f);
> +
> +	igt_assert_eq(fscanf(f, "%"PRIu64, &val), 1);
> +
> +	fclose(f);
> +
> +	return val;
> +}
> +
> +static void
> +write_u64_file(const char *path, uint64_t val)
> +{
> +	FILE *f;
> +
> +	f = fopen(path, "w");
> +	igt_assert(f);
> +
> +	igt_assert(fprintf(f, "%"PRIu64, val) > 0);
> +
> +	fclose(f);
> +}
> +
> +static void set_fd_flags(int fd, int flags)
> +{
> +	int old = fcntl(fd, F_GETFL, 0);
> +
> +	igt_assert_lte(0, old);
> +	igt_assert_eq(0, fcntl(fd, F_SETFL, old | flags));
> +}
> +
> +static inline void enable_paranoid(void)
> +{
> +	write_u64_file(OBSERVATION_PARANOID, 1);
> +}
> +
> +static inline void disable_paranoid(void)
> +{
> +	write_u64_file(OBSERVATION_PARANOID, 0);
> +}
> +
> +/*
> + * Test to verify that only a privileged process can open
> + * a EU stall data stream file descriptor.
> + */
> +static void
> +test_non_privileged_access(int drm_fd)
> +{
> +	int paranoid, stream_fd;
> +
> +	paranoid = read_u64_file(OBSERVATION_PARANOID);
> +
> +	igt_fork(child, 1) {
> +		uint64_t properties[] = {
> +			DRM_XE_EU_STALL_PROP_GT_ID, p_gt_id,
> +			DRM_XE_EU_STALL_PROP_SAMPLE_RATE, p_rate,
> +			DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, p_num_reports,
> +		};
> +
> +		struct intel_xe_oa_open_prop props = {
> +			.num_properties = sizeof(properties) / 16,
> +			.properties_ptr = to_user_pointer(properties),
> +		};
> +
> +		if (!paranoid)
> +			enable_paranoid();
> +
> +		igt_drop_root();
> +
> +		intel_xe_perf_ioctl_err(drm_fd, DRM_XE_OBSERVATION_TYPE_EU_STALL,
> +					DRM_XE_OBSERVATION_OP_STREAM_OPEN, &props, EACCES);
> +	}
> +
> +	igt_waitchildren();
> +
> +	igt_fork(child, 1) {
> +		uint64_t properties[] = {
> +			DRM_XE_EU_STALL_PROP_GT_ID, p_gt_id,
> +			DRM_XE_EU_STALL_PROP_SAMPLE_RATE, p_rate,
> +			DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, p_num_reports,
> +		};
> +
> +		struct intel_xe_oa_open_prop props = {
> +			.num_properties = sizeof(properties) / 16,
> +			.properties_ptr = to_user_pointer(properties),
> +		};
> +
> +		disable_paranoid();
> +
> +		igt_drop_root();
> +
> +		stream_fd = intel_xe_perf_ioctl(drm_fd, DRM_XE_OBSERVATION_TYPE_EU_STALL,
> +						DRM_XE_OBSERVATION_OP_STREAM_OPEN, &props);
> +		igt_require_fd(stream_fd);
> +		close(stream_fd);
> +	}
> +
> +	igt_waitchildren();
> +
> +	/* restore paranoid state */
> +	if (paranoid)
> +		enable_paranoid();
> +}
> +
> +static int wait_child(struct igt_helper_process *child_proc)
> +{
> +	int status;
> +
> +	status = igt_wait_helper(child_proc);
> +	if (WIFEXITED(status))
> +		return WEXITSTATUS(status);
> +	if (WIFSIGNALED(status))
> +		return (128 + WTERMSIG(status));
> +	return 0;
> +}
> +
> +static void sighandler(int sig)
> +{
> +	child_is_running = false;
> +}
> +
> +static void parse_eu_stall_data(uint32_t devid, uint8_t *buf, size_t size)
> +{
> +	int i;
> +	uint8_t *sample_addr;
> +	union xe_eu_stall_data stall_data;
> +
> +	for (i = 0; i < size / sizeof(stall_data); i++) {
> +		sample_addr = buf + (i * sizeof(stall_data));
> +		memcpy(&stall_data, sample_addr, sizeof(stall_data));
> +		if (IS_PONTEVECCHIO(devid)) {
> +			igt_info("ip: 0x%08x ", stall_data.pvc.ip_addr);
> +			igt_info("active: %u ", stall_data.pvc.active_count);
> +			igt_info("other: %u ", stall_data.pvc.other_count);
> +			igt_info("control: %u ", stall_data.pvc.control_count);
> +			igt_info("pipestall: %u ", stall_data.pvc.pipestall_count);
> +			igt_info("send: %u ", stall_data.pvc.send_count);
> +			igt_info("dist_acc: %u ", stall_data.pvc.dist_acc_count);
> +			igt_info("sbid: %u ", stall_data.pvc.sbid_count);
> +			igt_info("sync: %u ", stall_data.pvc.sync_count);
> +			igt_info("inst_fetch: %u\n", stall_data.pvc.inst_fetch_count);
> +		} else {
> +			igt_info("ip: 0x%08x ", stall_data.xe2.ip_addr);
> +			igt_info("tdr: %u ", stall_data.xe2.tdr_count);
> +			igt_info("other: %u ", stall_data.xe2.other_count);
> +			igt_info("control: %u ", stall_data.xe2.control_count);
> +			igt_info("pipestall: %u ", stall_data.xe2.pipestall_count);
> +			igt_info("send: %u ", stall_data.xe2.send_count);
> +			igt_info("dist_acc: %u ", stall_data.xe2.dist_acc_count);
> +			igt_info("sbid: %u ", stall_data.xe2.sbid_count);
> +			igt_info("sync: %u ", stall_data.xe2.sync_count);
> +			igt_info("inst_fetch: %u ", stall_data.xe2.inst_fetch_count);
> +			igt_info("active: %u ", stall_data.xe2.active_count);
> +			igt_info("ex_id: %u ", stall_data.xe2.ex_id);
> +			igt_info("end_flag: %u\n", stall_data.xe2.end_flag);
> +		}

As Kamil commented, this is excessive output. If you really need to see
this in the CI logs for debugging (unlikely), we can keep igt_info; otherwise
change it to igt_debug.

Think about changing the other igt_info calls to igt_debug too, based on the
same criterion. With igt_debug, you can simply run the IGT binary with --debug
and you will get all of the output.

> +	}
> +}
> +
> +/*
> + * Test enables EU stall counters, runs a given workload on a child process
> + * while the parent process reads the stall counters data, disables EU stall
> + * counters once the workload completes execution.
> + */
> +static void
> +test_eustall(int drm_fd, uint32_t devid, bool blocking_read)
> +{
> +	uint32_t num_samples = 0, num_drops = 0;
> +	struct igt_helper_process work_load = { };

Space between braces is not needed.

> +	struct sigaction sa = { 0 };
> +	int ret, flags, stream_fd;
> +	uint64_t total_size = 0;
> +	uint8_t *buf;
> +
> +	uint64_t properties[] = {
> +		DRM_XE_EU_STALL_PROP_GT_ID, p_gt_id,
> +		DRM_XE_EU_STALL_PROP_SAMPLE_RATE, p_rate,
> +		DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, p_num_reports,
> +	};
> +
> +	struct intel_xe_oa_open_prop props = {
> +		.num_properties = sizeof(properties) / 16,

Use 'ARRAY_SIZE(properties) / 2' everywhere, so the intent is clear.

> +		.properties_ptr = to_user_pointer(properties),
> +	};
> +
> +	struct drm_xe_query_eu_stall *eu_stall_data;

Maybe give this a query_-prefixed name to make it clearer; xe_eu_stall_data is
a different struct.

> +	struct drm_xe_device_query query = {
> +		.extensions = 0,
> +		.query = DRM_XE_DEVICE_QUERY_EU_STALL,
> +		.size = 0,
> +		.data = 0,
> +	};
> +
> +	igt_info("User buffer size: %u\n", p_user);
> +	if (p_args[0])
> +		igt_info("Workload: %s\n", p_args[0]);
> +	else
> +		igt_info("Workload: GPGPU fill\n");
> +
> +	buf = malloc(p_user);
> +	igt_assert(buf);
> +
> +	igt_assert_eq(igt_ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> +	igt_assert_neq(query.size, 0);
> +
> +	eu_stall_data = malloc(query.size);
> +	igt_assert(eu_stall_data);
> +
> +	query.data = to_user_pointer(eu_stall_data);
> +	igt_assert_eq(igt_ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> +
> +	igt_assert(eu_stall_data->num_sampling_rates > 0);
> +	/* Set sampling rate to the fastest available one */
> +	properties[3] = eu_stall_data->sampling_rates[0];
> +	igt_info("Sampling Rate: %u\n", (unsigned)eu_stall_data->sampling_rates[0]);
> +
> +	stream_fd = intel_xe_perf_ioctl(drm_fd, DRM_XE_OBSERVATION_TYPE_EU_STALL,
> +					DRM_XE_OBSERVATION_OP_STREAM_OPEN, &props);
> +	igt_require_fd(stream_fd);
> +
> +	if (!blocking_read)
> +		flags = O_CLOEXEC | O_NONBLOCK;
> +	else
> +		flags = O_CLOEXEC;
> +
> +	set_fd_flags(stream_fd, flags);
> +
> +	do_ioctl(stream_fd, DRM_XE_OBSERVATION_IOCTL_ENABLE, 0);
> +
> +	sa.sa_handler = sighandler;
> +	if (sigaction(SIGCHLD, &sa, NULL) == -1) {
> +		igt_critical("Failed to register SIGCHLD signal handler \n");
> +		igt_fail(IGT_EXIT_FAILURE);
> +	}
> +
> +	child_is_running = true;
> +	/* Child process runs the workload */
> +	igt_fork_helper(&work_load) {
> +		setpgid(0, 0);
> +		if (p_args[0]) {
> +			execv(p_args[0], p_args);
> +			_exit(EXIT_FAILURE);
> +		} else {
> +			_exit(run_gpgpu_fill(drm_fd, devid));
> +		}
> +	}
> +	/* Parent process reads the EU stall counters data */
> +	do {
> +		if (!blocking_read) {
> +			struct pollfd pollfd = { .fd = stream_fd, .events = POLLIN };
> +			ret = poll(&pollfd, 1, 0);
> +			if (ret <= 0)
> +				continue;
> +			igt_assert_eq(ret, 1);
> +			igt_assert(pollfd.revents & POLLIN);
> +		}
> +		ret = read(stream_fd, buf, p_user);
> +		if (ret > 0) {
> +			total_size += ret;
> +			parse_eu_stall_data(devid, buf, ret);
> +			num_samples += ret / eu_stall_data->record_size;
> +		} else if ((ret < 0) && (errno != EAGAIN)) {
> +			if (errno == EINTR)
> +				continue;
> +			if (errno == EIO) {
> +				num_drops++;
> +				continue;
> +			}
> +			igt_critical("read() - ret: %d, errno: %d \n", ret, errno);
> +			kill(-work_load.pid, SIGTERM);
> +			break;
> +		}
> +	} while(child_is_running);
> +
> +	igt_info("Total size read: %lu\n", total_size);
> +	igt_info("Number of samples: %u\n", num_samples);
> +	igt_info("Number of drops reported: %u\n", num_drops);
> +
> +	do_ioctl(stream_fd, DRM_XE_OBSERVATION_IOCTL_DISABLE, 0);
> +
> +	close(stream_fd);
> +	free(buf);
> +
> +	ret = wait_child(&work_load);
> +	igt_assert_f(ret == 0, "waitpid() - ret: %d, errno: %d \n", ret, errno);
> +	igt_assert_f(num_samples, "No EU stalls detected during the workload \n");
> +}
> +
> +static int opt_handler(int opt, int opt_index, void *data)
> +{
> +	switch (opt) {
> +	case 'e':
> +		p_num_reports = strtoul(optarg, NULL, 0);
> +		break;
> +	case 'g':
> +		p_gt_id = strtoul(optarg, NULL, 0);
> +		break;
> +	case 'r':
> +		p_rate = strtoul(optarg, NULL, 0);
> +		break;
> +	case 'u':
> +		p_user = strtoul(optarg, NULL, 0);
> +		break;
> +	case 'w':
> +		p_args[0] = optarg;
> +		p_args[1] = NULL;
> +		break;
> +	default:
> +		return IGT_OPT_HANDLER_ERROR;
> +	}
> +
> +	return IGT_OPT_HANDLER_SUCCESS;
> +}
> +
> +const char *help_str =  "  --rate | -r\t\tSampling rate in GPU cycles\n"
> +			"  --user_buf_sz | -u\t\tUser buffer size\n"
> +			"  --gt_id | -g\t\tGT ID for the GT to sample EU stalls\n"
> +			"  --event_count | -e\t\tPoll event report count\n"
> +			"  --workload | -w\t\tWorkload to run\n";
> +
> +static struct option long_options[] = {
> +	{"rate", 0, 0, 'r'},
> +	{"user_buf_sz", 0, 0, 'u'},
> +	{"gt_id", 0, 0, 'g'},
> +	{"event_count", 0, 0, 'e'},
> +	{"workload", 0, 0, 'w'},
> +	{ NULL, 0, 0, 0 }
> +};
> +
> +igt_main_args("e:g:r:u:w:", long_options, help_str, opt_handler, NULL)

How about ordering these options the same as in the array above, so it's easier to read?

> +{
> +	int drm_fd;
> +	uint32_t devid;
> +	bool blocking_read = true;
> +
> +	igt_fixture {
> +		drm_fd = drm_open_driver(DRIVER_XE);
> +		igt_require_fd(drm_fd);
> +		devid = intel_get_drm_devid(drm_fd);
> +		igt_require(IS_PONTEVECCHIO(devid) || intel_graphics_ver(devid) >= IP_VER(20, 0));
> +		igt_require_f(igt_get_gpgpu_fillfunc(devid), "no gpgpu-fill function\n");
> +	}
> +
> +	igt_describe("Verify non-blocking read of EU stall data during a workload run");
> +	igt_subtest("non-blocking-read") {
> +		test_eustall(drm_fd, devid, !blocking_read);
> +	}
> +
> +	igt_describe("Verify blocking read of EU stall data during a workload run");
> +	igt_subtest("blocking-read") {
> +		test_eustall(drm_fd, devid, blocking_read);
> +	}
> +
> +	igt_describe("Verify that unprivileged open of a EU stall data fd fails");
> +	igt_subtest("unprivileged-access")
> +		test_non_privileged_access(drm_fd);
> +
> +	igt_fixture {
> +		drm_close_driver(drm_fd);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 89bba6454..b60f0f1ec 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -281,6 +281,7 @@ intel_xe_progs = [
>	'xe_dma_buf_sync',
>	'xe_debugfs',
>	'xe_drm_fdinfo',
> +	'xe_eu_stall',
>	'xe_evict',
>	'xe_evict_ccs',
>	'xe_exec_atomic',
> @@ -387,6 +388,7 @@ extra_dependencies = {
>	'perf': [ lib_igt_i915_perf ],
>	'perf_pmu':  [ lib_igt_perf ],
>	'sw_sync': [ libatomic ],
> +	'xe_eu_stall': [ lib_igt_xe_oa ],
>	'xe_oa': [ lib_igt_xe_oa ],
>  }
>
> --
> 2.47.1
>

Thanks.
--
Ashutosh


More information about the igt-dev mailing list