[PATCH] tests/xe_exec_reset: Add readout of devcoredump
Kamil Konieczny
kamil.konieczny at linux.intel.com
Thu Oct 24 15:30:24 UTC 2024
Hi Maarten,
On 2024-10-18 at 19:38:17 +0200, Maarten Lankhorst wrote:
in subject add '/intel/' prefix, so instead of:
[PATCH] tests/xe_exec_reset: Add readout of devcoredump
better:
[PATCH] tests/intel/xe_exec_reset: Add readout of devcoredump
> We're mostly testing if we can read the devcoredump, clear the
> devcoredump at the start of each subtest, and read it out at the end
> of the test.
Code looks good, I have few nits, see below.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
> tests/intel/xe_exec_reset.c | 134 ++++++++++++++++++++++++++++++++++--
> 1 file changed, 129 insertions(+), 5 deletions(-)
>
> diff --git a/tests/intel/xe_exec_reset.c b/tests/intel/xe_exec_reset.c
> index 43ef1e334..96e0a85d3 100644
> --- a/tests/intel/xe_exec_reset.c
> +++ b/tests/intel/xe_exec_reset.c
> @@ -12,7 +12,12 @@
> * Test category: functionality test
> */
>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +
> #include "igt.h"
> +#include "lib/igt_io.h"
> #include "lib/igt_syncobj.h"
> #include "lib/intel_reg.h"
> #include "xe_drm.h"
> @@ -25,6 +30,74 @@
>
> #define SYNC_OBJ_SIGNALED (0x1 << 0)
>
> +static int sysfd = -1;
> +
> +static u64 dummy_size;
> +static void *dummy;
> +
> +/* Clear any previous devcoredump */
> +static void tryclear_hang(void)
> +{
> + int fd;
> + char buf[256];
Could it be a little larger?
> +
> + if (sysfd < 0)
> + return;
> +
> + fd = openat(sysfd, "devcoredump/data", O_RDWR);
> + if (fd < 0)
> + return;
> +
> + /* Read is optional, but see comment below why we do it */
This would be ok if called from test named 'read-devcoredump'
or at least add here an igt_info() about clearing a dump.
> + while (read(fd, buf, sizeof(buf)) > 0)
> + { }
> + write(fd, "1", 1);
> + close(fd);
> +}
> +
> +/*
> + * Helper to read and clear devcore. We want to read it completely to ensure
> + * we catch any kernel side regressions like:
> + * https://gitlab.freedesktop.org/drm/msm/-/issues/20
> + */
> +static void
> +read_and_clear_hang(void)
> +{
> + char buf[0x1000];
> + int fd;
> +
> + if (sysfd < 0)
> + return;
> +
> + fd = openat(sysfd, "devcoredump/data", O_RDWR);
Please don't repeat code, make it a separate function, like:
fd = open_devcoredump();
> + igt_assert(fd >= 0);
Why assert here? If any, use igt_assert_f() and explain what
failed here. For eaxmple, such info is not very helpfull:
(xe_exec_reset:14850) CRITICAL: Failed assertion: fd >= 0
(xe_exec_reset:14850) CRITICAL: Last errno: 2, No such file or directory
btw please respond to CI report, one test is failing:
sudo build/tests/xe_exec_reset --r spin
> +
> + /*
> + * We want to read the entire file but we can throw away the
> + * contents.. we just want to make sure that we exercise the
> + * kernel side codepaths hit when reading the devcore from
> + * sysfs
> + */
> + igt_debug("---- begin coredump ----\n");
> + while (1) {
> + ssize_t ret;
> +
> + ret = igt_readn(fd, buf, sizeof(buf) - 1);
> + igt_assert(ret >= 0);
> + if (ret == 0)
> + break;
> + buf[ret] = '\0';
> + igt_debug("%s", buf);
> + }
> +
> + igt_debug("---- end coredump ----\n");
Could you also print with igt_info how big is it?
> +
> + /* Clear the devcore: */
> + igt_writen(fd, "1", 1);
> +
> + close(fd);
> +}
> +
> /**
> * SUBTEST: spin
> * Description: test spin
> @@ -68,7 +141,11 @@ static void test_spin(int fd, struct drm_xe_engine_class_instance *eci,
> DRM_SYNCOBJ_CREATE_SIGNALED : 0);
>
> sync[0].handle = syncobj_create(fd, 0);
> - xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
> + xe_vm_bind_async_flags(fd, vm, 0, bo, 0, addr, bo_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
> +
> + xe_vm_bind_userptr_async_flags(fd, vm, 0, to_user_pointer(dummy), addr + bo_size, dummy_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
>
> #define N_TIMES 4
> for (i = 0; i < N_TIMES; ++i) {
> @@ -103,6 +180,8 @@ static void test_spin(int fd, struct drm_xe_engine_class_instance *eci,
> munmap(spin, bo_size);
> gem_close(fd, bo);
> xe_vm_destroy(fd, vm);
> +
> + read_and_clear_hang();
> }
>
> #define MAX_N_EXECQUEUES 16
> @@ -112,6 +191,7 @@ static void test_spin(int fd, struct drm_xe_engine_class_instance *eci,
> #define VIRTUAL (0x1 << 3)
> #define PARALLEL (0x1 << 4)
> #define CAT_ERROR (0x1 << 5)
> +#define CAPTURE (0x1 << 6)
>
> /**
> * SUBTEST: %s-cat-error
> @@ -172,6 +252,8 @@ test_balancer(int fd, int gt, int class, int n_exec_queues, int n_execs,
> fd = drm_open_driver(DRIVER_XE);
>
> num_placements = xe_gt_fill_engines_by_class(fd, gt, class, eci);
> + tryclear_hang();
> +
> if (num_placements < 2)
> return;
>
> @@ -193,7 +275,11 @@ test_balancer(int fd, int gt, int class, int n_exec_queues, int n_execs,
> exec.num_batch_buffer = flags & PARALLEL ? num_placements : 1;
>
> sync[0].handle = syncobj_create(fd, 0);
> - xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
> + xe_vm_bind_async_flags(fd, vm, 0, bo, 0, addr, bo_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
> +
> + xe_vm_bind_userptr_async_flags(fd, vm, 0, to_user_pointer(dummy), addr + bo_size, dummy_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
>
> if (flags & VIRTUAL && (flags & CAT_ERROR || flags & GT_RESET))
> bad_batches = num_placements;
> @@ -285,6 +371,8 @@ test_balancer(int fd, int gt, int class, int n_exec_queues, int n_execs,
> munmap(data, bo_size);
> gem_close(fd, bo);
> xe_vm_destroy(fd, vm);
> +
> + read_and_clear_hang();
> }
>
> /**
> @@ -337,6 +425,8 @@ test_legacy_mode(int fd, struct drm_xe_engine_class_instance *eci,
> if (flags & CLOSE_FD)
> fd = drm_open_driver(DRIVER_XE);
>
> + tryclear_hang();
> +
> vm = xe_vm_create(fd, 0, 0);
> bo_size = sizeof(*data) * n_execs;
> bo_size = xe_bb_size(fd, bo_size);
> @@ -352,7 +442,11 @@ test_legacy_mode(int fd, struct drm_xe_engine_class_instance *eci,
> };
>
> sync[0].handle = syncobj_create(fd, 0);
> - xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
> + xe_vm_bind_async_flags(fd, vm, 0, bo, 0, addr, bo_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
> +
> + xe_vm_bind_userptr_async_flags(fd, vm, 0, to_user_pointer(dummy), addr + bo_size, dummy_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
>
> for (i = 0; i < n_execs; i++) {
> uint64_t base_addr = flags & CAT_ERROR && !i ?
> @@ -432,6 +526,8 @@ test_legacy_mode(int fd, struct drm_xe_engine_class_instance *eci,
> munmap(data, bo_size);
> gem_close(fd, bo);
> xe_vm_destroy(fd, vm);
> +
> + read_and_clear_hang();
> }
>
> /**
> @@ -486,6 +582,8 @@ test_compute_mode(int fd, struct drm_xe_engine_class_instance *eci,
> if (flags & CLOSE_FD)
> fd = drm_open_driver(DRIVER_XE);
>
> + tryclear_hang();
> +
> vm = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_LR_MODE, 0);
> bo_size = sizeof(*data) * n_execs;
> bo_size = xe_bb_size(fd, bo_size);
> @@ -501,7 +599,12 @@ test_compute_mode(int fd, struct drm_xe_engine_class_instance *eci,
> };
>
> sync[0].addr = to_user_pointer(&data[0].vm_sync);
> - xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
> + xe_vm_bind_async_flags(fd, vm, 0, bo, 0, addr, bo_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
> +
> + /* Capture BO as userptr too */
> + xe_vm_bind_userptr_async_flags(fd, vm, 0, to_user_pointer(dummy), addr + bo_size, dummy_size, sync, 1,
> + DRM_XE_VM_BIND_FLAG_DUMPABLE);
>
> xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, 3 * NSEC_PER_SEC);
> data[0].vm_sync = 0;
> @@ -583,6 +686,8 @@ test_compute_mode(int fd, struct drm_xe_engine_class_instance *eci,
> munmap(data, bo_size);
> gem_close(fd, bo);
> xe_vm_destroy(fd, vm);
> +
> + read_and_clear_hang();
> }
>
> struct gt_thread_data {
> @@ -603,6 +708,8 @@ static void do_resets(struct gt_thread_data *t)
> usleep(250000); /* 250 ms */
> (*t->num_reset)++;
> xe_force_gt_reset_async(t->fd, t->gt);
> +
> + tryclear_hang();
> }
> }
>
> @@ -713,6 +820,8 @@ gt_reset(int fd, int n_threads, int n_sec)
> igt_info("number of resets %d\n", num_reset);
>
> free(threads);
> +
> + tryclear_hang();
> }
>
> igt_main
> @@ -730,9 +839,24 @@ igt_main
> int class;
> int fd;
>
> - igt_fixture
> + igt_fixture {
> + struct stat stat;
> + char str[256];
> +
> fd = drm_open_driver(DRIVER_XE);
>
> + igt_assert_eq(fstat(fd, &stat), 0);
> + sprintf(str, "/sys/dev/char/%ld:%ld/device", stat.st_rdev >> 8, stat.st_rdev & 0xff);
> + sysfd = open(str, O_DIRECTORY);
> +
> + tryclear_hang();
imho it would be better if that function wouldn't try to read all
devcoredump, as you wrote there was a bug in some driver in past
but imho if it should be checked, it is better to do in a separate
subtest.
Regards,
Kamil
> +
> + dummy_size = sysconf(_SC_PAGESIZE);
> + if (dummy_size < SZ_64K)
> + dummy_size = SZ_64K;
> + dummy = aligned_alloc(dummy_size, dummy_size);
> + }
> +
> igt_subtest("spin")
> xe_for_each_engine(fd, hwe)
> test_spin(fd, hwe, 0);
> --
> 2.45.2
>
More information about the igt-dev
mailing list