[PATCH i-g-t, v5 5/5] tests/intel/xe_render_copy: Add new test for render under copy stress
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Thu Apr 3 08:40:02 UTC 2025
On Wed, Apr 02, 2025 at 01:50:03PM +0200, Francois Dugast wrote:
> Add a new series of tests which use the spinner with mem copy to keep
> the copy functions busy and perform memory accesses, while a regular
> render job is running. The objective of the test is to ensure that
> both the copies and the rendering are correct. Some information about
> rendering duration is provided in the logs, which can help monitor if
> copies have an impact on render.
>
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> ---
> tests/intel/xe_render_copy.c | 221 +++++++++++++++++++++++++++++++++++
> 1 file changed, 221 insertions(+)
>
> diff --git a/tests/intel/xe_render_copy.c b/tests/intel/xe_render_copy.c
> index e2fbbc0f8..d9e785a40 100644
> --- a/tests/intel/xe_render_copy.c
> +++ b/tests/intel/xe_render_copy.c
> @@ -12,8 +12,12 @@
> #include "igt.h"
> #include "intel_blt.h"
> #include "intel_bufops.h"
> +#include "intel_mocs.h"
> +#include "intel_pat.h"
> #include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> +#include "xe/xe_spin.h"
> +#include "xe/xe_util.h"
>
> /**
> * TEST: Copy memory using 3d engine
> @@ -437,6 +441,207 @@ static int render(struct buf_ops *bops, uint32_t tiling,
> return fails;
> }
>
> +static void mem_copy_busy(int fd, struct drm_xe_engine_class_instance *hwe, uint32_t vm,
> + uint64_t ahnd, uint32_t region, struct xe_spin **spin,
> + pthread_mutex_t *lock_init_spin)
> +{
> + uint32_t copy_size = SZ_4M;
> + uint64_t duration_ns = NSEC_PER_SEC * 4.5; // Keep below 5 s timeout
Please replace these with /* */ style comments. The rest is fine.
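E.g. something like this (just a sketch to illustrate the comment style, untested):

	/* Keep below 5 s timeout */
	uint64_t duration_ns = NSEC_PER_SEC * 4.5;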
BTW, you've revealed a bug in the vm-bind util code (https://patchwork.freedesktop.org/series/147185/).
From me:
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
--
Zbigniew
> + intel_ctx_t *ctx;
> + uint32_t exec_queue;
> + uint32_t width = copy_size;
> + uint32_t height = 1;
> + uint32_t bo_size = ALIGN(SZ_4K, xe_get_default_alignment(fd));
> + uint32_t bo;
> + uint64_t spin_addr;
> + int32_t src_handle, dst_handle;
> + struct blt_mem_object src, dst;
> + struct xe_spin_mem_copy mem_copy = {
> + .src = &src,
> + .dst = &dst,
> + };
> +
> + exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
> + ctx = intel_ctx_xe(fd, vm, exec_queue, 0, 0, 0);
> +
> + /* Create source and destination objects used for the copy */
> + src_handle = xe_bo_create(fd, 0, copy_size, region, 0);
> + dst_handle = xe_bo_create(fd, 0, copy_size, region, 0);
> + blt_set_mem_object(mem_copy.src, src_handle, copy_size, 0, width, height, region,
> + intel_get_uc_mocs_index(fd), DEFAULT_PAT_INDEX,
> + M_LINEAR, COMPRESSION_DISABLED);
> + blt_set_mem_object(mem_copy.dst, dst_handle, copy_size, 0, width, height, region,
> + intel_get_uc_mocs_index(fd), DEFAULT_PAT_INDEX,
> + M_LINEAR, COMPRESSION_DISABLED);
> + mem_copy.src->ptr = xe_bo_map(fd, src_handle, copy_size);
> + mem_copy.dst->ptr = xe_bo_map(fd, dst_handle, copy_size);
> + mem_copy.src_offset = get_offset_pat_index(ahnd, mem_copy.src->handle,
> + mem_copy.src->size, 0, mem_copy.src->pat_index);
> + mem_copy.dst_offset = get_offset_pat_index(ahnd, mem_copy.dst->handle,
> + mem_copy.dst->size, 0, mem_copy.dst->pat_index);
> +
> + /* Create spinner */
> + bo = xe_bo_create(fd, vm, bo_size, vram_if_possible(fd, 0), 0);
> + *spin = xe_bo_map(fd, bo, bo_size);
> + spin_addr = intel_allocator_alloc_with_strategy(ahnd, bo, bo_size, 0,
> + ALLOC_STRATEGY_LOW_TO_HIGH);
> + xe_vm_bind_sync(fd, vm, bo, 0, spin_addr, bo_size);
> + xe_spin_init_opts(*spin, .addr = spin_addr, .preempt = true,
> + .ctx_ticks = xe_spin_nsec_to_ticks(fd, 0, duration_ns),
> + .mem_copy = &mem_copy);
> + igt_assert_eq(pthread_mutex_unlock(lock_init_spin), 0);
> +
> + while (true) {
> + src.ptr[0] = 0xdeadbeaf;
> + intel_ctx_xe_exec(ctx, ahnd, spin_addr);
> + /* Abort if the spinner was stopped, otherwise continue looping */
> + if ((*spin)->end == 0)
> + break;
> + igt_assert_f(!memcmp(mem_copy.src->ptr, mem_copy.dst->ptr, mem_copy.src->size),
> + "source and destination differ\n");
> + dst.ptr[0] = 0;
> + }
> +
> + /* Cleanup */
> + xe_vm_unbind_sync(fd, vm, 0, spin_addr, bo_size);
> + gem_munmap(*spin, bo_size);
> + gem_close(fd, bo);
> + gem_munmap(mem_copy.dst->ptr, copy_size);
> + gem_munmap(mem_copy.src->ptr, copy_size);
> + gem_close(fd, dst_handle);
> + gem_close(fd, src_handle);
> + intel_ctx_destroy(fd, ctx);
> + xe_exec_queue_destroy(fd, exec_queue);
> +}
> +
> +typedef struct {
> + int fd;
> + struct drm_xe_engine_class_instance *hwe;
> + uint32_t vm;
> + uint64_t ahnd;
> + uint32_t region;
> + struct xe_spin *spin;
> + pthread_mutex_t lock_init_spin;
> +} data_thread_mem_copy;
> +
> +static void *run_thread_mem_copy(void *arg)
> +{
> + data_thread_mem_copy *data = (data_thread_mem_copy *)arg;
> +
> + mem_copy_busy(data->fd, data->hwe, data->vm, data->ahnd, data->region,
> + &data->spin, &data->lock_init_spin);
> + pthread_exit(NULL);
> +}
> +
> +static bool has_copy_function(struct drm_xe_engine_class_instance *hwe)
> +{
> + return hwe->engine_class == DRM_XE_ENGINE_CLASS_COPY;
> +}
> +
> +/**
> + * TEST: Render while stressing copy functions
> + * Category: Core
> + * Mega feature: Render
> + * Sub-category: 3d
> + * Functionality: copy
> + * Test category: stress test
> + *
> + * SUBTEST: render-stress-%s-copies
> + * Description: Render while running %arg[1] parallel copies per supported engine.
> + * Even under stress from concurrent memory accesses, the render buffer
> + * and the copies must all be correct.
> + *
> + * arg[1]:
> + * @0: 0 parallel copies
> + * @1: 1 parallel copies
> + * @2: 2 parallel copies
> + * @4: 4 parallel copies
> + */
> +#define MAX_COPY_THREADS 64
> +static void render_stress_copy(int fd, struct igt_collection *set,
> + uint32_t nparallel_copies_per_engine)
> +{
> + struct igt_collection *regions;
> + struct drm_xe_engine_class_instance *hwe;
> + uint32_t vm;
> + uint64_t ahnd;
> + data_thread_mem_copy data_mem_copy[MAX_COPY_THREADS];
> + pthread_t thread_mem_copy[MAX_COPY_THREADS];
> + int thread_copy_count = 0;
> + struct buf_ops *bops;
> + int render_timeout = 3;
> + int render_count = 0;
> + uint64_t render_duration_total = 0, render_duration_min = -1, render_duration_max = 0;
> +
> + vm = xe_vm_create(fd, 0, 0);
> + ahnd = intel_allocator_open_full(fd, vm, 0, 0,
> + INTEL_ALLOCATOR_SIMPLE,
> + ALLOC_STRATEGY_LOW_TO_HIGH, 0);
> +
> + for_each_variation_r(regions, 1, set) {
> + xe_for_each_engine(fd, hwe) {
> + if (!has_copy_function(hwe))
> + continue;
> + for (int i = 0; i < nparallel_copies_per_engine; i++) {
> + data_thread_mem_copy *data = &data_mem_copy[thread_copy_count];
> +
> + data->fd = fd;
> + data->hwe = hwe;
> + data->vm = vm;
> + data->ahnd = ahnd;
> + data->region = igt_collection_get_value(regions, 0);
> + /*
> + * lock_init_spin is held by the newly created thread until the
> + * spinner is initialized and ready to be waited on with
> + * xe_spin_wait_started().
> + */
> + igt_assert_eq(pthread_mutex_init(&data->lock_init_spin, NULL), 0);
> + igt_assert_eq(pthread_mutex_lock(&data->lock_init_spin), 0);
> + igt_assert_eq(pthread_create(
> + &thread_mem_copy[thread_copy_count],
> + NULL,
> + run_thread_mem_copy,
> + data),
> + 0);
> + thread_copy_count++;
> + }
> + }
> + }
> +
> + /* Wait for all mem copy spinners to be initialized and started */
> + for (int i = 0; i < thread_copy_count; i++) {
> + igt_assert_eq(pthread_mutex_lock(&data_mem_copy[i].lock_init_spin), 0);
> + xe_spin_wait_started(data_mem_copy[i].spin);
> + igt_assert_eq(pthread_mutex_unlock(&data_mem_copy[i].lock_init_spin), 0);
> + }
> +
> + bops = buf_ops_create(fd);
> + igt_until_timeout(render_timeout) {
> + uint64_t duration;
> +
> + render(bops, T_LINEAR, WIDTH, HEIGHT, COPY_FULL, &duration);
> + render_count++;
> + render_duration_total += duration;
> + if (duration < render_duration_min)
> + render_duration_min = duration;
> + if (duration > render_duration_max)
> + render_duration_max = duration;
> + }
> + igt_info("%d render() loops in %d seconds\n", render_count, render_timeout);
> + igt_info("Render duration: avg = %ld ns, min = %ld ns, max = %ld ns\n",
> + render_duration_total / render_count,
> + render_duration_min, render_duration_max);
> +
> + /* End all mem copy threads */
> + for (int i = 0; i < thread_copy_count; i++)
> + xe_spin_end(data_mem_copy[i].spin);
> + for (int i = 0; i < thread_copy_count; i++)
> + pthread_join(thread_mem_copy[i], NULL);
> +
> + put_ahnd(ahnd);
> + xe_vm_destroy(fd, vm);
> +}
> +
> static int opt_handler(int opt, int opt_index, void *data)
> {
> switch (opt) {
> @@ -477,11 +682,23 @@ igt_main_args("dpiW:H:", NULL, help_str, opt_handler, NULL)
> struct buf_ops *bops;
> const char *tiling_name;
> int tiling;
> + struct igt_collection *set;
> + const struct section {
> + const char *name;
> + unsigned int nparallel_copies_per_engine;
> + } sections[] = {
> + { "0", 0 },
> + { "1", 1 },
> + { "2", 2 },
> + { "4", 4 },
> + { NULL },
> + };
>
> igt_fixture {
> xe = drm_open_driver(DRIVER_XE);
> bops = buf_ops_create(xe);
> srand(time(NULL));
> + set = xe_get_memory_region_set(xe, DRM_XE_MEM_REGION_CLASS_SYSMEM);
> }
>
> for (int id = 0; id <= COPY_FULL_COMPRESSED; id++) {
> @@ -501,6 +718,10 @@ igt_main_args("dpiW:H:", NULL, help_str, opt_handler, NULL)
> }
> }
>
> + for (const struct section *s = sections; s->name; s++)
> + igt_subtest_f("render-stress-%s-copies", s->name)
> + render_stress_copy(xe, set, s->nparallel_copies_per_engine);
> +
> igt_fixture {
> buf_ops_destroy(bops);
> drm_close_driver(xe);
> --
> 2.43.0
>