[PATCH i-g-t 3/8] tests/intel/xe_drm_fdinfo: Add helpers for spinning batches

Lucas De Marchi lucas.demarchi at intel.com
Mon Jul 1 18:08:53 UTC 2024


On Mon, Jul 01, 2024 at 10:27:54AM GMT, Umesh Nerlige Ramappa wrote:
>On Mon, Jul 01, 2024 at 11:57:14AM -0500, Lucas De Marchi wrote:
>>On Sat, Jun 22, 2024 at 07:00:57AM GMT, Umesh Nerlige Ramappa wrote:
>>>Add helpers for submitting batches and waiting for them to start.
>>>
>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>>---
>>>tests/intel/xe_drm_fdinfo.c | 135 ++++++++++++++++++++++++++++++++++++
>>>1 file changed, 135 insertions(+)
>>>
>>>diff --git a/tests/intel/xe_drm_fdinfo.c b/tests/intel/xe_drm_fdinfo.c
>>>index 41409b2d2..27459b7f1 100644
>>>--- a/tests/intel/xe_drm_fdinfo.c
>>>+++ b/tests/intel/xe_drm_fdinfo.c
>>>@@ -51,6 +51,17 @@ static const char *engine_map[] = {
>>>	"vecs",
>>>	"ccs",
>>>};
>>>+
>>>+static const uint64_t batch_addr[] = {
>>>+	0x170000,
>>>+	0x180000,
>>>+	0x190000,
>>>+	0x1a0000,
>>>+	0x1b0000,
>>>+	0x1c0000,
>>>+	0x1d0000,
>>>+	0x1e0000,
>>>+};
>>>static void read_engine_cycles(int xe, struct pceu_cycles *pceu)
>>>{
>>>	struct drm_client_fdinfo info = { };
>>>@@ -316,6 +327,130 @@ static void basic(int xe, unsigned int num_classes)
>>>	}
>>>}
>>>
>>>+#define MAX_PARALLEL 8
>>>+struct xe_spin_ctx {
>>>+	uint32_t vm;
>>>+	uint64_t addr[MAX_PARALLEL];
>>>+	struct drm_xe_sync sync[2];
>>>+	struct drm_xe_exec exec;
>>>+	uint32_t exec_queue;
>>>+	size_t bo_size;
>>>+	uint32_t bo;
>>>+	struct xe_spin *spin;
>>>+	struct xe_spin_opts spin_opts;
>>>+	bool ended;
>>>+	uint16_t class;
>>>+	uint16_t width;
>>>+	uint16_t num_placements;
>>>+};
>>>+
>>>+static struct xe_spin_ctx *
>>>+xe_spin_ctx_init(int fd, struct drm_xe_engine_class_instance *hwe, uint32_t vm,
>>>+		 uint16_t width, uint16_t num_placements)
>>>+{
>>>+	struct xe_spin_ctx *ctx = calloc(1, sizeof(*ctx));
>>>+
>>>+	igt_assert(ctx);
>>>+
>>>+	igt_assert(width && num_placements &&
>>>+		   (width == 1 || num_placements == 1));
>>>+
>>>+	igt_assert(width <= MAX_PARALLEL);
>>>+
>>>+	ctx->class = hwe->engine_class;
>>>+	ctx->width = width;
>>>+	ctx->num_placements = num_placements;
>>>+	ctx->vm = vm;
>>>+	for (int i = 0; i < ctx->width; i++)
>>>+		ctx->addr[i] = batch_addr[hwe->engine_class];
>>>+
>>>+	ctx->exec.num_batch_buffer = width;
>>>+	ctx->exec.num_syncs = 2;
>>>+	ctx->exec.syncs = to_user_pointer(ctx->sync);
>>>+
>>>+	ctx->sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
>>>+	ctx->sync[0].flags = DRM_XE_SYNC_FLAG_SIGNAL;
>>>+	ctx->sync[0].handle = syncobj_create(fd, 0);
>>>+
>>>+	ctx->sync[1].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
>>>+	ctx->sync[1].flags = DRM_XE_SYNC_FLAG_SIGNAL;
>>>+	ctx->sync[1].handle = syncobj_create(fd, 0);
>>>+
>>>+	ctx->bo_size = sizeof(struct xe_spin);
>>>+	ctx->bo_size = xe_bb_size(fd, ctx->bo_size);
>>>+	ctx->bo = xe_bo_create(fd, ctx->vm, ctx->bo_size,
>>>+			       vram_if_possible(fd, hwe->gt_id),
>>>+			       DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
>>>+	ctx->spin = xe_bo_map(fd, ctx->bo, ctx->bo_size);
>>>+
>>>+	igt_assert_eq(__xe_exec_queue_create(fd, ctx->vm, width, num_placements,
>>>+					     hwe, 0, &ctx->exec_queue), 0);
>>>+
>>>+	xe_vm_bind_async(fd, ctx->vm, 0, ctx->bo, 0, ctx->addr[0], ctx->bo_size,
>>>+			 ctx->sync, 1);
>>>+
>>>+	return ctx;
>>>+}
>>>+
>>>+static void
>>>+xe_spin_sync_start(int fd, struct xe_spin_ctx *ctx)
>>
>>I don't think we should create these wrappers on each individual test.
>>If a wrapper like this is needed, can we add the proper abstraction in
>>lib/xe/xe_spin.{c,h}?
>
>The wrappers are required to control exactly when the utilization is 
>sampled relative to a running batch.
>
>I thought the general rule was to move code into the IGT library once 
>there is more than one user. Right now this test suite is the only 
>one using this breakdown.
>
>In the future, the single-engine utilization exposed by GuC (busy v3) 
>will make use of these helpers to build tests for normal, virtual and 
>parallel submissions. In addition to sampling counters before and 
>after, those tests will also sample perf counters while the batch is 
>actively running on the engine, so the breakdown provided by the 
>above helpers is useful. I can work on abstracting them into the 
>library then, since we would have more users and more data on what 
>the abstractions need.
>
>For now, I think we should target test coverage and then work on the 
>abstraction as an improvement when implementing the busy v3 tests. 
>Does that sound reasonable?

IMO anything named xe_spin_* should be implemented in the xe_spin
library. If you want a wrapper here, call it something else. Just
dropping the xe_ prefix would do, and it makes a future conversion to
the library easy.

However, it seems that doing this later would just make it harder,
since the library won't have an equivalent of xe_spin_ctx (which is
also misnamed, since it's not part of xe_spin).

I think an abstraction in xe_spin for
xe_spin_init() + xe_exec() + xe_spin_wait_started() would be acceptable.
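
Something along these lines, for instance. This is just a sketch: the
name and signature are made up, and the parallel (width > 1) case is
left out:

/*
 * Hypothetical library helper, for illustration only: submit a
 * spinner on an exec queue and return once it is running on HW.
 * Assumes the caller already bound the spinner's bo at addr.
 */
struct xe_spin *
xe_spin_sync_exec(int fd, uint32_t exec_queue, struct xe_spin *spin,
		  uint64_t addr)
{
	struct xe_spin_opts opts = { .addr = addr, .preempt = false };

	xe_spin_init(spin, &opts);
	xe_exec_sync(fd, exec_queue, addr, NULL, 0);
	xe_spin_wait_started(spin);

	return spin;
}

Parallel and virtual submissions would still need the full struct
drm_xe_exec path, but that could be layered on top later.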

Anyway... I think you can keep it like this if you drop the xe_ prefix
here and in the related functions/structs.
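
With that rename, the sampling breakdown you describe would read
roughly like this in a test. spin_sync_end() and spin_ctx_destroy()
are the counterparts I'm assuming the rest of the series adds, and
batch_duration_usec is a placeholder:

struct pceu_cycles before[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
struct pceu_cycles after[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
struct spin_ctx *ctx;

ctx = spin_ctx_init(xe, hwe, vm, 1, 1);
spin_sync_start(xe, ctx);	/* returns once the batch is on HW */

read_engine_cycles(xe, before);
usleep(batch_duration_usec);	/* let the spinner accumulate busy time */
read_engine_cycles(xe, after);

spin_sync_end(xe, ctx);		/* stop the spinner */
spin_ctx_destroy(xe, ctx);

/* per class: utilization = delta cycles / delta total_cycles */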

Lucas De Marchi

