[PATCH i-g-t 4/4] xe_exec_nop: create xe_exec_nop
Dandamudi, Priyanka
priyanka.dandamudi at intel.com
Tue Apr 15 08:54:12 UTC 2025
> -----Original Message-----
> From: Gurram, Pravalika <pravalika.gurram at intel.com>
> Sent: 11 April 2025 06:14 PM
> To: Dandamudi, Priyanka <priyanka.dandamudi at intel.com>; igt-
> dev at lists.freedesktop.org
> Subject: RE: [PATCH i-g-t 4/4] xe_exec_nop: create xe_exec_nop
>
>
>
> > -----Original Message-----
> > From: Dandamudi, Priyanka <priyanka.dandamudi at intel.com>
> > Sent: Thursday, April 10, 2025 2:24 PM
> > To: Gurram, Pravalika <pravalika.gurram at intel.com>; igt-
> > dev at lists.freedesktop.org
> > Cc: Gurram, Pravalika <pravalika.gurram at intel.com>
> > Subject: RE: [PATCH i-g-t 4/4] xe_exec_nop: create xe_exec_nop
> >
> >
> >
> > > -----Original Message-----
> > > From: igt-dev <igt-dev-bounces at lists.freedesktop.org> On Behalf Of
> > > Pravalika Gurram
> > > Sent: 02 April 2025 10:11 PM
> > > To: igt-dev at lists.freedesktop.org
> > > Cc: Gurram, Pravalika <pravalika.gurram at intel.com>
> > > Subject: [PATCH i-g-t 4/4] xe_exec_nop: create xe_exec_nop
> > >
> > > Signed-off-by: Pravalika Gurram <pravalika.gurram at intel.com>
> > > ---
> > > benchmarks/meson.build | 1 +
> > > benchmarks/xe_exec_nop.c | 221
> > > +++++++++++++++++++++++++++++++++++++++
> > > 2 files changed, 222 insertions(+)
> > > create mode 100644 benchmarks/xe_exec_nop.c
> > >
> > > diff --git a/benchmarks/meson.build b/benchmarks/meson.build index
> > > f29d5a288..1af13b0c6 100644
> > > --- a/benchmarks/meson.build
> > > +++ b/benchmarks/meson.build
> > > @@ -24,6 +24,7 @@ benchmark_progs = [
> > > 'xe_blt',
> > > 'xe_create',
> > > 'xe_exec_ctx',
> > > + 'xe_exec_nop',
> > > ]
> > >
> > > benchmarksdir = join_paths(libexecdir, 'benchmarks') diff --git
> > > a/benchmarks/xe_exec_nop.c b/benchmarks/xe_exec_nop.c new file
> > mode
> > > 100644 index 000000000..99249dd96
> > > --- /dev/null
> > > +++ b/benchmarks/xe_exec_nop.c
> > > @@ -0,0 +1,221 @@
> > > +/*
> > > + * Copyright © 2025 Intel Corporation
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person
> > > +obtaining a
> > > + * copy of this software and associated documentation files (the
> > > +"Software"),
> > > + * to deal in the Software without restriction, including without
> > > +limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute,
> > > +sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to
> > > +whom the
> > > + * Software is furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice (including
> > > +the next
> > > + * paragraph) shall be included in all copies or substantial
> > > +portions of the
> > > + * Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> > KIND,
> > > +EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > > +MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
> > NO
> > > EVENT
> > > +SHALL
> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > > DAMAGES OR
> > > +OTHER
> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> > OTHERWISE,
> > > +ARISING
> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> > OR
> > > OTHER
> > > +DEALINGS
> > > + * IN THE SOFTWARE.
> > > + *
> > > + * Authors:
> > > + * Pravalika Gurram <pravalika.gurram at intel.com>
> > > + *
> > > + */
> > > +
> > > +#include "drm.h"
> > > +#include "drmtest.h"
> > > +
> > > +#include "intel_io.h"
> > > +#include "intel_reg.h"
> > > +#include <sys/ioctl.h>
> > > +#include "ioctl_wrappers.h"
> > > +#include "igt_syncobj.h"
> > > +#include "xe/xe_ioctl.h"
> > > +#include "xe/xe_query.h"
> > > +#include "xe/xe_util.h"
> > > +#include <time.h>
> > > +
> > > +#define READ_ALL 0x4
> > > +static double elapsed(const struct timespec *start,
> > > + const struct timespec *end) {
> > > + return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec -
> > > +start->tv_nsec); }
> > > +
> > > +static void first_batch(int fd, struct drm_xe_engine_class_instance
> > > +*eci) {
> > > + uint64_t bo_size = xe_bb_size(fd, SZ_4K);
> > > + struct drm_xe_engine_class_instance inst = {
> > > + .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
> > > + };
> > Why is inst declared here? It is not used anywhere in this function.
> > Can you explain why you are using a first batch and a second batch?
>
> first_batch vs. second_batch:
> the first batch creates the BO;
> the second batch creates the BO and writes the BBE (MI_BATCH_BUFFER_END) to the BO.
>
> > > + uint32_t vm, bo;
> > > + u32 q;
> > > + vm = xe_vm_create(fd,
> > DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
> > > 0);
> > > + bo = xe_bo_create(fd, vm, bo_size, system_memory(fd), 0);
> > > + q = xe_exec_queue_create(fd, vm, eci, 0);
> > > + xe_exec_queue_destroy(fd, q);
> > > + gem_close(fd, bo);
> > > + xe_vm_destroy(fd, vm);
> > > +}
> > > +static void second_batch(int fd, struct
> > > +drm_xe_engine_class_instance
> > > +*eci) {
> > > +
> > > + int err;
> > > + uint64_t bo_size = xe_bb_size(fd, SZ_4K), bo_addr = 0x1a0000;
> > > + uint32_t vm, bo, *batch, exec_queue;
> > > + struct drm_xe_engine_class_instance inst = {
> > > + .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
> > > + };
> > > +
> > > + struct drm_xe_sync sync = {
> > > + .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
> > > + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> > > + .handle = syncobj_create(fd, 0),
> > > + };
> > > + struct drm_xe_exec exec = {
> > > + .num_syncs = 1,
> > > + .syncs = to_user_pointer(&sync),
> > > + .address = bo_addr,
> > > + .num_batch_buffer = 1,
> > > + };
> > > +
> > > + vm = xe_vm_create(fd,
> > DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
> > > 0);
> > > + bo = xe_bo_create(fd, vm, bo_size, system_memory(fd), 0);
> > > +
> > > + batch = xe_bo_map(fd, bo, bo_size);
> > > + *batch = MI_BATCH_BUFFER_END;
> > > + munmap(batch, bo_size);
> > > +
> > > + xe_vm_bind_sync(fd, vm, bo, 0, bo_addr, bo_size);
> > > + exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
> > > +
> > > + exec.exec_queue_id = exec_queue;
> > > + err = __xe_exec(fd, &exec);
> > > +
> > > + err = syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> > > + xe_exec_queue_destroy(fd, exec_queue);
> > > + gem_close(fd, bo);
> > > + xe_vm_destroy(fd, vm);
> > > + syncobj_destroy(fd, sync.handle);
> > > +
> > > +}
> > > +static int loop(unsigned ring, int reps, int ncpus, unsigned flags) {
> > > + double *shared;
> > > + int fd;
> > > + struct drm_xe_engine_class_instance *hwe;
> > > + shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON,
> > -
> > > 1, 0);
> > > +
> > > + fd = drm_open_driver(DRIVER_XE);
> > > + if (ring) {
> > > + xe_for_each_engine(fd, hwe) {
> > > + if (hwe->engine_class == ring) {
> > > + first_batch(fd, hwe);
> > > + second_batch(fd, hwe);
> > > + }
> > > + }
> > > + } else {
> > > + xe_for_each_engine(fd, hwe) {
> > > + first_batch(fd, hwe);
> > > + second_batch(fd, hwe);
> > > + }
> > > + }
> > > +
> > > +
> > > + while (reps--) {
> > > + memset(shared, 0, 4096);
> > > +
> > > + sleep(1); /* wait for the hw to go back to sleep */
> > > +
> > > + igt_fork(child, ncpus) {
> > > + struct timespec start, end;
> > > + unsigned count = 0;
> > > +
> > > + first_batch(fd, hwe);
> > > + second_batch(fd, hwe);
> > > +
> > > + clock_gettime(CLOCK_MONOTONIC, &start);
> > > + do {
> > > + for (int inner = 0; inner < 1024; inner++) {
> > > + if (flags & READ_ALL) {
> > What does READ_ALL mean here? Its actual interpretation in i915
> > is different. Refer to this commit:
> > 05ca171aa9a6902614241f9685de2f62f30126d8
> > "we look at the throughput for submitting a read batch to a
> > single engine or any. The kernel optimises for this by allowing multiple
> > engine to read at the same time, but writes are exclusive to a single
> > engine. So, lets try to measure the impact of inserting the barriers
> > between writes on different engines."
> > Look into it and you will understand the meaning behind it. First go through
> > the code flow in i915 and then port it to xe.
> >
> In the loop over each engine: in the if case we are reading and writing to each engine;
> in the else case we are doing it for each engine. I feel that an exact replica of i915 will not be
> possible. If it is possible, could you please let me know?
No, this interpretation is not correct. Please make the change the way I explained to you offline in detail.
But before that: there is no xe_exec_nop test, so I see no point in writing a benchmark for it.
First it needs to be confirmed whether this is needed at all; until then, please hold off.
> > > + if (ring) {
> > > +
> > > xe_for_each_engine(fd, hwe) {
> > > + if (hwe-
> > > >engine_class == ring) {
> > > +
> > > first_batch(fd, hwe);
> > > +
> > > second_batch(fd, hwe);
> > > + }
> > > + }
> > > + } else {
> > > +
> > > xe_for_each_engine(fd, hwe) {
> > > +
> > first_batch(fd,
> > > hwe);
> > > +
> > > second_batch(fd, hwe);
> > > + }
> > > + }
> > > + }
> > > + }
> > > +
> > > + clock_gettime(CLOCK_MONOTONIC, &end);
> > > + } while (elapsed(&start, &end) < 2.);
> > > +
> > > + clock_gettime(CLOCK_MONOTONIC, &end);
> > > + shared[child] = 1e6*elapsed(&start, &end) / count;
> > > +
> > > + }
> > > + igt_waitchildren();
> > > +
> > > + for (int child = 0; child < ncpus; child++)
> > > + shared[ncpus] += shared[child];
> > > + printf("%7.3f ncpus %d\n", shared[ncpus] / ncpus, ncpus);
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +int main(int argc, char **argv)
> > > +{
> > > + unsigned ring = DRM_XE_ENGINE_CLASS_RENDER;
> > > + unsigned flags = 0;
> > > + int reps = 1;
> > > + int ncpus = 1;
> > > + int c;
> > > +
> > > + while ((c = getopt (argc, argv, "e:r:f:A")) != -1) {
> > > + switch (c) {
> > > + case 'e':
> > > + if (strcmp(optarg, "rcs") == 0)
> > > + ring = DRM_XE_ENGINE_CLASS_RENDER;
> > > + else if (strcmp(optarg, "vcs") == 0)
> > > + ring =
> > > DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> > > + else if (strcmp(optarg, "bcs") == 0)
> > > + ring = DRM_XE_ENGINE_CLASS_COPY;
> > > + else if (strcmp(optarg, "vecs") == 0)
> > > + ring =
> > > DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> > > + else if (strcmp(optarg, "ccs") == 0)
> > > + ring = DRM_XE_ENGINE_CLASS_COMPUTE;
> > > + else
> > > + ring = atoi(optarg);
> > > + break;
> > > +
> > > + case 'r':
> > > + reps = atoi(optarg);
> > > + if (reps < 1)
> > > + reps = 1;
> > > + break;
> > > +
> > > + case 'f':
> > > + ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > > + break;
> > > +
> > > + case 'A':
> > > + flags |= READ_ALL;
> > > + break;
> > > +
> > > + default:
> > > + break;
> > > + }
> > > + }
> > > +
> > > + return loop(ring, reps, ncpus, flags); }
> > > --
> > > 2.34.1
More information about the igt-dev
mailing list