[igt-dev] [PATCH i-g-t 2/5] xe/xe_compute: place OpenCL kernel on a separate file
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Mon Apr 3 10:49:41 UTC 2023
On Mon, Mar 27, 2023 at 03:41:16PM +0200, Mauro Carvalho Chehab wrote:
> From: Mauro Carvalho Chehab <mchehab at kernel.org>
>
> In order to prepare for supporting multiple kernels, move
> the tgllp kernel to a separate file.
>
> While here, address a few coding style nitpicks.
>
> Signed-off-by: Mauro Carvalho Chehab <mchehab at kernel.org>
> ---
> lib/meson.build | 1 +
> lib/xe/xe_compute.c | 234 ++++++++++++++++++++---------
> lib/xe/xe_compute.h | 31 ++--
> lib/xe/xe_compute_square_kernels.c | 71 +++++++++
> tests/xe/xe_compute.c | 108 +------------
> 5 files changed, 256 insertions(+), 189 deletions(-)
> create mode 100644 lib/xe/xe_compute_square_kernels.c
>
> diff --git a/lib/meson.build b/lib/meson.build
> index ad9e2abef4c3..ad68089dcf43 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -99,6 +99,7 @@ lib_sources = [
> 'igt_msm.c',
> 'igt_dsc.c',
> 'xe/xe_compute.c',
> + 'xe/xe_compute_square_kernels.c',
> 'xe/xe_ioctl.c',
> 'xe/xe_query.c',
> 'xe/xe_spin.c'
> diff --git a/lib/xe/xe_compute.c b/lib/xe/xe_compute.c
> index 2165eada8931..7259b888eb9e 100644
> --- a/lib/xe/xe_compute.c
> +++ b/lib/xe/xe_compute.c
> @@ -6,71 +6,51 @@
> * Francois Dugast <francois.dugast at intel.com>
> */
>
> +#include <stdint.h>
> +
> +#include "igt.h"
> +#include "xe_drm.h"
> +#include "lib/igt_syncobj.h"
> +#include "lib/intel_reg.h"
> +
> #include "xe_compute.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
>
> #define PIPE_CONTROL 0x7a000004
> -#define MI_LOAD_REGISTER_IMM 0x11000001
> -#define PIPELINE_SELECT 0x69040302
> +#define MEDIA_STATE_FLUSH 0x0
> +#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
> +#define SIZE_DATA 64
> +#define SIZE_BATCH 0x1000
> +#define SIZE_BUFFER_INPUT MAX(sizeof(float) * SIZE_DATA, 0x1000)
> +#define SIZE_BUFFER_OUTPUT MAX(sizeof(float) * SIZE_DATA, 0x1000)
> +#define ADDR_BATCH 0x100000
> +#define ADDR_INPUT 0x200000UL
> +#define ADDR_OUTPUT 0x300000UL
> +#define ADDR_SURFACE_STATE_BASE 0x400000UL
> +#define ADDR_DYNAMIC_STATE_BASE 0x500000UL
> +#define ADDR_INDIRECT_OBJECT_BASE 0x800100000000
> +#define OFFSET_INDIRECT_DATA_START 0xFFFDF000
> +#define OFFSET_KERNEL 0xFFFEF000
> +
> +#undef MEDIA_VFE_STATE
> #define MEDIA_VFE_STATE 0x70000007
> +#undef STATE_BASE_ADDRESS
> #define STATE_BASE_ADDRESS 0x61010014
> -#define MEDIA_STATE_FLUSH 0x0
> +#undef MEDIA_INTERFACE_DESCRIPTOR_LOAD
> #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x70020002
> +#undef GPGPU_WALKER
> #define GPGPU_WALKER 0x7105000d
> -#define MI_BATCH_BUFFER_END (0xA << 23)
> -
> -// generated with:
> -// ocloc -file opencl/compute_square_kernel.cl -device tgllp && xxd -i compute_square_kernel_Gen12LPlp.bin
> -unsigned char tgllp_kernel_square_bin[] = {
> - 0x61, 0x00, 0x03, 0x80, 0x20, 0x02, 0x05, 0x03, 0x04, 0x00, 0x10, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x66, 0x01, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
> - 0x00, 0x80, 0x00, 0x01, 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x01, 0x20, 0x22,
> - 0x16, 0x09, 0x11, 0x03, 0x49, 0x00, 0x04, 0xa2, 0x12, 0x09, 0x11, 0x03,
> - 0x40, 0x01, 0x04, 0x00, 0x60, 0x06, 0x05, 0x05, 0x04, 0x04, 0x00, 0x01,
> - 0x05, 0x01, 0x58, 0x00, 0x40, 0x00, 0x24, 0x00, 0x60, 0x06, 0x05, 0x0a,
> - 0x04, 0x04, 0x00, 0x01, 0x05, 0x02, 0x58, 0x00, 0x40, 0x02, 0x0c, 0xa0,
> - 0x02, 0x05, 0x10, 0x07, 0x40, 0x02, 0x0e, 0xa6, 0x02, 0x0a, 0x10, 0x07,
> - 0x70, 0x02, 0x04, 0x00, 0x60, 0x02, 0x01, 0x00, 0x05, 0x0c, 0x46, 0x52,
> - 0x84, 0x08, 0x00, 0x00, 0x70, 0x02, 0x24, 0x00, 0x60, 0x02, 0x01, 0x00,
> - 0x05, 0x0e, 0x46, 0x52, 0x84, 0x08, 0x00, 0x00, 0x72, 0x00, 0x02, 0x80,
> - 0x50, 0x0d, 0x04, 0x00, 0x05, 0x00, 0x05, 0x1d, 0x05, 0x00, 0x05, 0x00,
> - 0x22, 0x00, 0x05, 0x01, 0x00, 0xc0, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
> - 0x90, 0x00, 0x00, 0x00, 0x69, 0x00, 0x10, 0x60, 0x02, 0x0c, 0x20, 0x00,
> - 0x69, 0x00, 0x12, 0x66, 0x02, 0x0e, 0x20, 0x00, 0x40, 0x02, 0x14, 0xa0,
> - 0x32, 0x10, 0x10, 0x08, 0x40, 0x02, 0x16, 0xa6, 0x32, 0x12, 0x10, 0x08,
> - 0x31, 0xa0, 0x04, 0x00, 0x00, 0x00, 0x14, 0x18, 0x14, 0x14, 0x00, 0xcc,
> - 0x00, 0x00, 0x16, 0x00, 0x31, 0x91, 0x24, 0x00, 0x00, 0x00, 0x14, 0x1a,
> - 0x14, 0x16, 0x00, 0xcc, 0x00, 0x00, 0x16, 0x00, 0x40, 0x00, 0x10, 0xa0,
> - 0x4a, 0x10, 0x10, 0x08, 0x40, 0x00, 0x12, 0xa6, 0x4a, 0x12, 0x10, 0x08,
> - 0x41, 0x20, 0x18, 0x20, 0x00, 0x18, 0x00, 0x18, 0x41, 0x21, 0x1a, 0x26,
> - 0x00, 0x1a, 0x00, 0x1a, 0x31, 0xa2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x14, 0x10, 0x02, 0xcc, 0x14, 0x18, 0x96, 0x00, 0x31, 0x93, 0x24, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x14, 0x12, 0x02, 0xcc, 0x14, 0x1a, 0x96, 0x00,
> - 0x25, 0x00, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x10, 0x00, 0x00, 0x00, 0x61, 0x00, 0x7f, 0x64, 0x00, 0x03, 0x10, 0x00,
> - 0x31, 0x44, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x1c, 0x0c, 0x03, 0x00, 0xa0,
> - 0x00, 0x00, 0x78, 0x02, 0x61, 0x24, 0x03, 0x80, 0x20, 0x02, 0x01, 0x00,
> - 0x05, 0x1c, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x04, 0x80,
> - 0xa0, 0x4a, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x31, 0x01, 0x03, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x70,
> - 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
> +
> +struct bo_dict_entry {
> + uint64_t addr;
> + uint32_t size;
> + void *data;
> };
> -unsigned int tgllp_kernel_square_length = sizeof(tgllp_kernel_square_bin);
> +
> +/*
> + * TGL compatible batch
> + */
>
> /**
> * tgllp_create_indirect_data:
> @@ -80,8 +60,9 @@ unsigned int tgllp_kernel_square_length = sizeof(tgllp_kernel_square_bin);
> *
> * Prepares indirect data for compute pipeline.
> */
> -void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_input, uint64_t addr_output)
> +static void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
> + uint64_t addr_input,
> + uint64_t addr_output)
> {
> int b = 0;
>
> @@ -183,8 +164,9 @@ void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
> *
> * Prepares surface state for compute pipeline.
> */
> -void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_input, uint64_t addr_output)
> +static void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
> + uint64_t addr_input,
> + uint64_t addr_output)
> {
> int b = 0;
>
> @@ -261,8 +243,8 @@ void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
> *
> * Prepares dynamic state for compute pipeline.
> */
> -void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
> - uint64_t offset_kernel)
> +static void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
> + uint64_t offset_kernel)
> {
> int b = 0;
>
> @@ -280,7 +262,7 @@ void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
> }
>
> /**
> - * tgllp_create_batch_compute:
> + * tgllp_compute_exec_compute:
> * @addr_bo_buffer_batch: pointer to batch buffer
> * @addr_surface_state_base: gpu offset of surface state data
> * @addr_dynamic_state_base: gpu offset of dynamic state data
> @@ -289,19 +271,19 @@ void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
> *
> * Prepares compute pipeline.
> */
> -void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_surface_state_base,
> - uint64_t addr_dynamic_state_base,
> - uint64_t addr_indirect_object_base,
> - uint64_t offset_indirect_data_start)
> +static void tgllp_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
> + uint64_t addr_surface_state_base,
> + uint64_t addr_dynamic_state_base,
> + uint64_t addr_indirect_object_base,
> + uint64_t offset_indirect_data_start)
> {
> int b = 0;
>
> - addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
> + addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
> addr_bo_buffer_batch[b++] = 0x00002580;
> addr_bo_buffer_batch[b++] = 0x00060002;
> addr_bo_buffer_batch[b++] = PIPELINE_SELECT;
> - addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
> + addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
> addr_bo_buffer_batch[b++] = 0x00007034;
> addr_bo_buffer_batch[b++] = 0x60000321;
> addr_bo_buffer_batch[b++] = PIPE_CONTROL;
> @@ -310,7 +292,7 @@ void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = 0x00000000;
> - addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
> + addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
> addr_bo_buffer_batch[b++] = 0x0000E404;
> addr_bo_buffer_batch[b++] = 0x00000100;
> addr_bo_buffer_batch[b++] = PIPE_CONTROL;
> @@ -405,3 +387,111 @@ void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
> addr_bo_buffer_batch[b++] = 0x00000000;
> addr_bo_buffer_batch[b++] = MI_BATCH_BUFFER_END;
> }
> +
> +/**
> + * tgl_compute_exec - run a pipeline compatible with Tiger Lake
> + *
> + * @fd: file descriptor of the opened DRM device
> + * @kernel: GPU Kernel binary to be executed
> + * @size: size of @kernel.
> + */
> +static void tgl_compute_exec(int fd, const unsigned char *kernel,
> + unsigned int size)
> +{
> + uint32_t vm, engine;
> + float *dinput;
> + struct drm_xe_sync sync = { 0 };
> +#define TGL_BO_DICT_ENTRIES 7
> + struct bo_dict_entry bo_dict[TGL_BO_DICT_ENTRIES] = {
> + { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_KERNEL}, // kernel
> + { .addr = ADDR_DYNAMIC_STATE_BASE, .size = 0x1000}, // dynamic state
> + { .addr = ADDR_SURFACE_STATE_BASE, .size = 0x1000}, // surface state
> + { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_INDIRECT_DATA_START, .size = 0x10000}, // indirect data
> + { .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT }, // input
> + { .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT }, // output
> + { .addr = ADDR_BATCH, .size = SIZE_BATCH }, // batch
> + };
> +
> + /* Sets Kernel size */
> + bo_dict[0].size = ALIGN(size, 0x1000);
> +
> + vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
> + engine = xe_engine_create_class(fd, vm, DRM_XE_ENGINE_CLASS_RENDER);
> + sync.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL;
> + sync.handle = syncobj_create(fd, 0);
> +
> + for (int i = 0; i < TGL_BO_DICT_ENTRIES; i++) {
> + bo_dict[i].data = aligned_alloc(xe_get_default_alignment(fd), bo_dict[i].size);
> + xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(bo_dict[i].data), bo_dict[i].addr, bo_dict[i].size, &sync, 1);
> + syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> + memset(bo_dict[i].data, 0, bo_dict[i].size);
> + }
> + memcpy(bo_dict[0].data, kernel, size);
> + tgllp_create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
> + tgllp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
> + tgllp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
> + dinput = (float *)bo_dict[4].data;
> + srand(time(NULL));
> +
> + for (int i = 0; i < SIZE_DATA; i++)
> + ((float *)dinput)[i] = rand() / (float)RAND_MAX;
> +
> + tgllp_compute_exec_compute(bo_dict[6].data, ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE, ADDR_INDIRECT_OBJECT_BASE, OFFSET_INDIRECT_DATA_START);
> +
> + xe_exec_wait(fd, engine, ADDR_BATCH);
> +
> + for (int i = 0; i < SIZE_DATA; i++)
> + igt_assert(((float *)bo_dict[5].data)[i] == ((float *)bo_dict[4].data)[i] * ((float *) bo_dict[4].data)[i]);
> +
> + for (int i = 0; i < TGL_BO_DICT_ENTRIES; i++) {
> + xe_vm_unbind_async(fd, vm, 0, 0, bo_dict[i].addr, bo_dict[i].size, &sync, 1);
> + syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> + free(bo_dict[i].data);
> + }
> +
> + syncobj_destroy(fd, sync.handle);
> + xe_engine_destroy(fd, engine);
> + xe_vm_destroy(fd, vm);
> +}
> +
> +/*
> + * Generic code
> + */
> +
> +static const struct {
> + unsigned int ip_ver;
> + void (*compute_exec)(int fd, const unsigned char *kernel,
> + unsigned int size);
> +} xe_compute_batches[] = {
> + {
> + .ip_ver = IP_VER(12, 0),
> + .compute_exec = tgl_compute_exec,
> + },
> +};
> +
> +int run_xe_compute_kernel(int fd)
> +{
> + unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
> + unsigned int batch;
> + const struct xe_compute_kernels *kernels = xe_compute_square_kernels;
> +
> + for (batch = 0; batch < ARRAY_SIZE(xe_compute_batches); batch++) {
> + if (ip_ver == xe_compute_batches[batch].ip_ver)
> + break;
> + }
> + if (batch == ARRAY_SIZE(xe_compute_batches))
> + return 1;
> +
> + while (kernels->kernel) {
> + if (ip_ver == kernels->ip_ver)
> + break;
> + kernels++;
> + }
> + if (!kernels->kernel)
> + return 1;
> +
> + xe_compute_batches[batch].compute_exec(fd, kernels->kernel,
> + kernels->size);
> +
> + return 0;
> +}
> diff --git a/lib/xe/xe_compute.h b/lib/xe/xe_compute.h
> index de763101da90..5faa3713c40e 100644
> --- a/lib/xe/xe_compute.h
> +++ b/lib/xe/xe_compute.h
> @@ -9,21 +9,24 @@
> #ifndef XE_COMPUTE_H
> #define XE_COMPUTE_H
>
> -#include <stdint.h>
> +/*
> + * OpenCL Kernels are generated using:
> + *
> + * GPU=tgllp && \
> + * ocloc -file opencl/compute_square_kernel.cl -device $GPU && \
> + * xxd -i compute_square_kernel_Gen12LPlp.bin
> + *
> + * For each GPU model desired. A list of supported models can be obtained with: ocloc compile --help
> + */
> +
> +struct xe_compute_kernels {
> + int ip_ver;
> + unsigned int size;
> + const unsigned char *kernel;
> +};
>
> -void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_input, uint64_t addr_output);
> -void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_input, uint64_t addr_output);
> -void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
> - uint64_t offset_kernel);
> -void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
> - uint64_t addr_surface_state_base,
> - uint64_t addr_dynamic_state_base,
> - uint64_t addr_indirect_object_base,
> - uint64_t offset_indirect_data_start);
> +extern const struct xe_compute_kernels xe_compute_square_kernels[];
>
> -extern unsigned char tgllp_kernel_square_bin[];
> -extern unsigned int tgllp_kernel_square_length;
> +int run_xe_compute_kernel(int fd);
>
> #endif /* XE_COMPUTE_H */
> diff --git a/lib/xe/xe_compute_square_kernels.c b/lib/xe/xe_compute_square_kernels.c
> new file mode 100644
> index 000000000000..f9c07dc778bd
> --- /dev/null
> +++ b/lib/xe/xe_compute_square_kernels.c
> @@ -0,0 +1,71 @@
> +/* SPDX-License-Identifier: MIT */
> +
> +/*
> + * Copyright © 2022 Intel Corporation
> + *
> + * Authors:
> + * Francois Dugast <francois.dugast at intel.com>
> + */
> +
> +#include "intel_chipset.h"
> +#include "lib/xe/xe_compute.h"
> +
> +static const unsigned char tgllp_kernel_square_bin[] = {
> + 0x61, 0x00, 0x03, 0x80, 0x20, 0x02, 0x05, 0x03, 0x04, 0x00, 0x10, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x66, 0x01, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
> + 0x00, 0x80, 0x00, 0x01, 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x01, 0x20, 0x22,
> + 0x16, 0x09, 0x11, 0x03, 0x49, 0x00, 0x04, 0xa2, 0x12, 0x09, 0x11, 0x03,
> + 0x40, 0x01, 0x04, 0x00, 0x60, 0x06, 0x05, 0x05, 0x04, 0x04, 0x00, 0x01,
> + 0x05, 0x01, 0x58, 0x00, 0x40, 0x00, 0x24, 0x00, 0x60, 0x06, 0x05, 0x0a,
> + 0x04, 0x04, 0x00, 0x01, 0x05, 0x02, 0x58, 0x00, 0x40, 0x02, 0x0c, 0xa0,
> + 0x02, 0x05, 0x10, 0x07, 0x40, 0x02, 0x0e, 0xa6, 0x02, 0x0a, 0x10, 0x07,
> + 0x70, 0x02, 0x04, 0x00, 0x60, 0x02, 0x01, 0x00, 0x05, 0x0c, 0x46, 0x52,
> + 0x84, 0x08, 0x00, 0x00, 0x70, 0x02, 0x24, 0x00, 0x60, 0x02, 0x01, 0x00,
> + 0x05, 0x0e, 0x46, 0x52, 0x84, 0x08, 0x00, 0x00, 0x72, 0x00, 0x02, 0x80,
> + 0x50, 0x0d, 0x04, 0x00, 0x05, 0x00, 0x05, 0x1d, 0x05, 0x00, 0x05, 0x00,
> + 0x22, 0x00, 0x05, 0x01, 0x00, 0xc0, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
> + 0x90, 0x00, 0x00, 0x00, 0x69, 0x00, 0x10, 0x60, 0x02, 0x0c, 0x20, 0x00,
> + 0x69, 0x00, 0x12, 0x66, 0x02, 0x0e, 0x20, 0x00, 0x40, 0x02, 0x14, 0xa0,
> + 0x32, 0x10, 0x10, 0x08, 0x40, 0x02, 0x16, 0xa6, 0x32, 0x12, 0x10, 0x08,
> + 0x31, 0xa0, 0x04, 0x00, 0x00, 0x00, 0x14, 0x18, 0x14, 0x14, 0x00, 0xcc,
> + 0x00, 0x00, 0x16, 0x00, 0x31, 0x91, 0x24, 0x00, 0x00, 0x00, 0x14, 0x1a,
> + 0x14, 0x16, 0x00, 0xcc, 0x00, 0x00, 0x16, 0x00, 0x40, 0x00, 0x10, 0xa0,
> + 0x4a, 0x10, 0x10, 0x08, 0x40, 0x00, 0x12, 0xa6, 0x4a, 0x12, 0x10, 0x08,
> + 0x41, 0x20, 0x18, 0x20, 0x00, 0x18, 0x00, 0x18, 0x41, 0x21, 0x1a, 0x26,
> + 0x00, 0x1a, 0x00, 0x1a, 0x31, 0xa2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x14, 0x10, 0x02, 0xcc, 0x14, 0x18, 0x96, 0x00, 0x31, 0x93, 0x24, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x14, 0x12, 0x02, 0xcc, 0x14, 0x1a, 0x96, 0x00,
> + 0x25, 0x00, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x10, 0x00, 0x00, 0x00, 0x61, 0x00, 0x7f, 0x64, 0x00, 0x03, 0x10, 0x00,
> + 0x31, 0x44, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x1c, 0x0c, 0x03, 0x00, 0xa0,
> + 0x00, 0x00, 0x78, 0x02, 0x61, 0x24, 0x03, 0x80, 0x20, 0x02, 0x01, 0x00,
> + 0x05, 0x1c, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x04, 0x80,
> + 0xa0, 0x4a, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x31, 0x01, 0x03, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x70,
> + 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
> +};
> +
> +const struct xe_compute_kernels xe_compute_square_kernels[] = {
> + {
> + .ip_ver = IP_VER(12, 0),
> + .size = sizeof(tgllp_kernel_square_bin),
> + .kernel = tgllp_kernel_square_bin,
> + },
> + {}
> +};
> diff --git a/tests/xe/xe_compute.c b/tests/xe/xe_compute.c
> index 138d80671435..202d318c60c0 100644
> --- a/tests/xe/xe_compute.c
> +++ b/tests/xe/xe_compute.c
> @@ -14,117 +14,21 @@
> #include <string.h>
>
> #include "igt.h"
> -#include "lib/igt_syncobj.h"
> -#include "xe_drm.h"
> -#include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> #include "xe/xe_compute.h"
>
> -#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
> -#define SIZE_DATA 64
> -#define SIZE_BATCH 0x1000
> -#define SIZE_KERNEL 0x1000
> -#define SIZE_BUFFER_INPUT MAX(sizeof(float)*SIZE_DATA, 0x1000)
> -#define SIZE_BUFFER_OUTPUT MAX(sizeof(float)*SIZE_DATA, 0x1000)
> -#define ADDR_BATCH 0x100000
> -#define ADDR_INPUT (unsigned long)0x200000
> -#define ADDR_OUTPUT (unsigned long)0x300000
> -#define ADDR_SURFACE_STATE_BASE (unsigned long)0x400000
> -#define ADDR_DYNAMIC_STATE_BASE (unsigned long)0x500000
> -#define ADDR_INDIRECT_OBJECT_BASE 0x800100000000
> -#define OFFSET_INDIRECT_DATA_START 0xFFFDF000
> -#define OFFSET_KERNEL 0xFFFEF000
> -
> -struct bo_dict_entry {
> - uint64_t addr;
> - uint32_t size;
> - void *data;
> -};
> -
> /**
> * SUBTEST: compute-square
> - * GPU requirement: only works on TGL_GT2 with device ID: 0x9a49
> + * GPU requirement: only works on TGL
> * Description:
> - * This test shows how to create a batch to execute a
> - * compute kernel. For now it supports tgllp only.
> + * Run an openCL Kernel that returns output[i] = input[i] * input[i],
> + * for an input dataset..
> * TODO: extend test to cover other platforms
> */
> static void
> test_compute_square(int fd)
> {
> - uint32_t vm, engine;
> - float *dinput;
> - struct drm_xe_sync sync = { 0 };
> -
> -#define BO_DICT_ENTRIES 7
> - struct bo_dict_entry bo_dict[BO_DICT_ENTRIES] = {
> - { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_KERNEL, .size = SIZE_KERNEL }, // kernel
> - { .addr = ADDR_DYNAMIC_STATE_BASE, .size = 0x1000}, // dynamic state
> - { .addr = ADDR_SURFACE_STATE_BASE, .size = 0x1000}, // surface state
> - { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_INDIRECT_DATA_START, .size = 0x10000}, // indirect data
> - { .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT }, // input
> - { .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT }, // output
> - { .addr = ADDR_BATCH, .size = SIZE_BATCH }, // batch
> - };
> -
> - vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
> - engine = xe_engine_create_class(fd, vm, DRM_XE_ENGINE_CLASS_RENDER);
> - sync.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL;
> - sync.handle = syncobj_create(fd, 0);
> -
> - for(int i = 0; i < BO_DICT_ENTRIES; i++) {
> - bo_dict[i].data = aligned_alloc(xe_get_default_alignment(fd), bo_dict[i].size);
> - xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(bo_dict[i].data), bo_dict[i].addr, bo_dict[i].size, &sync, 1);
> - syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> - memset(bo_dict[i].data, 0, bo_dict[i].size);
> - }
> - memcpy(bo_dict[0].data, tgllp_kernel_square_bin, tgllp_kernel_square_length);
> - tgllp_create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
> - tgllp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
> - tgllp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
> - dinput = (float *)bo_dict[4].data;
> - srand(time(NULL));
> - for(int i=0; i < SIZE_DATA; i++) {
> - ((float*) dinput)[i] = rand()/(float)RAND_MAX;
> - }
> - tgllp_create_batch_compute(bo_dict[6].data, ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE, ADDR_INDIRECT_OBJECT_BASE, OFFSET_INDIRECT_DATA_START);
> -
> - xe_exec_wait(fd, engine, ADDR_BATCH);
> - for(int i = 0; i < SIZE_DATA; i++) {
> - igt_assert(((float*) bo_dict[5].data)[i] == ((float*) bo_dict[4].data)[i] * ((float*) bo_dict[4].data)[i]);
> - }
> -
> - for(int i = 0; i < BO_DICT_ENTRIES; i++) {
> - xe_vm_unbind_async(fd, vm, 0, 0, bo_dict[i].addr, bo_dict[i].size, &sync, 1);
> - syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
> - free(bo_dict[i].data);
> - }
> -
> - syncobj_destroy(fd, sync.handle);
> - xe_engine_destroy(fd, engine);
> - xe_vm_destroy(fd, vm);
> -}
> -
> -static bool
> -is_device_supported(int fd)
> -{
> - struct drm_xe_query_config *config;
> - struct drm_xe_device_query query = {
> - .extensions = 0,
> - .query = DRM_XE_DEVICE_QUERY_CONFIG,
> - .size = 0,
> - .data = 0,
> - };
> -
> - igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> -
> - config = malloc(query.size);
> - igt_assert(config);
> -
> - query.data = to_user_pointer(config);
> - igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
> -
> - return (config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff) == 0x9a49;
> + igt_require_f(!run_xe_compute_kernel(fd), "GPU not supported\n");
This looks weird. I read this as "require NOT run xe compute kernel".
I think run_xe_compute_kernel() should return a bool: true if it
succeeds, false otherwise.
With this fixed:
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
--
Zbigniew
> }
>
> igt_main
> @@ -136,10 +40,8 @@ igt_main
> xe_device_get(xe);
> }
>
> - igt_subtest("compute-square") {
> - igt_skip_on(!is_device_supported(xe));
> + igt_subtest("compute-square")
> test_compute_square(xe);
> - }
>
> igt_fixture {
> xe_device_put(xe);
> --
> 2.39.2
>
More information about the igt-dev
mailing list