[igt-dev] [PATCH i-g-t v2 2/4] xe/xe_compute: place OpenCL kernel on a separate file
Mauro Carvalho Chehab
mauro.chehab at linux.intel.com
Tue Apr 4 07:38:33 UTC 2023
From: Mauro Carvalho Chehab <mchehab at kernel.org>
In order to prepare for supporting multiple Kernels, move
the tgllp to a separate file.
While here, address a few coding style nitpicks.
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab at kernel.org>
---
lib/meson.build | 1 +
lib/xe/xe_compute.c | 234 ++++++++++++++++++++---------
lib/xe/xe_compute.h | 31 ++--
lib/xe/xe_compute_square_kernels.c | 71 +++++++++
tests/xe/xe_compute.c | 108 +------------
5 files changed, 256 insertions(+), 189 deletions(-)
create mode 100644 lib/xe/xe_compute_square_kernels.c
diff --git a/lib/meson.build b/lib/meson.build
index ad9e2abef4c3..ad68089dcf43 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -99,6 +99,7 @@ lib_sources = [
'igt_msm.c',
'igt_dsc.c',
'xe/xe_compute.c',
+ 'xe/xe_compute_square_kernels.c',
'xe/xe_ioctl.c',
'xe/xe_query.c',
'xe/xe_spin.c'
diff --git a/lib/xe/xe_compute.c b/lib/xe/xe_compute.c
index 2165eada8931..fb11b8bc7770 100644
--- a/lib/xe/xe_compute.c
+++ b/lib/xe/xe_compute.c
@@ -6,71 +6,51 @@
* Francois Dugast <francois.dugast at intel.com>
*/
+#include <stdint.h>
+
+#include "igt.h"
+#include "xe_drm.h"
+#include "lib/igt_syncobj.h"
+#include "lib/intel_reg.h"
+
#include "xe_compute.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
#define PIPE_CONTROL 0x7a000004
-#define MI_LOAD_REGISTER_IMM 0x11000001
-#define PIPELINE_SELECT 0x69040302
+#define MEDIA_STATE_FLUSH 0x0
+#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
+#define SIZE_DATA 64
+#define SIZE_BATCH 0x1000
+#define SIZE_BUFFER_INPUT MAX(sizeof(float) * SIZE_DATA, 0x1000)
+#define SIZE_BUFFER_OUTPUT MAX(sizeof(float) * SIZE_DATA, 0x1000)
+#define ADDR_BATCH 0x100000
+#define ADDR_INPUT 0x200000UL
+#define ADDR_OUTPUT 0x300000UL
+#define ADDR_SURFACE_STATE_BASE 0x400000UL
+#define ADDR_DYNAMIC_STATE_BASE 0x500000UL
+#define ADDR_INDIRECT_OBJECT_BASE 0x800100000000
+#define OFFSET_INDIRECT_DATA_START 0xFFFDF000
+#define OFFSET_KERNEL 0xFFFEF000
+
+#undef MEDIA_VFE_STATE
#define MEDIA_VFE_STATE 0x70000007
+#undef STATE_BASE_ADDRESS
#define STATE_BASE_ADDRESS 0x61010014
-#define MEDIA_STATE_FLUSH 0x0
+#undef MEDIA_INTERFACE_DESCRIPTOR_LOAD
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x70020002
+#undef GPGPU_WALKER
#define GPGPU_WALKER 0x7105000d
-#define MI_BATCH_BUFFER_END (0xA << 23)
-
-// generated with:
-// ocloc -file opencl/compute_square_kernel.cl -device tgllp && xxd -i compute_square_kernel_Gen12LPlp.bin
-unsigned char tgllp_kernel_square_bin[] = {
- 0x61, 0x00, 0x03, 0x80, 0x20, 0x02, 0x05, 0x03, 0x04, 0x00, 0x10, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x66, 0x01, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
- 0x00, 0x80, 0x00, 0x01, 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x01, 0x20, 0x22,
- 0x16, 0x09, 0x11, 0x03, 0x49, 0x00, 0x04, 0xa2, 0x12, 0x09, 0x11, 0x03,
- 0x40, 0x01, 0x04, 0x00, 0x60, 0x06, 0x05, 0x05, 0x04, 0x04, 0x00, 0x01,
- 0x05, 0x01, 0x58, 0x00, 0x40, 0x00, 0x24, 0x00, 0x60, 0x06, 0x05, 0x0a,
- 0x04, 0x04, 0x00, 0x01, 0x05, 0x02, 0x58, 0x00, 0x40, 0x02, 0x0c, 0xa0,
- 0x02, 0x05, 0x10, 0x07, 0x40, 0x02, 0x0e, 0xa6, 0x02, 0x0a, 0x10, 0x07,
- 0x70, 0x02, 0x04, 0x00, 0x60, 0x02, 0x01, 0x00, 0x05, 0x0c, 0x46, 0x52,
- 0x84, 0x08, 0x00, 0x00, 0x70, 0x02, 0x24, 0x00, 0x60, 0x02, 0x01, 0x00,
- 0x05, 0x0e, 0x46, 0x52, 0x84, 0x08, 0x00, 0x00, 0x72, 0x00, 0x02, 0x80,
- 0x50, 0x0d, 0x04, 0x00, 0x05, 0x00, 0x05, 0x1d, 0x05, 0x00, 0x05, 0x00,
- 0x22, 0x00, 0x05, 0x01, 0x00, 0xc0, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
- 0x90, 0x00, 0x00, 0x00, 0x69, 0x00, 0x10, 0x60, 0x02, 0x0c, 0x20, 0x00,
- 0x69, 0x00, 0x12, 0x66, 0x02, 0x0e, 0x20, 0x00, 0x40, 0x02, 0x14, 0xa0,
- 0x32, 0x10, 0x10, 0x08, 0x40, 0x02, 0x16, 0xa6, 0x32, 0x12, 0x10, 0x08,
- 0x31, 0xa0, 0x04, 0x00, 0x00, 0x00, 0x14, 0x18, 0x14, 0x14, 0x00, 0xcc,
- 0x00, 0x00, 0x16, 0x00, 0x31, 0x91, 0x24, 0x00, 0x00, 0x00, 0x14, 0x1a,
- 0x14, 0x16, 0x00, 0xcc, 0x00, 0x00, 0x16, 0x00, 0x40, 0x00, 0x10, 0xa0,
- 0x4a, 0x10, 0x10, 0x08, 0x40, 0x00, 0x12, 0xa6, 0x4a, 0x12, 0x10, 0x08,
- 0x41, 0x20, 0x18, 0x20, 0x00, 0x18, 0x00, 0x18, 0x41, 0x21, 0x1a, 0x26,
- 0x00, 0x1a, 0x00, 0x1a, 0x31, 0xa2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x14, 0x10, 0x02, 0xcc, 0x14, 0x18, 0x96, 0x00, 0x31, 0x93, 0x24, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x14, 0x12, 0x02, 0xcc, 0x14, 0x1a, 0x96, 0x00,
- 0x25, 0x00, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x10, 0x00, 0x00, 0x00, 0x61, 0x00, 0x7f, 0x64, 0x00, 0x03, 0x10, 0x00,
- 0x31, 0x44, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x1c, 0x0c, 0x03, 0x00, 0xa0,
- 0x00, 0x00, 0x78, 0x02, 0x61, 0x24, 0x03, 0x80, 0x20, 0x02, 0x01, 0x00,
- 0x05, 0x1c, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x04, 0x80,
- 0xa0, 0x4a, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x31, 0x01, 0x03, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x70,
- 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+
+struct bo_dict_entry {
+ uint64_t addr;
+ uint32_t size;
+ void *data;
};
-unsigned int tgllp_kernel_square_length = sizeof(tgllp_kernel_square_bin);
+
+/*
+ * TGL compatible batch
+ */
/**
* tgllp_create_indirect_data:
@@ -80,8 +60,9 @@ unsigned int tgllp_kernel_square_length = sizeof(tgllp_kernel_square_bin);
*
* Prepares indirect data for compute pipeline.
*/
-void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_input, uint64_t addr_output)
+static void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
+ uint64_t addr_input,
+ uint64_t addr_output)
{
int b = 0;
@@ -183,8 +164,9 @@ void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
*
* Prepares surface state for compute pipeline.
*/
-void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_input, uint64_t addr_output)
+static void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
+ uint64_t addr_input,
+ uint64_t addr_output)
{
int b = 0;
@@ -261,8 +243,8 @@ void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
*
* Prepares dynamic state for compute pipeline.
*/
-void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
- uint64_t offset_kernel)
+static void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
+ uint64_t offset_kernel)
{
int b = 0;
@@ -280,7 +262,7 @@ void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
}
/**
- * tgllp_create_batch_compute:
+ * tgllp_compute_exec_compute:
* @addr_bo_buffer_batch: pointer to batch buffer
* @addr_surface_state_base: gpu offset of surface state data
* @addr_dynamic_state_base: gpu offset of dynamic state data
@@ -289,19 +271,19 @@ void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
*
* Prepares compute pipeline.
*/
-void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_surface_state_base,
- uint64_t addr_dynamic_state_base,
- uint64_t addr_indirect_object_base,
- uint64_t offset_indirect_data_start)
+static void tgllp_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
+ uint64_t addr_surface_state_base,
+ uint64_t addr_dynamic_state_base,
+ uint64_t addr_indirect_object_base,
+ uint64_t offset_indirect_data_start)
{
int b = 0;
- addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
+ addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
addr_bo_buffer_batch[b++] = 0x00002580;
addr_bo_buffer_batch[b++] = 0x00060002;
addr_bo_buffer_batch[b++] = PIPELINE_SELECT;
- addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
+ addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
addr_bo_buffer_batch[b++] = 0x00007034;
addr_bo_buffer_batch[b++] = 0x60000321;
addr_bo_buffer_batch[b++] = PIPE_CONTROL;
@@ -310,7 +292,7 @@ void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
addr_bo_buffer_batch[b++] = 0x00000000;
addr_bo_buffer_batch[b++] = 0x00000000;
addr_bo_buffer_batch[b++] = 0x00000000;
- addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM;
+ addr_bo_buffer_batch[b++] = MI_LOAD_REGISTER_IMM(1);
addr_bo_buffer_batch[b++] = 0x0000E404;
addr_bo_buffer_batch[b++] = 0x00000100;
addr_bo_buffer_batch[b++] = PIPE_CONTROL;
@@ -405,3 +387,111 @@ void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
addr_bo_buffer_batch[b++] = 0x00000000;
addr_bo_buffer_batch[b++] = MI_BATCH_BUFFER_END;
}
+
+/**
+ * tgl_compute_exec - run a pipeline compatible with Tiger Lake
+ *
+ * @fd: file descriptor of the opened DRM device
+ * @kernel: GPU Kernel binary to be executed
+ * @size: size of @kernel.
+ */
+static void tgl_compute_exec(int fd, const unsigned char *kernel,
+ unsigned int size)
+{
+ uint32_t vm, engine;
+ float *dinput;
+ struct drm_xe_sync sync = { 0 };
+#define TGL_BO_DICT_ENTRIES 7
+ struct bo_dict_entry bo_dict[TGL_BO_DICT_ENTRIES] = {
+ { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_KERNEL}, // kernel
+ { .addr = ADDR_DYNAMIC_STATE_BASE, .size = 0x1000}, // dynamic state
+ { .addr = ADDR_SURFACE_STATE_BASE, .size = 0x1000}, // surface state
+ { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_INDIRECT_DATA_START, .size = 0x10000}, // indirect data
+ { .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT }, // input
+ { .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT }, // output
+ { .addr = ADDR_BATCH, .size = SIZE_BATCH }, // batch
+ };
+
+ /* Sets Kernel size */
+ bo_dict[0].size = ALIGN(size, 0x1000);
+
+ vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+ engine = xe_engine_create_class(fd, vm, DRM_XE_ENGINE_CLASS_RENDER);
+ sync.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL;
+ sync.handle = syncobj_create(fd, 0);
+
+ for (int i = 0; i < TGL_BO_DICT_ENTRIES; i++) {
+ bo_dict[i].data = aligned_alloc(xe_get_default_alignment(fd), bo_dict[i].size);
+ xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(bo_dict[i].data), bo_dict[i].addr, bo_dict[i].size, &sync, 1);
+ syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
+ memset(bo_dict[i].data, 0, bo_dict[i].size);
+ }
+ memcpy(bo_dict[0].data, kernel, size);
+ tgllp_create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
+ tgllp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
+ tgllp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
+ dinput = (float *)bo_dict[4].data;
+ srand(time(NULL));
+
+ for (int i = 0; i < SIZE_DATA; i++)
+ ((float *)dinput)[i] = rand() / (float)RAND_MAX;
+
+ tgllp_compute_exec_compute(bo_dict[6].data, ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE, ADDR_INDIRECT_OBJECT_BASE, OFFSET_INDIRECT_DATA_START);
+
+ xe_exec_wait(fd, engine, ADDR_BATCH);
+
+ for (int i = 0; i < SIZE_DATA; i++)
+ igt_assert(((float *)bo_dict[5].data)[i] == ((float *)bo_dict[4].data)[i] * ((float *) bo_dict[4].data)[i]);
+
+ for (int i = 0; i < TGL_BO_DICT_ENTRIES; i++) {
+ xe_vm_unbind_async(fd, vm, 0, 0, bo_dict[i].addr, bo_dict[i].size, &sync, 1);
+ syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
+ free(bo_dict[i].data);
+ }
+
+ syncobj_destroy(fd, sync.handle);
+ xe_engine_destroy(fd, engine);
+ xe_vm_destroy(fd, vm);
+}
+
+/*
+ * Generic code
+ */
+
+static const struct {
+ unsigned int ip_ver;
+ void (*compute_exec)(int fd, const unsigned char *kernel,
+ unsigned int size);
+} xe_compute_batches[] = {
+ {
+ .ip_ver = IP_VER(12, 0),
+ .compute_exec = tgl_compute_exec,
+ },
+};
+
+bool run_xe_compute_kernel(int fd)
+{
+ unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
+ unsigned int batch;
+ const struct xe_compute_kernels *kernels = xe_compute_square_kernels;
+
+ for (batch = 0; batch < ARRAY_SIZE(xe_compute_batches); batch++) {
+ if (ip_ver == xe_compute_batches[batch].ip_ver)
+ break;
+ }
+ if (batch == ARRAY_SIZE(xe_compute_batches))
+ return false;
+
+ while (kernels->kernel) {
+ if (ip_ver == kernels->ip_ver)
+ break;
+ kernels++;
+ }
+ if (!kernels->kernel)
+ return 1;
+
+ xe_compute_batches[batch].compute_exec(fd, kernels->kernel,
+ kernels->size);
+
+ return true;
+}
diff --git a/lib/xe/xe_compute.h b/lib/xe/xe_compute.h
index de763101da90..b2e7e9827836 100644
--- a/lib/xe/xe_compute.h
+++ b/lib/xe/xe_compute.h
@@ -9,21 +9,24 @@
#ifndef XE_COMPUTE_H
#define XE_COMPUTE_H
-#include <stdint.h>
+/*
+ * OpenCL Kernels are generated using:
+ *
+ * GPU=tgllp && \
+ * ocloc -file opencl/compute_square_kernel.cl -device $GPU && \
+ * xxd -i compute_square_kernel_Gen12LPlp.bin
+ *
+ * For each GPU model desired. A list of supported models can be obtained with: ocloc compile --help
+ */
+
+struct xe_compute_kernels {
+ int ip_ver;
+ unsigned int size;
+ const unsigned char *kernel;
+};
-void tgllp_create_indirect_data(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_input, uint64_t addr_output);
-void tgllp_create_surface_state(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_input, uint64_t addr_output);
-void tgllp_create_dynamic_state(uint32_t *addr_bo_buffer_batch,
- uint64_t offset_kernel);
-void tgllp_create_batch_compute(uint32_t *addr_bo_buffer_batch,
- uint64_t addr_surface_state_base,
- uint64_t addr_dynamic_state_base,
- uint64_t addr_indirect_object_base,
- uint64_t offset_indirect_data_start);
+extern const struct xe_compute_kernels xe_compute_square_kernels[];
-extern unsigned char tgllp_kernel_square_bin[];
-extern unsigned int tgllp_kernel_square_length;
+bool run_xe_compute_kernel(int fd);
#endif /* XE_COMPUTE_H */
diff --git a/lib/xe/xe_compute_square_kernels.c b/lib/xe/xe_compute_square_kernels.c
new file mode 100644
index 000000000000..f9c07dc778bd
--- /dev/null
+++ b/lib/xe/xe_compute_square_kernels.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Authors:
+ * Francois Dugast <francois.dugast at intel.com>
+ */
+
+#include "intel_chipset.h"
+#include "lib/xe/xe_compute.h"
+
+static const unsigned char tgllp_kernel_square_bin[] = {
+ 0x61, 0x00, 0x03, 0x80, 0x20, 0x02, 0x05, 0x03, 0x04, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x66, 0x01, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
+ 0x00, 0x80, 0x00, 0x01, 0xc0, 0x04, 0xc0, 0x04, 0x41, 0x01, 0x20, 0x22,
+ 0x16, 0x09, 0x11, 0x03, 0x49, 0x00, 0x04, 0xa2, 0x12, 0x09, 0x11, 0x03,
+ 0x40, 0x01, 0x04, 0x00, 0x60, 0x06, 0x05, 0x05, 0x04, 0x04, 0x00, 0x01,
+ 0x05, 0x01, 0x58, 0x00, 0x40, 0x00, 0x24, 0x00, 0x60, 0x06, 0x05, 0x0a,
+ 0x04, 0x04, 0x00, 0x01, 0x05, 0x02, 0x58, 0x00, 0x40, 0x02, 0x0c, 0xa0,
+ 0x02, 0x05, 0x10, 0x07, 0x40, 0x02, 0x0e, 0xa6, 0x02, 0x0a, 0x10, 0x07,
+ 0x70, 0x02, 0x04, 0x00, 0x60, 0x02, 0x01, 0x00, 0x05, 0x0c, 0x46, 0x52,
+ 0x84, 0x08, 0x00, 0x00, 0x70, 0x02, 0x24, 0x00, 0x60, 0x02, 0x01, 0x00,
+ 0x05, 0x0e, 0x46, 0x52, 0x84, 0x08, 0x00, 0x00, 0x72, 0x00, 0x02, 0x80,
+ 0x50, 0x0d, 0x04, 0x00, 0x05, 0x00, 0x05, 0x1d, 0x05, 0x00, 0x05, 0x00,
+ 0x22, 0x00, 0x05, 0x01, 0x00, 0xc0, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+ 0x90, 0x00, 0x00, 0x00, 0x69, 0x00, 0x10, 0x60, 0x02, 0x0c, 0x20, 0x00,
+ 0x69, 0x00, 0x12, 0x66, 0x02, 0x0e, 0x20, 0x00, 0x40, 0x02, 0x14, 0xa0,
+ 0x32, 0x10, 0x10, 0x08, 0x40, 0x02, 0x16, 0xa6, 0x32, 0x12, 0x10, 0x08,
+ 0x31, 0xa0, 0x04, 0x00, 0x00, 0x00, 0x14, 0x18, 0x14, 0x14, 0x00, 0xcc,
+ 0x00, 0x00, 0x16, 0x00, 0x31, 0x91, 0x24, 0x00, 0x00, 0x00, 0x14, 0x1a,
+ 0x14, 0x16, 0x00, 0xcc, 0x00, 0x00, 0x16, 0x00, 0x40, 0x00, 0x10, 0xa0,
+ 0x4a, 0x10, 0x10, 0x08, 0x40, 0x00, 0x12, 0xa6, 0x4a, 0x12, 0x10, 0x08,
+ 0x41, 0x20, 0x18, 0x20, 0x00, 0x18, 0x00, 0x18, 0x41, 0x21, 0x1a, 0x26,
+ 0x00, 0x1a, 0x00, 0x1a, 0x31, 0xa2, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x14, 0x10, 0x02, 0xcc, 0x14, 0x18, 0x96, 0x00, 0x31, 0x93, 0x24, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x14, 0x12, 0x02, 0xcc, 0x14, 0x1a, 0x96, 0x00,
+ 0x25, 0x00, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x61, 0x00, 0x7f, 0x64, 0x00, 0x03, 0x10, 0x00,
+ 0x31, 0x44, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x1c, 0x0c, 0x03, 0x00, 0xa0,
+ 0x00, 0x00, 0x78, 0x02, 0x61, 0x24, 0x03, 0x80, 0x20, 0x02, 0x01, 0x00,
+ 0x05, 0x1c, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x04, 0x80,
+ 0xa0, 0x4a, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x31, 0x01, 0x03, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x70,
+ 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+const struct xe_compute_kernels xe_compute_square_kernels[] = {
+ {
+ .ip_ver = IP_VER(12, 0),
+ .size = sizeof(tgllp_kernel_square_bin),
+ .kernel = tgllp_kernel_square_bin,
+ },
+ {}
+};
diff --git a/tests/xe/xe_compute.c b/tests/xe/xe_compute.c
index 138d80671435..7ac64dfe3199 100644
--- a/tests/xe/xe_compute.c
+++ b/tests/xe/xe_compute.c
@@ -14,117 +14,21 @@
#include <string.h>
#include "igt.h"
-#include "lib/igt_syncobj.h"
-#include "xe_drm.h"
-#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
#include "xe/xe_compute.h"
-#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
-#define SIZE_DATA 64
-#define SIZE_BATCH 0x1000
-#define SIZE_KERNEL 0x1000
-#define SIZE_BUFFER_INPUT MAX(sizeof(float)*SIZE_DATA, 0x1000)
-#define SIZE_BUFFER_OUTPUT MAX(sizeof(float)*SIZE_DATA, 0x1000)
-#define ADDR_BATCH 0x100000
-#define ADDR_INPUT (unsigned long)0x200000
-#define ADDR_OUTPUT (unsigned long)0x300000
-#define ADDR_SURFACE_STATE_BASE (unsigned long)0x400000
-#define ADDR_DYNAMIC_STATE_BASE (unsigned long)0x500000
-#define ADDR_INDIRECT_OBJECT_BASE 0x800100000000
-#define OFFSET_INDIRECT_DATA_START 0xFFFDF000
-#define OFFSET_KERNEL 0xFFFEF000
-
-struct bo_dict_entry {
- uint64_t addr;
- uint32_t size;
- void *data;
-};
-
/**
* SUBTEST: compute-square
- * GPU requirement: only works on TGL_GT2 with device ID: 0x9a49
+ * GPU requirement: only works on TGL
* Description:
- * This test shows how to create a batch to execute a
- * compute kernel. For now it supports tgllp only.
+ * Run an openCL Kernel that returns output[i] = input[i] * input[i],
+ * for an input dataset..
* TODO: extend test to cover other platforms
*/
static void
test_compute_square(int fd)
{
- uint32_t vm, engine;
- float *dinput;
- struct drm_xe_sync sync = { 0 };
-
-#define BO_DICT_ENTRIES 7
- struct bo_dict_entry bo_dict[BO_DICT_ENTRIES] = {
- { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_KERNEL, .size = SIZE_KERNEL }, // kernel
- { .addr = ADDR_DYNAMIC_STATE_BASE, .size = 0x1000}, // dynamic state
- { .addr = ADDR_SURFACE_STATE_BASE, .size = 0x1000}, // surface state
- { .addr = ADDR_INDIRECT_OBJECT_BASE + OFFSET_INDIRECT_DATA_START, .size = 0x10000}, // indirect data
- { .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT }, // input
- { .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT }, // output
- { .addr = ADDR_BATCH, .size = SIZE_BATCH }, // batch
- };
-
- vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
- engine = xe_engine_create_class(fd, vm, DRM_XE_ENGINE_CLASS_RENDER);
- sync.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL;
- sync.handle = syncobj_create(fd, 0);
-
- for(int i = 0; i < BO_DICT_ENTRIES; i++) {
- bo_dict[i].data = aligned_alloc(xe_get_default_alignment(fd), bo_dict[i].size);
- xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(bo_dict[i].data), bo_dict[i].addr, bo_dict[i].size, &sync, 1);
- syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
- memset(bo_dict[i].data, 0, bo_dict[i].size);
- }
- memcpy(bo_dict[0].data, tgllp_kernel_square_bin, tgllp_kernel_square_length);
- tgllp_create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
- tgllp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
- tgllp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
- dinput = (float *)bo_dict[4].data;
- srand(time(NULL));
- for(int i=0; i < SIZE_DATA; i++) {
- ((float*) dinput)[i] = rand()/(float)RAND_MAX;
- }
- tgllp_create_batch_compute(bo_dict[6].data, ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE, ADDR_INDIRECT_OBJECT_BASE, OFFSET_INDIRECT_DATA_START);
-
- xe_exec_wait(fd, engine, ADDR_BATCH);
- for(int i = 0; i < SIZE_DATA; i++) {
- igt_assert(((float*) bo_dict[5].data)[i] == ((float*) bo_dict[4].data)[i] * ((float*) bo_dict[4].data)[i]);
- }
-
- for(int i = 0; i < BO_DICT_ENTRIES; i++) {
- xe_vm_unbind_async(fd, vm, 0, 0, bo_dict[i].addr, bo_dict[i].size, &sync, 1);
- syncobj_wait(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
- free(bo_dict[i].data);
- }
-
- syncobj_destroy(fd, sync.handle);
- xe_engine_destroy(fd, engine);
- xe_vm_destroy(fd, vm);
-}
-
-static bool
-is_device_supported(int fd)
-{
- struct drm_xe_query_config *config;
- struct drm_xe_device_query query = {
- .extensions = 0,
- .query = DRM_XE_DEVICE_QUERY_CONFIG,
- .size = 0,
- .data = 0,
- };
-
- igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
-
- config = malloc(query.size);
- igt_assert(config);
-
- query.data = to_user_pointer(config);
- igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query), 0);
-
- return (config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff) == 0x9a49;
+ igt_require_f(run_xe_compute_kernel(fd), "GPU not supported\n");
}
igt_main
@@ -136,10 +40,8 @@ igt_main
xe_device_get(xe);
}
- igt_subtest("compute-square") {
- igt_skip_on(!is_device_supported(xe));
+ igt_subtest("compute-square")
test_compute_square(xe);
- }
igt_fixture {
xe_device_put(xe);
--
2.39.2
More information about the igt-dev
mailing list