<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 3/13/2024 8:25 PM,
<a class="moz-txt-link-abbreviated" href="mailto:janga.rahul.kumar@intel.com">janga.rahul.kumar@intel.com</a> wrote:<br>
</div>
<blockquote type="cite" cite="mid:20240313192510.2316807-1-janga.rahul.kumar@intel.com">
<pre class="moz-quote-pre" wrap="">From: Janga Rahul Kumar <a class="moz-txt-link-rfc2396E" href="mailto:janga.rahul.kumar@intel.com">&lt;janga.rahul.kumar@intel.com&gt;</a>
Test submits long kernel with a higher threadgroup count, lower
iteration kernel and a short opencl kernel to exercise threadgroup
preemption scenario with WMTP disabled.
Cc: Nirmoy Das <a class="moz-txt-link-rfc2396E" href="mailto:nirmoy.das@intel.com">&lt;nirmoy.das@intel.com&gt;</a>
Signed-off-by: Janga Rahul Kumar <a class="moz-txt-link-rfc2396E" href="mailto:janga.rahul.kumar@intel.com">&lt;janga.rahul.kumar@intel.com&gt;</a>
---
lib/intel_compute.c | 61 +++++++++++++++++++++++---------
lib/intel_compute.h | 3 +-
tests/intel/xe_compute_preempt.c | 24 ++++++++++---
3 files changed, 67 insertions(+), 21 deletions(-)
diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index c5d253ebc..b33f49fb5 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -1162,7 +1162,8 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
uint64_t addr_state_contect_data_base,
uint64_t offset_indirect_data_start,
uint64_t kernel_start_pointer,
- uint64_t sip_start_pointer)
+ uint64_t sip_start_pointer,
+ bool threadgroup_preemption)
{
int b = 0;
@@ -1236,7 +1237,12 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
addr_bo_buffer_batch[b++] = 0xbe040000;
addr_bo_buffer_batch[b++] = 0xffffffff;
addr_bo_buffer_batch[b++] = 0x000003ff;
- addr_bo_buffer_batch[b++] = 0x00000002;
+
+ if (threadgroup_preemption)
+ addr_bo_buffer_batch[b++] = 0x00200000; // Global workgroup size</pre>
</blockquote>
<p>Bspec calls this field "Thread Group ID X Dimension", but here it is
used as the thread group size. It would be nice to document how this is used </p>
<p>to control the number of threads; I am fine if that is done as a
separate patch.</p>
<blockquote type="cite" cite="mid:20240313192510.2316807-1-janga.rahul.kumar@intel.com">
<pre class="moz-quote-pre" wrap="">
+ else
+ addr_bo_buffer_batch[b++] = 0x00000002;
+
addr_bo_buffer_batch[b++] = 0x00000001;
addr_bo_buffer_batch[b++] = 0x00000001;
addr_bo_buffer_batch[b++] = 0x00000000;
@@ -1251,7 +1257,12 @@ static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
addr_bo_buffer_batch[b++] = kernel_start_pointer;
addr_bo_buffer_batch[b++] = 0x00000000;
- addr_bo_buffer_batch[b++] = 0x00100000; // Enable Thread Preemption BitField:20
+
+ if (threadgroup_preemption)
+ addr_bo_buffer_batch[b++] = 0x00000000;
+ else
+ addr_bo_buffer_batch[b++] = 0x00100000; // Enable Mid Thread Preemption BitField:20
+
addr_bo_buffer_batch[b++] = 0x00000000;
addr_bo_buffer_batch[b++] = 0x00000000;
addr_bo_buffer_batch[b++] = 0x0c000020;
@@ -1369,7 +1380,7 @@ static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
ADDR_INSTRUCTION_STATE_BASE,
XE2_ADDR_STATE_CONTEXT_DATA_BASE,
OFFSET_INDIRECT_DATA_START,
- OFFSET_KERNEL, 0);
+ OFFSET_KERNEL, 0, false);
bo_execenv_exec(&execenv, ADDR_BATCH);
@@ -1527,7 +1538,8 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
unsigned int short_kernel_size,
const unsigned char *sip_kernel,
unsigned int sip_kernel_size,
- struct drm_xe_engine_class_instance *eci)
+ struct drm_xe_engine_class_instance *eci,
+ bool threadgroup_preemption)
{
#define XE2_BO_PREEMPT_DICT_ENTRIES 11
struct bo_dict_entry bo_dict_long[XE2_BO_PREEMPT_DICT_ENTRIES] = {
@@ -1564,6 +1576,7 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
struct bo_dict_entry bo_dict_short[XE2_BO_PREEMPT_DICT_ENTRIES];
struct bo_execenv execenv_short, execenv_long;
float *dinput;
+ unsigned int long_kernel_loop_count;
struct drm_xe_sync sync_long = {
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
@@ -1574,7 +1587,11 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
.handle = syncobj_create(fd, 0),
};
- unsigned int long_kernel_loop_count = 1000000;
+
+ if (threadgroup_preemption)
+ long_kernel_loop_count = 10;
+ else
+ long_kernel_loop_count = 1000000;</pre>
</blockquote>
Let's put those constants into macros. <br>
<blockquote type="cite" cite="mid:20240313192510.2316807-1-janga.rahul.kumar@intel.com">
<pre class="moz-quote-pre" wrap="">
for (int i = 0; i < XE2_BO_PREEMPT_DICT_ENTRIES; ++i)
bo_dict_short[i] = bo_dict_long[i];
@@ -1622,12 +1639,12 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
xe2lpg_compute_exec_compute(bo_dict_long[8].data, ADDR_GENERAL_STATE_BASE,
ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE,
ADDR_INSTRUCTION_STATE_BASE, XE2_ADDR_STATE_CONTEXT_DATA_BASE,
- OFFSET_INDIRECT_DATA_START, OFFSET_KERNEL, OFFSET_STATE_SIP);
+ OFFSET_INDIRECT_DATA_START, OFFSET_KERNEL, OFFSET_STATE_SIP, threadgroup_preemption);
xe2lpg_compute_exec_compute(bo_dict_short[8].data, ADDR_GENERAL_STATE_BASE,
ADDR_SURFACE_STATE_BASE, ADDR_DYNAMIC_STATE_BASE,
ADDR_INSTRUCTION_STATE_BASE, XE2_ADDR_STATE_CONTEXT_DATA_BASE,
- OFFSET_INDIRECT_DATA_START, OFFSET_KERNEL, OFFSET_STATE_SIP);
+ OFFSET_INDIRECT_DATA_START, OFFSET_KERNEL, OFFSET_STATE_SIP, false);
xe_exec_sync(fd, execenv_long.exec_queue, ADDR_BATCH, &sync_long, 1);
@@ -1655,9 +1672,15 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
f1 = ((float *) bo_dict_long[5].data)[i];
- if (f1 != long_kernel_loop_count)
- igt_debug("[%4d] f1: %f != %u\n", i, f1, long_kernel_loop_count);
- igt_assert(f1 == long_kernel_loop_count);
+ if (threadgroup_preemption) {
+ if (f1 < long_kernel_loop_count)
+ igt_debug("[%4d] f1: %f != %u\n", i, f1, long_kernel_loop_count);</pre>
</blockquote>
<p>Also, please document why f1 should be &gt; <span style="white-space: pre-wrap">long_kernel_loop_count.</span></p>
<p><span style="white-space: pre-wrap">With those addressed, this is:
Reviewed-by: Nirmoy Das <a class="moz-txt-link-rfc2396E" href="mailto:nirmoy.das@intel.com">&lt;nirmoy.das@intel.com&gt;</a>
</span></p>
<br>
<blockquote type="cite" cite="mid:20240313192510.2316807-1-janga.rahul.kumar@intel.com">
<pre class="moz-quote-pre" wrap="">
+ igt_assert(f1 > long_kernel_loop_count);
+ } else {
+ if (f1 != long_kernel_loop_count)
+ igt_debug("[%4d] f1: %f != %u\n", i, f1, long_kernel_loop_count);
+ igt_assert(f1 == long_kernel_loop_count);
+ }
}
bo_execenv_unbind(&execenv_short, bo_dict_short, XE2_BO_PREEMPT_DICT_ENTRIES);
@@ -1675,7 +1698,8 @@ static const struct {
unsigned int short_kernel_size,
const unsigned char *sip_kernel,
unsigned int sip_kernel_size,
- struct drm_xe_engine_class_instance *eci);
+ struct drm_xe_engine_class_instance *eci,
+ bool threadgroup_preemption);
uint32_t compat;
} intel_compute_preempt_batches[] = {
{
@@ -1686,7 +1710,8 @@ static const struct {
};
static bool __run_intel_compute_kernel_preempt(int fd,
- struct drm_xe_engine_class_instance *eci)
+ struct drm_xe_engine_class_instance *eci,
+ bool threadgroup_preemption)
{
unsigned int ip_ver = intel_graphics_ver(intel_get_drm_devid(fd));
unsigned int batch;
@@ -1724,7 +1749,8 @@ static bool __run_intel_compute_kernel_preempt(int fd,
kernels->kernel, kernels->size,
kernels->sip_kernel,
kernels->sip_kernel_size,
- eci);
+ eci,
+ threadgroup_preemption);
return true;
}
@@ -1733,11 +1759,14 @@ static bool __run_intel_compute_kernel_preempt(int fd,
* exercise preemption scenario.
*
* @fd: file descriptor of the opened DRM Xe device
+ * @eci: engine class instance
+ * @thread_preemption: enable/disable threadgroup preemption test
*
* Returns true on success, false otherwise.
*/
bool run_intel_compute_kernel_preempt(int fd,
- struct drm_xe_engine_class_instance *eci)
+ struct drm_xe_engine_class_instance *eci,
+ bool threadgroup_preemption)
{
- return __run_intel_compute_kernel_preempt(fd, eci);
+ return __run_intel_compute_kernel_preempt(fd, eci, threadgroup_preemption);
}
diff --git a/lib/intel_compute.h b/lib/intel_compute.h
index fe9637b91..3c2cd010c 100644
--- a/lib/intel_compute.h
+++ b/lib/intel_compute.h
@@ -37,5 +37,6 @@ extern const struct intel_compute_kernels intel_compute_square_kernels[];
bool run_intel_compute_kernel(int fd);
bool xe_run_intel_compute_kernel_on_engine(int fd, struct drm_xe_engine_class_instance *eci);
-bool run_intel_compute_kernel_preempt(int fd, struct drm_xe_engine_class_instance *eci);
+bool run_intel_compute_kernel_preempt(int fd, struct drm_xe_engine_class_instance *eci,
+ bool threadgroup_preemption);
#endif /* INTEL_COMPUTE_H */
diff --git a/tests/intel/xe_compute_preempt.c b/tests/intel/xe_compute_preempt.c
index 0aeb10547..2bc27eff1 100644
--- a/tests/intel/xe_compute_preempt.c
+++ b/tests/intel/xe_compute_preempt.c
@@ -27,11 +27,16 @@
* Description:
* Exercise multiple walker mid thread preemption scenario
* Functionality: compute openCL kernel
+ * SUBTEST: compute-threadgroup-preempt
+ * GPU requirement: LNL
+ * Description:
+ * Exercise compute walker threadgroup preemption scenario
+ * Functionality: compute openCL kernel
*/
static void
-test_compute_preempt(int fd, struct drm_xe_engine_class_instance *hwe)
+test_compute_preempt(int fd, struct drm_xe_engine_class_instance *hwe, bool threadgroup_preemption)
{
- igt_require_f(run_intel_compute_kernel_preempt(fd, hwe), "GPU not supported\n");
+ igt_require_f(run_intel_compute_kernel_preempt(fd, hwe, threadgroup_preemption), "GPU not supported\n");
}
igt_main
@@ -49,7 +54,7 @@ igt_main
continue;
igt_dynamic_f("engine-%s", xe_engine_class_string(hwe->engine_class))
- test_compute_preempt(xe, hwe);
+ test_compute_preempt(xe, hwe, false);
}
}
@@ -61,12 +66,23 @@ igt_main
igt_dynamic_f("engine-%s", xe_engine_class_string(hwe->engine_class)) {
igt_fork(child, 100)
- test_compute_preempt(xe, hwe);
+ test_compute_preempt(xe, hwe, false);
igt_waitchildren();
}
}
}
+ igt_subtest_with_dynamic("compute-threadgroup-preempt") {
+ xe_for_each_engine(xe, hwe) {
+ if (hwe->engine_class != DRM_XE_ENGINE_CLASS_COMPUTE &&
+ hwe->engine_class != DRM_XE_ENGINE_CLASS_RENDER)
+ continue;
+
+ igt_dynamic_f("engine-%s", xe_engine_class_string(hwe->engine_class))
+ test_compute_preempt(xe, hwe, true);
+ }
+ }
+
igt_fixture
drm_close_driver(xe);
</pre>
</blockquote>
</body>
</html>