[PATCH] drm/xe/uapi: Add DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY

Lucas De Marchi lucas.demarchi at intel.com
Thu Jul 10 18:03:41 UTC 2025


On Thu, Jul 10, 2025 at 01:24:35PM +0530, Nitin Gote wrote:
>Add the DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY device query flag
>which indicates whether a device supports DIS_NULL_QUERY (Disable null
>anyhit shader query mechanism). The intent is for UMDs
>to use this query and opt-in DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY flag
>to disable null query mechanism for anyhit shader by setting
>DIS_NULL_QUERY bit of RT_CTRL register for Xe2.

it seems like this commit has been over squashed and this commit message
doesn't really match what it's doing. This isn't just adding a
DRM_XE_QUERY_* (what the title implies), it's actually implementing the
DRM_XE_EXEC_QUEUE_*

>
>v2:
>   - Use xe_rtp_match_first_render_or_compute() api to check
>     render_or_compute. (Tejas)
>   - Validate args->flags (Tejas/Matthew)
>   - Add proper kernel-doc for both DRM_XE_EXEC_QUEUE_DIS_NULL_QUERY
>     and DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT. (Matthew)
>
>v3:
>   - Rename DRM_XE_EXEC_QUEUE_DIS_NULL_QUERY to
>     DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY. (Matthew)
>   - Move implementaion from exec queue creation time to
>     WA BB program. (Matthew)
>
>Signed-off-by: Nitin Gote <nitin.r.gote at intel.com>
>---
>Hi,
>
>Please note the review comments from previous rev2, addressed in this patch.
>https://patchwork.freedesktop.org/patch/660580/?series=150601&rev=2
>
>
> .../gpu/drm/xe/instructions/xe_mi_commands.h  |  4 ++
> drivers/gpu/drm/xe/regs/xe_gt_regs.h          |  2 +
> drivers/gpu/drm/xe/xe_exec_queue.c            |  9 ++-
> drivers/gpu/drm/xe/xe_exec_queue_types.h      |  2 +
> drivers/gpu/drm/xe/xe_gt_mcr.c                |  1 -
> drivers/gpu/drm/xe/xe_lrc.c                   | 65 +++++++++++++++++++
> drivers/gpu/drm/xe/xe_query.c                 |  3 +-
> include/uapi/drm/xe_drm.h                     | 12 ++++
> 8 files changed, 95 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>index e3f5e8bb3ebc..84e889528464 100644
>--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>@@ -80,4 +80,8 @@
> #define MI_SET_APPID_SESSION_ID_MASK	REG_GENMASK(6, 0)
> #define MI_SET_APPID_SESSION_ID(x)	REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
>
>+#define MI_SEMAPHORE_WAIT		__MI_INSTR(0x1c)
>+#define  MI_SEMAPHORE_POLL		(1 << 15)
>+#define  MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
>+#define  MI_SEMAPHORE_REGISTER_POLL	(1 << 16)

look at the defines above and implement it according to them. There are
multiple things wrong wrt coding style here:

1) the MI definition is sorted by its number. 0x1c shouldn't be the last
in the file
2) bits definition should be sorted, highest number first, to follow the
bspec and be easy to compare
3) don't use shifts, use REG_BIT()
4) there's a missing space in the bits definitions


> #endif
>diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>index 5cd5ab8529c5..8234789f05b7 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>@@ -55,6 +55,8 @@
> #define   MTL_MCR_GROUPID			REG_GENMASK(11, 8)
> #define   MTL_MCR_INSTANCEID			REG_GENMASK(3, 0)
>
>+#define STEER_SEMAPHORE				XE_REG(0xFD0)

ditto, should be sorted too, but it also clashes with
MCFG_MCR_SELECTOR. I'm actually not sure why we are using MCFG_MCR_SELECTOR
name... probably a copy & paste from i915. That register is indeed
called STEER_SEMAPHORE in bspec.

>+
> #define PS_INVOCATION_COUNT			XE_REG(0x2348)
>
> #define XELP_GLOBAL_MOCS(i)			XE_REG(0x4000 + (i) * 4)
>diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>index 8991b4aed440..18a0bee38eed 100644
>--- a/drivers/gpu/drm/xe/xe_exec_queue.c
>+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>@@ -131,6 +131,9 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
> 			flags |= XE_LRC_CREATE_RUNALONE;
> 	}
>
>+	if (q->flags & EXEC_QUEUE_DISABLE_NULL_QUERY)
>+		flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
>+
> 	for (i = 0; i < q->width; ++i) {
> 		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
> 		if (IS_ERR(q->lrc[i])) {
>@@ -597,7 +600,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> 	u32 len;
> 	int err;
>
>-	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
>+	if (XE_IOCTL_DBG(xe, args->flags &
>+	    ~(DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT | DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY)) ||
> 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> 		return -EINVAL;
>
>@@ -616,6 +620,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> 	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
> 		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;
>
>+	if (args->flags & DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY)
>+		flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
>+
> 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
> 		if (XE_IOCTL_DBG(xe, args->width != 1) ||
> 		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
>diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>index cc1cffb5c87f..b62502a18c1b 100644
>--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
>+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>@@ -87,6 +87,8 @@ struct xe_exec_queue {
> #define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(4)
> /* flag to indicate low latency hint to guc */
> #define EXEC_QUEUE_FLAG_LOW_LATENCY		BIT(5)
>+/* flag to indicate disable null query hint */
>+#define EXEC_QUEUE_DISABLE_NULL_QUERY		BIT(6)

		     ^ missing FLAG here to follow the other defines
>
> 	/**
> 	 * @flags: flags for this exec queue, should statically setup aside from ban
>diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
>index 64a2f0d6aaf9..a7624b30d0c9 100644
>--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
>+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
>@@ -46,7 +46,6 @@
>  * MCR registers are not available on Virtual Function (VF).
>  */
>
>-#define STEER_SEMAPHORE		XE_REG(0xFD0)
>
> static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
> {
>diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>index d2ad8fe737eb..6fc09df67442 100644
>--- a/drivers/gpu/drm/xe/xe_lrc.c
>+++ b/drivers/gpu/drm/xe/xe_lrc.c
>@@ -13,6 +13,7 @@
> #include "instructions/xe_gfxpipe_commands.h"
> #include "instructions/xe_gfx_state_commands.h"
> #include "regs/xe_engine_regs.h"
>+#include "regs/xe_gt_regs.h"
> #include "regs/xe_lrc_layout.h"
> #include "xe_bb.h"
> #include "xe_bo.h"
>@@ -25,6 +26,7 @@
> #include "xe_map.h"
> #include "xe_memirq.h"
> #include "xe_mmio.h"
>+#include "xe_rtp.h"
> #include "xe_sriov.h"
> #include "xe_trace_lrc.h"
> #include "xe_vm.h"
>@@ -972,9 +974,56 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *
> 	return cmd - batch;
> }
>
>+/*
>+ * wa_bb_disable_null_query() - Emit commands in the WA BB
>+ * to disable or enable NULL Anyhit Shader Query Mechanism.
>+ *
>+ * Some platforms require a workaround to disable (or enable) the
>+ * Anyhit Shader NULL QUERY for specific engines (typically the
>+ * first render or compute engine). This function emits a sequence
>+ * of MI commands into the workaround batch buffer (WA BB) to perform
>+ * a multicast write to the RT_CTRL register, setting or clearing the
>+ * DIS_NULL_QUERY bit.
>+ *
>+ * The sequence includes a semaphore wait to ensure proper ordering,
>+ * followed by MI_LOAD_REGISTER_IMM commands to write the desired value
>+ * to the RT_CTRL register, and finally restores the semaphore state.
>+ *
>+ */
>+static ssize_t wa_bb_disable_null_query(struct xe_gt *gt, struct xe_hw_engine *hwe,
>+					u32 *batch, size_t max_size, u32 value)
>+{
>+	u32 *cmd = batch;
>+	struct xe_reg_mcr reg_mcr = RT_CTRL;
>+	struct xe_reg reg = reg_mcr.__reg;
>+	struct xe_reg reg_sema = STEER_SEMAPHORE;
>+
>+	/* Add WARN_ON check for minimum required space (10 DWORDs) */

no need for this comment

>+	if (xe_gt_WARN_ON(gt, max_size < 10))
>+		return -ENOSPC;
>+
>+	if (xe_rtp_match_first_render_or_compute(gt, hwe)) {
>+		*cmd++ = (MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL |
>+			  MI_SEMAPHORE_SAD_EQ_SDD | MI_SEMAPHORE_REGISTER_POLL);
>+		*cmd++ = 1;
>+		*cmd++ = reg_sema.addr;

I think we can simply use

		*cmd++ = STEER_SEMAPHORE.addr

and similarly in other places here and let go the local vars above.

>+		*cmd++ = 0;
>+		*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
>+		*cmd++ = reg.addr;
>+		*cmd++ = value;
>+		*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
>+		*cmd++ = reg_sema.addr;
>+		*cmd++ = 1;
>+	}
>+
>+	return cmd - batch;
>+}
>+
> struct wa_bb_setup {
> 	ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> 			 u32 *batch, size_t max_size);
>+	ssize_t (*disable_null_query)(struct xe_gt *gt, struct xe_hw_engine *hwe,
>+				      u32 *batch, size_t max_size, u32 value);

this is not how this works... you should define just your function, and
add it to the array below as a "setup function".

> };
>
> static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
>@@ -982,6 +1031,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> 	const size_t max_size = LRC_WA_BB_SIZE;
> 	static const struct wa_bb_setup funcs[] = {
> 		{ .setup = wa_bb_setup_utilization },
>+		{ .disable_null_query = wa_bb_disable_null_query },

should have been:

		{ .setup = wa_bb_disable_null_query },

But also beware we are refactoring this a little bit, so on next rev you
will hit some conflicts. See 

drm/xe: LRC refactors @ https://patchwork.freedesktop.org/series/151152/
More missing XeLP workarounds @ https://patchwork.freedesktop.org/series/149097/



> 	};
> 	ssize_t remain;
> 	u32 *cmd, *buf = NULL;
>@@ -1000,6 +1050,18 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> 	for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) {
> 		ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain);
>
>+		if (((GRAPHICS_VER(gt_to_xe(lrc->gt)) >= 20) &&

this check should be inside the setup function, not here.

Also, is this a WA? It's looking like it, which means you need a
WA number, add it to drivers/gpu/drm/xe/xe_wa_oob.rules with the
graphics range where it applies and then in wa_bb_disable_null_query()
you use:

	if (!XE_WA(gt, ...))
		return 0;

>+		     (GRAPHICS_VER(gt_to_xe(lrc->gt)) < 30))) {
>+			if (lrc->flags & EXEC_QUEUE_DISABLE_NULL_QUERY)
>+				len += funcs[i].disable_null_query(lrc->gt, hwe,
>+								   cmd, remain,
>+								   DIS_NULL_QUERY);
>+			else
>+				len += funcs[i].disable_null_query(lrc->gt, hwe,
>+								   cmd, remain,
>+								   ~DIS_NULL_QUERY);
>+		}
>+
> 		remain -= len;
>
> 		/*
>@@ -1175,6 +1237,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> 	map = __xe_lrc_start_seqno_map(lrc);
> 	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
>
>+	if (init_flags & EXEC_QUEUE_DISABLE_NULL_QUERY)

ahn? Please don't mix LRC_*_FLAGS and EXEC_QUEUE_*_FLAGS. This
is a recipe for disaster. I don't see why you'd need to set any flag in
lrc.

Lucas De Marchi

>+		lrc->flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
>+
> 	err = setup_wa_bb(lrc, hwe);
> 	if (err)
> 		goto err_lrc_finish;
>diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
>index d517ec9ddcbf..5b48befd32cd 100644
>--- a/drivers/gpu/drm/xe/xe_query.c
>+++ b/drivers/gpu/drm/xe/xe_query.c
>@@ -344,7 +344,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
> 		config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
> 			DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR;
> 	config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
>-			DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
>+			(DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY |
>+			DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY);
> 	config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
> 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
> 	config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
>diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>index e2426413488f..adab49e63757 100644
>--- a/include/uapi/drm/xe_drm.h
>+++ b/include/uapi/drm/xe_drm.h
>@@ -397,6 +397,8 @@ struct drm_xe_query_mem_regions {
>  *      has low latency hint support
>  *    - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the
>  *      device has CPU address mirroring support
>+ *    - %DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY - Flag is set if the
>+ *      device has null query support for anyhit shader.
>  *  - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
>  *    required by this device, typically SZ_4K or SZ_64K
>  *  - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
>@@ -415,6 +417,7 @@ struct drm_xe_query_config {
> 	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM	(1 << 0)
> 	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY	(1 << 1)
> 	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR	(1 << 2)
>+	#define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY	(1 << 3)
> #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT		2
> #define DRM_XE_QUERY_CONFIG_VA_BITS			3
> #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY	4
>@@ -1270,7 +1273,16 @@ struct drm_xe_exec_queue_create {
> 	/** @vm_id: VM to use for this exec queue */
> 	__u32 vm_id;
>
>+	/** DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - \
>+	 *	Flag is set if the device has low latency hint support
>+	 */
> #define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT	(1 << 0)
>+
>+	/** DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY - \
>+	 *	flag is use to disable null query check for Anyhit shader
>+	 */
>+#define DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY	(1 << 1)
>+
> 	/** @flags: flags to use for this exec queue */
> 	__u32 flags;
>
>-- 
>2.25.1
>


More information about the Intel-xe mailing list