[PATCH] drm/xe/uapi: Add DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY
Matthew Brost
matthew.brost at intel.com
Thu Jul 10 18:21:18 UTC 2025
On Thu, Jul 10, 2025 at 01:03:41PM -0500, Lucas De Marchi wrote:
> On Thu, Jul 10, 2025 at 01:24:35PM +0530, Nitin Gote wrote:
> > Add the DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY device query flag
> > which indicates whether a device supports DIS_NULL_QUERY (Disable null
> > anyhit shader query mechanism). The intent is for UMDs
> > to use this query and opt-in DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY flag
> > to disable null query mechanism for anyhit shader by setting
> > DIS_NULL_QUERY bit of RT_CTRL register for Xe2.
>
> it seems like this commit has been over squashed and this commit message
> doesn't really match what it's doing. This isn't just adding a
> DRM_XE_QUERY_* (what the title implies), it's actually implementing the
> DRM_XE_EXEC_QUEUE_*
>
> >
> > v2:
> > - Use xe_rtp_match_first_render_or_compute() api to check
> > render_or_compute. (Tejas)
> > - Validate args->flags (Tejas/Matthew)
> > - Add proper kernel-doc for both DRM_XE_EXEC_QUEUE_DIS_NULL_QUERY
> > and DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT. (Matthew)
> >
> > v3:
> > - Rename DRM_XE_EXEC_QUEUE_DIS_NULL_QUERY to
> > DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY. (Matthew)
> > - Move implementaion from exec queue creation time to
> > WA BB program. (Matthew)
> >
> > Signed-off-by: Nitin Gote <nitin.r.gote at intel.com>
> > ---
> > Hi,
> >
> > Please note the review comments from previous rev2, addressed in this patch.
> > https://patchwork.freedesktop.org/patch/660580/?series=150601&rev=2
> >
> >
> > .../gpu/drm/xe/instructions/xe_mi_commands.h | 4 ++
> > drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +
> > drivers/gpu/drm/xe/xe_exec_queue.c | 9 ++-
> > drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 +
> > drivers/gpu/drm/xe/xe_gt_mcr.c | 1 -
> > drivers/gpu/drm/xe/xe_lrc.c | 65 +++++++++++++++++++
> > drivers/gpu/drm/xe/xe_query.c | 3 +-
> > include/uapi/drm/xe_drm.h | 12 ++++
> > 8 files changed, 95 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > index e3f5e8bb3ebc..84e889528464 100644
> > --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
> > @@ -80,4 +80,8 @@
> > #define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0)
> > #define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
> >
> > +#define MI_SEMAPHORE_WAIT __MI_INSTR(0x1c)
> > +#define MI_SEMAPHORE_POLL (1 << 15)
> > +#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
> > +#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
>
> look at the defines above and implement it according to them. There are
> multiple things wrong wrt coding style here:
>
> 1) the MI definition is sorted by its number. 0x1c shouldn't be the last
> in the file
> 2) bits definition should be sorted, highest number first, to follow the
> bspec and be easy to compare
> 3) don't use shifts, use REG_BIT()
> 4) there's a missing space in the bits definitions
>
Thanks Lucas - it was late when I was reviewing this and apparently
missed a ton things.
Agree with everything Lucas says here.
One follow up below.
>
> > #endif
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > index 5cd5ab8529c5..8234789f05b7 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > @@ -55,6 +55,8 @@
> > #define MTL_MCR_GROUPID REG_GENMASK(11, 8)
> > #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0)
> >
> > +#define STEER_SEMAPHORE XE_REG(0xFD0)
>
> ditto, should be sorted too, but it also clashes with
> MCFG_MCR_SELECTOR. I'm actually not sure why we are using MCFG_MCR_SELECTOR
> name... probably a copy & paste from i915. That register is indeed
> called STEER_SEMAPHORE in bspec.
>
> > +
> > #define PS_INVOCATION_COUNT XE_REG(0x2348)
> >
> > #define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4)
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index 8991b4aed440..18a0bee38eed 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -131,6 +131,9 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
> > flags |= XE_LRC_CREATE_RUNALONE;
> > }
> >
> > + if (q->flags & EXEC_QUEUE_DISABLE_NULL_QUERY)
> > + flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
> > +
> > for (i = 0; i < q->width; ++i) {
> > q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
> > if (IS_ERR(q->lrc[i])) {
> > @@ -597,7 +600,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > u32 len;
> > int err;
> >
> > - if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
> > + if (XE_IOCTL_DBG(xe, args->flags &
> > + ~(DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT | DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY)) ||
> > XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> > return -EINVAL;
> >
> > @@ -616,6 +620,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
> > flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;
> >
> > + if (args->flags & DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY)
> > + flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
> > +
> > if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
> > if (XE_IOCTL_DBG(xe, args->width != 1) ||
> > XE_IOCTL_DBG(xe, args->num_placements != 1) ||
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index cc1cffb5c87f..b62502a18c1b 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -87,6 +87,8 @@ struct xe_exec_queue {
> > #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4)
> > /* flag to indicate low latency hint to guc */
> > #define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5)
> > +/* flag to indicate disable null query hint */
> > +#define EXEC_QUEUE_DISABLE_NULL_QUERY BIT(6)
>
> ^ missing FLAG here to follow the other defines
> >
> > /**
> > * @flags: flags for this exec queue, should statically setup aside from ban
> > diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
> > index 64a2f0d6aaf9..a7624b30d0c9 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
> > @@ -46,7 +46,6 @@
> > * MCR registers are not available on Virtual Function (VF).
> > */
> >
> > -#define STEER_SEMAPHORE XE_REG(0xFD0)
> >
> > static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
> > {
> > diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
> > index d2ad8fe737eb..6fc09df67442 100644
> > --- a/drivers/gpu/drm/xe/xe_lrc.c
> > +++ b/drivers/gpu/drm/xe/xe_lrc.c
> > @@ -13,6 +13,7 @@
> > #include "instructions/xe_gfxpipe_commands.h"
> > #include "instructions/xe_gfx_state_commands.h"
> > #include "regs/xe_engine_regs.h"
> > +#include "regs/xe_gt_regs.h"
> > #include "regs/xe_lrc_layout.h"
> > #include "xe_bb.h"
> > #include "xe_bo.h"
> > @@ -25,6 +26,7 @@
> > #include "xe_map.h"
> > #include "xe_memirq.h"
> > #include "xe_mmio.h"
> > +#include "xe_rtp.h"
> > #include "xe_sriov.h"
> > #include "xe_trace_lrc.h"
> > #include "xe_vm.h"
> > @@ -972,9 +974,56 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *
> > return cmd - batch;
> > }
> >
> > +/*
> > + * wa_bb_disable_null_query() - Emit commands in the WA BB
> > + * to disable or enable NULL Anyhit Shader Query Mechanism.
> > + *
> > + * Some platforms require a workaround to disable (or enable) the
> > + * Anyhit Shader NULL QUERY for specific engines (typically the
> > + * first render or compute engine). This function emits a sequence
> > + * of MI commands into the workaround batch buffer (WA BB) to perform
> > + * a multicast write to the RT_CTRL register, setting or clearing the
> > + * DIS_NULL_QUERY bit.
> > + *
> > + * The sequence includes a semaphore wait to ensure proper ordering,
> > + * followed by MI_LOAD_REGISTER_IMM commands to write the desired value
> > + * to the RT_CTRL register, and finally restores the semaphore state.
> > + *
> > + */
> > +static ssize_t wa_bb_disable_null_query(struct xe_gt *gt, struct xe_hw_engine *hwe,
> > + u32 *batch, size_t max_size, u32 value)
> > +{
> > + u32 *cmd = batch;
> > + struct xe_reg_mcr reg_mcr = RT_CTRL;
> > + struct xe_reg reg = reg_mcr.__reg;
> > + struct xe_reg reg_sema = STEER_SEMAPHORE;
> > +
> > + /* Add WARN_ON check for minimum required space (10 DWORDs) */
>
> no need for this comment
>
> > + if (xe_gt_WARN_ON(gt, max_size < 10))
> > + return -ENOSPC;
> > +
> > + if (xe_rtp_match_first_render_or_compute(gt, hwe)) {
> > + *cmd++ = (MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL |
> > + MI_SEMAPHORE_SAD_EQ_SDD | MI_SEMAPHORE_REGISTER_POLL);
> > + *cmd++ = 1;
> > + *cmd++ = reg_sema.addr;
>
> I think we can simply use
>
> *cmd++ = STEER_SEMAPHORE.addr
>
> and similarly in other places here and let go the local vars above.
>
> > + *cmd++ = 0;
> > + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
> > + *cmd++ = reg.addr;
> > + *cmd++ = value;
> > + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
> > + *cmd++ = reg_sema.addr;
> > + *cmd++ = 1;
> > + }
> > +
> > + return cmd - batch;
> > +}
> > +
> > struct wa_bb_setup {
> > ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> > u32 *batch, size_t max_size);
> > + ssize_t (*disable_null_query)(struct xe_gt *gt, struct xe_hw_engine *hwe,
> > + u32 *batch, size_t max_size, u32 value);
>
> this is not how this works... you should define just your function, and
> add it to the array below as a "setup function".
>
> > };
> >
> > static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> > @@ -982,6 +1031,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> > const size_t max_size = LRC_WA_BB_SIZE;
> > static const struct wa_bb_setup funcs[] = {
> > { .setup = wa_bb_setup_utilization },
> > + { .disable_null_query = wa_bb_disable_null_query },
>
> should have been:
>
> { .setup = wa_bb_disable_null_query },
>
> But also beware we are refactoring this a little bit, so on next rev you
> will hit some conflicts. See
>
> drm/xe: LRC refactors @ https://patchwork.freedesktop.org/series/151152/
> More missing XeLP workarounds @ https://patchwork.freedesktop.org/series/149097/
>
>
>
> > };
> > ssize_t remain;
> > u32 *cmd, *buf = NULL;
> > @@ -1000,6 +1050,18 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> > for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) {
> > ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain);
> >
> > + if (((GRAPHICS_VER(gt_to_xe(lrc->gt)) >= 20) &&
>
> this check should be inside the setup function, not here.
>
> Also, is this a WA? It's looking like it, which means you need a
> WA number, add it to drivers/gpu/drm/xe/xe_wa_oob.rules with the
> graphics range where it applies and then in wa_bb_disable_null_query()
> you use:
>
> if (!XE_WA(gt, ...))
> return 0;
>
> > + (GRAPHICS_VER(gt_to_xe(lrc->gt)) < 30))) {
> > + if (lrc->flags & EXEC_QUEUE_DISABLE_NULL_QUERY)
> > + len += funcs[i].disable_null_query(lrc->gt, hwe,
> > + cmd, remain,
> > + DIS_NULL_QUERY);
> > + else
> > + len += funcs[i].disable_null_query(lrc->gt, hwe,
> > + cmd, remain,
> > + ~DIS_NULL_QUERY);
> > + }
> > +
> > remain -= len;
> >
> > /*
> > @@ -1175,6 +1237,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> > map = __xe_lrc_start_seqno_map(lrc);
> > xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
> >
> > + if (init_flags & EXEC_QUEUE_DISABLE_NULL_QUERY)
>
> ahn? Please don't mix LRC_*_FLAGS and EXEC_QUEUE_*_FLAGS. This
> is a recipe for disaster. I don't see why you'd need to set any flag in
> lrc.
>
I think the caller (__xe_exec_queue_init) needs to go from
EXEC_QUEUE_DISABLE_NULL_QUERY -> to an LRC specific flag and pass it via
init_flags.
Matt
> Lucas De Marchi
>
> > + lrc->flags |= EXEC_QUEUE_DISABLE_NULL_QUERY;
> > +
> > err = setup_wa_bb(lrc, hwe);
> > if (err)
> > goto err_lrc_finish;
> > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > index d517ec9ddcbf..5b48befd32cd 100644
> > --- a/drivers/gpu/drm/xe/xe_query.c
> > +++ b/drivers/gpu/drm/xe/xe_query.c
> > @@ -344,7 +344,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
> > config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
> > DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR;
> > config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
> > - DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
> > + (DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY |
> > + DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY);
> > config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
> > xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
> > config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index e2426413488f..adab49e63757 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -397,6 +397,8 @@ struct drm_xe_query_mem_regions {
> > * has low latency hint support
> > * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the
> > * device has CPU address mirroring support
> > + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY - Flag is set if the
> > + * device has null query support for anyhit shader.
> > * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
> > * required by this device, typically SZ_4K or SZ_64K
> > * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
> > @@ -415,6 +417,7 @@ struct drm_xe_query_config {
> > #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0)
> > #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1)
> > #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2)
> > + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_DISABLE_NULL_QUERY (1 << 3)
> > #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2
> > #define DRM_XE_QUERY_CONFIG_VA_BITS 3
> > #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4
> > @@ -1270,7 +1273,16 @@ struct drm_xe_exec_queue_create {
> > /** @vm_id: VM to use for this exec queue */
> > __u32 vm_id;
> >
> > + /** DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - \
> > + * Flag is set if the device has low latency hint support
> > + */
> > #define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (1 << 0)
> > +
> > + /** DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY - \
> > + * flag is use to disable null query check for Anyhit shader
> > + */
> > +#define DRM_XE_EXEC_QUEUE_DISABLE_NULL_QUERY (1 << 1)
> > +
> > /** @flags: flags to use for this exec queue */
> > __u32 flags;
> >
> > --
> > 2.25.1
> >
More information about the Intel-xe
mailing list