[PATCH] drm/xe: Add L3 bank and node masks to topology query
Matt Roper
matthew.d.roper at intel.com
Fri Jan 26 17:14:45 UTC 2024
On Fri, Jan 26, 2024 at 03:38:40PM +0000, Francois Dugast wrote:
> Expose the masks of L3 banks and of L3 nodes to user space. L3 count is not
> sufficient because in some configuration not all banks are enabled, so user
> space needs to know which ones.
>
> Bspec: 52545, 52546, 62482
> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
As Lucas noted, this patch isn't fixing anything, just adding a new
optional feature.
We should probably label this patch more clearly as being a uapi patch
too.
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Robert Krzemien <robert.krzemien at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 +
> drivers/gpu/drm/xe/xe_gt_topology.c | 38 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 14 ++++++++--
> drivers/gpu/drm/xe/xe_query.c | 20 +++++++++++++--
> include/uapi/drm/xe_drm.h | 2 ++
> 5 files changed, 71 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cd27480f6486..f8606649d1e2 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -156,6 +156,7 @@
> #define MIRROR_FUSE3 XE_REG(0x9118)
> #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16)
> #define L3BANK_PAIR_COUNT 4
> +#define L3MODE_MASK REG_GENMASK(7, 4)
This definition is only correct for Xe_HPC platforms; we should probably
have an XEHPC_ prefix so that we can have different field definitions
for other IPs.
> #define L3BANK_MASK REG_GENMASK(3, 0)
> /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
> #define MAX_MSLICES 4
> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> index a8d7f272c30a..6f4cf147a7aa 100644
> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> @@ -13,6 +13,8 @@
>
> #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> #define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> +#define XE_MAX_L3_BANK_FUSE_BITS (32 * XE_MAX_L3_BANK_FUSE_REGS)
> +#define XE_MAX_L3_NODE_FUSE_BITS (32 * XE_MAX_L3_NODE_FUSE_REGS)
We don't really need these definitions; if multi-register masks show up
in the future we'll need different logic below anyway.
>
> static void
> load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
> @@ -62,6 +64,36 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
> }
>
> +static void
> +load_l3_masks(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask,
> + xe_l3_node_mask_t l3_node_mask)
> +{
> + u32 bank, node;
> +
> + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
We need a separate condition for Xe2 as well since it doesn't match
Xe_LPG.
> + node = REG_FIELD_GET(MEML3_EN_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + bank = REG_FIELD_GET(GT_L3_EXC_MASK,
> + xe_mmio_read32(gt, XEHP_FUSE4));
This one isn't correct; the bits here aren't a mask of the banks but
rather a mask of half-nodes. I.e., 011 means all four banks in the
mslice are enabled. 001 means only the lower two banks are enabled.
And 010 means only the upper two banks are enabled.
> + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) {
You're missing a condition for Xe_HPC here. On PVC some of the l3 banks
are inside bslices and some are inside rambo's, so I'm not sure how you
want to express that.
> + node = REG_FIELD_GET(MEML3_EN_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + bank = REG_FIELD_GET(L3MODE_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
This one isn't correct; Xe_HP platforms don't have a field at [7:4].
The only thing provided by fusing is the mslice mask (which is
effectively what you're calling "nodes") here. If a node is present
then all 8 banks inside of it are present. So assuming your "bank" mask
here is actually a "banks per node" mask, you just want a hardcoded
0xFF.
> + } else {
> + /*
> + * Here the mask logic is reversed: a bit is set if the bank is
> + * disabled.
> + */
> + bank = REG_FIELD_GET(L3BANK_MASK,
> + ~xe_mmio_read32(gt, MIRROR_FUSE3));
For gen12 (and older platforms supported by i915 too), the L3 bank mask
is technically 8 bits ([7:0]), but bits [7:4] are guaranteed to match
[3:0] since the hardware always enables/disables the banks in pairs. So
this is only going to give you half the actual bank mask.
It's probably best to just just define an XELP_GT_L3_MODE_MASK as [7:0]
since that gives the entire bank mask. In fact we can go ahead and
delete L3BANK_MASK completely and just use XELP_GT_L3_MODE_MASK in
init_steering_l3bank() too. We only wind up using the __ffs() of that
value for steering purposes so it doesn't matter at all whether we
include the replicated upper bits or not in the logic there.
> + node = 0; /* Unused */
Since the cases above are giving a "bank per node" mask, it might be
more consistent to provide a hardcoded "1" here instead. I.e., these
platforms effectively organize all of their L3 banks into a single
always-present node.
> + }
> +
> + bitmap_from_arr32(l3_bank_mask, &bank, XE_MAX_L3_BANK_FUSE_BITS);
> + bitmap_from_arr32(l3_node_mask, &node, XE_MAX_L3_NODE_FUSE_BITS);
> +}
> +
> static void
> get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
> {
> @@ -106,6 +138,8 @@ xe_gt_topology_init(struct xe_gt *gt)
> XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> XE2_GT_COMPUTE_DSS_2);
> load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> + load_l3_masks(gt, gt->fuse_topo.l3_bank_mask,
> + gt->fuse_topo.l3_node_mask);
>
> xe_gt_topology_dump(gt, &p);
> }
> @@ -121,6 +155,10 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
> gt->fuse_topo.eu_mask_per_dss);
>
> + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_FUSE_BITS,
> + gt->fuse_topo.l3_bank_mask);
> + drm_printf(p, "L3 node mask: %*pb\n", XE_MAX_L3_NODE_FUSE_BITS,
> + gt->fuse_topo.l3_node_mask);
> }
>
> /*
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index 70c615dd1498..adb1de613e77 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -24,11 +24,15 @@ enum xe_gt_type {
> XE_GT_TYPE_MEDIA,
> };
>
> -#define XE_MAX_DSS_FUSE_REGS 3
> -#define XE_MAX_EU_FUSE_REGS 1
> +#define XE_MAX_DSS_FUSE_REGS 3
> +#define XE_MAX_EU_FUSE_REGS 1
> +#define XE_MAX_L3_BANK_FUSE_REGS 1
> +#define XE_MAX_L3_NODE_FUSE_REGS 1
>
> typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
> typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
> +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(32 * XE_MAX_L3_BANK_FUSE_REGS)];
> +typedef unsigned long xe_l3_node_mask_t[BITS_TO_LONGS(32 * XE_MAX_L3_NODE_FUSE_REGS)];
>
> struct xe_mmio_range {
> u32 start;
> @@ -332,6 +336,12 @@ struct xe_gt {
>
> /** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
> xe_eu_mask_t eu_mask_per_dss;
> +
> + /** @l3_bank_mask: L3 bank mask */
> + xe_l3_bank_mask_t l3_bank_mask;
This doesn't match what you're calculating above. Above it looks like
you're trying to return the "banks per node" mask, but this implies that
you're returning the actual L3 bank mask.
Also, it's not clear exactly how this mask should really be expressed.
Unlike stuff like DSS, L3 banks are organized in unusual ways and can
live on different dies. E.g., some the base die, some on the rambo die.
Is that something that userspace cares about? Do they expect the
different dies to be expressed in a specific order or anything?
Matt
> +
> + /** @l3_node_mask: L3 node mask */
> + xe_l3_node_mask_t l3_node_mask;
> } fuse_topo;
>
> /** @steering: register steering for individual HW units */
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index 9b35673b286c..fe3906e8f721 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -453,10 +453,12 @@ static int query_hwconfig(struct xe_device *xe,
> static size_t calc_topo_query_size(struct xe_device *xe)
> {
> return xe->info.gt_count *
> - (3 * sizeof(struct drm_xe_query_topology_mask) +
> + (5 * sizeof(struct drm_xe_query_topology_mask) +
> sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> - sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
> + sizeof_field(struct xe_gt, fuse_topo.l3_node_mask));
> }
>
> static void __user *copy_mask(void __user *ptr,
> @@ -515,6 +517,20 @@ static int query_gt_topology(struct xe_device *xe,
> sizeof(gt->fuse_topo.eu_mask_per_dss));
> if (IS_ERR(query_ptr))
> return PTR_ERR(query_ptr);
> +
> + topo.type = DRM_XE_TOPO_L3_BANK;
> + query_ptr = copy_mask(query_ptr, &topo,
> + gt->fuse_topo.l3_bank_mask,
> + sizeof(gt->fuse_topo.l3_bank_mask));
> + if (IS_ERR(query_ptr))
> + return PTR_ERR(query_ptr);
> +
> + topo.type = DRM_XE_TOPO_L3_NODE;
> + query_ptr = copy_mask(query_ptr, &topo,
> + gt->fuse_topo.l3_node_mask,
> + sizeof(gt->fuse_topo.l3_node_mask));
> + if (IS_ERR(query_ptr))
> + return PTR_ERR(query_ptr);
> }
>
> return 0;
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 50bbea0992d9..eca8f5f502c3 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -513,6 +513,8 @@ struct drm_xe_query_topology_mask {
> #define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0)
> #define DRM_XE_TOPO_DSS_COMPUTE (1 << 1)
> #define DRM_XE_TOPO_EU_PER_DSS (1 << 2)
> +#define DRM_XE_TOPO_L3_BANK (1 << 3)
> +#define DRM_XE_TOPO_L3_NODE (1 << 4)
> /** @type: type of mask */
> __u16 type;
>
> --
> 2.34.1
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list