[PATCH v2] drm/xe/uapi: Add L3 bank mask to topology query

Dong, Zhanjun zhanjun.dong at intel.com
Wed Mar 20 16:14:21 UTC 2024


See my comments below.

Regards,
Zhanjun

On 2024-03-20 11:00 a.m., Francois Dugast wrote:
> Extend the existing topology uAPI to expose the masks of L3 banks
> to user space. L3 count is not sufficient because in some configuration
> not all banks are enabled. User space needs to know which ones are
> enabled, in the context of OA.
> 
> v2:
> - Remove "Fixes" and make uAPI change explicit in commit message
>    (Lucas De Marchi and Matt Roper)
> - Add separate conditions for Xe2 and for PVC (Matt Roper)
> - Return the L3 bank mask instead of the L3 node mask and the L3
>    banks per node mask, as the node mask can be derived from the
>    bank mask by user space, just like slice masks are derived from
>    subslice masks today (Francois Dugast)
> 
> Bspec: 52545, 52546, 62482
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Robert Krzemien <robert.krzemien at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> ---
>   drivers/gpu/drm/xe/regs/xe_gt_regs.h |  3 ++
>   drivers/gpu/drm/xe/xe_gt_topology.c  | 72 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_gt_types.h     | 14 ++++--
>   drivers/gpu/drm/xe/xe_query.c        | 12 ++++-
>   include/uapi/drm/xe_drm.h            |  1 +
>   5 files changed, 96 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index 95969935f58b..be5b4936eb53 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -156,7 +156,10 @@
>   #define	MIRROR_FUSE3				XE_REG(0x9118)
>   #define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
>   #define   L3BANK_PAIR_COUNT			4
> +#define   XEHPC_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
> +#define   XE2_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
>   #define   L3BANK_MASK				REG_GENMASK(3, 0)
> +#define   XELP_GT_L3_MODE_MASK			REG_GENMASK(7, 0)
>   /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
>   #define   MAX_MSLICES				4
>   #define   MEML3_EN_MASK				REG_GENMASK(3, 0)
> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> index f5773a14f3c8..8920426332dd 100644
> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> @@ -59,6 +59,75 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
>   	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
>   }
>   
> +static void
> +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	u64 node_mask, bank_mask = 0;
> +	u64 banks_per_node_fuse, banks_per_node_mask = 0;
> +	int i;
> +
> +	if (GRAPHICS_VER(xe) >= 20) {
> +		node_mask =
> +			REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		banks_per_node_mask =
> +			REG_FIELD_GET(XE2_GT_L3_MODE_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		for (i = 0; i < fls(node_mask); i++)
> +			if (node_mask & BIT(i))

Is the for/if equal to "for_each_set_bit"?

> +				bank_mask |= banks_per_node_mask << 4 * i;
> +	} else if (GRAPHICS_VERx100(xe) >= 1270) {
> +		node_mask =
> +			REG_FIELD_GET(MEML3_EN_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		banks_per_node_fuse =
> +			REG_FIELD_GET(GT_L3_EXC_MASK,
> +				      xe_mmio_read32(gt, XEHP_FUSE4));
> +		/* Each bit represents 2 banks in the node. */
> +		for (i = 0; i < fls(banks_per_node_fuse); i++)
> +			if (banks_per_node_fuse & BIT(i))

multiple dittos ...

> +				banks_per_node_mask |= 0x3 << 2 * i;
> +		for (i = 0; i < fls(node_mask); i++)
> +			if (node_mask & BIT(i))
> +				bank_mask |= banks_per_node_mask << 4 * i;
> +	} else if (xe->info.platform == XE_PVC) {
> +		node_mask =
> +			REG_FIELD_GET(MEML3_EN_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		banks_per_node_fuse =
> +			REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		/* Each bit represents 4 banks in the node. */
> +		for (i = 0; i < fls(banks_per_node_fuse); i++)
> +			if (banks_per_node_fuse & BIT(i))
> +				banks_per_node_mask |= 0xf << 4 * i;
> +		for (i = 0; i < fls(node_mask); i++)
> +			if (node_mask & BIT(i))
> +				bank_mask |= banks_per_node_mask << 16 * i;
> +	} else if (xe->info.platform == XE_DG2) {
> +		node_mask =
> +			REG_FIELD_GET(MEML3_EN_MASK,
> +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> +		/*
> +		 * In this case, if a node is present then all 8 banks inside of
> +		 * it are present.
> +		 */
> +		for (i = 0; i < fls(node_mask); i++)
> +			if (node_mask & BIT(i))
> +				bank_mask |= 0xfful << 8 * i;
> +	} else {
> +		/*
> +		 * Here the mask logic is reversed: a bit is set if the bank is
> +		 * disabled.
> +		 */
> +		bank_mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK,
> +					  ~xe_mmio_read32(gt, MIRROR_FUSE3));
> +	}
> +
> +	bitmap_from_arr64(l3_bank_mask, &bank_mask, XE_MAX_L3_BANK_FUSE_BITS);
> +}
> +
>   static void
>   get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
>   {
> @@ -103,6 +172,7 @@ xe_gt_topology_init(struct xe_gt *gt)
>   		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
>   		      XE2_GT_COMPUTE_DSS_2);
>   	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> +	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
>   
>   	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
>   
> @@ -120,6 +190,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
>   	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
>   		   gt->fuse_topo.eu_mask_per_dss);
>   
> +	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_FUSE_BITS,
> +		   gt->fuse_topo.l3_bank_mask);
>   }
>   
>   /*
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index f6da2ad9719f..e1438a478f94 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -24,13 +24,16 @@ enum xe_gt_type {
>   	XE_GT_TYPE_MEDIA,
>   };
>   
> -#define XE_MAX_DSS_FUSE_REGS	3
> -#define XE_MAX_DSS_FUSE_BITS	(32 * XE_MAX_DSS_FUSE_REGS)
> -#define XE_MAX_EU_FUSE_REGS	1
> -#define XE_MAX_EU_FUSE_BITS	(32 * XE_MAX_EU_FUSE_REGS)
> +#define XE_MAX_DSS_FUSE_REGS		3
> +#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
> +#define XE_MAX_EU_FUSE_REGS		1
> +#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
> +#define XE_MAX_L3_BANK_FUSE_REGS	2
> +#define XE_MAX_L3_BANK_FUSE_BITS	(32 * XE_MAX_L3_BANK_FUSE_REGS)
>   
>   typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
>   typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
> +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_FUSE_BITS)];
>   
>   struct xe_mmio_range {
>   	u32 start;
> @@ -334,6 +337,9 @@ struct xe_gt {
>   
>   		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
>   		xe_eu_mask_t eu_mask_per_dss;
> +
> +		/** @l3_bank_mask: L3 bank mask */
> +		xe_l3_bank_mask_t l3_bank_mask;
>   	} fuse_topo;
>   
>   	/** @steering: register steering for individual HW units */
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index e80321b34918..0a064c12d80a 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -453,10 +453,11 @@ static int query_hwconfig(struct xe_device *xe,
>   static size_t calc_topo_query_size(struct xe_device *xe)
>   {
>   	return xe->info.gt_count *
> -		(3 * sizeof(struct drm_xe_query_topology_mask) +
> +		(4 * sizeof(struct drm_xe_query_topology_mask) +

I know the 3 to 4 is for numbers of masks, can we make it a marco? For 
easy to read.

>   		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
>   		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> -		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> +		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> +		 sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask));
>   }
>   
>   static int copy_mask(void __user **ptr,
> @@ -515,6 +516,13 @@ static int query_gt_topology(struct xe_device *xe,
>   				sizeof(gt->fuse_topo.eu_mask_per_dss));
>   		if (err)
>   			return err;
> +
> +		topo.type = DRM_XE_TOPO_L3_BANK;
> +		err = copy_mask(&query_ptr, &topo,
> +				gt->fuse_topo.l3_bank_mask,
> +				sizeof(gt->fuse_topo.l3_bank_mask));
> +		if (err)
> +			return err;
>   	}
>   
>   	return 0;
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 808ad1c308ec..aa917308cdeb 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -521,6 +521,7 @@ struct drm_xe_query_topology_mask {
>   #define DRM_XE_TOPO_DSS_GEOMETRY	(1 << 0)
>   #define DRM_XE_TOPO_DSS_COMPUTE		(1 << 1)
>   #define DRM_XE_TOPO_EU_PER_DSS		(1 << 2)
> +#define DRM_XE_TOPO_L3_BANK		(1 << 3)

Here is a good place to define numbers of masks
for example

#define DRM_XE_TOPO_L3_BANK		(1 << 3)
#define DRM_XE_TOPO_NUMBER_OF_MASKS	(4)


>   	/** @type: type of mask */
>   	__u16 type;
>   


More information about the Intel-xe mailing list