[PATCH v3 2/2] drm/xe/uapi: Add L3 bank mask to topology query
Francois Dugast
francois.dugast at intel.com
Thu Mar 21 14:28:10 UTC 2024
Please ignore this revision.
Francois
On Thu, Mar 21, 2024 at 02:21:54PM +0000, Francois Dugast wrote:
> Extend the existing topology uAPI to expose the masks of L3 banks
> to user space. L3 count is not sufficient because in some configuration
> not all banks are enabled. User space needs to know which ones are
> enabled, in the context of OA.
>
> v2:
> - Remove "Fixes" and make uAPI change explicit in commit message
> (Lucas De Marchi and Matt Roper)
> - Add separate conditions for Xe2 and for PVC (Matt Roper)
> - Return the L3 bank mask instead of the L3 node mask and the L3
> banks per node mask, as the node mask can be derived from the
> bank mask by user space, just like slice masks are derived from
> subslice masks today (Francois Dugast)
>
> v3: Use TOPO_NUMBER_OF_MASKS for the number of masks (Zhanjun Dong)
>
> Bspec: 52545, 52546, 62482
> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> Cc: Robert Krzemien <robert.krzemien at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ++
> drivers/gpu/drm/xe/xe_gt_topology.c | 72 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 14 ++++--
> drivers/gpu/drm/xe/xe_query.c | 10 +++-
> include/uapi/drm/xe_drm.h | 1 +
> 5 files changed, 95 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index 95969935f58b..be5b4936eb53 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -156,7 +156,10 @@
> #define MIRROR_FUSE3 XE_REG(0x9118)
> #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16)
> #define L3BANK_PAIR_COUNT 4
> +#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4)
> +#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4)
> #define L3BANK_MASK REG_GENMASK(3, 0)
> +#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0)
> /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
> #define MAX_MSLICES 4
> #define MEML3_EN_MASK REG_GENMASK(3, 0)
> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> index f5773a14f3c8..8920426332dd 100644
> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> @@ -59,6 +59,75 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
> }
>
> +static void
> +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> + u64 node_mask, bank_mask = 0;
> + u64 banks_per_node_fuse, banks_per_node_mask = 0;
> + int i;
> +
> + if (GRAPHICS_VER(xe) >= 20) {
> + node_mask =
> + REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + banks_per_node_mask =
> + REG_FIELD_GET(XE2_GT_L3_MODE_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + for (i = 0; i < fls(node_mask); i++)
> + if (node_mask & BIT(i))
> + bank_mask |= banks_per_node_mask << 4 * i;
> + } else if (GRAPHICS_VERx100(xe) >= 1270) {
> + node_mask =
> + REG_FIELD_GET(MEML3_EN_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + banks_per_node_fuse =
> + REG_FIELD_GET(GT_L3_EXC_MASK,
> + xe_mmio_read32(gt, XEHP_FUSE4));
> + /* Each bit represents 2 banks in the node. */
> + for (i = 0; i < fls(banks_per_node_fuse); i++)
> + if (banks_per_node_fuse & BIT(i))
> + banks_per_node_mask |= 0x3 << 2 * i;
> + for (i = 0; i < fls(node_mask); i++)
> + if (node_mask & BIT(i))
> + bank_mask |= banks_per_node_mask << 4 * i;
> + } else if (xe->info.platform == XE_PVC) {
> + node_mask =
> + REG_FIELD_GET(MEML3_EN_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + banks_per_node_fuse =
> + REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + /* Each bit represents 4 banks in the node. */
> + for (i = 0; i < fls(banks_per_node_fuse); i++)
> + if (banks_per_node_fuse & BIT(i))
> + banks_per_node_mask |= 0xf << 4 * i;
> + for (i = 0; i < fls(node_mask); i++)
> + if (node_mask & BIT(i))
> + bank_mask |= banks_per_node_mask << 16 * i;
> + } else if (xe->info.platform == XE_DG2) {
> + node_mask =
> + REG_FIELD_GET(MEML3_EN_MASK,
> + xe_mmio_read32(gt, MIRROR_FUSE3));
> + /*
> + * In this case, if a node is present then all 8 banks inside of
> + * it are present.
> + */
> + for (i = 0; i < fls(node_mask); i++)
> + if (node_mask & BIT(i))
> + bank_mask |= 0xfful << 8 * i;
> + } else {
> + /*
> + * Here the mask logic is reversed: a bit is set if the bank is
> + * disabled.
> + */
> + bank_mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK,
> + ~xe_mmio_read32(gt, MIRROR_FUSE3));
> + }
> +
> + bitmap_from_arr64(l3_bank_mask, &bank_mask, XE_MAX_L3_BANK_FUSE_BITS);
> +}
> +
> static void
> get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
> {
> @@ -103,6 +172,7 @@ xe_gt_topology_init(struct xe_gt *gt)
> XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> XE2_GT_COMPUTE_DSS_2);
> load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> + load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
>
> p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
>
> @@ -120,6 +190,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
> gt->fuse_topo.eu_mask_per_dss);
>
> + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_FUSE_BITS,
> + gt->fuse_topo.l3_bank_mask);
> }
>
> /*
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index f6da2ad9719f..e1438a478f94 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -24,13 +24,16 @@ enum xe_gt_type {
> XE_GT_TYPE_MEDIA,
> };
>
> -#define XE_MAX_DSS_FUSE_REGS 3
> -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> -#define XE_MAX_EU_FUSE_REGS 1
> -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> +#define XE_MAX_DSS_FUSE_REGS 3
> +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> +#define XE_MAX_EU_FUSE_REGS 1
> +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> +#define XE_MAX_L3_BANK_FUSE_REGS 2
> +#define XE_MAX_L3_BANK_FUSE_BITS (32 * XE_MAX_L3_BANK_FUSE_REGS)
>
> typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
> typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
> +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_FUSE_BITS)];
>
> struct xe_mmio_range {
> u32 start;
> @@ -334,6 +337,9 @@ struct xe_gt {
>
> /** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
> xe_eu_mask_t eu_mask_per_dss;
> +
> + /** @l3_bank_mask: L3 bank mask */
> + xe_l3_bank_mask_t l3_bank_mask;
> } fuse_topo;
>
> /** @steering: register steering for individual HW units */
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index fa8de1a77b1e..b830eac7a621 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -459,7 +459,8 @@ static size_t calc_topo_query_size(struct xe_device *xe)
> * sizeof(struct drm_xe_query_topology_mask) +
> sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> - sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask));
> }
>
> static int copy_mask(void __user **ptr,
> @@ -518,6 +519,13 @@ static int query_gt_topology(struct xe_device *xe,
> sizeof(gt->fuse_topo.eu_mask_per_dss));
> if (err)
> return err;
> +
> + topo.type = DRM_XE_TOPO_L3_BANK;
> + err = copy_mask(&query_ptr, &topo,
> + gt->fuse_topo.l3_bank_mask,
> + sizeof(gt->fuse_topo.l3_bank_mask));
> + if (err)
> + return err;
> }
>
> return 0;
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 808ad1c308ec..aa917308cdeb 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -521,6 +521,7 @@ struct drm_xe_query_topology_mask {
> #define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0)
> #define DRM_XE_TOPO_DSS_COMPUTE (1 << 1)
> #define DRM_XE_TOPO_EU_PER_DSS (1 << 2)
> +#define DRM_XE_TOPO_L3_BANK (1 << 3)
> /** @type: type of mask */
> __u16 type;
>
> --
> 2.34.1
>
More information about the Intel-xe
mailing list