[PATCH v2] drm/xe/uapi: Add L3 bank mask to topology query
Francois Dugast
francois.dugast at intel.com
Thu Mar 21 13:19:57 UTC 2024
On Wed, Mar 20, 2024 at 12:14:21PM -0400, Dong, Zhanjun wrote:
> See my comments below.
Hi,
Thanks for the review, please see my reply below.
>
> Regards,
> Zhanjun
>
> On 2024-03-20 11:00 a.m., Francois Dugast wrote:
> > Extend the existing topology uAPI to expose the masks of L3 banks
> > to user space. L3 count is not sufficient because in some configuration
> > not all banks are enabled. User space needs to know which ones are
> > enabled, in the context of OA.
> >
> > v2:
> > - Remove "Fixes" and make uAPI change explicit in commit message
> > (Lucas De Marchi and Matt Roper)
> > - Add separate conditions for Xe2 and for PVC (Matt Roper)
> > - Return the L3 bank mask instead of the L3 node mask and the L3
> > banks per node mask, as the node mask can be derived from the
> > bank mask by user space, just like slice masks are derived from
> > subslice masks today (Francois Dugast)
> >
> > Bspec: 52545, 52546, 62482
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> > Cc: Robert Krzemien <robert.krzemien at intel.com>
> > Cc: Matt Roper <matthew.d.roper at intel.com>
> > ---
> > drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ++
> > drivers/gpu/drm/xe/xe_gt_topology.c | 72 ++++++++++++++++++++++++++++
> > drivers/gpu/drm/xe/xe_gt_types.h | 14 ++++--
> > drivers/gpu/drm/xe/xe_query.c | 12 ++++-
> > include/uapi/drm/xe_drm.h | 1 +
> > 5 files changed, 96 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > index 95969935f58b..be5b4936eb53 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > @@ -156,7 +156,10 @@
> > #define MIRROR_FUSE3 XE_REG(0x9118)
> > #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16)
> > #define L3BANK_PAIR_COUNT 4
> > +#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4)
> > +#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4)
> > #define L3BANK_MASK REG_GENMASK(3, 0)
> > +#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0)
> > /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
> > #define MAX_MSLICES 4
> > #define MEML3_EN_MASK REG_GENMASK(3, 0)
> > diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> > index f5773a14f3c8..8920426332dd 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> > @@ -59,6 +59,75 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> > bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
> > }
> > +static void
> > +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
> > +{
> > + struct xe_device *xe = gt_to_xe(gt);
> > + u64 node_mask, bank_mask = 0;
> > + u64 banks_per_node_fuse, banks_per_node_mask = 0;
> > + int i;
> > +
> > + if (GRAPHICS_VER(xe) >= 20) {
> > + node_mask =
> > + REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + banks_per_node_mask =
> > + REG_FIELD_GET(XE2_GT_L3_MODE_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + for (i = 0; i < fls(node_mask); i++)
> > + if (node_mask & BIT(i))
>
> Is the for/if equal to "for_each_set_bit"?
Yes conceptually but it seems "for_each_set_bit" only applies to bitmaps.
Here the intermediate masks come from using registers and they are
used in that format, before the end result (bank_mask) is converted to
bitmap in the end, see last line of this function and other functions
in this file.
>
> > + bank_mask |= banks_per_node_mask << 4 * i;
> > + } else if (GRAPHICS_VERx100(xe) >= 1270) {
> > + node_mask =
> > + REG_FIELD_GET(MEML3_EN_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + banks_per_node_fuse =
> > + REG_FIELD_GET(GT_L3_EXC_MASK,
> > + xe_mmio_read32(gt, XEHP_FUSE4));
> > + /* Each bit represents 2 banks in the node. */
> > + for (i = 0; i < fls(banks_per_node_fuse); i++)
> > + if (banks_per_node_fuse & BIT(i))
>
> multiple dittos ...
>
> > + banks_per_node_mask |= 0x3 << 2 * i;
> > + for (i = 0; i < fls(node_mask); i++)
> > + if (node_mask & BIT(i))
> > + bank_mask |= banks_per_node_mask << 4 * i;
> > + } else if (xe->info.platform == XE_PVC) {
> > + node_mask =
> > + REG_FIELD_GET(MEML3_EN_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + banks_per_node_fuse =
> > + REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + /* Each bit represents 4 banks in the node. */
> > + for (i = 0; i < fls(banks_per_node_fuse); i++)
> > + if (banks_per_node_fuse & BIT(i))
> > + banks_per_node_mask |= 0xf << 4 * i;
> > + for (i = 0; i < fls(node_mask); i++)
> > + if (node_mask & BIT(i))
> > + bank_mask |= banks_per_node_mask << 16 * i;
> > + } else if (xe->info.platform == XE_DG2) {
> > + node_mask =
> > + REG_FIELD_GET(MEML3_EN_MASK,
> > + xe_mmio_read32(gt, MIRROR_FUSE3));
> > + /*
> > + * In this case, if a node is present then all 8 banks inside of
> > + * it are present.
> > + */
> > + for (i = 0; i < fls(node_mask); i++)
> > + if (node_mask & BIT(i))
> > + bank_mask |= 0xfful << 8 * i;
> > + } else {
> > + /*
> > + * Here the mask logic is reversed: a bit is set if the bank is
> > + * disabled.
> > + */
> > + bank_mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK,
> > + ~xe_mmio_read32(gt, MIRROR_FUSE3));
> > + }
> > +
> > + bitmap_from_arr64(l3_bank_mask, &bank_mask, XE_MAX_L3_BANK_FUSE_BITS);
> > +}
> > +
> > static void
> > get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
> > {
> > @@ -103,6 +172,7 @@ xe_gt_topology_init(struct xe_gt *gt)
> > XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> > XE2_GT_COMPUTE_DSS_2);
> > load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> > + load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
> > p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
> > @@ -120,6 +190,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> > drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
> > gt->fuse_topo.eu_mask_per_dss);
> > + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_FUSE_BITS,
> > + gt->fuse_topo.l3_bank_mask);
> > }
> > /*
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > index f6da2ad9719f..e1438a478f94 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -24,13 +24,16 @@ enum xe_gt_type {
> > XE_GT_TYPE_MEDIA,
> > };
> > -#define XE_MAX_DSS_FUSE_REGS 3
> > -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> > -#define XE_MAX_EU_FUSE_REGS 1
> > -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> > +#define XE_MAX_DSS_FUSE_REGS 3
> > +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> > +#define XE_MAX_EU_FUSE_REGS 1
> > +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> > +#define XE_MAX_L3_BANK_FUSE_REGS 2
> > +#define XE_MAX_L3_BANK_FUSE_BITS (32 * XE_MAX_L3_BANK_FUSE_REGS)
> > typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
> > typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
> > +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_FUSE_BITS)];
> > struct xe_mmio_range {
> > u32 start;
> > @@ -334,6 +337,9 @@ struct xe_gt {
> > /** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
> > xe_eu_mask_t eu_mask_per_dss;
> > +
> > + /** @l3_bank_mask: L3 bank mask */
> > + xe_l3_bank_mask_t l3_bank_mask;
> > } fuse_topo;
> > /** @steering: register steering for individual HW units */
> > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > index e80321b34918..0a064c12d80a 100644
> > --- a/drivers/gpu/drm/xe/xe_query.c
> > +++ b/drivers/gpu/drm/xe/xe_query.c
> > @@ -453,10 +453,11 @@ static int query_hwconfig(struct xe_device *xe,
> > static size_t calc_topo_query_size(struct xe_device *xe)
> > {
> > return xe->info.gt_count *
> > - (3 * sizeof(struct drm_xe_query_topology_mask) +
> > + (4 * sizeof(struct drm_xe_query_topology_mask) +
>
> I know the 3 to 4 is for numbers of masks, can we make it a marco? For easy
> to read.
Sure, I will make it a separate patch as this is not caused by the new
code added here.
>
> > sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> > sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> > - sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> > + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> > + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask));
> > }
> > static int copy_mask(void __user **ptr,
> > @@ -515,6 +516,13 @@ static int query_gt_topology(struct xe_device *xe,
> > sizeof(gt->fuse_topo.eu_mask_per_dss));
> > if (err)
> > return err;
> > +
> > + topo.type = DRM_XE_TOPO_L3_BANK;
> > + err = copy_mask(&query_ptr, &topo,
> > + gt->fuse_topo.l3_bank_mask,
> > + sizeof(gt->fuse_topo.l3_bank_mask));
> > + if (err)
> > + return err;
> > }
> > return 0;
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index 808ad1c308ec..aa917308cdeb 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -521,6 +521,7 @@ struct drm_xe_query_topology_mask {
> > #define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0)
> > #define DRM_XE_TOPO_DSS_COMPUTE (1 << 1)
> > #define DRM_XE_TOPO_EU_PER_DSS (1 << 2)
> > +#define DRM_XE_TOPO_L3_BANK (1 << 3)
>
> Here is a good place to define numbers of masks
> for example
>
> #define DRM_XE_TOPO_L3_BANK (1 << 3)
> #define DRM_XE_TOPO_NUMBER_OF_MASKS (4)
Unless this is required by user space we could keep it internal to the
source file where it is used.
Francois
>
>
> > /** @type: type of mask */
> > __u16 type;
More information about the Intel-xe
mailing list