[PATCH] drm/xe/uapi: Expose EU width via topology query
Lucas De Marchi
lucas.demarchi at intel.com
Wed Jul 10 14:36:26 UTC 2024
On Wed, Jul 10, 2024 at 09:05:43AM GMT, Jose Souza wrote:
>On Tue, 2024-07-09 at 22:53 -0700, Lucas De Marchi wrote:
>> PVC, Xe2 and later platforms have a 16 wide EU. We were implicitly
>> reporting for PVC the number of 16-wide EUs without giving userspace any
>> hint that they were different than for other platforms. Xe2 and later
>> also have 16-wide, but in those case the reported number would
>> correspond to the 8-wide count.
>>
>> Add a new item to the topology that aims to clarify what the EU_PER_DSS
>> mask means. This new item uses mask[] as a single u8 value. Xe2 and
>> later platforms start returning the number of SIMD16 EUs.
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>> ---
>>
>> Tested on TGL and LNL to check the behavior in dmesg, debugfs and ioctl.
>> Test-with: https://lore.kernel.org/igt-dev/20240710054446.1985069-1-lucas.demarchi@intel.com/
>>
>> drivers/gpu/drm/xe/xe_gt_topology.c | 13 ++++++++-----
>> drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++++
>> drivers/gpu/drm/xe/xe_query.c | 12 ++++++++++--
>> include/uapi/drm/xe_drm.h | 20 ++++++++++++++++++++
>> 4 files changed, 44 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
>> index 25ff03ab8448..55ae1c79048f 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
>> @@ -31,7 +31,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
>> }
>>
>> static void
>> -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
>> +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, u8 *eu_width)
>> {
>> struct xe_device *xe = gt_to_xe(gt);
>> u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
>> @@ -47,11 +47,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
>> if (GRAPHICS_VERx100(xe) < 1250)
>> reg_val = ~reg_val & XELP_EU_MASK;
>>
>> - /* On PVC, one bit = one EU */
>> - if (GRAPHICS_VERx100(xe) == 1260) {
>> + if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
>> + /* SIMD16 EUs, one bit = one EU */
>> + *eu_width = DRM_XE_TOPO_EU_WIDTH_SIMD16;
>
>In my opinion would be better to return the actual/HW EU count and legacy/Windows/Marketing EU count.
I think you interpreted this wrong.
Actual/HW is what we are currently *not* exporting... not sure
those 2 groups make sense the way you grouped them:
- actual/HW: new number, also matches hwconfig
- legacy: SIMD8
- Windows: I'm not sure, probably depends on what stack you're
looking, but I think it matches hwconfig, so the actual/HW
- Marketing: actual/HW
However if we export the actual number and try to compare with other
platforms, it doesn't make sense because we are counting apples and
oranges. Hence the need to say if it's apple or orange.
The alternative is to always convert it to SIMD8 (and keep doing it
forever for all new platforms) to maintain compatibility.
I'm not calling the old one "legacy" on purpose: it only created a huge
confusion (spec still to be updated). I want to export what the HW
actually is, then userspace can decide what it wants to use that number
for. Also note that if you get the max # from hwconfig, it is expected to
match this number (see igt test xe_query at query-hwconfig)
Always keep the conversion in the kernel, always exporting as SIMD8
number, not matching what the hw actually is, not being very future
proof, not matching hwconfig, doesn't sound appealing.
Lucas De Marchi
>
>> val = reg_val;
>> } else {
>> - /* All other platforms, one bit = 2 EU */
>> + /* SIMD8 EUs, one bit = 2 EU */
>> + *eu_width = DRM_XE_TOPO_EU_WIDTH_SIMD8;
>> for (i = 0; i < fls(reg_val); i++)
>> if (reg_val & BIT(i))
>> val |= 0x3 << 2 * i;
>> @@ -213,7 +215,7 @@ xe_gt_topology_init(struct xe_gt *gt)
>> XEHP_GT_COMPUTE_DSS_ENABLE,
>> XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
>> XE2_GT_COMPUTE_DSS_2);
>> - load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
>> + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_width);
>> load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
>>
>> p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
>> @@ -231,6 +233,7 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
>>
>> drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
>> gt->fuse_topo.eu_mask_per_dss);
>> + drm_printf(p, "EU width: %u\n", gt->fuse_topo.eu_width);
>>
>> drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
>> gt->fuse_topo.l3_bank_mask);
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index 6b5e0b45efb0..71b4834080f5 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -343,6 +343,12 @@ struct xe_gt {
>>
>> /** @fuse_topo.l3_bank_mask: L3 bank mask */
>> xe_l3_bank_mask_t l3_bank_mask;
>> +
>> + /**
>> + * @fuse_topo.eu_width: EU width - SIMD8, SIMD16, etc. See
>> + * enum drm_xe_topo_eu_width.
>> + */
>> + u8 eu_width;
>> } fuse_topo;
>>
>> /** @steering: register steering for individual HW units */
>> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
>> index 4e01df6b1b7a..82c966b73302 100644
>> --- a/drivers/gpu/drm/xe/xe_query.c
>> +++ b/drivers/gpu/drm/xe/xe_query.c
>> @@ -455,11 +455,12 @@ static int query_hwconfig(struct xe_device *xe,
>> static size_t calc_topo_query_size(struct xe_device *xe)
>> {
>> return xe->info.gt_count *
>> - (4 * sizeof(struct drm_xe_query_topology_mask) +
>> + (5 * sizeof(struct drm_xe_query_topology_mask) +
>> sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
>> sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
>> sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
>> - sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
>> + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
>> + sizeof_field(struct xe_gt, fuse_topo.eu_width));
>> }
>>
>> static int copy_mask(void __user **ptr,
>> @@ -524,6 +525,13 @@ static int query_gt_topology(struct xe_device *xe,
>> sizeof(gt->fuse_topo.eu_mask_per_dss));
>> if (err)
>> return err;
>> +
>> + topo.type = DRM_XE_TOPO_EU_WIDTH;
>> + err = copy_mask(&query_ptr, &topo,
>> + >->fuse_topo.eu_width,
>> + sizeof(gt->fuse_topo.eu_width));
>> + if (err)
>> + return err;
>> }
>>
>> return 0;
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 19619d4952a8..6114a9064849 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -491,6 +491,19 @@ struct drm_xe_query_gt_list {
>> struct drm_xe_gt gt_list[];
>> };
>>
>> +enum drm_xe_topo_eu_width {
>> + /**
>> + * @DRM_XE_TOPO_EU_WIDTH_SIMD8 - EUs reported with
>> + * DRM_XE_TOPO_EU_PER_DSS have SIMD8 width
>> + */
>> + DRM_XE_TOPO_EU_WIDTH_SIMD8 = 0,
>> + /**
>> + * @DRM_XE_TOPO_EU_WIDTH_SIMD16 - EUs reported with
>> + * DRM_XE_TOPO_EU_PER_DSS have SIMD16 width
>> + */
>> + DRM_XE_TOPO_EU_WIDTH_SIMD16,
>> +};
>> +
>> /**
>> * struct drm_xe_query_topology_mask - describe the topology mask of a GT
>> *
>> @@ -518,6 +531,12 @@ struct drm_xe_query_gt_list {
>> * containing the following in mask:
>> * ``EU_PER_DSS ff ff 00 00 00 00 00 00``
>> * means each DSS has 16 EU.
>> + * - %DRM_XE_TOPO_EU_WIDTH - To query the width of EUs as per
>> + * enum drm_xe_topo_eu_width. It always has num_bytes == 1 and the only byte
>> + * in the mask being the value. For example, a query response containing the
>> + * following in mask:
>> + * ``EU_WIDTH 00``
>> + * means the EUs are SIMD8.
>> */
>> struct drm_xe_query_topology_mask {
>> /** @gt_id: GT ID the mask is associated with */
>> @@ -527,6 +546,7 @@ struct drm_xe_query_topology_mask {
>> #define DRM_XE_TOPO_DSS_COMPUTE 2
>> #define DRM_XE_TOPO_L3_BANK 3
>> #define DRM_XE_TOPO_EU_PER_DSS 4
>> +#define DRM_XE_TOPO_EU_WIDTH 5
>> /** @type: type of mask */
>> __u16 type;
>>
>
More information about the Intel-xe
mailing list