[PATCH v2] drm/xe/ptl: L3bank mask is not available on the media GT

Francois Dugast francois.dugast at intel.com
Tue Oct 8 08:26:43 UTC 2024


On Mon, Oct 07, 2024 at 08:41:44AM -0700, Matt Roper wrote:
> From: Shekhar Chauhan <shekhar.chauhan at intel.com>
> 
> On PTL platforms with media version 30.00, the fuse registers for
> reporting L3 bank availability to the GT just read out as ~0 and do not
> provide proper values.  Xe does not use the L3 bank mask for anything
> internally; it only passes the mask through to userspace via the GT
> topology query.
> 
> Since we don't have any way to get the real L3 bank mask, we don't want
> to pass garbage to userspace.  Passing a zeroed mask or a copy of the
> primary GT's L3 bank mask would also be inaccurate and likely to cause
> confusion for userspace.  The best approach is to simply not include L3
> in the list of masks returned by the topology query in cases where we
> aren't able to provide a meaningful value.  This won't change the
> behavior for any existing platforms (where we can always obtain L3 masks
> successfully for all GTs), it will only prevent us from mis-reporting
> bad information on upcoming platform(s).
> 
> There's a good chance this will become a formal workaround in the
> future, but for now we don't have a lineage number so "no_media_l3" is
> used in place of a lineage as the OOB workaround descriptor.
> 
> v2:
>  - Re-calculate query size to properly match data returned. (Gustavo)
>  - Update kerneldoc to clarify that the L3bank mask may not be included
>    in the query results if the hardware doesn't make it available.
>    (Gustavo)
> 
> Cc: Matt Atwood <matthew.s.atwood at intel.com>
> Cc: Gustavo Sousa <gustavo.sousa at intel.com>
> Signed-off-by: Shekhar Chauhan <shekhar.chauhan at intel.com>
> Co-developed-by: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Matt Roper <matthew.d.roper at intel.com>

Acked-by: Francois Dugast <francois.dugast at intel.com>

> ---
>  drivers/gpu/drm/xe/xe_gt_topology.c | 14 ++++++++++
>  drivers/gpu/drm/xe/xe_query.c       | 42 +++++++++++++++++++++--------
>  drivers/gpu/drm/xe/xe_wa_oob.rules  |  1 +
>  include/uapi/drm/xe_drm.h           |  4 ++-
>  4 files changed, 49 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> index 651ba53623e5..df2042db7ee6 100644
> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> @@ -5,6 +5,7 @@
>  
>  #include "xe_gt_topology.h"
>  
> +#include <generated/xe_wa_oob.h>
>  #include <linux/bitmap.h>
>  #include <linux/compiler.h>
>  
> @@ -12,6 +13,7 @@
>  #include "xe_assert.h"
>  #include "xe_gt.h"
>  #include "xe_mmio.h"
> +#include "xe_wa.h"
>  
>  static void
>  load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
> @@ -129,6 +131,18 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
>  	struct xe_device *xe = gt_to_xe(gt);
>  	u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
>  
> +	/*
> +	 * PTL platforms with media version 30.00 do not provide proper values
> +	 * for the media GT's L3 bank registers.  Skip the readout since we
> +	 * don't have any way to obtain real values.
> +	 *
> +	 * This may get re-described as an official workaround in the future,
> +	 * but there's no tracking number assigned yet so we use a custom
> +	 * OOB workaround descriptor.
> +	 */
> +	if (XE_WA(gt, no_media_l3))
> +		return;
> +
>  	if (GRAPHICS_VER(xe) >= 20) {
>  		xe_l3_bank_mask_t per_node = {};
>  		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
> diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> index 5246a4a2740e..158629971eab 100644
> --- a/drivers/gpu/drm/xe/xe_query.c
> +++ b/drivers/gpu/drm/xe/xe_query.c
> @@ -9,6 +9,7 @@
>  #include <linux/sched/clock.h>
>  
>  #include <drm/ttm/ttm_placement.h>
> +#include <generated/xe_wa_oob.h>
>  #include <uapi/drm/xe_drm.h>
>  
>  #include "regs/xe_engine_regs.h"
> @@ -23,6 +24,7 @@
>  #include "xe_macros.h"
>  #include "xe_mmio.h"
>  #include "xe_ttm_vram_mgr.h"
> +#include "xe_wa.h"
>  
>  static const u16 xe_to_user_engine_class[] = {
>  	[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
> @@ -455,12 +457,23 @@ static int query_hwconfig(struct xe_device *xe,
>  
>  static size_t calc_topo_query_size(struct xe_device *xe)
>  {
> -	return xe->info.gt_count *
> -		(4 * sizeof(struct drm_xe_query_topology_mask) +
> -		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> -		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> -		 sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
> -		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> +	struct xe_gt *gt;
> +	size_t query_size = 0;
> +	int id;
> +
> +	for_each_gt(gt, xe, id) {
> +		query_size += 3 * sizeof(struct drm_xe_query_topology_mask) +
> +			sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> +			sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> +			sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
> +
> +		/* L3bank mask may not be available for some GTs */
> +		if (!XE_WA(gt, no_media_l3))
> +			query_size += sizeof(struct drm_xe_query_topology_mask) +
> +				sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
> +	}
> +
> +	return query_size;
>  }
>  
>  static int copy_mask(void __user **ptr,
> @@ -513,11 +526,18 @@ static int query_gt_topology(struct xe_device *xe,
>  		if (err)
>  			return err;
>  
> -		topo.type = DRM_XE_TOPO_L3_BANK;
> -		err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
> -				sizeof(gt->fuse_topo.l3_bank_mask));
> -		if (err)
> -			return err;
> +		/*
> +		 * If the kernel doesn't have a way to obtain a correct L3bank
> +		 * mask, then it's better to omit L3 from the query rather than
> +		 * reporting bogus or zeroed information to userspace.
> +		 */
> +		if (!XE_WA(gt, no_media_l3)) {
> +			topo.type = DRM_XE_TOPO_L3_BANK;
> +			err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
> +					sizeof(gt->fuse_topo.l3_bank_mask));
> +			if (err)
> +				return err;
> +		}
>  
>  		topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
>  			DRM_XE_TOPO_SIMD16_EU_PER_DSS :
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 920ca5060146..0154fbe154e9 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -37,3 +37,4 @@
>  16023588340	GRAPHICS_VERSION(2001)
>  14019789679	GRAPHICS_VERSION(1255)
>  		GRAPHICS_VERSION_RANGE(1270, 2004)
> +no_media_l3	MEDIA_VERSION(3000)
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index b6fbe4988f2e..c4182e95a619 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -512,7 +512,9 @@ struct drm_xe_query_gt_list {
>   *    containing the following in mask:
>   *    ``DSS_COMPUTE    ff ff ff ff 00 00 00 00``
>   *    means 32 DSS are available for compute.
> - *  - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks
> + *  - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks.  This type
> + *    may be omitted if the driver is unable to query the mask from the
> + *    hardware.
>   *  - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU)
>   *    available per Dual Sub Slices (DSS). For example a query response
>   *    containing the following in mask:
> -- 
> 2.46.2
> 


More information about the Intel-xe mailing list