[Intel-xe] [PATCH] drm/xe: Fix driver load with CONFIG_DRM_XE_LARGE_GUC_BUFFER

Matthew Brost matthew.brost at intel.com
Mon Apr 3 04:33:08 UTC 2023


On Sun, Apr 02, 2023 at 12:09:30AM -0700, Matthew Brost wrote:
> While debugging [1] I noticed the GuC log was corrupt. I believe our
> logic to program the GuC log size was incorrect resulting driver load
> failures. Shameless copy from to fix this. A follow up will wire the GuC
> log sizes to modparams.
> 
> [1] https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/232
> 

Everyone ignore this patch, mistakenly thought this patch fixed something.

> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc.c           |  47 ++-------
>  drivers/gpu/drm/xe/xe_guc_log.c       | 145 +++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_guc_log.h       |   9 --
>  drivers/gpu/drm/xe/xe_guc_log_types.h |  20 ++++
>  4 files changed, 167 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 58b9841616e4..1694fdc3a7fa 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -69,54 +69,21 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc)
>  
>  static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
>  {
> +	struct xe_guc_log *log = &guc->log;
>  	u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
>  	u32 flags;
>  
> -	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
> -	#define LOG_UNIT SZ_1M
> -	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
> -	#else
> -	#define LOG_UNIT SZ_4K
> -	#define LOG_FLAG 0
> -	#endif
> -
> -	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
> -	#define CAPTURE_UNIT SZ_1M
> -	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
> -	#else
> -	#define CAPTURE_UNIT SZ_4K
> -	#define CAPTURE_FLAG 0
> -	#endif
> -
> -	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
> -	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
> -	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
> -	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
> -	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
> -	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
> -
> -	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
> -			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
> -	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
> -			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
> -	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
> -			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
> +	XE_BUG_ON(!log->sizes_initialised);
>  
>  	flags = GUC_LOG_VALID |
>  		GUC_LOG_NOTIFY_ON_HALF_FULL |
> -		CAPTURE_FLAG |
> -		LOG_FLAG |
> -		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
> -		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
> -		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
> -		 GUC_LOG_CAPTURE_SHIFT) |
> +		log->sizes[GUC_LOG_SECTIONS_DEBUG].flag |
> +		log->sizes[GUC_LOG_SECTIONS_CAPTURE].flag |
> +		(log->sizes[GUC_LOG_SECTIONS_CRASH].count << GUC_LOG_CRASH_SHIFT) |
> +		(log->sizes[GUC_LOG_SECTIONS_DEBUG].count << GUC_LOG_DEBUG_SHIFT) |
> +		(log->sizes[GUC_LOG_SECTIONS_CAPTURE].count << GUC_LOG_CAPTURE_SHIFT) |
>  		(offset << GUC_LOG_BUF_ADDR_SHIFT);
>  
> -	#undef LOG_UNIT
> -	#undef LOG_FLAG
> -	#undef CAPTURE_UNIT
> -	#undef CAPTURE_FLAG
> -
>  	return flags;
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
> index 9a7b5d5906c1..f171b997b4cb 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.c
> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
> @@ -12,6 +12,16 @@
>  #include "xe_map.h"
>  #include "xe_module.h"
>  
> +#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
> +#define CRASH_BUFFER_SIZE       SZ_1M
> +#define DEBUG_BUFFER_SIZE       SZ_8M
> +#define CAPTURE_BUFFER_SIZE     SZ_2M
> +#else
> +#define CRASH_BUFFER_SIZE	SZ_8K
> +#define DEBUG_BUFFER_SIZE	SZ_64K
> +#define CAPTURE_BUFFER_SIZE	SZ_16K
> +#endif
> +
>  static struct xe_gt *
>  log_to_gt(struct xe_guc_log *log)
>  {
> @@ -24,7 +34,130 @@ log_to_xe(struct xe_guc_log *log)
>  	return gt_to_xe(log_to_gt(log));
>  }
>  
> -static size_t guc_log_size(void)
> +struct guc_log_section {
> +	u32 max;
> +	u32 flag;
> +	u32 default_val;
> +	const char *name;
> +};
> +
> +static s32 scale_log_param(struct xe_guc_log *log,
> +			   const struct guc_log_section *section)
> +{
> +	/* XXX: Add modparams */
> +	return section->default_val;
> +}
> +
> +static void _guc_log_init_sizes(struct xe_guc_log *log)
> +{
> +	struct xe_device *xe = log_to_xe(log);
> +	static const struct guc_log_section sections[GUC_LOG_SECTIONS_LIMIT] = {
> +		{
> +			GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT,
> +			GUC_LOG_LOG_ALLOC_UNITS,
> +			CRASH_BUFFER_SIZE,
> +			"crash dump"
> +		},
> +		{
> +			GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT,
> +			GUC_LOG_LOG_ALLOC_UNITS,
> +			DEBUG_BUFFER_SIZE,
> +			"debug",
> +		},
> +		{
> +			GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT,
> +			GUC_LOG_CAPTURE_ALLOC_UNITS,
> +			CAPTURE_BUFFER_SIZE,
> +			"capture",
> +		}
> +	};
> +	int i;
> +
> +	for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++)
> +		log->sizes[i].bytes = scale_log_param(log, sections + i);
> +
> +	/* If debug size > 1MB then bump default crash size to keep the same units */
> +	if ((log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes >= SZ_1M) &&
> +	    (CRASH_BUFFER_SIZE < SZ_1M))
> +		log->sizes[GUC_LOG_SECTIONS_CRASH].bytes = SZ_1M;
> +
> +	/* Prepare the GuC API structure fields: */
> +	for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) {
> +		/* Convert to correct units */
> +		if ((log->sizes[i].bytes % SZ_1M) == 0) {
> +			log->sizes[i].units = SZ_1M;
> +			log->sizes[i].flag = sections[i].flag;
> +		} else {
> +			log->sizes[i].units = SZ_4K;
> +			log->sizes[i].flag = 0;
> +		}
> +
> +		if (!IS_ALIGNED(log->sizes[i].bytes, log->sizes[i].units))
> +			drm_err(&xe->drm, "Mis-aligned GuC log %s size: 0x%X vs 0x%X!",
> +				sections[i].name, log->sizes[i].bytes,
> +				log->sizes[i].units);
> +		log->sizes[i].count = log->sizes[i].bytes / log->sizes[i].units;
> +
> +		if (!log->sizes[i].count) {
> +			drm_err(&xe->drm, "Zero GuC log %s size!",
> +				sections[i].name);
> +		} else {
> +			/* Size is +1 unit */
> +			log->sizes[i].count--;
> +		}
> +
> +		/* Clip to field size */
> +		if (log->sizes[i].count > sections[i].max) {
> +			drm_err(&xe->drm, "GuC log %s size too large: %d vs %d!",
> +				sections[i].name, log->sizes[i].count + 1,
> +				sections[i].max + 1);
> +			log->sizes[i].count = sections[i].max;
> +		}
> +	}
> +
> +	if (log->sizes[GUC_LOG_SECTIONS_CRASH].units !=
> +	    log->sizes[GUC_LOG_SECTIONS_DEBUG].units) {
> +		drm_err(&xe->drm, "Unit mis-match for GuC log crash and debug sections: %d vs %d!",
> +			log->sizes[GUC_LOG_SECTIONS_CRASH].units,
> +			log->sizes[GUC_LOG_SECTIONS_DEBUG].units);
> +		log->sizes[GUC_LOG_SECTIONS_CRASH].units =
> +			log->sizes[GUC_LOG_SECTIONS_DEBUG].units;
> +		log->sizes[GUC_LOG_SECTIONS_CRASH].count = 0;
> +	}
> +
> +	log->sizes_initialised = true;
> +}
> +
> +static void guc_log_init_sizes(struct xe_guc_log *log)
> +{
> +	if (log->sizes_initialised)
> +		return;
> +
> +	_guc_log_init_sizes(log);
> +}
> +
> +static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
> +{
> +	guc_log_init_sizes(log);
> +
> +	return log->sizes[GUC_LOG_SECTIONS_CRASH].bytes;
> +}
> +
> +static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
> +{
> +	guc_log_init_sizes(log);
> +
> +	return log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes;
> +}
> +
> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
> +{
> +	guc_log_init_sizes(log);
> +
> +	return log->sizes[GUC_LOG_SECTIONS_CAPTURE].bytes;
> +}
> +
> +static size_t guc_log_size(struct xe_guc_log *log)
>  {
>  	/*
>  	 *  GuC Log buffer Layout
> @@ -45,8 +178,10 @@ static size_t guc_log_size(void)
>  	 *  |         Capture logs          |
>  	 *  +===============================+ + CAPTURE_SIZE
>  	 */
> -	return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
> -		CAPTURE_BUFFER_SIZE;
> +	return PAGE_SIZE +
> +		xe_guc_log_section_size_crash(log) +
> +		xe_guc_log_section_size_debug(log) +
> +		xe_guc_log_section_size_capture(log);
>  }
>  
>  void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
> @@ -91,14 +226,14 @@ int xe_guc_log_init(struct xe_guc_log *log)
>  	struct xe_bo *bo;
>  	int err;
>  
> -	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
> +	bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(log),
>  				  ttm_bo_type_kernel,
>  				  XE_BO_CREATE_VRAM_IF_DGFX(gt) |
>  				  XE_BO_CREATE_GGTT_BIT);
>  	if (IS_ERR(bo))
>  		return PTR_ERR(bo);
>  
> -	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
> +	xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size(log));
>  	log->bo = bo;
>  	log->level = xe_guc_log_level;
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
> index 2d25ab28b4b3..52dc264ca540 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.h
> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
> @@ -10,15 +10,6 @@
>  
>  struct drm_printer;
>  
> -#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
> -#define CRASH_BUFFER_SIZE       SZ_1M
> -#define DEBUG_BUFFER_SIZE       SZ_8M
> -#define CAPTURE_BUFFER_SIZE     SZ_2M
> -#else
> -#define CRASH_BUFFER_SIZE	SZ_8K
> -#define DEBUG_BUFFER_SIZE	SZ_64K
> -#define CAPTURE_BUFFER_SIZE	SZ_16K
> -#endif
>  /*
>   * While we're using plain log level in i915, GuC controls are much more...
>   * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
> index 125080d138a7..8807d7bc6ec9 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
> @@ -10,6 +10,13 @@
>  
>  struct xe_bo;
>  
> +enum {
> +	GUC_LOG_SECTIONS_CRASH,
> +	GUC_LOG_SECTIONS_DEBUG,
> +	GUC_LOG_SECTIONS_CAPTURE,
> +	GUC_LOG_SECTIONS_LIMIT
> +};
> +
>  /**
>   * struct xe_guc_log - GuC log
>   */
> @@ -18,6 +25,19 @@ struct xe_guc_log {
>  	u32 level;
>  	/** @bo: XE BO for GuC log */
>  	struct xe_bo *bo;
> +	/** @sizes: Allocation settings */
> +	struct {
> +		/** @bytes: Size in bytes */
> +		s32 bytes;
> +		/** @units: GuC API units - 1MB or 4KB */
> +		s32 units;
> +		/** @count: Number of API units */
> +		s32 count;
> +		/** @flag: GuC API units flag */
> +		u32 flag;
> +	} sizes[GUC_LOG_SECTIONS_LIMIT];
> +	/** @sizes_initialised: sizes of log initialized */
> +	bool sizes_initialised;
>  };
>  
>  #endif
> -- 
> 2.34.1
> 


More information about the Intel-xe mailing list