[Intel-gfx] [PATCH 2/7] drm/i915/guc: Fix capture size warning and bump the size

Teres Alexis, Alan Previn alan.previn.teres.alexis at intel.com
Tue Aug 2 17:46:09 UTC 2022


Straightforward change - LGTM.

Reviewed-by: Alan Previn <alan.previn.teres.alexis at intel.com>


On Wed, 2022-07-27 at 19:20 -0700, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> There was a size check to warn if the GuC error state capture buffer
> allocation would be too small to fit a reasonable amount of capture
> data for the current platform. Unfortunately, the test was done too
> early in the boot sequence and was actually testing 'if(-ENODEV >
> size)'.
> 
> Move the check to be later. The check is only used to print a warning
> message, so it doesn't really matter how early or late it is done.
> Note that it is not possible to dynamically size the buffer because
> the allocation needs to be done before the engine information is
> available (at least, it would be in the intended two-phase GuC init
> process).
> 
> Now that the check works, it is reporting size too small for newer
> platforms. The check includes a 3x oversample multiplier to allow for
> multiple error captures to be buffered by GuC before i915 has a chance
> to read them out. This is less important than simply being big enough
> to fit the first capture.
> 
> So a) bump the default size to be large enough for at least one capture
> and b) only issue the warning if a single capture won't fit, and use a
> notice instead when just the 3x size won't fit.
> 
> Note that the size estimate is a worst case scenario. Actual captures
> will likely be smaller.
> 
> Lastly, use drm_warn instead of DRM_WARN as the former provides more
> information and the latter is deprecated.
> 
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c    | 40 ++++++++++++++-----
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.h    |  1 -
>  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c    |  4 --
>  drivers/gpu/drm/i915/gt/uc/intel_guc_log.h    |  4 +-
>  4 files changed, 31 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index 75257bd20ff01..b54b7883320b1 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> @@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
>  	return 0;
>  }
>  
> -#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
> -
> -int
> -intel_guc_capture_output_min_size_est(struct intel_guc *guc)
> +static int
> +guc_capture_output_min_size_est(struct intel_guc *guc)
>  {
>  	struct intel_gt *gt = guc_to_gt(guc);
>  	struct intel_engine_cs *engine;
> @@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
>  	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
>  	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
>  	 * dumps are split across different register types (where the '3' are global vs class
> -	 * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
> -	 * not limited to just 1 round of data in a worst case full register dump log)
> -	 *
> -	 * NOTE: intel_guc_log that allocates the log buffer would round this size up to
> -	 * a power of two.
> +	 * vs instance).
>  	 */
> -
>  	for_each_engine(engine, gt, id) {
>  		worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
>  					 (3 * sizeof(struct guc_state_capture_header_t));
> @@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
>  
>  	worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
>  
> -	return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
> +	return worst_min_size;
> +}
> +
> +/*
> + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
> + * before the i915 can read the data out and process it
> + */
> +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
> +
> +static void check_guc_capture_size(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> +	int min_size = guc_capture_output_min_size_est(guc);
> +	int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
> +
> +	if (min_size < 0)
> +		drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
> +			 min_size);
> +	else if (min_size > CAPTURE_BUFFER_SIZE)
> +		drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
> +			 CAPTURE_BUFFER_SIZE, min_size);
> +	else if (spare_size > CAPTURE_BUFFER_SIZE)
> +		drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
> +			   CAPTURE_BUFFER_SIZE, spare_size, min_size);
>  }
>  
>  /*
> @@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc)
>  	INIT_LIST_HEAD(&guc->capture->outlist);
>  	INIT_LIST_HEAD(&guc->capture->cachelist);
>  
> +	check_guc_capture_size(guc);
> +
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> index d3d7bd0b6db64..fbd3713c7832d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> @@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
>  void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
>  					 struct intel_context *ce);
>  void intel_guc_capture_process(struct intel_guc *guc);
> -int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
>  int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
>  			      void **outptr);
>  int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> index 492bbf419d4df..991d4a02248dc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> @@ -487,10 +487,6 @@ int intel_guc_log_create(struct intel_guc_log *log)
>  
>  	GEM_BUG_ON(log->vma);
>  
> -	if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
> -		DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
> -			 CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
> -
>  	guc_log_size = intel_guc_log_size(log);
>  
>  	vma = intel_guc_allocate_vma(guc, guc_log_size);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> index 18007e639be99..dc9715411d626 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> @@ -22,11 +22,11 @@ struct intel_guc;
>  #elif defined(CONFIG_DRM_I915_DEBUG_GEM)
>  #define CRASH_BUFFER_SIZE	SZ_1M
>  #define DEBUG_BUFFER_SIZE	SZ_2M
> -#define CAPTURE_BUFFER_SIZE	SZ_1M
> +#define CAPTURE_BUFFER_SIZE	SZ_4M
>  #else
>  #define CRASH_BUFFER_SIZE	SZ_8K
>  #define DEBUG_BUFFER_SIZE	SZ_64K
> -#define CAPTURE_BUFFER_SIZE	SZ_16K
> +#define CAPTURE_BUFFER_SIZE	SZ_2M
>  #endif
>  
>  /*
> -- 
> 2.37.1
> 



More information about the dri-devel mailing list