[PATCH v8 09/13] drm/i915/guc: Check sizing of guc_capture output

Matthew Brost matthew.brost at intel.com
Wed Mar 9 04:05:13 UTC 2022


On Mon, Feb 28, 2022 at 09:39:28AM -0800, Alan Previn wrote:
> Add intel_guc_capture_output_min_size_est function to
> provide a reasonable minimum size for error-capture
> region before allocating the shared buffer.
> 
> Signed-off-by: Alan Previn <alan.previn.teres.alexis at intel.com>

Looks like you are missing including gt/uc/intel_guc_capture.h in
gt/uc/intel_guc_capture.c.

With that fixed:
Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c    | 48 +++++++++++++++++++
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.h    |  1 +
>  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c    |  7 ++-
>  3 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index dab5dde3a03a..500a881d552d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> @@ -674,6 +674,54 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
>  	return 0;
>  }
>  
> +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
> +int
> +intel_guc_capture_output_min_size_est(struct intel_guc *guc)
> +{
> +	struct intel_gt *gt = guc_to_gt(guc);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int worst_min_size = 0, num_regs = 0;
> +	size_t tmp = 0;
> +
> +	/*
> +	 * If every single engine-instance suffered a failure in quick succession but
> +	 * were all unrelated, then a burst of multiple error-capture events would dump
> +	 * registers for every one engine instance, one at a time. In this case, GuC
> +	 * would even dump the global-registers repeatedly.
> +	 *
> +	 * For each engine instance, there would be 1 x guc_state_capture_group_t output
> +	 * followed by 3 x guc_state_capture_t lists. The latter is how the register
> +	 * dumps are split across different register types (where the '3' are global vs class
> +	 * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
> +	 * not limited to just 1 round of data in a worst case full register dump log)
> +	 *
> +	 * NOTE: intel_guc_log that allocates the log buffer would round this size up to
> +	 * a power of two.
> +	 */
> +
> +	for_each_engine(engine, gt, id) {
> +		worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
> +				  (3 * sizeof(struct guc_state_capture_header_t));
> +
> +		if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
> +			num_regs += tmp;
> +
> +		if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
> +						   engine->class, &tmp)) {
> +			num_regs += tmp;
> +		}
> +		if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
> +						   engine->class, &tmp)) {
> +			num_regs += tmp;
> +		}
> +	}
> +
> +	worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
> +
> +	return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
> +}
> +
>  static void
>  guc_capture_free_ads_cache(struct __guc_state_capture_priv *gc)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> index 042f574e1750..bf8938e392bf 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> @@ -12,6 +12,7 @@ struct file;
>  struct guc_gt_system_info;
>  struct intel_guc;
>  
> +int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
>  int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
>  			      struct file **fileptr);
>  int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> index 2cc52f1eedf3..e9a865c2f4cb 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> @@ -6,10 +6,11 @@
>  #include <linux/debugfs.h>
>  
>  #include "gt/intel_gt.h"
> +#include "intel_guc_capture.h"
> +#include "intel_guc_log.h"
>  #include "i915_drv.h"
>  #include "i915_irq.h"
>  #include "i915_memcpy.h"
> -#include "intel_guc_log.h"
>  
>  static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
>  
> @@ -464,6 +465,10 @@ int intel_guc_log_create(struct intel_guc_log *log)
>  	 *  |         Capture logs          |
>  	 *  +===============================+ + CAPTURE_SIZE
>  	 */
> +	if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
> +		DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
> +			 CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
> +
>  	guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
>  		       CAPTURE_BUFFER_SIZE;
>  
> -- 
> 2.25.1
> 


More information about the Intel-gfx-trybot mailing list