[Intel-gfx] [PATCH 2/7] drm/i915/guc: Fix capture size warning and bump the size
Teres Alexis, Alan Previn
alan.previn.teres.alexis at intel.com
Tue Aug 2 17:46:09 UTC 2022
Straight forward change - LGTM.
Reviewed-by: Alan Previn <alan.previn.teres.alexis at intel.com>
On Wed, 2022-07-27 at 19:20 -0700, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> There was a size check to warn if the GuC error state capture buffer
> allocation would be too small to fit a reasonable amount of capture
> data for the current platform. Unfortunately, the test was done too
> early in the boot sequence and was actually testing 'if(-ENODEV >
> size)'.
>
> Move the check to be later. The check is only used to print a warning
> message, so it doesn't really matter how early or late it is done.
> Note that it is not possible to dynamically size the buffer because
> the allocation needs to be done before the engine information is
> available (at least, it would be in the intended two-phase GuC init
> process).
>
> Now that the check works, it is reporting size too small for newer
> platforms. The check includes a 3x oversample multiplier to allow for
> multiple error captures to be bufferd by GuC before i915 has a chance
> to read them out. This is less important than simply being big enough
> to fit the first capture.
>
> So a) bump the default size to be large enough for one capture minimum
> and b) make the warning only if one capture won't fit, instead use a
> notice for the 3x size.
>
> Note that the size estimate is a worst case scenario. Actual captures
> will likely be smaller.
>
> Lastly, use drm_warn istead of DRM_WARN as the former provides more
> infmration and the latter is deprecated.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> .../gpu/drm/i915/gt/uc/intel_guc_capture.c | 40 ++++++++++++++-----
> .../gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 -
> drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 4 --
> drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 4 +-
> 4 files changed, 31 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index 75257bd20ff01..b54b7883320b1 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> @@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
> return 0;
> }
>
> -#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
> -
> -int
> -intel_guc_capture_output_min_size_est(struct intel_guc *guc)
> +static int
> +guc_capture_output_min_size_est(struct intel_guc *guc)
> {
> struct intel_gt *gt = guc_to_gt(guc);
> struct intel_engine_cs *engine;
> @@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
> * For each engine instance, there would be 1 x guc_state_capture_group_t output
> * followed by 3 x guc_state_capture_t lists. The latter is how the register
> * dumps are split across different register types (where the '3' are global vs class
> - * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
> - * not limited to just 1 round of data in a worst case full register dump log)
> - *
> - * NOTE: intel_guc_log that allocates the log buffer would round this size up to
> - * a power of two.
> + * vs instance).
> */
> -
> for_each_engine(engine, gt, id) {
> worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
> (3 * sizeof(struct guc_state_capture_header_t));
> @@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
>
> worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
>
> - return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
> + return worst_min_size;
> +}
> +
> +/*
> + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
> + * before the i915 can read the data out and process it
> + */
> +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
> +
> +static void check_guc_capture_size(struct intel_guc *guc)
> +{
> + struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> + int min_size = guc_capture_output_min_size_est(guc);
> + int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
> +
> + if (min_size < 0)
> + drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
> + min_size);
> + else if (min_size > CAPTURE_BUFFER_SIZE)
> + drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
> + CAPTURE_BUFFER_SIZE, min_size);
> + else if (spare_size > CAPTURE_BUFFER_SIZE)
> + drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
> + CAPTURE_BUFFER_SIZE, spare_size, min_size);
> }
>
> /*
> @@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc)
> INIT_LIST_HEAD(&guc->capture->outlist);
> INIT_LIST_HEAD(&guc->capture->cachelist);
>
> + check_guc_capture_size(guc);
> +
> return 0;
> }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> index d3d7bd0b6db64..fbd3713c7832d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
> @@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
> void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
> struct intel_context *ce);
> void intel_guc_capture_process(struct intel_guc *guc);
> -int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
> int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> void **outptr);
> int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> index 492bbf419d4df..991d4a02248dc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> @@ -487,10 +487,6 @@ int intel_guc_log_create(struct intel_guc_log *log)
>
> GEM_BUG_ON(log->vma);
>
> - if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
> - DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
> - CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
> -
> guc_log_size = intel_guc_log_size(log);
>
> vma = intel_guc_allocate_vma(guc, guc_log_size);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> index 18007e639be99..dc9715411d626 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
> @@ -22,11 +22,11 @@ struct intel_guc;
> #elif defined(CONFIG_DRM_I915_DEBUG_GEM)
> #define CRASH_BUFFER_SIZE SZ_1M
> #define DEBUG_BUFFER_SIZE SZ_2M
> -#define CAPTURE_BUFFER_SIZE SZ_1M
> +#define CAPTURE_BUFFER_SIZE SZ_4M
> #else
> #define CRASH_BUFFER_SIZE SZ_8K
> #define DEBUG_BUFFER_SIZE SZ_64K
> -#define CAPTURE_BUFFER_SIZE SZ_16K
> +#define CAPTURE_BUFFER_SIZE SZ_2M
> #endif
>
> /*
> --
> 2.37.1
>
More information about the Intel-gfx
mailing list