[Intel-xe] [PATCH] drm/xe: Fix driver load with CONFIG_DRM_XE_LARGE_GUC_BUFFER
Matthew Brost
matthew.brost at intel.com
Mon Apr 3 04:33:08 UTC 2023
On Sun, Apr 02, 2023 at 12:09:30AM -0700, Matthew Brost wrote:
> While debugging [1] I noticed the GuC log was corrupt. I believe our
> logic to program the GuC log size was incorrect resulting driver load
> failures. Shameless copy from to fix this. A follow up will wire the GuC
> log sizes to modparams.
>
> [1] https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/232
>
Everyone ignore this patch, mistakenly thought this patch fixed something.
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc.c | 47 ++-------
> drivers/gpu/drm/xe/xe_guc_log.c | 145 +++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_guc_log.h | 9 --
> drivers/gpu/drm/xe/xe_guc_log_types.h | 20 ++++
> 4 files changed, 167 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 58b9841616e4..1694fdc3a7fa 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -69,54 +69,21 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc)
>
> static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
> {
> + struct xe_guc_log *log = &guc->log;
> u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
> u32 flags;
>
> - #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
> - #define LOG_UNIT SZ_1M
> - #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
> - #else
> - #define LOG_UNIT SZ_4K
> - #define LOG_FLAG 0
> - #endif
> -
> - #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
> - #define CAPTURE_UNIT SZ_1M
> - #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
> - #else
> - #define CAPTURE_UNIT SZ_4K
> - #define CAPTURE_FLAG 0
> - #endif
> -
> - BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
> - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
> - BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
> - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
> - BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
> - BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
> -
> - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
> - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
> - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
> - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
> - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
> - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
> + XE_BUG_ON(!log->sizes_initialised);
>
> flags = GUC_LOG_VALID |
> GUC_LOG_NOTIFY_ON_HALF_FULL |
> - CAPTURE_FLAG |
> - LOG_FLAG |
> - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
> - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
> - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
> - GUC_LOG_CAPTURE_SHIFT) |
> + log->sizes[GUC_LOG_SECTIONS_DEBUG].flag |
> + log->sizes[GUC_LOG_SECTIONS_CAPTURE].flag |
> + (log->sizes[GUC_LOG_SECTIONS_CRASH].count << GUC_LOG_CRASH_SHIFT) |
> + (log->sizes[GUC_LOG_SECTIONS_DEBUG].count << GUC_LOG_DEBUG_SHIFT) |
> + (log->sizes[GUC_LOG_SECTIONS_CAPTURE].count << GUC_LOG_CAPTURE_SHIFT) |
> (offset << GUC_LOG_BUF_ADDR_SHIFT);
>
> - #undef LOG_UNIT
> - #undef LOG_FLAG
> - #undef CAPTURE_UNIT
> - #undef CAPTURE_FLAG
> -
> return flags;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
> index 9a7b5d5906c1..f171b997b4cb 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.c
> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
> @@ -12,6 +12,16 @@
> #include "xe_map.h"
> #include "xe_module.h"
>
> +#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
> +#define CRASH_BUFFER_SIZE SZ_1M
> +#define DEBUG_BUFFER_SIZE SZ_8M
> +#define CAPTURE_BUFFER_SIZE SZ_2M
> +#else
> +#define CRASH_BUFFER_SIZE SZ_8K
> +#define DEBUG_BUFFER_SIZE SZ_64K
> +#define CAPTURE_BUFFER_SIZE SZ_16K
> +#endif
> +
> static struct xe_gt *
> log_to_gt(struct xe_guc_log *log)
> {
> @@ -24,7 +34,130 @@ log_to_xe(struct xe_guc_log *log)
> return gt_to_xe(log_to_gt(log));
> }
>
> -static size_t guc_log_size(void)
> +struct guc_log_section {
> + u32 max;
> + u32 flag;
> + u32 default_val;
> + const char *name;
> +};
> +
> +static s32 scale_log_param(struct xe_guc_log *log,
> + const struct guc_log_section *section)
> +{
> + /* XXX: Add modparams */
> + return section->default_val;
> +}
> +
> +static void _guc_log_init_sizes(struct xe_guc_log *log)
> +{
> + struct xe_device *xe = log_to_xe(log);
> + static const struct guc_log_section sections[GUC_LOG_SECTIONS_LIMIT] = {
> + {
> + GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT,
> + GUC_LOG_LOG_ALLOC_UNITS,
> + CRASH_BUFFER_SIZE,
> + "crash dump"
> + },
> + {
> + GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT,
> + GUC_LOG_LOG_ALLOC_UNITS,
> + DEBUG_BUFFER_SIZE,
> + "debug",
> + },
> + {
> + GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT,
> + GUC_LOG_CAPTURE_ALLOC_UNITS,
> + CAPTURE_BUFFER_SIZE,
> + "capture",
> + }
> + };
> + int i;
> +
> + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++)
> + log->sizes[i].bytes = scale_log_param(log, sections + i);
> +
> + /* If debug size > 1MB then bump default crash size to keep the same units */
> + if ((log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes >= SZ_1M) &&
> + (CRASH_BUFFER_SIZE < SZ_1M))
> + log->sizes[GUC_LOG_SECTIONS_CRASH].bytes = SZ_1M;
> +
> + /* Prepare the GuC API structure fields: */
> + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) {
> + /* Convert to correct units */
> + if ((log->sizes[i].bytes % SZ_1M) == 0) {
> + log->sizes[i].units = SZ_1M;
> + log->sizes[i].flag = sections[i].flag;
> + } else {
> + log->sizes[i].units = SZ_4K;
> + log->sizes[i].flag = 0;
> + }
> +
> + if (!IS_ALIGNED(log->sizes[i].bytes, log->sizes[i].units))
> + drm_err(&xe->drm, "Mis-aligned GuC log %s size: 0x%X vs 0x%X!",
> + sections[i].name, log->sizes[i].bytes,
> + log->sizes[i].units);
> + log->sizes[i].count = log->sizes[i].bytes / log->sizes[i].units;
> +
> + if (!log->sizes[i].count) {
> + drm_err(&xe->drm, "Zero GuC log %s size!",
> + sections[i].name);
> + } else {
> + /* Size is +1 unit */
> + log->sizes[i].count--;
> + }
> +
> + /* Clip to field size */
> + if (log->sizes[i].count > sections[i].max) {
> + drm_err(&xe->drm, "GuC log %s size too large: %d vs %d!",
> + sections[i].name, log->sizes[i].count + 1,
> + sections[i].max + 1);
> + log->sizes[i].count = sections[i].max;
> + }
> + }
> +
> + if (log->sizes[GUC_LOG_SECTIONS_CRASH].units !=
> + log->sizes[GUC_LOG_SECTIONS_DEBUG].units) {
> + drm_err(&xe->drm, "Unit mis-match for GuC log crash and debug sections: %d vs %d!",
> + log->sizes[GUC_LOG_SECTIONS_CRASH].units,
> + log->sizes[GUC_LOG_SECTIONS_DEBUG].units);
> + log->sizes[GUC_LOG_SECTIONS_CRASH].units =
> + log->sizes[GUC_LOG_SECTIONS_DEBUG].units;
> + log->sizes[GUC_LOG_SECTIONS_CRASH].count = 0;
> + }
> +
> + log->sizes_initialised = true;
> +}
> +
> +static void guc_log_init_sizes(struct xe_guc_log *log)
> +{
> + if (log->sizes_initialised)
> + return;
> +
> + _guc_log_init_sizes(log);
> +}
> +
> +static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
> +{
> + guc_log_init_sizes(log);
> +
> + return log->sizes[GUC_LOG_SECTIONS_CRASH].bytes;
> +}
> +
> +static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
> +{
> + guc_log_init_sizes(log);
> +
> + return log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes;
> +}
> +
> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
> +{
> + guc_log_init_sizes(log);
> +
> + return log->sizes[GUC_LOG_SECTIONS_CAPTURE].bytes;
> +}
> +
> +static size_t guc_log_size(struct xe_guc_log *log)
> {
> /*
> * GuC Log buffer Layout
> @@ -45,8 +178,10 @@ static size_t guc_log_size(void)
> * | Capture logs |
> * +===============================+ + CAPTURE_SIZE
> */
> - return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
> - CAPTURE_BUFFER_SIZE;
> + return PAGE_SIZE +
> + xe_guc_log_section_size_crash(log) +
> + xe_guc_log_section_size_debug(log) +
> + xe_guc_log_section_size_capture(log);
> }
>
> void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
> @@ -91,14 +226,14 @@ int xe_guc_log_init(struct xe_guc_log *log)
> struct xe_bo *bo;
> int err;
>
> - bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
> + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(log),
> ttm_bo_type_kernel,
> XE_BO_CREATE_VRAM_IF_DGFX(gt) |
> XE_BO_CREATE_GGTT_BIT);
> if (IS_ERR(bo))
> return PTR_ERR(bo);
>
> - xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
> + xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size(log));
> log->bo = bo;
> log->level = xe_guc_log_level;
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
> index 2d25ab28b4b3..52dc264ca540 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.h
> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
> @@ -10,15 +10,6 @@
>
> struct drm_printer;
>
> -#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
> -#define CRASH_BUFFER_SIZE SZ_1M
> -#define DEBUG_BUFFER_SIZE SZ_8M
> -#define CAPTURE_BUFFER_SIZE SZ_2M
> -#else
> -#define CRASH_BUFFER_SIZE SZ_8K
> -#define DEBUG_BUFFER_SIZE SZ_64K
> -#define CAPTURE_BUFFER_SIZE SZ_16K
> -#endif
> /*
> * While we're using plain log level in i915, GuC controls are much more...
> * "elaborate"? We have a couple of bits for verbosity, separate bit for actual
> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
> index 125080d138a7..8807d7bc6ec9 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
> @@ -10,6 +10,13 @@
>
> struct xe_bo;
>
> +enum {
> + GUC_LOG_SECTIONS_CRASH,
> + GUC_LOG_SECTIONS_DEBUG,
> + GUC_LOG_SECTIONS_CAPTURE,
> + GUC_LOG_SECTIONS_LIMIT
> +};
> +
> /**
> * struct xe_guc_log - GuC log
> */
> @@ -18,6 +25,19 @@ struct xe_guc_log {
> u32 level;
> /** @bo: XE BO for GuC log */
> struct xe_bo *bo;
> + /** @sizes: Allocation settings */
> + struct {
> + /** @bytes: Size in bytes */
> + s32 bytes;
> + /** @units: GuC API units - 1MB or 4KB */
> + s32 units;
> + /** @count: Number of API units */
> + s32 count;
> + /** @flag: GuC API units flag */
> + u32 flag;
> + } sizes[GUC_LOG_SECTIONS_LIMIT];
> + /** @sizes_initialised: sizes of log initialized */
> + bool sizes_initialised;
> };
>
> #endif
> --
> 2.34.1
>
More information about the Intel-xe
mailing list