[PATCH v12 3/5] drm/xe/guc: Add capture size check in GuC log buffer

Dong, Zhanjun zhanjun.dong at intel.com
Tue Jul 16 20:25:50 UTC 2024



On 2024-07-15 5:40 p.m., Teres Alexis, Alan Previn wrote:
> On Fri, 2024-07-12 at 09:47 -0700, Zhanjun Dong wrote:
>> The capture-nodes is included in GuC log buffer, add the size check
>> for capture region in the whole GuC log buffer.
>> Add capture output size check before allocating the shared buffer.
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>>   drivers/gpu/drm/xe/abi/guc_log_abi.h  |  20 ++++
>>
> alan:snip
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
>> index a37ee3419428..8fecb528b92c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -9,9 +9,64 @@
>>   
>>   #include "xe_bo.h"
>>   #include "xe_gt.h"
>> +#include "xe_gt_printk.h"
>> +#include "xe_guc.h"
>>   #include "xe_map.h"
>>   #include "xe_module.h"
>>   
>> +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE      CRASH_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE      DEBUG_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE    CAPTURE_BUFFER_SIZE
>> +
>> +#define GUC_LOG_CALC_UNIT(size)                (((size) % SZ_1M) == 0 ? SZ_1M : SZ_4K)
>> +#define GUC_LOG_CALC_FLAG(size)                (((size) % SZ_1M) == 0 ? GUC_LOG_LOG_ALLOC_UNITS : 0)
>> +
>> +/* Size is +1 unit */
>> +#define GUC_LOG_CALC_COUNT(size)       ((size) / GUC_LOG_CALC_UNIT(size) - 1)
>> +
>> +static const struct xe_log_sizes_type log_sizes[GUC_LOG_BUFFER_TYPE_MAX] = {
>> +       {
>> +               GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE,
>> +               GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE),
>> +               GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE),
>> +               GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE)
>> +       },
>> +       {
>> +               GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE,
>> +               GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE),
>> +               GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE),
>> +               GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE)
>> +       },
>> +       {
>> +               GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE,
>> +               GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE),
>> +               GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE),
>> +               GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE)
>> +       }
>> +};
>> +
>> +/* Zero is not allowed for crash/debug/capture size */
>> +static_assert(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE);
>> +static_assert(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE);
>> +static_assert(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE);
>> +
>> +/* crash unit must equal to debug unit */
>> +static_assert(GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE) ==
>> +             GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE));
>> +
>> +/* crash/debug/capture count must less than mask range */
>> +static_assert(GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE) <=
>> +             GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT);
>> +static_assert(GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE) <=
>> +             GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT);
>> +static_assert(GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE) <=
>> +             GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT);
>> +
> alan: as per offline conversation and in line with my review comments from v11,
> you don't need to add any of these build time checks because guc_ctl_log_params_flags
> already does this - i just wanted you to check that existing code is still
> good for the increased capture region size.
Right, this is duplicated check, will be removed on next rev.

> 
> 
>> +static inline struct xe_guc *log_to_guc(struct xe_guc_log *log)
>> +{
>> +       return container_of(log, struct xe_guc, log);
>> +}
>> +
>>   static struct xe_gt *
>>   log_to_gt(struct xe_guc_log *log)
>>   {
>> @@ -80,7 +135,8 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
>>   int xe_guc_log_init(struct xe_guc_log *log)
>>   {
>>          struct xe_device *xe = log_to_xe(log);
>> -       struct xe_tile *tile = gt_to_tile(log_to_gt(log));
>> +       struct xe_gt *gt = log_to_gt(log);
>> +       struct xe_tile *tile = gt_to_tile(gt);
>>          struct xe_bo *bo;
>>   
>>          bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
>> @@ -96,3 +152,86 @@ int xe_guc_log_init(struct xe_guc_log *log)
>>   
>>          return 0;
>>   }
>> +
>> +/**
>> + * xe_guc_log_section_size_capture - Get capture buffer size in log sections.
>> + * @log: The log object.
>> + *
>> + * This function will return the capture buffer size in log sections.
>> + *
>> + * Return: capture buffer size.
>> + */
>> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
>> +{
>> +       return log_sizes[GUC_LOG_BUFFER_CAPTURE].bytes;
>> +}
>> +
>> +/**
>> + * xe_guc_check_log_buf_overflow - Check if log buffer overflowed
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + * @full_cnt: The count of buffer full
>> + *
>> + * This function will check count of buffer full against previous, mismatch
>> + * indicate overflowed.
>> + * Update the sampled_overflow counter, if the 4 bit counter overflowed, add
>> + * up 16 to correct the value.
>> + *
>> + * Return: True if overflowed.
>> + */
>> +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
>> +                                  unsigned int full_cnt)
>> +{
>> +       unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
>> +       bool overflow = false;
>> +
>> +       if (full_cnt != prev_full_cnt) {
>> +               overflow = true;
>> +
>> +               log->stats[type].overflow = full_cnt;
>> +               log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
>> +
>> +               if (full_cnt < prev_full_cnt) {
>> +                       /* buffer_full_cnt is a 4 bit counter */
>> +                       log->stats[type].sampled_overflow += 16;
>> +               }
>> +               xe_gt_notice(log_to_gt(log), "log buffer overflow\n");
>> +       }
>> +
>> +       return overflow;
>> +}
>> +
>> +/**
>> + * xe_guc_get_log_buffer_size - Get log buffer size for a type.
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + *
>> + * Return: buffer size.
>> + */
>> +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
>> +{
>> +       xe_gt_assert(log_to_gt(log), type < GUC_LOG_BUFFER_TYPE_MAX);
>> +       return log_sizes[type].bytes;
>> +}
>> +
>> +/**
>> + * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + *
>> + * This function will return the offset in the log buffer for a type.
>> + * Return: buffer offset.
>> + */
>> +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
>> +{
>> +       enum guc_log_buffer_type i;
>> +       u32 offset = PAGE_SIZE;/* for the log_buffer_states */
>> +
>> +       for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
>> +               if (i == type)
>> +                       break;
>> +               offset += xe_guc_get_log_buffer_size(log, i);
>> +       }
>> +
>> +       return offset;
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
>> index 2d25ab28b4b3..1d3b123cebd3 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
>> @@ -7,6 +7,7 @@
>>   #define _XE_GUC_LOG_H_
>>   
>>   #include "xe_guc_log_types.h"
>> +#include "xe_guc_types.h"
>>   
>>   struct drm_printer;
>>   
>> @@ -17,7 +18,7 @@ struct drm_printer;
>>   #else
>>   #define CRASH_BUFFER_SIZE      SZ_8K
>>   #define DEBUG_BUFFER_SIZE      SZ_64K
>> -#define CAPTURE_BUFFER_SIZE    SZ_16K
>> +#define CAPTURE_BUFFER_SIZE    SZ_1M
>>   #endif
>>   /*
>>    * While we're using plain log level in i915, GuC controls are much more...
>> @@ -45,4 +46,13 @@ xe_guc_log_get_level(struct xe_guc_log *log)
>>          return log->level;
>>   }
>>   
>> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
>> +
>> +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
>> +                                  enum guc_log_buffer_type type,
>> +                                  unsigned int full_cnt);
>> +
>> +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> index 125080d138a7..e0122aa99aa1 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> @@ -7,9 +7,18 @@
>>   #define _XE_GUC_LOG_TYPES_H_
>>   
>>   #include <linux/types.h>
>> +#include "abi/guc_log_abi.h"
>>   
>>   struct xe_bo;
>>   
>> +/** @sizes: Allocation settings */
>> +struct xe_log_sizes_type {
>> +       u32 bytes;      /* Size in bytes */
>> +       u32 units;      /* GuC API units - 1MB or 4KB */
>> +       u32 count;      /* Number of API units */
>> +       u32 flag;       /* GuC API units flag */
>> +};
>> +
>>   /**
>>    * struct xe_guc_log - GuC log
>>    */
>> @@ -18,6 +27,14 @@ struct xe_guc_log {
>>          u32 level;
>>          /** @bo: XE BO for GuC log */
>>          struct xe_bo *bo;
>> +       /** @sizes: Allocation settings */
>> +       struct xe_log_sizes_type sizes[GUC_LOG_BUFFER_TYPE_MAX];
>> +       /** @stats: logging related stats */
>> +       struct {
>> +               u32 sampled_overflow;
>> +               u32 overflow;
>> +               u32 flush;
>> +       } stats[GUC_LOG_BUFFER_TYPE_MAX];
>>   };
>>   
>>   #endif
> 


More information about the Intel-xe mailing list