[PATCH v14 3/5] drm/xe/guc: Add capture size check in GuC log buffer
Dong, Zhanjun
zhanjun.dong at intel.com
Wed Jul 31 14:55:10 UTC 2024
See my comments below.
Regards,
Zhanjun Dong
On 2024-07-30 3:20 p.m., Teres Alexis, Alan Previn wrote:
> On Wed, 2024-07-24 at 07:51 -0700, Zhanjun Dong wrote:
>> The capture-nodes is included in GuC log buffer, add the size check
>> for capture region in the whole GuC log buffer.
>> Add capture output size check before allocating the shared buffer.
>>
> alan: I think the use of the term "capture-nodes" above is not very clear,
> assuming the term "nodes" is per the meaning in the extraction patch.
> So perhaps a tweak to above first line:
> "Capture-nodes generated by GuC are placed in the GuC capture ring
> buffer which is a sub-region of the larger Guc-Log-buffer.
> Add capture output size check ....". >
>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>> drivers/gpu/drm/xe/abi/guc_log_abi.h | 20 ++++
>> drivers/gpu/drm/xe/xe_guc_capture.c | 83 ++++++++++++++-
>> drivers/gpu/drm/xe/xe_guc_log.c | 141 +++++++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_guc_log.h | 12 ++-
>> drivers/gpu/drm/xe/xe_guc_log_types.h | 15 +++
>> 5 files changed, 268 insertions(+), 3 deletions(-)
>> create mode 100644 drivers/gpu/drm/xe/abi/guc_log_abi.h
>>
...
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
>> index a37ee3419428..23512870503c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -9,9 +9,47 @@
>>
>> #include "xe_bo.h"
>> #include "xe_gt.h"
>> +#include "xe_gt_printk.h"
>> +#include "xe_guc.h"
>> #include "xe_map.h"
>> #include "xe_module.h"
>>
>> +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE CRASH_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE DEBUG_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE CAPTURE_BUFFER_SIZE
>> +
>> +#define GUC_LOG_CALC_UNIT(size) (((size) % SZ_1M) == 0 ? SZ_1M : SZ_4K)
>> +#define GUC_LOG_CALC_FLAG(size) (((size) % SZ_1M) == 0 ? GUC_LOG_LOG_ALLOC_UNITS : 0)
>> +
>> +/* Size is +1 unit */
>> +#define GUC_LOG_CALC_COUNT(size) ((size) / GUC_LOG_CALC_UNIT(size) - 1)
>> +
>> +static const struct xe_log_sizes_type log_sizes[GUC_LOG_BUFFER_TYPE_MAX] = {
>>
> alan: i believe you are still not getting me, apologies for not explaining
> clearer, I dont believe you need "xe_log_sizes_type" at all, i dont believe
> you need this log_sizes at all, all you need is for the function
> xe_guc_log_section_size_capture to return either CRASH_BUFFER_SIZE or
> DEBUG_BUFFER_SIZE or CAPTURE_BUFFER_SIZE based on the type. Even if you want
> a static lookup table it might only need to be as simple as:
>
> size_t guc_region_sizes[GUC_LOG_BUFFER_TYPE_MAX] = {CRASH_BUFFER_SIZE, DEBUG_BUFFER_SIZE, CAPTURE_BUFFER_SIZE};
>
> That said, you dont even need any of the #defines above for GUC_LOG_DEFAULT_*_SIZE,
> GUC_LOG_CALC_UNIT/FLAG/COUNT, GUC_LOG_COUNT. You are overbloating this patch unnecessarily
> because you keep pulling in i915 code that was used for the init time dynamic calculation
> of region sizes. None of this is requred. I dont see you even using any of other info
> besides size which is bascically CRASH/DEBUG/CAPTURE_BUFFER_SIZE
>
Thanks for provide more description. Yes, as we moved from dynamic to
static, all of those defines no longer needed. Put into a structure is
not needed. Sounds like for C compiler, even all info within the
structure is constant, the compiler still dont treat the structure
itself is a constant. Which doesn't optimized out equal to your suggestion:
size_t guc_region_sizes[GUC_LOG_BUFFER_TYPE_MAX] = {CRASH_BUFFER_SIZE,
DEBUG_BUFFER_SIZE, CAPTURE_BUFFER_SIZE};
I will remove all defines and structures and switch to your suggestion
in next rev.
>> + {
>> + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE,
>> + GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE),
>> + GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE),
>> + GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE)
>> + },
>> + {
>> + GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE,
>> + GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE),
>> + GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE),
>> + GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE)
>> + },
>> + {
>> + GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE,
>> + GUC_LOG_CALC_UNIT(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE),
>> + GUC_LOG_CALC_COUNT(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE),
>> + GUC_LOG_CALC_FLAG(GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE)
>> + }
>> +};
>> +
>> +static inline struct xe_guc *log_to_guc(struct xe_guc_log *log)
>> +{
>> + return container_of(log, struct xe_guc, log);
>> +}
>> +
>> static struct xe_gt *
>> log_to_gt(struct xe_guc_log *log)
>> {
>> @@ -80,7 +118,8 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
>> int xe_guc_log_init(struct xe_guc_log *log)
>> {
>> struct xe_device *xe = log_to_xe(log);
>> - struct xe_tile *tile = gt_to_tile(log_to_gt(log));
>> + struct xe_gt *gt = log_to_gt(log);
>> + struct xe_tile *tile = gt_to_tile(gt);
>> struct xe_bo *bo;
>>
>> bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
>> @@ -96,3 +135,103 @@ int xe_guc_log_init(struct xe_guc_log *log)
>>
>> return 0;
>> }
>> +
>> +static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
>> +{
>> + return log_sizes[GUC_LOG_BUFFER_CRASH_DUMP].bytes;
>> +}
>> +
>> +static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
>> +{
>> + return log_sizes[GUC_LOG_BUFFER_DEBUG].bytes;
>> +}
>> +
>> +/**
>> + * xe_guc_log_section_size_capture - Get capture buffer size in log sections.
>> + * @log: The log object.
>> + *
>> + * This function will return the capture buffer size in log sections.
>> + *
>> + * Return: capture buffer size.
>> + */
>> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
>> +{
>> + return log_sizes[GUC_LOG_BUFFER_CAPTURE].bytes;
>> +}
>> +
>> +/**
>> + * xe_guc_check_log_buf_overflow - Check if log buffer overflowed
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + * @full_cnt: The count of buffer full
>> + *
>> + * This function will check count of buffer full against previous, mismatch
>> + * indicate overflowed.
>> + * Update the sampled_overflow counter, if the 4 bit counter overflowed, add
>> + * up 16 to correct the value.
>> + *
>> + * Return: True if overflowed.
>> + */
>> +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
>> + unsigned int full_cnt)
>> +{
> alan: I dont see you using this function at all. This function along with the "stats"
> sub-structure you added into 'struct xe_guc_log" right at the bottom are both
> only used in the extraction patch so these should be in that other patch.
Sure, will do
>> + unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
>> + bool overflow = false;
>> +
>> + if (full_cnt != prev_full_cnt) {
>> + overflow = true;
>> +
>> + log->stats[type].overflow = full_cnt;
>> + log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
>> +
>> + if (full_cnt < prev_full_cnt) {
>> + /* buffer_full_cnt is a 4 bit counter */
>> + log->stats[type].sampled_overflow += 16;
>> + }
>> + xe_gt_notice(log_to_gt(log), "log buffer overflow\n");
>> + }
>> +
>> + return overflow;
>> +}
>> +
>> +/**
>> + * xe_guc_get_log_buffer_size - Get log buffer size for a type.
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + *
>> + * Return: buffer size.
>> + */
>> +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
>> +{
>> + switch (type) {
>> + case GUC_LOG_BUFFER_CRASH_DUMP:
>> + return xe_guc_log_section_size_crash(log);
>> + case GUC_LOG_BUFFER_DEBUG:
>> + return xe_guc_log_section_size_debug(log);
>> + case GUC_LOG_BUFFER_CAPTURE:
>> + return xe_guc_log_section_size_capture(log);
>> + }
>> + return 0;
>> +}
>> +
>> +/**
>> + * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
>> + * @log: The log object.
>> + * @type: The log buffer type
>> + *
>> + * This function will return the offset in the log buffer for a type.
>> + * Return: buffer offset.
>> + */
>> +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
>> +{
>> + enum guc_log_buffer_type i;
>> + u32 offset = PAGE_SIZE;/* for the log_buffer_states */
>> +
>> + for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
>> + if (i == type)
>> + break;
>> + offset += xe_guc_get_log_buffer_size(log, i);
>> + }
>> +
>> + return offset;
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
>> index 2d25ab28b4b3..1d3b123cebd3 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
>> @@ -7,6 +7,7 @@
>> #define _XE_GUC_LOG_H_
>>
>> #include "xe_guc_log_types.h"
>> +#include "xe_guc_types.h"
>>
>> struct drm_printer;
>>
>> @@ -17,7 +18,7 @@ struct drm_printer;
>> #else
>> #define CRASH_BUFFER_SIZE SZ_8K
>> #define DEBUG_BUFFER_SIZE SZ_64K
>> -#define CAPTURE_BUFFER_SIZE SZ_16K
>> +#define CAPTURE_BUFFER_SIZE SZ_1M
>> #endif
>> /*
>> * While we're using plain log level in i915, GuC controls are much more...
>> @@ -45,4 +46,13 @@ xe_guc_log_get_level(struct xe_guc_log *log)
>> return log->level;
>> }
>>
>> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
>> +
>> +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
>> + enum guc_log_buffer_type type,
>> + unsigned int full_cnt);
>> +
>> +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> index 125080d138a7..3da7d5262601 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> @@ -7,9 +7,18 @@
>> #define _XE_GUC_LOG_TYPES_H_
>>
>> #include <linux/types.h>
>> +#include "abi/guc_log_abi.h"
>>
>> struct xe_bo;
>>
>> +/** @sizes: Allocation settings */
>> +struct xe_log_sizes_type {
>>
>> + u32 bytes; /* Size in bytes */
>> + u32 units; /* GuC API units - 1MB or 4KB */
>> + u32 count; /* Number of API units */
>> + u32 flag; /* GuC API units flag */
>> +};
> alan: as mentioned above, you dont need this structure at all.
>> +
>> /**
>> * struct xe_guc_log - GuC log
>> */
>> @@ -18,6 +27,12 @@ struct xe_guc_log {
>> u32 level;
>> /** @bo: XE BO for GuC log */
>> struct xe_bo *bo;
>> + /** @stats: logging related stats */
>> + struct {
>> + u32 sampled_overflow;
>> + u32 overflow;
>> + u32 flush;
>> + } stats[GUC_LOG_BUFFER_TYPE_MAX];
> alan: as mentioned earlier, this "stats" structure along
> with function xe_guc_check_log_buf_overflow needs to be
> in the extraction patch.
>> };
>>
>> #endif
>
More information about the Intel-xe
mailing list