[PATCH v11 3/5] drm/xe/guc: Add capture size check in GuC log buffer

Dong, Zhanjun zhanjun.dong at intel.com
Fri Jun 28 23:15:00 UTC 2024


Please see my inline comments below.

Regards,
Zhanjun Dong

On 2024-06-27 2:56 p.m., Michal Wajdeczko wrote:
> 
> 
> On 24.06.2024 23:54, Zhanjun Dong wrote:
>> The capture-nodes is included in GuC log buffer, add the size check
>> for capture region in the whole GuC log buffer.
>> Add capture output size check before allocating the shared buffer.
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>>   drivers/gpu/drm/xe/abi/guc_log_abi.h  |  59 ++++++++
>>   drivers/gpu/drm/xe/xe_guc_capture.c   |  81 ++++++++++
>>   drivers/gpu/drm/xe/xe_guc_log.c       | 205 ++++++++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_guc_log.h       |  17 ++-
...
>> +struct guc_log_buffer_state {
>> +	u32 marker[2];
>> +	u32 read_ptr;
>> +	u32 write_ptr;
>> +	u32 size;
>> +	u32 sampled_write_ptr;
>> +	u32 wrap_offset;
>> +	union {
>> +		struct {
>> +			u32 flush_to_file:1;
>> +			u32 buffer_full_cnt:4;
>> +			u32 reserved:27;
>> +		};
> 
> maybe instead of defining the union and bitfields, just provide GENMASK
> like it's done for other ABI definitions ?
Ok, to be changed.
> 
>> +		u32 flags;
>> +	};
>> +	u32 version;
>> +} __packed;
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
>> index 9c473aceb402..c2a08d4a6751 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
>> @@ -22,6 +22,7 @@
>>   #include "xe_gt_mcr.h"
>>   #include "xe_gt_printk.h"
>>   #include "xe_guc.h"
>> +#include "xe_guc_ads.h"
>>   #include "xe_guc_capture.h"
>>   #include "xe_guc_capture_types.h"
>>   #include "xe_guc_ct.h"
>> @@ -558,6 +559,85 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
>>   	return PAGE_ALIGN(total_size);
>>   }
>>   
>> +static int
>> +guc_capture_output_size_est(struct xe_guc *guc)
> 
> nit: no need for line split
> 
>> +{
>> +	struct xe_gt *gt = guc_to_gt(guc);
>> +	struct xe_hw_engine *hwe;
>> +	enum xe_hw_engine_id id;
>> +
>> +	int capture_size = 0;
>> +	size_t tmp = 0;
...
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
>> index a37ee3419428..0188bc0a2b84 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -9,9 +9,22 @@
>>   
>>   #include "xe_bo.h"
>>   #include "xe_gt.h"
>> +#include "xe_gt_printk.h"
>> +#include "xe_guc.h"
>>   #include "xe_map.h"
>>   #include "xe_module.h"
>>   
>> +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE	CRASH_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE	DEBUG_BUFFER_SIZE
>> +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE	CAPTURE_BUFFER_SIZE
>> +
>> +struct guc_log_section {
>> +	u32 max;
>> +	u32 flag;
>> +	u32 default_val;
>> +	const char *name;
>> +};
>> +
>>   static struct xe_gt *
>>   log_to_gt(struct xe_guc_log *log)
>>   {
>> @@ -96,3 +109,195 @@ int xe_guc_log_init(struct xe_guc_log *log)
>>   
>>   	return 0;
>>   }
>> +
>> +static void _guc_log_init_sizes(struct xe_guc_log *log)
>> +{
>> +	struct xe_guc *guc = log_to_guc(log);
>> +	static const struct guc_log_section sections[GUC_LOG_BUFFER_TYPE_MAX] = {
> 
> you don't need to specify array size here, let the compiler figure it out
I would prefer to have GUC_LOG_BUFFER_TYPE_MAX here, to prevent 
unpurposed sections add/delete.
> 
>> +		{
>> +			GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT,
>> +			GUC_LOG_LOG_ALLOC_UNITS,
>> +			GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE,
>> +			"crash dump"
>> +		},
>> +		{
>> +			GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT,
>> +			GUC_LOG_LOG_ALLOC_UNITS,
>> +			GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE,
>> +			"debug",
>> +		},
>> +		{
>> +			GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT,
>> +			GUC_LOG_CAPTURE_ALLOC_UNITS,
>> +			GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE,
>> +			"capture",
>> +		}
>> +	};
>> +	int i;
>> +
>> +	for (i = 0; i < GUC_LOG_BUFFER_TYPE_MAX; i++)
> 
> and here you can use ARRAY_SIZE to get rid of TYPE_MAX definition
> 
>> +		log->sizes[i].bytes = sections[i].default_val;
>> +
>> +	/* If debug size > 1MB then bump default crash size to keep the same units */
>> +	if (log->sizes[GUC_LOG_BUFFER_DEBUG].bytes >= SZ_1M &&
>> +	    GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE < SZ_1M)
>> +		log->sizes[GUC_LOG_BUFFER_CRASH_DUMP].bytes = SZ_1M;
>> +
>> +	/* Prepare the GuC API structure fields: */
>> +	for (i = 0; i < GUC_LOG_BUFFER_TYPE_MAX; i++) {
>> +		/* Convert to correct units */
>> +		if ((log->sizes[i].bytes % SZ_1M) == 0) {
>> +			log->sizes[i].units = SZ_1M;
>> +			log->sizes[i].flag = sections[i].flag;
>> +		} else {
>> +			log->sizes[i].units = SZ_4K;
>> +			log->sizes[i].flag = 0;
>> +		}
...
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
>> index 2d25ab28b4b3..ea9e79ccd314 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
>> @@ -7,6 +7,7 @@
>>   #define _XE_GUC_LOG_H_
>>   
>>   #include "xe_guc_log_types.h"
>> +#include "xe_guc_types.h"
>>   
>>   struct drm_printer;
>>   
>> @@ -17,7 +18,7 @@ struct drm_printer;
>>   #else
>>   #define CRASH_BUFFER_SIZE	SZ_8K
>>   #define DEBUG_BUFFER_SIZE	SZ_64K
>> -#define CAPTURE_BUFFER_SIZE	SZ_16K
>> +#define CAPTURE_BUFFER_SIZE	SZ_1M
>>   #endif
>>   /*
>>    * While we're using plain log level in i915, GuC controls are much more...
>> @@ -36,6 +37,11 @@ struct drm_printer;
>>   #define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
>>   #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
>>   
>> +static inline struct xe_guc *log_to_guc(struct xe_guc_log *log)
>> +{
>> +	return container_of(log, struct xe_guc, log);
>> +}
> 
> do we really need to promote this to .h ?
Sure, will be removed from .h
> 
>> +
>>   int xe_guc_log_init(struct xe_guc_log *log);
>>   void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
>>   
>> @@ -45,4 +51,13 @@ xe_guc_log_get_level(struct xe_guc_log *log)
>>   	return log->level;
>>   }
>>   
>> +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
>> +
>> +bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
>> +				   enum guc_log_buffer_type type,
>> +				   unsigned int full_cnt);
>> +
>> +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> index 125080d138a7..67a9c58e7ed7 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> @@ -7,6 +7,7 @@
>>   #define _XE_GUC_LOG_TYPES_H_
>>   
>>   #include <linux/types.h>
>> +#include "abi/guc_log_abi.h"
>>   
>>   struct xe_bo;
>>   
>> @@ -18,6 +19,23 @@ struct xe_guc_log {
>>   	u32 level;
>>   	/** @bo: XE BO for GuC log */
>>   	struct xe_bo *bo;
>> +
>> +	/** @sizes: Allocation settings */
>> +	struct {
>> +		u32 bytes;	/* Size in bytes */
>> +		u32 units;	/* GuC API units - 1MB or 4KB */
>> +		u32 count;	/* Number of API units */
>> +		u32 flag;	/* GuC API units flag */
>> +	} sizes[GUC_LOG_BUFFER_TYPE_MAX];
> 
> maybe definition of GUC_LOG_BUFFER_TYPE_MAX can be part of this .h ?
GUC_LOG_BUFFER_TYPE_MAX defined in guc_log_abi.h and was placed close to 
enum guc_log_buffer_type, the max has strong relationship with this 
enum, make it better to be defined in _abi.h rather than here.
> 
>> +	/** @sizes_initialised: sizes initialised */
> 
> can you elaborate ;)

> 
>> +	bool sizes_initialised;
>> +
>> +	/** @stats: logging related stats */
>> +	struct {
>> +		u32 sampled_overflow;
>> +		u32 overflow;
>> +		u32 flush;
>> +	} stats[GUC_LOG_BUFFER_TYPE_MAX];
>>   };
>>   
>>   #endif


More information about the Intel-xe mailing list