[PATCH v4 3/7] drm/xe/guc: Use a two stage dump for GuC logs and add more info

Wed Jun 12 23:52:30 UTC 2024

On 6/11/2024 15:49, Michal Wajdeczko wrote:
>
> On 11.06.2024 03:20, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> Split the GuC log dump into a two stage snapshot and print mechanism.
>> This allows the log to be captured at the point of an error (which may
>> be in a resticted context) and then dump it out later (from a regular
> typo
>
>> context such as a worker function or a sysfs file handler).
>>
>> Also add a bunch of other useful pieces of information that can help
>> (or are fundamentally required!) to decode and parse the log.
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>>   drivers/gpu/drm/xe/regs/xe_guc_regs.h |   1 +
>>   drivers/gpu/drm/xe/xe_guc_log.c       | 150 ++++++++++++++++++++------
>>   drivers/gpu/drm/xe/xe_guc_log.h       |   6 ++
>>   drivers/gpu/drm/xe/xe_guc_log_types.h |  29 +++++
>>   4 files changed, 155 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
>> index a5fd14307f94..b27b73680c12 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
>> @@ -84,6 +84,7 @@
>>   #define   HUC_LOADING_AGENT_GUC			REG_BIT(1)
>>   #define   GUC_WOPCM_OFFSET_VALID		REG_BIT(0)
>>   #define GUC_MAX_IDLE_COUNT			XE_REG(0xc3e4)
>> +#define GUC_PMTIMESTAMP				XE_REG(0xc3e8)
>>   
>>   #define GUC_SEND_INTERRUPT			XE_REG(0xc4c8)
>>   #define   GUC_SEND_TRIGGER			REG_BIT(0)
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
>> index a35309926271..a35d3cc0c369 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -7,12 +7,20 @@
>>   
>>   #include <drm/drm_managed.h>
>>   
>> +#include "regs/xe_guc_regs.h"
>>   #include "xe_bo.h"
>>   #include "xe_gt.h"
>>   #include "xe_gt_printk.h"
>>   #include "xe_map.h"
>> +#include "xe_mmio.h"
>>   #include "xe_module.h"
>>   
>> +static struct xe_guc *
>> +log_to_guc(struct xe_guc_log *log)
> any reason why to split signature into two lines?
Because the similar log_to_gt underneath was done that way so I figured 
it would be best to match that. And no, I don't know why that (or indeed 
the same in many other files) is done that way but it does appear to be 
the common convention.

>
>> +{
>> +	return container_of(log, struct xe_guc, log);
>> +}
>> +
>>   static struct xe_gt *
>>   log_to_gt(struct xe_guc_log *log)
>>   {
>> @@ -108,62 +116,142 @@ static void xe_hexdump_blob(struct xe_device *xe, const void *blob, size_t size,
>>   
>>   #define GUC_LOG_CHUNK_SIZE	SZ_2M
>>   
>> -void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p, bool atomic)
> no kernel-doc
>
>> +struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic)
>>   {
>> -	struct xe_device *xe = log_to_xe(log);
>> -	size_t size, remain;
>> -	void **copy;
>> -	int num_chunks, i;
>> +	struct xe_guc_log_snapshot *snapshot;
>> +	size_t remain;
>> +	int i;
>>   
>> -	xe_assert(xe, log->bo);
>> +	snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
>> +	if (!snapshot)
>> +		return NULL;
>>   
>>   	/*
>>   	 * NB: kmalloc has a hard limit well below the maximum GuC log buffer size.
>>   	 * Also, can't use vmalloc as might be called from atomic context. So need
>>   	 * to break the buffer up into smaller chunks that can be allocated.
>>   	 */
>> -	size = log->bo->size;
>> -	num_chunks = (size + GUC_LOG_CHUNK_SIZE - 1) / GUC_LOG_CHUNK_SIZE;
>> +	snapshot->size = log->bo->size;
>> +	snapshot->num_chunks = (snapshot->size + GUC_LOG_CHUNK_SIZE - 1) / GUC_LOG_CHUNK_SIZE;
>>   
>> -	copy = kcalloc(num_chunks, sizeof(*copy), atomic ? GFP_ATOMIC : GFP_KERNEL);
>> -	if (!copy) {
>> -		drm_printf(p, "Failed to allocate array x%d", num_chunks);
>> -		return;
>> -	}
>> +	snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
>> +				 atomic ? GFP_ATOMIC : GFP_KERNEL);
>> +	if (!snapshot->copy)
>> +		goto fail_snap;
>>   
>> -	remain = size;
>> -	for (i = 0; i < num_chunks; i++) {
>> +	remain = snapshot->size;
>> +	for (i = 0; i < snapshot->num_chunks; i++) {
>>   		size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
>>   
>> -		copy[i] = kmalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
>> -		if (!copy[i]) {
>> -			drm_printf(p, "Failed to allocate %ld at chunk %d of %d",
>> -				   size, i, num_chunks);
>> -			goto out;
>> -		}
>> +		snapshot->copy[i] = kmalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
>> +		if (!snapshot->copy[i])
>> +			goto fail_copy;
>>   		remain -= size;
>>   	}
>>   
>> -	remain = size;
>> -	for (i = 0; i < num_chunks; i++) {
>> +	return snapshot;
>> +
>> +fail_copy:
>> +	for (i = 0; i < snapshot->num_chunks; i++)
>> +		kfree(snapshot->copy[i]);
>> +	kfree(snapshot->copy);
>> +fail_snap:
>> +	kfree(snapshot);
>> +	return NULL;
>> +}
>> +
>> +void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot)
>> +{
>> +	int i;
>> +
>> +	if (!snapshot)
>> +		return;
>> +
>> +	if (!snapshot->copy) {
>> +		for (i = 0; i < snapshot->num_chunks; i++)
>> +			kfree(snapshot->copy[i]);
>> +		kfree(snapshot->copy);
>> +	}
>> +
>> +	kfree(snapshot);
>> +}
>> +
>> +struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic)
>> +{
>> +	struct xe_guc_log_snapshot *snapshot;
>> +	struct xe_device *xe = log_to_xe(log);
>> +	struct xe_guc *guc = log_to_guc(log);
>> +	struct xe_gt *gt = log_to_gt(log);
>> +	size_t remain;
>> +	int i;
>> +
>> +	if (!log->bo) {
>> +		xe_gt_err(gt, "GuC log not allocated!\n");
>> +		return NULL;
>> +	}
>> +
>> +	snapshot = xe_guc_log_snapshot_alloc(log, atomic);
>> +	if (!snapshot) {
>> +		xe_gt_err(gt, "GuC log snapshot not allocated!\n");
>> +		return NULL;
>> +	}
>> +
>> +	remain = snapshot->size;
>> +	for (i = 0; i < snapshot->num_chunks; i++) {
>>   		size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
>>   
>> -		xe_map_memcpy_from(xe, copy[i], &log->bo->vmap, i * GUC_LOG_CHUNK_SIZE, size);
>> +		xe_map_memcpy_from(xe, snapshot->copy[i], &log->bo->vmap,
>> +				   i * GUC_LOG_CHUNK_SIZE, size);
>>   		remain -= size;
>>   	}
>>   
>> -	remain = size;
>> -	for (i = 0; i < num_chunks; i++) {
>> +	snapshot->ktime = ktime_get_boottime_ns();
>> +	snapshot->stamp = xe_mmio_read32(gt, GUC_PMTIMESTAMP);
>> +	snapshot->ref_clk = gt->info.reference_clock;
>> +	snapshot->level = log->level;
>> +	snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE];
>> +	snapshot->ver_want = guc->fw.versions.wanted;
>> +	snapshot->path = guc->fw.path;
> shouldn't we use kstrdup() instead ?
The firmware path is always a static string. It is either an actual 
static const thing from the compiled in table or it has come from the 
kernel command line as a module parameter. Either way, it is not going 
to change or disappear for the lifetime of the Xe module. So no reason 
to waste memory duplicating it.

>
>> +
>> +	return snapshot;
>> +}
>> +
>> +void xe_guc_log_snapshot_print(struct xe_device *xe, struct xe_guc_log_snapshot *snapshot,
>> +			       struct drm_printer *p, bool atomic)
>> +{
>> +	size_t remain;
>> +	int i;
>> +
>> +	if (!snapshot) {
>> +		drm_printf(p, "GuC log snapshot not allocated!\n");
>> +		return;
>> +	}
>> +
>> +	drm_printf(p, "GuC version %u.%u.%u (wanted %u.%u.%u)\n",
>> +		   snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch,
>> +		   snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch);
>> +	drm_printf(p, "GuC firmware: %s\n", snapshot->path);
>> +	drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime);
>> +	drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", snapshot->stamp, snapshot->stamp);
>> +	drm_printf(p, "CS timestamp frequency: %u Hz\n", snapshot->ref_clk);
>> +	drm_printf(p, "Log level: %u\n", snapshot->level);
>> +
>> +	remain = snapshot->size;
>> +	for (i = 0; i < snapshot->num_chunks; i++) {
>>   		size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
>>   
>> -		xe_hexdump_blob(xe, copy[i], size, p, atomic);
>> +		xe_hexdump_blob(xe, snapshot->copy[i], size, p, atomic);
> we really should get rid of xe in xe_hexdump_blob
I'm not seeing why but that can be done as a follow up patch if you 
really think it is necessary. Personally, I'd rather spend the effort on 
completely removing it and going with compressed ASCII encoded blobs in 
devcoredump.

>
>>   		remain -= size;
>>   	}
>> +}
>> +
>> +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p, bool atomic)
>> +{
>> +	struct xe_guc_log_snapshot *snapshot;
>>   
>> -out:
>> -	for (i = 0; i < num_chunks; i++)
>> -		kfree(copy[i]);
>> -	kfree(copy);
>> +	snapshot = xe_guc_log_snapshot_capture(log, atomic);
>> +	xe_guc_log_snapshot_print(log_to_xe(log), snapshot, p, atomic);
>> +	xe_guc_log_snapshot_free(snapshot);
>>   }
>>   
>>   int xe_guc_log_init(struct xe_guc_log *log)
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
>> index 5149b492c3b8..6e4d0b369c19 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
>> @@ -9,6 +9,7 @@
>>   #include "xe_guc_log_types.h"
>>   
>>   struct drm_printer;
>> +struct xe_device;
>>   
>>   #if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
>>   #define CRASH_BUFFER_SIZE       SZ_1M
>> @@ -38,6 +39,11 @@ struct drm_printer;
>>   
>>   int xe_guc_log_init(struct xe_guc_log *log);
>>   void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p, bool atomic);
>> +struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic);
>> +struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
>> +void xe_guc_log_snapshot_print(struct xe_device *xe, struct xe_guc_log_snapshot *snapshot,
>> +			       struct drm_printer *p, bool atomic);
>> +void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot);
>>   
>>   static inline u32
>>   xe_guc_log_get_level(struct xe_guc_log *log)
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> index 125080d138a7..e3a6a44c2f18 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> @@ -8,8 +8,37 @@
>>   
>>   #include <linux/types.h>
>>   
>> +#include "xe_uc_fw_types.h"
>> +
>>   struct xe_bo;
>>   
>> +/**
>> + * struct xe_guc_log_snapshot:
>> + * Capture of the GuC log plus various state useful for decoding the log
>> + */
>> +struct xe_guc_log_snapshot {
>> +	/** @size: Size in bytes of the @copy allocation */
>> +	size_t size;
>> +	/** @copy: Host memory copy of the log buffer for later dumping, split into chunks */
>> +	void **copy;
>> +	/** @num_chunks: Number of chunks withint @copy */
> typo
>
>> +	int num_chunks;
>> +	/** @ktime: Kernel time the snapshot was taken */
>> +	u64 ktime;
>> +	/** @stamp: GuC timestamp at which the snapshot was taken */
>> +	u32 stamp;
>> +	/** @ref_clk: GuC timestamp frequency */
>> +	u32 ref_clk;
>> +	/** @level: GuC log verbosity level */
>> +	u32 level;
>> +	/** @ver_found: GuC firmware version */
>> +	struct xe_uc_fw_version ver_found;
> didn't you mention that platform and/or revision is also mandatory to
> correctly decode GuC log ?
Yeah, but the platform is not really a GuC related thing. So putting 
that inside a GuC specific structure seems even more wrong.

>
>> +	/** @ver_want: GuC firmware version that driver expected */
>> +	struct xe_uc_fw_version ver_want;
> I'm still not convinced that it's relevant to the actual GuC log what
> driver wanted to use - this should be part of the GuC snapshot instead
>
>> +	/** @path: Path of GuC firmware blob */
>> +	const char *path;
> ditto
When the snapshot moves into the devcoredump, then all of this gets 
moved into a GuC specific structure within the core dump. But for now, 
there is no GuC snapshot. So therefore, they are here. As already 
stated, this is all stop gap measures until the proper solution is 
implemented.

John.

>
>> +};
>> +
>>   /**
>>    * struct xe_guc_log - GuC log
>>    */