[PATCH 2/2] drm/xe/guc: Separate full CTB content from guc_info debugfs

John Harrison john.c.harrison at intel.com
Wed Oct 23 22:40:56 UTC 2024


On 10/23/2024 15:17, Matthew Brost wrote:
> On Tue, Oct 22, 2024 at 05:17:35PM -0700, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> The guc_info debugfs file is meant to be a quick view of the current
>> software state of the GuC interface. Including the full CTB contents
>> makes the file as a whole much less human readable and is not
>> particularly useful in the general case. So don't pollute the info dump
>> with the full buffers. Instead, move those into a separate debugfs
>> entry that can be read when that information is actually required.
>>
> I did notice this when using the debugfs entry and yeah a bunch of ascii
> isn't all that helpful in the common case. So change LGTM but one nit.
>
>> Also, improve the human readability by adding a few extra blank lines
>> to delimit the sections.
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_devcoredump.c |  2 +-
>>   drivers/gpu/drm/xe/xe_guc.c         |  5 ++++-
>>   drivers/gpu/drm/xe/xe_guc_ct.c      | 25 ++++++++++++-------------
>>   drivers/gpu/drm/xe/xe_guc_ct.h      |  8 +++++---
>>   drivers/gpu/drm/xe/xe_guc_debugfs.c | 14 ++++++++++++++
>>   5 files changed, 36 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
>> index 8b0ea77661b2..5d7d8192d6f3 100644
>> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
>> @@ -267,7 +267,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>>   	fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
>>   
>>   	ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
>> -	ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
>> +	ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct, true, true);
> One bool, not great. Two bools, yuck. Any way to refactor this without
> bools all over the place?
It seemed excessive to create a whole set of flags. But given that this 
is the only instance outside of the GuC internals, it would be simple to 
create a wrapper that hides all of the internal-only params.

John.

>
> Matt
>
>>   	ss->ge = xe_guc_exec_queue_snapshot_capture(q);
>>   	ss->job = xe_sched_job_snapshot_capture(job);
>>   	ss->vm = xe_vm_snapshot_capture(q->vm);
>> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>> index b065bb9973e9..fcb540003e53 100644
>> --- a/drivers/gpu/drm/xe/xe_guc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc.c
>> @@ -1187,7 +1187,10 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
>>   
>>   	xe_force_wake_put(gt_to_fw(gt), fw_ref);
>>   
>> -	xe_guc_ct_print(&guc->ct, p);
>> +	drm_puts(p, "\n");
>> +	xe_guc_ct_print(&guc->ct, p, false);
>> +
>> +	drm_puts(p, "\n");
>>   	xe_guc_submit_print(guc, p);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
>> index c260d8840990..352673b6974a 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
>> @@ -1607,7 +1607,8 @@ static void g2h_worker_func(struct work_struct *w)
>>   	receive_g2h(ct);
>>   }
>>   
>> -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic)
>> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
>> +						    bool want_ctb)
>>   {
>>   	struct xe_guc_ct_snapshot *snapshot;
>>   
>> @@ -1615,7 +1616,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool a
>>   	if (!snapshot)
>>   		return NULL;
>>   
>> -	if (ct->bo) {
>> +	if (ct->bo && want_ctb) {
>>   		snapshot->ctb_size = ct->bo->size;
>>   		snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
>>   	}
>> @@ -1650,6 +1651,7 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
>>    * @ct: GuC CT object.
>>    * @atomic: Boolean to indicate if this is called from atomic context like
>>    * reset or CTB handler or from some regular path like debugfs.
>> + * @want_ctb: Should the full CTB content be captured (vs just the headers)
>>    *
>>    * This can be printed out in a later stage like during dev_coredump
>>    * analysis.
>> @@ -1658,12 +1660,12 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
>>    * by using `xe_guc_ct_snapshot_free`.
>>    */
>>   struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
>> -						      bool atomic)
>> +						      bool atomic, bool want_ctb)
>>   {
>>   	struct xe_device *xe = ct_to_xe(ct);
>>   	struct xe_guc_ct_snapshot *snapshot;
>>   
>> -	snapshot = xe_guc_ct_snapshot_alloc(ct, atomic);
>> +	snapshot = xe_guc_ct_snapshot_alloc(ct, atomic, want_ctb);
>>   	if (!snapshot) {
>>   		xe_gt_err(ct_to_gt(ct), "Skipping CTB snapshot entirely.\n");
>>   		return NULL;
>> @@ -1704,12 +1706,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
>>   		drm_printf(p, "\tg2h outstanding: %d\n",
>>   			   snapshot->g2h_outstanding);
>>   
>> -		if (snapshot->ctb) {
>> +		if (snapshot->ctb)
>>   			xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
>> -		} else {
>> -			drm_printf(p, "CTB snapshot missing!\n");
>> -			return;
>> -		}
>>   	} else {
>>   		drm_puts(p, "CT disabled\n");
>>   	}
>> @@ -1736,13 +1734,14 @@ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
>>    * @ct: GuC CT.
>>    * @p: drm_printer where it will be printed out.
>>    *
>> - * This function quickly capture a snapshot and immediately print it out.
>> + * This function will quickly capture a snapshot of the CT state
>> + * and immediately print it out.
>>    */
>> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
>> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
>>   {
>>   	struct xe_guc_ct_snapshot *snapshot;
>>   
>> -	snapshot = xe_guc_ct_snapshot_capture(ct, false);
>> +	snapshot = xe_guc_ct_snapshot_capture(ct, false, want_ctb);
>>   	xe_guc_ct_snapshot_print(snapshot, p);
>>   	xe_guc_ct_snapshot_free(snapshot);
>>   }
>> @@ -1776,7 +1775,7 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso
>>   		return;
>>   
>>   	snapshot_log = xe_guc_log_snapshot_capture(&guc->log, true);
>> -	snapshot_ct = xe_guc_ct_snapshot_capture((ct), true);
>> +	snapshot_ct = xe_guc_ct_snapshot_capture((ct), true, true);
>>   
>>   	spin_lock_irqsave(&ct->dead.lock, flags);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
>> index 338f0b75d29f..01d3b0183bf0 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
>> @@ -17,11 +17,13 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct);
>>   void xe_guc_ct_stop(struct xe_guc_ct *ct);
>>   void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
>>   
>> -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic);
>> -struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic);
>> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
>> +						    bool want_ctb);
>> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic,
>> +						      bool want_ctb);
>>   void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p);
>>   void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
>> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
>> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
>>   
>>   static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
>>   {
>> diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> index d3822cbea273..995b306aced7 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> @@ -47,9 +47,23 @@ static int guc_log(struct seq_file *m, void *data)
>>   	return 0;
>>   }
>>   
>> +static int guc_ctb(struct seq_file *m, void *data)
>> +{
>> +	struct xe_guc *guc = node_to_guc(m->private);
>> +	struct xe_device *xe = guc_to_xe(guc);
>> +	struct drm_printer p = drm_seq_file_printer(m);
>> +
>> +	xe_pm_runtime_get(xe);
>> +	xe_guc_ct_print(&guc->ct, &p, true);
>> +	xe_pm_runtime_put(xe);
>> +
>> +	return 0;
>> +}
>> +
>>   static const struct drm_info_list debugfs_list[] = {
>>   	{"guc_info", guc_info, 0},
>>   	{"guc_log", guc_log, 0},
>> +	{"guc_ctb", guc_ctb, 0},
>>   };
>>   
>>   void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
>> -- 
>> 2.47.0
>>



More information about the Intel-xe mailing list