[PATCH 6/7] drm/xe: Change xe_engine_snapshot_capture_for_job to be for_queue

Dong, Zhanjun zhanjun.dong at intel.com
Mon Nov 11 22:41:01 UTC 2024



On 2024-11-11 5:15 p.m., Dong, Zhanjun wrote:
> On 2024-11-08 12:43 p.m., Matthew Brost wrote:
>> Add job may unavailable at capture time (e.g., LR mode) while an exec
>> queue is. Change xe_engine_snapshot_capture_for_job to take a queue
>> argument and rename to xe_engine_snapshot_capture_for_queue.
>>
>> Cc: Zhanjun Dong <zhanjun.dong at intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_devcoredump.c |  3 +--
>>   drivers/gpu/drm/xe/xe_guc_capture.c | 28 +++++++++++++---------------
>>   drivers/gpu/drm/xe/xe_guc_capture.h |  6 +++---
>>   drivers/gpu/drm/xe/xe_guc_submit.c  |  4 ++--
>>   drivers/gpu/drm/xe/xe_hw_engine.c   |  8 ++++----
>>   drivers/gpu/drm/xe/xe_hw_engine.h   |  4 ++--
>>   6 files changed, 25 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/ 
>> xe_devcoredump.c
>> index c32cbb46ef8c..dfa946a3793c 100644
>> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
>> @@ -282,8 +282,7 @@ static void devcoredump_snapshot(struct 
>> xe_devcoredump *coredump,
>>           ss->job = xe_sched_job_snapshot_capture(job);
>>       ss->vm = xe_vm_snapshot_capture(q->vm);
>> -    if (job)
>> -        xe_engine_snapshot_capture_for_job(job);
>> +    xe_engine_snapshot_capture_for_queue(q);

The
	/** @job: Point to the faulting job */
	struct xe_sched_job *job;
in struct xe_devcoredump also could be removed.

Regards,
Zhanjun Dong

>>       queue_work(system_unbound_wq, &ss->work);
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/ 
>> xe_guc_capture.c
>> index cc72446a5de1..511a8e40d521 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
>> @@ -1793,29 +1793,27 @@ void xe_engine_snapshot_print(struct 
>> xe_hw_engine_snapshot *snapshot, struct drm
>>   }
>>   /**
>> - * xe_guc_capture_get_matching_and_lock - Matching GuC capture for 
>> the job.
>> - * @job: The job object.
>> + * xe_guc_capture_get_matching_and_lock - Matching GuC capture for 
>> the queue.
>> + * @q: The exec queue object
>>    *
>> - * Search within the capture outlist for the job, could be used for 
>> check if
>> - * GuC capture is ready for the job.
>> + * Search within the capture outlist for the queue, could be used for 
>> check if
>> + * GuC capture is ready for the queue.
>>    * If found, the locked boolean of the node will be flagged.
>>    *
>>    * Returns: found guc-capture node ptr else NULL
>>    */
>>   struct __guc_capture_parsed_output *
>> -xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
>> +xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q)
>>   {
>>       struct xe_hw_engine *hwe;
>>       enum xe_hw_engine_id id;
>> -    struct xe_exec_queue *q;
>>       struct xe_device *xe;
>>       u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
>>       struct xe_devcoredump_snapshot *ss;
>> -    if (!job)
>> +    if (!q)
> unneccessary, there is !q check 2 lines below.
> 
>>           return NULL;
>> -    q = job->q;
>>       if (!q || !q->gt)
>>           return NULL;
>> @@ -1827,7 +1825,7 @@ xe_guc_capture_get_matching_and_lock(struct 
>> xe_sched_job *job)
>>       if (ss->matched_node && ss->matched_node->source == 
>> XE_ENGINE_CAPTURE_SOURCE_GUC)
>>           return ss->matched_node;
>> -    /* Find hwe for the job */
>> +    /* Find hwe for the queue */
>>       for_each_hw_engine(hwe, q->gt, id) {
>>           if (hwe != q->hwe)
>>               continue;
>> @@ -1859,17 +1857,16 @@ xe_guc_capture_get_matching_and_lock(struct 
>> xe_sched_job *job)
>>   }
>>   /**
>> - * xe_engine_snapshot_capture_for_job - Take snapshot of associated 
>> engine
>> - * @job: The job object
>> + * xe_engine_snapshot_capture_for_queue - Take snapshot of associated 
>> engine
>> + * @q: The exec queue object
>>    *
>>    * Take snapshot of associated HW Engine
>>    *
>>    * Returns: None.
>>    */
>>   void
>> -xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
>> +xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q)
>>   {
>> -    struct xe_exec_queue *q = job->q;
>>       struct xe_device *xe = gt_to_xe(q->gt);
>>       struct xe_devcoredump *coredump = &xe->devcoredump;
>>       struct xe_hw_engine *hwe;
>> @@ -1887,11 +1884,12 @@ xe_engine_snapshot_capture_for_job(struct 
>> xe_sched_job *job)
>>           }
>>           if (!coredump->snapshot.hwe[id]) {
>> -            coredump->snapshot.hwe[id] = 
>> xe_hw_engine_snapshot_capture(hwe, job);
>> +            coredump->snapshot.hwe[id] =
>> +                xe_hw_engine_snapshot_capture(hwe, q);
>>           } else {
>>               struct __guc_capture_parsed_output *new;
>> -            new = xe_guc_capture_get_matching_and_lock(job);
>> +            new = xe_guc_capture_get_matching_and_lock(q);
>>               if (new) {
>>                   struct xe_guc *guc =  &q->gt->uc.guc;
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/ 
>> xe_guc_capture.h
>> index 97a795d13dd1..20a078dc4b85 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_capture.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.h
>> @@ -11,10 +11,10 @@
>>   #include "xe_guc.h"
>>   #include "xe_guc_fwif.h"
>> +struct xe_exec_queue;
>>   struct xe_guc;
>>   struct xe_hw_engine;
>>   struct xe_hw_engine_snapshot;
>> -struct xe_sched_job;
>>   static inline enum guc_capture_list_class_type 
>> xe_guc_class_to_capture_class(u16 class)
>>   {
>> @@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct 
>> xe_guc *guc);
>>   const struct __guc_mmio_reg_descr_group *
>>   xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
>>                    enum guc_capture_list_class_type capture_class, 
>> bool is_ext);
>> -struct __guc_capture_parsed_output 
>> *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
>> +struct __guc_capture_parsed_output 
>> *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q);
>>   void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct 
>> xe_hw_engine_snapshot *snapshot);
>>   void xe_engine_snapshot_print(struct xe_hw_engine_snapshot 
>> *snapshot, struct drm_printer *p);
>> -void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
>> +void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q);
>>   void xe_guc_capture_steered_list_init(struct xe_guc *guc);
>>   void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
>>   int xe_guc_capture_init(struct xe_guc *guc);
>> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/ 
>> xe_guc_submit.c
>> index 974c7af7064d..3df8543deec1 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
>> @@ -1066,13 +1066,13 @@ guc_exec_queue_timedout_job(struct 
>> drm_sched_job *drm_job)
>>        * do manual capture first and decide later if we need to use it
>>        */
>>       if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
>> -        !xe_guc_capture_get_matching_and_lock(job)) {
>> +        !xe_guc_capture_get_matching_and_lock(q)) {
>>           /* take force wake before engine register manual capture */
>>           fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
>>           if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
>>               xe_gt_info(q->gt, "failed to get forcewake for coredump 
>> capture\n");
>> -        xe_engine_snapshot_capture_for_job(job);
>> +        xe_engine_snapshot_capture_for_queue(q);
>>           xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
>>       }
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/ 
>> xe_hw_engine.c
>> index 1557acee3523..0cfa2b9282be 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -829,7 +829,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine 
>> *hwe, u16 intr_vec)
>>   /**
>>    * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW 
>> Engine.
>>    * @hwe: Xe HW Engine.
>> - * @job: The job object.
>> + * @q: The exec queue object.
>>    *
>>    * This can be printed out in a later stage like during dev_coredump
>>    * analysis.
>> @@ -838,7 +838,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine 
>> *hwe, u16 intr_vec)
>>    * caller, using `xe_hw_engine_snapshot_free`.
>>    */
>>   struct xe_hw_engine_snapshot *
>> -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct 
>> xe_sched_job *job)
>> +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct 
>> xe_exec_queue *q)
>>   {
>>       struct xe_hw_engine_snapshot *snapshot;
>>       struct __guc_capture_parsed_output *node;
>> @@ -864,9 +864,9 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine 
>> *hwe, struct xe_sched_job *job
>>       if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
>>           return snapshot;
>> -    if (job) {
>> +    if (q) {
>>           /* If got guc capture, set source to GuC */
>> -        node = xe_guc_capture_get_matching_and_lock(job);
>> +        node = xe_guc_capture_get_matching_and_lock(q);
>>           if (node) {
>>               struct xe_device *xe = gt_to_xe(hwe->gt);
>>               struct xe_devcoredump *coredump = &xe->devcoredump;
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/ 
>> xe_hw_engine.h
>> index da0a6922a26f..6b5f9fa2a594 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.h
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.h
>> @@ -11,7 +11,7 @@
>>   struct drm_printer;
>>   struct drm_xe_engine_class_instance;
>>   struct xe_device;
>> -struct xe_sched_job;
>> +struct xe_exec_queue;
>>   #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN
>>   #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN
>> @@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine 
>> *hwe);
>>   u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
>>                   enum xe_engine_class engine_class);
>>   struct xe_hw_engine_snapshot *
>> -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct 
>> xe_sched_job *job);
>> +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct 
>> xe_exec_queue *q);
>>   void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot 
>> *snapshot);
>>   void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer 
>> *p);
>>   void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
> 



More information about the Intel-xe mailing list