[PATCH v2 3/7] drm/xe/vf: Pause submissions during RESFIX fixups

Mon May 19 23:07:46 UTC 2025

On 16.05.2025 19:30, Michal Wajdeczko wrote:
>
> On 16.05.2025 00:18, Tomasz Lis wrote:
>> While applying post-migration fixups to VF, GuC will not respond
>> to any commands. This means submissions have no way of finishing.
>>
>> To avoid acquiring additional resources and then stalling
>> on hardware access, pause the submission work. This will
>> decrease the chance of depleting resources, and speed up
>> the recovery.
>>
>> v2: Commented xe_irq_resume() call
>>
>> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gpu_scheduler.c | 13 +++++++++
>>   drivers/gpu/drm/xe/xe_gpu_scheduler.h |  1 +
>>   drivers/gpu/drm/xe/xe_guc_submit.c    | 35 ++++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_guc_submit.h    |  2 ++
>>   drivers/gpu/drm/xe/xe_sriov_vf.c      | 42 +++++++++++++++++++++++++++
>>   5 files changed, 93 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
>> index 869b43a4151d..455ccaf17314 100644
>> --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
>> +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
>> @@ -101,6 +101,19 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
>>   	cancel_work_sync(&sched->work_process_msg);
>>   }
>>   
>> +/**
>> + * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler.
>> + * @sched: the &xe_gpu_scheduler struct instance
>> + *
>> + * This call disables further runs of scheduling work queue. It does not wait
>> + * for any in-progress runs to finish, only makes sure no further runs happen
>> + * afterwards.
>> + */
>> +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched)
>> +{
>> +	drm_sched_wqueue_stop(&sched->base);
>> +}
>> +
>>   void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched)
>>   {
>>   	drm_sched_resume_timeout(&sched->base, sched->base.timeout);
>> diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
>> index c250ea773491..d78b4e8203f9 100644
>> --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
>> +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
>> @@ -21,6 +21,7 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched);
>>   
>>   void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
>>   void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
>> +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched);
>>   
>>   void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
>> index 80f748baad3f..6f280333de13 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
>> @@ -1811,6 +1811,19 @@ void xe_guc_submit_stop(struct xe_guc *guc)
>>   
>>   }
>>   
>> +/**
>> + * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
>> + * @guc: the &xe_guc struct instance whose scheduler is to be disabled
>> + */
>> +void xe_guc_submit_pause(struct xe_guc *guc)
>> +{
>> +	struct xe_exec_queue *q;
>> +	unsigned long index;
>> +
>> +	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
>> +		xe_sched_submission_stop_async(&q->guc->sched);
>> +}
>> +
>>   static void guc_exec_queue_start(struct xe_exec_queue *q)
>>   {
>>   	struct xe_gpu_scheduler *sched = &q->guc->sched;
>> @@ -1851,6 +1864,28 @@ int xe_guc_submit_start(struct xe_guc *guc)
>>   	return 0;
>>   }
>>   
>> +static void guc_exec_queue_unpause(struct xe_exec_queue *q)
>> +{
>> +	struct xe_gpu_scheduler *sched = &q->guc->sched;
>> +
>> +	xe_sched_submission_start(sched);
>> +}
>> +
>> +/**
>> + * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
>> + * @guc: the &xe_guc struct instance whose scheduler is to be enabled
>> + */
>> +void xe_guc_submit_unpause(struct xe_guc *guc)
>> +{
>> +	struct xe_exec_queue *q;
>> +	unsigned long index;
>> +
>> +	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
>> +		guc_exec_queue_unpause(q);
>> +
>> +	wake_up_all(&guc->ct.wq);
>> +}
>> +
>>   static struct xe_exec_queue *
>>   g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
>>   {
>> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
>> index 9b71a986c6ca..f1cf271492ae 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_submit.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_submit.h
>> @@ -18,6 +18,8 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc);
>>   void xe_guc_submit_reset_wait(struct xe_guc *guc);
>>   void xe_guc_submit_stop(struct xe_guc *guc);
>>   int xe_guc_submit_start(struct xe_guc *guc);
>> +void xe_guc_submit_pause(struct xe_guc *guc);
>> +void xe_guc_submit_unpause(struct xe_guc *guc);
>>   void xe_guc_submit_wedge(struct xe_guc *guc);
>>   
>>   int xe_guc_read_stopped(struct xe_guc *guc);
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> index 099a395fbf59..f0d6abedd126 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> @@ -11,6 +11,8 @@
>>   #include "xe_gt_sriov_printk.h"
>>   #include "xe_gt_sriov_vf.h"
>>   #include "xe_guc_ct.h"
>> +#include "xe_guc_submit.h"
>> +#include "xe_irq.h"
>>   #include "xe_pm.h"
>>   #include "xe_sriov.h"
>>   #include "xe_sriov_printk.h"
>> @@ -134,6 +136,44 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
>>   	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
>>   }
>>   
>> +/**
>> + * vf_post_migration_shutdown - Stop the driver activities after VF migration.
>> + * @xe: the &xe_device struct instance
>> + *
>> + * After this VM is migrated and assigned to a new VF, it is running on a new
>> + * hardware, and therefore many hardware-dependent states and related structures
>> + * require fixups. Without fixups, the hardware cannot do any work, and therefore
>> + * all GPU pipelines are stalled.
>> + * Stop some of kernel acivities to make the fixup process faster.
> typo acivities
ack
>> + */
>> +static void vf_post_migration_shutdown(struct xe_device *xe)
>> +{
>> +	struct xe_gt *gt;
>> +	unsigned int id;
>> +
>> +	for_each_gt(gt, xe, id)
>> +		xe_guc_submit_pause(&gt->uc.guc);
>> +}
>> +
>> +/**
>> + * vf_post_migration_kickstart - Re-start the driver activities under new hardware.
>> + * @xe: the &xe_device struct instance
>> + *
>> + * After we have finished with all post-migration fixups, restart the driver
>> + * activities to continue feeding the GPU with workloads.
>> + */
>> +static void vf_post_migration_kickstart(struct xe_device *xe)
>> +{
>> +	struct xe_gt *gt;
>> +	unsigned int id;
>> +
>> +	/* make sure interrupts on the new HW are properly set */
>> +	xe_irq_resume(xe);
> hmm, it still looks unbalanced when compared to shutdown
> don't we need xe_irq_suspend() there?
No, the idea is not to stop interrupts on shutdown. We shouldn't receive 
any as it's new hardware, so disabling them would bring no change. And in
that corner scenario where GuC is running and we could receive them, 
it's better if they are handled - actual work is done outside of IRQs 
anyway, and these should be blocked. As an example, no IRQs would mean 
we get information about a 2nd migration only after recovery of the 1st 
is done. But the first will fail if GGTT range changed again - that's 
why we have the "defer" mechanism.
> also IIRC the whole recovery starts due to a MIGRATED IRQ event, so
> interrupts had to be already working, no?

The GuC IRQ had to work. For the rest - they probably do as well, but..

There's a reason we have a very specific IRQ support routine in bspec. 
The restore process should've restored both MMIO and MEMIRQ areas 
configuring interrupts, but it did that through blitting, which is not 
what the procedure says. In the past, we did have issues with IRQ state
- while we fixed it by adding such re-enable (and a 2nd re-enable on PF 
side) before we even had MEMIRQ support, it's possible we'd run into 
issues with MEMIRQ as well.

So, for MMIO interrupts (which we never actually use for VFs on Xe) this 
is proven to be required. For MEMIRQs, it's not proven, but it's a matter
of adhering to sequences given to us by HW teams.

>> +
>> +	for_each_gt(gt, xe, id)
>> +		xe_guc_submit_unpause(&gt->uc.guc);
>> +}
>> +
>>   /**
>>    * xe_sriov_vf_post_migration_reset_guc_state - Reset VF state in all GuCs.
>>    * @xe: the &xe_device struct instance
>> @@ -247,6 +287,7 @@ static void vf_post_migration_recovery(struct xe_device *xe)
>>   
>>   	drm_dbg(&xe->drm, "migration recovery in progress\n");
>>   	xe_pm_runtime_get(xe);
>> +	vf_post_migration_shutdown(xe);
>>   	err = vf_post_migration_requery_guc(xe);
>>   	if (vf_post_migration_imminent(xe))
>>   		goto defer;
>> @@ -258,6 +299,7 @@ static void vf_post_migration_recovery(struct xe_device *xe)
>>   	if (need_fixups)
>>   		vf_post_migration_fixup_ctb(xe);
>>   
>> +	vf_post_migration_kickstart(xe);
> since above call will, as you said in comment above, start "feeding the
> GPU with workloads", shouldn't we do this step _after_ confirming RESFIX
> below and thus truly unblocking the VF submission on the GuC side?

What is the benefit of the reordered code over this one?

We start feeding the GuC with new data before it starts to consume it, 
yes. But the only restriction we have is for these to be close enough to 
not influence timeouts.

Will the reordered code run faster? I don't think so, the difference is 
non-measurable. Will it better handle some scenario? I can't say I see 
any. Low resources, maybe?

On the other side, if interrupts are not fully working, or anything else 
is not ready, then sending the resfix_done requires an additional prepare
function. The kickstart is a good candidate to place any such code.

-Tomasz

>>   	vf_post_migration_notify_resfix_done(xe);
>>   	xe_pm_runtime_put(xe);
>>   	drm_notice(&xe->drm, "migration recovery ended\n");