[PATCH v1 6/7] drm/xe/vf: Rebase MEMIRQ structures for all contexts after migration
Lis, Tomasz
tomasz.lis at intel.com
Thu May 15 22:07:47 UTC 2025
On 14.05.2025 22:03, Michal Wajdeczko wrote:
>
> On 14.05.2025 00:49, Tomasz Lis wrote:
>> All contexts require an update of state data, as the data includes
>> GGTT references to memirq-related buffers.
>>
>> Default contexts need these references updated as well, because they
>> are not refreshed when a new context is created from them.
>>
>> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_lrc.c | 41 ++++++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_lrc.h | 2 ++
>> drivers/gpu/drm/xe/xe_sriov_vf.c | 17 +++++++++++--
>> 3 files changed, 58 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>> index 43e1c18e1769..5a7f0077ef31 100644
>> --- a/drivers/gpu/drm/xe/xe_lrc.c
>> +++ b/drivers/gpu/drm/xe/xe_lrc.c
>> @@ -898,6 +898,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
>> return data;
>> }
>>
>> +/**
>> + * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC
>> + * of given engine.
>> + * @hwe: the &xe_hw_engine struct instance
>> + */
>> +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe)
>> +{
>> + struct xe_gt *gt = hwe->gt;
>> + u32 *regs;
>> +
>> + if (!gt->default_lrc[hwe->class])
>> + return;
>> +
>> + regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE;
>> + set_memory_based_intr(regs, hwe);
>> +}
>> +
>> +/**
>> + * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data
>> + * for given LRC.
>> + * @hwe: the &xe_hw_engine struct instance
>> + * @lrc: the &xe_lrc struct instance
>> + */
>> +void xe_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe, struct xe_lrc *lrc)
>> +{
>> + struct xe_gt *gt = hwe->gt;
>> + struct iosys_map map;
>> + size_t regs_len;
>> + u32 *regs;
>> +
>> + map = __xe_lrc_regs_map(lrc);
>> + regs_len = lrc_reg_size(gt_to_xe(gt));
>> + regs = kzalloc(regs_len, GFP_ATOMIC);
>> + if (!regs)
>> + return;
> no error? but recovery will now be broken, no?
If there is a problem with allocating even 300 bytes, then something
is definitely going to be broken. We used a `GFP_ATOMIC` allocation,
which, if the fast path fails, can still dip into the emergency
reserves. If even that fails, the system must have encountered
something really bad, and GFX recovery won't be the only casualty.
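That said, if we did want to cover it, the sketch below shows what
propagating the failure would look like, so the recovery path could
abort instead of silently continuing with stale GGTT references (the
int return is hypothetical, not what this patch does):

int xe_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe,
					   struct xe_lrc *lrc)
{
	struct xe_gt *gt = hwe->gt;
	struct iosys_map map;
	size_t regs_len;
	u32 *regs;

	map = __xe_lrc_regs_map(lrc);
	regs_len = lrc_reg_size(gt_to_xe(gt));
	regs = kzalloc(regs_len, GFP_ATOMIC);
	if (!regs)
		return -ENOMEM;	/* caller decides whether to abort recovery */

	xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len);
	set_memory_based_intr(regs, hwe);
	xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len);
	kfree(regs);
	return 0;
}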
>> + xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len);
>> + set_memory_based_intr(regs, hwe);
>> + xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len);
>> + kfree(regs);
> maybe instead of this alloc + RMW + free just update:
>
> [CTX_INT_MASK_ENABLE_PTR]
> [CTX_INT_STATUS_REPORT_PTR]
> [CTX_INT_SRC_REPORT_PTR]
>
> using 3x xe_lrc_write_ctx_reg() like it was done in patch 5/7 ?
Ok, we have a precedent for that in `xe_lrc_init()` (so we're setting
the values twice there). Avoiding the unnecessary allocation and copy
makes up for the slight duplication, and the final code should still be
shorter. Good suggestion.
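For the record, a rough sketch of the reworked helper along those lines
(reusing the memirq pointer helpers that set_memory_based_intr() already
calls; exact offset/helper names to be double-checked against the tree,
so treat this as an untested draft):

void xe_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe,
					    struct xe_lrc *lrc)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	/* nothing to fix up unless memory-based interrupts are in use */
	if (!xe_device_uses_memirq(xe))
		return;

	/* re-point the three GGTT references at the re-based memirq buffers */
	xe_lrc_write_ctx_reg(lrc, CTX_INT_MASK_ENABLE_PTR,
			     xe_memirq_enable_ptr(memirq));
	xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
			     xe_memirq_status_ptr(memirq, hwe));
	xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
			     xe_memirq_source_ptr(memirq, hwe));
}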
-Tomasz
>> +}
>> +
>> static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
>> {
>> u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
>> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
>> index e7a99cfd0abe..3f0ae3affafe 100644
>> --- a/drivers/gpu/drm/xe/xe_lrc.h
>> +++ b/drivers/gpu/drm/xe/xe_lrc.h
>> @@ -89,6 +89,8 @@ u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
>> u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
>> u32 *xe_lrc_regs(struct xe_lrc *lrc);
>> void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc);
>> +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe);
>> +void xe_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe, struct xe_lrc *lrc);
>>
>> u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
>> void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> index 016faa29cddd..c08c44dbd383 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> @@ -225,12 +225,23 @@ static int vf_post_migration_requery_guc(struct xe_device *xe)
>> return ret;
>> }
>>
>> +static void xe_gt_default_lrcs_hwsp_rebase(struct xe_gt *gt)
>> +{
>> + struct xe_hw_engine *hwe;
>> + enum xe_hw_engine_id id;
>> +
>> + for_each_hw_engine(hwe, gt, id)
>> + xe_default_lrc_update_memirq_regs_with_address(hwe);
>> +}
>> +
>> static void xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *eq)
>> {
>> int i;
>>
>> - for (i = 0; i < eq->width; ++i)
>> + for (i = 0; i < eq->width; ++i) {
>> + xe_lrc_update_memirq_regs_with_address(eq->hwe, eq->lrc[i]);
>> xe_lrc_update_hwctx_regs_with_address(eq->lrc[i]);
>> + }
>> }
>>
>> static void xe_guc_contexts_hwsp_rebase(struct xe_guc *guc)
>> @@ -249,8 +260,10 @@ static void vf_post_migration_fixup_contexts(struct xe_device *xe)
>> struct xe_gt *gt;
>> unsigned int id;
>>
>> - for_each_gt(gt, xe, id)
>> + for_each_gt(gt, xe, id) {
>> + xe_gt_default_lrcs_hwsp_rebase(gt);
>> xe_guc_contexts_hwsp_rebase(&gt->uc.guc);
>> + }
>> }
>>
>> static void vf_post_migration_fixup_ctb(struct xe_device *xe)