[PATCH v2 1/4] drm/xe/vf: React to MIGRATED interrupt

Thu Sep 26 21:22:03 UTC 2024

On 26.09.2024 16:05, Michal Wajdeczko wrote:
>
> On 24.09.2024 22:25, Tomasz Lis wrote:
>> To properly support VF Save/Restore procedure, fixups need to be
>> applied after PF driver finishes its part of VF Restore. Those
>> fixups are applied by the VF driver within a VM.
>>
>> A VF driver gets informed that it was migrated by receiving an
>> interrupt from each GuC. That should be the trigger for fixups.
>>
>> The VF can safely do post-migration fixups on resources associated
>> to each GuC only after that GuC issued the MIGRATED interrupt.
>>
>> This change introduces a worker to be used for post-migration fixups,
>> and a mechanism to schedule said worker when all GuCs sent the irq.
>>
>> v2: renamed and moved functions, updated logged messages, removed
>>    unused includes, used anon struct (mwajdeczko)
>>
>> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
>> ---
>>   drivers/gpu/drm/xe/Makefile          |  1 +
>>   drivers/gpu/drm/xe/xe_device_types.h |  2 +
>>   drivers/gpu/drm/xe/xe_gt_sriov_vf.c  | 11 +++++
>>   drivers/gpu/drm/xe/xe_gt_sriov_vf.h  |  2 +
>>   drivers/gpu/drm/xe/xe_guc.c          | 11 +++++
>>   drivers/gpu/drm/xe/xe_memirq.c       |  3 ++
>>   drivers/gpu/drm/xe/xe_sriov.c        |  4 ++
>>   drivers/gpu/drm/xe/xe_sriov_types.h  | 10 ++++
>>   drivers/gpu/drm/xe/xe_sriov_vf.c     | 73 ++++++++++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_sriov_vf.h     | 14 ++++++
>>   10 files changed, 131 insertions(+)
>>   create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf.c
>>   create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 8f1c5c329f79..1f07983bb865 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -123,6 +123,7 @@ xe-y += \
>>   	xe_gt_sriov_vf.o \
>>   	xe_guc_relay.o \
>>   	xe_memirq.o \
>> +	xe_sriov_vf.o \
> wrong order
ack
>
>>   	xe_sriov.o
>>   
>>   xe-$(CONFIG_PCI_IOV) += \
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index 484fb34dde98..07ff7554fc3e 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -374,6 +374,8 @@ struct xe_device {
>>   
>>   		/** @sriov.pf: PF specific data */
>>   		struct xe_device_pf pf;
>> +		/** @sriov.vf: VF specific data */
>> +		struct xe_device_vf vf;
>>   
>>   		/** @sriov.wq: workqueue used by the virtualization workers */
>>   		struct workqueue_struct *wq;
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> index d3baba50f085..489ace57c2a1 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> @@ -27,6 +27,7 @@
>>   #include "xe_guc_relay.h"
>>   #include "xe_mmio.h"
>>   #include "xe_sriov.h"
>> +#include "xe_sriov_vf.h"
>>   #include "xe_uc_fw.h"
>>   #include "xe_wopcm.h"
>>   
>> @@ -692,6 +693,16 @@ int xe_gt_sriov_vf_connect(struct xe_gt *gt)
>>   	return err;
>>   }
>>   
>> +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
> this is public function, so you should add kernel-doc for it
ack
>
>> +{
>> +	struct xe_device *xe = gt_to_xe(gt);
>> +
> since below you are accessing VF specific fields, you may want to add:
>
> 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
ack
>
>> +	set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
>> +	smp_mb__after_atomic();
>> +	xe_gt_sriov_info(gt, "VF migration recovery ready\n");
> the xe_gt_sriov_info() will already add "VF:" prefix for you:
>
> 	xe_gt_sriov_info(gt, "ready for recovery after migration\n");
>
> [ ] xe 0000:00:02.0: [drm] GT0: VF: ready for recovery after migration
ok
>
>> +	xe_sriov_vf_start_migration_recovery(xe);
>> +}
>> +
>>   static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
>>   {
>>   	xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
>> index e541ce57bec2..544e913b8465 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
>> @@ -18,6 +18,8 @@ int xe_gt_sriov_vf_connect(struct xe_gt *gt);
>>   int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt);
>>   int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt);
>>   
>> +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt);
>> +
>>   u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
>>   u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
>>   u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt);
>> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>> index b6cd5e941f19..05aa93070e5a 100644
>> --- a/drivers/gpu/drm/xe/xe_guc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc.c
>> @@ -1092,10 +1092,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
>>   	return guc_self_cfg(guc, key, 2, val);
>>   }
>>   
>> +static void xe_guc_sw_0_irq_handler(struct xe_guc *guc)
>> +{
>> +	struct xe_gt *gt = guc_to_gt(guc);
>> +
>> +	if (IS_SRIOV_VF(gt_to_xe(gt)))
>> +		xe_gt_sriov_vf_migrated_event_handler(gt);
>> +}
>> +
>>   void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
>>   {
>>   	if (iir & GUC_INTR_GUC2HOST)
>>   		xe_guc_ct_irq_handler(&guc->ct);
>> +
>> +	if (iir & GUC_INTR_SW_INT_0)
>> +		xe_guc_sw_0_irq_handler(guc);
>>   }
>>   
>>   void xe_guc_sanitize(struct xe_guc *guc)
>> diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
>> index 3f8d4ca64302..2d2f40378942 100644
>> --- a/drivers/gpu/drm/xe/xe_memirq.c
>> +++ b/drivers/gpu/drm/xe/xe_memirq.c
>> @@ -435,6 +435,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
>>   
>>   	if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
>>   		xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
>> +
>> +	if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name))
>> +		xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0);
>>   }
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
>> index 69a066ef20c0..2f4c3ae24f6e 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov.c
>> @@ -12,6 +12,7 @@
>>   #include "xe_mmio.h"
>>   #include "xe_sriov.h"
>>   #include "xe_sriov_pf.h"
>> +#include "xe_sriov_vf.h"
>>   
>>   /**
>>    * xe_sriov_mode_to_string - Convert enum value to string.
>> @@ -112,6 +113,9 @@ int xe_sriov_init(struct xe_device *xe)
>>   			return err;
>>   	}
>>   
>> +	if (IS_SRIOV_VF(xe))
>> +		xe_sriov_vf_init_early(xe);
>> +
>>   	xe_assert(xe, !xe->sriov.wq);
>>   	xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
>>   	if (!xe->sriov.wq)
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
>> index c7b7ad4af5c8..8c518e68d33a 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_types.h
>> +++ b/drivers/gpu/drm/xe/xe_sriov_types.h
>> @@ -56,4 +56,14 @@ struct xe_device_pf {
>>   	struct mutex master_lock;
>>   };
>>   
>> +struct xe_device_vf {
> you need to add kernel-doc for the struct, otherwise below doc will be
> ignored (now) or wrongly interpreted (in near future)
will do
>
>> +	/** @migration: VF Migration state data */
>> +	struct {
>> +		/** @migration.worker: VF migration recovery worker */
>> +		struct work_struct worker;
>> +		/** @migration.gt_flags: Per-GT request flags for VF migration recovery */
> hmm, if something is per-GT then maybe it should be defined under struct
> xe_gt in the xe_gt_sriov_vf_types.h as
>
> 	struct xe_gt_sriov_vf {
>
> 	/** @migration.ready: ready for VF migration recovery */
> 	bool ready;

So there seem to be two ways of doing this: keeping the flags together, 
or distributing between GTs.

Keeping together restricts the amount of supported GTs, but that's a 
lesser argument.

Separating consumes more space, but that's also a lesser argument.

There is no strong reason to go either way. I kept the flags together to 
allow atomic clearing of all of them.

-Tomasz

>
>> +		unsigned long gt_flags;
>> +	} migration;
>> +};
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> new file mode 100644
>> index 000000000000..0e332bb215fd
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> @@ -0,0 +1,73 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2023-2024 Intel Corporation
>> + */
>> +
>> +#include <drm/drm_managed.h>
>> +
>> +#include "xe_assert.h"
>> +#include "xe_device.h"
>> +#include "xe_sriov.h"
>> +#include "xe_sriov_vf.h"
>> +#include "xe_sriov_printk.h"
>> +
>> +static void migration_worker_func(struct work_struct *w);
>> +
>> +/**
>> + * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
>> + * @xe: the &xe_device to initialize
>> + */
>> +void xe_sriov_vf_init_early(struct xe_device *xe)
>> +{
>> +	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
>> +}
>> +
>> +static void vf_post_migration_recovery(struct xe_device *xe)
>> +{
>> +	drm_dbg(&xe->drm, "migration recovery in progress\n");
>> +	/* FIXME: add the recovery steps */
>> +	drm_notice(&xe->drm, "migration recovery ended\n");
>> +}
>> +
>> +static void migration_worker_func(struct work_struct *w)
>> +{
>> +	struct xe_device *xe = container_of(w, struct xe_device,
>> +					    sriov.vf.migration.worker);
>> +
>> +	vf_post_migration_recovery(xe);
>> +}
>> +
>> +static bool vf_ready_to_recovery_on_all_tiles(struct xe_device *xe)
> you likely want to mean "on all GTs", right?
ack
>
>> +{
>> +	struct xe_gt *gt;
>> +	unsigned int id;
>> +
>> +	for_each_gt(gt, xe, id) {
>> +		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags))
> 		if (!gt->sriov.vf.migration.ready) ?
I'm not that sure about changing the flags into separated bools (see 
earlier comment).
>
>> +			return false;
> as you're testing each GT separately, then maybe it's worth to add
>
> 	xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
ack. Yeah, I originally had a message there.
>
>> +	}
>> +	return true;
>> +}
>> +
>> +/**
>> + * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
>> + * @xe: the &xe_device to start recovery on
>> + *
>> + * This function shall be called only by VF.
>> + */
>> +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
>> +{
>> +	bool started;
>> +
>> +	xe_assert(xe, IS_SRIOV_VF(xe));
>> +
>> +	if (!vf_ready_to_recovery_on_all_tiles(xe))
>> +		return;
>> +
>> +	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
>> +	smp_mb();
> IIRC each use of mb requires documentation of "why"

That's true, but I don't really know how to comment that other than 
citing the mb docs, ie:

/* make sure all workers see the flags change */

-Tomasz

>
>> +
>> +	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
>> +	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
>> +		 "scheduled" : "already in progress");
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
>> new file mode 100644
>> index 000000000000..7b8622cff2b7
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
>> @@ -0,0 +1,14 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2023-2024 Intel Corporation
>> + */
>> +
>> +#ifndef _XE_SRIOV_VF_H_
>> +#define _XE_SRIOV_VF_H_
>> +
>> +struct xe_device;
>> +
>> +void xe_sriov_vf_init_early(struct xe_device *xe);
>> +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
>> +
>> +#endif