[PATCH v2 3/3] drm/xe/vf: Fixup CTB send buffer messages after migration

Lis, Tomasz tomasz.lis at intel.com
Sat Nov 23 02:49:41 UTC 2024


On 16.11.2024 21:01, Michal Wajdeczko wrote:
> On 16.11.2024 03:12, Tomasz Lis wrote:
>> During post-migration recovery of a VF, it is necessary to update
>> GGTT references included in messages which are going to be sent
>> to GuC. GuC will start consuming messages after the VF KMD informs
>> it about fixups being done; before that, the VF KMD is expected
>> to update any H2G messages which are already in send buffer but
>> were not consumed by GuC.
>>
>> Only a small subset of messages allowed for VFs have GGTT references
>> in them. This patch adds the functionality to parse the CTB send
>> ring buffer and shift addresses contained within.
>>
>> While fixing the CTB content, ct->lock is not taken. This means
>> the only barier taken remains GGTT address lock - which is ok,
> typo barrier
ack
>> because only requests with GGTT addresses matter, but it also means
>> tail changes can happen during the CTB fixups execution (which may
>> be ignored as any new messages will not have anything to fix).
>>
>> The GGTT address locking will be introduced in a future series.
>>
>> Signed-off-by: Tomasz Lis<tomasz.lis at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gt_sriov_vf.c       |   2 +
>>   drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h |   2 +
>>   drivers/gpu/drm/xe/xe_guc_ct.c            | 146 ++++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_guc_ct.h            |   2 +
>>   drivers/gpu/drm/xe/xe_sriov_vf.c          |  12 ++
>>   5 files changed, 164 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> index ae24c47ed8f8..604cbbf55d4f 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
>> @@ -920,12 +920,14 @@ static u64 drm_mm_node_end(struct drm_mm_node *node)
>>   static s64 vf_get_post_migration_ggtt_shift(struct xe_gt *gt)
>>   {
>>   	struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
>> +	struct xe_gt_sriov_vf_runtime *runtime = &gt->sriov.vf.runtime;
>>   	struct xe_tile *tile = gt_to_tile(gt);
>>   	u64 old_base;
>>   	s64 ggtt_shift;
>>   
>>   	old_base = drm_mm_node_end(&tile->sriov.vf.ggtt_balloon[0]->base);
>>   	ggtt_shift = config->ggtt_base - (s64)old_base;
>> +	runtime->ggtt_shift = ggtt_shift;
> can't we store that when reading new config?
will do.
>
>>   
>>   	xe_gt_sriov_info(gt, "GGTT base shifted from %#llx to %#llx\n",
>>   		  old_base, old_base + ggtt_shift);
>> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
>> index a57f13b5afcd..6af219b0eb1e 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
>> @@ -65,6 +65,8 @@ struct xe_gt_sriov_vf_runtime {
>>   		/** @regs.value: register value. */
>>   		u32 value;
>>   	} *regs;
>> +	/** @ggtt_shift: difference in ggtt_base on last migration */
>> +	s64 ggtt_shift;
> by 'VF runtime data' we meant here the missing HW data obtained at
> _runtime_ from the GuC/PF (see "runtime-regs")
>
> this 'shift' is more like a migration specific data and doesn't really
> fit here, so better define a separate struct for it
since it's now part of getting config, will add it there.
>
>>   };
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
>> index 7eb175a0b874..119f4627a6d5 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
>> @@ -84,6 +84,8 @@ struct g2h_fence {
>>   	bool done;
>>   };
>>   
>> +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
>> +
>>   static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
>>   {
>>   	g2h_fence->response_buffer = response_buffer;
>> @@ -1620,6 +1622,150 @@ static void g2h_worker_func(struct work_struct *w)
>>   	receive_g2h(ct);
>>   }
>>   
>> +/*
>> + * ct_update_addresses_in_message - Shift any GGTT addresses within
>> + * a single message left within CTB from before post-migration recovery.
>> + * @ct: pointer to CT struct of the target GuC
>> + * @cmds: iomap buffer containing CT messages
>> + * @head: start of the target message within the buffer
>> + * @len: length of the target message
>> + * @size: size of the commands buffer
>> + * @shift: the address shift to be added to each GGTT reference
>> + */
>> +static void ct_update_addresses_in_message(struct xe_guc_ct *ct,
>> +					    struct iosys_map *cmds, u32 head,
>> +					    u32 len, u32 size, s64 shift)
>> +{
>> +	struct xe_device *xe = ct_to_xe(ct);
>> +	u32 action, i, n;
>> +	u32 msg[2];
>> +	u64 offset;
>> +
>> +#define read32(o, p)							\
>> +do {									\
>> +	xe_map_memcpy_from(xe, msg, cmds, (head + p) * sizeof(u32),	\
>> +			   1 * sizeof(u32));				\
>> +	o = msg[0];							\
>> +} while (0)
>> +#define fixup64(p)							\
>> +do {									\
>> +	xe_map_memcpy_from(xe, msg, cmds, (head + p) * sizeof(u32),	\
>> +			   2 * sizeof(u32));				\
> what about buffer wrap?

I don't see any implementation of ability to wrap with cutting a message 
in half. Rather, h2g_write() fills remaining area with null messages.

No special support is needed.

>
>> +	offset = make_u64(msg[1], msg[0]);				\
>> +	offset += shift;						\
>> +	msg[0] = lower_32_bits(offset);					\
>> +	msg[1] = upper_32_bits(offset);					\
>> +	xe_map_memcpy_to(xe, cmds, (head + p) * sizeof(u32), msg, 2 * sizeof(u32)); \
>> +} while (0)
> those macros are quite ugly and violating many rules
> maybe they should be converted into inlines?
will do.
>
>> +
>> +	xe_map_memcpy_from(xe, msg, cmds, head * sizeof(u32),
>> +			   1 * sizeof(u32));
>> +	action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
>> +	switch (action) {
>> +	case XE_GUC_ACTION_REGISTER_CONTEXT:
>> +	case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC:
>> +		/* field wq_desc */
>> +		fixup64(5);
>> +		/* field wq_base */
>> +		fixup64(7);
>> +		if (action == XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC) {
>> +			/* field number_children */
>> +			read32(n, 10);
>> +			/* field hwlrca and child lrcas */
>> +			for (i = 0; i < n; i++)
>> +				fixup64(11 + 2 * i);
>> +		} else {
>> +			/* field hwlrca */
>> +			fixup64(10);
>> +		}
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +#undef fixup64
>> +#undef read32
>> +}
>> +
>> +static int ct_update_addresses_in_buffer(struct xe_guc_ct *ct,
>> +					 struct guc_ctb *h2g,
>> +					 s64 shift, u32 *mhead, s32 avail)
>> +{
>> +	struct xe_device *xe = ct_to_xe(ct);
>> +	u32 head = *mhead;
>> +	u32 size = h2g->info.size;
>> +	u32 msg[1];
>> +	u32 len;
>> +
>> +	/* Read header */
>> +	xe_map_memcpy_from(xe, msg, &h2g->cmds, sizeof(u32) * head,
>> +			   sizeof(u32));
>> +	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
> is it ok to access here all this CTB data without any lock?
Yes, it is ok. The lack of locking is explained in the patch comment.
>> +
>> +	if (unlikely(len > (u32)avail)) {
>> +		struct xe_gt *gt = ct_to_gt(ct);
>> +
>> +		xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n",
>> +			  avail, len);
>> +		return 0;
> shouldn't we return some -errno to correctly report failure to the caller?

Why would we do this? What error handling would we do?

We are within a delicate, unusual situation during the recovery. The 
goal of migration recovery is clearly stated, and it is not verification 
of the state of buffers.

If that condition was reached, then:

* Maybe we've got double migration at a very specific time and we're 
operating on a living GuC. In that case, we should just back off and not 
touch it. (low probability of hitting that precise timing)

* Maybe the CTB was broken before migration, or got broken while 
restoring - the post-migration recovery is not a place to react on that. 
(low probability, as we check the head and tail before)

* Maybe our CTB parser is imperfect, or our locking will be imperfect 
(or more likely, it will regress due to unrelated changes) - in that 
case we also should back off.

In general, if we've failed with CTB fixups, it doesn't mean the GFX is 
unusable. There is actually much greater chance that no outgoing message 
required fixups - most workloads are not registering contexts every 
second. There is no point in propagating errors which do not break the 
recovery. And if something is broken, then we should have mechanisms 
within driver to deal with that. Post-migration recovery is not for that.

>
>> +	}
>> +
>> +	head = (head + 1) % size;
>> +	ct_update_addresses_in_message(ct, &h2g->cmds, head, len - 1, size, shift);
>> +	*mhead = (head + len - 1) % size;
>> +
>> +	return avail - len;
>> +}
>> +
>> +/**
>> + * xe_guc_ct_update_addresses - Shifts any GGTT addresses left
>> + * within CTB from before post-migration recovery.
> "Fixup any pending H2G CTB messages by updating GGTT offsets in their
> payloads" ?
ok, will switch to this.
>
>> + * @ct: pointer to CT struct of the target GuC
>> + */
>> +int xe_guc_ct_update_addresses(struct xe_guc_ct *ct)
>> +{
>> +	struct xe_guc *guc = ct_to_guc(ct);
>> +	struct xe_gt *gt = guc_to_gt(guc);
>> +	struct xe_gt_sriov_vf_runtime *runtime = &gt->sriov.vf.runtime;
> please provide ggtt-shift explicitly to avoid direct access to the VF
> data from the CT code
will do.
>
>> +	struct guc_ctb *h2g = &ct->ctbs.h2g;
>> +	u32 head = h2g->info.head;
>> +	u32 tail = READ_ONCE(h2g->info.tail);
>> +	u32 size = h2g->info.size;
>> +	s32 avail;
>> +	s64 ggtt_shift;
>> +
>> +	if (unlikely(h2g->info.broken))
>> +		return -EPIPE;
>> +
>> +	XE_WARN_ON(head > size);
> use xe_gt_WARN() or xe_gt_assert() instead - depends on what exactly do
> you want to check here
ack
>
>> +
>> +	if (unlikely(tail >= size)) {
>> +		xe_gt_err(gt, "H2G channel has Invalid tail offset (%u >= %u)\n",
> s/Invalid/invalid
ok
>> +			 tail, size);
>> +		goto corrupted;
>> +	}
>> +
>> +	avail = tail - head;
>> +
>> +	/* beware of buffer wrap case */
>> +	if (unlikely(avail < 0))
>> +		avail += size;
>> +	xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size);
>> +	XE_WARN_ON(avail < 0);
>> +
>> +	ggtt_shift = runtime->ggtt_shift;
>> +
>> +	while (avail > 0)
>> +		avail = ct_update_addresses_in_buffer(ct, h2g, ggtt_shift, &head, avail);
>> +
>> +	return 0;
>> +
>> +corrupted:
>> +	xe_gt_err(gt, "Corrupted descriptor head=%u tail=%u\n",
>> +		 head, tail);
>> +	h2g->info.broken = true;
>> +	return -EPIPE;
>> +}
>> +
>>   static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic,
>>   							bool want_ctb)
>>   {
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
>> index 82c4ae458dda..6b04fd4b1e03 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
>> @@ -22,6 +22,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr
>>   void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
>>   void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
>>   
>> +int xe_guc_ct_update_addresses(struct xe_guc_ct *ct);
>> +
>>   static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
>>   {
>>   	return ct->state == XE_GUC_CT_STATE_ENABLED;
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> index 5bcd55999e0e..08c789ebfa18 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
>> @@ -10,6 +10,7 @@
>>   #include "xe_gt.h"
>>   #include "xe_gt_sriov_printk.h"
>>   #include "xe_gt_sriov_vf.h"
>> +#include "xe_guc_ct.h"
>>   #include "xe_pm.h"
>>   #include "xe_sriov.h"
>>   #include "xe_sriov_printk.h"
>> @@ -158,6 +159,15 @@ static int vf_post_migration_requery_guc(struct xe_device *xe)
>>   	return ret;
>>   }
>>   
>> +static void vf_post_migration_fixup_ctb(struct xe_device *xe)
>> +{
>> +	struct xe_gt *gt;
>> +	unsigned int id;
>> +
>> +	for_each_gt(gt, xe, id)
>> +		xe_guc_ct_update_addresses(&gt->uc.guc.ct);
>> +}
>> +
>>   /*
>>    * vf_post_migration_imminent - Check if post-restore recovery is coming.
>>    * @xe: the &xe_device struct instance
>> @@ -217,6 +227,8 @@ static void vf_post_migration_recovery(struct xe_device *xe)
>>   
>>   	vf_post_migration_fixup_ggtt_nodes(xe);
>>   	/* FIXME: add the recovery steps */
>> +	vf_post_migration_fixup_ctb(xe);
>> +
> why this extra separation line?

it separates fixups from finalization. Similarly, there's a line at 
start of fixups.

-Tomasz

>>   	vf_post_migration_notify_resfix_done(xe);
>>   	xe_pm_runtime_put(xe);
>>   	drm_notice(&xe->drm, "migration recovery ended\n");


More information about the Intel-xe mailing list