[PATCH] drm/i915/guc: support v69 in parallel to v70

Mon Jul 18 21:19:00 UTC 2022

On 7/15/2022 15:54, Daniele Ceraolo Spurio wrote:
> This patch re-introduces support for GuC v69 in parallel to v70. As this
> is a quick fix, v69 has been re-introduced as the single "fallback" guc
> version in case v70 is not available on disk. All v69 specific code has
> been labeled as such for easy identification, and the same was done for
> all v70 functions for which there is a separate v69 version, to avoid
> accidentally calling the wrong version via the unlabeled name.
>
> When the fallback mode kicks in, a drm_warn message is printed in dmesg
> to warn the user of the required update.
>
> The plan is to follow this up with a more complex rework to allow for
> multiple different GuC versions to be supported at the same time.
>
> Fixes: 2584b3549f4c ("drm/i915/guc: Update to GuC version 70.1.1")
> Link: https://lists.freedesktop.org/archives/intel-gfx/2022-July/301640.html
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Cc: Dave Airlie <airlied at redhat.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  11 +-
>   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   3 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   5 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  45 +++
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 348 +++++++++++++++---
>   drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  57 ++-
>   drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      |   7 +
>   7 files changed, 419 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index d2d75d9c0c8d..04eacae1aca5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -275,10 +275,17 @@ struct intel_context {
>   		u8 child_index;
>   		/** @guc: GuC specific members for parallel submission */
>   		struct {
> -			/** @wqi_head: head pointer in work queue */
> +			/** @wqi_head: cached head pointer in work queue */
>   			u16 wqi_head;
> -			/** @wqi_tail: tail pointer in work queue */
> +			/** @wqi_tail: cached tail pointer in work queue */
>   			u16 wqi_tail;
> +			/** @wq_head: pointer to the actual head in work queue */
> +			u32 *wq_head;
> +			/** @wq_tail: pointer to the actual head in work queue */
> +			u32 *wq_tail;
> +			/** @wq_status: pointer to the status in work queue */
> +			u32 *wq_status;
> +
>   			/**
>   			 * @parent_page: page in context state (ce->state) used
>   			 * by parent for work queue, process descriptor
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> index 4ef9990ed7f8..29ef8afc8c2e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> @@ -122,6 +122,9 @@ enum intel_guc_action {
>   	INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
>   	INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
>   	INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
> +	INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
> +	INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
> +	INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
>   	INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
>   	INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
>   	INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index d0d99f178f2d..a7acffbf15d1 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -170,6 +170,11 @@ struct intel_guc {
>   	/** @ads_engine_usage_size: size of engine usage in the ADS */
>   	u32 ads_engine_usage_size;
>   
> +	/** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
> +	struct i915_vma *lrc_desc_pool_v69;
> +	/** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
> +	void *lrc_desc_pool_vaddr_v69;
> +
>   	/**
>   	 * @context_lookup: used to resolve intel_context from guc_id, if a
>   	 * context is present in this structure it is registered with the GuC
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> index b3c9a9327f76..323b055e5db9 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> @@ -204,6 +204,20 @@ struct guc_wq_item {
>   	u32 fence_id;
>   } __packed;
>   
> +struct guc_process_desc_v69 {
> +	u32 stage_id;
> +	u64 db_base_addr;
> +	u32 head;
> +	u32 tail;
> +	u32 error_offset;
> +	u64 wq_base_addr;
> +	u32 wq_size_bytes;
> +	u32 wq_status;
> +	u32 engine_presence;
> +	u32 priority;
> +	u32 reserved[36];
> +} __packed;
> +
>   struct guc_sched_wq_desc {
>   	u32 head;
>   	u32 tail;
> @@ -228,6 +242,37 @@ struct guc_ctxt_registration_info {
>   };
>   #define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
>   
> +/* Preempt to idle on quantum expiry */
> +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69	BIT(0)
> +
> +/*
> + * GuC Context registration descriptor.
> + * FIXME: This is only required to exist during context registration.
> + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
> + * is not required.
> + */
> +struct guc_lrc_desc_v69 {
> +	u32 hw_context_desc;
> +	u32 slpm_perf_mode_hint;	/* SPLC v1 only */
> +	u32 slpm_freq_hint;
> +	u32 engine_submit_mask;		/* In logical space */
> +	u8 engine_class;
> +	u8 reserved0[3];
> +	u32 priority;
> +	u32 process_desc;
> +	u32 wq_addr;
> +	u32 wq_size;
> +	u32 context_flags;		/* CONTEXT_REGISTRATION_* */
> +	/* Time for one workload to execute. (in micro seconds) */
> +	u32 execution_quantum;
> +	/* Time to wait for a preemption request to complete before issuing a
> +	 * reset. (in micro seconds).
> +	 */
> +	u32 preemption_timeout;
> +	u32 policy_flags;		/* CONTEXT_POLICY_* */
> +	u32 reserved1[19];
> +} __packed;
> +
>   /* 32-bit KLV structure as used by policy updates and others */
>   struct guc_klv_generic_dw_t {
>   	u32 kl;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 40f726c61e95..aa10db25cc06 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -414,12 +414,15 @@ struct sync_semaphore {
>   };
>   
>   struct parent_scratch {
> -	struct guc_sched_wq_desc wq_desc;
> +	union guc_descs {
> +		struct guc_sched_wq_desc wq_desc;
> +		struct guc_process_desc_v69 pdesc;
> +	} descs;
>   
>   	struct sync_semaphore go;
>   	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
>   
> -	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
> +	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
>   		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
>   
>   	u32 wq[WQ_SIZE / sizeof(u32)];
> @@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce)
>   		   LRC_STATE_OFFSET) / sizeof(u32)));
>   }
>   
> +static struct guc_process_desc_v69 *
> +__get_process_desc_v69(struct intel_context *ce)
> +{
> +	struct parent_scratch *ps = __get_parent_scratch(ce);
> +
> +	return &ps->descs.pdesc;
> +}
> +
>   static struct guc_sched_wq_desc *
> -__get_wq_desc(struct intel_context *ce)
> +__get_wq_desc_v70(struct intel_context *ce)
>   {
>   	struct parent_scratch *ps = __get_parent_scratch(ce);
>   
> -	return &ps->wq_desc;
> +	return &ps->descs.wq_desc;
>   }
>   
> -static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
> -			   struct intel_context *ce,
> -			   u32 wqi_size)
> +static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
>   {
>   	/*
>   	 * Check for space in work queue. Caching a value of head pointer in
> @@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
>   #define AVAILABLE_SPACE	\
>   	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
>   	if (wqi_size > AVAILABLE_SPACE) {
> -		ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
> +		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
>   
>   		if (wqi_size > AVAILABLE_SPACE)
>   			return NULL;
> @@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
>   	return ce;
>   }
>   
> +static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
> +{
Wondering if it is worth adding GEM_BUG_ON(correct_fw_version) to these 
v## functions? Probably overkill at this point.

> +	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
> +
> +	if (!base)
> +		return NULL;
> +
> +	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
> +
> +	return &base[index];
> +}
> +
> +static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
> +{
> +	u32 size;
> +	int ret;
> +
> +	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
> +			  GUC_MAX_CONTEXT_ID);
> +	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
> +					     (void **)&guc->lrc_desc_pool_vaddr_v69);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
> +{
> +	if (!guc->lrc_desc_pool_vaddr_v69)
> +		return;
> +
> +	guc->lrc_desc_pool_vaddr_v69 = NULL;
> +	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
> +}
> +
>   static inline bool guc_submission_initialized(struct intel_guc *guc)
>   {
>   	return guc->submission_initialized;
>   }
>   
> +static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
> +{
> +	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
> +
> +	if (desc)
> +		memset(desc, 0, sizeof(*desc));
> +}
> +
>   static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
>   {
>   	return __get_context(guc, id);
> @@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
>   	if (unlikely(!guc_submission_initialized(guc)))
>   		return;
>   
> +	_reset_lrc_desc_v69(guc, id);
> +
>   	/*
>   	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
>   	 * the lower level functions directly.
> @@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
>   					      true, timeout);
>   }
>   
> -static int guc_context_policy_init(struct intel_context *ce, bool loop);
> +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
>   static int try_context_registration(struct intel_context *ce, bool loop);
>   
>   static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
> @@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
>   	GEM_BUG_ON(context_guc_id_invalid(ce));
>   
>   	if (context_policy_required(ce)) {
> -		err = guc_context_policy_init(ce, false);
> +		err = guc_context_policy_init_v70(ce, false);
>   		if (err)
>   			return err;
>   	}
> @@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
>   	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
>   }
>   
> -static void write_wqi(struct guc_sched_wq_desc *wq_desc,
> -		      struct intel_context *ce,
> -		      u32 wqi_size)
> +static void write_wqi(struct intel_context *ce, u32 wqi_size)
>   {
>   	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
>   
> @@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc,
>   
>   	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
>   		(WQ_SIZE - 1);
> -	WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
> +	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
>   }
>   
>   static int guc_wq_noop_append(struct intel_context *ce)
>   {
> -	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
> -	u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
> +	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
>   	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
>   
>   	if (!wqi)
> @@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq)
>   {
>   	struct intel_context *ce = request_to_scheduling_context(rq);
>   	struct intel_context *child;
> -	struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
>   	unsigned int wqi_size = (ce->parallel.number_children + 4) *
>   		sizeof(u32);
>   	u32 *wqi;
> @@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
>   			return ret;
>   	}
>   
> -	wqi = get_wq_pointer(wq_desc, ce, wqi_size);
> +	wqi = get_wq_pointer(ce, wqi_size);
>   	if (!wqi)
>   		return -EBUSY;
>   
> @@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
>   	for_each_child(ce, child)
>   		*wqi++ = child->ring->tail / sizeof(u64);
>   
> -	write_wqi(wq_desc, ce, wqi_size);
> +	write_wqi(ce, wqi_size);
>   
>   	return 0;
>   }
> @@ -1812,20 +1863,34 @@ static void reset_fail_worker_func(struct work_struct *w);
>   int intel_guc_submission_init(struct intel_guc *guc)
>   {
>   	struct intel_gt *gt = guc_to_gt(guc);
> +	int ret;
>   
>   	if (guc->submission_initialized)
>   		return 0;
>   
> +	if (guc->fw.major_ver_found < 70) {
> +		ret = guc_lrc_desc_pool_create_v69(guc);
> +		if (ret)
> +			return ret;
> +	}
> +
>   	guc->submission_state.guc_ids_bitmap =
>   		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> -	if (!guc->submission_state.guc_ids_bitmap)
> -		return -ENOMEM;
> +	if (!guc->submission_state.guc_ids_bitmap) {
> +		ret = -ENOMEM;
> +		goto destroy_pool;
> +	}
>   
>   	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
>   	guc->timestamp.shift = gpm_timestamp_shift(gt);
>   	guc->submission_initialized = true;
>   
>   	return 0;
> +
> +destroy_pool:
> +	guc_lrc_desc_pool_destroy_v69(guc);
> +
> +	return ret;
>   }
>   
>   void intel_guc_submission_fini(struct intel_guc *guc)
> @@ -1834,6 +1899,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>   		return;
>   
>   	guc_flush_destroyed_contexts(guc);
> +	guc_lrc_desc_pool_destroy_v69(guc);
>   	i915_sched_engine_put(guc->sched_engine);
>   	bitmap_free(guc->submission_state.guc_ids_bitmap);
>   	guc->submission_initialized = false;
> @@ -2091,10 +2157,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>   	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
>   }
>   
> -static int __guc_action_register_multi_lrc(struct intel_guc *guc,
> +static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
>   					   struct intel_context *ce,
> -					   struct guc_ctxt_registration_info *info,
> +					   u32 guc_id,
> +					   u32 offset,
>   					   bool loop)
> +{
> +	struct intel_context *child;
> +	u32 action[4 + MAX_ENGINE_INSTANCE];
> +	int len = 0;
> +
> +	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
> +
> +	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
> +	action[len++] = guc_id;
> +	action[len++] = ce->parallel.number_children + 1;
> +	action[len++] = offset;
> +	for_each_child(ce, child) {
> +		offset += sizeof(struct guc_lrc_desc_v69);
> +		action[len++] = offset;
> +	}
> +
> +	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
> +}
> +
> +static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
> +					       struct intel_context *ce,
> +					       struct guc_ctxt_registration_info *info,
> +					       bool loop)
>   {
>   	struct intel_context *child;
>   	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
> @@ -2134,9 +2224,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc,
>   	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
>   }
>   
> -static int __guc_action_register_context(struct intel_guc *guc,
> -					 struct guc_ctxt_registration_info *info,
> -					 bool loop)
> +static int __guc_action_register_context_v69(struct intel_guc *guc,
> +					     u32 guc_id,
> +					     u32 offset,
> +					     bool loop)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_REGISTER_CONTEXT,
> +		guc_id,
> +		offset,
> +	};
> +
> +	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
> +					     0, loop);
> +}
> +
> +static int __guc_action_register_context_v70(struct intel_guc *guc,
> +					     struct guc_ctxt_registration_info *info,
> +					     bool loop)
>   {
>   	u32 action[] = {
>   		INTEL_GUC_ACTION_REGISTER_CONTEXT,
> @@ -2157,24 +2262,52 @@ static int __guc_action_register_context(struct intel_guc *guc,
>   					     0, loop);
>   }
>   
> -static void prepare_context_registration_info(struct intel_context *ce,
> -					      struct guc_ctxt_registration_info *info);
> +static void prepare_context_registration_info_v69(struct intel_context *ce);
> +static void prepare_context_registration_info_v70(struct intel_context *ce,
> +						  struct guc_ctxt_registration_info *info);
>   
> -static int register_context(struct intel_context *ce, bool loop)
> +static int
> +register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
> +{
> +	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
> +		ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
> +
> +	prepare_context_registration_info_v69(ce);
> +
> +	if (intel_context_is_parent(ce))
> +		return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
> +							   offset, loop);
> +	else
> +		return __guc_action_register_context_v69(guc, ce->guc_id.id,
> +							 offset, loop);
> +}
> +
> +static int
> +register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
>   {
>   	struct guc_ctxt_registration_info info;
> +
> +	prepare_context_registration_info_v70(ce, &info);
> +
> +	if (intel_context_is_parent(ce))
> +		return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
> +	else
> +		return __guc_action_register_context_v70(guc, &info, loop);
> +}
> +
> +static int register_context(struct intel_context *ce, bool loop)
> +{
>   	struct intel_guc *guc = ce_to_guc(ce);
>   	int ret;
>   
>   	GEM_BUG_ON(intel_context_is_child(ce));
>   	trace_intel_context_register(ce);
>   
> -	prepare_context_registration_info(ce, &info);
> -
> -	if (intel_context_is_parent(ce))
> -		ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
> +	if (guc->fw.major_ver_found >= 70)
> +		ret = register_context_v70(guc, ce, loop);
>   	else
> -		ret = __guc_action_register_context(guc, &info, loop);
> +		ret = register_context_v69(guc, ce, loop);
> +
>   	if (likely(!ret)) {
>   		unsigned long flags;
>   
> @@ -2182,7 +2315,8 @@ static int register_context(struct intel_context *ce, bool loop)
>   		set_context_registered(ce);
>   		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>   
> -		guc_context_policy_init(ce, loop);
> +		if (guc->fw.major_ver_found >= 70)
> +			guc_context_policy_init_v70(ce, loop);
>   	}
>   
>   	return ret;
> @@ -2279,7 +2413,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
>   					0, loop);
>   }
>   
> -static int guc_context_policy_init(struct intel_context *ce, bool loop)
> +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
>   {
>   	struct intel_engine_cs *engine = ce->engine;
>   	struct intel_guc *guc = &engine->gt->uc.guc;
> @@ -2338,6 +2472,19 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
>   	return ret;
>   }
>   
> +static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
> +					struct guc_lrc_desc_v69 *desc)
> +{
> +	desc->policy_flags = 0;
> +
> +	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
> +		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
> +
> +	/* NB: For both of these, zero means disabled. */
> +	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
> +	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
> +}
> +
>   static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
>   {
>   	/*
> @@ -2358,8 +2505,75 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
>   	}
>   }
>   
> -static void prepare_context_registration_info(struct intel_context *ce,
> -					      struct guc_ctxt_registration_info *info)
> +static void prepare_context_registration_info_v69(struct intel_context *ce)
> +{
> +	struct intel_engine_cs *engine = ce->engine;
> +	struct intel_guc *guc = &engine->gt->uc.guc;
> +	u32 ctx_id = ce->guc_id.id;
> +	struct guc_lrc_desc_v69 *desc;
> +	struct intel_context *child;
> +
> +	GEM_BUG_ON(!engine->mask);
> +
> +	/*
> +	 * Ensure LRC + CT vmas are is same region as write barrier is done
> +	 * based on CT vma region.
> +	 */
> +	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
> +		   i915_gem_object_is_lmem(ce->ring->vma->obj));
> +
> +	desc = __get_lrc_desc_v69(guc, ctx_id);
> +	desc->engine_class = engine_class_to_guc_class(engine->class);
> +	desc->engine_submit_mask = engine->logical_mask;
> +	desc->hw_context_desc = ce->lrc.lrca;
> +	desc->priority = ce->guc_state.prio;
> +	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> +	guc_context_policy_init_v69(engine, desc);
> +
> +	/*
> +	 * If context is a parent, we need to register a process descriptor
> +	 * describing a work queue and register all child contexts.
> +	 */
> +	if (intel_context_is_parent(ce)) {
> +		struct guc_process_desc_v69 *pdesc;
> +
> +		ce->parallel.guc.wqi_tail = 0;
> +		ce->parallel.guc.wqi_head = 0;
> +
> +		desc->process_desc = i915_ggtt_offset(ce->state) +
> +			__get_parent_scratch_offset(ce);
> +		desc->wq_addr = i915_ggtt_offset(ce->state) +
> +			__get_wq_offset(ce);
> +		desc->wq_size = WQ_SIZE;
> +
> +		pdesc = __get_process_desc_v69(ce);
> +		memset(pdesc, 0, sizeof(*(pdesc)));
> +		pdesc->stage_id = ce->guc_id.id;
> +		pdesc->wq_base_addr = desc->wq_addr;
> +		pdesc->wq_size_bytes = desc->wq_size;
> +		pdesc->wq_status = WQ_STATUS_ACTIVE;
> +
> +		ce->parallel.guc.wq_head = &pdesc->head;
> +		ce->parallel.guc.wq_tail = &pdesc->tail;
> +		ce->parallel.guc.wq_status = &pdesc->wq_status;
> +
> +		for_each_child(ce, child) {
> +			desc = __get_lrc_desc_v69(guc, child->guc_id.id);
> +
> +			desc->engine_class =
> +				engine_class_to_guc_class(engine->class);
> +			desc->hw_context_desc = child->lrc.lrca;
> +			desc->priority = ce->guc_state.prio;
> +			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> +			guc_context_policy_init_v69(engine, desc);
> +		}
> +
> +		clear_children_join_go_memory(ce);
> +	}
> +}
> +
> +static void prepare_context_registration_info_v70(struct intel_context *ce,
> +						  struct guc_ctxt_registration_info *info)
>   {
>   	struct intel_engine_cs *engine = ce->engine;
>   	struct intel_guc *guc = &engine->gt->uc.guc;
> @@ -2409,10 +2623,14 @@ static void prepare_context_registration_info(struct intel_context *ce,
>   		info->wq_base_hi = upper_32_bits(wq_base_offset);
>   		info->wq_size = WQ_SIZE;
>   
> -		wq_desc = __get_wq_desc(ce);
> +		wq_desc = __get_wq_desc_v70(ce);
>   		memset(wq_desc, 0, sizeof(*wq_desc));
>   		wq_desc->wq_status = WQ_STATUS_ACTIVE;
>   
> +		ce->parallel.guc.wq_head = &wq_desc->head;
> +		ce->parallel.guc.wq_tail = &wq_desc->tail;
> +		ce->parallel.guc.wq_status = &wq_desc->wq_status;
Not clearing these pointers down in the context de-registration makes me 
nervous. Sure, they should never be touched on a context that is not 
registered, but they would still be dangling.

> +
>   		clear_children_join_go_memory(ce);
>   	}
>   }
> @@ -2727,11 +2945,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
>   						 u16 guc_id,
>   						 u32 preemption_timeout)
>   {
> -	struct context_policy policy;
> +	if (guc->fw.major_ver_found >= 70) {
> +		struct context_policy policy;
>   
> -	__guc_context_policy_start_klv(&policy, guc_id);
> -	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
> -	__guc_context_set_context_policies(guc, &policy, true);
> +		__guc_context_policy_start_klv(&policy, guc_id);
> +		__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
> +		__guc_context_set_context_policies(guc, &policy, true);
> +	} else {
> +		u32 action[] = {
> +			INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
> +			guc_id,
> +			preemption_timeout
> +		};
> +
> +		intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
> +	}
>   }
>   
>   static void
> @@ -2982,11 +3210,21 @@ static int guc_context_alloc(struct intel_context *ce)
>   static void __guc_context_set_prio(struct intel_guc *guc,
>   				   struct intel_context *ce)
>   {
> -	struct context_policy policy;
> +	if (guc->fw.major_ver_found >= 70) {
> +		struct context_policy policy;
>   
> -	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
> -	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
> -	__guc_context_set_context_policies(guc, &policy, true);
> +		__guc_context_policy_start_klv(&policy, ce->guc_id.id);
> +		__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
> +		__guc_context_set_context_policies(guc, &policy, true);
> +	} else {
> +		u32 action[] = {
> +			INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
> +			ce->guc_id.id,
> +			ce->guc_state.prio,
> +		};
> +
> +		guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
> +	}
>   }
>   
>   static void guc_context_set_prio(struct intel_guc *guc,
> @@ -4496,17 +4734,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
>   		guc_log_context_priority(p, ce);
>   
>   		if (intel_context_is_parent(ce)) {
> -			struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
>   			struct intel_context *child;
>   
>   			drm_printf(p, "\t\tNumber children: %u\n",
>   				   ce->parallel.number_children);
> -			drm_printf(p, "\t\tWQI Head: %u\n",
> -				   READ_ONCE(wq_desc->head));
> -			drm_printf(p, "\t\tWQI Tail: %u\n",
> -				   READ_ONCE(wq_desc->tail));
> -			drm_printf(p, "\t\tWQI Status: %u\n\n",
> -				   READ_ONCE(wq_desc->wq_status));
> +
> +			if (ce->parallel.guc.wq_status) {
> +				drm_printf(p, "\t\tWQI Head: %u\n",
> +					   READ_ONCE(*ce->parallel.guc.wq_head));
> +				drm_printf(p, "\t\tWQI Tail: %u\n",
> +					   READ_ONCE(*ce->parallel.guc.wq_tail));
> +				drm_printf(p, "\t\tWQI Status: %u\n\n",
> +					   READ_ONCE(*ce->parallel.guc.wq_status));
> +			}
>   
>   			if (ce->engine->emit_bb_start ==
>   			    emit_bb_start_parent_no_preempt_mid_batch) {
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index 27363091e1af..210c84411406 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -70,6 +70,23 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
>   	fw_def(BROXTON,      0, guc_def(bxt,  70, 1, 1)) \
>   	fw_def(SKYLAKE,      0, guc_def(skl,  70, 1, 1))
>   
> +#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
> +	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 69, 0, 3)) \
> +	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  69, 0, 3)) \
> +	fw_def(DG1,          0, guc_def(dg1,  69, 0, 3)) \
> +	fw_def(ROCKETLAKE,   0, guc_def(tgl,  69, 0, 3)) \
> +	fw_def(TIGERLAKE,    0, guc_def(tgl,  69, 0, 3)) \
> +	fw_def(JASPERLAKE,   0, guc_def(ehl,  69, 0, 3)) \
> +	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  69, 0, 3)) \
> +	fw_def(ICELAKE,      0, guc_def(icl,  69, 0, 3)) \
> +	fw_def(COMETLAKE,    5, guc_def(cml,  69, 0, 3)) \
> +	fw_def(COMETLAKE,    0, guc_def(kbl,  69, 0, 3)) \
> +	fw_def(COFFEELAKE,   0, guc_def(kbl,  69, 0, 3)) \
> +	fw_def(GEMINILAKE,   0, guc_def(glk,  69, 0, 3)) \
> +	fw_def(KABYLAKE,     0, guc_def(kbl,  69, 0, 3)) \
> +	fw_def(BROXTON,      0, guc_def(bxt,  69, 0, 3)) \
> +	fw_def(SKYLAKE,      0, guc_def(skl,  69, 0, 3))
> +
>   #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
>   	fw_def(ALDERLAKE_P,  0, huc_def(tgl,  7, 9, 3)) \
>   	fw_def(ALDERLAKE_S,  0, huc_def(tgl,  7, 9, 3)) \
> @@ -105,6 +122,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
>   	MODULE_FIRMWARE(uc_);
>   
>   INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
> +INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
>   INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
>   
>   /* The below structs and macros are used to iterate across the list of blobs */
> @@ -149,6 +167,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   	static const struct uc_fw_platform_requirement blobs_guc[] = {
>   		INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
>   	};
> +	static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
> +		INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
> +	};
>   	static const struct uc_fw_platform_requirement blobs_huc[] = {
>   		INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
>   	};
> @@ -179,12 +200,28 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   		if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
>   			const struct uc_fw_blob *blob = &fw_blobs[i].blob;
>   			uc_fw->path = blob->path;
> +			uc_fw->wanted_path = blob->path;
>   			uc_fw->major_ver_wanted = blob->major;
>   			uc_fw->minor_ver_wanted = blob->minor;
>   			break;
>   		}
>   	}
>   
> +	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
> +		const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
> +		u32 count = ARRAY_SIZE(blobs_guc_fallback);
> +
> +		for (i = 0; i < count && p <= blobs[i].p; i++) {
> +			if (p == blobs[i].p && rev >= blobs[i].rev) {
> +				const struct uc_fw_blob *blob = &blobs[i].blob;
> +				uc_fw->fallback.path = blob->path;
> +				uc_fw->fallback.major_ver = blob->major;
> +				uc_fw->fallback.minor_ver = blob->minor;
> +				break;
> +			}
> +		}
> +	}
> +
>   	/* make sure the list is ordered as expected */
>   	if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
>   		for (i = 1; i < fw_count; i++) {
> @@ -413,6 +450,18 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
>   	__force_fw_fetch_failures(uc_fw, -ESTALE);
>   
>   	err = request_firmware(&fw, uc_fw->path, dev);
> +	if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
> +		err = request_firmware(&fw, uc_fw->fallback.path, dev);
> +		if (!err) {
> +			drm_warn(&i915->drm, "%s firmware %s not found, falling back to %s\n",
> +					     intel_uc_fw_type_repr(uc_fw->type),
> +					     uc_fw->wanted_path,
> +					     uc_fw->fallback.path);
As discussed offline, this should not be a warning. Also, the 
request_firmware() call above needs to be request_firmware_nowarn().

Otherwise, it all looks plausible.

John.

> +			uc_fw->path = uc_fw->fallback.path;
> +			uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
> +			uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
> +		}
> +	}
>   	if (err)
>   		goto fail;
>   
> @@ -822,7 +871,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
>   void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
>   {
>   	drm_printf(p, "%s firmware: %s\n",
> -		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
> +		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
> +	if (uc_fw->fallback.path) {
> +		drm_printf(p, "%s firmware fallback: %s\n",
> +			   intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
> +		drm_printf(p, "fallback selected: %s\n",
> +			   str_yes_no(uc_fw->path == uc_fw->fallback.path));
> +	}
>   	drm_printf(p, "\tstatus: %s\n",
>   		   intel_uc_fw_status_repr(uc_fw->status));
>   	drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> index 4f169035f504..7aa2644400b9 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> @@ -74,6 +74,7 @@ struct intel_uc_fw {
>   		const enum intel_uc_fw_status status;
>   		enum intel_uc_fw_status __status; /* no accidental overwrites */
>   	};
> +	const char *wanted_path;
>   	const char *path;
>   	bool user_overridden;
>   	size_t size;
> @@ -98,6 +99,12 @@ struct intel_uc_fw {
>   	u16 major_ver_found;
>   	u16 minor_ver_found;
>   
> +	struct {
> +		const char *path;
> +		u16 major_ver;
> +		u16 minor_ver;
> +	} fallback;
> +
>   	u32 rsa_size;
>   	u32 ucode_size;
>