[PATCH v3] drm/xe/vf: Enable CCS save/restore only on supported GUC versions

Michal Wajdeczko michal.wajdeczko at intel.com
Mon Aug 25 14:17:00 UTC 2025



On 8/25/2025 3:56 PM, Satyanarayana K V P wrote:
> CCS save/restore is supported starting with GuC 70.48.0 (compatibility
> version 1.23.0). Gate the feature on the GuC firmware version and keep it
> disabled on older or unsupported versions.
> 
> Fixes: f3009272ff2e ("drm/xe/vf: Create contexts for CCS read write")
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> 
> ---
> V2 -> V3:
> - Fixed review comments (Michal)
> 
> V1 -> V2:
> - Fixed review comments (Michal)
> ---
>  drivers/gpu/drm/xe/xe_sriov_vf.c       | 40 +++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c   |  3 ++
>  drivers/gpu/drm/xe/xe_sriov_vf_types.h |  5 ++++
>  3 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
> index 5de81f213d83..734e64a50f13 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
> @@ -10,6 +10,7 @@
>  #include "xe_gt.h"
>  #include "xe_gt_sriov_printk.h"
>  #include "xe_gt_sriov_vf.h"
> +#include "xe_guc.h"
>  #include "xe_guc_ct.h"
>  #include "xe_guc_submit.h"
>  #include "xe_irq.h"
> @@ -133,7 +134,42 @@ static bool vf_migration_supported(struct xe_device *xe)

this static function should likely become a public one:

	bool xe_sriov_vf_migration_supported(xe)

as you would need it in xe_sriov_late_init() when calling
ccs_init, unless you introduce here

	xe_sriov_vf_late_init()

and call xe_sriov_vf_ccs_init from it


>  	 * TODO: Add conditions to allow specific platforms, when they're
>  	 * supported at production quality.
>  	 */

the comment above should be moved to vf_check_migration_support()

> -	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
> +	xe_assert(xe, IS_SRIOV_VF(xe));
> +	return xe->sriov.vf.migration.disabled;
> +}
> +
> +static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
> +{
> +	struct va_format vaf;
> +	va_list va_args;
> +
> +	xe_assert(xe, IS_SRIOV_VF(xe));
> +
> +	va_start(va_args, fmt);
> +	vaf.fmt = fmt;
> +	vaf.va  = &va_args;
> +	xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
> +	va_end(va_args);
> +
> +	xe->sriov.vf.migration.disabled = true;
> +}
> +
> +static void vf_check_migration_support(struct xe_device *xe)
> +{
> +	struct xe_tile *tile = xe_device_get_root_tile(xe);
> +	struct xe_uc_fw_version *guc_version;
> +
> +	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
> +		return vf_disable_migration(xe,
> +					    "experimantal feature not available on production builds");

typo: "experimantal" should be "experimental"

> +
> +	guc_version = &tile->primary_gt->sriov.vf.guc_version;

maybe some small helper:

	u32 vf_guc_version(xe_gt *gt)

and here

	vf_guc_version(xe_device_get_gt(xe, 0))

> +	if (!IS_DGFX(xe) && MAKE_GUC_VER(guc_version->major, guc_version->minor,
> +					 guc_version->patch) < MAKE_GUC_VER(1, 23, 0)) {
> +		return vf_disable_migration(xe,
> +					    "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
> +					    guc_version->major, guc_version->minor);
> +	}
>  }
>  
>  static void migration_worker_func(struct work_struct *w);
> @@ -146,6 +182,8 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
>  {
>  	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
>  
> +	vf_check_migration_support(xe);
> +
>  	if (!vf_migration_supported(xe))
>  		xe_sriov_info(xe, "migration not supported by this module version\n");

this message is now redundant since we have new ones in vf_check_migration_support()

>  }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 4872e43eb440..05d3c680b3fa 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -268,6 +268,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  	xe_assert(xe, !IS_DGFX(xe));
>  	xe_assert(xe, xe_device_has_flat_ccs(xe));
>  
> +	if (xe->sriov.vf.migration.disabled)

IMO we shouldn't call this function if migration is disabled,
so this check should be done by the caller (maybe just some asserts here)

> +		return 0;
> +
>  	for_each_ccs_rw_ctx(ctx_id) {
>  		ctx = &tile->sriov.vf.ccs[ctx_id];
>  		ctx->ctx_id = ctx_id;
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> index 24a873c50c49..8eb4ca9f62de 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> @@ -35,6 +35,11 @@ struct xe_device_vf {
>  		struct work_struct worker;
>  		/** @migration.gt_flags: Per-GT request flags for VF migration recovery */
>  		unsigned long gt_flags;
> +		/**
> +		 * @migration.disabled: flag indicating if migration support
> +		 * was disabled due to missing prerequisites
> +		 */
> +		bool disabled;
>  	} migration;
>  
>  	/** @ccs: VF CCS state data */



More information about the Intel-xe mailing list