[PATCH v3] drm/xe/vf: Enable CCS save/restore only on supported GUC versions
Michal Wajdeczko
michal.wajdeczko at intel.com
Mon Aug 25 14:17:00 UTC 2025
On 8/25/2025 3:56 PM, Satyanarayana K V P wrote:
> CCS save/restore is supported starting with GuC 70.48.0 (compatibility
> version 1.23.0). Gate the feature on the GuC firmware version and keep it
> disabled on older or unsupported versions.
>
> Fixes: f3009272ff2e ("drm/xe/vf: Create contexts for CCS read write")
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
> ---
> V2 -> V3:
> - Fixed review comments (Michal)
>
> V1 -> V2:
> - Fixed review comments (Michal)
> ---
> drivers/gpu/drm/xe/xe_sriov_vf.c | 40 +++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 3 ++
> drivers/gpu/drm/xe/xe_sriov_vf_types.h | 5 ++++
> 3 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
> index 5de81f213d83..734e64a50f13 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
> @@ -10,6 +10,7 @@
> #include "xe_gt.h"
> #include "xe_gt_sriov_printk.h"
> #include "xe_gt_sriov_vf.h"
> +#include "xe_guc.h"
> #include "xe_guc_ct.h"
> #include "xe_guc_submit.h"
> #include "xe_irq.h"
> @@ -133,7 +134,42 @@ static bool vf_migration_supported(struct xe_device *xe)
this static function should likely become a public one:
bool xe_sriov_vf_migration_supported(xe)
as you would need it in xe_sriov_late_init() when calling
ccs_init, unless you introduce here
xe_sriov_vf_late_init()
and call xe_sriov_vf_ccs_init from it
> * TODO: Add conditions to allow specific platforms, when they're
> * supported at production quality.
> */
this comment above should be moved to vf_check_migration_support()
> - return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
> + xe_assert(xe, IS_SRIOV_VF(xe));
> + return xe->sriov.vf.migration.disabled;
> +}
> +
> +static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
> +{
> + struct va_format vaf;
> + va_list va_args;
> +
> + xe_assert(xe, IS_SRIOV_VF(xe));
> +
> + va_start(va_args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &va_args;
> + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
> + va_end(va_args);
> +
> + xe->sriov.vf.migration.disabled = true;
> +}
> +
> +static void vf_check_migration_support(struct xe_device *xe)
> +{
> + struct xe_tile *tile = xe_device_get_root_tile(xe);
> + struct xe_uc_fw_version *guc_version;
> +
> + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
> + return vf_disable_migration(xe,
> + "experimantal feature not available on production builds");
typo: experimental
> +
> + guc_version = &tile->primary_gt->sriov.vf.guc_version;
maybe some small helper:
u32 vf_guc_version(xe_gt *gt)
and here
vf_guc_version(xe_device_get_gt(xe, 0))
> + if (!IS_DGFX(xe) && MAKE_GUC_VER(guc_version->major, guc_version->minor,
> + guc_version->patch) < MAKE_GUC_VER(1, 23, 0)) {
> + return vf_disable_migration(xe,
> + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
> + guc_version->major, guc_version->minor);
> + }
> }
>
> static void migration_worker_func(struct work_struct *w);
> @@ -146,6 +182,8 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
> {
> INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
>
> + vf_check_migration_support(xe);
> +
> if (!vf_migration_supported(xe))
> xe_sriov_info(xe, "migration not supported by this module version\n");
this message is now redundant since we have new ones in vf_check_migration_support()
> }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 4872e43eb440..05d3c680b3fa 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -268,6 +268,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
> xe_assert(xe, !IS_DGFX(xe));
> xe_assert(xe, xe_device_has_flat_ccs(xe));
>
> + if (xe->sriov.vf.migration.disabled)
IMO we shouldn't call this function if migration is disabled,
so such a check should be done by the caller (maybe just some asserts here)
> + return 0;
> +
> for_each_ccs_rw_ctx(ctx_id) {
> ctx = &tile->sriov.vf.ccs[ctx_id];
> ctx->ctx_id = ctx_id;
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> index 24a873c50c49..8eb4ca9f62de 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
> @@ -35,6 +35,11 @@ struct xe_device_vf {
> struct work_struct worker;
> /** @migration.gt_flags: Per-GT request flags for VF migration recovery */
> unsigned long gt_flags;
> + /**
> + * @migration.disabled: flag indicating if migration support
> + * was disabled due to missing prerequisites
> + */
> + bool disabled;
> } migration;
>
> /** @ccs: VF CCS state data */
More information about the Intel-xe
mailing list