[PATCH v1 3/3] drm/xe/pf: Add functions to save and restore VF LMEM and CCS data
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu Oct 17 11:12:32 UTC 2024
On 16.10.2024 11:57, Lukasz Laguna wrote:
> State of VF LMEM and corresponding CCS metadata needs to be saved and
> restored during migration of VM with attached dGPU VF. Add helpers that
> allow it. We will start using them in upcoming patches.
>
> Signed-off-by: Lukasz Laguna <lukasz.laguna at intel.com
> ---
> drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 100 ++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 7 ++
> 2 files changed, 107 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> index c712111aa30d..feb22d3e5dd5 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> @@ -12,6 +12,7 @@
> #include "xe_gt_sriov_printk.h"
> #include "xe_guc.h"
> #include "xe_guc_ct.h"
> +#include "xe_migrate.h"
> #include "xe_sriov.h"
>
> /* Return: number of dwords saved/restored/required or a negative error code on failure */
> @@ -382,6 +383,105 @@ ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int
> }
> #endif /* CONFIG_DEBUG_FS */
>
> +static int pf_save_restore_lmem(struct xe_gt *gt, unsigned int vfid, struct xe_bo *sysmem,
> + struct xe_bo *ccs, u64 offset, size_t size, bool save)
bool flags are not welcomed by the maintainters
> +{
> + struct xe_bo *vram;
> + struct dma_fence *fence;
> + int ret;
you may want to assert that vfid != PFID
> +
> + if (!sysmem && !ccs)
> + return -EINVAL;
input parameters shall be validated at the top public function, in all
internal static functions/helpers we should just have asserts
> +
> + vram = gt->sriov.pf.vfs[vfid].config.lmem_obj;
access to config.lmem_obj should be under xe_gt_sriov_pf_master_mutex
> + if (!vram)
> + return -ENXIO;
> +
> + if (!xe_bo_trylock(vram))
> + return -EBUSY;
> +
> + if (sysmem) {
> + if (!xe_bo_trylock(sysmem)) {
> + ret = -EBUSY;
> + goto err_vram_unlock;
> + }
> + }
> + if (ccs) {
> + if (!xe_bo_trylock(ccs)) {
> + ret = -EBUSY;
> + goto err_sysmem_unlock;
> + }
> + }
> +
> + fence = xe_migrate_raw_vram_copy(vram, offset, sysmem, 0, ccs, 0, size, save);
> + if (IS_ERR(fence)) {
> + ret = PTR_ERR(fence);
> + goto err_ccs_unlock;
> + }
> + ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
do you want to keep this hardcoded 5s for all cases?
maybe the fence should be returned to the caller?
> + if (!ret)
> + ret = -ETIME;
> +
> + dma_fence_put(fence);
> +
> +err_ccs_unlock:
> + if (ccs)
> + xe_bo_unlock(ccs);
> +err_sysmem_unlock:
> + if (sysmem)
> + xe_bo_unlock(sysmem);
> +err_vram_unlock:
> + xe_bo_unlock(vram);
> +
> + return ret < 0 ? ret : 0;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_migration_save_lmem() - Save a VF LMEM and corresponding CCS.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + * @smem: the sysmem BO to save LMEM data to (NULL if not needed)
likely the vfio driver will not be able to provide such BO, so do we
plan to introduce yet another layer for it?
> + * @ccs: the sysmem BO to save CCS data to (NULL if not needed)
shouldn't the logic about need of the CCS data be somehow hidden from
the caller?
> + * @offset: the offset of VF LMEM chunk
> + * @size: the size of VF LMEM chunk
> + *
> + * This function is for PF only.
> + *
> + * This functions saves VF LMEM and corresponding CCS data into sysmem buffer
> + * objects.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_migration_save_lmem(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *sysmem, struct xe_bo *ccs, u64 offset,
> + size_t size)
> +{
> + return pf_save_restore_lmem(gt, vfid, sysmem, ccs, offset, size, true);
> +}
> +
> +/**
> + * xe_gt_sriov_pf_migration_restore_lmem() - Restore a VF LMEM and corresponding CCS.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + * @smem: the sysmem BO to restore LMEM data from (NULL if not needed)
> + * @ccs: the sysmem BO to restore CCS data from (NULL if not needed)
> + * @offset: the offset of VF LMEM chunk
> + * @size: the size of VF LMEM chunk
> + *
> + * This function is for PF only.
> + *
> + * This functions restores VF LMEM and corresponding CCS data from sysmem
> + * buffer objects.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_migration_restore_lmem(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *sysmem, struct xe_bo *ccs, u64 offset,
> + size_t size)
> +{
> + return pf_save_restore_lmem(gt, vfid, sysmem, ccs, offset, size, false);
> +}
> +
> static bool pf_check_migration_support(struct xe_gt *gt)
> {
> /* GuC 70.25 with save/restore v2 is required */
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> index 09faeae00ddb..dc1e6c5b44fa 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> @@ -8,11 +8,18 @@
>
> #include <linux/types.h>
>
> +struct xe_bo;
> struct xe_gt;
>
> int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
> int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
> int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_migration_save_lmem(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *sysmem, struct xe_bo *ccs, u64 offset,
> + size_t size);
> +int xe_gt_sriov_pf_migration_restore_lmem(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *sysmem, struct xe_bo *ccs, u64 offset,
> + size_t size);
>
> #ifdef CONFIG_DEBUG_FS
> ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
More information about the Intel-xe
mailing list