[PATCH v5 1/4] drm/amdgpu: update xgmi info on resume
Samuel Zhang
guoqing.zhang at amd.com
Mon May 12 06:41:38 UTC 2025
For virtual machine with vGPUs in SRIOV single device mode and XGMI
is enabled, XGMI physical node ids may change when waking up from
hiberation with different vGPU devices. So update XGMI info on resume
for gfx944 on SRIOV environment.
add amdgpu_virt_xgmi_migrate_enabled() as the feature flag.
Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 7 ++++++
2 files changed, 34 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d477a901af84..d8aacd5a88a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2732,6 +2732,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+ adev->virt.is_xgmi_node_migrate_enabled = false;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->virt.is_xgmi_node_migrate_enabled =
+ amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
+ }
+
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
ip_block = &adev->ip_blocks[i];
@@ -5040,6 +5046,23 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
return 0;
}
+static inline int amdgpu_device_update_xgmi_info(struct amdgpu_device *adev)
+{
+ int r;
+ unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
+
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ return 0;
+
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+
+ dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
+ prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
+ return 0;
+}
+
/**
* amdgpu_device_resume - initiate device resume
*
@@ -5061,6 +5084,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
return r;
}
+ r = amdgpu_device_update_xgmi_info(adev);
+ if (r)
+ goto exit;
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index df03dba67ab8..532b92628171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -295,6 +295,9 @@ struct amdgpu_virt {
union amd_sriov_ras_caps ras_telemetry_en_caps;
struct amdgpu_virt_ras ras;
struct amd_sriov_ras_telemetry_error_count count_cache;
+
+ /* hibernate and resume with different VF feature for xgmi enabled system */
+ bool is_xgmi_node_migrate_enabled;
};
struct amdgpu_video_codec_info;
@@ -376,6 +379,10 @@ static inline bool is_virtual_machine(void)
((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
#define amdgpu_sriov_is_mes_info_enable(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
+
+#define amdgpu_virt_xgmi_migrate_enabled(adev) \
+ ((adev)->virt.is_xgmi_node_migrate_enabled)
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
--
2.43.5
More information about the amd-gfx
mailing list