[PATCH v6 3/4] drm/amdgpu: enable pdb0 for hibernation on SRIOV
Christian König
christian.koenig at amd.com
Mon May 19 13:57:18 UTC 2025
On 5/19/25 10:20, Samuel Zhang wrote:
> When switching to new GPU index after hibernation and then resume,
> VRAM offset of each VRAM BO will be changed, and the cached gpu
> addresses needed to updated.
>
> This is to enable pdb0 and switch to use pdb0-based virtual gpu
> address by default in amdgpu_bo_create_reserved(). since the virtual
> addresses do not change, this can avoid the need to update all
> cached gpu addresses all over the codebase.
>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 32 ++++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++---
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 6 +++--
> 5 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index d1fa5e8e3937..265d6c777af5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -38,6 +38,8 @@
> #include <drm/drm_drv.h>
> #include <drm/ttm/ttm_tt.h>
>
> +static const u64 four_gb = 0x100000000ULL;
> +
> /**
> * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
> *
> @@ -249,15 +251,24 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
> {
> u64 hive_vram_start = 0;
> u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
> - mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
> - mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
> - mc->gart_start = hive_vram_end + 1;
> +
> + if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
> + /* set mc->vram_start to 0 to switch the returned GPU address of
> + * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
> + */
> + amdgpu_gmc_vram_location(adev, mc, 0);
This function does a lot more than just setting mc->vram_start and mc->vram_end.
You should probably just update the two setting and not call amdgpu_gmc_vram_location() at all.
> + } else {
> + mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
> + mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
> + dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
> + mc->mc_vram_size >> 20, mc->vram_start,
> + mc->vram_end, mc->real_vram_size >> 20);
> + }
> + /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */
> + mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
> mc->gart_end = mc->gart_start + mc->gart_size - 1;
> mc->fb_start = hive_vram_start;
> mc->fb_end = hive_vram_end;
> - dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
> - mc->mc_vram_size >> 20, mc->vram_start,
> - mc->vram_end, mc->real_vram_size >> 20);
> dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
> mc->gart_size >> 20, mc->gart_start, mc->gart_end);
> }
> @@ -276,7 +287,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
> void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
> enum amdgpu_gart_placement gart_placement)
> {
> - const uint64_t four_gb = 0x100000000ULL;
> u64 size_af, size_bf;
> /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
> u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
> @@ -1068,6 +1078,14 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
> flags |= AMDGPU_PDE_PTE_FLAG(adev);
>
> + if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
> + /* always start from current device so that the GART address can keep
> + * consistent when hibernate-resume with different GPUs.
> + */
> + vram_addr = adev->vm_manager.vram_base_offset;
> + vram_end = vram_addr + vram_size;
> + }
> +
> /* The first n PDE0 entries are used as PTE,
> * pointing to vram
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index bd7fc123b8f9..46fac7ca7dfa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -307,6 +307,7 @@ struct amdgpu_gmc {
> struct amdgpu_bo *pdb0_bo;
> /* CPU kmapped address of pdb0*/
> void *ptr_pdb0;
> + bool pdb0_enabled;
This isn't needed, just always check (adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev)), make a function for that if necessary.
>
> /* MALL size */
> u64 mall_size;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> index cb25f7f0dfc1..e6165f6d0763 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
> @@ -180,7 +180,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
> /* In the case squeezing vram into GART aperture, we don't use
> * FB aperture and AGP aperture. Disable them.
> */
> - if (adev->gmc.pdb0_bo) {
> + if (adev->gmc.pdb0_bo && !amdgpu_virt_xgmi_migrate_enabled(adev)) {
> WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
> WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
> WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 59385da80185..04fb99c64b37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1682,6 +1682,8 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
> adev->gmc.private_aperture_start + (4ULL << 30) - 1;
> adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
>
> + adev->gmc.pdb0_enabled = adev->gmc.xgmi.connected_to_cpu ||
> + amdgpu_virt_xgmi_migrate_enabled(adev);
> return 0;
> }
>
> @@ -1726,7 +1728,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
>
> /* add the xgmi offset of the physical node */
> base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
> - if (adev->gmc.xgmi.connected_to_cpu) {
> + if (adev->gmc.pdb0_enabled) {
> amdgpu_gmc_sysvm_location(adev, mc);
> } else {
> amdgpu_gmc_vram_location(adev, mc, base);
> @@ -1841,7 +1843,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
> return 0;
> }
>
> - if (adev->gmc.xgmi.connected_to_cpu) {
> + if (adev->gmc.pdb0_enabled) {
> adev->gmc.vmid0_page_table_depth = 1;
> adev->gmc.vmid0_page_table_block_size = 12;
> } else {
> @@ -1867,7 +1869,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
> if (r)
> return r;
>
> - if (adev->gmc.xgmi.connected_to_cpu)
> + if (adev->gmc.pdb0_enabled)
> r = amdgpu_gmc_pdb0_alloc(adev);
> }
>
> @@ -2372,7 +2374,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
> {
> int r;
>
> - if (adev->gmc.xgmi.connected_to_cpu)
> + if (adev->gmc.pdb0_enabled)
> amdgpu_gmc_init_pdb0(adev);
>
> if (adev->gart.bo == NULL) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> index 84cde1239ee4..18e80aa78aff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
> @@ -45,8 +45,10 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
> top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
> top <<= 24;
>
> - adev->gmc.fb_start = base;
> - adev->gmc.fb_end = top;
> + if (!amdgpu_virt_xgmi_migrate_enabled(adev)) {
> + adev->gmc.fb_start = base;
> + adev->gmc.fb_end = top;
> + }
We should probably avoid calling this in the first place.
The function gmc_v9_0_vram_gtt_location() should probably be adjusted.
Regards,
Christian.
>
> return base;
> }
More information about the amd-gfx
mailing list