[PATCH 4/6] drm/amdgpu: enable pdb0 for hibernation on SRIOV

Samuel Zhang guoqing.zhang at amd.com
Mon Apr 14 10:46:53 UTC 2025


When switching to new GPU index after hibernation and then resume,
VRAM offset of each VRAM BO will be changed, and the cached gpu
addresses needed to updated.

This is to enable pdb0 and switch to use pdb0-based virtual gpu
address by default in amdgpu_bo_create_reserved(). since the virtual
addresses do not change, this can avoid the need to update all
cached gpu addresses all over the codebase.

Signed-off-by: Emily Deng <Emily.Deng at amd.com>
Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
Change-Id: I2b20b9b94f1e41820a013ce5d05bb3fa96859b21
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c    | 43 +++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  8 +++-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 29 +++++++++------
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c    | 30 ++++++++++++---
 6 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5b60d714e089..e706afcb7e95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -248,18 +248,25 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
 void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 {
 	u64 hive_vram_start = 0;
-	u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
-	mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
-	mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+	u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes;
+
+	hive_vram_end = ALIGN(hive_vram_end, (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21) - 1;
+
+	if (!mc->vram_start) {
+		mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+		mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+		dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+			mc->mc_vram_size >> 20, mc->vram_start,
+			mc->vram_end, mc->real_vram_size >> 20);
+	}
+
 	mc->gart_start = hive_vram_end + 1;
 	mc->gart_end = mc->gart_start + mc->gart_size - 1;
 	mc->fb_start = hive_vram_start;
 	mc->fb_end = hive_vram_end;
-	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
-			mc->mc_vram_size >> 20, mc->vram_start,
-			mc->vram_end, mc->real_vram_size >> 20);
-	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
-			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+
+	dev_info(adev->dev, "FB 0x%016llX - 0x%016llX, GART: %lluM 0x%016llX - 0x%016llX\n",
+			mc->fb_start, mc->fb_end, mc->gart_size >> 20, mc->gart_start, mc->gart_end);
 }
 
 /**
@@ -677,8 +684,9 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 				     &job);
 	if (r)
 		goto error_alloc;
-
-	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+							adev->gmc.pdb0_bo :
+							adev->gart.bo);
 	job->vm_needs_flush = true;
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
@@ -1041,8 +1049,9 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 	 */
 	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
 	u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
-	u64 vram_addr = adev->vm_manager.vram_base_offset -
+	u64 vram_addr_first = adev->vm_manager.vram_base_offset -
 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+	u64 vram_addr = adev->vm_manager.vram_base_offset;
 	u64 vram_end = vram_addr + vram_size;
 	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
 	int idx;
@@ -1056,11 +1065,19 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 	flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
 	flags |= AMDGPU_PDE_PTE_FLAG(adev);
 
+	if (adev->gmc.xgmi.connected_to_cpu) {
+		vram_addr = vram_addr_first;
+		vram_end = vram_addr + vram_size;
+	}
+
 	/* The first n PDE0 entries are used as PTE,
 	 * pointing to vram
 	 */
-	for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
-		amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+	for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size) {
+		amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i,
+			(vram_addr >= vram_addr_first + vram_size) ? (vram_addr - vram_size) : vram_addr,
+			flags);
+	}
 
 	/* The n+1'th PDE0 entry points to a huge
 	 * PTB who has more than 512 entries each
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index bd7fc123b8f9..758b47240c6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -307,6 +307,7 @@ struct amdgpu_gmc {
 	struct amdgpu_bo		*pdb0_bo;
 	/* CPU kmapped address of pdb0*/
 	void				*ptr_pdb0;
+	bool enable_pdb0;
 
 	/* MALL size */
 	u64 mall_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d90e9daf5a50..83a3444c69d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -287,8 +287,14 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 		goto error_unpin;
 	}
 
-	if (gpu_addr)
+	if (gpu_addr) {
 		*gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+		if (!adev->gmc.xgmi.connected_to_cpu && adev->gmc.enable_pdb0) {
+			if ((*bo_ptr)->tbo.resource->mem_type == TTM_PL_VRAM) {
+				*gpu_addr -= amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+			}
+		}
+	}
 
 	if (cpu_addr) {
 		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index cb25f7f0dfc1..5ebb92ac9fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -180,7 +180,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
 		/* In the case squeezing vram into GART aperture, we don't use
 		 * FB aperture and AGP aperture. Disable them.
 		 */
-		if (adev->gmc.pdb0_bo) {
+		if (adev->gmc.pdb0_bo && !amdgpu_sriov_vf(adev)) {
 			WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
 			WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
 			WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 7c7a9fe6be6d..73ac05b9a1bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1677,6 +1677,10 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 	adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
 
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+	    adev->gmc.enable_pdb0 = amdgpu_sriov_vf(adev);
 	return 0;
 }
 
@@ -1719,6 +1723,14 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = adev->mmhub.funcs->get_fb_location(adev);
 
+	if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0) {
+		adev->gmc.vmid0_page_table_depth = 1;
+		adev->gmc.vmid0_page_table_block_size = 12;
+	} else {
+		adev->gmc.vmid0_page_table_depth = 0;
+		adev->gmc.vmid0_page_table_block_size = 0;
+	}
+
 	amdgpu_gmc_set_agp_default(adev, mc);
 
 	/* add the xgmi offset of the physical node */
@@ -1727,7 +1739,10 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 		amdgpu_gmc_sysvm_location(adev, mc);
 	} else {
 		amdgpu_gmc_vram_location(adev, mc, base);
-		amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+		if (!adev->gmc.enable_pdb0)
+			amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+		else
+			amdgpu_gmc_sysvm_location(adev, mc);
 		if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
 			amdgpu_gmc_agp_location(adev, mc);
 	}
@@ -1838,14 +1853,6 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 		return 0;
 	}
 
-	if (adev->gmc.xgmi.connected_to_cpu) {
-		adev->gmc.vmid0_page_table_depth = 1;
-		adev->gmc.vmid0_page_table_block_size = 12;
-	} else {
-		adev->gmc.vmid0_page_table_depth = 0;
-		adev->gmc.vmid0_page_table_block_size = 0;
-	}
-
 	/* Initialize common gart structure */
 	r = amdgpu_gart_init(adev);
 	if (r)
@@ -1864,7 +1871,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 		if (r)
 			return r;
 
-		if (adev->gmc.xgmi.connected_to_cpu)
+		if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0)
 			r = amdgpu_gmc_pdb0_alloc(adev);
 	}
 
@@ -2361,7 +2368,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->gmc.xgmi.connected_to_cpu)
+	if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.enable_pdb0)
 		amdgpu_gmc_init_pdb0(adev);
 
 	if (adev->gart.bo == NULL) {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index fe0710b55c3a..13b229d07ac4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -74,27 +74,47 @@ static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hu
 static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
 					       int hubid)
 {
-	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ? adev->gmc.pdb0_bo : adev->gart.bo);
 
 	mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
 
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+	if (adev->gmc.pdb0_bo) {
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+				    (u32)(adev->gmc.fb_start >> 12));
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+				    (u32)(adev->gmc.fb_start >> 44));
+
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+				    (u32)(adev->gmc.gart_end >> 12));
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+				    (u32)(adev->gmc.gart_end >> 44));
+	} else {
+		WREG32_SOC15_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			    (u32)(adev->gmc.gart_start >> 12));
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+		WREG32_SOC15_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			    (u32)(adev->gmc.gart_start >> 44));
 
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+		WREG32_SOC15_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			    (u32)(adev->gmc.gart_end >> 12));
-	WREG32_SOC15_OFFSET(MMHUB, 0,
+		WREG32_SOC15_OFFSET(MMHUB, 0,
 			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
 			    (u32)(adev->gmc.gart_end >> 44));
+	}
 }
 
 static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-- 
2.43.5



More information about the amd-gfx mailing list