<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:DengXian;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Aptos;
panose-1:2 11 0 4 2 2 2 2 2 4;}
@font-face
{font-family:"\@DengXian";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
font-size:11.0pt;
font-family:"Calibri",sans-serif;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;
mso-ligatures:none;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
{page:WordSection1;}
--></style>
</head>
<body lang="en-CN" link="#467886" vlink="#96607D" style="word-wrap:break-word">
<p style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[AMD Official Use Only - AMD Internal Distribution Only]<br>
</p>
<br>
<div>
<div class="WordSection1">
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > <o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > + if (context->mem_context.shared_bo)<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > + context->mem_context.shared_mc_addr = amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> > +<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> <o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the shared_mc_addr member.<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> <o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif">> Please double check other similar cases as well.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">Removing of these members are easy to change and test, since they are used in one c file. I can make one refactoring patch for each removal.<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- ta_mem_context.shared_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- amdgpu_firmware.fw_buf_mc<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- psp_context.tmr_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- psp_context.cmd_buf_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- dummy_read_1_table->mc_address<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">Removing of these members will involve changes in multiple PSP and SMU version files. For versions that MI308 don't use, I don't have the environment to test the changes. Should
I remove them as well? <a id="OWAAM15AC2FC7572B7247A4F02F1FD462F32A" href="mailto:Christian.Koenig@amd.com">
<span style="font-family:"Aptos",sans-serif;text-decoration:none">@Koenig, Christian</span></a><o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- psp->fw_pri_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- psp->fence_buf_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- psp->km_ring.ring_mem_mc_addr<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- driver_table->mc_address<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">- pm_status_table->mc_address<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif"><o:p> </o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">Thanks<o:p></o:p></span></p>
<p class="MsoNormal"><span lang="EN-US" style="font-family:"Aptos",sans-serif">Sam<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Aptos",sans-serif"><o:p> </o:p></span></p>
<div id="mail-editor-reference-message-container">
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-bottom:12.0pt"><b><span style="font-size:12.0pt;font-family:"Aptos",sans-serif;color:black">From:
</span></b><span style="font-size:12.0pt;font-family:"Aptos",sans-serif;color:black">Koenig, Christian <Christian.Koenig@amd.com><br>
<b>Date: </b>Wednesday, April 30, 2025 at 20:51<br>
<b>To: </b>Zhang, GuoQing (Sam) <GuoQing.Zhang@amd.com>, amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Cc: </b>Zhao, Victor <Victor.Zhao@amd.com>, Chang, HaiJun <HaiJun.Chang@amd.com>, Deucher, Alexander <Alexander.Deucher@amd.com>, Jiang Liu <gerry@linux.alibaba.com><br>
<b>Subject: </b>Re: [PATCH v2 2/3] drm/amdgpu: update GPU addresses for SMU and PSP<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt">On 4/30/25 12:16, Samuel Zhang wrote:<br>
> add amdgpu_bo_fb_aper_addr() and update the cached GPU addresses to use<br>
> the FB aperture address for SMU and PSP.<br>
> <br>
> 2 reasons for this change:<br>
> 1. when pdb0 is enabled, gpu addr from amdgpu_bo_create_kernel() is GART<br>
> aperture address, it is not compatible with SMU and PSP, it need to updated<br>
> to use FB aperture address.<br>
> 2. Since FB aperture address will change after switching to new GPU<br>
> index after hibernation, it need to be updated after resume.<br>
> <br>
> Signed-off-by: Jiang Liu <gerry@linux.alibaba.com><br>
> Signed-off-by: Samuel Zhang <guoqing.zhang@amd.com><br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 +<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 +<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 18 ++++++++++++++++++<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 22 ++++++++++++++++++++++<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +++<br>
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 +++++++++++++++++<br>
> 7 files changed, 63 insertions(+)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c<br>
> index a2abddf3c110..ef6eaddc2ccb 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c<br>
> @@ -209,6 +209,7 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,<br>
> uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;<br>
> uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;<br>
> <br>
> + mc->vram_offset = base;<br>
> mc->vram_start = base;<br>
> mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;<br>
> if (limit < mc->real_vram_size)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
> index bd7fc123b8f9..291d96168a57 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h<br>
> @@ -251,6 +251,7 @@ struct amdgpu_gmc {<br>
> */<br>
> u64 vram_start;<br>
> u64 vram_end;<br>
> + u64 vram_offset;<br>
<br>
<br>
Please don't add any new fields here. We should already have all the necessary information in that structure.<br>
<br>
<br>
> /* FB region , it's same as local vram region in single GPU, in XGMI<br>
> * configuration, this region covers all GPUs in the same hive ,<br>
> * each GPU in the hive has the same view of this FB region .<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c<br>
> index 4e794d546b61..ca29270f66d1 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c<br>
> @@ -1513,6 +1513,24 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)<br>
> return amdgpu_bo_gpu_offset_no_check(bo);<br>
> }<br>
> <br>
> +/**<br>
> + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo<br>
> + * @bo: amdgpu VRAM buffer object for which we query the offset<br>
> + *<br>
> + * Returns:<br>
> + * current FB aperture GPU offset of the object.<br>
> + */<br>
> +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo)<br>
> +{<br>
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);<br>
> + uint64_t offset;<br>
> +<br>
> + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM);<br>
> +<br>
> + offset = (bo->tbo.resource->start << PAGE_SHIFT) + adev->gmc.vram_offset;<br>
<br>
Rather use fb_base + XGMI hive index here.<br>
<br>
> + return amdgpu_gmc_sign_extend(offset);<br>
> +}<br>
> +<br>
> /**<br>
> * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo<br>
> * @bo: amdgpu object for which we query the offset<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h<br>
> index dcce362bfad3..c8a63e38f5d9 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h<br>
> @@ -320,6 +320,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,<br>
> bool intr);<br>
> int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);<br>
> u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);<br>
> +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo);<br>
> u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);<br>
> uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);<br>
> uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c<br>
> index e1e658a97b36..bdab40b42983 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c<br>
> @@ -871,6 +871,8 @@ static int psp_tmr_init(struct psp_context *psp)<br>
> &psp->tmr_bo, &psp->tmr_mc_addr,<br>
> pptr);<br>
> }<br>
> + if (psp->tmr_bo)<br>
> + psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo);<br>
> <br>
> return ret;<br>
> }<br>
> @@ -1210,6 +1212,9 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,<br>
> cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);<br>
> cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;<br>
> <br>
> + if (context->mem_context.shared_bo)<br>
> + context->mem_context.shared_mc_addr = amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);<br>
> +<br>
<br>
Rather put this into the psp_prep_ta_load_cmd_buf() function and remove the shared_mc_addr member.<br>
<br>
Please double check other similar cases as well.<br>
<br>
Apart from that this looks rather good to me,<br>
Christian.<br>
<br>
<br>
> cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo =<br>
> lower_32_bits(context->mem_context.shared_mc_addr);<br>
> cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi =<br>
> @@ -2336,11 +2341,26 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)<br>
> return false;<br>
> }<br>
> <br>
> +static void psp_update_gpu_addresses(struct amdgpu_device *adev)<br>
> +{<br>
> + struct psp_context *psp = &adev->psp;<br>
> +<br>
> + if (psp->cmd_buf_bo && psp->cmd_buf_mem) {<br>
> + psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo);<br>
> + psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo);<br>
> + psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo);<br>
> + }<br>
> + if (adev->firmware.rbuf && psp->km_ring.ring_mem)<br>
> + psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf);<br>
> +}<br>
> +<br>
> static int psp_hw_start(struct psp_context *psp)<br>
> {<br>
> struct amdgpu_device *adev = psp->adev;<br>
> int ret;<br>
> <br>
> + psp_update_gpu_addresses(adev);<br>
> +<br>
> if (!amdgpu_sriov_vf(adev)) {<br>
> if ((is_psp_fw_valid(psp->kdb)) &&<br>
> (psp->funcs->bootloader_load_kdb != NULL)) {<br>
> @@ -3976,6 +3996,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,<br>
> memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size);<br>
> <br>
> mutex_lock(&adev->psp.mutex);<br>
> + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo);<br>
> ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr);<br>
> mutex_unlock(&adev->psp.mutex);<br>
> <br>
> @@ -4085,6 +4106,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,<br>
> memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, adev->psp.vbflash_image_size);<br>
> <br>
> mutex_lock(&adev->psp.mutex);<br>
> + fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(fw_buf_bo);<br>
> ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr);<br>
> mutex_unlock(&adev->psp.mutex);<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c<br>
> index 3d9e9fdc10b4..f3b56c219e7e 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c<br>
> @@ -1152,6 +1152,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)<br>
> adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;<br>
> }<br>
> <br>
> + if (adev->firmware.fw_buf)<br>
> + adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf);<br>
> +<br>
> for (i = 0; i < adev->firmware.max_ucodes; i++) {<br>
> ucode = &adev->firmware.ucode[i];<br>
> if (ucode->fw) {<br>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c<br>
> index 315b0856bf02..dfdda98cf0c5 100644<br>
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c<br>
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c<br>
> @@ -1000,6 +1000,21 @@ static int smu_fini_fb_allocations(struct smu_context *smu)<br>
> return 0;<br>
> }<br>
> <br>
> +static void smu_update_gpu_addresses(struct smu_context *smu)<br>
> +{<br>
> + struct smu_table_context *smu_table = &smu->smu_table;<br>
> + struct smu_table *pm_status_table = smu_table->tables + SMU_TABLE_PMSTATUSLOG;<br>
> + struct smu_table *driver_table = &(smu_table->driver_table);<br>
> + struct smu_table *dummy_read_1_table = &smu_table->dummy_read_1_table;<br>
> +<br>
> + if (pm_status_table->bo)<br>
> + pm_status_table->mc_address = amdgpu_bo_fb_aper_addr(pm_status_table->bo);<br>
> + if (driver_table->bo)<br>
> + driver_table->mc_address = amdgpu_bo_fb_aper_addr(driver_table->bo);<br>
> + if (dummy_read_1_table->bo)<br>
> + dummy_read_1_table->mc_address = amdgpu_bo_fb_aper_addr(dummy_read_1_table->bo);<br>
> +}<br>
> +<br>
> /**<br>
> * smu_alloc_memory_pool - allocate memory pool in the system memory<br>
> *<br>
> @@ -1789,6 +1804,8 @@ static int smu_start_smc_engine(struct smu_context *smu)<br>
> struct amdgpu_device *adev = smu->adev;<br>
> int ret = 0;<br>
> <br>
> + smu_update_gpu_addresses(smu);<br>
> +<br>
> smu->smc_fw_state = SMU_FW_INIT;<br>
> <br>
> if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {</p>
</div>
</div>
</div>
</div>
</div>
</body>
</html>