<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Thanks very much! Please review again.</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Rico<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Tuikov, Luben <Luben.Tuikov@amd.com><br>
<b>Sent:</b> Wednesday, October 16, 2019 1:59<br>
<b>To:</b> Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Cc:</b> Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com><br>
<b>Subject:</b> Re: [PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">On 2019-10-13 11:21 p.m., Tianci Yin wrote:<br>
> From: "Tianci.Yin" <tianci.yin@amd.com><br>
> <br>
> add memory training implementation code to save resume time.<br>
> <br>
> Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9<br>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com><br>
> Signed-off-by: Tianci.Yin <tianci.yin@amd.com><br>
> ---<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   9 ++<br>
>  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 159 ++++++++++++++++++++++++<br>
>  3 files changed, 169 insertions(+)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index 8704f93cabf2..c2b776fd82b5 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum;<br>
>  extern char *amdgpu_disable_cu;<br>
>  extern char *amdgpu_virtual_display;<br>
>  extern uint amdgpu_pp_feature_mask;<br>
> +extern uint amdgpu_force_long_training;<br>
>  extern int amdgpu_job_hang_limit;<br>
>  extern int amdgpu_lbpw;<br>
>  extern int amdgpu_compute_multipipe;<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> index da7cbee25c61..c7d086569acb 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c<br>
> @@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL;<br>
>  char *amdgpu_virtual_display = NULL;<br>
>  /* OverDrive(bit 14) disabled by default*/<br>
>  uint amdgpu_pp_feature_mask = 0xffffbfff;<br>
> +uint amdgpu_force_long_training = 0;<br>
>  int amdgpu_job_hang_limit = 0;<br>
>  int amdgpu_lbpw = -1;<br>
>  int amdgpu_compute_multipipe = -1;<br>
> @@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);<br>
>  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");<br>
>  module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);<br>
>  <br>
> +/**<br>
> + * DOC: forcelongtraining (uint)<br>
> + * Force long memory training in resume.<br>
> + * The default is zero, indicates short training in resume.<br>
> + */<br>
> +MODULE_PARM_DESC(forcelongtraining, "force memory long training");<br>
> +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);<br>
> +<br>
>  /**<br>
>   * DOC: pcie_gen_cap (uint)<br>
>   * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c<br>
> index 2ba0f68ced10..b7efaa3e913c 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c<br>
> @@ -902,6 +902,162 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)<br>
>        return psp_rlc_autoload_start(psp);<br>
>  }<br>
>  <br>
> +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)<br>
> +{<br>
> +     int ret = 0;<br>
> +     int i = 0;<br>
> +     uint32_t data_32 = 0;<br>
<br>
NAK!<br>
<br>
Leave all of those integer variables uninitialized.<br>
<br>
> +     struct amdgpu_device *adev = psp->adev;<br>
> +<br>
> +     data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);<br>
> +     WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);<br>
> +     WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);<br>
> +<br>
> +     /*max 5s*/<br>
> +     while (i < 50) {<br>
> +             ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),<br>
> +                                0x80000000, 0x80000000, false);<br>
> +             if (ret == 0)<br>
> +                     break;<br>
> +             i++;<br>
> +     }<br>
<br>
NAK! <br>
<br>
For-loop please:<br>
<br>
for (i = 0; i < 50; i++) {<br>
        ret = ...;<br>
}<br>
<br>
Regards,<br>
Luben<br>
<br>
> +     DRM_DEBUG("%s training %s, cost %d * %dms.\n",<br>
> +               (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",<br>
> +               (ret == 0) ? "succeed" : "failed",<br>
> +               i, adev->usec_timeout/1000);<br>
> +     return ret;<br>
> +}<br>
> +<br>
> +static int psp_v11_0_memory_training_fini(struct psp_context *psp)<br>
> +{<br>
> +     int ret = 0;<br>
> +     struct psp_memory_training_context *ctx = &psp->mem_train_ctx;<br>
> +<br>
> +     ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;<br>
> +     if(ctx->sys_cache) {<br>
> +             kfree(ctx->sys_cache);<br>
> +             ctx->sys_cache = NULL;<br>
> +     }<br>
> +<br>
> +     return ret;<br>
> +}<br>
> +<br>
> +static int psp_v11_0_memory_training_init(struct psp_context *psp)<br>
> +{<br>
> +     int ret = 0;<br>
> +     struct psp_memory_training_context *ctx = &psp->mem_train_ctx;<br>
> +<br>
> +     if(ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {<br>
> +             DRM_DEBUG("memory training does not support!\n");<br>
> +             return 0;<br>
> +     }<br>
> +<br>
> +     ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);<br>
> +     if(ctx->sys_cache == NULL) {<br>
> +             DRM_ERROR("alloc mem_train_ctx.sys_cache failed(%d)!\n", ret);<br>
> +             ret = -ENOMEM;<br>
> +             goto Err_out;<br>
> +     }<br>
> +<br>
> +     DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",<br>
> +               ctx->train_data_size,<br>
> +               ctx->p2c_train_data_offset,<br>
> +               ctx->c2p_train_data_offset);<br>
> +     ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;<br>
> +     return 0;<br>
> +<br>
> +Err_out:<br>
> +     psp_v11_0_memory_training_fini(psp);<br>
> +     return ret;<br>
> +}<br>
> +<br>
> +/*<br>
> + * save and restore proces<br>
> + */<br>
> +static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)<br>
> +{<br>
> +     int ret = 0;<br>
> +     uint32_t p2c_header[4];<br>
> +     struct psp_memory_training_context *ctx = &psp->mem_train_ctx;<br>
> +     uint32_t *pcache = (uint32_t*)ctx->sys_cache;<br>
> +<br>
> +     if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {<br>
> +             DRM_DEBUG("Memory training does not support.\n");<br>
> +             return 0;<br>
> +     } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {<br>
> +             DRM_ERROR("Please check initialization failure.\n");<br>
> +             return -EINVAL;<br>
> +     }<br>
> +<br>
> +     if (psp_v11_0_is_sos_alive(psp)) {<br>
> +             DRM_DEBUG("sos is alive, skip memory training.\n");<br>
> +             return 0;<br>
> +     }<br>
> +<br>
> +     amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);<br>
> +     DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",<br>
> +               pcache[0], pcache[1], pcache[2], pcache[3],<br>
> +               p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {<br>
> +             DRM_DEBUG("short training depend on restore.\n");<br>
> +             ops |= PSP_MEM_TRAIN_RESTORE;<br>
> +     }<br>
> +<br>
> +     if ((ops & PSP_MEM_TRAIN_RESTORE) &&<br>
> +         pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {<br>
> +             DRM_DEBUG("sys_cache[0] is invalid, restore depend on save.\n");<br>
> +             ops |= PSP_MEM_TRAIN_SAVE;<br>
> +     }<br>
> +<br>
> +     if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&<br>
> +         !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&<br>
> +           pcache[3] == p2c_header[3])) {<br>
> +             DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");<br>
> +             ops |= PSP_MEM_TRAIN_SAVE;<br>
> +     }<br>
> +<br>
> +     if ((ops & PSP_MEM_TRAIN_SAVE) &&<br>
> +         p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {<br>
> +             DRM_DEBUG("p2c_header[0] is invalid, save depend on long training.\n");<br>
> +             ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;<br>
> +     }<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {<br>
> +             ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;<br>
> +             ops |= PSP_MEM_TRAIN_SAVE;<br>
> +     }<br>
> +<br>
> +     DRM_DEBUG("mem training ops:%x.\n", ops);<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {<br>
> +             ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);<br>
> +             if (ret) {<br>
> +                     DRM_ERROR("send long training msg failed.\n");<br>
> +                     return ret;<br>
> +             }<br>
> +     }<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_SAVE) {<br>
> +             amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);<br>
> +     }<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_RESTORE) {<br>
> +             amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);<br>
> +     }<br>
> +<br>
> +     if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {<br>
> +             ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?<br>
> +                                                      PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);<br>
> +             if (ret) {<br>
> +                     DRM_ERROR("send training msg failed.\n");<br>
> +                     return ret;<br>
> +             }<br>
> +     }<br>
> +     ctx->training_cnt++;<br>
> +     return ret;<br>
> +}<br>
> +<br>
>  static const struct psp_funcs psp_v11_0_funcs = {<br>
>        .init_microcode = psp_v11_0_init_microcode,<br>
>        .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,<br>
> @@ -922,6 +1078,9 @@ static const struct psp_funcs psp_v11_0_funcs = {<br>
>        .ras_trigger_error = psp_v11_0_ras_trigger_error,<br>
>        .ras_cure_posion = psp_v11_0_ras_cure_posion,<br>
>        .rlc_autoload_start = psp_v11_0_rlc_autoload_start,<br>
> +     .mem_training_init = psp_v11_0_memory_training_init,<br>
> +     .mem_training_fini = psp_v11_0_memory_training_fini,<br>
> +     .mem_training = psp_v11_0_memory_training,<br>
>  };<br>
>  <br>
>  void psp_v11_0_set_psp_funcs(struct psp_context *psp)<br>
> <br>
<br>
</div>
</span></font></div>
</body>
</html>