[PATCH 8/8] drm/amdgpu/psp: add psp memory training implementation

Tuikov, Luben Luben.Tuikov at amd.com
Fri Oct 11 23:44:23 UTC 2019


On 2019-10-10 11:50 p.m., Tianci Yin wrote:
> From: "Tianci.Yin" <tianci.yin at amd.com>
> 
> add memory training implementation code to save resume time.
> 
> Change-Id: I625794a780b11d824ab57ef39cc33b872c6dc6c9
> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Tianci.Yin <tianci.yin at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   9 ++
>  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 171 ++++++++++++++++++++++++
>  3 files changed, 181 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e3d715c31ac9..03c5c18bb51e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -151,6 +151,7 @@ extern uint amdgpu_sdma_phase_quantum;
>  extern char *amdgpu_disable_cu;
>  extern char *amdgpu_virtual_display;
>  extern uint amdgpu_pp_feature_mask;
> +extern uint amdgpu_force_long_training;
>  extern int amdgpu_job_hang_limit;
>  extern int amdgpu_lbpw;
>  extern int amdgpu_compute_multipipe;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 5985bd79216d..f03dcc5d185d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -127,6 +127,7 @@ char *amdgpu_disable_cu = NULL;
>  char *amdgpu_virtual_display = NULL;
>  /* OverDrive(bit 14) disabled by default*/
>  uint amdgpu_pp_feature_mask = 0xffffbfff;
> +uint amdgpu_force_long_training = 0;
>  int amdgpu_job_hang_limit = 0;
>  int amdgpu_lbpw = -1;
>  int amdgpu_compute_multipipe = -1;
> @@ -390,6 +391,14 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
>  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
>  module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
>  
> +/**
> + * DOC: forcelongtraining (uint)
> + * Force long memory training in resume.
> + * The default is zero, indicates short training in resume.
> + */
> +MODULE_PARM_DESC(forcelongtraining, "force memory long training");
> +module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
> +
>  /**
>   * DOC: pcie_gen_cap (uint)
>   * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 2ba0f68ced10..074f23c846cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -902,6 +902,174 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
>  	return psp_rlc_autoload_start(psp);
>  }
>  
> +static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
> +{
> +	int ret = 0;
> +	int i = 0;
> +	uint32_t data_32 = 0;
> +	struct amdgpu_device *adev = psp->adev;

NAK! to preinitializing variables.

> +
> +	data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
> +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
> +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
> +
> +	/*max 5s*/
> +	while (i < 50) {
> +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
> +				   0x80000000, 0x80000000, false);

Hmm, none of the loop body depends on "i"?
So, conceivably one could do this one time instead of 50?
Interesting... So perhaps the "50" couuld be a macro explaining
what kind of constant it is...

> +		if (ret == 0)
> +			break;
> +		i++;
> +	}

NAK to broken up loop definition. Use a for-loop:

for (i = 0; i < 50; i++) {
	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
			   0x80000000, 0x80000000, false);
	if (ret)
		break;
}

This makes the code more secure as well as the loop-body relocatable
into a function, etc. As well as obvious that the body
of the loop does NOT depend on "i" and therefore "50" is completely
arbitratry.

> +	DRM_DEBUG("%s training %s, cost %d * %dms.\n",
> +		  (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
> +		  (ret == 0) ? "succeed" : "failed",
> +		  i, adev->usec_timeout/1000);
> +	return ret;
> +}
> +
> +static int psp_v11_0_memory_training_fini(struct psp_context *psp)
> +{
> +	int ret = 0;

NAK!

> +	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
> +
> +	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
> +	if(ctx->sys_cache) {

Space after keyword: "if (...".

> +		kfree(ctx->sys_cache);
> +		ctx->sys_cache = NULL;
> +	}
> +
> +	return ret;

NAK!
This function should be "void"! Like all "free" functions.

> +}
> +
> +static int psp_v11_0_memory_training_init(struct psp_context *psp)
> +{
> +	int ret = 0;

NAK due to pre-initializing "ret".

> +	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
> +
> +	if(ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
> +		DRM_DEBUG("memory training does not support!\n");

Perhaps you mean:
		DRM_DEBUG("Memory training is not supported!\n");

> +		return 0;
> +	}> +
> +	ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
> +	if(ctx->sys_cache == NULL) {

Space after keyword "if (...".

> +		DRM_ERROR("alloc mem_train_ctx.sys_cache failed(%d)!\n", ret);
> +		ret = -ENOMEM;
> +		goto err_out;
> +	}
> +
> +	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
> +		  ctx->train_data_size,
> +		  ctx->p2c_train_data_offset,
> +		  ctx->c2p_train_data_offset);
> +	ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
> +	return 0;
> +
> +err_out:
> +	psp_v11_0_memory_training_fini(psp);
> +	return ret;
> +}
> +
> +/*
> + * save and restore proces
> + */
> +static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
> +{
> +	int ret = 0;

NAK to pre-initializing return value of function which you don't know
apriori if it will succeed or fail.

Simply, don't pre-initialize automatic variables.

> +	uint32_t p2c_header[4];
> +	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
> +	uint32_t *pcache = (uint32_t*)ctx->sys_cache;
> +
> +	if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
> +		DRM_DEBUG("Memory training does not support.\n");

"Memory training is not supported!"

> +		return 0;
> +	} else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
> +		DRM_ERROR("Please check initialization failure.\n");

"Memory training initialization failure."

> +		return -EINVAL;
> +	}
> +
> +	if (psp_v11_0_is_sos_alive(psp)) {
> +		DRM_DEBUG("sos is alive, skip memory training.\n");
> +		return 0;
> +	}
> +
> +	ret = amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
> +	if (ret) {
> +		DRM_ERROR("read p2c header failed.\n");

"P2C header read failed."

> +		return ret;
> +	}
> +	DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
> +		  pcache[0], pcache[1], pcache[2], pcache[3],
> +		  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
> +
> +	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
> +		DRM_DEBUG("short training depend on restore.\n");

Change "depend" --> "depends".
also feel free to start your sentences with a capital letter.

"Short training depends on restore."

Regards,
Luben

> +		ops |= PSP_MEM_TRAIN_RESTORE;
> +	}
> +
> +	if ((ops & PSP_MEM_TRAIN_RESTORE) &&
> +	    pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
> +		DRM_DEBUG("sys_cache[0] is invalid, restore depend on save.\n");
> +		ops |= PSP_MEM_TRAIN_SAVE;
> +	}
> +
> +	if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
> +	    !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
> +	      pcache[3] == p2c_header[3])) {
> +		DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
> +		ops |= PSP_MEM_TRAIN_SAVE;
> +	}
> +
> +	if ((ops & PSP_MEM_TRAIN_SAVE) &&
> +	    p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
> +		DRM_DEBUG("p2c_header[0] is invalid, save depend on long training.\n");
> +		ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
> +	}
> +
> +	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> +		ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
> +		ops |= PSP_MEM_TRAIN_SAVE;
> +	}
> +
> +	DRM_DEBUG("mem training ops:%x.\n", ops);
> +
> +	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> +		ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
> +		if (ret) {
> +			DRM_ERROR("send long training msg failed.\n");
> +			return ret;
> +		}
> +	}
> +
> +	if (ops & PSP_MEM_TRAIN_SAVE) {
> +		ret = amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
> +		if (ret) {
> +			DRM_ERROR("read training data from vram failed.\n");
> +			return ret;
> +		}
> +	}
> +
> +	if (ops & PSP_MEM_TRAIN_RESTORE) {
> +		ret = amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
> +		if (ret) {
> +			DRM_ERROR("write training data to vram failed.\n");
> +			return ret;
> +		}
> +	}
> +
> +	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
> +		ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
> +							 PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
> +		if (ret) {
> +			DRM_ERROR("send training msg failed.\n");
> +			return ret;
> +		}
> +	}
> +	ctx->training_cnt++;
> +	return ret;
> +}
> +
>  static const struct psp_funcs psp_v11_0_funcs = {
>  	.init_microcode = psp_v11_0_init_microcode,
>  	.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
> @@ -922,6 +1090,9 @@ static const struct psp_funcs psp_v11_0_funcs = {
>  	.ras_trigger_error = psp_v11_0_ras_trigger_error,
>  	.ras_cure_posion = psp_v11_0_ras_cure_posion,
>  	.rlc_autoload_start = psp_v11_0_rlc_autoload_start,
> +	.mem_training_init = psp_v11_0_memory_training_init,
> +	.mem_training_fini = psp_v11_0_memory_training_fini,
> +	.mem_training = psp_v11_0_memory_training,
>  };
>  
>  void psp_v11_0_set_psp_funcs(struct psp_context *psp)
> 



More information about the amd-gfx mailing list