[PATCH 4/4] drm/amdgpu: add module parameter choose reset mode

Christian König ckoenig.leichtzumerken at gmail.com
Tue Jul 14 08:35:45 UTC 2020


Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
> Default value is auto, doesn't change
> original reset method logic.
>
> v2: change to use parameter reset_method
> v3: add warn msg if specified mode isn't supported
>
> Signed-off-by: Likun Gao <Likun.Gao at amd.com>
> Signed-off-by: Wenhui Sheng <Wenhui.Sheng at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>   drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>   7 files changed, 43 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4de93cef79b9..06bfb8658dec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>   #endif
>   
>   extern int amdgpu_tmz;
> +extern int amdgpu_reset_method;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   extern int amdgpu_si_support;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 94c83a9d4987..581d5fcac361 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>   int amdgpu_noretry = 1;
>   int amdgpu_force_asic_type = -1;
>   int amdgpu_tmz = 0;
> +int amdgpu_reset_method = -1; /* auto */
>   
>   struct amdgpu_mgpu_info mgpu_info = {
>   	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>   MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>   module_param_named(tmz, amdgpu_tmz, int, 0444);
>   
> +/**
> + * DOC: reset_method (int)
> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
> + */
> +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> +module_param_named(reset_method, amdgpu_reset_method, int, 0444);
> +
>   static const struct pci_device_id pciidlist[] = {
>   #ifdef  CONFIG_DRM_AMDGPU_SI
>   	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index fe306d0f73f7..310bcf81256f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)

Since the preceding branch returns anyway, you can drop the else here, and 
in the other places where the same pattern occurs as well.

Apart from that the patch looks good to me.

We usually try to avoid adding more module parameters, but I think this 
one is really justified.

Thanks,
Christian.

> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_BONAIRE:
>   	case CHIP_HAWAII:
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index 9f1240bd0310..486321bcab8f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	struct smu_context *smu = &adev->smu;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	if (smu_baco_is_support(smu))
>   		return AMD_RESET_METHOD_BACO;
>   	else
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index 9d7b4ccd17b8..1b449291f068 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>   static enum amd_reset_method
>   si_asic_reset_method(struct amdgpu_device *adev)
>   {
> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
> +	    amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	return AMD_RESET_METHOD_LEGACY;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8c739b285915..40b343b25588 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>   	bool baco_reset = false;
>   	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 4e5e91888d87..e4628c17802f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_FIJI:
>   	case CHIP_TONGA:



More information about the amd-gfx mailing list