[PATCH 2/5 V2] drm/amdgpu: Add sysfs interface for sdma reset mask

Christian König christian.koenig at amd.com
Wed Oct 23 07:11:08 UTC 2024



Am 23.10.24 um 04:43 schrieb Jesse.zhang at amd.com:
> Add the sysfs interface for sdma:
> sdma_reset_mask
>
> The interface is read-only and shows the resets supported by the IP.
> For example, full adapter reset (mode1/mode2/BACO/etc),
> soft reset, queue reset, and pipe reset.
>
> V2: the sysfs node returns a text string instead of some flags (Christian)
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> Suggested-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 48 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  2 +
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   |  5 +++
>   drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   |  5 +++
>   10 files changed, 90 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 183a976ba29d..f20b7285f5fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -343,3 +343,51 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
>   
>   	return 0;
>   }
> +
> +static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
> +						struct device_attribute *attr,
> +						char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->sdma.instance[0].ring;
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +

> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size += sysfs_emit_at(buf, size, "full ");
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +			&& !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)

A total nitpick, but the indentation here is wrong and I think 
checkpatch.pl might complain about that.

In general, the Linux kernel coding style says that for multi-line "if" 
conditions you should indent the continuation with tabs and then spaces, 
so that in this case the && is aligned under the amdgpu_gpu_recovery of 
the previous line.

> +		size += sysfs_emit_at(buf, size, "soft ");
> +
> +	if (amdgpu_gpu_recovery && ring->funcs->reset)
> +		size += sysfs_emit_at(buf, size, "queue ");

You could add a generic helper which takes the ring as a parameter and 
prints "full soft queue" into the buffer. Patch #1 is kind of special 
because of the FW limitations, but that should make patches #2-#5 a bit 
smaller and use more generic code.

And should we print the strings in the order they are applied? In other 
words, "soft queue full"?

Apart from that the patches look totally clean to me.

Regards,
Christian.

> +
> +	size += sysfs_emit_at(buf, size, "\n");
> +	return size;
> +}
> +
> +static DEVICE_ATTR(sdma_reset_mask, 0444,
> +		   amdgpu_get_sdma_reset_mask, NULL);
> +
> +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
> +{
> +	int r = 0;
> +
> +	if (adev->sdma.num_instances) {
> +		r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
> +		if (r)
> +			return r;
> +	}
> +
> +	return r;
> +}
> +
> +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> +{
> +	if (adev->sdma.num_instances)
> +		device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 087ce0f6fa07..3058548d0733 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -175,5 +175,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
>   void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
>           bool duplicate);
>   int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
> +int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
> +void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>   
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 10fd772cb80f..bd04310cb2b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -863,6 +863,10 @@ static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
>   			return r;
>   	}
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -874,6 +878,7 @@ static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	sdma_v2_4_free_microcode(adev);
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 69fba087e09c..1fcf7e977143 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -1149,6 +1149,10 @@ static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
>   			return r;
>   	}
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1160,6 +1164,7 @@ static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	sdma_v3_0_free_microcode(adev);
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index c1f98f6cf20d..19fe25cbb24e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1926,6 +1926,10 @@ static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	else
>   		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1940,6 +1944,7 @@ static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   			amdgpu_ring_fini(&adev->sdma.instance[i].page);
>   	}
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) ||
>   	    amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0))
>   		amdgpu_sdma_destroy_inst_ctx(adev, true);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 9c7cea0890c9..320b1e63b78b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -1442,6 +1442,10 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
>   	else
>   		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1456,6 +1460,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
>   			amdgpu_ring_fini(&adev->sdma.instance[i].page);
>   	}
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
>   	    amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
>   		amdgpu_sdma_destroy_inst_ctx(adev, true);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 6a675daf5620..00dcae89119e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1459,6 +1459,10 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	else
>   		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1470,6 +1474,7 @@ static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	amdgpu_sdma_destroy_inst_ctx(adev, false);
>   
>   	kfree(adev->sdma.ip_dump);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index e1413ccaf7e4..dab4210c4401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1364,6 +1364,10 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
>   	else
>   		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1375,6 +1379,7 @@ static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	amdgpu_sdma_destroy_inst_ctx(adev, true);
>   
>   	kfree(adev->sdma.ip_dump);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 4856a093e23f..e90d9ab65017 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -1367,6 +1367,10 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs;
>   #endif
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1378,6 +1382,7 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	amdgpu_sdma_destroy_inst_ctx(adev, true);
>   
>   	kfree(adev->sdma.ip_dump);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 24f24974ac1d..650bb470426f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -1317,6 +1317,10 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	else
>   		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
>   
> +	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return r;
>   }
>   
> @@ -1328,6 +1332,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	for (i = 0; i < adev->sdma.num_instances; i++)
>   		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
>   
> +	amdgpu_sdma_sysfs_reset_mask_fini(adev);
>   	amdgpu_sdma_destroy_inst_ctx(adev, true);
>   
>   	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)



More information about the amd-gfx mailing list