[PATCH 2/2] drm/amdgpu: add AMDGPURESET uevent on AMD GPU reset

Somalapuram, Amaranath Amaranath.Somalapuram at amd.com
Mon Jan 17 10:01:11 UTC 2022


[AMD Official Use Only]



-----Original Message-----
From: Koenig, Christian <Christian.Koenig at amd.com> 
Sent: Monday, January 17, 2022 12:57 PM
To: Somalapuram, Amaranath <Amaranath.Somalapuram at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Sharma, Shashank <Shashank.Sharma at amd.com>
Subject: Re: [PATCH 2/2] drm/amdgpu: add AMDGPURESET uevent on AMD GPU reset

Am 17.01.22 um 07:33 schrieb Somalapuram Amaranath:
> AMDGPURESET uevent added to notify userspace, collect dump_stack and 
> trace
>
> Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/nv.c | 45 +++++++++++++++++++++++++++++++++
>   1 file changed, 45 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c 
> b/drivers/gpu/drm/amd/amdgpu/nv.c index 2ec1ffb36b1f..b73147ae41fb 
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -529,10 +529,55 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>   	}
>   }
>   
> +/**
> + * drm_sysfs_reset_event - generate a DRM uevent
> + * @dev: DRM device
> + *
> + * Send a uevent for the DRM device specified by @dev.  Currently we 
> +only
> + * set AMDGPURESET=1 in the uevent environment, but this could be 
> +expanded to
> + * deal with other types of events.
> + *
> + * Any new uapi should be using the 
> +drm_sysfs_connector_status_event()
> + * for uevents on connector status change.
> + */
> +void drm_sysfs_reset_event(struct drm_device *dev)

This should probably go directly into the DRM subsystem.

> +{
> +	char *event_string = "AMDGPURESET=1";
> +	char *envp[2] = { event_string, NULL };
> +
> +	kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);

As I said this approach is a clear NAK. We can't allocate any memory when we do a reset.

Regards,
Christian.

Can I do something like this:

void drm_sysfs_reset_event(struct drm_device *dev)
 {
-       char *event_string = "AMDGPURESET=1";
-       char *envp[2] = { event_string, NULL };
+       char **envp;
+
+       envp = kcalloc(2,sizeof(char *), GFP_ATOMIC);
+       envp[0] = kcalloc(30, sizeof(char), GFP_ATOMIC);
+       envp[1] = kcalloc(30, sizeof(char), GFP_ATOMIC);
+
+       strcpy(envp[0], "AMDGPURESET=1");
+       strcpy(envp[1], "");
+

        kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
+
+       kfree(envp[0]);
+       kfree(envp[1]);
+       kfree(envp);
 }

Regards,
S.Amarnath

> +}
> +
> +void amdgpu_reset_dumps(struct amdgpu_device *adev) {
> +	struct drm_device *ddev = adev_to_drm(adev);
> +	int r = 0, i;
> +
> +	/* original raven doesn't have full asic reset */
> +	if ((adev->apu_flags & AMD_APU_IS_RAVEN) &&
> +		!(adev->apu_flags & AMD_APU_IS_RAVEN2))
> +		return;
> +	for (i = 0; i < adev->num_ip_blocks; i++) {
> +		if (!adev->ip_blocks[i].status.valid)
> +			continue;
> +		if (!adev->ip_blocks[i].version->funcs->reset_reg_dumps)
> +			continue;
> +		r = adev->ip_blocks[i].version->funcs->reset_reg_dumps(adev);
> +
> +		if (r)
> +			DRM_ERROR("reset_reg_dumps of IP block <%s> failed %d\n",
> +					adev->ip_blocks[i].version->funcs->name, r);
> +	}
> +
> +	drm_sysfs_reset_event(ddev);
> +	dump_stack();
> +}
> +
>   static int nv_asic_reset(struct amdgpu_device *adev)
>   {
>   	int ret = 0;
>   
> +	amdgpu_reset_dumps(adev);
>   	switch (nv_asic_reset_method(adev)) {
>   	case AMD_RESET_METHOD_PCI:
>   		dev_info(adev->dev, "PCI reset\n");


More information about the amd-gfx mailing list