[PATCH v2 1/2] drm: Add GPU reset sysfs event

Rob Clark robdclark at gmail.com
Wed Mar 16 21:50:28 UTC 2022


On Tue, Mar 8, 2022 at 11:40 PM Shashank Sharma
<contactshashanksharma at gmail.com> wrote:
>
> From: Shashank Sharma <shashank.sharma at amd.com>
>
> This patch adds a new sysfs event, which will indicate
> the userland about a GPU reset, and can also provide
> some information like:
> - process ID of the process involved with the GPU reset
> - process name of the involved process
> - the GPU status info (using flags)
>
> This patch also introduces the first flag of the flags
> bitmap, which can be appended as and when required.
>
> V2: Addressed review comments from Christian and Amar
>    - move the reset information structure to DRM layer
>    - drop _ctx from struct name
>    - make pid 32 bit(than 64)
>    - set flag when VRAM invalid (than valid)
>    - add process name as well (Amar)
>
> Cc: Alexandar Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Amaranath Somalapuram <amaranath.somalapuram at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>  drivers/gpu/drm/drm_sysfs.c | 31 +++++++++++++++++++++++++++++++
>  include/drm/drm_sysfs.h     | 10 ++++++++++
>  2 files changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
> index 430e00b16eec..840994810910 100644
> --- a/drivers/gpu/drm/drm_sysfs.c
> +++ b/drivers/gpu/drm/drm_sysfs.c
> @@ -409,6 +409,37 @@ void drm_sysfs_hotplug_event(struct drm_device *dev)
>  }
>  EXPORT_SYMBOL(drm_sysfs_hotplug_event);
>
> +/**
> + * drm_sysfs_reset_event - generate a DRM uevent to indicate GPU reset
> + * @dev: DRM device
> + * @reset_info: The contextual information about the reset (like PID, flags)
> + *
> + * Send a uevent for the DRM device specified by @dev. This informs
> + * user that a GPU reset has occurred, so that an interested client
> + * can take any recovery or profiling measure.
> + */
> +void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event *reset_info)
> +{
> +       unsigned char pid_str[13];
> +       unsigned char flags_str[15];
> +       unsigned char pname_str[TASK_COMM_LEN + 6];
> +       unsigned char reset_str[] = "RESET=1";
> +       char *envp[] = { reset_str, pid_str, pname_str, flags_str, NULL };
> +
> +       if (!reset_info) {
> +               DRM_WARN("No reset info, not sending the event\n");
> +               return;
> +       }
> +
> +       DRM_DEBUG("generating reset event\n");
> +
> +       snprintf(pid_str, ARRAY_SIZE(pid_str), "PID=%u", reset_info->pid);
> +       snprintf(pname_str, ARRAY_SIZE(pname_str), "NAME=%s", reset_info->pname);
> +       snprintf(flags_str, ARRAY_SIZE(flags_str), "FLAGS=%u", reset_info->flags);
> +       kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
> +}
> +EXPORT_SYMBOL(drm_sysfs_reset_event);
> +
>  /**
>   * drm_sysfs_connector_hotplug_event - generate a DRM uevent for any connector
>   * change
> diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h
> index 6273cac44e47..5ba11c760619 100644
> --- a/include/drm/drm_sysfs.h
> +++ b/include/drm/drm_sysfs.h
> @@ -1,16 +1,26 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>  #ifndef _DRM_SYSFS_H_
>  #define _DRM_SYSFS_H_
> +#include <linux/sched.h>
> +
> +#define DRM_GPU_RESET_FLAG_VRAM_INVALID (1 << 0)
>
>  struct drm_device;
>  struct device;
>  struct drm_connector;
>  struct drm_property;
>
> +struct drm_reset_event {
> +       uint32_t pid;

One side note, unrelated to devcoredump vs this..

AFAIU you probably want to be passing around a `struct pid *`, and
then somehow use pid_vnr() in the context of the process reading the
event to get the numeric pid.  Otherwise things will not do what you
expect if the process triggering the crash is in a different pid
namespace from the compositor.

BR,
-R

> +       uint32_t flags;
> +       char pname[TASK_COMM_LEN];
> +};
> +
>  int drm_class_device_register(struct device *dev);
>  void drm_class_device_unregister(struct device *dev);
>
>  void drm_sysfs_hotplug_event(struct drm_device *dev);
> +void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event *reset_info);
>  void drm_sysfs_connector_hotplug_event(struct drm_connector *connector);
>  void drm_sysfs_connector_status_event(struct drm_connector *connector,
>                                       struct drm_property *property);
> --
> 2.32.0
>


More information about the amd-gfx mailing list