[PATCH] drm/amdkfd: Add GPU reset SMI event
Nils Wallménius
nils.wallmenius at gmail.com
Wed Aug 26 08:29:32 UTC 2020
Hi, see inline comment below.
Den tis 25 aug. 2020 21:12 Mukul Joshi <mukul.joshi at amd.com> skrev:
> Add support for reporting GPU reset events through SMI. KFD
> would report both pre and post GPU reset events.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +++
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++
> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 30 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 1 +
> include/uapi/linux/kfd_ioctl.h | 2 ++
> 5 files changed, 39 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index e1cd6599529f..aad1ecfa1239 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
> if (!kfd->init_complete)
> return 0;
>
> + kfd_smi_event_update_gpu_reset(kfd, false);
> +
> kfd->dqm->ops.pre_reset(kfd->dqm);
>
> kgd2kfd_suspend(kfd, false);
> @@ -833,6 +835,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
> if (!kfd->init_complete)
> return 0;
>
> + kfd_smi_event_update_gpu_reset(kfd, true);
> +
> ret = kfd_resume(kfd);
> if (ret)
> return ret;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 18bc711f97ae..b1a2979e086f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -312,6 +312,8 @@ struct kfd_dev {
> /* Clients watching SMI events */
> struct list_head smi_clients;
> spinlock_t smi_lock;
> +
> + uint64_t reset_seq_num;
> };
>
> enum kfd_mempool {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index 4d4b6e3ab697..448abfdde230 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev,
> unsigned int smi_event,
> rcu_read_unlock();
> }
>
> +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
> +{
> + /*
> + * GpuReset msg = Reset seq number (incremented for
> + * every reset message sent before GPU reset).
> + * 1 byte event + 1 byte space + 16 bytes seq num +
> + * 1 byte \n + 1 byte \0 = 20
> + */
> + char fifo_in[20];
> + int len;
> + unsigned int event;
> +
> + if (list_empty(&dev->smi_clients)) {
> + return;
> + }
> +
> + memset(fifo_in, 0x0, sizeof(fifo_in));
> +
> + if (post_reset) {
> + event = KFD_SMI_EVENT_GPU_POST_RESET;
> + } else {
> + event = KFD_SMI_EVENT_GPU_PRE_RESET;
> + ++(dev->reset_seq_num);
> + }
> +
> + len = snprintf(fifo_in, 4, "%x %llx\n", event, dev->reset_seq_num);
>
I think the size argument of 4 will cause snprintf() to truncate the message here; sizeof(fifo_in) was probably intended.
Regards
Nils
> +
> + add_event_to_kfifo(dev, event, fifo_in, len);
> +}
> +
> void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
> uint32_t throttle_bitmask)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> index 15537b2cccb5..b9b0438202e2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> @@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t
> *fd);
> void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
> void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
> uint32_t throttle_bitmask);
> +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
>
> #endif
> diff --git a/include/uapi/linux/kfd_ioctl.h
> b/include/uapi/linux/kfd_ioctl.h
> index cb1f963a84e0..8b7368bfbd84 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -453,6 +453,8 @@ enum kfd_smi_event {
> KFD_SMI_EVENT_NONE = 0, /* not used */
> KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
> KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
> + KFD_SMI_EVENT_GPU_PRE_RESET = 3,
> + KFD_SMI_EVENT_GPU_POST_RESET = 4,
> };
>
> #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20200826/040520fc/attachment-0001.htm>
More information about the amd-gfx mailing list