[PATCH] drm/amdkfd: Add GPU reset SMI event
Joshi, Mukul
Mukul.Joshi at amd.com
Wed Aug 26 20:02:09 UTC 2020
[AMD Official Use Only - Internal Distribution Only]
Sorry I missed that. Thanks for pointing it out.
I will send out an updated patch.
Thanks,
Mukul
From: Nils Wallménius <nils.wallmenius at gmail.com>
Sent: Wednesday, August 26, 2020 4:30 AM
To: Joshi, Mukul <Mukul.Joshi at amd.com>
Cc: amd-gfx at lists.freedesktop.org; Kuehling, Felix <Felix.Kuehling at amd.com>
Subject: Re: [PATCH] drm/amdkfd: Add GPU reset SMI event
[CAUTION: External Email]
Hi, see inline comment below.
Den tis 25 aug. 2020 21:12Mukul Joshi <mukul.joshi at amd.com<mailto:mukul.joshi at amd.com>> skrev:
Add support for reporting GPU reset events through SMI. KFD
would report both pre and post GPU reset events.
Signed-off-by: Mukul Joshi <mukul.joshi at amd.com<mailto:mukul.joshi at amd.com>>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 30 +++++++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 1 +
include/uapi/linux/kfd_ioctl.h | 2 ++
5 files changed, 39 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index e1cd6599529f..aad1ecfa1239 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
+ kfd_smi_event_update_gpu_reset(kfd, false);
+
kfd->dqm->ops.pre_reset(kfd->dqm);
kgd2kfd_suspend(kfd, false);
@@ -833,6 +835,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (!kfd->init_complete)
return 0;
+ kfd_smi_event_update_gpu_reset(kfd, true);
+
ret = kfd_resume(kfd);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 18bc711f97ae..b1a2979e086f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -312,6 +312,8 @@ struct kfd_dev {
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
+
+ uint64_t reset_seq_num;
};
enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 4d4b6e3ab697..448abfdde230 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -174,6 +174,36 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
+{
+ /*
+ * GpuReset msg = Reset seq number (incremented for
+ * every reset message sent before GPU reset).
+ * 1 byte event + 1 byte space + 16 bytes seq num +
+ * 1 byte \n + 1 byte \0 = 20
+ */
+ char fifo_in[20];
+ int len;
+ unsigned int event;
+
+ if (list_empty(&dev->smi_clients)) {
+ return;
+ }
+
+ memset(fifo_in, 0x0, sizeof(fifo_in));
+
+ if (post_reset) {
+ event = KFD_SMI_EVENT_GPU_POST_RESET;
+ } else {
+ event = KFD_SMI_EVENT_GPU_PRE_RESET;
+ ++(dev->reset_seq_num);
+ }
+
+ len = snprintf(fifo_in, 4, "%x %llx\n", event, dev->reset_seq_num);
I think the 4 will cause truncation of the message here.
Regards
Nils
+
+ add_event_to_kfifo(dev, event, fifo_in, len);
+}
+
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index 15537b2cccb5..b9b0438202e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint32_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index cb1f963a84e0..8b7368bfbd84 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -453,6 +453,8 @@ enum kfd_smi_event {
KFD_SMI_EVENT_NONE = 0, /* not used */
KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
+ KFD_SMI_EVENT_GPU_PRE_RESET = 3,
+ KFD_SMI_EVENT_GPU_POST_RESET = 4,
};
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx at lists.freedesktop.org<mailto:amd-gfx at lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx<https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cmukul.joshi%40amd.com%7Cf1f1b5a522714387b43808d8499a2fd6%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637340273862855741&sdata=S9Ch%2BXajhfEnZ7pn2DV%2F1Pb9%2FCSZfujBkxIw9INjvEg%3D&reserved=0>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20200826/9624611a/attachment-0001.htm>
More information about the amd-gfx
mailing list