[PATCH] drm/amdkfd: Update SMI throttle event bitmask
Kasiviswanathan, Harish
Harish.Kasiviswanathan at amd.com
Tue Jul 20 22:05:40 UTC 2021
[AMD Official Use Only]
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
-----Original Message-----
From: Sider, Graham <Graham.Sider at amd.com>
Sent: Thursday, July 8, 2021 1:39 PM
To: amd-gfx at lists.freedesktop.org
Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; Sider, Graham <Graham.Sider at amd.com>
Subject: [PATCH] drm/amdkfd: Update SMI throttle event bitmask
Update Arcturus/Aldebaran thermal throttle SMI event path to use
ASIC-independent throttler bits when logging.
Signed-off-by: Graham Sider <Graham.Sider at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 10 +++++-----
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 2 +-
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 4 +++-
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 4 +++-
6 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fabc68eec36a..9c505ac0be8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -330,7 +330,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
#else
static inline int kgd2kfd_init(void)
{
@@ -389,7 +389,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
}
static inline
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9e4a05e937f0..5b06bc308782 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1370,7 +1370,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
if (kfd && kfd->init_complete)
kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 246522423559..ed4bc5f844ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
- uint32_t throttle_bitmask)
+ uint64_t throttle_bitmask)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
/*
* ThermalThrottle msg = throttle_bitmask(8):
* thermal_interrupt_count(16):
- * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
+ * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
- * 1 byte \0 = 29
+ * 1 byte \0 = 37
*/
- char fifo_in[29];
+ char fifo_in[37];
int len;
if (list_empty(&dev->smi_clients))
return;
- len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
+ len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
atomic64_read(&adev->smu.throttle_int_counter));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index b9b0438202e2..bffd0c32b060 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -26,7 +26,7 @@
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
- uint32_t throttle_bitmask);
+ uint64_t throttle_bitmask);
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 6b3e0ea10163..6ec8492f71f5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2178,7 +2178,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
log_buf);
- kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+ kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+ smu_cmn_get_indep_throttler_status(throttler_status,
+ arcturus_throttler_map));
}
static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c16ca0c78e93..e1575d974315 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1627,7 +1627,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu)
dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
log_buf);
- kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+ kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+ smu_cmn_get_indep_throttler_status(throttler_status,
+ aldebaran_throttler_map));
}
static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu)
--
2.17.1
More information about the amd-gfx
mailing list