[PATCH 2/2] drm/amdkfd: Output migrate end event if migration failed
Philip Yang
Philip.Yang at amd.com
Thu Feb 15 15:18:30 UTC 2024
To track the migrate end event when a migration fails, always output
the migrate end event, with the migration result (0 on success, otherwise
an error code) appended to the existing migrate end event string.
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 ++++++++--------
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 5 +++--
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 2 +-
include/uapi/linux/kfd_ioctl.h | 7 ++++---
4 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 480e222364d5..23cf9484331e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -445,15 +445,15 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
mpages, cpages, migrate.npages);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, node->id, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
out:
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, trigger, r);
+
if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
@@ -737,15 +737,15 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- node->id, 0, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
out:
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, trigger, r);
+
if (!r && cpages) {
mpages = cpages - upages;
pdd = svm_range_get_pdd_by_node(prange, node);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 85465eb303a9..d1a567f8a8d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -282,11 +282,12 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger)
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int result)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
KFD_EVENT_FMT_MIGRATE_END(ktime_get_boottime_ns(), pid,
- start, end - start, from, to, trigger));
+ start, end - start, from, to, trigger, result));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index fa95c2dfd587..6c99eaa39f09 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -41,7 +41,7 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger);
+ uint32_t from, uint32_t to, uint32_t trigger, int r);
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 430c01f4148b..5220670a434d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -601,6 +601,7 @@ struct kfd_ioctl_smi_events_args {
* migrate_update: the GPU page is recovered by 'M' for migrate, 'U' for update
* rescheduled: 'R' if the queue restore failed and rescheduled to try again
* rw: 'W' for write page fault, 'R' for read page fault
+ * result: page migrate result, 0 for success, otherwise error code
*/
#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num)\
"%x\n", (reset_seq_num)
@@ -622,9 +623,9 @@ struct kfd_ioctl_smi_events_args {
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
(from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
-#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
- "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
- (from), (to), (migrate_trigger)
+#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, result)\
+ "%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\
+ (from), (to), (migrate_trigger), (result)
#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
"%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
--
2.35.1
More information about the amd-gfx
mailing list