[PATCH 25/29] drm/amdkfd: add debug query event operation
Jonathan Kim
jonathan.kim at amd.com
Mon Aug 29 14:30:22 UTC 2022
Allow the debugger to a single query queue, device and process exception
in a FIFO manner.
The KFD should also return the GPU or Queue id of the exception.
The debugger also has the option of clearing exceptions after
being queried.
Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++
drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 64 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 5 ++
3 files changed, 75 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index bec52bbba330..042cb0f6426b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2915,6 +2915,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
break;
case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
+ r = kfd_dbg_ev_query_debug_event(target,
+ &args->query_debug_event.queue_id,
+ &args->query_debug_event.gpu_id,
+ args->query_debug_event.exception_mask,
+ &args->query_debug_event.exception_mask);
+ break;
case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 0219032b9ce1..3b15159421cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -33,6 +33,70 @@
#define MAX_WATCH_ADDRESSES 4
static DEFINE_SPINLOCK(watch_points_lock);
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+ unsigned int *queue_id,
+ unsigned int *gpu_id,
+ uint64_t exception_clear_mask,
+ uint64_t *event_status)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+
+ if (!(process && process->debug_trap_enabled))
+ return -ENODATA;
+
+ mutex_lock(&process->event_mutex);
+ *event_status = 0;
+ *queue_id = 0;
+ *gpu_id = 0;
+
+ /* find and report queue events */
+ pqm = &process->pqm;
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ uint64_t tmp = process->exception_enable_mask;
+
+ if (!pqn->q)
+ continue;
+
+ tmp &= pqn->q->properties.exception_status;
+
+ if (!tmp)
+ continue;
+
+ *event_status = pqn->q->properties.exception_status;
+ *queue_id = pqn->q->properties.queue_id;
+ *gpu_id = pqn->q->device->id;
+ pqn->q->properties.exception_status &= ~exception_clear_mask;
+ goto out;
+ }
+
+ /* find and report device events */
+ for (i = 0; i < process->n_pdds; i++) {
+ struct kfd_process_device *pdd = process->pdds[i];
+ uint64_t tmp = process->exception_enable_mask
+ & pdd->exception_status;
+
+ if (!tmp)
+ continue;
+
+ *event_status = pdd->exception_status;
+ *gpu_id = pdd->dev->id;
+ pdd->exception_status &= ~exception_clear_mask;
+ goto out;
+ }
+
+ /* report process events */
+ if (process->exception_enable_mask & process->exception_status) {
+ *event_status = process->exception_status;
+ process->exception_status &= ~exception_clear_mask;
+ }
+
+out:
+ mutex_unlock(&process->event_mutex);
+ return *event_status ? 0 : -EAGAIN;
+}
+
void debug_event_write_work_handler(struct work_struct *work)
{
struct kfd_process *process;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 12b80b6c96d0..c64ffd3efc46 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -27,6 +27,11 @@
void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count);
int kfd_dbg_trap_activate(struct kfd_process *target);
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+ unsigned int *queue_id,
+ unsigned int *gpu_id,
+ uint64_t exception_clear_mask,
+ uint64_t *event_status);
bool kfd_set_dbg_ev_from_interrupt(struct kfd_dev *dev,
unsigned int pasid,
uint32_t doorbell_id,
--
2.25.1
More information about the amd-gfx
mailing list