[PATCH 6/9] drm/amdkfd: Add bad opcode exception handling

Oded Gabbay oded.gabbay at amd.com
Tue Apr 21 05:47:23 PDT 2015


From: Alexey Skidanov <Alexey.Skidanov at amd.com>

Signed-off-by: Alexey Skidanov <Alexey.Skidanov at amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c |  5 ++++-
 drivers/gpu/drm/amd/amdkfd/cik_int.h             |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_events.c          | 21 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.h          |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h            |  2 ++
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index c466dc4..5ddb217 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -33,7 +33,8 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	/* Do not process in ISR, just request it to be forwarded to WQ. */
 	return (ihre->pasid != 0) &&
 		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
-		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG);
+		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
+		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
 }
 
 static void cik_event_interrupt_wq(struct kfd_dev *dev,
@@ -49,6 +50,8 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 		kfd_signal_event_interrupt(ihre->pasid, 0, 0);
 	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
 		kfd_signal_event_interrupt(ihre->pasid, ihre->data & 0xFF, 8);
+	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
+		kfd_signal_hw_exception_event(ihre->pasid);
 }
 
 const struct kfd_event_interrupt_class event_interrupt_class_cik = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index af19b3a..122b8f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -48,6 +48,7 @@ struct cik_ih_ring_entry {
 
 #define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
 #define CIK_INTSRC_CP_END_OF_PIPE	0xB5
+#define CIK_INTSRC_CP_BAD_OPCODE	0xB7
 #define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ac60fb2..9eed7bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -911,3 +911,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	mutex_unlock(&p->event_mutex);
 	mutex_unlock(&p->mutex);
 }
+
+void kfd_signal_hw_exception_event(unsigned int pasid)
+{
+	/*
+	 * Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function returns a locked process.
+	 */
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	mutex_lock(&p->event_mutex);
+
+	/* Lookup events by type and signal them */
+	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+
+	mutex_unlock(&p->event_mutex);
+	mutex_unlock(&p->mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 691cf85..28f6838 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -74,6 +74,7 @@ struct kfd_event {
 
 /* Matching HSA_EVENTTYPE */
 #define KFD_EVENT_TYPE_SIGNAL 0
+#define KFD_EVENT_TYPE_HW_EXCEPTION 3
 #define KFD_EVENT_TYPE_DEBUG 5
 #define KFD_EVENT_TYPE_MEMORY 8
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8ecc033..fe76b79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -682,6 +682,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
 
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_device_global_init_class device_global_init_class_cik;
 
 enum kfd_event_wait_result {
 	KFD_WAIT_COMPLETE,
@@ -702,6 +703,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 void kfd_signal_iommu_event(struct kfd_dev *dev,
 		unsigned int pasid, unsigned long address,
 		bool is_write_requested, bool is_execute_requested);
+void kfd_signal_hw_exception_event(unsigned int pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
-- 
1.9.1



More information about the dri-devel mailing list