[PATCH 6/9] accel/habanalabs: dump the EQ entries headers on EQ heartbeat failure
Ofir Bitton
obitton at habana.ai
Wed Jun 19 06:34:22 UTC 2024
From: Tomer Tayar <ttayar at habana.ai>
Add a dump of the EQ entries headers upon a EQ heartbeat failure.
Signed-off-by: Tomer Tayar <ttayar at habana.ai>
Reviewed-by: Ofir Bitton <obitton at habana.ai>
---
drivers/accel/habanalabs/common/device.c | 2 ++
drivers/accel/habanalabs/common/habanalabs.h | 1 +
drivers/accel/habanalabs/common/irq.c | 25 ++++++++++++++++++++
include/linux/habanalabs/cpucp_if.h | 3 +++
4 files changed, 31 insertions(+)
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 3efc26dd9497..7bd7c2eb5dd2 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1082,6 +1082,8 @@ static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask);
+ hl_eq_dump(hdev, &hdev->event_queue);
+
return false;
}
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index f4ac5e9b1974..ce78b331e244 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3754,6 +3754,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
+void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index 2caf2df4de08..7c9f2f6a2870 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -697,3 +697,28 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
memset(q->kernel_address, 0, q->size);
}
+
+void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q)
+{
+ u32 eq_length, eqe_size, ctl, ready, mode, type, index;
+ struct hl_eq_header *hdr;
+ u8 *ptr;
+ int i;
+
+ eq_length = HL_EQ_LENGTH;
+ eqe_size = q->size / HL_EQ_LENGTH;
+
+ dev_info(hdev->dev, "Contents of EQ entries headers:\n");
+
+ for (i = 0, ptr = q->kernel_address ; i < eq_length ; ++i, ptr += eqe_size) {
+ hdr = (struct hl_eq_header *) ptr;
+ ctl = le32_to_cpu(hdr->ctl);
+ ready = FIELD_GET(EQ_CTL_READY_MASK, ctl);
+ mode = FIELD_GET(EQ_CTL_EVENT_MODE_MASK, ctl);
+ type = FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl);
+ index = FIELD_GET(EQ_CTL_INDEX_MASK, ctl);
+
+ dev_info(hdev->dev, "%02u: %#010x [ready: %u, mode %u, type %04u, index %05u]\n",
+ i, ctl, ready, mode, type, index);
+ }
+}
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 1ed17887f1a8..7ed3fdd55dda 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -397,6 +397,9 @@ struct hl_eq_entry {
#define EQ_CTL_READY_SHIFT 31
#define EQ_CTL_READY_MASK 0x80000000
+#define EQ_CTL_EVENT_MODE_SHIFT 28
+#define EQ_CTL_EVENT_MODE_MASK 0x70000000
+
#define EQ_CTL_EVENT_TYPE_SHIFT 16
#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000
--
2.34.1
More information about the dri-devel
mailing list