[PATCH 07/12] accel/habanalabs: stop fetching MME SBTE error cause
Oded Gabbay
ogabbay at kernel.org
Thu Jun 8 13:38:44 UTC 2023
From: Ofir Bitton <obitton at habana.ai>
Because the MME SBTE error event has only a single possible cause, we
can safely stop fetching the cause from firmware.
Signed-off-by: Ofir Bitton <obitton at habana.ai>
Reviewed-by: Oded Gabbay <ogabbay at kernel.org>
Signed-off-by: Oded Gabbay <ogabbay at kernel.org>
---
drivers/accel/habanalabs/gaudi2/gaudi2.c | 31 ++++++------------------
1 file changed, 8 insertions(+), 23 deletions(-)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index ed3b0b6225d2..899b1c4b53f6 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -66,7 +66,6 @@
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
-#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
@@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
"sbte_prtn_intr_4",
};
-static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
- "i0",
- "i1",
- "i2",
- "i3",
- "i4",
-};
-
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
"WBC ERR RESP_0",
"WBC ERR RESP_1",
@@ -8781,21 +8772,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
return error_count;
}
-static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
- u64 intr_cause_data)
+static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
- int i, error_count = 0;
-
- for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
- if (intr_cause_data & BIT(i)) {
- gaudi2_print_event(hdev, event_type, true,
- "err cause: %s", guadi2_mme_sbte_error_cause[i]);
- error_count++;
- }
-
+ /*
+ * We have a single error cause here but the report mechanism is
+ * buggy. Hence there is no good reason to fetch the cause so we
+ * just check for glbl_errors and exit.
+ */
hl_check_for_glbl_errors(hdev);
- return error_count;
+ return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@@ -9856,8 +9842,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
- error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
- le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
--
2.40.1
More information about the dri-devel
mailing list