[PATCH 01/10] accel/habanalabs: align to latest firmware specs
Ofir Bitton
obitton at habana.ai
Mon Mar 20 15:27:50 UTC 2023
On 16/03/2023 13:36, Oded Gabbay wrote:
> Copy the most up-to-date interface files to the firmware.
>
> Signed-off-by: Oded Gabbay <ogabbay at kernel.org>
> ---
> drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +-
> .../habanalabs/include/common/cpucp_if.h | 51 ++++++++++++++++++-
> .../habanalabs/include/common/hl_boot_if.h | 47 +++++------------
> .../include/gaudi2/gaudi2_async_events.h | 4 +-
> .../habanalabs/include/gaudi2/gaudi2_fw_if.h | 5 +-
> 5 files changed, 69 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
> index 8943dc9872da..21cf7180fe9f 100644
> --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
> +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
> @@ -9784,7 +9784,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
> break;
>
> case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
> - case GAUDI2_EVENT_DEV_RESET_REQ:
> + case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
> event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
> error_count = GAUDI2_NA_EVENT_CAUSE;
> is_critical = true;
> diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h
> index d713252a4f13..bb65b9e2b424 100644
> --- a/drivers/accel/habanalabs/include/common/cpucp_if.h
> +++ b/drivers/accel/habanalabs/include/common/cpucp_if.h
> @@ -33,6 +33,10 @@
> #define PLL_MAP_MAX_BITS 128
> #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8)
>
> +enum eq_event_id {
> + EQ_EVENT_NIC_STS_REQUEST = 0,
> +};
> +
> /*
> * info of the pkt queue pointers in the first async occurrence
> */
> @@ -354,9 +358,48 @@ struct hl_eq_addr_dec_intr_data {
> __u8 pad[7];
> };
>
> +enum hl_mme_acc_err_type {
> + MME_ACC_WBC_ERR_RESP_LEGACY,
> + MME_ACC_WBC_ERR_RESP_SET0_CH0,
> + MME_ACC_WBC_ERR_RESP_SET0_CH1,
> + MME_ACC_WBC_ERR_RESP_SET1_CH0,
> + MME_ACC_WBC_ERR_RESP_SET1_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET0_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET0_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET0_CH1,
> + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH0,
> + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET0_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_INF_ERR_SET1_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_NINF_ERR_SET1_CH1,
> + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH0,
> + MME_ACC_WBC_BUSER_NUMERICAL_NAN_ERR_SET1_CH1,
> + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH0,
> + MME_ACC_WBC_BUSER_RR_DBG_ERR_SET1_CH1,
> + MME_ACC_AP_STS_SRC_DNRM,
> + MME_ACC_AP_STS_SRC_INF,
> + MME_ACC_AP_STS_SRC_NINF,
> + MME_ACC_AP_STS_SRC_NAN,
> + MME_ACC_AP_STS_RES_INF,
> + MME_ACC_AP_STS_RES_NINF,
> + MME_ACC_AP_STS_RES_NAN
> +};
> +
> +struct hl_eq_mme_acc_data {
> + __u8 mme_id;
> + __u8 err_type; /* enum hl_mme_acc_err_type */
> + __le16 ctx_id;
> + __u8 pad[4];
> +};
> +
> struct hl_eq_entry {
> struct hl_eq_header hdr;
> union {
> + __le64 data_placeholder;
> struct hl_eq_ecc_data ecc_data;
> struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Gaudi1 HBM */
> struct hl_eq_sm_sei_data sm_sei_data;
> @@ -661,6 +704,9 @@ enum pq_init_status {
> * CPUCP_PACKET_ACTIVE_STATUS_SET -
> * LKD sends FW indication whether device is free or in use, this indication is reported
> * also to the BMC.
> + *
> + * CPUCP_PACKET_REGISTER_INTERRUPTS -
> + * Packet to register interrupts indicating LKD is ready to receive events from FW.
> */
>
> enum cpucp_packet_id {
> @@ -725,6 +771,8 @@ enum cpucp_packet_id {
> CPUCP_PACKET_RESERVED9, /* not used */
> CPUCP_PACKET_RESERVED10, /* not used */
> CPUCP_PACKET_RESERVED11, /* not used */
> + CPUCP_PACKET_RESERVED12, /* internal */
> + CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */
> CPUCP_PACKET_ID_MAX /* must be last */
> };
>
> @@ -1127,6 +1175,7 @@ struct cpucp_security_info {
> * (0 = functional 1 = binned)
> * @interposer_version: Interposer version programmed in eFuse
> * @substrate_version: Substrate version programmed in eFuse
> + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM.
> * @fw_os_version: Firmware OS Version
> */
> struct cpucp_info {
> @@ -1154,7 +1203,7 @@ struct cpucp_info {
> __u8 substrate_version;
> __u8 reserved2;
> struct cpucp_security_info sec_info;
> - __le32 reserved3;
> + __le32 fw_hbm_region_size;
> __u8 pll_map[PLL_MAP_LEN];
> __le64 mme_binning_mask;
> __u8 fw_os_version[VERSION_MAX_LEN];
> diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h
> index 2256add057c5..c58d76a2705c 100644
> --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h
> +++ b/drivers/accel/habanalabs/include/common/hl_boot_if.h
> @@ -770,15 +770,23 @@ enum hl_components {
> HL_COMPONENTS_ARMCP,
> HL_COMPONENTS_CPLD,
> HL_COMPONENTS_UBOOT,
> + HL_COMPONENTS_FUSE,
> HL_COMPONENTS_MAX_NUM = 16
> };
>
> +#define NAME_MAX_LEN 32 /* bytes */
> +struct hl_module_data {
> + __u8 name[NAME_MAX_LEN];
> + __u8 version[VERSION_MAX_LEN];
> +};
> +
> /**
> * struct hl_component_versions - versions associated with hl component.
> * @struct_size: size of all the struct (including dynamic size of modules).
> * @modules_offset: offset of the modules field in this struct.
> * @component: version of the component itself.
> * @fw_os: Firmware OS Version.
> + * @comp_name: Name of the component.
> * @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum
> * hl_modules is used.
> * @modules_counter: number of set bits in modules_mask.
> @@ -791,45 +799,14 @@ struct hl_component_versions {
> __le16 modules_offset;
> __u8 component[VERSION_MAX_LEN];
> __u8 fw_os[VERSION_MAX_LEN];
> + __u8 comp_name[NAME_MAX_LEN];
> __le16 modules_mask;
> __u8 modules_counter;
> __u8 reserved[1];
> - __u8 modules[][VERSION_MAX_LEN];
> -};
> -
> -/**
> - * struct hl_fw_versions - all versions (fuse, cpucp's components with their
> - * modules)
> - * @struct_size: size of all the struct (including dynamic size of components).
> - * @components_offset: offset of the components field in this struct.
> - * @fuse: silicon production FUSE information.
> - * @components_mask: i'th bit (from LSB) is a flag - on if component i in enum
> - * hl_components is used.
> - * @components_counter: number of set bits in components_mask.
> - * @reserved: reserved for future use.
> - * @components: versions of hl components. Index i corresponds to the i'th bit
> - * that is *on* in components_mask. For example, if
> - * components_mask=0b101, then *components represents arcpid and
> - * *(hl_component_versions*)((char*)components + 1') represents
> - * preboot, where 1' = components[0].struct_size.
> - */
> -struct hl_fw_versions {
> - __le16 struct_size;
> - __le16 components_offset;
> - __u8 fuse[VERSION_MAX_LEN];
> - __le16 components_mask;
> - __u8 components_counter;
> - __u8 reserved[1];
> - struct hl_component_versions components[];
> + struct hl_module_data modules[];
> };
>
> -/* Max size of struct hl_component_versions */
> -#define HL_COMPONENT_VERSIONS_MAX_SIZE \
> - (sizeof(struct hl_component_versions) + HL_MODULES_MAX_NUM * \
> - VERSION_MAX_LEN)
> -
> -/* Max size of struct hl_fw_versions */
> -#define HL_FW_VERSIONS_MAX_SIZE (sizeof(struct hl_fw_versions) + \
> - HL_COMPONENTS_MAX_NUM * HL_COMPONENT_VERSIONS_MAX_SIZE)
> +/* Max size of fit size */
> +#define HL_FW_VERSIONS_FIT_SIZE 4096
>
> #endif /* HL_BOOT_IF_H */
> diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
> index 50852cc80373..f661068d0c5f 100644
> --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
> +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
> @@ -1,6 +1,6 @@
> /* SPDX-License-Identifier: GPL-2.0
> *
> - * Copyright 2018-2021 HabanaLabs, Ltd.
> + * Copyright 2018-2022 HabanaLabs, Ltd.
> * All Rights Reserved.
> *
> */
> @@ -958,7 +958,7 @@ enum gaudi2_async_event_id {
> GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318,
> GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
> GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
> - GAUDI2_EVENT_DEV_RESET_REQ = 1321,
> + GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
> GAUDI2_EVENT_SIZE,
> };
>
> diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
> index 82f3ca2a3966..8522f24deac0 100644
> --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
> +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_fw_if.h
> @@ -63,7 +63,10 @@ struct gaudi2_cold_rst_data {
> u32 fake_sig_validation_en : 1;
> u32 bist_skip_enable : 1;
> u32 bist_need_iatu_config : 1;
> - u32 reserved : 24;
> + u32 fake_bis_compliant : 1;
> + u32 wd_rst_cause_arm : 1;
> + u32 wd_rst_cause_arcpid : 1;
> + u32 reserved : 21;
> };
> __le32 data;
> };
Reviewed-by: Ofir Bitton <obitton at habana.ai>
More information about the dri-devel mailing list