[PATCH 04/12] accel/habanalabs: align to latest firmware specs

Ofir Bitton obitton at habana.ai
Wed May 17 18:03:18 UTC 2023


On 16/05/2023 12:30, Oded Gabbay wrote:
> Update the firmware common interface files with the latest version.
>
> Signed-off-by: Oded Gabbay <ogabbay at kernel.org>
> ---
>   .../habanalabs/include/common/cpucp_if.h      | 18 ++++----
>   .../habanalabs/include/common/hl_boot_if.h    | 41 ++++---------------
>   2 files changed, 16 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h
> index f68308cc2524..33807b839c37 100644
> --- a/drivers/accel/habanalabs/include/common/cpucp_if.h
> +++ b/drivers/accel/habanalabs/include/common/cpucp_if.h
> @@ -359,7 +359,7 @@ struct hl_eq_entry {
>   	union {
>   		__le64 data_placeholder;
>   		struct hl_eq_ecc_data ecc_data;
> -		struct hl_eq_hbm_ecc_data hbm_ecc_data;	/* Gaudi1 HBM */
> +		struct hl_eq_hbm_ecc_data hbm_ecc_data;	/* Obsolete */
>   		struct hl_eq_sm_sei_data sm_sei_data;
>   		struct cpucp_pkt_sync_err pkt_sync_err;
>   		struct hl_eq_fw_alive fw_alive;
> @@ -653,7 +653,7 @@ enum pq_init_status {
>    *       which address is passed via the CpuCp packet. In addition, the host's driver
>    *       passes the max size it allows the CpuCP to write to the structure, to prevent
>    *       data corruption in case of mismatched driver/FW versions.
> - *       Relevant only to Gaudi.
> + *       Obsolete.
>    *
>    * CPUCP_PACKET_GENERIC_PASSTHROUGH -
>    *      Generic opcode for all firmware info that is only passed to host
> @@ -868,19 +868,19 @@ struct cpucp_array_data_packet {
>   enum cpucp_led_index {
>   	CPUCP_LED0_INDEX = 0,
>   	CPUCP_LED1_INDEX,
> -	CPUCP_LED2_INDEX
> +	CPUCP_LED2_INDEX,
> +	CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX
>   };
>   
>   /*
>    * enum cpucp_packet_rc - Error return code
>    * @cpucp_packet_success	-> in case of success.
> - * @cpucp_packet_invalid	-> this is to support Goya and Gaudi platform.
> + * @cpucp_packet_invalid	-> this is to support first generation platforms.
>    * @cpucp_packet_fault		-> in case of processing error like failing to
>    *                                 get device binding or semaphore etc.
> - * @cpucp_packet_invalid_pkt	-> when cpucp packet is un-supported. This is
> - *                                 supported Greco onwards.
> + * @cpucp_packet_invalid_pkt	-> when cpucp packet is un-supported.
>    * @cpucp_packet_invalid_params	-> when checking parameter like length of buffer
> - *				   or attribute value etc. Supported Greco onwards.
> + *				   or attribute value etc.
>    * @cpucp_packet_rc_max		-> It indicates size of enum so should be at last.
>    */
>   enum cpucp_packet_rc {
> @@ -1365,7 +1365,7 @@ struct cpucp_dev_info_signed {
>   #define DCORE_MON_REGS_SZ	512
>   /*
>    * struct dcore_monitor_regs_data - DCORE monitor regs data.
> - * the structure follows sync manager block layout. relevant only to Gaudi.
> + * the structure follows sync manager block layout. Obsolete.
>    * @mon_pay_addrl: array of payload address low bits.
>    * @mon_pay_addrh: array of payload address high bits.
>    * @mon_pay_data: array of payload data.
> @@ -1380,7 +1380,7 @@ struct dcore_monitor_regs_data {
>   	__le32 mon_status[DCORE_MON_REGS_SZ];
>   };
>   
> -/* contains SM data for each SYNC_MNGR (relevant only to Gaudi) */
> +/* contains SM data for each SYNC_MNGR (Obsolete) */
>   struct cpucp_monitor_dump {
>   	struct dcore_monitor_regs_data sync_mngr_w_s;
>   	struct dcore_monitor_regs_data sync_mngr_e_s;
> diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h
> index c58d76a2705c..cff79f7f9f75 100644
> --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h
> +++ b/drivers/accel/habanalabs/include/common/hl_boot_if.h
> @@ -35,6 +35,7 @@ enum cpu_boot_err {
>   	CPU_BOOT_ERR_TPM_FAIL = 20,
>   	CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
>   	CPU_BOOT_ERR_EEPROM_FAIL = 22,
> +	CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23,
>   	CPU_BOOT_ERR_ENABLED = 31,
>   	CPU_BOOT_ERR_SCND_EN = 63,
>   	CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
> @@ -51,6 +52,7 @@ enum cpu_boot_err {
>   		 (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) |	\
>   		 (1 << CPU_BOOT_ERR_BINNING_FAIL) |		\
>   		 (1 << CPU_BOOT_ERR_DRAM_SKIPPED) |		\
> +		 (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) |	\
>   		 (1 << CPU_BOOT_ERR_EEPROM_FAIL))
>   
>   /*
> @@ -132,6 +134,9 @@ enum cpu_boot_err {
>    * CPU_BOOT_ERR_EEPROM_FAIL		Failed reading EEPROM data. Defaults
>    *					are used.
>    *
> + * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL	Failed scrubbing the Engines/ARCFarm
> + *					memories. Boot disabled until reset.
> + *
>    * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
>    *					This is a main indication that the
>    *					running FW populates the error
> @@ -157,6 +162,7 @@ enum cpu_boot_err {
>   #define CPU_BOOT_ERR0_TPM_FAIL			(1 << CPU_BOOT_ERR_TPM_FAIL)
>   #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	(1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
>   #define CPU_BOOT_ERR0_EEPROM_FAIL		(1 << CPU_BOOT_ERR_EEPROM_FAIL)
> +#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL	(1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
>   #define CPU_BOOT_ERR0_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
>   #define CPU_BOOT_ERR1_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
>   
> @@ -744,36 +750,6 @@ struct comms_status {
>   	};
>   };
>   
> -/**
> - * HL_MODULES_MAX_NUM is determined by the size of modules_mask in struct
> - *      hl_component_versions
> - */
> -enum hl_modules {
> -	HL_MODULES_BOOT_INFO = 0,
> -	HL_MODULES_EEPROM,
> -	HL_MODULES_FDT,
> -	HL_MODULES_I2C,
> -	HL_MODULES_LZ4,
> -	HL_MODULES_MBEDTLS,
> -	HL_MODULES_MAX_NUM = 16
> -};
> -
> -/**
> - * HL_COMPONENTS_MAX_NUM is determined by the size of components_mask in
> - *      struct cpucp_versions
> - */
> -enum hl_components {
> -	HL_COMPONENTS_PID = 0,
> -	HL_COMPONENTS_MGMT,
> -	HL_COMPONENTS_PREBOOT,
> -	HL_COMPONENTS_PPBOOT,
> -	HL_COMPONENTS_ARMCP,
> -	HL_COMPONENTS_CPLD,
> -	HL_COMPONENTS_UBOOT,
> -	HL_COMPONENTS_FUSE,
> -	HL_COMPONENTS_MAX_NUM = 16
> -};
> -
>   #define NAME_MAX_LEN	32 /* bytes */
>   struct hl_module_data {
>   	__u8 name[NAME_MAX_LEN];
> @@ -787,8 +763,6 @@ struct hl_module_data {
>    * @component: version of the component itself.
>    * @fw_os: Firmware OS Version.
>    * @comp_name: Name of the component.
> - * @modules_mask: i'th bit (from LSB) is a flag - on if module i in enum
> - *              hl_modules is used.
>    * @modules_counter: number of set bits in modules_mask.
>    * @reserved: reserved for future use.
>    * @modules: versions of the component's modules. Elborated explanation in
> @@ -800,9 +774,8 @@ struct hl_component_versions {
>   	__u8 component[VERSION_MAX_LEN];
>   	__u8 fw_os[VERSION_MAX_LEN];
>   	__u8 comp_name[NAME_MAX_LEN];
> -	__le16 modules_mask;
>   	__u8 modules_counter;
> -	__u8 reserved[1];
> +	__u8 reserved[3];
>   	struct hl_module_data modules[];
>   };
>   

Reviewed-by: Ofir Bitton <obitton at habana.ai>



More information about the dri-devel mailing list