[PATCH v2 01/10] drm/amdgpu: Add init levels
Bhardwaj, Rajneesh
rajneesh.bhardwaj at amd.com
Thu Sep 12 02:09:30 UTC 2024
The series is
Acked-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
On 9/11/2024 2:58 AM, Lijo Lazar wrote:
> Add init levels to define the level to which a device needs to be
> initialized.
>
> Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
> ---
>
> v2:
> Add comments describing init levels
> Drop unnecessary assignment
> Rename AMDGPU_INIT_LEVEL_MINIMAL to AMDGPU_INIT_LEVEL_MINIMAL_XGMI
>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 22 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 66 ++++++++++++++++++++++
> 2 files changed, 88 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6e6580ab7e04..d8299383af11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -820,6 +820,24 @@ struct amdgpu_mqd {
> struct amdgpu_mqd_prop *p);
> };
>
> +/*
> + * Custom Init levels could be defined for different situations where a full
> + * initialization of all hardware blocks is not expected. Sample cases are
> + * custom init sequences on resume from S0i3/S3, reset on initialization,
> + * partial reset of blocks etc. Presently, this defines only two levels. Levels
> + * are described in corresponding struct definitions - amdgpu_init_default,
> + * amdgpu_init_minimal_xgmi.
> + */
> +enum amdgpu_init_lvl_id {
> + AMDGPU_INIT_LEVEL_DEFAULT,
> + AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
> +};
> +
> +struct amdgpu_init_level {
> + enum amdgpu_init_lvl_id level;
> + uint32_t hwini_ip_block_mask;
> +};
> +
> #define AMDGPU_RESET_MAGIC_NUM 64
> #define AMDGPU_MAX_DF_PERFMONS 4
> struct amdgpu_reset_domain;
> @@ -1169,6 +1187,8 @@ struct amdgpu_device {
> bool enforce_isolation[MAX_XCP];
> /* Added this mutex for cleaner shader isolation between GFX and compute processes */
> struct mutex enforce_isolation_mutex;
> +
> + struct amdgpu_init_level *init_lvl;
> };
>
> static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> @@ -1623,4 +1643,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> extern const struct attribute_group amdgpu_flash_attr_group;
>
> +void amdgpu_set_init_level(struct amdgpu_device *adev,
> + enum amdgpu_init_lvl_id lvl);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 61a189e30bcd..2ecc70f220d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -144,6 +144,50 @@ const char *amdgpu_asic_name[] = {
> "LAST",
> };
>
> +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
> +/*
> + * Default init level where all blocks are expected to be initialized. This is
> + * the level of initialization expected by default and also after a full reset
> + * of the device.
> + */
> +struct amdgpu_init_level amdgpu_init_default = {
> + .level = AMDGPU_INIT_LEVEL_DEFAULT,
> + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
> +};
> +
> +/*
> + * Minimal blocks needed to be initialized before an XGMI hive can be reset. This
> + * is used for cases like reset on initialization where the entire hive needs to
> + * be reset before first use.
> + */
> +struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
> + .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
> + .hwini_ip_block_mask =
> + BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
> + BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH)
> +};
> +
> +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
> + enum amd_ip_block_type block)
> +{
> + return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
> +}
> +
> +void amdgpu_set_init_level(struct amdgpu_device *adev,
> + enum amdgpu_init_lvl_id lvl)
> +{
> + switch (lvl) {
> + case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
> + adev->init_lvl = &amdgpu_init_minimal_xgmi;
> + break;
> + case AMDGPU_INIT_LEVEL_DEFAULT:
> + fallthrough;
> + default:
> + adev->init_lvl = &amdgpu_init_default;
> + break;
> + }
> +}
> +
> static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
>
> /**
> @@ -2633,6 +2677,9 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
> continue;
> if (adev->ip_blocks[i].status.hw)
> continue;
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
> adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
> @@ -2658,6 +2705,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
> continue;
> if (adev->ip_blocks[i].status.hw)
> continue;
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> if (r) {
> DRM_ERROR("hw_init of IP block <%s> failed %d\n",
> @@ -2681,6 +2731,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
> if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
> continue;
>
> + if (!amdgpu_ip_member_of_hwini(adev,
> + AMD_IP_BLOCK_TYPE_PSP))
> + break;
> +
> if (!adev->ip_blocks[i].status.sw)
> continue;
>
> @@ -2803,6 +2857,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> }
> adev->ip_blocks[i].status.sw = true;
>
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> +
> if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
> /* need to do common hw init early so everything is set up for gmc */
> r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
> @@ -4196,6 +4254,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>
> amdgpu_device_set_mcbp(adev);
>
> + /*
> + * By default, use default mode where all blocks are expected to be
> + * initialized. At present a 'swinit' of blocks is required to be
> + * completed before the need for a different level is detected.
> + */
> + amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
> /* early init functions */
> r = amdgpu_device_ip_early_init(adev);
> if (r)
> @@ -5473,6 +5537,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
> }
>
> list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
> + /* After reset, go back to the default init level */
> + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
> if (need_full_reset) {
> /* post card */
> amdgpu_ras_set_fed(tmp_adev, false);
More information about the amd-gfx
mailing list