[PATCH 01/10] drm/amdgpu: Add init levels

Alex Deucher alexdeucher at gmail.com
Mon Sep 9 21:02:32 UTC 2024


On Mon, Sep 9, 2024 at 4:57 PM Alex Deucher <alexdeucher at gmail.com> wrote:
>
> On Mon, Sep 2, 2024 at 3:34 AM Lijo Lazar <lijo.lazar at amd.com> wrote:
> >
> > Add init levels to define the level to which the device needs to be
> > initialized.
> >
> > Signed-off-by: Lijo Lazar <lijo.lazar at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 14 ++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 54 ++++++++++++++++++++++
> >  2 files changed, 68 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 6e6580ab7e04..fefdace22894 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -820,6 +820,16 @@ struct amdgpu_mqd {
> >                         struct amdgpu_mqd_prop *p);
> >  };
> >
> > +enum amdgpu_init_lvl_id {
> > +       AMDGPU_INIT_LEVEL_DEFAULT,
> > +       AMDGPU_INIT_LEVEL_MINIMAL,
>
> Could you add some comments here to define what each level means?  E.g.,
>
> +       AMDGPU_INIT_LEVEL_MINIMAL, /* minimum needed for reset at load time */

Or maybe:

AMDGPU_INIT_LEVEL_MINIMAL_RESET_ON_INIT

to make it explicit?

>
> > +};
> > +
> > +struct amdgpu_init_level {
> > +       enum amdgpu_init_lvl_id level;
> > +       uint32_t hwini_ip_block_mask;
> > +};
> > +
> >  #define AMDGPU_RESET_MAGIC_NUM 64
> >  #define AMDGPU_MAX_DF_PERFMONS 4
> >  struct amdgpu_reset_domain;
> > @@ -1169,6 +1179,8 @@ struct amdgpu_device {
> >         bool                            enforce_isolation[MAX_XCP];
> >         /* Added this mutex for cleaner shader isolation between GFX and compute processes */
> >         struct mutex                    enforce_isolation_mutex;
> > +
> > +       struct amdgpu_init_level *init_lvl;
> >  };
> >
> >  static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> > @@ -1623,4 +1635,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> >  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> >  extern const struct attribute_group amdgpu_flash_attr_group;
> >
> > +void amdgpu_set_init_level(struct amdgpu_device *adev,
> > +                          enum amdgpu_init_lvl_id lvl);
> >  #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 61a189e30bcd..4fb09c4fbf22 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -144,6 +144,42 @@ const char *amdgpu_asic_name[] = {
> >         "LAST",
> >  };
> >
> > +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
> > +
> > +struct amdgpu_init_level amdgpu_init_default = {
> > +       .level = AMDGPU_INIT_LEVEL_DEFAULT,
> > +       .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
> > +};
> > +
> > +struct amdgpu_init_level amdgpu_init_minimal = {
> > +       .level = AMDGPU_INIT_LEVEL_MINIMAL,
> > +       .hwini_ip_block_mask =
> > +               BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
> > +               BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH)
> > +};
> > +
> > +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
> > +                                            enum amd_ip_block_type block)
> > +{
> > +       return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
> > +}
> > +
> > +void amdgpu_set_init_level(struct amdgpu_device *adev,
> > +                          enum amdgpu_init_lvl_id lvl)
> > +{
> > +       switch (lvl) {
> > +       case AMDGPU_INIT_LEVEL_DEFAULT:
>
> Can move the default case here so it shares this branch, and drop the duplicate default block below.
>
> > +               adev->init_lvl = &amdgpu_init_default;
> > +               break;
> > +       case AMDGPU_INIT_LEVEL_MINIMAL:
> > +               adev->init_lvl = &amdgpu_init_minimal;
> > +               break;
> > +       default:
> > +               adev->init_lvl = &amdgpu_init_default;
> > +               break;
> > +       }
> > +}
> > +
> >  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
> >
> >  /**
> > @@ -2633,6 +2669,9 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
> >                         continue;
> >                 if (adev->ip_blocks[i].status.hw)
> >                         continue;
> > +               if (!amdgpu_ip_member_of_hwini(
> > +                           adev, adev->ip_blocks[i].version->type))
> > +                       continue;
> >                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> >                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
> >                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
> > @@ -2658,6 +2697,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
> >                         continue;
> >                 if (adev->ip_blocks[i].status.hw)
> >                         continue;
> > +               if (!amdgpu_ip_member_of_hwini(
> > +                           adev, adev->ip_blocks[i].version->type))
> > +                       continue;
> >                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> >                 if (r) {
> >                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
> > @@ -2681,6 +2723,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
> >                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
> >                                 continue;
> >
> > +                       if (!amdgpu_ip_member_of_hwini(adev,
> > +                                                      AMD_IP_BLOCK_TYPE_PSP))
> > +                               break;
> > +
> >                         if (!adev->ip_blocks[i].status.sw)
> >                                 continue;
> >
> > @@ -2803,6 +2849,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> >                 }
> >                 adev->ip_blocks[i].status.sw = true;
> >
> > +               if (!amdgpu_ip_member_of_hwini(
> > +                           adev, adev->ip_blocks[i].version->type))
> > +                       continue;
> > +
> >                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
> >                         /* need to do common hw init early so everything is set up for gmc */
> >                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
> > @@ -4196,6 +4246,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >
> >         amdgpu_device_set_mcbp(adev);
> >
>
> Add a comment here to make it clear where and why we change the init level.
>
> > +       amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
> > +       adev->init_lvl = &amdgpu_init_default;
>
> This pointer assignment can be dropped; amdgpu_set_init_level() above already sets adev->init_lvl.
>
> >         /* early init functions */
> >         r = amdgpu_device_ip_early_init(adev);
> >         if (r)
> > @@ -5473,6 +5525,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
> >         }
> >
> >         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
> > +               /* After reset, it's default init level */
> > +               amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
> >                 if (need_full_reset) {
> >                         /* post card */
> >                         amdgpu_ras_set_fed(tmp_adev, false);
> > --
> > 2.25.1
> >


More information about the amd-gfx mailing list