[PATCH] drm/amdgpu/discovery: fix fw based ip discovery

Deucher, Alexander Alexander.Deucher at amd.com
Wed Aug 6 15:20:52 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar at amd.com>
> Sent: Wednesday, August 6, 2025 11:17 AM
> To: Deucher, Alexander <Alexander.Deucher at amd.com>; amd-
> gfx at lists.freedesktop.org
> Cc: stable at vger.kernel.org
> Subject: Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
>
>
>
> On 7/30/2025 9:29 PM, Alex Deucher wrote:
> > We only need the fw based discovery table for sysfs.  No need to parse
> > it.  Additionally parsing some of the board specific tables may result
> > in incorrect data on some boards.
> > just load the binary and don't parse it on those boards.
> >
> > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
> > Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip
> > discovery")
> > Cc: stable at vger.kernel.org
> > Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
>
> One generic question - if discovery content is completely ignored by driver, how
> external tool using sysfs could consume the data? Wouldn't there be a mismatch in
> the config?

The IP register offsets are what tools use sysfs for.  It's possible some of the other data is invalid (e.g., the harvest tables) because they are not coming from IFWI in this case.

Alex


>
> Thanks,
> Lijo
>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72
> > ++++++++++---------
> >  2 files changed, 41 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index efe98ffb679a4..b2538cff222ce 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -2570,9 +2570,6 @@ static int
> > amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
> >
> >     adev->firmware.gpu_info_fw = NULL;
> >
> > -   if (adev->mman.discovery_bin)
> > -           return 0;
> > -
> >     switch (adev->asic_type) {
> >     default:
> >             return 0;
> > @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct
> amdgpu_device *adev)
> >             chip_name = "arcturus";
> >             break;
> >     case CHIP_NAVI12:
> > +           if (adev->mman.discovery_bin)
> > +                   return 0;
> >             chip_name = "navi12";
> >             break;
> >     }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > index 81b3443c8d7f4..27bd7659961e8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> > @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> > amdgpu_device *adev)
> >
> >     switch (adev->asic_type) {
> >     case CHIP_VEGA10:
> > -   case CHIP_VEGA12:
> > -   case CHIP_RAVEN:
> > -   case CHIP_VEGA20:
> > -   case CHIP_ARCTURUS:
> > -   case CHIP_ALDEBARAN:
> > -           /* this is not fatal.  We have a fallback below
> > -            * if the new firmwares are not present. some of
> > -            * this will be overridden below to keep things
> > -            * consistent with the current behavior.
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> >              */
> > -           r = amdgpu_discovery_reg_base_init(adev);
> > -           if (!r) {
> > -                   amdgpu_discovery_harvest_ip(adev);
> > -                   amdgpu_discovery_get_gfx_info(adev);
> > -                   amdgpu_discovery_get_mall_info(adev);
> > -                   amdgpu_discovery_get_vcn_info(adev);
> > -           }
> > -           break;
> > -   default:
> > -           r = amdgpu_discovery_reg_base_init(adev);
> > -           if (r) {
> > -                   drm_err(&adev->ddev, "discovery failed: %d\n", r);
> > -                   return r;
> > -           }
> > -
> > -           amdgpu_discovery_harvest_ip(adev);
> > -           amdgpu_discovery_get_gfx_info(adev);
> > -           amdgpu_discovery_get_mall_info(adev);
> > -           amdgpu_discovery_get_vcn_info(adev);
> > -           break;
> > -   }
> > -
> > -   switch (adev->asic_type) {
> > -   case CHIP_VEGA10:
> > +           amdgpu_discovery_init(adev);
> >             vega10_reg_base_init(adev);
> >             adev->sdma.num_instances = 2;
> >             adev->gmc.num_umc = 4;
> > @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
> >             break;
> >     case CHIP_VEGA12:
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> > +            */
> > +           amdgpu_discovery_init(adev);
> >             vega10_reg_base_init(adev);
> >             adev->sdma.num_instances = 2;
> >             adev->gmc.num_umc = 4;
> > @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
> >             break;
> >     case CHIP_RAVEN:
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> > +            */
> > +           amdgpu_discovery_init(adev);
> >             vega10_reg_base_init(adev);
> >             adev->sdma.num_instances = 1;
> >             adev->vcn.num_vcn_inst = 1;
> > @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             }
> >             break;
> >     case CHIP_VEGA20:
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> > +            */
> > +           amdgpu_discovery_init(adev);
> >             vega20_reg_base_init(adev);
> >             adev->sdma.num_instances = 2;
> >             adev->gmc.num_umc = 8;
> > @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
> >             break;
> >     case CHIP_ARCTURUS:
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> > +            */
> > +           amdgpu_discovery_init(adev);
> >             arct_reg_base_init(adev);
> >             adev->sdma.num_instances = 8;
> >             adev->vcn.num_vcn_inst = 2;
> > @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
> >             break;
> >     case CHIP_ALDEBARAN:
> > +           /* This is not fatal.  We only need the discovery
> > +            * binary for sysfs.  We don't need it for a
> > +            * functional system.
> > +            */
> > +           amdgpu_discovery_init(adev);
> >             aldebaran_reg_base_init(adev);
> >             adev->sdma.num_instances = 5;
> >             adev->vcn.num_vcn_inst = 2;
> > @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct
> amdgpu_device *adev)
> >             adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
> >             break;
> >     default:
> > +           r = amdgpu_discovery_reg_base_init(adev);
> > +           if (r) {
> > +                   drm_err(&adev->ddev, "discovery failed: %d\n", r);
> > +                   return r;
> > +           }
> > +
> > +           amdgpu_discovery_harvest_ip(adev);
> > +           amdgpu_discovery_get_gfx_info(adev);
> > +           amdgpu_discovery_get_mall_info(adev);
> > +           amdgpu_discovery_get_vcn_info(adev);
> >             break;
> >     }
> >



More information about the amd-gfx mailing list