[PATCH] drm/amdgpu: Unify Device Aperture in amdgpu_info_ioctl for KGD/KFD

Tue Jun 24 13:27:59 UTC 2025

On Tue, Jun 24, 2025 at 4:26 AM Christian König
<christian.koenig at amd.com> wrote:
>
>
>
> On 23.06.25 13:17, Srinivasan Shanmugam wrote:
> > This commit refines the amdgpu_info_ioctl function to unify
> > the reporting of device apertures for both KGD and KFD
> > subsystems.
> >
> > Cc: Christian König <christian.koenig at amd.com>
> > Cc: Alex Deucher <alexander.deucher at amd.com>
> > Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c      | 23 ++++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h       | 29 ++++++++++++++++++++
> >  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 29 --------------------
> >  include/uapi/drm/amdgpu_drm.h                |  6 ++++
> >  4 files changed, 58 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index d2ce7d86dbc8..6ca399a92814 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -46,6 +46,7 @@
> >  #include "amdgpu_reset.h"
> >  #include "amd_pcie.h"
> >  #include "amdgpu_userq.h"
> > +#include "amdgpu_vm.h"
> >
> >  void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
> >  {
> > @@ -1011,6 +1012,28 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> >
> >               dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >
> > +             /* Retrieve Device Apertures */
> > +             if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 0)) {
> > +                     dev_info->lds_base = MAKE_LDS_APP_BASE_V9();
> > +                     dev_info->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
> > +
> > +                     dev_info->lds_limit = MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> > +                     dev_info->scratch_limit = MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> > +             } else {
> > +                     dev_info->lds_base = MAKE_LDS_APP_BASE_VI();
> > +                     dev_info->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
> > +
> > +                     dev_info->lds_limit = MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> > +                     dev_info->scratch_limit = MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> > +             }
> > +
> > +             dev_dbg(adev->dev, "Node ID: %u\n", adev->dev->id);
> > +             dev_dbg(adev->dev, "GPU ID: %u\n", dev_info->device_id);
> > +             dev_dbg(adev->dev, "LDS Base: %llX\n", dev_info->lds_base);
> > +             dev_dbg(adev->dev, "LDS Limit: %llX\n", dev_info->lds_limit);
> > +             dev_dbg(adev->dev, "Scratch Base: %llX\n", dev_info->scratch_base);
> > +             dev_dbg(adev->dev, "Scratch Limit: %llX\n", dev_info->scratch_limit);
> > +
> >               ret = copy_to_user(out, dev_info,
> >                                  min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
> >               kfree(dev_info);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index f3ad687125ad..6ee09df0d345 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -184,6 +184,35 @@ struct amdgpu_bo_vm;
> >  #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
> >  #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
> >
> > +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
> > +     (((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
>
> AMDGPU_VM_* prefix for those defines please.
>
> > +
> > +#define MAKE_GPUVM_APP_LIMIT(base, size) \
> > +     (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
> > +
> > +#define MAKE_SCRATCH_APP_BASE_VI() \
> > +     (((uint64_t)(0x1UL) << 61) + 0x100000000L)
> > +
> > +#define MAKE_SCRATCH_APP_LIMIT(base) \
> > +     (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> > +
> > +#define MAKE_LDS_APP_BASE_VI() \
> > +     (((uint64_t)(0x1UL) << 61) + 0x0)
> > +#define MAKE_LDS_APP_LIMIT(base) \
> > +     (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> > +
> > +/* On GFXv9 the LDS and scratch apertures are programmed independently
> > + * using the high 16 bits of the 64-bit virtual address. They must be
> > + * in the hole, which will be the case as long as the high 16 bits are
> > + * not 0.
>
> That is not 100% correct. The high bits should also not be 0xffff because then we get into trouble with sign extension.
>
> > + *
> > + * The aperture sizes are still 4GB implicitly.
> > + *
> > + * A GPUVM aperture is not applicable on GFXv9.
>
> That's also not correct. We have one APU case where even GFX9 has a limited GPUVM aperture.
>
> > + */
> > +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> > +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
>
> But I don't see where those defines are actually used.
>
> > +
> >  /* VMPT level enumerate, and the hiberachy is:
> >   * PDB2->PDB1->PDB0->PTB
> >   */
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > index dbcb60eb54b2..fdcbb400f618 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > @@ -277,35 +277,6 @@
> >   * for FLAT_* / S_LOAD operations.
> >   */
> >
> > -#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
> > -     (((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
> > -
> > -#define MAKE_GPUVM_APP_LIMIT(base, size) \
> > -     (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
> > -
> > -#define MAKE_SCRATCH_APP_BASE_VI() \
> > -     (((uint64_t)(0x1UL) << 61) + 0x100000000L)
> > -
> > -#define MAKE_SCRATCH_APP_LIMIT(base) \
> > -     (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> > -
> > -#define MAKE_LDS_APP_BASE_VI() \
> > -     (((uint64_t)(0x1UL) << 61) + 0x0)
> > -#define MAKE_LDS_APP_LIMIT(base) \
> > -     (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> > -
> > -/* On GFXv9 the LDS and scratch apertures are programmed independently
> > - * using the high 16 bits of the 64-bit virtual address. They must be
> > - * in the hole, which will be the case as long as the high 16 bits are
> > - * not 0.
> > - *
> > - * The aperture sizes are still 4GB implicitly.
> > - *
> > - * A GPUVM aperture is not applicable on GFXv9.
> > - */
> > -#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> > -#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
> > -
> >  /* User mode manages most of the SVM aperture address space. The low
> >   * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
> >   * for now).
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 45c4fa13499c..a52683ae7115 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -1477,6 +1477,12 @@ struct drm_amdgpu_info_device {
> >       /* Userq IP mask (1 << AMDGPU_HW_IP_*) */
> >       __u32 userq_ip_mask;
> >       __u32 pad;
> > +
> > +     /* Additional fields for memory aperture information */
> > +     __u64 lds_base;          /* LDS base */
> > +     __u64 lds_limit;         /* LDS limit */
> > +     __u64 scratch_base;      /* Scratch base */
> > +     __u64 scratch_limit;     /* Scratch limit */
>
> It might be better to have a separate IOCTL instead of extending the amdgpu_info_device structure.
>
> But let us discuss that with Alex and eventually Felix on our weekly call.

ROCr team said the rest of the stuff in that query is useful to them
so it makes sense to include the aperture info as well to reduce the
number of IOCTLs needed.

Alex

>
> Regards,
> Christian.
>
>
>
> >  };
> >
> >  struct drm_amdgpu_info_hw_ip {
>