[PATCH v2] drm/amdgpu: Unify Device Aperture in amdgpu_info_ioctl for KGD/KFD

Tue Jul 1 14:13:03 UTC 2025

On Tue, Jul 1, 2025 at 10:11 AM Christian König
<christian.koenig at amd.com> wrote:
>
> On 01.07.25 16:07, Alex Deucher wrote:
> > On Tue, Jul 1, 2025 at 2:53 AM Srinivasan Shanmugam
> > <srinivasan.shanmugam at amd.com> wrote:
> >>
> >> This commit refines the amdgpu_info_ioctl function to unify
> >> the reporting of device apertures for both KGD and KFD
> >> subsystems.
> >>
> >> v2:
> >> - Use amdgpu_ip_version() instead of directly accessing
> >>   adev->ip_versions. (Alex)
> >> - Added AMDGPU_VM_ prefix to scratch and LDS base and limit macros.
> >>   (Christian)
> >> - Clarified in comments that the top 16 bits of the 64-bit address must
> >>   not be 0x0000 or 0xffff to avoid sign extension problems. (Christian)
> >>
> >> Cc: David Yat Sin <David.YatSin at amd.com>
> >> Cc: Christian König <christian.koenig at amd.com>
> >> Cc: Alex Deucher <alexander.deucher at amd.com>
> >> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> >
> > Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>
> Reviewed-by: Christian König <christian.koenig at amd.com> as well.
>
> > But don't commit this yet as we are still waiting on the userspace
> > side to be complete.
>
> Hmm, how do we want to handle this for ROCm?
>
> I mean for Mesa we usually wait for the merge request these days.
>
> For ROCm, do we just wait for it to end up in thunk or libdrm, or what?

Yeah, we usually post a topic branch on GitHub with support for the new feature.

Alex

>
> Regards,
> Christian.
>
> >
> > Alex
> >
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c      | 25 +++++++++++
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h       | 23 ++++++++++
> >>  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 45 ++++----------------
> >>  include/uapi/drm/amdgpu_drm.h                |  6 +++
> >>  4 files changed, 62 insertions(+), 37 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> index 4aab5e394ce2..76d902342271 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> >> @@ -46,6 +46,7 @@
> >>  #include "amdgpu_reset.h"
> >>  #include "amd_pcie.h"
> >>  #include "amdgpu_userq.h"
> >> +#include "amdgpu_vm.h"
> >>
> >>  void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
> >>  {
> >> @@ -1031,6 +1032,30 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> >>
> >>                 dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
> >>
> >> +               /* Retrieve Device Apertures */
> >> +               if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) {
> >> +                       dev_info->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_V9();
> >> +                       dev_info->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9();
> >> +
> >> +                       dev_info->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> >> +                       dev_info->scratch_limit =
> >> +                               AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> >> +               } else {
> >> +                       dev_info->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_VI();
> >> +                       dev_info->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI();
> >> +
> >> +                       dev_info->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> >> +                       dev_info->scratch_limit =
> >> +                               AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> >> +               }
> >> +
> >> +               dev_dbg(adev->dev, "Node ID: %u\n", adev->dev->id);
> >> +               dev_dbg(adev->dev, "GPU ID: %u\n", dev_info->device_id);
> >> +               dev_dbg(adev->dev, "LDS Base: %llX\n", dev_info->lds_base);
> >> +               dev_dbg(adev->dev, "LDS Limit: %llX\n", dev_info->lds_limit);
> >> +               dev_dbg(adev->dev, "Scratch Base: %llX\n", dev_info->scratch_base);
> >> +               dev_dbg(adev->dev, "Scratch Limit: %llX\n", dev_info->scratch_limit);
> >> +
> >>                 ret = copy_to_user(out, dev_info,
> >>                                    min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
> >>                 kfree(dev_info);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> index f3ad687125ad..fd8f6da30096 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> @@ -184,6 +184,29 @@ struct amdgpu_bo_vm;
> >>  #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
> >>  #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
> >>
> >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI() \
> >> +       (((uint64_t)(0x1UL) << 61) + 0x100000000L)
> >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(base) \
> >> +       (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> >> +
> >> +#define AMDGPU_VM_MAKE_LDS_APP_BASE_VI() \
> >> +       (((uint64_t)(0x1UL) << 61) + 0x0)
> >> +#define AMDGPU_VM_MAKE_LDS_APP_LIMIT(base) \
> >> +       (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> >> +
> >> +/* On GFXv9 the LDS and scratch apertures are programmed independently
> >> + * using the high 16 bits of the 64-bit virtual address. They must be
> >> + * in the hole, which will be the case as long as the high 16 bits are
> >> + * neither 0x0000 nor 0xffff (this avoids sign extension issues).
> >> + *
> >> + * The aperture sizes are still 4GB implicitly.
> >> + *
> >> + * Note: While GPUVM apertures are generally not applicable on GFXv9,
> >> + * there is at least one APU case where GFX9 has a limited GPUVM aperture.
> >> + */
> >> +#define AMDGPU_VM_MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> >> +#define AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
> >> +
> >>  /* VMPT level enumerate, and the hiberachy is:
> >>   * PDB2->PDB1->PDB0->PTB
> >>   */
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> >> index 1d170dc50df3..291b068aaf8a 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> >> @@ -276,35 +276,6 @@
> >>   * for FLAT_* / S_LOAD operations.
> >>   */
> >>
> >> -#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
> >> -       (((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
> >> -
> >> -#define MAKE_GPUVM_APP_LIMIT(base, size) \
> >> -       (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
> >> -
> >> -#define MAKE_SCRATCH_APP_BASE_VI() \
> >> -       (((uint64_t)(0x1UL) << 61) + 0x100000000L)
> >> -
> >> -#define MAKE_SCRATCH_APP_LIMIT(base) \
> >> -       (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> >> -
> >> -#define MAKE_LDS_APP_BASE_VI() \
> >> -       (((uint64_t)(0x1UL) << 61) + 0x0)
> >> -#define MAKE_LDS_APP_LIMIT(base) \
> >> -       (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> >> -
> >> -/* On GFXv9 the LDS and scratch apertures are programmed independently
> >> - * using the high 16 bits of the 64-bit virtual address. They must be
> >> - * in the hole, which will be the case as long as the high 16 bits are
> >> - * not 0.
> >> - *
> >> - * The aperture sizes are still 4GB implicitly.
> >> - *
> >> - * A GPUVM aperture is not applicable on GFXv9.
> >> - */
> >> -#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> >> -#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
> >> -
> >>  /* User mode manages most of the SVM aperture address space. The low
> >>   * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
> >>   * for now).
> >> @@ -319,8 +290,8 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
> >>          * node id couldn't be 0 - the three MSB bits of
> >>          * aperture shouldn't be 0
> >>          */
> >> -       pdd->lds_base = MAKE_LDS_APP_BASE_VI();
> >> -       pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
> >> +       pdd->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_VI();
> >> +       pdd->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(pdd->lds_base);
> >>
> >>         /* dGPUs: SVM aperture starting at 0
> >>          * with small reserved space for kernel.
> >> @@ -336,21 +307,21 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
> >>         pdd->qpd.cwsr_base = SVM_CWSR_BASE;
> >>         pdd->qpd.ib_base = SVM_IB_BASE;
> >>
> >> -       pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
> >> -       pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> >> +       pdd->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_VI();
> >> +       pdd->scratch_limit = AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> >>  }
> >>
> >>  static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
> >>  {
> >> -       pdd->lds_base = MAKE_LDS_APP_BASE_V9();
> >> -       pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
> >> +       pdd->lds_base = AMDGPU_VM_MAKE_LDS_APP_BASE_V9();
> >> +       pdd->lds_limit = AMDGPU_VM_MAKE_LDS_APP_LIMIT(pdd->lds_base);
> >>
> >>         pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
> >>         pdd->gpuvm_limit =
> >>                 pdd->dev->kfd->shared_resources.gpuvm_size - 1;
> >>
> >> -       pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
> >> -       pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> >> +       pdd->scratch_base = AMDGPU_VM_MAKE_SCRATCH_APP_BASE_V9();
> >> +       pdd->scratch_limit = AMDGPU_VM_MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> >>
> >>         /*
> >>          * Place TBA/TMA on opposite side of VM hole to prevent
> >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >> index 66c4a03ac9f9..f285e9325d06 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -1477,6 +1477,12 @@ struct drm_amdgpu_info_device {
> >>         /* Userq IP mask (1 << AMDGPU_HW_IP_*) */
> >>         __u32 userq_ip_mask;
> >>         __u32 pad;
> >> +
> >> +       /* Additional fields for memory aperture information */
> >> +       __u64 lds_base;          /* LDS base */
> >> +       __u64 lds_limit;         /* LDS limit */
> >> +       __u64 scratch_base;      /* Scratch base */
> >> +       __u64 scratch_limit;     /* Scratch limit */
> >>  };
> >>
> >>  struct drm_amdgpu_info_hw_ip {
> >> --
> >> 2.34.1
> >>
>