[PATCH] drm/amdgpu: Unify Device Aperture in amdgpu_info_ioctl for KGD/KFD

Tue Jun 24 08:26:04 UTC 2025

On 23.06.25 13:17, Srinivasan Shanmugam wrote:
> This commit refines the amdgpu_info_ioctl function to unify
> the reporting of device apertures for both KGD and KFD
> subsystems.
> 
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c      | 23 ++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h       | 29 ++++++++++++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 29 --------------------
>  include/uapi/drm/amdgpu_drm.h                |  6 ++++
>  4 files changed, 58 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d2ce7d86dbc8..6ca399a92814 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -46,6 +46,7 @@
>  #include "amdgpu_reset.h"
>  #include "amd_pcie.h"
>  #include "amdgpu_userq.h"
> +#include "amdgpu_vm.h"
>  
>  void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
>  {
> @@ -1011,6 +1012,28 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  
>  		dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
>  
> +		/* Retrieve Device Apertures */
> +		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 0)) {
> +			dev_info->lds_base = MAKE_LDS_APP_BASE_V9();
> +			dev_info->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
> +
> +			dev_info->lds_limit = MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> +			dev_info->scratch_limit = MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> +		} else {
> +			dev_info->lds_base = MAKE_LDS_APP_BASE_VI();
> +			dev_info->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
> +
> +			dev_info->lds_limit = MAKE_LDS_APP_LIMIT(dev_info->lds_base);
> +			dev_info->scratch_limit = MAKE_SCRATCH_APP_LIMIT(dev_info->scratch_base);
> +		}
> +
> +		dev_dbg(adev->dev, "Node ID: %u\n", adev->dev->id);
> +		dev_dbg(adev->dev, "GPU ID: %u\n", dev_info->device_id);
> +		dev_dbg(adev->dev, "LDS Base: %llX\n", dev_info->lds_base);
> +		dev_dbg(adev->dev, "LDS Limit: %llX\n", dev_info->lds_limit);
> +		dev_dbg(adev->dev, "Scratch Base: %llX\n", dev_info->scratch_base);
> +		dev_dbg(adev->dev, "Scratch Limit: %llX\n", dev_info->scratch_limit);
> +
>  		ret = copy_to_user(out, dev_info,
>  				   min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
>  		kfree(dev_info);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index f3ad687125ad..6ee09df0d345 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -184,6 +184,35 @@ struct amdgpu_bo_vm;
>  #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
>  #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
>  
> +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
> +	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)

Please use an AMDGPU_VM_* prefix for those defines.

> +
> +#define MAKE_GPUVM_APP_LIMIT(base, size) \
> +	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
> +
> +#define MAKE_SCRATCH_APP_BASE_VI() \
> +	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
> +
> +#define MAKE_SCRATCH_APP_LIMIT(base) \
> +	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> +
> +#define MAKE_LDS_APP_BASE_VI() \
> +	(((uint64_t)(0x1UL) << 61) + 0x0)
> +#define MAKE_LDS_APP_LIMIT(base) \
> +	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> +
> +/* On GFXv9 the LDS and scratch apertures are programmed independently
> + * using the high 16 bits of the 64-bit virtual address. They must be
> + * in the hole, which will be the case as long as the high 16 bits are
> + * not 0.

That is not 100% correct. The high bits should also not be 0xffff because then we get into trouble with sign extension.

> + *
> + * The aperture sizes are still 4GB implicitly.
> + *
> + * A GPUVM aperture is not applicable on GFXv9.

That's also not correct. We have one APU case where even GFX9 has a limited GPUVM aperture.

> + */
> +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)

But I don't see where those defines are actually used.

> +
>  /* VMPT level enumerate, and the hiberachy is:
>   * PDB2->PDB1->PDB0->PTB
>   */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> index dbcb60eb54b2..fdcbb400f618 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> @@ -277,35 +277,6 @@
>   * for FLAT_* / S_LOAD operations.
>   */
>  
> -#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
> -	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
> -
> -#define MAKE_GPUVM_APP_LIMIT(base, size) \
> -	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
> -
> -#define MAKE_SCRATCH_APP_BASE_VI() \
> -	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
> -
> -#define MAKE_SCRATCH_APP_LIMIT(base) \
> -	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> -
> -#define MAKE_LDS_APP_BASE_VI() \
> -	(((uint64_t)(0x1UL) << 61) + 0x0)
> -#define MAKE_LDS_APP_LIMIT(base) \
> -	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
> -
> -/* On GFXv9 the LDS and scratch apertures are programmed independently
> - * using the high 16 bits of the 64-bit virtual address. They must be
> - * in the hole, which will be the case as long as the high 16 bits are
> - * not 0.
> - *
> - * The aperture sizes are still 4GB implicitly.
> - *
> - * A GPUVM aperture is not applicable on GFXv9.
> - */
> -#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
> -#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
> -
>  /* User mode manages most of the SVM aperture address space. The low
>   * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
>   * for now).
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 45c4fa13499c..a52683ae7115 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1477,6 +1477,12 @@ struct drm_amdgpu_info_device {
>  	/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
>  	__u32 userq_ip_mask;
>  	__u32 pad;
> +
> +	/* Additional fields for memory aperture information */
> +	__u64 lds_base;          /* LDS base */
> +	__u64 lds_limit;         /* LDS limit */
> +	__u64 scratch_base;      /* Scratch base */
> +	__u64 scratch_limit;     /* Scratch limit */

It might be better to have a separate IOCTL instead of extending the drm_amdgpu_info_device structure.

But let us discuss that with Alex, and possibly Felix, on our weekly call.

Regards,
Christian.

>  };
>  
>  struct drm_amdgpu_info_hw_ip {