[PATCH 1/6] drm/amdgpu: Support contiguous VRAM allocation
Christian König
christian.koenig at amd.com
Mon Apr 15 12:02:23 UTC 2024
Am 12.04.24 um 22:12 schrieb Philip Yang:
> An RDMA device with limited scatter-gather capability requires a
> physically contiguous VRAM buffer for RDMA peer direct access.
>
> Add a new KFD alloc memory flag and store it as a new GEM bo alloc flag. When
> pinning this buffer object to export it for RDMA peerdirect access, set the
> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag, and then vram_mgr will set the
> TTM_PL_FLAG_CONTIGUOUS flag to ask the VRAM buddy allocator to get
> contiguous VRAM.
>
> Remove the 2GB max memory block size limit for contiguous allocation.
I'm going to sync up with Arun on this once more, but I think we won't
even need the new flag.
We will just downgrade the existing flag to be a best effort allocation
for contiguous buffers and only use the TTM flag internally to signal
that we need to alter it while pinning.
Regards,
Christian.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 9 +++++++--
> include/uapi/drm/amdgpu_drm.h | 5 +++++
> include/uapi/linux/kfd_ioctl.h | 1 +
> 4 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 0ae9fd844623..3523b91f8add 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1470,6 +1470,9 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
> if (unlikely(ret))
> return ret;
>
> + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT)
> + bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
> +
> ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
> if (ret)
> pr_err("Error in Pinning BO to domain: %d\n", domain);
> @@ -1712,6 +1715,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
> +
> + /* For contiguous VRAM allocation */
> + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT)
> + alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT;
> }
> xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
> 0 : fpriv->xcp_id;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 8db880244324..1d6e45e238e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -516,8 +516,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>
> BUG_ON(min_block_size < mm->chunk_size);
>
> - /* Limit maximum size to 2GiB due to SG table limitations */
> - size = min(remaining_size, 2ULL << 30);
> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> + size = remaining_size;
> + else
> + /* Limit maximum size to 2GiB due to SG table limitations
> + * for non-contiguous allocations.
> + */
> + size = min(remaining_size, 2ULL << 30);
>
> if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
> !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index ad21c613fec8..13645abb8e46 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -171,6 +171,11 @@ extern "C" {
> * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
> */
> #define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15)
> +/* Flag requesting a best-effort contiguous VRAM allocation for the BO.
> + * If no contiguous VRAM is available, fall back to a scattered allocation.
> + * Pinning the BO for peerdirect RDMA triggers VRAM defragmentation.
> + */
> +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT (1 << 16)
>
> struct drm_amdgpu_gem_create_in {
> /** the requested memory size */
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 2040a470ddb4..c1394c162d4e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -407,6 +407,7 @@ struct kfd_ioctl_acquire_vm_args {
> #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
> #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25)
> #define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT (1 << 24)
> +#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT (1 << 23)
>
> /* Allocate memory for later SVM (shared virtual memory) mapping.
> *
More information about the amd-gfx
mailing list