<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2024-04-15 08:02, Christian König
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:05f277d3-f1b1-4a3e-b364-37f18cffcb2d@amd.com">Am
      12.04.24 um 22:12 schrieb Philip Yang:
      <br>
      <blockquote type="cite">RDMA device with limited scatter-gather
        capability requires physical
        <br>
        address contiguous VRAM buffer for RDMA peer direct access.
        <br>
        <br>
        Add a new KFD alloc memory flag and store as new GEM bo alloc
        flag. When
        <br>
        pinning this buffer object to export it for RDMA peerdirect access, set
        <br>
        AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag, and then vram_mgr will
        set
        <br>
        TTM_PL_FLAG_CONTIGUOUS flag to ask VRAM buddy allocator to get
        <br>
        contiguous VRAM.
        <br>
        <br>
        Remove the 2GB max memory block size limit for contiguous
        allocation.
        <br>
      </blockquote>
      <br>
      I'm going to sync up with Arun on this once more, but I think we
      won't even need the new flag.
      <br>
      <br>
      We will just downgrade the existing flag to be a best effort
      allocation for contiguous buffers and only use the TTM flag
      internally to signal that we need to alter it while pinning.
      <br>
    </blockquote>
    <p>Sure, I will rebase this patch series onto "[PATCH] drm/amdgpu:
      Modify the contiguous flags behaviour", which will remove the new
      flag.<br>
    </p>
    <p>Will send v2 patch series after Arun's v2 patch.</p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:05f277d3-f1b1-4a3e-b364-37f18cffcb2d@amd.com">
      <br>
      Regards,
      <br>
      Christian.
      <br>
      <br>
      <blockquote type="cite">
        <br>
        Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com">&lt;Philip.Yang@amd.com&gt;</a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +++++++
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c     | 9 +++++++--
        <br>
          include/uapi/drm/amdgpu_drm.h                    | 5 +++++
        <br>
          include/uapi/linux/kfd_ioctl.h                   | 1 +
        <br>
          4 files changed, 20 insertions(+), 2 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
        <br>
        index 0ae9fd844623..3523b91f8add 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
        <br>
        @@ -1470,6 +1470,9 @@ static int
        amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
        <br>
              if (unlikely(ret))
        <br>
                  return ret;
        <br>
          +    if (bo->flags &
        AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT)
        <br>
        +        bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
        <br>
        +
        <br>
              ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
        <br>
              if (ret)
        <br>
                  pr_err("Error in Pinning BO to domain: %d\n", domain);
        <br>
        @@ -1712,6 +1715,10 @@ int
        amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        <br>
                      alloc_flags =
        AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
        <br>
                      alloc_flags |= (flags &
        KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
        <br>
                      AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
        <br>
        +
        <br>
        +            /* For contiguous VRAM allocation */
        <br>
        +            if (flags &
        KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT)
        <br>
        +                alloc_flags |=
        AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT;
        <br>
                  }
        <br>
                  xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
        <br>
                              0 : fpriv->xcp_id;
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
        <br>
        index 8db880244324..1d6e45e238e1 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
        <br>
        @@ -516,8 +516,13 @@ static int amdgpu_vram_mgr_new(struct
        ttm_resource_manager *man,
        <br>
                    BUG_ON(min_block_size < mm->chunk_size);
        <br>
          -        /* Limit maximum size to 2GiB due to SG table
        limitations */
        <br>
        -        size = min(remaining_size, 2ULL << 30);
        <br>
        +        if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
        <br>
        +            size = remaining_size;
        <br>
        +        else
        <br>
        +            /* Limit maximum size to 2GiB due to SG table
        limitations
        <br>
        +             * for no contiguous allocation.
        <br>
        +             */
        <br>
        +            size = min(remaining_size, 2ULL << 30);
        <br>
                    if ((size >= (u64)pages_per_block <<
        PAGE_SHIFT) &&
        <br>
                          !(size & (((u64)pages_per_block <<
        PAGE_SHIFT) - 1)))
        <br>
        diff --git a/include/uapi/drm/amdgpu_drm.h
        b/include/uapi/drm/amdgpu_drm.h
        <br>
        index ad21c613fec8..13645abb8e46 100644
        <br>
        --- a/include/uapi/drm/amdgpu_drm.h
        <br>
        +++ b/include/uapi/drm/amdgpu_drm.h
        <br>
        @@ -171,6 +171,11 @@ extern "C" {
        <br>
           * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
        <br>
           */
        <br>
          #define AMDGPU_GEM_CREATE_EXT_COHERENT        (1 << 15)
        <br>
        +/* Flag that allocating the BO with best effort for contiguous
        VRAM.
        <br>
        + * If no contiguous VRAM, fallback to scattered allocation.
        <br>
        + * Pin the BO for peerdirect RDMA trigger VRAM defragmentation.
        <br>
        + */
        <br>
        +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS_BEST_EFFORT    (1
        << 16)
        <br>
            struct drm_amdgpu_gem_create_in  {
        <br>
              /** the requested memory size */
        <br>
        diff --git a/include/uapi/linux/kfd_ioctl.h
        b/include/uapi/linux/kfd_ioctl.h
        <br>
        index 2040a470ddb4..c1394c162d4e 100644
        <br>
        --- a/include/uapi/linux/kfd_ioctl.h
        <br>
        +++ b/include/uapi/linux/kfd_ioctl.h
        <br>
        @@ -407,6 +407,7 @@ struct kfd_ioctl_acquire_vm_args {
        <br>
          #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT    (1 << 26)
        <br>
          #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED    (1 << 25)
        <br>
          #define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT    (1 <<
        24)
        <br>
        +#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS_BEST_EFFORT    (1
        << 23)
        <br>
            /* Allocate memory for later SVM (shared virtual memory)
        mapping.
        <br>
           *
        <br>
      </blockquote>
      <br>
    </blockquote>
  </body>
</html>