<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2021-05-23 1:10 p.m., Christian
      König wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:091aaf6e-73f2-3ae1-e4cb-309732424150@gmail.com">Am
      21.05.21 um 21:28 schrieb philip yang:
      <br>
      <blockquote type="cite">
        <br>
        This simply the logic, several comments inline.
        <br>
        <br>
        Thanks,
        <br>
        <br>
        Philip
        <br>
        <br>
        On 2021-05-21 9:52 a.m., Christian König wrote:
        <br>
        <blockquote type="cite">Access to the mm_node is now forbidden.
          So instead of hand wiring that
          <br>
          use the cursor functionality.
          <br>
          <br>
          Signed-off-by: Christian König<a class="moz-txt-link-rfc2396E" href="mailto:christian.koenig@amd.com"><christian.koenig@amd.com></a>
          <br>
          ---
          <br>
            drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 76
          +++---------------------
          <br>
            1 file changed, 9 insertions(+), 67 deletions(-)
          <br>
          <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
          b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
          <br>
          index fd8f544f0de2..cb28d1e660af 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
          <br>
          +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
          <br>
          @@ -29,6 +29,7 @@
          <br>
            #include "amdgpu_object.h"
          <br>
            #include "amdgpu_vm.h"
          <br>
            #include "amdgpu_mn.h"
          <br>
          +#include "amdgpu_res_cursor.h"
          <br>
            #include "kfd_priv.h"
          <br>
            #include "kfd_svm.h"
          <br>
            #include "kfd_migrate.h"
          <br>
          @@ -205,34 +206,6 @@ svm_migrate_copy_done(struct
          amdgpu_device *adev, struct dma_fence *mfence)
          <br>
                return r;
          <br>
            }
          <br>
            -static uint64_t
          <br>
          -svm_migrate_node_physical_addr(struct amdgpu_device *adev,
          <br>
          -                   struct drm_mm_node **mm_node, uint64_t
          *offset)
          <br>
          -{
          <br>
          -    struct drm_mm_node *node = *mm_node;
          <br>
          -    uint64_t pos = *offset;
          <br>
          -
          <br>
          -    if (node->start == AMDGPU_BO_INVALID_OFFSET) {
          <br>
          -        pr_debug("drm node is not validated\n");
          <br>
          -        return 0;
          <br>
          -    }
          <br>
          -
          <br>
          -    pr_debug("vram node start 0x%llx npages 0x%llx\n",
          node->start,
          <br>
          -         node->size);
          <br>
          -
          <br>
          -    if (pos >= node->size) {
          <br>
          -        do  {
          <br>
          -            pos -= node->size;
          <br>
          -            node++;
          <br>
          -        } while (pos >= node->size);
          <br>
          -
          <br>
          -        *mm_node = node;
          <br>
          -        *offset = pos;
          <br>
          -    }
          <br>
          -
          <br>
          -    return (node->start + pos) << PAGE_SHIFT;
          <br>
          -}
          <br>
          -
          <br>
            unsigned long
          <br>
            svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned
          long addr)
          <br>
            {
          <br>
          @@ -297,11 +270,9 @@ svm_migrate_copy_to_vram(struct
          amdgpu_device *adev, struct svm_range *prange,
          <br>
            {
          <br>
                uint64_t npages = migrate->cpages;
          <br>
                struct device *dev = adev->dev;
          <br>
          -    struct drm_mm_node *node;
          <br>
          +    struct amdgpu_res_cursor cursor;
          <br>
                dma_addr_t *src;
          <br>
                uint64_t *dst;
          <br>
          -    uint64_t vram_addr;
          <br>
          -    uint64_t offset;
          <br>
                uint64_t i, j;
          <br>
                int r;
          <br>
            @@ -317,19 +288,12 @@ svm_migrate_copy_to_vram(struct
          amdgpu_device *adev, struct svm_range *prange,
          <br>
                    goto out;
          <br>
                }
          <br>
            -    node = prange->ttm_res->mm_node;
          <br>
          -    offset = prange->offset;
          <br>
          -    vram_addr = svm_migrate_node_physical_addr(adev,
          &node, &offset);
          <br>
          -    if (!vram_addr) {
          <br>
        </blockquote>
        The prange->ttm_res valid check is not needed because we
        already check svm_range_vram_node_new return value
        <br>
        <blockquote type="cite">-        WARN_ONCE(1, "vram node address
          is 0\n");
          <br>
          -        r = -ENOMEM;
          <br>
          -        goto out;
          <br>
          -    }
          <br>
          -
          <br>
          +    amdgpu_res_first(prange->ttm_res, prange->offset,
          npages << PAGE_SHIFT,
          <br>
        </blockquote>
        <br>
        prange->offset<< PAGE_SHIFT
        <br>
        <br>
        amdgpu_res_first takes start and size in bytes,
        prange->offset use page aligned offset
        <br>
        <br>
      </blockquote>
      <br>
      Ah, yes good point.
      <br>
      <br>
      <blockquote type="cite">
        <br>
        <blockquote type="cite">+             &cursor);
          <br>
                for (i = j = 0; i < npages; i++) {
          <br>
                    struct page *spage;
          <br>
            -        dst[i] = vram_addr + (j << PAGE_SHIFT);
          <br>
          +        dst[i] = cursor.start + (j << PAGE_SHIFT);
          <br>
                    migrate->dst[i] = svm_migrate_addr_to_pfn(adev,
          dst[i]);
          <br>
                    svm_migrate_get_vram_page(prange,
          migrate->dst[i]);
          <br>
            @@ -354,18 +318,10 @@ svm_migrate_copy_to_vram(struct
          amdgpu_device *adev, struct svm_range *prange,
          <br>
                                    mfence);
          <br>
                            if (r)
          <br>
                                goto out_free_vram_pages;
          <br>
          -                offset += j;
          <br>
          -                vram_addr = (node->start + offset)
          << PAGE_SHIFT;
          <br>
          +                amdgpu_res_next(&cursor, j <<
          PAGE_SHIFT);
          <br>
                            j = 0;
          <br>
                        } else {
          <br>
          -                offset++;
          <br>
          -                vram_addr += PAGE_SIZE;
          <br>
          -            }
          <br>
          -            if (offset >= node->size) {
          <br>
          -                node++;
          <br>
          -                pr_debug("next node size 0x%llx\n",
          node->size);
          <br>
          -                vram_addr = node->start <<
          PAGE_SHIFT;
          <br>
          -                offset = 0;
          <br>
          +                amdgpu_res_next(&cursor, PAGE_SIZE);
          <br>
                        }
          <br>
                        continue;
          <br>
                    }
          <br>
          @@ -373,22 +329,8 @@ svm_migrate_copy_to_vram(struct
          amdgpu_device *adev, struct svm_range *prange,
          <br>
                    pr_debug("dma mapping src to 0x%llx, page_to_pfn
          0x%lx\n",
          <br>
                         src[i] >> PAGE_SHIFT,
          page_to_pfn(spage));
          <br>
            -        if (j + offset >= node->size - 1 && i
          < npages - 1) {
          <br>
          -            r = svm_migrate_copy_memory_gart(adev, src + i -
          j,
          <br>
          -                             dst + i - j, j + 1,
          <br>
          -                             FROM_RAM_TO_VRAM,
          <br>
          -                             mfence);
          <br>
          -            if (r)
          <br>
          -                goto out_free_vram_pages;
          <br>
          -
          <br>
          -            node++;
          <br>
          -            pr_debug("next node size 0x%llx\n",
          node->size);
          <br>
          -            vram_addr = node->start << PAGE_SHIFT;
          <br>
          -            offset = 0;
          <br>
          -            j = 0;
          <br>
          -        } else {
          <br>
          -            j++;
          <br>
          -        }
          <br>
          +        amdgpu_res_next(&cursor, PAGE_SIZE);
          <br>
          +        j++;
          <br>
        </blockquote>
        Here to handle cross mm_node case.
        <br>
        <br>
        if (j >= cursor->size - 1 && i < npages - 1) {
        <br>
        <br>
            r = svm_migrate_copy_memory_gart(adev, src + i - j,
        <br>
        <br>
                                        dst + i - j, j + 1,
        <br>
        <br>
                    FROM_RAM_TO_VRAM,
        <br>
                     mfence);
        <br>
             if (r)
        <br>
            goto out_free_vram_pages;
        <br>
                  amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
        <br>
             j= 0;
        <br>
        } else {
        <br>
            j++;
        <br>
        }
        <br>
      </blockquote>
      <br>
      Yeah, that was the point I couldn't understand. Why would we want
      that anyway?
      <br>
    </blockquote>
    <p>svm_migrate_copy_memory_gart uses sdma to copy from system memory
      to vram, system memory is gart mapping paged memory, and vram is
      direct mapping, physically continuous memory. We have to call
      svm_migrate_copy_memory_gart to setup new sdma for two cases:<br>
    </p>
    <p>1. system memory pages array, if src page is not
      MIGRATE_PFN_VALID</p>
    <p>2. if dst vram page cross mm_nodes, physical vram address is not
      continuous.<br>
    </p>
    <p>This if is for case 2. <br>
    </p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:091aaf6e-73f2-3ae1-e4cb-309732424150@gmail.com">
      <br>
      Regards,
      <br>
      Christian.
      <br>
      <br>
      <blockquote type="cite">
        <br>
        <br>
        <blockquote type="cite">      }
          <br>
                  r = svm_migrate_copy_memory_gart(adev, src + i - j,
          dst + i - j, j,
          <br>
        </blockquote>
      </blockquote>
      <br>
    </blockquote>
  </body>
</html>