    <div class="moz-cite-prefix">On 2021-08-19 5:32 p.m., Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:3fa1a300-8ea8-3b7d-9ca8-82c70ec8fac4@amd.com">
      <pre class="moz-quote-pre" wrap="">Am 2021-08-19 um 10:56 a.m. schrieb Philip Yang:
</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">Restore retry fault or prefetch range, or restore svm range after
eviction to map range to GPU with correct read or write access
permission.

Range may includes multiple VMAs, update GPU page table with offset of
prange, number of pages for each VMA according VMA access permission.

Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
Minor nitpicks, and one question. See inline. It looks good otherwise.


</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +++++++++++++++++----------
 1 file changed, 84 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cf1009bb532a..94612581963f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+                     unsigned long offset, unsigned long npages,
                      unsigned long *hmm_pfns, uint32_t gpuidx)
 {
        enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
                prange->dma_addr[gpuidx] = addr;
        }
 
-       for (i = 0; i < prange->npages; i++) {
+       addr += offset;
+       for (i = 0; i < npages; i++) {
                if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
                              "leaking dma mapping\n"))
                        dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+                 unsigned long offset, unsigned long npages,
                  unsigned long *hmm_pfns)
 {
        struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
                }
                adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-               r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+               r = svm_range_dma_map_dev(adev, prange, offset, npages,
+                                         hmm_pfns, gpuidx);
                if (r)
                        break;
        }
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
        pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
        pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-       pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-                prange->svms, prange->start, prange->last,
-                (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
        return pte_flags;
 }
 
@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                    struct svm_range *prange, dma_addr_t *dma_addr,
+                    struct svm_range *prange, unsigned long offset,
+                    unsigned long npages, bool readonly, dma_addr_t *dma_addr,
                     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
        struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        int r = 0;
        int64_t i;
 
-       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-                prange->last);
+       last_start = prange->start + offset;
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+                last_start, last_start + npages - 1, readonly);
 
        if (prange->svm_bo && prange->ttm_res)
                bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-       last_start = prange->start;
-       for (i = 0; i < prange->npages; i++) {
+       for (i = offset; i < offset + npages; i++) {
                last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
                dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
                if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
                pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
                         last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
                pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-               r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-                                               last_start,
+               if (readonly)
+                       pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+               pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+                        prange->svms, last_start, prange->start + i,
+                        (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+                        pte_flags);
+
+               r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+                                               NULL, last_start,
                                                prange->start + i, pte_flags,
                                                last_start - prange->start,
-                                               NULL,
-                                               dma_addr,
+                                               NULL, dma_addr,
                                                &vm->last_update,
                                                &table_freed);
                if (r) {
@@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        return r;
 }
 
-static int svm_range_map_to_gpus(struct svm_range *prange,
-                                unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+                     unsigned long npages, bool readonly,
+                     unsigned long *bitmap, bool wait)
 {
        struct kfd_process_device *pdd;
        struct amdgpu_device *bo_adev;
@@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
                }
 
                r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
-                                        prange, prange->dma_addr[gpuidx],
+                                        prange, offset, npages, readonly,
+                                        prange->dma_addr[gpuidx],
                                         bo_adev, wait ? &fence : NULL);
                if (r)
                        break;
@@ -1390,6 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                                      int32_t gpuidx, bool intr, bool wait)
 {
        struct svm_validate_context ctx;
+       unsigned long start, end, addr;
        struct hmm_range *hmm_range;
        struct kfd_process *p;
        void *owner;
@@ -1448,40 +1461,64 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                        break;
                }
        }
-       r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
-                                      prange->start << PAGE_SHIFT,
-                                      prange->npages, &hmm_range,
-                                      false, true, owner);
-       if (r) {
-               pr_debug("failed %d to get svm range pages\n", r);
-               goto unreserve_out;
-       }
 
-       r = svm_range_dma_map(prange, ctx.bitmap,
-                             hmm_range->hmm_pfns);
-       if (r) {
-               pr_debug("failed %d to dma map range\n", r);
-               goto unreserve_out;
-       }
+       start = prange->start << PAGE_SHIFT;
+       end = (prange->last + 1) << PAGE_SHIFT;
+       for (addr = start; addr < end && !r; ) {
+               struct vm_area_struct *vma;
+               unsigned long next;
+               unsigned long offset;
+               unsigned long npages;
+               bool readonly;
 
-       prange->validated_once = true;
+               vma = find_vma(mm, addr);
+               if (!vma || addr < vma->vm_start) {
+                       r = -EINVAL;
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
I think -EFAULT would be the appropriate error code here.</pre>
    </blockquote>
Yes, this error code is passed back to user space; -EFAULT means bad
address, which is the appropriate error code here.
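In v2 the hunk would then look something like this (a minimal sketch;
only the error code changes, the comment is just explanatory):

               vma = find_vma(mm, addr);
               if (!vma || addr < vma->vm_start) {
                       /* No VMA covers addr: report the bad address
                        * to user space as -EFAULT.
                        */
                       r = -EFAULT;
                       goto unreserve_out;
               }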
    <blockquote type="cite" cite="mid:3fa1a300-8ea8-3b7d-9ca8-82c70ec8fac4@amd.com">
      <pre class="moz-quote-pre" wrap="">


</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">+                  goto unreserve_out;
+               }
+               readonly = !(vma->vm_flags & VM_WRITE);
 
-       svm_range_lock(prange);
-       if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
-               pr_debug("hmm update the range, need validate again\n");
-               r = -EAGAIN;
-               goto unlock_out;
-       }
-       if (!list_empty(&prange->child_list)) {
-               pr_debug("range split by unmap in parallel, validate again\n");
-               r = -EAGAIN;
-               goto unlock_out;
-       }
+               next = min(vma->vm_end, end);
+               npages = (next - addr) / PAGE_SIZE;
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
Use >> PAGE_SHIFT for consistency.


</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">+          r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+                                              addr, npages, &hmm_range,
+                                              readonly, true, owner);
+               if (r) {
+                       pr_debug("failed %d to get svm range pages\n", r);
+                       goto unreserve_out;
+               }
 
-       r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+               offset = (addr - start) / PAGE_SIZE;
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
</pre>
      <blockquote type="cite">
        <blockquote type="cite">
          <pre class="moz-quote-pre" wrap="">PAGE_SHIFT
</pre>
        </blockquote>
      </blockquote>
    </blockquote>
Done.
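For reference, both calculations then use the shift (a minimal sketch
of the intended v2 lines, covering both places flagged above; the two
statements sit in different parts of the loop):

               npages = (next - addr) >> PAGE_SHIFT;

               offset = (addr - start) >> PAGE_SHIFT;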
    <blockquote type="cite" cite="mid:3fa1a300-8ea8-3b7d-9ca8-82c70ec8fac4@amd.com">
      <pre class="moz-quote-pre" wrap="">

</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">+          r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+                                     hmm_range->hmm_pfns);
+               if (r) {
+                       pr_debug("failed %d to dma map range\n", r);
+                       goto unreserve_out;
+               }
+
+               svm_range_lock(prange);
+               if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+                       pr_debug("hmm update the range, need validate again\n");
+                       r = -EAGAIN;
+                       goto unlock_out;
+               }
+               if (!list_empty(&prange->child_list)) {
+                       pr_debug("range split by unmap in parallel, validate again\n");
+                       r = -EAGAIN;
+                       goto unlock_out;
+               }
+
+               r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+                                         ctx.bitmap, wait);
 
 unlock_out:
-       svm_range_unlock(prange);
+               svm_range_unlock(prange);
+
+               addr = next;
+       }
+
+       prange->validated_once = true;
</pre>
      </blockquote>
      <pre class="moz-quote-pre" wrap="">
Should this be conditional on "!r"?</pre>
    </blockquote>
I will add an if (addr == end) condition, so that validated_once is
only set after all pages of the range have been validated.
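Something like this at the end of svm_range_validate_and_map() (a
minimal sketch of the intended v2 change; the comment is explanatory):

        /* addr == end means the per-VMA loop ran to completion without
         * an error, so every page of the range was validated and mapped.
         */
        if (addr == end)
                prange->validated_once = true;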
Regards,

Philip
    <blockquote type="cite" cite="mid:3fa1a300-8ea8-3b7d-9ca8-82c70ec8fac4@amd.com">
      <pre class="moz-quote-pre" wrap="">

Regards,
  Felix


</pre>
      <blockquote type="cite">
        <pre class="moz-quote-pre" wrap="">+
 unreserve_out:
        svm_range_unreserve_bos(&ctx);
 
</pre>