<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2022-03-17 9:50 a.m., Christian
      König wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:20220317135044.2099-7-christian.koenig@amd.com">
      <pre class="moz-quote-pre" wrap="">Better to leave the decision when to flush the VM changes in the TLB to
the VM code.

Signed-off-by: Christian König <a class="moz-txt-link-rfc2396E" href="mailto:christian.koenig@amd.com"><christian.koenig@amd.com></a>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 21 +++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  5 ++---
 6 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 57b521bb1eec..8b14c55a0793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1104,7 +1104,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
                return ret;
 
        /* Update the page tables  */
-       ret = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+       ret = amdgpu_vm_bo_update(adev, bo_va, false);
        if (ret) {
                pr_err("amdgpu_vm_bo_update failed\n");
                return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2d4a89fb264e..62518c7b31c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -806,7 +806,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
        if (r)
                return r;
 
-       r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
+       r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
        if (r)
                return r;
 
@@ -817,7 +817,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
        if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
                bo_va = fpriv->csa_va;
                BUG_ON(!bo_va);
-               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+               r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        return r;
 
@@ -836,7 +836,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                if (bo_va == NULL)
                        continue;
 
-               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+               r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index bab6500728cb..2e16484bf606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -618,7 +618,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 
        if (operation == AMDGPU_VA_OP_MAP ||
            operation == AMDGPU_VA_OP_REPLACE) {
-               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+               r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        goto error;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a43302a86254..c29b12fec863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -808,7 +808,6 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
  * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
- * @table_freed: return true if page table is freed
  *
  * Fill in the page table entries between @start and @last.
  *
@@ -823,8 +822,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                                uint64_t flags, uint64_t offset,
                                struct ttm_resource *res,
                                dma_addr_t *pages_addr,
-                               struct dma_fence **fence,
-                               bool *table_freed)
+                               struct dma_fence **fence)
 {
        struct amdgpu_vm_update_params params;
        struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -937,9 +935,6 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                tlb_cb = NULL;
        }
 
-       if (table_freed)
-               *table_freed = *table_freed || params.table_freed;
-
 error_free:
        kfree(tlb_cb);
 
@@ -999,7 +994,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * @adev: amdgpu_device pointer
  * @bo_va: requested BO and VM object
  * @clear: if true clear the entries
- * @table_freed: return true if page table is freed
  *
  * Fill in the page table entries for @bo_va.
  *
@@ -1007,7 +1001,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * 0 for success, -EINVAL for failure.
  */
 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
-                       bool clear, bool *table_freed)
+                       bool clear)
 {
        struct amdgpu_bo *bo = bo_va->base.bo;
        struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1086,7 +1080,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
                                                resv, mapping->start,
                                                mapping->last, update_flags,
                                                mapping->offset, mem,
-                                               pages_addr, last_update, table_freed);
+                                               pages_addr, last_update);
                if (r)
                        return r;
        }
@@ -1280,7 +1274,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
                                                resv, mapping->start,
                                                mapping->last, init_pte_value,
-                                               0, NULL, NULL, &f, NULL);
+                                               0, NULL, NULL, &f);
                amdgpu_vm_free_mapping(adev, vm, mapping, f);
                if (r) {
                        dma_fence_put(f);
@@ -1322,7 +1316,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 
        list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
                /* Per VM BOs never need to bo cleared in the page tables */
-               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+               r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        return r;
        }
@@ -1341,7 +1335,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                else
                        clear = true;
 
-               r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
+               r = amdgpu_vm_bo_update(adev, bo_va, clear);
                if (r)
                        return r;
 
@@ -2525,8 +2519,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
        }
 
        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
-                                       addr, flags, value, NULL, NULL, NULL,
-                                       NULL);
+                                       addr, flags, value, NULL, NULL, NULL);
        if (r)
                goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 38a1eab1ff74..6b7682fe84f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -410,10 +410,10 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                                uint64_t flags, uint64_t offset,
                                struct ttm_resource *res,
                                dma_addr_t *pages_addr,
-                               struct dma_fence **fence, bool *free_table);
+                               struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
-                       bool clear, bool *table_freed);
+                       bool clear);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
                             struct amdgpu_bo *bo, bool evicted);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 509d915cbe69..1cb0d811dde0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1190,7 +1190,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
                                           start, last, init_pte_value, 0,
-                                          NULL, NULL, fence, NULL);
+                                          NULL, NULL, fence);
 }
 
 static int
@@ -1283,8 +1283,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
                                                prange->start + i, pte_flags,
                                                last_start - prange->start,
                                                NULL, dma_addr,
-                                               &vm->last_update,
-                                               &table_freed);
+                                               &vm->last_update);
 
                for (j = last_start - prange->start; j <= i; j++)
                        dma_addr[j] |= last_domain;
</pre>
    </blockquote>
    <p>You already pointed out this bug, to flush TLB after waiting for
      vm update done. The below fix can be merged to this patch.</p>
    <p>SVM is missing one optimization for mGPUs (Felix pointer out this
      before), add fence of mGPUs to a sync, then wait for sync, instead
      of waiting for fence of each GPU. This can be added later.<br>
    </p>
    <p>--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c</p>
    +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c<br>
    @@ -1243,7 +1243,6 @@ svm_range_map_to_gpu(struct kfd_process_device
    *pdd, struct svm_range *prange,<br>
     {<br>
            struct amdgpu_device *adev = pdd->dev->adev;<br>
            struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);<br>
    -       bool table_freed = false;<br>
            uint64_t pte_flags;<br>
            unsigned long last_start;<br>
            int last_domain;<br>
    @@ -1305,8 +1304,6 @@ svm_range_map_to_gpu(struct kfd_process_device
    *pdd, struct svm_range *prange,<br>
            if (fence)<br>
                    *fence = dma_fence_get(vm->last_update);<br>
     <br>
    -       if (table_freed)<br>
    -               kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);<br>
     out:<br>
            return r;<br>
     }<br>
    @@ -1362,6 +1359,8 @@ svm_range_map_to_gpus(struct svm_range
    *prange, unsigned long offset,<br>
                                    break;<br>
                            }<br>
                    }<br>
    +<br>
    +               kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);<br>
            }<br>
     <br>
            return r;<br>
    <br>
  </body>
</html>