<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 2022-03-17 9:50 a.m., Christian
König wrote:<br>
</div>
<blockquote type="cite" cite="mid:20220317135044.2099-7-christian.koenig@amd.com">
<pre class="moz-quote-pre" wrap="">Better to leave the decision when to flush the VM changes in the TLB to
the VM code.
Signed-off-by: Christian König <a class="moz-txt-link-rfc2396E" href="mailto:christian.koenig@amd.com"><christian.koenig@amd.com></a>
---
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++---
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 ++---
6 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 57b521bb1eec..8b14c55a0793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1104,7 +1104,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
/* Update the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
pr_err("amdgpu_vm_bo_update failed\n");
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2d4a89fb264e..62518c7b31c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -806,7 +806,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
@@ -817,7 +817,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
@@ -836,7 +836,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (bo_va == NULL)
continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index bab6500728cb..2e16484bf606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -618,7 +618,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE) {
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
goto error;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a43302a86254..c29b12fec863 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -808,7 +808,6 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries between @start and @last.
*
@@ -823,8 +822,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
uint64_t flags, uint64_t offset,
struct ttm_resource *res,
dma_addr_t *pages_addr,
- struct dma_fence **fence,
- bool *table_freed)
+ struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -937,9 +935,6 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
tlb_cb = NULL;
}
- if (table_freed)
- *table_freed = *table_freed || params.table_freed;
-
error_free:
kfree(tlb_cb);
@@ -999,7 +994,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
* @adev: amdgpu_device pointer
* @bo_va: requested BO and VM object
* @clear: if true clear the entries
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries for @bo_va.
*
@@ -1007,7 +1001,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
* 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed)
+ bool clear)
{
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1086,7 +1080,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start,
mapping->last, update_flags,
mapping->offset, mem,
- pages_addr, last_update, table_freed);
+ pages_addr, last_update);
if (r)
return r;
}
@@ -1280,7 +1274,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
resv, mapping->start,
mapping->last, init_pte_value,
- 0, NULL, NULL, &f, NULL);
+ 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -1322,7 +1316,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
/* Per VM BOs never need to bo cleared in the page tables */
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
}
@@ -1341,7 +1335,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
else
clear = true;
- r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
return r;
@@ -2525,8 +2519,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
}
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
- addr, flags, value, NULL, NULL, NULL,
- NULL);
+ addr, flags, value, NULL, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 38a1eab1ff74..6b7682fe84f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -410,10 +410,10 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
uint64_t flags, uint64_t offset,
struct ttm_resource *res,
dma_addr_t *pages_addr,
- struct dma_fence **fence, bool *free_table);
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed);
+ bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 509d915cbe69..1cb0d811dde0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1190,7 +1190,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
start, last, init_pte_value, 0,
- NULL, NULL, fence, NULL);
+ NULL, NULL, fence);
}
static int
@@ -1283,8 +1283,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
prange->start + i, pte_flags,
last_start - prange->start,
NULL, dma_addr,
- &vm->last_update,
- &table_freed);
+ &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
</pre>
</blockquote>
<p>You already pointed out this bug, to flush TLB after waiting for
vm update done. The below fix can be merged to this patch.</p>
<p>SVM is missing one optimization for mGPUs (Felix pointer out this
before), add fence of mGPUs to a sync, then wait for sync, instead
of waiting for fence of each GPU. This can be added later.<br>
</p>
<p>--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c</p>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c<br>
@@ -1243,7 +1243,6 @@ svm_range_map_to_gpu(struct kfd_process_device
*pdd, struct svm_range *prange,<br>
{<br>
struct amdgpu_device *adev = pdd->dev->adev;<br>
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);<br>
- bool table_freed = false;<br>
uint64_t pte_flags;<br>
unsigned long last_start;<br>
int last_domain;<br>
@@ -1305,8 +1304,6 @@ svm_range_map_to_gpu(struct kfd_process_device
*pdd, struct svm_range *prange,<br>
if (fence)<br>
*fence = dma_fence_get(vm->last_update);<br>
<br>
- if (table_freed)<br>
- kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);<br>
out:<br>
return r;<br>
}<br>
@@ -1362,6 +1359,8 @@ svm_range_map_to_gpus(struct svm_range
*prange, unsigned long offset,<br>
break;<br>
}<br>
}<br>
+<br>
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);<br>
}<br>
<br>
return r;<br>
<br>
</body>
</html>