[PATCH] drm/amdkfd: Add memory sync before TLB flush on unmap
Eric Huang
jinhuieric.huang at amd.com
Thu Jun 10 19:00:09 UTC 2021
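This fixes a failure seen when SDMA is used to update PTEs: on unmap, wait for the page table updates to complete (memory sync) before flushing the TLBs.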
Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 26 +++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 98f1d2b586c5..17d0a318b708 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1766,14 +1766,33 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
amdgpu_read_unlock(peer->ddev);
goto unmap_memory_from_gpu_failed;
}
- kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
amdgpu_read_unlock(peer->ddev);
args->n_success = i+1;
}
- kfree(devices_arr);
-
mutex_unlock(&p->mutex);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+ if (err) {
+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ goto sync_memory_failed;
+ }
+
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ if (!amdgpu_read_lock(peer->ddev, true)) {
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
+ amdgpu_read_unlock(peer->ddev);
+ }
+ }
+
+ kfree(devices_arr);
+
return 0;
bind_process_to_device_failed:
@@ -1781,6 +1800,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
unmap_memory_from_gpu_failed:
mutex_unlock(&p->mutex);
copy_from_user_failed:
+sync_memory_failed:
kfree(devices_arr);
return err;
}
--
2.25.1
More information about the amd-gfx mailing list