[PATCH] drm/amdkfd: Add memory sync before TLB flush on unmap

Eric Huang jinhuieric.huang at amd.com
Thu Jun 10 19:00:09 UTC 2021


It is to fix a failure for SDMA updating PTEs.

Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 26 +++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 98f1d2b586c5..17d0a318b708 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1766,14 +1766,33 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			amdgpu_read_unlock(peer->ddev);
 			goto unmap_memory_from_gpu_failed;
 		}
-		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 		amdgpu_read_unlock(peer->ddev);
 		args->n_success = i+1;
 	}
-	kfree(devices_arr);
-
 	mutex_unlock(&p->mutex);
 
+	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		if (!amdgpu_read_lock(peer->ddev, true)) {
+			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
+			amdgpu_read_unlock(peer->ddev);
+		}
+	}
+
+	kfree(devices_arr);
+
 	return 0;
 
 bind_process_to_device_failed:
@@ -1781,6 +1800,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 unmap_memory_from_gpu_failed:
 	mutex_unlock(&p->mutex);
 copy_from_user_failed:
+sync_memory_failed:
 	kfree(devices_arr);
 	return err;
 }
-- 
2.25.1



More information about the amd-gfx mailing list