[PATCH 13/35] drm/amdkfd: map svm range to GPUs

Felix Kuehling Felix.Kuehling at amd.com
Thu Jan 7 03:01:05 UTC 2021


From: Philip Yang <Philip.Yang at amd.com>

Use amdgpu_vm_bo_update_mapping to update GPU page table to map or unmap
svm range system memory pages address to GPUs.

Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Signed-off-by: Alex Sierra <alex.sierra at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 232 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 2 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 55500ec4972f..3c4a036609c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -534,6 +534,229 @@ svm_range_split_add_front(struct svm_range *prange, struct svm_range *new,
 	return 0;
 }
 
+static uint64_t
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+{
+	uint32_t flags = prange->flags;
+	uint32_t mapping_flags;
+	uint64_t pte_flags;
+
+	pte_flags = AMDGPU_PTE_VALID;
+	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+
+	mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+		mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+	if (flags & KFD_IOCTL_SVM_FLAG_COHERENT)
+		mapping_flags |= AMDGPU_VM_MTYPE_UC;
+	else
+		mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+	/* TODO: add CHIP_ARCTURUS new flags for vram mapping */
+
+	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+
+	/* Apply ASIC specific mapping flags */
+	amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags);
+
+	pr_debug("PTE flags 0x%llx\n", pte_flags);
+
+	return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			 struct svm_range *prange, struct dma_fence **fence)
+{
+	uint64_t init_pte_value = 0;
+	uint64_t start;
+	uint64_t last;
+
+	start = prange->it_node.start;
+	last = prange->it_node.last;
+
+	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", prange->svms, start, last);
+
+	return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
+					   start, last, init_pte_value, 0,
+					   NULL, NULL, fence);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct kfd_process_device *pdd;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_device *adev;
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	uint32_t gpuidx;
+	int r = 0;
+
+	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+		  MAX_GPU_INSTANCE);
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+		r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+		if (r) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd))
+			return -EINVAL;
+
+		adev = (struct amdgpu_device *)dev->kgd;
+
+		r = svm_range_unmap_from_gpu(adev, pdd->vm, prange, &fence);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r)
+				break;
+		}
+
+		amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+						  p->pasid);
+	}
+
+	return r;
+}
+
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct svm_range *prange, bool reserve_vm,
+		     struct dma_fence **fence)
+{
+	struct amdgpu_bo *root;
+	dma_addr_t *pages_addr;
+	uint64_t pte_flags;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange->it_node.start, prange->it_node.last);
+
+	if (reserve_vm) {
+		root = amdgpu_bo_ref(vm->root.base.bo);
+		r = amdgpu_bo_reserve(root, true);
+		if (r) {
+			pr_debug("failed %d to reserve root bo\n", r);
+			amdgpu_bo_unref(&root);
+			goto out;
+		}
+		r = amdgpu_vm_validate_pt_bos(adev, vm, svm_range_bo_validate,
+					      NULL);
+		if (r) {
+			pr_debug("failed %d validate pt bos\n", r);
+			goto unreserve_out;
+		}
+	}
+
+	prange->mapping.start = prange->it_node.start;
+	prange->mapping.last = prange->it_node.last;
+	prange->mapping.offset = 0;
+	pte_flags = svm_range_get_pte_flags(adev, prange);
+	prange->mapping.flags = pte_flags;
+	pages_addr = prange->pages_addr;
+
+	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+					prange->mapping.start,
+					prange->mapping.last, pte_flags,
+					prange->mapping.offset, NULL,
+					pages_addr, &vm->last_update);
+	if (r) {
+		pr_debug("failed %d to map to gpu 0x%lx\n", r,
+			 prange->it_node.start);
+		goto unreserve_out;
+	}
+
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r) {
+		pr_debug("failed %d to update directories 0x%lx\n", r,
+			 prange->it_node.start);
+		goto unreserve_out;
+	}
+
+	if (fence)
+		*fence = dma_fence_get(vm->last_update);
+
+unreserve_out:
+	if (reserve_vm) {
+		amdgpu_bo_unreserve(root);
+		amdgpu_bo_unref(&root);
+	}
+
+out:
+	return r;
+}
+
+static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct kfd_process_device *pdd;
+	struct amdgpu_device *adev;
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	struct dma_fence *fence = NULL;
+	uint32_t gpuidx;
+	int r = 0;
+
+	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+		  MAX_GPU_INSTANCE);
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+		if (r) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd))
+			return -EINVAL;
+		adev = (struct amdgpu_device *)dev->kgd;
+
+		r = svm_range_map_to_gpu(adev, pdd->vm, prange, reserve_vm,
+					 &fence);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r) {
+				pr_debug("failed %d to dma fence wait\n", r);
+				break;
+			}
+		}
+	}
+
+	return r;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
 	struct svm_range *new;
@@ -750,6 +973,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, unsigned long start,
 	 */
 	list_for_each_entry_safe(prange, tmp, &update_list, update_list) {
 		list_del(&prange->list);
+		svm_range_unmap_from_gpus(prange);
 		mutex_lock(&svms->free_list_lock);
 		list_add_tail(&prange->remove_list, &svms->free_list);
 		mutex_unlock(&svms->free_list_lock);
@@ -991,8 +1215,14 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 		}
 
 		r = svm_range_validate(mm, prange);
-		if (r)
+		if (r) {
 			pr_debug("failed %d to validate svm range\n", r);
+			goto out_unlock;
+		}
+
+		r = svm_range_map_to_gpus(prange, true);
+		if (r)
+			pr_debug("failed %d to map svm range\n", r);
 
 out_unlock:
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 4d394f72eefc..fb68b5ee54f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -42,6 +42,7 @@
  * @update_list:link list node used to add to update_list
  * @remove_list:link list node used to add to remove list
  * @hmm_range:  hmm range structure used by hmm_range_fault to get system pages
+ * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
  * @pages_addr: list of system memory physical page address
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
@@ -63,6 +64,7 @@ struct svm_range {
 	struct list_head		update_list;
 	struct list_head		remove_list;
 	struct hmm_range		*hmm_range;
+	struct amdgpu_bo_va_mapping	mapping;
 	uint64_t			npages;
 	dma_addr_t			*pages_addr;
 	uint32_t			flags;
-- 
2.29.2



More information about the dri-devel mailing list