[PATCH v2] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

Xiaogang.Chen xiaogang.chen at amd.com
Mon Dec 4 20:23:10 UTC 2023


From: Xiaogang Chen <xiaogang.chen at amd.com>

v2:
-not need calculate vram page number for new registered svm range, only
do it for split vram pages.

SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial
migration/mapping do validation on same vm range as migration/map do instead of
whole svm range that can be very large. This change is expected to improve svm
code performance.

Signed-off-by: Xiaogang Chen<Xiaogang.Chen at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 149 ++++++++++++++++++++-------
 1 file changed, 109 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..2f14cd1a3416 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		      unsigned long offset, unsigned long npages,
-		      unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
+		      unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	dma_addr_t *addr = prange->dma_addr[gpuidx];
 	struct device *dev = adev->dev;
 	struct page *page;
-	uint64_t vram_pages_dev;
 	int i, r;
 
 	if (!addr) {
@@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	vram_pages_dev = 0;
 	addr += offset;
 	for (i = 0; i < npages; i++) {
 		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		if (is_zone_device_page(page)) {
 			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
 
-			vram_pages_dev++;
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				   bo_adev->vm_manager.vram_base_offset -
 				   bo_adev->kfd.pgmap.range.start;
@@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
-	*vram_pages = vram_pages_dev;
+
 	return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns, uint64_t *vram_pages)
+		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
 	uint32_t gpuidx;
@@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 
 		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
-					  hmm_pfns, gpuidx, vram_pages);
+					  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
@@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	new->svm_bo = svm_range_bo_ref(old->svm_bo);
 	new->ttm_res = old->ttm_res;
 
-	/* set new's vram_pages as old range's now, the acurate vram_pages
-	 * will be updated during mapping
-	 */
-	new->vram_pages = min(old->vram_pages, new->npages);
-
 	spin_lock(&new->svm_bo->list_lock);
 	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
 	spin_unlock(&new->svm_bo->list_lock);
@@ -1107,9 +1099,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
 
 static int
 svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
-		     struct list_head *insert_list, struct list_head *remap_list)
+		     struct list_head *insert_list, struct list_head *remap_list,
+		     struct svm_range *tail)
 {
-	struct svm_range *tail;
 	int r = svm_range_split(prange, prange->start, new_last, &tail);
 
 	if (!r) {
@@ -1122,9 +1114,9 @@ svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
 
 static int
 svm_range_split_head(struct svm_range *prange, uint64_t new_start,
-		     struct list_head *insert_list, struct list_head *remap_list)
+		     struct list_head *insert_list, struct list_head *remap_list,
+		     struct svm_range *head)
 {
-	struct svm_range *head;
 	int r = svm_range_split(prange, new_start, prange->last, &head);
 
 	if (!r) {
@@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	struct svm_validate_context *ctx;
 	unsigned long start, end, addr;
 	struct kfd_process *p;
-	uint64_t vram_pages;
 	void *owner;
 	int32_t idx;
 	int r = 0;
@@ -1642,15 +1633,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
-	vram_pages = 0;
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = map_start << PAGE_SHIFT;
+	end = (map_last + 1) << PAGE_SHIFT;
 	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
 		unsigned long map_start_vma;
 		unsigned long map_last_vma;
 		struct vm_area_struct *vma;
-		uint64_t vram_pages_vma;
 		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
@@ -1677,13 +1666,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		if (!r) {
-			offset = (addr - start) >> PAGE_SHIFT;
+			offset = (addr - (prange->start << PAGE_SHIFT)) >> PAGE_SHIFT;
 			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-					      hmm_range->hmm_pfns, &vram_pages_vma);
+					      hmm_range->hmm_pfns);
 			if (r)
 				pr_debug("failed %d to dma map range\n", r);
-			else
-				vram_pages += vram_pages_vma;
 		}
 
 		svm_range_lock(prange);
@@ -1716,19 +1703,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		addr = next;
 	}
 
-	if (addr == end) {
-		prange->vram_pages = vram_pages;
-
-		/* if prange does not include any vram page and it
-		 * has not released svm_bo drop its svm_bo reference
-		 * and set its actaul_loc to sys ram
-		 */
-		if (!vram_pages && prange->ttm_res) {
-			prange->actual_loc = 0;
-			svm_range_vram_node_free(prange);
-		}
-	}
-
 	svm_range_unreserve_bos(ctx);
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
@@ -2037,6 +2011,81 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
 	return 0;
 }
 
+static int
+svm_range_vram_pages(struct svm_range *prange)
+{
+	unsigned long start, end, addr;
+	struct svm_range_list *svms;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	struct page *page;
+	int32_t gpuidx;
+	void *owner;
+	int r = 0;
+
+	prange->vram_pages = 0;
+	svms = prange->svms;
+	p = container_of(svms, struct kfd_process, svms);
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("svms 0x%p process mm gone\n", svms);
+		return -ESRCH;
+	}
+
+	/* prange->actual_loc should not be 0 here */
+	gpuidx = kfd_process_gpuidx_from_gpuid(p, prange->actual_loc);
+	if (gpuidx < 0) {
+		pr_debug("failed get device by id 0x%x\n", prange->actual_loc);
+		return -EINVAL;
+	}
+	owner = kfd_svm_page_owner(p, gpuidx);
+
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+	for (addr = start; addr < end; ) {
+		struct hmm_range *hmm_range;
+		struct vm_area_struct *vma;
+		unsigned long next = 0;
+		unsigned long npages;
+		bool readonly;
+
+		vma = vma_lookup(mm, addr);
+		if (!vma) {
+			mmput(mm);
+			return -EFAULT;
+		}
+
+		readonly = !(vma->vm_flags & VM_WRITE);
+		next = min(vma->vm_end, end);
+		npages = (next - addr) >> PAGE_SHIFT;
+		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+					       readonly, owner, NULL,
+					       &hmm_range);
+		if (r) {
+			pr_debug("failed %d to get svm range pages\n", r);
+			mmput(mm);
+			return r;
+		}
+
+		for (int i = 0; i < npages; i++) {
+			page = hmm_pfn_to_page(hmm_range->hmm_pfns[i]);
+			if (is_zone_device_page(page))
+				prange->vram_pages++;
+		}
+
+		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
+			mmput(mm);
+			return -EAGAIN;
+		}
+
+		addr = next;
+	}
+
+	mmput(mm);
+	return 0;
+}
+
 /**
  * svm_range_add - add svm range and handle overlap
  * @p: the range add to this process svms
@@ -2109,6 +2158,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 			 * will change. Clone and split it, apply updates only
 			 * to the overlapping part
 			 */
+			struct svm_range *head, *tail;
 			struct svm_range *old = prange;
 
 			prange = svm_range_clone(old);
@@ -2123,18 +2173,37 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 
 			if (node->start < start) {
 				pr_debug("change old range start\n");
+				head = NULL;
 				r = svm_range_split_head(prange, start,
-							 insert_list, remap_list);
+							 insert_list, remap_list, head);
 				if (r)
 					goto out;
 			}
 			if (node->last > last) {
 				pr_debug("change old range last\n");
+				tail = NULL;
 				r = svm_range_split_tail(prange, last,
-							 insert_list, remap_list);
+							 insert_list, remap_list, tail);
 				if (r)
 					goto out;
 			}
+			/* cal each inserted svn pragen vram_pages */
+			if (prange->actual_loc && prange->ttm_res) {
+
+				if (head) {
+					r = svm_range_vram_pages(head);
+					if (r)
+						return r;
+					prange->vram_pages = prange->vram_pages  - head->vram_pages;
+				}
+
+				if (tail) {
+					r = svm_range_vram_pages(tail);
+					if (r)
+						return  r;
+					prange->vram_pages = prange->vram_pages - tail->vram_pages;
+				}
+			}
 		} else {
 			/* The node is contained within start..last,
 			 * just update it
-- 
2.25.1



More information about the amd-gfx mailing list