[v2 18/31] drm/xe/svm: Build userptr sg table for device pages

Oak Zeng oak.zeng at intel.com
Tue Apr 9 20:17:29 UTC 2024


Previously, the function xe_build_sg only supported userptrs backed by
system memory pages. This function is now extended to also support
userptrs backed by device pages.

For device pages, there is no need for dma-mapping. Instead, we
calculate each device page's dpa (device physical address) and use
the dpa to fill the sg table.
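
The pfn-to-dpa conversion this relies on boils down to an offset
translation between the host physical address range that backs the
device pages and the device physical address range of the memory
region. A minimal sketch of the idea (the field names hpa_base and
dpa_base are illustrative and may not match the actual
xe_mem_region_pfn_to_dpa() helper in this series):

/*
 * Sketch only: translate a device-private pfn to a device physical
 * address. Assumes mr->hpa_base is the host physical base of the
 * region's pagemap and mr->dpa_base is the device physical base of
 * the same region; both field names are assumptions for illustration.
 */
static u64 pfn_to_dpa_sketch(struct xe_mem_region *mr, unsigned long pfn)
{
	u64 hpa = (u64)pfn << PAGE_SHIFT;  /* host physical address of the page */
	u64 offset = hpa - mr->hpa_base;   /* offset within the memory region */

	return mr->dpa_base + offset;      /* same offset in device address space */
}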

As of now, we assume each userptr is backed either entirely by system
memory pages or entirely by device pages. A mixed backing of device
and system memory pages within one userptr is not supported.

Signed-off-by: Oak Zeng <oak.zeng at intel.com>
---
 drivers/gpu/drm/xe/xe_hmm.c      | 121 +++++++++++++++++++++++++------
 drivers/gpu/drm/xe/xe_vm_types.h |   2 +
 2 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 427c6bc49949..a261c1dd2060 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -11,6 +11,7 @@
 #include <linux/hmm.h>
 #include <linux/mm.h>
 #include "xe_hmm.h"
+#include "xe_svm.h"
 #include "xe_vm.h"
 #include "xe_bo.h"
 
@@ -43,15 +44,90 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
 	}
 }
 
+/**
+ * xe_build_sg_device_pages() - build an sg table for a userptr whose backing
+ * store is device pages
+ *
+ * @st: sg table to build
+ * @hmm_pfns: pfn array of the userptr
+ * @pages: struct page array of this userptr
+ * @npages: how many pages in this userptr
+ */
+static int xe_build_sg_device_pages(struct sg_table *st, unsigned long *hmm_pfns,
+						struct page **pages, uint64_t npages)
+{
+	struct scatterlist *sg;
+	int i;
+
+	sg = NULL;
+	st->nents = 0;
+	if (unlikely(sg_alloc_table(st, npages, GFP_KERNEL)))
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long addr;
+		struct xe_mem_region *mr;
+
+		mr = xe_page_to_mem_region(pages[i]);
+		addr = xe_mem_region_pfn_to_dpa(mr, hmm_pfns[i]);
+		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
+			sg->length += PAGE_SIZE;
+			sg_dma_len(sg) += PAGE_SIZE;
+			continue;
+		}
+
+		sg = sg ? sg_next(sg) : st->sgl;
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = PAGE_SIZE;
+		sg->length = PAGE_SIZE;
+		st->nents++;
+	}
+
+	sg_mark_end(sg);
+	return 0;
+}
+
+/**
+ * xe_validate_hmm_pfns() - validate that all pages of a userptr belong to one
+ * memory region, and populate the pages array.
+ *
+ * @userptr: The userptr to validate
+ * @hmm_pfns: an array holding hmm pfns
+ * @npages: number of pages of this userptr
+ * @pages: output array to hold the pages populated from the pfns.
+ */
+static void xe_validate_hmm_pfns(struct xe_userptr *userptr, unsigned long *hmm_pfns,
+						uint64_t npages, struct page **pages)
+{
+	int i;
+	struct xe_vma *vma = xe_userptr_to_vma(userptr);
+	struct xe_vm *vm = xe_vma_vm(vma);
+
+	pages[0] = hmm_pfn_to_page(hmm_pfns[0]);
+	userptr->is_device_pages = is_device_private_page(pages[0]);
+	for (i = 1; i < npages; i++) {
+		pages[i] = hmm_pfn_to_page(hmm_pfns[i]);
+		/*
+		 * We currently assume no mixture of device pages and system
+		 * memory pages in one userptr. If that turns out not to be true,
+		 * we will either split the userptr into device-page-based and
+		 * system-memory-based parts, or support a mixed backing store.
+		 */
+		xe_assert(vm->xe,
+			userptr->is_device_pages == is_device_private_page(pages[i]));
+	}
+}
+
+
 /**
  * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
  * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
  * and will be used to program GPU page table later.
  *
  * @xe: the xe device who will access the dma-address in sg table
+ * @userptr: the userptr that we build the sg table for
  * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
  * has the pfn numbers of pages that back up this hmm address range.
- * @st: pointer to the sg table.
  * @write: whether we write to this range. This decides dma map direction
  * for system pages. If write we map it bi-diretional; otherwise
  * DMA_TO_DEVICE
@@ -64,11 +140,6 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
  * access memory. So if the memory is system memory, we need to
  * do a dma-mapping so it can be accessed by GPU/DMA.
  *
- * FIXME: This function currently only support pages in system
- * memory. If the memory is GPU local memory (of the GPU who
- * is going to access memory), we need gpu dpa (device physical
- * address), and there is no need of dma-mapping. This is TBD.
- *
  * FIXME: dma-mapping for peer gpu device to access remote gpu's
  * memory. Add this when you support p2p
  *
@@ -77,12 +148,13 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
  *
  * Returns 0 if successful; -ENOMEM if fails to allocate memory
  */
-static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
-			     struct sg_table *st, bool write)
+static int xe_build_sg(struct xe_device *xe, struct xe_userptr *userptr,
+					struct hmm_range *range, bool write)
 {
+	struct sg_table *st = &userptr->sgt;
 	struct device *dev = xe->drm.dev;
 	struct page **pages;
-	u64 i, npages;
+	u64 npages;
 	int ret;
 
 	npages = xe_npages_in_range(range->start, range->end);
@@ -90,19 +162,22 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
 	if (!pages)
 		return -ENOMEM;
 
-	for (i = 0; i < npages; i++) {
-		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-		xe_assert(xe, !is_device_private_page(pages[i]));
-	}
-
-	ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0,
-			npages << PAGE_SHIFT, xe_sg_segment_size(dev), GFP_KERNEL);
-	if (ret)
-		goto free_pages;
+	xe_validate_hmm_pfns(userptr, range->hmm_pfns, npages, pages);
+	if (!userptr->is_device_pages) {
+		ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0,
+				npages << PAGE_SHIFT, xe_sg_segment_size(dev), GFP_KERNEL);
+		if (ret)
+			goto free_pages;
 
-	ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
-			DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
+		ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
+				DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
+	} else {
+		ret = xe_build_sg_device_pages(st, range->hmm_pfns, pages, npages);
+		if (ret)
+			goto free_pages;
+	}
 
+	userptr->sg = ret ? NULL : st;
 free_pages:
 	kvfree(pages);
 	return ret;
@@ -127,7 +202,8 @@ void xe_userptr_free_sg(struct xe_userptr_vma *uvma)
 	struct device *dev = xe->drm.dev;
 
 	xe_assert(xe, userptr->sg);
-	dma_unmap_sgtable(dev, userptr->sg,
+	if (!userptr->is_device_pages)
+		dma_unmap_sgtable(dev, userptr->sg,
 			write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
 
 	sg_free_table(userptr->sg);
@@ -239,12 +315,11 @@ int xe_userptr_populate_range(struct xe_userptr_vma *uvma)
 	if (ret)
 		goto free_pfns;
 
-	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write);
+	ret = xe_build_sg(vm->xe, userptr, &hmm_range, write);
 	if (ret)
 		goto free_pfns;
 
 	xe_mark_range_accessed(&hmm_range, write);
-	userptr->sg = &userptr->sgt;
 	userptr->notifier_seq = hmm_range.notifier_seq;
 
 free_pfns:
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fbf6bfcf59a8..3b4debfecc9b 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -64,6 +64,8 @@ struct xe_userptr {
 	struct sg_table *sg;
 	/** @notifier_seq: notifier sequence number */
 	unsigned long notifier_seq;
+	/** @is_device_pages: the backing store is in device memory */
+	bool is_device_pages;
 	/**
 	 * @initial_bind: user pointer has been bound at least once.
 	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
-- 
2.26.3


