[PATCH 1/3] drm/ttm: use apply_to_page_range instead of vmf_insert_pfn_prot

Christian König ckoenig.leichtzumerken at gmail.com
Wed Aug 20 14:33:11 UTC 2025


Thomas pointed out that i915 is using apply_to_page_range() instead of
vmf_insert_pfn_prot() to circumvent the PAT lookup and generally speed up
the page fault handling.

I thought I'd give it a try and measure how much this can improve
things, and it turned out that mapping a 1GiB buffer is now more than 4x
faster than before.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 130 ++++++++++++++++----------------
 1 file changed, 64 insertions(+), 66 deletions(-)
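
For reference only (not part of the applied diff), here is a minimal sketch of
the apply_to_page_range() pattern the patch switches to: a pte_fn_t callback is
invoked once per PTE slot in the requested range and writes the entry directly,
instead of going through vmf_insert_pfn_prot() and its per-page PAT lookup.
The demo_* names are illustrative, not from the patch, and the sketch assumes a
special (VM_PFNMAP-style) mapping with no struct page backing:

#include <linux/mm.h>
#include <linux/pgtable.h>

/* Illustrative state passed to the per-PTE callback */
struct demo_state {
	struct mm_struct *mm;	/* target address space */
	unsigned long pfn;	/* next pfn to insert */
	pgprot_t prot;		/* protection bits to apply */
};

/* pte_fn_t callback: called once for every PTE slot in the range */
static int demo_apply_cb(pte_t *pte, unsigned long addr, void *data)
{
	struct demo_state *s = data;

	/* Write a special (no struct page) PTE and advance to the next pfn */
	set_pte_at(s->mm, addr, pte,
		   pte_mkspecial(pfn_pte(s->pfn++, s->prot)));
	return 0;
}

/* Map @nr_pages PTEs starting at @addr with a single call */
static int demo_map_range(struct vm_area_struct *vma, unsigned long addr,
			  unsigned long pfn, unsigned long nr_pages)
{
	struct demo_state s = {
		.mm = vma->vm_mm,
		.pfn = pfn,
		.prot = vma->vm_page_prot,
	};

	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
				   demo_apply_cb, &s);
}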

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a194db83421d..93764b166678 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -160,6 +160,38 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_vm_reserve);
 
+/* State bag for calls to ttm_bo_vm_apply_cb */
+struct ttm_bo_vm_bag {
+	struct mm_struct		*mm;
+	struct ttm_buffer_object	*bo;
+	struct ttm_tt			*ttm;
+	unsigned long			page_offset;
+	pgprot_t			prot;
+};
+
+/* Callback to fill in a specific PTE */
+static int ttm_bo_vm_apply_cb(pte_t *pte, unsigned long addr, void *data)
+{
+	struct ttm_bo_vm_bag *bag = data;
+	struct ttm_buffer_object *bo = bag->bo;
+	unsigned long pfn;
+
+	if (bo->resource->bus.is_iomem) {
+		pfn = ttm_bo_io_mem_pfn(bo, bag->page_offset);
+	} else {
+		struct page *page = bag->ttm->pages[bag->page_offset];
+
+		if (unlikely(!page))
+			return -ENOMEM;
+		pfn = page_to_pfn(page);
+	}
+
+	/* Special PTEs are not associated with any struct page */
+	set_pte_at(bag->mm, addr, pte, pte_mkspecial(pfn_pte(pfn, bag->prot)));
+	bag->page_offset++;
+	return 0;
+}
+
 /**
  * ttm_bo_vm_fault_reserved - TTM fault helper
  * @vmf: The struct vm_fault given as argument to the fault callback
@@ -183,101 +215,67 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 				    pgoff_t num_prefault)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	struct ttm_buffer_object *bo = vma->vm_private_data;
-	struct ttm_device *bdev = bo->bdev;
-	unsigned long page_offset;
-	unsigned long page_last;
-	unsigned long pfn;
-	struct ttm_tt *ttm = NULL;
-	struct page *page;
+	struct ttm_bo_vm_bag bag = {
+		.mm = vma->vm_mm,
+		.bo = vma->vm_private_data
+	};
+	unsigned long size;
+	vm_fault_t ret;
 	int err;
-	pgoff_t i;
-	vm_fault_t ret = VM_FAULT_NOPAGE;
-	unsigned long address = vmf->address;
 
 	/*
 	 * Wait for buffer data in transit, due to a pipelined
 	 * move.
 	 */
-	ret = ttm_bo_vm_fault_idle(bo, vmf);
+	ret = ttm_bo_vm_fault_idle(bag.bo, vmf);
 	if (unlikely(ret != 0))
 		return ret;
 
-	err = ttm_mem_io_reserve(bdev, bo->resource);
+	err = ttm_mem_io_reserve(bag.bo->bdev, bag.bo->resource);
 	if (unlikely(err != 0))
 		return VM_FAULT_SIGBUS;
 
-	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
-		vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node);
-	page_last = vma_pages(vma) + vma->vm_pgoff -
-		drm_vma_node_start(&bo->base.vma_node);
-
-	if (unlikely(page_offset >= PFN_UP(bo->base.size)))
+	bag.page_offset = ((vmf->address - vma->vm_start) >> PAGE_SHIFT) +
+		vma->vm_pgoff - drm_vma_node_start(&bag.bo->base.vma_node);
+	if (unlikely(bag.page_offset >= PFN_UP(bag.bo->base.size)))
 		return VM_FAULT_SIGBUS;
 
-	prot = ttm_io_prot(bo, bo->resource, prot);
-	if (!bo->resource->bus.is_iomem) {
+	prot = ttm_io_prot(bag.bo, bag.bo->resource, prot);
+	if (!bag.bo->resource->bus.is_iomem) {
 		struct ttm_operation_ctx ctx = {
 			.interruptible = true,
 			.no_wait_gpu = false,
 			.force_alloc = true
 		};
 
-		ttm = bo->ttm;
-		err = ttm_bo_populate(bo, &ctx);
-		if (err) {
-			if (err == -EINTR || err == -ERESTARTSYS ||
-			    err == -EAGAIN)
-				return VM_FAULT_NOPAGE;
-
-			pr_debug("TTM fault hit %pe.\n", ERR_PTR(err));
-			return VM_FAULT_SIGBUS;
-		}
+		bag.ttm = bag.bo->ttm;
+		err = ttm_bo_populate(bag.bo, &ctx);
+		if (err)
+			goto error;
 	} else {
 		/* Iomem should not be marked encrypted */
 		prot = pgprot_decrypted(prot);
 	}
+	bag.prot = prot;
 
-	/*
-	 * Speculatively prefault a number of pages. Only error on
-	 * first page.
-	 */
-	for (i = 0; i < num_prefault; ++i) {
-		if (bo->resource->bus.is_iomem) {
-			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
-		} else {
-			page = ttm->pages[page_offset];
-			if (unlikely(!page && i == 0)) {
-				return VM_FAULT_OOM;
-			} else if (unlikely(!page)) {
-				break;
-			}
-			pfn = page_to_pfn(page);
-		}
+	/* Speculatively prefault a number of pages. */
+	size = min(num_prefault << PAGE_SHIFT, vma->vm_end - vmf->address);
+	err = apply_to_page_range(vma->vm_mm, vmf->address, size,
+				  ttm_bo_vm_apply_cb, &bag);
 
-		/*
-		 * Note that the value of @prot at this point may differ from
-		 * the value of @vma->vm_page_prot in the caching- and
-		 * encryption bits. This is because the exact location of the
-		 * data may not be known at mmap() time and may also change
-		 * at arbitrary times while the data is mmap'ed.
-		 * See vmf_insert_pfn_prot() for a discussion.
-		 */
-		ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+error:
+	if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN)
+		return VM_FAULT_NOPAGE;
 
-		/* Never error on prefaulted PTEs */
-		if (unlikely((ret & VM_FAULT_ERROR))) {
-			if (i == 0)
-				return VM_FAULT_NOPAGE;
-			else
-				break;
-		}
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
 
-		address += PAGE_SIZE;
-		if (unlikely(++page_offset >= page_last))
-			break;
+	if (err) {
+		pr_debug("TTM fault hit %pe.\n", ERR_PTR(err));
+		return VM_FAULT_SIGBUS;
 	}
-	return ret;
+
+	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
 
-- 
2.43.0


