[v2 18/31] drm/xe/svm: Build userptr sg table for device pages

Matthew Brost matthew.brost at intel.com
Wed Apr 10 21:52:59 UTC 2024


On Tue, Apr 09, 2024 at 04:17:29PM -0400, Oak Zeng wrote:
> Previously the function xe_build_sg only supported userptrs backed by
> system memory pages. Now this function is extended to also support
> userptrs backed by device pages.
> 
> For device pages, there is no need for dma-mapping. Instead, we
> calculate the device page's dpa (device physical address) and use the
> dpa to fill the sg table.
> 
> As of now, we assume each userptr is backed entirely by either system
> memory pages or device pages. There is no support for a mixture of
> device and system memory pages backing one userptr.
> 
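
As an aside, for anyone following along: the dpa lookup the commit
message describes presumably boils down to something like the below.
This is my paraphrase, not code from this patch, and the dpa_base /
hpa_base field names are assumptions taken from earlier patches in the
series.

/*
 * Sketch only, not from this patch: turning a migrated page's pfn into
 * a device physical address. dpa_base/hpa_base are assumed
 * xe_mem_region fields from earlier patches in this series.
 */
static inline u64 pfn_to_dpa_sketch(struct xe_mem_region *mr, unsigned long pfn)
{
	u64 hpa = (u64)pfn << PAGE_SHIFT;	/* host physical address of the page */

	/* rebase the host-side offset into the device's address space */
	return mr->dpa_base + (hpa - mr->hpa_base);
}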

I'm not sure this is needed, given Jason's suggestion (or rather
insistence) not to use an sg list for a collection of dpas [1].

For a single device we should just be able to use the buddy blocks as
the cursor, as I suggest in [1], roughly like the sketch below. Maybe
that doesn't work in a multi-device case, but it certainly should work
for a single device. Since we are working on a single device first,
let's get it working without abusing an SG list.
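
Purely illustrative sketch of what I mean: walk the buddy blocks
directly with the drm_buddy helpers. The 'blocks' list handling and the
dpa_base field are assumptions for illustration, not the real xe
structures.

/*
 * Illustrative only: walk the buddy blocks backing a VRAM allocation and
 * hand out contiguous dpa chunks directly, instead of packing dpas into
 * an sg_table. Assumes the allocation's blocks live on a list threaded
 * through drm_buddy_block.link and that xe_mem_region has a dpa_base.
 */
struct vram_cursor {
	struct drm_buddy_block *block;	/* current buddy block */
	u64 offset;			/* bytes consumed within the block */
	u64 remaining;			/* bytes left in the mapped range */
};

static void vram_cursor_init(struct vram_cursor *cur, struct list_head *blocks,
			     u64 start, u64 size)
{
	/* simplification: assume 'start' falls inside the first block */
	cur->block = list_first_entry(blocks, struct drm_buddy_block, link);
	cur->offset = start;
	cur->remaining = size;
}

static u64 vram_cursor_dpa(struct xe_mem_region *mr, struct vram_cursor *cur)
{
	/* dpa = region base + block offset + progress within the block */
	return mr->dpa_base + drm_buddy_block_offset(cur->block) + cur->offset;
}

static void vram_cursor_advance(struct drm_buddy *mm, struct vram_cursor *cur,
				u64 bytes)
{
	cur->remaining -= bytes;
	cur->offset += bytes;
	if (cur->remaining && cur->offset >= drm_buddy_block_size(mm, cur->block)) {
		/* move to the next buddy block in the allocation */
		cur->block = list_next_entry(cur->block, link);
		cur->offset = 0;
	}
}

The bind code could then consume contiguous (dpa, length) chunks
straight from such a cursor, similar to how xe_res_cursor already walks
TTM resources for BO-backed VMAs, with no sg_table in the middle.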

Matt

[1] https://patchwork.freedesktop.org/patch/574894/?series=128910&rev=1

> Signed-off-by: Oak Zeng <oak.zeng at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_hmm.c      | 121 +++++++++++++++++++++++++------
>  drivers/gpu/drm/xe/xe_vm_types.h |   2 +
>  2 files changed, 100 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
> index 427c6bc49949..a261c1dd2060 100644
> --- a/drivers/gpu/drm/xe/xe_hmm.c
> +++ b/drivers/gpu/drm/xe/xe_hmm.c
> @@ -11,6 +11,7 @@
>  #include <linux/hmm.h>
>  #include <linux/mm.h>
>  #include "xe_hmm.h"
> +#include "xe_svm.h"
>  #include "xe_vm.h"
>  #include "xe_bo.h"
>  
> @@ -43,15 +44,90 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
>  	}
>  }
>  
> +/**
> + * xe_build_sg_device_pages() - build sg table for userptr when the backing store
> + * is device pages
> + *
> + * @st: sg table to build
> + * @hmm_pfns: pfn array of the userptr
> + * @pages: struct page array of this userptr
> + * @npages: how many pages in this userptr
> + */
> +static int xe_build_sg_device_pages(struct sg_table *st, unsigned long *hmm_pfns,
> +						struct page **pages, uint64_t npages)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	sg = NULL;
> +	st->nents = 0;
> +	if (unlikely(sg_alloc_table(st, npages, GFP_KERNEL)))
> +		return -ENOMEM;
> +
> +	for (i = 0; i < npages; i++) {
> +		unsigned long addr;
> +		struct xe_mem_region *mr;
> +
> +		mr = xe_page_to_mem_region(pages[i]);
> +		addr = xe_mem_region_pfn_to_dpa(mr, hmm_pfns[i]);
> +		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> +			sg->length += PAGE_SIZE;
> +			sg_dma_len(sg) += PAGE_SIZE;
> +			continue;
> +		}
> +
> +		sg =  sg ? sg_next(sg) : st->sgl;
> +		sg_dma_address(sg) = addr;
> +		sg_dma_len(sg) = PAGE_SIZE;
> +		sg->length = PAGE_SIZE;
> +		st->nents++;
> +	}
> +
> +	sg_mark_end(sg);
> +	return 0;
> +}
> +
> +/**
> + * xe_validate_hmm_pfns() - validate all pages in a userptr belong to one memory
> + * region, and populate the pages array.
> + *
> + * @userptr: The userptr to validate
> + * @hmm_pfns: an array holding hmm pfns
> + * @npages: number of pages of this userptr
> + * @pages: output parameter to hold the populated pages from pfn.
> + */
> +static void xe_validate_hmm_pfns(struct xe_userptr *userptr, unsigned long *hmm_pfns,
> +						uint64_t npages, struct page **pages)
> +{
> +	int i;
> +	struct xe_vma *vma = xe_userptr_to_vma(userptr);
> +	struct xe_vm *vm = xe_vma_vm(vma);
> +
> +	pages[0] = hmm_pfn_to_page(hmm_pfns[0]);
> +	userptr->is_device_pages = is_device_private_page(pages[0]);
> +	for (i = 1; i < npages; i++) {
> +		pages[i] = hmm_pfn_to_page(hmm_pfns[i]);
> +		/*
> +		 * We currently assume no mixture of device pages and system memory
> +		 * pages in one userptr. If this turns out not to be true, we will
> +		 * either split the userptr into device-page-backed and
> +		 * system-memory-backed ranges, or support a mixed backing store in
> +		 * one userptr.
> +		 */
> +		xe_assert(vm->xe,
> +			userptr->is_device_pages == is_device_private_page(pages[i]));
> +	}
> +}
> +
> +
>  /**
>   * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
>   * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
>   * and will be used to program GPU page table later.
>   *
>   * @xe: the xe device who will access the dma-address in sg table
> + * @userptr: the userptr that we build the sg table for
>   * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
>   * has the pfn numbers of pages that back up this hmm address range.
> - * @st: pointer to the sg table.
>   * @write: whether we write to this range. This decides dma map direction
>   * for system pages. If write we map it bi-diretional; otherwise
>   * DMA_TO_DEVICE
> @@ -64,11 +140,6 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
>   * access memory. So if the memory is system memory, we need to
>   * do a dma-mapping so it can be accessed by GPU/DMA.
>   *
> - * FIXME: This function currently only support pages in system
> - * memory. If the memory is GPU local memory (of the GPU who
> - * is going to access memory), we need gpu dpa (device physical
> - * address), and there is no need of dma-mapping. This is TBD.
> - *
>   * FIXME: dma-mapping for peer gpu device to access remote gpu's
>   * memory. Add this when you support p2p
>   *
> @@ -77,12 +148,13 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
>   *
>   * Returns 0 if successful; -ENOMEM if fails to allocate memory
>   */
> -static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
> -			     struct sg_table *st, bool write)
> +static int xe_build_sg(struct xe_device *xe, struct xe_userptr *userptr,
> +					struct hmm_range *range, bool write)
>  {
> +	struct sg_table *st = &userptr->sgt;
>  	struct device *dev = xe->drm.dev;
>  	struct page **pages;
> -	u64 i, npages;
> +	u64 npages;
>  	int ret;
>  
>  	npages = xe_npages_in_range(range->start, range->end);
> @@ -90,19 +162,22 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
>  	if (!pages)
>  		return -ENOMEM;
>  
> -	for (i = 0; i < npages; i++) {
> -		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
> -		xe_assert(xe, !is_device_private_page(pages[i]));
> -	}
> -
> -	ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0,
> -			npages << PAGE_SHIFT, xe_sg_segment_size(dev), GFP_KERNEL);
> -	if (ret)
> -		goto free_pages;
> +	xe_validate_hmm_pfns(userptr, range->hmm_pfns, npages, pages);
> +	if (!userptr->is_device_pages) {
> +		ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0,
> +				npages << PAGE_SHIFT, xe_sg_segment_size(dev), GFP_KERNEL);
> +		if (ret)
> +			goto free_pages;
>  
> -	ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
> -			DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
> +		ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
> +				DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
> +	} else {
> +		ret = xe_build_sg_device_pages(st, range->hmm_pfns, pages, npages);
> +		if (ret)
> +			goto free_pages;
> +	}
>  
> +	userptr->sg = st;
>  free_pages:
>  	kvfree(pages);
>  	return ret;
> @@ -127,7 +202,8 @@ void xe_userptr_free_sg(struct xe_userptr_vma *uvma)
>  	struct device *dev = xe->drm.dev;
>  
>  	xe_assert(xe, userptr->sg);
> -	dma_unmap_sgtable(dev, userptr->sg,
> +	if (!userptr->is_device_pages)
> +		dma_unmap_sgtable(dev, userptr->sg,
>  			write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
>  
>  	sg_free_table(userptr->sg);
> @@ -239,12 +315,11 @@ int xe_userptr_populate_range(struct xe_userptr_vma *uvma)
>  	if (ret)
>  		goto free_pfns;
>  
> -	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write);
> +	ret = xe_build_sg(vm->xe, userptr, &hmm_range, write);
>  	if (ret)
>  		goto free_pfns;
>  
>  	xe_mark_range_accessed(&hmm_range, write);
> -	userptr->sg = &userptr->sgt;
>  	userptr->notifier_seq = hmm_range.notifier_seq;
>  
>  free_pfns:
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index fbf6bfcf59a8..3b4debfecc9b 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -64,6 +64,8 @@ struct xe_userptr {
>  	struct sg_table *sg;
>  	/** @notifier_seq: notifier sequence number */
>  	unsigned long notifier_seq;
> +	/** @is_device_pages: the backing store is in device memory */
> +	bool is_device_pages;
>  	/**
>  	 * @initial_bind: user pointer has been bound at least once.
>  	 * write: vm->userptr.notifier_lock in read mode and vm->resv held.
> -- 
> 2.26.3
> 

