[Intel-xe] [PATCH v2 04/10] drm/xe: Remove vma arg from xe_pte_encode()

Matthew Brost matthew.brost at intel.com
Thu Jul 27 14:27:51 UTC 2023


On Wed, Jul 26, 2023 at 10:30:46AM -0700, Matt Roper wrote:
> On Wed, Jul 26, 2023 at 09:07:02AM -0700, Lucas De Marchi wrote:
> > All the callers pass a NULL vma, so the buffer is always the BO. Remove
> > the argument and the side effects of dealing with it.
> 
> You're right that it's only ever called with a NULL vma, but the vma
> parameter was added for userptr support (null bo, vma only) in
> 
>         commit b01fd49ada0fa88c88aaa86529461c2feae09eed
>         Author:     Matthew Brost <matthew.brost at intel.com>
>         AuthorDate: Mon Apr 4 11:18:22 2022 -0700
>         Commit:     Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>         CommitDate: Thu Oct 20 14:21:59 2022 +0200
> 
>             drm/xe: Update VM PTE write code to accept a VMA argument
>             
>             This will help to implement userptr within the VMA.
>             
>             Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> 
> Did something change with how userptr is handled somewhere along the
> line?  Is userptr working today?
> 

Yes, I think at some point we switched over to using xe_res_cursors to
generate addresses for PTEs when programming the PPGTT rather than
calling xe_pte_encode for each entry, as a res cursor is an iterator,
which is much more efficient.

This change left only the xe_migrate code to encode PTE entries directly,
and the xe_migrate code always operates on BOs.

And finally, yes userptr is working today. Quite a few tests use
userptr, the simplest being xe_exec_basic.once-userptr.

With that, this patch LGTM:
Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> 
> Matt
> 
> > 
> > Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> > ---
> >  drivers/gpu/drm/xe/tests/xe_migrate.c |  2 +-
> >  drivers/gpu/drm/xe/xe_migrate.c       |  8 ++---
> >  drivers/gpu/drm/xe/xe_pt.c            | 47 ++++-----------------------
> >  drivers/gpu/drm/xe/xe_pt.h            |  4 +--
> >  4 files changed, 13 insertions(+), 48 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> > index c332dc54cb70..9e9b228fe315 100644
> > --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> > +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> > @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
> >  	/* First part of the test, are we updating our pagetable bo with a new entry? */
> >  	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
> >  		  0xdeaddeadbeefbeef);
> > -	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
> > +	expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
> >  	if (m->eng->vm->flags & XE_VM_FLAG_64K)
> >  		expected |= XE_PTE_PS64;
> >  	if (xe_bo_is_vram(pt))
> > diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> > index bc7dac4e2086..3c7d5cfd30bc 100644
> > --- a/drivers/gpu/drm/xe/xe_migrate.c
> > +++ b/drivers/gpu/drm/xe/xe_migrate.c
> > @@ -196,8 +196,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> >  
> >  	/* Map the entire BO in our level 0 pt */
> >  	for (i = 0, level = 0; i < num_entries; level++) {
> > -		entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
> > -				      XE_CACHE_WB, 0);
> > +		entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0);
> >  
> >  		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
> >  
> > @@ -215,8 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> >  		for (i = 0; i < batch->size;
> >  		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
> >  		     XE_PAGE_SIZE) {
> > -			entry = xe_pte_encode(NULL, batch, i,
> > -					      XE_CACHE_WB, 0);
> > +			entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0);
> >  
> >  			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
> >  				  entry);
> > @@ -1235,7 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
> >  
> >  			BUG_ON(pt_bo->size != SZ_4K);
> >  
> > -			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0);
> > +			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
> >  			bb->cs[bb->len++] = lower_32_bits(addr);
> >  			bb->cs[bb->len++] = upper_32_bits(addr);
> >  		}
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index d5a237b7c883..ac01bc42e54f 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -78,30 +78,6 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> >  	return pde;
> >  }
> >  
> > -static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
> > -			   size_t page_size, bool *is_vram)
> > -{
> > -	if (xe_vma_is_null(vma)) {
> > -		*is_vram = 0;
> > -		return 0;
> > -	}
> > -
> > -	if (xe_vma_is_userptr(vma)) {
> > -		struct xe_res_cursor cur;
> > -		u64 page;
> > -
> > -		*is_vram = false;
> > -		page = offset >> PAGE_SHIFT;
> > -		offset &= (PAGE_SIZE - 1);
> > -
> > -		xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size,
> > -				&cur);
> > -		return xe_res_dma(&cur) + offset;
> > -	} else {
> > -		return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram);
> > -	}
> > -}
> > -
> >  static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
> >  			struct xe_vma *vma, u32 pt_level)
> >  {
> > @@ -140,34 +116,25 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
> >  
> >  /**
> >   * xe_pte_encode() - Encode a page-table entry pointing to memory.
> > - * @vma: The vma representing the memory to point to.
> > - * @bo: If @vma is NULL, representing the memory to point to.
> > - * @offset: The offset into @vma or @bo.
> > + * @bo: The BO representing the memory to point to.
> > + * @offset: The offset into @bo.
> >   * @cache: The cache level indicating
> >   * @pt_level: The page-table level of the page-table into which the entry
> >   * is to be inserted.
> >   *
> >   * Return: An encoded page-table entry. No errors.
> >   */
> > -u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
> > -		  u64 offset, enum xe_cache_level cache,
> > +u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
> >  		  u32 pt_level)
> >  {
> >  	u64 pte;
> >  	bool is_vram;
> >  
> > -	if (vma)
> > -		pte = vma_addr(vma, offset, XE_PAGE_SIZE, &is_vram);
> > -	else
> > -		pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
> > -
> > -	if (is_vram) {
> > +	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram);
> > +	if (is_vram)
> >  		pte |= XE_PPGTT_PTE_LM;
> > -		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
> > -			pte |= XE_USM_PPGTT_PTE_AE;
> > -	}
> >  
> > -	return __pte_encode(pte, cache, vma, pt_level);
> > +	return __pte_encode(pte, cache, NULL, pt_level);
> >  }
> >  
> >  static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> > @@ -179,7 +146,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> >  		return 0;
> >  
> >  	if (level == 0) {
> > -		u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0,
> > +		u64 empty = xe_pte_encode(vm->scratch_bo[id], 0,
> >  					  XE_CACHE_WB, 0);
> >  
> >  		return empty;
> > diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> > index aaf4b7b851e2..bbb00d6461ff 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.h
> > +++ b/drivers/gpu/drm/xe/xe_pt.h
> > @@ -48,7 +48,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
> >  u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> >  		  const enum xe_cache_level level);
> >  
> > -u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
> > -		  u64 offset, enum xe_cache_level cache,
> > +u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
> >  		  u32 pt_level);
> > +
> >  #endif
> > -- 
> > 2.40.1
> > 
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> Linux GPU Platform Enablement
> Intel Corporation


More information about the Intel-xe mailing list