[Intel-gfx] [PATCH 4/4] drm/i915/gtt: Tidy up ppgtt insertion for gen8

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Tue Jul 16 15:30:09 UTC 2019



On 12/07/2019 14.27, Chris Wilson wrote:
> Apply the new radix shift helpers to extract the multi-level indices
> cleanly when inserting PTEs into the GTT tree.
> 
Reviewed-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
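
The idx argument is now just the linear PTE index (i.e. start >>
GEN8_PTE_SHIFT), and gen8_pd_index(idx, lvl) peels off 9 bits per
level. For anyone following along, here is a minimal standalone
sketch of that radix scheme (constants assumed here purely for
illustration; the real helpers come from the earlier patch in this
series):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative constants: 4KiB pages, 512 entries per level. */
	#define PTE_SHIFT	12
	#define PD_BITS		9			/* ilog2(512) */
	#define PD_MASK		((1u << PD_BITS) - 1)

	/* Index into page-table level @lvl for a linear PTE index @idx. */
	static unsigned int pd_index(uint64_t idx, int lvl)
	{
		return (unsigned int)(idx >> (PD_BITS * lvl)) & PD_MASK;
	}

	int main(void)
	{
		uint64_t addr = 0x0000123456789000ULL;	/* 48b GPU address */
		uint64_t idx = addr >> PTE_SHIFT;	/* linear PTE index */
		int lvl;

		/* lvl 3 = PML4E, lvl 2 = PDPE, lvl 1 = PDE, lvl 0 = PTE */
		for (lvl = 3; lvl >= 0; lvl--)
			printf("lvl%d index: %u\n", lvl, pd_index(idx, lvl));

		return 0;
	}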

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  90 ++--------------------
>  2 files changed, 48 insertions(+), 157 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 72e0f9799a46..de78dc8c425c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1131,47 +1131,28 @@ static inline struct sgt_dma {
>  	return (struct sgt_dma) { sg, addr, addr + sg->length };
>  }
>  
> -struct gen8_insert_pte {
> -	u16 pml4e;
> -	u16 pdpe;
> -	u16 pde;
> -	u16 pte;
> -};
> -
> -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
> -{
> -	return (struct gen8_insert_pte) {
> -		 gen8_pml4e_index(start),
> -		 gen8_pdpe_index(start),
> -		 gen8_pde_index(start),
> -		 gen8_pte_index(start),
> -	};
> -}
> -
> -static __always_inline bool
> +static __always_inline u64
>  gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
>  			      struct i915_page_directory *pdp,
>  			      struct sgt_dma *iter,
> -			      struct gen8_insert_pte *idx,
> +			      u64 idx,
>  			      enum i915_cache_level cache_level,
>  			      u32 flags)
>  {
>  	struct i915_page_directory *pd;
>  	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>  	gen8_pte_t *vaddr;
> -	bool ret;
>  
> -	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -	pd = i915_pd_entry(pdp, idx->pdpe);
> -	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> +	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>  	do {
> -		vaddr[idx->pte] = pte_encode | iter->dma;
> +		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
>  
>  		iter->dma += I915_GTT_PAGE_SIZE;
>  		if (iter->dma >= iter->max) {
>  			iter->sg = __sg_next(iter->sg);
>  			if (!iter->sg) {
> -				ret = false;
> +				idx = 0;
>  				break;
>  			}
>  
> @@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
>  			iter->max = iter->dma + iter->sg->length;
>  		}
>  
> -		if (++idx->pte == GEN8_PTES) {
> -			idx->pte = 0;
> -
> -			if (++idx->pde == I915_PDES) {
> -				idx->pde = 0;
> -
> +		if (gen8_pd_index(++idx, 0) == 0) {
> +			if (gen8_pd_index(idx, 1) == 0) {
>  				/* Limited by sg length for 3lvl */
> -				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
> -					idx->pdpe = 0;
> -					ret = true;
> +				if (gen8_pd_index(idx, 2) == 0)
>  					break;
> -				}
>  
> -				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -				pd = pdp->entry[idx->pdpe];
> +				pd = pdp->entry[gen8_pd_index(idx, 2)];
>  			}
>  
>  			kunmap_atomic(vaddr);
> -			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>  		}
>  	} while (1);
>  	kunmap_atomic(vaddr);
>  
> -	return ret;
> +	return idx;
>  }
>  
>  static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> @@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
>  {
>  	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct sgt_dma iter = sgt_dma(vma);
> -	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
>  
> -	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
> +	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
> +				      vma->node.start >> GEN8_PTE_SHIFT,
>  				      cache_level, flags);
>  
>  	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
> @@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  	dma_addr_t rem = iter->sg->length;
>  
>  	do {
> -		struct gen8_insert_pte idx = gen8_insert_pte(start);
>  		struct i915_page_directory *pdp =
> -			i915_pdp_entry(pml4, idx.pml4e);
> -		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
> -		unsigned int page_size;
> -		bool maybe_64K = false;
> +			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
> +		struct i915_page_directory *pd =
> +			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
>  		gen8_pte_t encode = pte_encode;
> +		unsigned int maybe_64K = -1;
> +		unsigned int page_size;
>  		gen8_pte_t *vaddr;
> -		u16 index, max;
> +		u16 index;
>  
>  		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
>  		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
> -		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
> -			index = idx.pde;
> -			max = I915_PDES;
> -			page_size = I915_GTT_PAGE_SIZE_2M;
> -
> +		    rem >= I915_GTT_PAGE_SIZE_2M &&
> +		    !__gen8_pte_index(start, 0)) {
> +			index = __gen8_pte_index(start, 1);
>  			encode |= GEN8_PDE_PS_2M;
> +			page_size = I915_GTT_PAGE_SIZE_2M;
>  
>  			vaddr = kmap_atomic_px(pd);
>  		} else {
> -			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
> +			struct i915_page_table *pt =
> +				i915_pt_entry(pd, __gen8_pte_index(start, 1));
>  
> -			index = idx.pte;
> -			max = GEN8_PTES;
> +			index = __gen8_pte_index(start, 0);
>  			page_size = I915_GTT_PAGE_SIZE;
>  
>  			if (!index &&
>  			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
>  			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
>  			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
> -			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
> -				maybe_64K = true;
> +			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
> +				maybe_64K = __gen8_pte_index(start, 1);
>  
>  			vaddr = kmap_atomic_px(pt);
>  		}
> @@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  				iter->dma = sg_dma_address(iter->sg);
>  				iter->max = iter->dma + rem;
>  
> -				if (maybe_64K && index < max &&
> +				if (maybe_64K != -1 && index < I915_PDES &&
>  				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
>  				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
> -				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
> -					maybe_64K = false;
> +				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
> +					maybe_64K = -1;
>  
>  				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
>  					break;
>  			}
> -		} while (rem >= page_size && index < max);
> +		} while (rem >= page_size && index < I915_PDES);
>  
>  		kunmap_atomic(vaddr);
>  
> @@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  		 * it and have reached the end of the sg table and we have
>  		 * enough padding.
>  		 */
> -		if (maybe_64K &&
> -		    (index == max ||
> +		if (maybe_64K != -1 &&
> +		    (index == I915_PDES ||
>  		     (i915_vm_has_scratch_64K(vma->vm) &&
>  		      !iter->sg && IS_ALIGNED(vma->node.start +
>  					      vma->node.size,
>  					      I915_GTT_PAGE_SIZE_2M)))) {
>  			vaddr = kmap_atomic_px(pd);
> -			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
> +			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
>  			kunmap_atomic(vaddr);
>  			page_size = I915_GTT_PAGE_SIZE_64K;
>  
> @@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  				u16 i;
>  
>  				encode = vma->vm->scratch[0].encode;
> -				vaddr = kmap_atomic_px(i915_pt_entry(pd,
> -								     idx.pde));
> +				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
>  
>  				for (i = 1; i < index; i += 16)
>  					memset64(vaddr + i, encode, 15);
> @@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
>  					       flags);
>  	} else {
> -		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
> +		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
>  
> -		while (gen8_ppgtt_insert_pte_entries(ppgtt,
> -						     i915_pdp_entry(pml4, idx.pml4e++),
> -						     &iter, &idx, cache_level,
> -						     flags))
> -			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
> +		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
> +							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
> +							    &iter, idx, cache_level,
> +							    flags)))
> +			;
>  
>  		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index aea9a7084414..6d6e1f4015b9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -115,29 +115,18 @@ typedef u64 gen8_pte_t;
>  #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
>  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
>  
> -/* GEN8 32b style address is defined as a 3 level page table:
> +/*
> + * GEN8 32b style address is defined as a 3 level page table:
>   * 31:30 | 29:21 | 20:12 |  11:0
>   * PDPE  |  PDE  |  PTE  | offset
>   * The difference as compared to normal x86 3 level page table is the PDPEs are
>   * programmed via register.
> - */
> -#define GEN8_3LVL_PDPES			4
> -#define GEN8_PDE_SHIFT			21
> -#define GEN8_PDE_MASK			0x1ff
> -#define GEN8_PTE_MASK			0x1ff
> -#define GEN8_PTES			I915_PTES(sizeof(gen8_pte_t))
> -
> -/* GEN8 48b style address is defined as a 4 level page table:
> + *
> + * GEN8 48b style address is defined as a 4 level page table:
>   * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
>   * PML4E | PDPE  |  PDE  |  PTE  | offset
>   */
> -#define GEN8_PML4ES_PER_PML4		512
> -#define GEN8_PML4E_SHIFT		39
> -#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
> -#define GEN8_PDPE_SHIFT			30
> -/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
> - * tables */
> -#define GEN8_PDPE_MASK			0x1ff
> +#define GEN8_3LVL_PDPES			4
>  
>  #define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
>  #define PPAT_CACHED_PDE			0 /* WB LLC */
> @@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr)
>  	return i915_pde_index(addr, GEN6_PDE_SHIFT);
>  }
>  
> -static inline unsigned int
> -i915_pdpes_per_pdp(const struct i915_address_space *vm)
> -{
> -	if (i915_vm_is_4lvl(vm))
> -		return GEN8_PML4ES_PER_PML4;
> -
> -	return GEN8_3LVL_PDPES;
> -}
> -
>  static inline struct i915_page_table *
>  i915_pt_entry(const struct i915_page_directory * const pd,
>  	      const unsigned short n)
> @@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
>  	return pdp->entry[n];
>  }
>  
> -static inline struct i915_page_directory *
> -i915_pdp_entry(const struct i915_page_directory * const pml4,
> -	       const unsigned short n)
> -{
> -	return pml4->entry[n];
> -}
> -
> -/* Equivalent to the gen6 version, For each pde iterates over every pde
> - * between from start until start + length. On gen8+ it simply iterates
> - * over every page directory entry in a page directory.
> - */
> -#define gen8_for_each_pde(pt, pd, start, length, iter)			\
> -	for (iter = gen8_pde_index(start);				\
> -	     length > 0 && iter < I915_PDES &&				\
> -		     (pt = i915_pt_entry(pd, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
> -	for (iter = gen8_pdpe_index(start);				\
> -	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
> -		     (pd = i915_pd_entry(pdp, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
> -	for (iter = gen8_pml4e_index(start);				\
> -	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
> -		     (pdp = i915_pdp_entry(pml4, iter), true);		\
> -	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
> -		    temp = min(temp - start, length);			\
> -		    start += temp, length -= temp; }), ++iter)
> -
> -static inline u32 gen8_pte_index(u64 address)
> -{
> -	return i915_pte_index(address, GEN8_PDE_SHIFT);
> -}
> -
> -static inline u32 gen8_pde_index(u64 address)
> -{
> -	return i915_pde_index(address, GEN8_PDE_SHIFT);
> -}
> -
> -static inline u32 gen8_pdpe_index(u64 address)
> -{
> -	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> -}
> -
> -static inline u32 gen8_pml4e_index(u64 address)
> -{
> -	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
> -}
> -
> -static inline u64 gen8_pte_count(u64 address, u64 length)
> -{
> -	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
> -}
> -
>  static inline dma_addr_t
>  i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
> 
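
One property of folding the cursor into a single u64 that's worth
calling out: the level-crossing checks in
gen8_ppgtt_insert_pte_entries() reduce to watching a base-512 digit
wrap to zero after the increment, with each deeper test only firing
once the digit below it has carried. A quick illustration of that
carry detection (same assumed constants as the sketch above):

	#include <stdint.h>
	#include <stdio.h>

	#define PD_BITS		9
	#define PD_MASK		((1u << PD_BITS) - 1)

	static unsigned int pd_index(uint64_t idx, int lvl)
	{
		return (unsigned int)(idx >> (PD_BITS * lvl)) & PD_MASK;
	}

	int main(void)
	{
		/* Last PTE of the last PDE within PDPE #3. */
		uint64_t idx = (3ULL << (2 * PD_BITS)) | (511ULL << PD_BITS) | 511;

		if (pd_index(++idx, 0) == 0) {		/* PTE digit wrapped */
			printf("crossed into a new page table\n");
			if (pd_index(idx, 1) == 0) {	/* PDE digit wrapped */
				printf("crossed into a new page directory\n");
				if (pd_index(idx, 2) == 0)	/* PDPE digit wrapped */
					printf("crossed into a new pdp\n");
			}
		}

		return 0;
	}

This prints the first two lines but not the third, since only the PTE
and PDE digits carry here -- exactly the nesting of the new insert
loop. Also a nice touch that maybe_64K now doubles as the saved PDE
index (with -1 meaning "not eligible"), so the final GEN8_PDE_IPS_64K
fixup can write vaddr[maybe_64K] directly instead of relying on the
old idx.pde.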

