[Intel-gfx] [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Feb 9 15:08:54 UTC 2017


Chris Wilson <chris at chris-wilson.co.uk> writes:

> We flush the entire page every time we update a few bytes, making the
> update of a page table many, many times slower than is required. If we
> create a WC map of the page for our updates, we can avoid the clflush
> but incur additional cost for creating the pagetable. We amortize that
> cost by reusing page vmappings, and only changing the page protection in
> batches.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 329 ++++++++++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +
>  2 files changed, 172 insertions(+), 162 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 45bab7b7b026..302aee193ce5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -340,43 +340,69 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> -static int __setup_page_dma(struct drm_i915_private *dev_priv,
> -			    struct i915_page_dma *p, gfp_t flags)
> +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -	struct device *kdev = &dev_priv->drm.pdev->dev;
> +	struct page *page;
>  
> -	p->page = alloc_page(flags);
> -	if (!p->page)
> -		return -ENOMEM;
> +	if (vm->free_pages.nr)
> +		return vm->free_pages.pages[--vm->free_pages.nr];
>  
> -	p->daddr = dma_map_page(kdev,
> -				p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +	page = alloc_page(gfp);
> +	if (!page)
> +		return NULL;
>  
> -	if (dma_mapping_error(kdev, p->daddr)) {
> -		__free_page(p->page);
> -		return -EINVAL;
> +	if (vm->pt_kmap_wc)
> +		set_pages_array_wc(&page, 1);
> +
> +	return page;
> +}
> +
> +static void vm_free_pages_release(struct i915_address_space *vm)
> +{
> +	GEM_BUG_ON(!pagevec_count(&vm->free_pages));
> +
> +	if (vm->pt_kmap_wc)
> +		set_pages_array_wb(vm->free_pages.pages,
> +				   pagevec_count(&vm->free_pages));
> +
> +	__pagevec_release(&vm->free_pages);
> +}
> +
> +static void vm_free_page(struct i915_address_space *vm, struct page *page)
> +{
> +	if (!pagevec_add(&vm->free_pages, page))
> +		vm_free_pages_release(vm);

What about the page you failed to push to the vec?

It seems to me that you are missing a retry after the
vec cleanup.

-Mika

> +}
> +
> +static int __setup_page_dma(struct i915_address_space *vm,
> +			    struct i915_page_dma *p,
> +			    gfp_t gfp)
> +{
> +	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
> +	if (unlikely(!p->page))
> +		return -ENOMEM;
> +
> +	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
> +				PCI_DMA_BIDIRECTIONAL);
> +	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
> +		vm_free_page(vm, p->page);
> +		return -ENOMEM;
>  	}
>  
>  	return 0;
>  }
>  
> -static int setup_page_dma(struct drm_i915_private *dev_priv,
> +static int setup_page_dma(struct i915_address_space *vm,
>  			  struct i915_page_dma *p)
>  {
> -	return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
> +	return __setup_page_dma(vm, p, I915_GFP_DMA);
>  }
>  
> -static void cleanup_page_dma(struct drm_i915_private *dev_priv,
> +static void cleanup_page_dma(struct i915_address_space *vm,
>  			     struct i915_page_dma *p)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> -
> -	if (WARN_ON(!p->page))
> -		return;
> -
> -	dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> -	__free_page(p->page);
> -	memset(p, 0, sizeof(*p));
> +	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +	vm_free_page(vm, p->page);
>  }
>  
>  static void *kmap_page_dma(struct i915_page_dma *p)
> @@ -387,67 +413,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
>  /* We use the flushing unmap only with ppgtt structures:
>   * page directories, page tables and scratch pages.
>   */
> -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
> +static void kunmap_page_dma(void *vaddr)
>  {
> -	/* There are only few exceptions for gen >=6. chv and bxt.
> -	 * And we are not sure about the latter so play safe for now.
> -	 */
> -	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> -		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> -
>  	kunmap_atomic(vaddr);
>  }
>  
>  #define kmap_px(px) kmap_page_dma(px_base(px))
> -#define kunmap_px(ppgtt, vaddr) \
> -		kunmap_page_dma((ppgtt)->base.i915, (vaddr))
> +#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
>  
> -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
> -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
> -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
> -#define fill32_px(dev_priv, px, v) \
> -		fill_page_dma_32((dev_priv), px_base(px), (v))
> +#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
> +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
> +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
> +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
>  
> -static void fill_page_dma(struct drm_i915_private *dev_priv,
> -			  struct i915_page_dma *p, const uint64_t val)
> +static void fill_page_dma(struct i915_address_space *vm,
> +			  struct i915_page_dma *p,
> +			  const u64 val)
>  {
> +	u64 * const vaddr = kmap_page_dma(p);
>  	int i;
> -	uint64_t * const vaddr = kmap_page_dma(p);
>  
>  	for (i = 0; i < 512; i++)
>  		vaddr[i] = val;
>  
> -	kunmap_page_dma(dev_priv, vaddr);
> +	kunmap_page_dma(vaddr);
>  }
>  
> -static void fill_page_dma_32(struct drm_i915_private *dev_priv,
> -			     struct i915_page_dma *p, const uint32_t val32)
> +static void fill_page_dma_32(struct i915_address_space *vm,
> +			     struct i915_page_dma *p,
> +			     const u32 v)
>  {
> -	uint64_t v = val32;
> -
> -	v = v << 32 | val32;
> -
> -	fill_page_dma(dev_priv, p, v);
> +	fill_page_dma(vm, p, (u64)v << 32 | v);
>  }
>  
>  static int
> -setup_scratch_page(struct drm_i915_private *dev_priv,
> -		   struct i915_page_dma *scratch,
> -		   gfp_t gfp)
> +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -	return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
> +	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
>  }
>  
> -static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
> -				 struct i915_page_dma *scratch)
> +static void cleanup_scratch_page(struct i915_address_space *vm)
>  {
> -	cleanup_page_dma(dev_priv, scratch);
> +	cleanup_page_dma(vm, &vm->scratch_page);
>  }
>  
> -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
> +static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  {
>  	struct i915_page_table *pt;
> -	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
> +	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
>  	int ret = -ENOMEM;
>  
>  	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
> @@ -460,7 +473,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>  	if (!pt->used_ptes)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pt);
> +	ret = setup_px(vm, pt);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -474,10 +487,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pt(struct drm_i915_private *dev_priv,
> -		    struct i915_page_table *pt)
> +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>  {
> -	cleanup_px(dev_priv, pt);
> +	cleanup_px(vm, pt);
>  	kfree(pt->used_ptes);
>  	kfree(pt);
>  }
> @@ -490,7 +502,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
>  	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
>  				      I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pt, scratch_pte);
> +	fill_px(vm, pt, scratch_pte);
>  }
>  
>  static void gen6_initialize_pt(struct i915_address_space *vm,
> @@ -503,10 +515,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
>  				     I915_CACHE_LLC, 0);
>  
> -	fill32_px(vm->i915, pt, scratch_pte);
> +	fill32_px(vm, pt, scratch_pte);
>  }
>  
> -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
> +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  {
>  	struct i915_page_directory *pd;
>  	int ret = -ENOMEM;
> @@ -520,7 +532,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>  	if (!pd->used_pdes)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pd);
> +	ret = setup_px(vm, pd);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -534,11 +546,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pd(struct drm_i915_private *dev_priv,
> +static void free_pd(struct i915_address_space *vm,
>  		    struct i915_page_directory *pd)
>  {
>  	if (px_page(pd)) {
> -		cleanup_px(dev_priv, pd);
> +		cleanup_px(vm, pd);
>  		kfree(pd->used_pdes);
>  		kfree(pd);
>  	}
> @@ -551,7 +563,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>  
>  	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pd, scratch_pde);
> +	fill_px(vm, pd, scratch_pde);
>  }
>  
>  static int __pdp_init(struct drm_i915_private *dev_priv,
> @@ -585,23 +597,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
>  	pdp->page_directory = NULL;
>  }
>  
> -static struct
> -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
> +static struct i915_page_directory_pointer *
> +alloc_pdp(struct i915_address_space *vm)
>  {
>  	struct i915_page_directory_pointer *pdp;
>  	int ret = -ENOMEM;
>  
> -	WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
> +	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>  
>  	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
>  	if (!pdp)
>  		return ERR_PTR(-ENOMEM);
>  
> -	ret = __pdp_init(dev_priv, pdp);
> +	ret = __pdp_init(vm->i915, pdp);
>  	if (ret)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pdp);
> +	ret = setup_px(vm, pdp);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -615,12 +627,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pdp(struct drm_i915_private *dev_priv,
> +static void free_pdp(struct i915_address_space *vm,
>  		     struct i915_page_directory_pointer *pdp)
>  {
>  	__pdp_fini(pdp);
> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		cleanup_px(dev_priv, pdp);
> +	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
> +		cleanup_px(vm, pdp);
>  		kfree(pdp);
>  	}
>  }
> @@ -632,7 +644,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>  
>  	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pdp, scratch_pdpe);
> +	fill_px(vm, pdp, scratch_pdpe);
>  }
>  
>  static void gen8_initialize_pml4(struct i915_address_space *vm,
> @@ -643,7 +655,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
>  	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
>  					  I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pml4, scratch_pml4e);
> +	fill_px(vm, pml4, scratch_pml4e);
>  }
>  
>  static void
> @@ -659,20 +671,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
>  
>  	page_directorypo = kmap_px(pdp);
>  	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
> -	kunmap_px(ppgtt, page_directorypo);
> +	kunmap_px(page_directorypo);
>  }
>  
>  static void
> -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
> -		 struct i915_pml4 *pml4,
> +gen8_setup_pml4e(struct i915_pml4 *pml4,
>  		 struct i915_page_directory_pointer *pdp,
>  		 int index)
>  {
>  	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
>  
> -	WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
>  	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> -	kunmap_px(ppgtt, pagemap);
> +	kunmap_px(pagemap);
>  }
>  
>  /* Broadwell Page Directory Pointer Descriptors */
> @@ -741,7 +751,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  				uint64_t start,
>  				uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	unsigned int num_entries = gen8_pte_count(start, length);
>  	unsigned int pte = gen8_pte_index(start);
>  	unsigned int pte_end = pte + num_entries;
> @@ -762,7 +771,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  	vaddr = kmap_px(pt);
>  	while (pte < pte_end)
>  		vaddr[pte++] = scratch_pte;
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  
>  	return false;
>  }
> @@ -775,7 +784,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  				uint64_t start,
>  				uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_table *pt;
>  	uint64_t pde;
>  	gen8_pde_t *pde_vaddr;
> @@ -790,8 +798,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  			__clear_bit(pde, pd->used_pdes);
>  			pde_vaddr = kmap_px(pd);
>  			pde_vaddr[pde] = scratch_pde;
> -			kunmap_px(ppgtt, pde_vaddr);
> -			free_pt(vm->i915, pt);
> +			kunmap_px(pde_vaddr);
> +			free_pt(vm, pt);
>  		}
>  	}
>  
> @@ -820,7 +828,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
>  			__clear_bit(pdpe, pdp->used_pdpes);
>  			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
> -			free_pd(vm->i915, pd);
> +			free_pd(vm, pd);
>  		}
>  	}
>  
> @@ -841,7 +849,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  				  uint64_t start,
>  				  uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory_pointer *pdp;
>  	uint64_t pml4e;
>  
> @@ -853,8 +860,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  
>  		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
>  			__clear_bit(pml4e, pml4->used_pml4es);
> -			gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
> -			free_pdp(vm->i915, pdp);
> +			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
> +			free_pdp(vm, pdp);
>  		}
>  	}
>  }
> @@ -912,12 +919,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
>  				pde = 0;
>  			}
>  
> -			kunmap_px(ppgtt, vaddr);
> +			kunmap_px(vaddr);
>  			vaddr = kmap_px(pd->page_table[pde]);
>  			pte = 0;
>  		}
>  	} while (1);
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  
>  	return ret;
>  }
> @@ -959,7 +966,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  		;
>  }
>  
> -static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
> +static void gen8_free_page_tables(struct i915_address_space *vm,
>  				  struct i915_page_directory *pd)
>  {
>  	int i;
> @@ -971,34 +978,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
>  		if (WARN_ON(!pd->page_table[i]))
>  			continue;
>  
> -		free_pt(dev_priv, pd->page_table[i]);
> +		free_pt(vm, pd->page_table[i]);
>  		pd->page_table[i] = NULL;
>  	}
>  }
>  
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	int ret;
>  
> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pt = alloc_pt(dev_priv);
> +	vm->scratch_pt = alloc_pt(vm);
>  	if (IS_ERR(vm->scratch_pt)) {
>  		ret = PTR_ERR(vm->scratch_pt);
>  		goto free_scratch_page;
>  	}
>  
> -	vm->scratch_pd = alloc_pd(dev_priv);
> +	vm->scratch_pd = alloc_pd(vm);
>  	if (IS_ERR(vm->scratch_pd)) {
>  		ret = PTR_ERR(vm->scratch_pd);
>  		goto free_pt;
>  	}
>  
> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		vm->scratch_pdp = alloc_pdp(dev_priv);
> +	if (USES_FULL_48BIT_PPGTT(dev)) {
> +		vm->scratch_pdp = alloc_pdp(vm);
>  		if (IS_ERR(vm->scratch_pdp)) {
>  			ret = PTR_ERR(vm->scratch_pdp);
>  			goto free_pd;
> @@ -1013,11 +1019,11 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  	return 0;
>  
>  free_pd:
> -	free_pd(dev_priv, vm->scratch_pd);
> +	free_pd(vm, vm->scratch_pd);
>  free_pt:
> -	free_pt(dev_priv, vm->scratch_pt);
> +	free_pt(vm, vm->scratch_pt);
>  free_scratch_page:
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	cleanup_scratch_page(vm);
>  
>  	return ret;
>  }
> @@ -1055,44 +1061,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
>  
>  static void gen8_free_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> -
> -	if (USES_FULL_48BIT_PPGTT(dev_priv))
> -		free_pdp(dev_priv, vm->scratch_pdp);
> -	free_pd(dev_priv, vm->scratch_pd);
> -	free_pt(dev_priv, vm->scratch_pt);
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	if (USES_FULL_48BIT_PPGTT(vm->i915))
> +		free_pdp(vm, vm->scratch_pdp);
> +	free_pd(vm, vm->scratch_pd);
> +	free_pt(vm, vm->scratch_pt);
> +	cleanup_scratch_page(vm);
>  }
>  
> -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
> +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  				    struct i915_page_directory_pointer *pdp)
>  {
>  	int i;
>  
> -	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
> +	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
>  		if (WARN_ON(!pdp->page_directory[i]))
>  			continue;
>  
> -		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
> -		free_pd(dev_priv, pdp->page_directory[i]);
> +		gen8_free_page_tables(vm, pdp->page_directory[i]);
> +		free_pd(vm, pdp->page_directory[i]);
>  	}
>  
> -	free_pdp(dev_priv, pdp);
> +	free_pdp(vm, pdp);
>  }
>  
>  static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> -	struct drm_i915_private *dev_priv = ppgtt->base.i915;
>  	int i;
>  
>  	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
>  		if (WARN_ON(!ppgtt->pml4.pdps[i]))
>  			continue;
>  
> -		gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
>  	}
>  
> -	cleanup_px(dev_priv, &ppgtt->pml4);
> +	cleanup_px(&ppgtt->base, &ppgtt->pml4);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -1103,8 +1106,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  	if (intel_vgpu_active(dev_priv))
>  		gen8_ppgtt_notify_vgt(ppgtt, false);
>  
> -	if (!USES_FULL_48BIT_PPGTT(dev_priv))
> -		gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
> +	if (!USES_FULL_48BIT_PPGTT(vm->i915))
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
>  	else
>  		gen8_ppgtt_cleanup_4lvl(ppgtt);
>  
> @@ -1135,7 +1138,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  				     uint64_t length,
>  				     unsigned long *new_pts)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_table *pt;
>  	uint32_t pde;
>  
> @@ -1147,7 +1149,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  			continue;
>  		}
>  
> -		pt = alloc_pt(dev_priv);
> +		pt = alloc_pt(vm);
>  		if (IS_ERR(pt))
>  			goto unwind_out;
>  
> @@ -1161,7 +1163,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pde, new_pts, I915_PDES)
> -		free_pt(dev_priv, pd->page_table[pde]);
> +		free_pt(vm, pd->page_table[pde]);
>  
>  	return -ENOMEM;
>  }
> @@ -1196,7 +1198,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  				  uint64_t length,
>  				  unsigned long *new_pds)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory *pd;
>  	uint32_t pdpe;
>  	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> @@ -1207,7 +1208,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  		if (test_bit(pdpe, pdp->used_pdpes))
>  			continue;
>  
> -		pd = alloc_pd(dev_priv);
> +		pd = alloc_pd(vm);
>  		if (IS_ERR(pd))
>  			goto unwind_out;
>  
> @@ -1221,7 +1222,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pdpe, new_pds, pdpes)
> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
> +		free_pd(vm, pdp->page_directory[pdpe]);
>  
>  	return -ENOMEM;
>  }
> @@ -1249,7 +1250,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  				  uint64_t length,
>  				  unsigned long *new_pdps)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory_pointer *pdp;
>  	uint32_t pml4e;
>  
> @@ -1257,7 +1257,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		if (!test_bit(pml4e, pml4->used_pml4es)) {
> -			pdp = alloc_pdp(dev_priv);
> +			pdp = alloc_pdp(vm);
>  			if (IS_ERR(pdp))
>  				goto unwind_out;
>  
> @@ -1275,7 +1275,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -		free_pdp(dev_priv, pml4->pdps[pml4e]);
> +		free_pdp(vm, pml4->pdps[pml4e]);
>  
>  	return -ENOMEM;
>  }
> @@ -1324,7 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	unsigned long *new_page_dirs, *new_page_tables;
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory *pd;
>  	const uint64_t orig_start = start;
>  	const uint64_t orig_length = length;
> @@ -1393,7 +1392,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  			 * point we're still relying on insert_entries() */
>  		}
>  
> -		kunmap_px(ppgtt, page_directory);
> +		kunmap_px(page_directory);
>  		__set_bit(pdpe, pdp->used_pdpes);
>  		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
>  	}
> @@ -1408,12 +1407,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  
>  		for_each_set_bit(temp, new_page_tables + pdpe *
>  				BITS_TO_LONGS(I915_PDES), I915_PDES)
> -			free_pt(dev_priv,
> -				pdp->page_directory[pdpe]->page_table[temp]);
> +			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
>  	}
>  
>  	for_each_set_bit(pdpe, new_page_dirs, pdpes)
> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
> +		free_pd(vm, pdp->page_directory[pdpe]);
>  
>  	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>  	mark_tlbs_dirty(ppgtt);
> @@ -1426,7 +1424,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  				    uint64_t length)
>  {
>  	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory_pointer *pdp;
>  	uint64_t pml4e;
>  	int ret = 0;
> @@ -1454,7 +1451,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  		if (ret)
>  			goto err_out;
>  
> -		gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
> +		gen8_setup_pml4e(pml4, pdp, pml4e);
>  	}
>  
>  	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> @@ -1464,7 +1461,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  
>  err_out:
>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -		gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
> +		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
>  
>  	return ret;
>  }
> @@ -1480,7 +1477,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>  		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>  
> -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
> +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
> +			  struct i915_page_directory_pointer *pdp,
>  			  uint64_t start, uint64_t length,
>  			  gen8_pte_t scratch_pte,
>  			  struct seq_file *m)
> @@ -1546,7 +1544,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  						 I915_CACHE_LLC);
>  
>  	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
> -		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
> +		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
>  	} else {
>  		uint64_t pml4e;
>  		struct i915_pml4 *pml4 = &ppgtt->pml4;
> @@ -1557,7 +1555,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  				continue;
>  
>  			seq_printf(m, "    PML4E #%llu\n", pml4e);
> -			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
> +			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
>  		}
>  	}
>  }
> @@ -1613,8 +1611,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.bind_vma = ppgtt_bind_vma;
>  	ppgtt->debug_dump = gen8_dump_ppgtt;
>  
> +	/* There are only few exceptions for gen >=6. chv and bxt.
> +	 * And we are not sure about the latter so play safe for now.
> +	 */
> +	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
> +		ppgtt->base.pt_kmap_wc = true;
> +
>  	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		ret = setup_px(dev_priv, &ppgtt->pml4);
> +		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
>  		if (ret)
>  			goto free_scratch;
>  
> @@ -1703,7 +1707,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  			}
>  			seq_puts(m, "\n");
>  		}
> -		kunmap_px(ppgtt, pt_vaddr);
> +		kunmap_px(pt_vaddr);
>  	}
>  }
>  
> @@ -1900,7 +1904,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  		for (i = first_pte; i < last_pte; i++)
>  			pt_vaddr[i] = scratch_pte;
>  
> -		kunmap_px(ppgtt, pt_vaddr);
> +		kunmap_px(pt_vaddr);
>  
>  		num_entries -= last_pte - first_pte;
>  		first_pte = 0;
> @@ -1939,12 +1943,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  		}
>  
>  		if (++act_pte == GEN6_PTES) {
> -			kunmap_px(ppgtt, vaddr);
> +			kunmap_px(vaddr);
>  			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
>  			act_pte = 0;
>  		}
>  	} while (1);
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  }
>  
>  static int gen6_alloc_va_range(struct i915_address_space *vm,
> @@ -1978,7 +1982,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		/* We've already allocated a page table */
>  		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
>  
> -		pt = alloc_pt(dev_priv);
> +		pt = alloc_pt(vm);
>  		if (IS_ERR(pt)) {
>  			ret = PTR_ERR(pt);
>  			goto unwind_out;
> @@ -2026,7 +2030,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
>  
>  		ppgtt->pd.page_table[pde] = vm->scratch_pt;
> -		free_pt(dev_priv, pt);
> +		free_pt(vm, pt);
>  	}
>  
>  	mark_tlbs_dirty(ppgtt);
> @@ -2035,16 +2039,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  
>  static int gen6_init_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	int ret;
>  
> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pt = alloc_pt(dev_priv);
> +	vm->scratch_pt = alloc_pt(vm);
>  	if (IS_ERR(vm->scratch_pt)) {
> -		cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +		cleanup_scratch_page(vm);
>  		return PTR_ERR(vm->scratch_pt);
>  	}
>  
> @@ -2055,17 +2058,14 @@ static int gen6_init_scratch(struct i915_address_space *vm)
>  
>  static void gen6_free_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> -
> -	free_pt(dev_priv, vm->scratch_pt);
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	free_pt(vm, vm->scratch_pt);
> +	cleanup_scratch_page(vm);
>  }
>  
>  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory *pd = &ppgtt->pd;
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_table *pt;
>  	uint32_t pde;
>  
> @@ -2073,7 +2073,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  
>  	gen6_for_all_pdes(pt, pd, pde)
>  		if (pt != vm->scratch_pt)
> -			free_pt(dev_priv, pt);
> +			free_pt(vm, pt);
>  
>  	gen6_free_scratch(vm);
>  }
> @@ -2182,6 +2182,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
>  			   struct drm_i915_private *dev_priv)
>  {
>  	ppgtt->base.i915 = dev_priv;
> +	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>  	if (INTEL_INFO(dev_priv)->gen < 8)
>  		return gen6_ppgtt_init(ppgtt);
> @@ -2199,10 +2200,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
>  	INIT_LIST_HEAD(&vm->inactive_list);
>  	INIT_LIST_HEAD(&vm->unbound_list);
>  	list_add_tail(&vm->global_link, &dev_priv->vm_list);
> +	pagevec_init(&vm->free_pages, false);
>  }
>  
>  static void i915_address_space_fini(struct i915_address_space *vm)
>  {
> +	if (pagevec_count(&vm->free_pages))
> +		vm_free_pages_release(vm);
> +
>  	i915_gem_timeline_fini(&vm->timeline);
>  	drm_mm_takedown(&vm->mm);
>  	list_del(&vm->global_link);
> @@ -2310,9 +2315,8 @@ void i915_ppgtt_release(struct kref *kref)
>  	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
>  	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
>  
> -	i915_address_space_fini(&ppgtt->base);
> -
>  	ppgtt->base.cleanup(&ppgtt->base);
> +	i915_address_space_fini(&ppgtt->base);
>  	kfree(ppgtt);
>  }
>  
> @@ -2947,7 +2951,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>  		return -ENOMEM;
>  	}
>  
> -	ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
> +	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
>  	if (ret) {
>  		DRM_ERROR("Scratch setup failed\n");
>  		/* iounmap will also get called at remove, but meh */
> @@ -3036,7 +3040,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  
>  	iounmap(ggtt->gsm);
> -	cleanup_scratch_page(vm->i915, &vm->scratch_page);
> +	cleanup_scratch_page(vm);
>  }
>  
>  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> @@ -3187,6 +3191,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
>  	int ret;
>  
>  	ggtt->base.i915 = dev_priv;
> +	ggtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>  	if (INTEL_GEN(dev_priv) <= 5)
>  		ret = i915_gmch_probe(ggtt);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 3c5ef5358cef..c59a7687ed6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -36,6 +36,7 @@
>  
>  #include <linux/io-mapping.h>
>  #include <linux/mm.h>
> +#include <linux/pagevec.h>
>  
>  #include "i915_gem_timeline.h"
>  #include "i915_gem_request.h"
> @@ -247,6 +248,7 @@ struct i915_address_space {
>  	struct drm_mm mm;
>  	struct i915_gem_timeline timeline;
>  	struct drm_i915_private *i915;
> +	struct device *dma;
>  	/* Every address space belongs to a struct file - except for the global
>  	 * GTT that is owned by the driver (and so @file is set to NULL). In
>  	 * principle, no information should leak from one context to another
> @@ -297,6 +299,9 @@ struct i915_address_space {
>  	 */
>  	struct list_head unbound_list;
>  
> +	struct pagevec free_pages;
> +	bool pt_kmap_wc;
> +
>  	/* FIXME: Need a more generic return type */
>  	gen6_pte_t (*pte_encode)(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list