[Intel-gfx] [PATCH v2 23/24] drm/i915/bdw: Dynamic page table allocations

Daniel Vetter daniel at ffwll.ch
Mon Jan 5 06:52:49 PST 2015


On Tue, Dec 23, 2014 at 05:16:26PM +0000, Michel Thierry wrote:
> From: Ben Widawsky <benjamin.widawsky at intel.com>
> 
> This finishes off the dynamic page table allocations, in the legacy
> 3-level style that already exists. Most everything has already been set
> up by this point; the patch finishes off the enabling by setting the
> appropriate function pointers.
> 
> Zombie tracking:
> This could be a separate patch, but I found it helpful for debugging.
> Since we write page tables asynchronously with respect to the GPU using
> them, we can't actually free the page tables until we know the GPU won't
> use them. With this patch, that is always when the context dies.  It
> would be possible to write a reaper to go through zombies and clean them
> up when under memory pressure. That exercise is left for the reader.

As mentioned in a previous reply, freeing pagetables is a separate issue
entirely. Imo we can just idle the gpu and then rip out all pagetables for
empty vms.
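Roughly something like this (just a sketch to illustrate, not tested; the
helper doesn't exist and the emptiness check is only approximate):

	/* Sketch only: once the gpu is idle we can rip out every pagetable
	 * of a vm that has nothing bound any more, no zombie tracking
	 * needed. */
	static void ppgtt_reap_empty_vm(struct i915_address_space *vm)
	{
		if (WARN_ON(i915_gpu_idle(vm->dev)))
			return;

		if (list_empty(&vm->active_list) &&
		    list_empty(&vm->inactive_list))
			gen8_teardown_va_range(vm, vm->start, vm->total);
	}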

> Scratch unused pages:
> The object pages can get freed even if a page table still points to
> them.  As with the zombie fix, we need to make sure we don't let our GPU
> access arbitrary memory when we've unmapped things.

Hm, either I don't follow or this would mean that our active tracking is
broken and we release backing storage before the gpu has stopped using it.

In any case I vote to simplify things a lot for now and just never tear
down pagetables at all. Implementing a last-ditch attempt to free memory
can be done with a lot less complexity imo than trying to be super careful
to avoid stalling the gpu in normal operation.
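The last-ditch path could then be about as dumb as this (again only a
sketch, ppgtt_free_unused_pagetabs() is made up):

	/* Sketch: invoked under memory pressure, e.g. from the shrinker.
	 * Stalling the gpu here is fine since we're already desperate. */
	static unsigned long ppgtt_shrink_pagetabs(struct drm_i915_private *dev_priv)
	{
		struct i915_address_space *vm;
		unsigned long freed = 0;

		if (i915_gpu_idle(dev_priv->dev))
			return 0;

		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
			freed += ppgtt_free_unused_pagetabs(vm); /* made up */

		return freed;
	}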

One more comment below.
-Daniel

> 
> v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
> gen 6 & 7.
> 
> v3: Rebase.
> 
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v2+)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 377 +++++++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  16 +-
>  2 files changed, 326 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 6254677..571c307 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -602,7 +602,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  	}
>  }
>  
> -static void __gen8_do_map_pt(gen8_ppgtt_pde_t *pde,
> +static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde,
>  			     struct i915_pagetab *pt,
>  			     struct drm_device *dev)
>  {
> @@ -619,7 +619,7 @@ static void gen8_map_pagetable_range(struct i915_pagedir *pd,
>  				     uint64_t length,
>  				     struct drm_device *dev)
>  {
> -	gen8_ppgtt_pde_t *pagedir = kmap_atomic(pd->page);
> +	gen8_ppgtt_pde_t * const pagedir = kmap_atomic(pd->page);
>  	struct i915_pagetab *pt;
>  	uint64_t temp, pde;
>  
> @@ -632,8 +632,9 @@ static void gen8_map_pagetable_range(struct i915_pagedir *pd,
>  	kunmap_atomic(pagedir);
>  }
>  
> -static void gen8_teardown_va_range(struct i915_address_space *vm,
> -				   uint64_t start, uint64_t length)
> +static void __gen8_teardown_va_range(struct i915_address_space *vm,
> +				     uint64_t start, uint64_t length,
> +				     bool dead)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  				container_of(vm, struct i915_hw_ppgtt, base);
> @@ -655,6 +656,13 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
>  			     pdpe, vm);
>  			continue;
>  		} else {
> +			if (dead && pd->zombie) {
> +				WARN_ON(test_bit(pdpe, ppgtt->pdp.used_pdpes));
> +				free_pd_single(pd, vm->dev);
> +				ppgtt->pdp.pagedir[pdpe] = NULL;
> +				continue;
> +			}
> +
>  			WARN(!test_bit(pdpe, ppgtt->pdp.used_pdpes),
>  			     "PDPE %d not reserved, but is allocated (%p)",
>  			     pdpe, vm);
> @@ -666,34 +674,64 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
>  				     "PDE %d is not allocated, but is reserved (%p)\n",
>  				     pde, vm);
>  				continue;
> -			} else
> +			} else {
> +				if (dead && pt->zombie) {
> +					WARN_ON(test_bit(pde, pd->used_pdes));
> +					free_pt_single(pt, vm->dev);
> +					pd->page_tables[pde] = NULL;
> +					continue;
> +				}
>  				WARN(!test_bit(pde, pd->used_pdes),
>  				     "PDE %d not reserved, but is allocated (%p)",
>  				     pde, vm);
> +			}
>  
>  			bitmap_clear(pt->used_ptes,
>  				     gen8_pte_index(pd_start),
>  				     gen8_pte_count(pd_start, pd_len));
>  
>  			if (bitmap_empty(pt->used_ptes, GEN8_PTES_PER_PAGE)) {
> +				WARN_ON(!test_and_clear_bit(pde, pd->used_pdes));
> +				if (!dead) {
> +					pt->zombie = 1;
> +					continue;
> +				}
>  				free_pt_single(pt, vm->dev);
>  				pd->page_tables[pde] = NULL;
> -				WARN_ON(!test_and_clear_bit(pde, pd->used_pdes));
> +
>  			}
>  		}
>  
> +		gen8_ppgtt_clear_range(vm, pd_start, pd_len, true);
> +
>  		if (bitmap_empty(pd->used_pdes, GEN8_PDES_PER_PAGE)) {
> +			WARN_ON(!test_and_clear_bit(pdpe, ppgtt->pdp.used_pdpes));
> +			if (!dead) {
> +				/* We've unmapped a possibly live context. Make
> +				 * note of it so we can clean it up later. */
> +				pd->zombie = 1;
> +				continue;
> +			}
>  			free_pd_single(pd, vm->dev);
>  			ppgtt->pdp.pagedir[pdpe] = NULL;
> -			WARN_ON(!test_and_clear_bit(pdpe, ppgtt->pdp.used_pdpes));
>  		}
>  	}
>  }
>  
> +static void gen8_teardown_va_range(struct i915_address_space *vm,
> +				   uint64_t start, uint64_t length)
> +{
> +	__gen8_teardown_va_range(vm, start, length, false);
> +}
> +
>  static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
>  {
> -	gen8_teardown_va_range(&ppgtt->base,
> -			       ppgtt->base.start, ppgtt->base.total);
> +	trace_i915_va_teardown(&ppgtt->base,
> +			       ppgtt->base.start, ppgtt->base.total,
> +			       VM_TO_TRACE_NAME(&ppgtt->base));
> +	__gen8_teardown_va_range(&ppgtt->base,
> +				 ppgtt->base.start, ppgtt->base.total,
> +				 true);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -704,67 +742,177 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  	gen8_ppgtt_free(ppgtt);
>  }
>  
> -static int gen8_ppgtt_alloc_pagetabs(struct i915_pagedir *pd,
> +/**
> + * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
> + * @ppgtt:	Master ppgtt structure.
> + * @pd:		Page directory for this address range.
> + * @start:	Starting virtual address to begin allocations.
> + * @length:	Size of the allocations.
> + * @new_pts:	Bitmap set by function with new allocations. Likely used by the
> + *		caller to free on error.
> + *
> + * Allocate the required number of page tables. Extremely similar to
> + * gen8_ppgtt_alloc_pagedirs(). The main difference is here we are limited by
> + * the page directory boundary (instead of the page directory pointer). That
> + * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_pagedirs(), it is
> + * possible, and likely that the caller will need to use multiple calls of this
> + * function to achieve the appropriate allocation.
> + *
> + * Return: 0 if success; negative error code otherwise.
> + */
> +static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
> +				     struct i915_pagedir *pd,
>  				     uint64_t start,
>  				     uint64_t length,
> -				     struct drm_device *dev)
> +				     unsigned long *new_pts)
>  {
> -	struct i915_pagetab *unused;
> +	struct i915_pagetab *pt;
>  	uint64_t temp;
>  	uint32_t pde;
>  
> -	gen8_for_each_pde(unused, pd, start, length, temp, pde) {
> -		BUG_ON(unused);
> -		pd->page_tables[pde] = alloc_pt_single(dev);
> -		if (IS_ERR(pd->page_tables[pde]))
> +	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
> +		/* Don't reallocate page tables */
> +		if (pt) {
> +			/* Scratch is never allocated this way */
> +			WARN_ON(pt->scratch);
> +			/* If there is a zombie, we can reuse it and save time
> +			 * on the allocation. If we clear the zombie status and
> +			 * the caller somehow fails, we'll probably hit some
> +			 * assertions, so it's up to them to fix up the bitmaps.
> +			 */
> +			continue;
> +		}
> +
> +		pt = alloc_pt_single(ppgtt->base.dev);
> +		if (IS_ERR(pt))
>  			goto unwind_out;
> +
> +		pd->page_tables[pde] = pt;
> +		set_bit(pde, new_pts);
>  	}
>  
>  	return 0;
>  
>  unwind_out:
> -	while (pde--)
> -		free_pt_single(pd->page_tables[pde], dev);
> +	for_each_set_bit(pde, new_pts, GEN8_PDES_PER_PAGE)
> +		free_pt_single(pd->page_tables[pde], ppgtt->base.dev);
>  
>  	return -ENOMEM;
>  }
>  
> -/* bitmap of new pagedirs */
> -static int gen8_ppgtt_alloc_pagedirs(struct i915_pagedirpo *pdp,
> +/**
> + * gen8_ppgtt_alloc_pagedirs() - Allocate page directories for VA range.
> + * @ppgtt:	Master ppgtt structure.
> + * @pdp:	Page directory pointer for this address range.
> + * @start:	Starting virtual address to begin allocations.
> + * @length:	Size of the allocations.
> + * @new_pds:	Bitmap set by function with new allocations. Likely used by the
> + *		caller to free on error.
> + *
> + * Allocate the required number of page directories starting at the pde index of
> + * @start, and ending at the pde index @start + @length. This function will skip
> + * over already allocated page directories within the range, and only allocate
> + * new ones, setting the appropriate pointer within the pdp as well as the
> + * correct position in the bitmap @new_pds.
> + *
> + * The function will only allocate the pages within the range for a given page
> + * directory pointer. In other words, if @start + @length straddles a virtually
> + * addressed PDP boundary (512GB for 4k pages), there will be more allocations
> + * required by the caller. This is not currently possible, and the BUG in the
> + * code will prevent it.
> + *
> + * Return: 0 if success; negative error code otherwise.
> + */
> +static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
> +				     struct i915_pagedirpo *pdp,
>  				     uint64_t start,
>  				     uint64_t length,
> -				     struct drm_device *dev)
> +				     unsigned long *new_pds)
>  {
> -	struct i915_pagedir *unused;
> +	struct i915_pagedir *pd;
>  	uint64_t temp;
>  	uint32_t pdpe;
>  
> +	BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
> +
>  	/* FIXME: PPGTT container_of won't work for 64b */
>  	BUG_ON((start + length) > 0x800000000ULL);
>  
> -	gen8_for_each_pdpe(unused, pdp, start, length, temp, pdpe) {
> -		BUG_ON(unused);
> -		pdp->pagedir[pdpe] = alloc_pd_single(dev);
> +	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
> +		if (pd)
> +			continue;
>  
> -		if (IS_ERR(pdp->pagedir[pdpe]))
> +		pd = alloc_pd_single(ppgtt->base.dev);
> +		if (IS_ERR(pd))
>  			goto unwind_out;
> +
> +		pdp->pagedir[pdpe] = pd;
> +		set_bit(pdpe, new_pds);
>  	}
>  
>  	return 0;
>  
>  unwind_out:
> -	while (pdpe--)
> -		free_pd_single(pdp->pagedir[pdpe], dev);
> +	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
> +		free_pd_single(pdp->pagedir[pdpe], ppgtt->base.dev);
>  
>  	return -ENOMEM;
>  }
>  
> +static inline void
> +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
> +{
> +	int i;
> +	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
> +		kfree(new_pts[i]);
> +	kfree(new_pts);
> +	kfree(new_pds);
> +}
> +
> +/* Fills in the page directory bitmap, and the array of page table bitmaps. Both
> + * of these are based on the number of PDPEs in the system.
> + */
> +int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
> +					 unsigned long ***new_pts)
> +{
> +	int i;
> +	unsigned long *pds;
> +	unsigned long **pts;
> +
> +	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
> +	if (!pds)
> +		return -ENOMEM;
> +
> +	pts = kcalloc(GEN8_PDES_PER_PAGE, sizeof(unsigned long *), GFP_KERNEL);
> +	if (!pts) {
> +		kfree(pds);
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
> +		pts[i] = kcalloc(BITS_TO_LONGS(GEN8_PDES_PER_PAGE),
> +				 sizeof(unsigned long), GFP_KERNEL);
> +		if (!pts[i])
> +			goto err_out;
> +	}
> +
> +	*new_pds = pds;
> +	*new_pts = (unsigned long **)pts;
> +
> +	return 0;
> +
> +err_out:
> +	free_gen8_temp_bitmaps(pds, pts);
> +	return -ENOMEM;
> +}
> +
>  static int gen8_alloc_va_range(struct i915_address_space *vm,
>  			       uint64_t start,
>  			       uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> +	unsigned long *new_page_dirs, **new_page_tables;
>  	struct i915_pagedir *pd;
>  	const uint64_t orig_start = start;
>  	const uint64_t orig_length = length;
> @@ -772,43 +920,103 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>  	uint32_t pdpe;
>  	int ret;
>  
> -	/* Do the allocations first so we can easily bail out */
> -	ret = gen8_ppgtt_alloc_pagedirs(&ppgtt->pdp, start, length,
> -					ppgtt->base.dev);
> +#ifndef CONFIG_64BIT
> +	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
> +	 * this in hardware, but a lot of the drm code is not prepared to handle
> +	 * 64b offset on 32b platforms. */
> +	if (start + length > 0x100000000ULL)
> +		return -E2BIG;
> +#endif
> +
> +	/* Wrap is never okay since we can only represent 48b, and we don't
> +	 * actually use the other side of the canonical address space.
> +	 */
> +	if (WARN_ON(start + length < start))
> +		return -ERANGE;
> +
> +	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
>  	if (ret)
>  		return ret;
>  
> +	/* Do the allocations first so we can easily bail out */
> +	ret = gen8_ppgtt_alloc_pagedirs(ppgtt, &ppgtt->pdp, start, length,
> +					new_page_dirs);
> +	if (ret) {
> +		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
> +		return ret;
> +	}
> +
> +	/* For every page directory referenced, allocate page tables */
>  	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
> -		ret = gen8_ppgtt_alloc_pagetabs(pd, start, length,
> -						ppgtt->base.dev);
> +		bitmap_zero(new_page_tables[pdpe], GEN8_PDES_PER_PAGE);
> +		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
> +						new_page_tables[pdpe]);
>  		if (ret)
>  			goto err_out;
>  	}
>  
> -	/* Now mark everything we've touched as used. This doesn't allow for
> -	 * robust error checking, but it makes the code a hell of a lot simpler.
> -	 */
>  	start = orig_start;
>  	length = orig_length;
>  
> +	/* Allocations have completed successfully, so set the bitmaps, and do
> +	 * the mappings. */
>  	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
> +		gen8_ppgtt_pde_t *const pagedir = kmap_atomic(pd->page);
>  		struct i915_pagetab *pt;
>  		uint64_t pd_len = gen8_clamp_pd(start, length);
>  		uint64_t pd_start = start;
>  		uint32_t pde;
> -		gen8_for_each_pde(pt, &ppgtt->pd, pd_start, pd_len, temp, pde) {
> -			bitmap_set(pd->page_tables[pde]->used_ptes,
> -				   gen8_pte_index(start),
> -				   gen8_pte_count(start, length));
> +
> +		/* Every pd should be allocated, we just did that above. */
> +		BUG_ON(!pd);
> +
> +		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
> +			/* Same reasoning as pd */
> +			BUG_ON(!pt);
> +			BUG_ON(!pd_len);
> +			BUG_ON(!gen8_pte_count(pd_start, pd_len));
> +
> +			/* Set our used ptes within the page table */
> +			bitmap_set(pt->used_ptes,
> +				   gen8_pte_index(pd_start),
> +				   gen8_pte_count(pd_start, pd_len));
> +
> +			/* Our pde is now pointing to the pagetable, pt */
>  			set_bit(pde, pd->used_pdes);
> +
> +			/* Map the PDE to the page table */
> +			__gen8_do_map_pt(pagedir + pde, pt, vm->dev);
> +
> +			/* NB: We haven't yet mapped ptes to pages. At this
> +			 * point we're still relying on insert_entries() */
> +
> +			/* No longer possible this page table is a zombie */
> +			pt->zombie = 0;
>  		}
> +
> +		if (!HAS_LLC(vm->dev))
> +			drm_clflush_virt_range(pagedir, PAGE_SIZE);
> +
> +		kunmap_atomic(pagedir);
> +
>  		set_bit(pdpe, ppgtt->pdp.used_pdpes);
> +		/* This pd is officially not a zombie either */
> +		ppgtt->pdp.pagedir[pdpe]->zombie = 0;
>  	}
>  
> +	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>  	return 0;
>  
>  err_out:
> -	gen8_teardown_va_range(vm, orig_start, start);
> +	while (pdpe--) {
> +		for_each_set_bit(temp, new_page_tables[pdpe], GEN8_PDES_PER_PAGE)
> +			free_pt_single(pd->page_tables[temp], vm->dev);
> +	}
> +
> +	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
> +		free_pd_single(ppgtt->pdp.pagedir[pdpe], vm->dev);
> +
> +	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>  	return ret;
>  }
>  
> @@ -819,37 +1027,68 @@ err_out:
>   * space.
>   *
>   */
> -static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
> +static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>  {
> -	struct i915_pagedir *pd;
> -	uint64_t temp, start = 0;
> -	const uint64_t orig_length = size;
> -	uint32_t pdpe;
> -	int ret;
> +	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
> +	if (IS_ERR(ppgtt->scratch_pd))
> +		return PTR_ERR(ppgtt->scratch_pd);
>  
>  	ppgtt->base.start = 0;
>  	ppgtt->base.total = size;
> -	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
> -	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
>  	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
> +	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
> +
>  	ppgtt->switch_mm = gen8_mm_switch;
>  
> -	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
> -	if (IS_ERR(ppgtt->scratch_pd))
> -		return PTR_ERR(ppgtt->scratch_pd);
> +	return 0;
> +}
> +
> +static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_pagedir *pd;
> +	uint64_t temp, start = 0, size = dev_priv->gtt.base.total;
> +	uint32_t pdpe;
> +	int ret;
>  
> +	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
> +	if (ret)
> +		return ret;
> +
> +	/* Aliasing PPGTT has to always work and be mapped because of the way we
> +	 * use RESTORE_INHIBIT in the context switch. This will be fixed
> +	 * eventually. */
>  	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
>  	if (ret) {
>  		free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
>  		return ret;
>  	}
>  
> -	start = 0;
> -	size = orig_length;
> -
>  	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, size, temp, pdpe)
>  		gen8_map_pagetable_range(pd, start, size, ppgtt->base.dev);
>  
> +	ppgtt->base.allocate_va_range = NULL;
> +	ppgtt->base.teardown_va_range = NULL;
> +	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
> +
> +	return 0;
> +}
> +
> +static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> +{
> +	struct drm_device *dev = ppgtt->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	int ret;
> +
> +	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
> +	if (ret)
> +		return ret;
> +
> +	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
> +	ppgtt->base.teardown_va_range = gen8_teardown_va_range;
> +	ppgtt->base.clear_range = NULL;
> +
>  	return 0;
>  }
>  
> @@ -1413,9 +1652,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
>  	if (ret)
>  		return ret;
>  
> -	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
> -	ppgtt->base.teardown_va_range = gen6_teardown_va_range;
> -	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
> +	ppgtt->base.allocate_va_range = aliasing ? NULL : gen6_alloc_va_range;
> +	ppgtt->base.teardown_va_range = aliasing ? NULL : gen6_teardown_va_range;
> +	ppgtt->base.clear_range = aliasing ? gen6_ppgtt_clear_range : NULL;
>  	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
>  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
>  	ppgtt->base.start = 0;
> @@ -1453,8 +1692,10 @@ static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
>  
>  	if (INTEL_INFO(dev)->gen < 8)
>  		return gen6_ppgtt_init(ppgtt, aliasing);
> +	else if (aliasing)
> +		return gen8_aliasing_ppgtt_init(ppgtt);
>  	else
> -		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
> +		return gen8_ppgtt_init(ppgtt);
>  }
>  int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  {
> @@ -1466,8 +1707,9 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  		kref_init(&ppgtt->ref);
>  		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
>  			    ppgtt->base.total);
> -		ppgtt->base.clear_range(&ppgtt->base, 0,
> -			    ppgtt->base.total, true);
> +		if (ppgtt->base.clear_range)
> +			ppgtt->base.clear_range(&ppgtt->base, 0,
> +				ppgtt->base.total, true);
>  		i915_init_vm(dev_priv, &ppgtt->base);
>  	}
>  
> @@ -1565,10 +1807,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
>  
>  static void ppgtt_unbind_vma(struct i915_vma *vma)
>  {
> -	vma->vm->clear_range(vma->vm,
> -			     vma->node.start,
> -			     vma->obj->base.size,
> -			     true);
> +	WARN_ON(vma->vm->teardown_va_range && vma->vm->clear_range);
>  	if (vma->vm->teardown_va_range) {
>  		trace_i915_va_teardown(vma->vm,
>  				       vma->node.start, vma->node.size,
> @@ -1576,7 +1815,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
>  
>  		vma->vm->teardown_va_range(vma->vm,
>  					   vma->node.start, vma->node.size);
> -	}
> +	} else if (vma->vm->clear_range) {
> +		vma->vm->clear_range(vma->vm,
> +				     vma->node.start,
> +				     vma->obj->base.size,
> +				     true);
> +	} else
> +		BUG();

No gratuitous additions of BUG() please.
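If this case is really worth catching, a WARN is plenty, e.g.:

	} else {
		/* sketch: complain instead of killing the machine */
		WARN(1, "vm %p has neither teardown_va_range nor clear_range\n",
		     vma->vm);
	}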

>  }
>  
>  extern int intel_iommu_gfx_mapped;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 957f2d0..534ed82 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -190,13 +190,26 @@ struct i915_vma {
>  			u32 flags);
>  };
>  
> -
> +/* Zombies. We write page tables with the CPU, and hardware switches them with
> + * the GPU. As such, the only time we can safely remove a page table is when we
> + * know the context is idle. Since we have no good way to do this, we use the
> + * zombie.
> + *
> + * Under memory pressure, if the system is idle, zombies may be reaped.
> + *
> + * There are 3 valid states a page table can be in (not including scratch):
> + *  bitmap = 0, zombie = 0: unallocated
> + *  bitmap = 1, zombie = 0: allocated
> + *  bitmap = 0, zombie = 1: zombie
> + *  bitmap = 1, zombie = 1: invalid
> + */
>  struct i915_pagetab {
>  	struct page *page;
>  	dma_addr_t daddr;
>  
>  	unsigned long *used_ptes;
>  	unsigned int scratch:1;
> +	unsigned zombie:1;
>  };
>  
>  struct i915_pagedir {
> @@ -208,6 +221,7 @@ struct i915_pagedir {
>  
>  	unsigned long *used_pdes;
>  	struct i915_pagetab *page_tables[GEN6_PPGTT_PD_ENTRIES];
> +	unsigned zombie:1;
>  };
>  
>  struct i915_pagedirpo {
> -- 
> 2.1.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

