[Intel-gfx] [PATCH 01/43] drm/i915: Track unbound pages

Daniel Vetter daniel at ffwll.ch
Fri Jun 1 11:14:25 CEST 2012


On Fri, May 25, 2012 at 01:32:40PM +0100, Chris Wilson wrote:
> When dealing with a working set larger than the GATT, or even the
> mappable aperture when touching through the GTT, we end up with evicting
> objects only to rebind them at a new offset again later. Moving an
> object into and out of the GTT requires clflushing the pages, thus
> causing a double-clflush penalty for rebinding.
> 
> To avoid having to clflush on rebinding, we can track the pages as they
> are evicted from the GTT and only relinquish those pages on memory
> pressure.
> 
> As usual, if it were not for the handling of out-of-memory condition and
> having to manually shrink our own bo caches, it would be a net reduction
> of code. Alas.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

Ok, I'd like to start slurping in this stuff for 3.6, at least the
foundations (i.e. unbound tracking + stolen mem backed objects). The first
patch I'd like to take is just the unbound tracking here, if possible
before I'll push out a new -next for QA to beat on Monday or thereabouts.
So essentially just this patch ;-)

I've looked through it and I think the only implication where it breaks
stuff is the mmap offset reaping. Currently we reap that for purged
objects at unbind time, and thanks to only 2GB of aperture this allows us
to pass the relevant i-g-t tests. So I guess I need that patch, too.

I've also looked at implications due to the new dma-buf stuff, which grabs
the backing storage pages, too. But all the things I've checked for
(put_pages while dma_buf still needs them and implications for our
shrinker) are broken already with the current code, so I think we can
safely ignore these.

For the patch itself, one comment below about unbind/put_pages vs. domain
tracking.

I've also backmerged the latest dma-buf stuff from Dave into dinq, so can
you please rebase this patch + the improved mmap offset reaper and then
resubmit? And maybe check your massive patch-queue for anything else that
might be required to make this minimal merge go.

For the next 2-week cycle I think we should be able to slurp in the stolen
mem stuff, leaving decent amounts of time for the real fastboot stuff for
3.6.

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |   11 +-
>  drivers/gpu/drm/i915/i915_drv.h            |   10 +-
>  drivers/gpu/drm/i915/i915_gem.c            |  451 ++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_evict.c      |   13 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    9 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |    2 +-
>  drivers/gpu/drm/i915/i915_irq.c            |    4 +-
>  drivers/gpu/drm/i915/i915_trace.h          |   10 +-
>  8 files changed, 283 insertions(+), 227 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index eb2b3c2..3e00b27 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -232,7 +232,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
>  		   dev_priv->mm.object_memory);
>  
>  	size = count = mappable_size = mappable_count = 0;
> -	count_objects(&dev_priv->mm.gtt_list, gtt_list);
> +	count_objects(&dev_priv->mm.bound_list, gtt_list);
>  	seq_printf(m, "%u [%u] objects, %zu [%zu] bytes in gtt\n",
>  		   count, mappable_count, size, mappable_size);
>  
> @@ -247,8 +247,13 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
>  	seq_printf(m, "  %u [%u] inactive objects, %zu [%zu] bytes\n",
>  		   count, mappable_count, size, mappable_size);
>  
> +	size = count = 0;
> +	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
> +		size += obj->base.size, ++count;
> +	seq_printf(m, "%u unbound objects, %zu bytes\n", count, size);
> +
>  	size = count = mappable_size = mappable_count = 0;
> -	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
>  		if (obj->fault_mappable) {
>  			size += obj->gtt_space->size;
>  			++count;
> @@ -286,7 +291,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void* data)
>  		return ret;
>  
>  	total_obj_size = total_gtt_size = count = 0;
> -	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
>  		if (list == PINNED_LIST && obj->pin_count == 0)
>  			continue;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d5155db..fabf011 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -643,7 +643,13 @@ typedef struct drm_i915_private {
>  		struct drm_mm gtt_space;
>  		/** List of all objects in gtt_space. Used to restore gtt
>  		 * mappings on resume */
> -		struct list_head gtt_list;
> +		struct list_head bound_list;
> +		/**
> +		 * List of objects which are not bound to the GTT (thus
> +		 * are idle and not used by the GPU) but still have
> +		 * (presumably uncached) pages still attached.
> +		 */
> +		struct list_head unbound_list;
>  
>  		/** Usable portion of the GTT for GEM */
>  		unsigned long gtt_start;
> @@ -1362,7 +1368,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
>  					  unsigned alignment, bool mappable);
> -int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only);
> +int i915_gem_evict_everything(struct drm_device *dev);
>  
>  /* i915_gem_stolen.c */
>  int i915_gem_init_stolen(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99f816c..33279b5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -55,6 +55,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
>  
>  static int i915_gem_inactive_shrink(struct shrinker *shrinker,
>  				    struct shrink_control *sc);
> +static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
> +static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
>  static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
>  
>  static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
> @@ -131,7 +133,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
>  static inline bool
>  i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
>  {
> -	return !obj->active;
> +	return obj->gtt_space && !obj->active;
>  }
>  
>  int
> @@ -170,7 +172,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  
>  	pinned = 0;
>  	mutex_lock(&dev->struct_mutex);
> -	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
>  		if (obj->pin_count)
>  			pinned += obj->gtt_space->size;
>  	mutex_unlock(&dev->struct_mutex);
> @@ -414,9 +416,11 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  		 * anyway again before the next pread happens. */
>  		if (obj->cache_level == I915_CACHE_NONE)
>  			needs_clflush = 1;
> -		ret = i915_gem_object_set_to_gtt_domain(obj, false);
> -		if (ret)
> -			return ret;
> +		if (obj->gtt_space) {
> +			ret = i915_gem_object_set_to_gtt_domain(obj, false);
> +			if (ret)
> +				return ret;
> +		}
>  	}
>  
>  	offset = args->offset;
> @@ -734,9 +738,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		 * right away and we therefore have to clflush anyway. */
>  		if (obj->cache_level == I915_CACHE_NONE)
>  			needs_clflush_after = 1;
> -		ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -		if (ret)
> -			return ret;
> +		if (obj->gtt_space) {
> +			ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +			if (ret)
> +				return ret;
> +		}
>  	}
>  	/* Same trick applies for invalidate partially written cachelines before
>  	 * writing.  */
> @@ -1302,59 +1308,56 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
>  	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
>  }
>  
> -
> -static int
> -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
> -			      gfp_t gfpmask)
> +/* Immediately discard the backing storage */
> +static void
> +i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>  {
> -	int page_count, i;
> -	struct address_space *mapping;
>  	struct inode *inode;
> -	struct page *page;
>  
> -	/* Get the list of pages out of our struct file.  They'll be pinned
> -	 * at this point until we release them.
> +	/* Our goal here is to return as much of the memory as
> +	 * is possible back to the system as we are called from OOM.
> +	 * To do this we must instruct the shmfs to drop all of its
> +	 * backing pages, *now*.
>  	 */
> -	page_count = obj->base.size / PAGE_SIZE;
> -	BUG_ON(obj->pages != NULL);
> -	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> -	if (obj->pages == NULL)
> -		return -ENOMEM;
> -
>  	inode = obj->base.filp->f_path.dentry->d_inode;
> -	mapping = inode->i_mapping;
> -	gfpmask |= mapping_gfp_mask(mapping);
> -
> -	for (i = 0; i < page_count; i++) {
> -		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
> -		if (IS_ERR(page))
> -			goto err_pages;
> -
> -		obj->pages[i] = page;
> -	}
> -
> -	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		i915_gem_object_do_bit_17_swizzle(obj);
> +	shmem_truncate_range(inode, 0, (loff_t)-1);
>  
> -	return 0;
> +	if (obj->base.map_list.map)
> +		drm_gem_free_mmap_offset(&obj->base);
>  
> -err_pages:
> -	while (i--)
> -		page_cache_release(obj->pages[i]);
> +	obj->madv = __I915_MADV_PURGED;
> +}
>  
> -	drm_free_large(obj->pages);
> -	obj->pages = NULL;
> -	return PTR_ERR(page);
> +static inline int
> +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
> +{
> +	return obj->madv == I915_MADV_DONTNEED;
>  }
>  
> -static void
> +static int
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	int page_count = obj->base.size / PAGE_SIZE;
> -	int i;
> +	int ret, i;
> +
> +	if (obj->pages == NULL)
> +		return 0;
>  
> +	BUG_ON(obj->gtt_space);
>  	BUG_ON(obj->madv == __I915_MADV_PURGED);
>  
> +	ret = i915_gem_object_set_to_cpu_domain(obj, 0);
> +	if (ret && ret != -EIO)
> +		return ret;

I think we need to unconditionally set the pages to the cpu write domain -
as soon as we drop the pages the vm is free to swap them out and swap them
back in again at a different place.

> +
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		i915_gem_clflush_object(obj);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_save_bit_17_swizzle(obj);
>  
> @@ -1374,6 +1377,181 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  
>  	drm_free_large(obj->pages);
>  	obj->pages = NULL;
> +
> +	list_del(&obj->gtt_list);
> +
> +	if (i915_gem_object_is_purgeable(obj))
> +		i915_gem_object_truncate(obj);
> +
> +	return 0;
> +}
> +
> +static long
> +i915_gem_purge(struct drm_i915_private *dev_priv, long target)
> +{
> +	struct drm_i915_gem_object *obj, *next;
> +	long count = 0;
> +
> +	list_for_each_entry_safe(obj, next,
> +				 &dev_priv->mm.unbound_list,
> +				 gtt_list) {
> +		if (i915_gem_object_is_purgeable(obj) &&
> +		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +			count += obj->base.size >> PAGE_SHIFT;
> +			if (count >= target)
> +				return count;
> +		}
> +	}
> +
> +	list_for_each_entry_safe(obj, next,
> +				 &dev_priv->mm.inactive_list,
> +				 mm_list) {
> +		if (i915_gem_object_is_purgeable(obj) &&
> +		    i915_gem_object_unbind(obj) == 0 &&
> +		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +			count += obj->base.size >> PAGE_SHIFT;
> +			if (count >= target)
> +				return count;
> +		}
> +	}
> +
> +	return count;
> +}
> +
> +inline static struct drm_i915_gem_object *
> +first_unbound_bo(struct drm_i915_private *dev_priv)
> +{
> +	return list_first_entry(&dev_priv->mm.unbound_list,
> +				struct drm_i915_gem_object,
> +				gtt_list);
> +}
> +
> +static void
> +i915_gem_shrink_all(struct drm_i915_private *dev_priv)
> +{
> +	i915_gem_evict_everything(dev_priv->dev);
> +	while (!list_empty(&dev_priv->mm.unbound_list))
> +		i915_gem_object_put_pages_gtt(first_unbound_bo(dev_priv));
> +}
> +
> +/* Try to allocate some memory under the struct_mutex by purging some
> + * of our own buffers if necessary.
> + */
> +static void *i915_malloc(struct drm_i915_private *dev_priv,
> +			 unsigned long size)
> +{
> +	gfp_t gfp;
> +	void *ptr;
> +
> +	gfp = GFP_KERNEL;
> +	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
> +	gfp &= ~(__GFP_IO | __GFP_WAIT);
> +
> +	ptr = kmalloc(size, gfp);
> +	if (ptr)
> +		return ptr;
> +
> +	if (size <= 2*PAGE_SIZE) {
> +		i915_gem_purge(dev_priv, (size >> PAGE_SHIFT) + 1);
> +		ptr = kmalloc(size, gfp);
> +		if (ptr)
> +			return ptr;
> +
> +		i915_gem_shrink_all(dev_priv);
> +
> +		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
> +		gfp |= __GFP_IO | __GFP_WAIT;
> +		return kmalloc(size, gfp);
> +	} else {
> +		gfp |= __GFP_HIGHMEM;
> +		ptr =  __vmalloc(size, gfp, PAGE_KERNEL);
> +		if (ptr)
> +			return ptr;
> +
> +		i915_gem_purge(dev_priv, (size >> PAGE_SHIFT) + 1);
> +		ptr =  __vmalloc(size, gfp, PAGE_KERNEL);
> +		if (ptr)
> +			return ptr;
> +
> +		i915_gem_shrink_all(dev_priv);
> +
> +		gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
> +		gfp |= __GFP_IO | __GFP_WAIT;
> +		return  __vmalloc(size, gfp, PAGE_KERNEL);
> +	}
> +}
> +
> +static int
> +i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	int page_count, i;
> +	struct address_space *mapping;
> +	struct page *page;
> +	gfp_t gfp;
> +
> +	if (obj->pages)
> +		return 0;
> +
> +	/* Assert that the object is not currently in any GPU domain. As it
> +	 * wasn't in the GTT, there shouldn't be any way it could have been in
> +	 * a GPU cache
> +	 */
> +	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> +	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> +
> +	/* Get the list of pages out of our struct file.  They'll be pinned
> +	 * at this point until we release them.
> +	 */
> +	page_count = obj->base.size / PAGE_SIZE;
> +	obj->pages = i915_malloc(dev_priv, page_count*sizeof(struct page *));
> +	if (obj->pages == NULL)
> +		return -ENOMEM;
> +
> +	/* Fail silently without starting the shrinker */
> +	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
> +	gfp = mapping_gfp_mask(mapping);
> +	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
> +	gfp &= ~(__GFP_IO | __GFP_WAIT);
> +	for (i = 0; i < page_count; i++) {
> +		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
> +		if (IS_ERR(page)) {
> +			i915_gem_purge(dev_priv, page_count);
> +			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
> +		}
> +		if (IS_ERR(page)) {
> +			/* We've tried hard to allocate the memory by reaping
> +			 * our own buffer, now let the real VM do its job and
> +			 * go down in flames if truly OOM.
> +			 */
> +			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
> +			gfp |= __GFP_IO | __GFP_WAIT;
> +
> +			i915_gem_shrink_all(dev_priv);
> +			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
> +			if (IS_ERR(page))
> +				goto err_pages;
> +
> +			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
> +			gfp &= ~(__GFP_IO | __GFP_WAIT);
> +		}
> +
> +		obj->pages[i] = page;
> +	}
> +
> +	if (i915_gem_object_needs_bit17_swizzle(obj))
> +		i915_gem_object_do_bit_17_swizzle(obj);
> +
> +	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
> +	return 0;
> +
> +err_pages:
> +	while (i--)
> +		page_cache_release(obj->pages[i]);
> +
> +	drm_free_large(obj->pages);
> +	obj->pages = NULL;
> +	return PTR_ERR(page);
>  }
>  
>  void
> @@ -1458,32 +1636,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	WARN_ON(i915_verify_lists(dev));
>  }
>  
> -/* Immediately discard the backing storage */
> -static void
> -i915_gem_object_truncate(struct drm_i915_gem_object *obj)
> -{
> -	struct inode *inode;
> -
> -	/* Our goal here is to return as much of the memory as
> -	 * is possible back to the system as we are called from OOM.
> -	 * To do this we must instruct the shmfs to drop all of its
> -	 * backing pages, *now*.
> -	 */
> -	inode = obj->base.filp->f_path.dentry->d_inode;
> -	shmem_truncate_range(inode, 0, (loff_t)-1);
> -
> -	if (obj->base.map_list.map)
> -		drm_gem_free_mmap_offset(&obj->base);
> -
> -	obj->madv = __I915_MADV_PURGED;
> -}
> -
> -static inline int
> -i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
> -{
> -	return obj->madv == I915_MADV_DONTNEED;
> -}
> -
>  static void
>  i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
>  			       uint32_t flush_domains)
> @@ -1690,6 +1842,9 @@ void i915_gem_reset(struct drm_device *dev)
>  		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
>  	}
>  
> +	while (!list_empty(&dev_priv->mm.unbound_list))
> +		i915_gem_object_put_pages_gtt(first_unbound_bo(dev_priv));
> +
>  	/* The fence registers are invalidated so clear them out */
>  	i915_gem_reset_fences(dev);
>  }
> @@ -2053,22 +2208,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  
>  	i915_gem_object_finish_gtt(obj);
>  
> -	/* Move the object to the CPU domain to ensure that
> -	 * any possible CPU writes while it's not in the GTT
> -	 * are flushed when we go to remap it.
> -	 */
> -	if (ret == 0)
> -		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
> -	if (ret == -ERESTARTSYS)
> -		return ret;
> -	if (ret) {
> -		/* In the event of a disaster, abandon all caches and
> -		 * hope for the best.
> -		 */
> -		i915_gem_clflush_object(obj);
> -		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> -	}
> -
>  	/* release the fence reg _after_ flushing */
>  	ret = i915_gem_object_put_fence(obj);
>  	if (ret)
> @@ -2084,10 +2223,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  	}
>  	i915_gem_gtt_finish_object(obj);
>  
> -	i915_gem_object_put_pages_gtt(obj);
> -
> -	list_del_init(&obj->gtt_list);
> -	list_del_init(&obj->mm_list);
> +	list_del(&obj->mm_list);
> +	list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
>  	/* Avoid an unnecessary call to unbind on rebind. */
>  	obj->map_and_fenceable = true;
>  
> @@ -2095,10 +2232,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  	obj->gtt_space = NULL;
>  	obj->gtt_offset = 0;
>  
> -	if (i915_gem_object_is_purgeable(obj))
> -		i915_gem_object_truncate(obj);
> +	if (obj->base.read_domains & I915_GEM_DOMAIN_CPU)
> +		i915_gem_object_put_pages_gtt(obj);

With the change in put_pages to set the gem object to the cpu write domain
when we stop controlling the pages, this incurs an unnecessary clflush on
rebind. Also, I think it makes conceptually more sense if the new unbind
really does just that, and doesn't drop the backing storage pages under
any circumstances.

>  
> -	return ret;
> +	return 0;
>  }
>  
>  int
> @@ -2484,7 +2621,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct drm_mm_node *free_space;
> -	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
>  	u32 size, fence_size, fence_alignment, unfenced_alignment;
>  	bool mappable, fenceable;
>  	int ret;
> @@ -2524,6 +2660,10 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  		return -E2BIG;
>  	}
>  
> +	ret = i915_gem_object_get_pages_gtt(obj);
> +	if (ret)
> +		return ret;
> +
>   search_free:
>  	if (map_and_fenceable)
>  		free_space =
> @@ -2547,9 +2687,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  				drm_mm_get_block(free_space, size, alignment);
>  	}
>  	if (obj->gtt_space == NULL) {
> -		/* If the gtt is empty and we're still having trouble
> -		 * fitting our object in, we're out of memory.
> -		 */
>  		ret = i915_gem_evict_something(dev, size, alignment,
>  					       map_and_fenceable);
>  		if (ret)
> @@ -2558,55 +2695,20 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  		goto search_free;
>  	}
>  
> -	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
> -	if (ret) {
> -		drm_mm_put_block(obj->gtt_space);
> -		obj->gtt_space = NULL;
> -
> -		if (ret == -ENOMEM) {
> -			/* first try to reclaim some memory by clearing the GTT */
> -			ret = i915_gem_evict_everything(dev, false);
> -			if (ret) {
> -				/* now try to shrink everyone else */
> -				if (gfpmask) {
> -					gfpmask = 0;
> -					goto search_free;
> -				}
> -
> -				return -ENOMEM;
> -			}
> -
> -			goto search_free;
> -		}
> -
> -		return ret;
> -	}
>  
>  	ret = i915_gem_gtt_prepare_object(obj);
>  	if (ret) {
> -		i915_gem_object_put_pages_gtt(obj);
>  		drm_mm_put_block(obj->gtt_space);
>  		obj->gtt_space = NULL;
> -
> -		if (i915_gem_evict_everything(dev, false))
> -			return ret;
> -
> -		goto search_free;
> +		return ret;
>  	}
>  
>  	if (!dev_priv->mm.aliasing_ppgtt)
>  		i915_gem_gtt_bind_object(obj, obj->cache_level);
>  
> -	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
> +	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
>  	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
>  
> -	/* Assert that the object is not currently in any GPU domain. As it
> -	 * wasn't in the GTT, there shouldn't be any way it could have been in
> -	 * a GPU cache
> -	 */
> -	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> -	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> -
>  	obj->gtt_offset = obj->gtt_space->start;
>  
>  	fenceable =
> @@ -3248,9 +3350,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>  	if (obj->madv != __I915_MADV_PURGED)
>  		obj->madv = args->madv;
>  
> -	/* if the object is no longer bound, discard its backing storage */
> -	if (i915_gem_object_is_purgeable(obj) &&
> -	    obj->gtt_space == NULL)
> +	/* if the object is no longer attached, discard its backing storage */
> +	if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
>  		i915_gem_object_truncate(obj);
>  
>  	args->retained = obj->madv != __I915_MADV_PURGED;
> @@ -3347,6 +3448,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		dev_priv->mm.interruptible = was_interruptible;
>  	}
>  
> +	i915_gem_object_put_pages_gtt(obj);
>  	if (obj->base.map_list.map)
>  		drm_gem_free_mmap_offset(&obj->base);
>  
> @@ -3379,7 +3481,7 @@ i915_gem_idle(struct drm_device *dev)
>  
>  	/* Under UMS, be paranoid and evict. */
>  	if (!drm_core_check_feature(dev, DRIVER_MODESET))
> -		i915_gem_evict_everything(dev, false);
> +		i915_gem_evict_everything(dev);
>  
>  	i915_gem_reset_fences(dev);
>  
> @@ -3682,8 +3784,9 @@ i915_gem_load(struct drm_device *dev)
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> +	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> +	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> -	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
>  	for (i = 0; i < I915_NUM_RINGS; i++)
>  		init_ring_lists(&dev_priv->ring[i]);
>  	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
> @@ -3928,18 +4031,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static int
> -i915_gpu_is_active(struct drm_device *dev)
> -{
> -	drm_i915_private_t *dev_priv = dev->dev_private;
> -	int lists_empty;
> -
> -	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
> -		      list_empty(&dev_priv->mm.active_list);
> -
> -	return !lists_empty;
> -}
> -
> -static int
>  i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  {
>  	struct drm_i915_private *dev_priv =
> @@ -3947,60 +4038,26 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  			     struct drm_i915_private,
>  			     mm.inactive_shrinker);
>  	struct drm_device *dev = dev_priv->dev;
> -	struct drm_i915_gem_object *obj, *next;
> +	struct drm_i915_gem_object *obj;
>  	int nr_to_scan = sc->nr_to_scan;
>  	int cnt;
>  
>  	if (!mutex_trylock(&dev->struct_mutex))
>  		return 0;
>  
> -	/* "fast-path" to count number of available objects */
> -	if (nr_to_scan == 0) {
> -		cnt = 0;
> -		list_for_each_entry(obj,
> -				    &dev_priv->mm.inactive_list,
> -				    mm_list)
> -			cnt++;
> -		mutex_unlock(&dev->struct_mutex);
> -		return cnt / 100 * sysctl_vfs_cache_pressure;
> +	if (nr_to_scan) {
> +		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
> +		if (nr_to_scan > 0)
> +			i915_gem_shrink_all(dev_priv);
>  	}
>  
> -rescan:
> -	/* first scan for clean buffers */
> -	i915_gem_retire_requests(dev);
> -
> -	list_for_each_entry_safe(obj, next,
> -				 &dev_priv->mm.inactive_list,
> -				 mm_list) {
> -		if (i915_gem_object_is_purgeable(obj)) {
> -			if (i915_gem_object_unbind(obj) == 0 &&
> -			    --nr_to_scan == 0)
> -				break;
> -		}
> -	}
> -
> -	/* second pass, evict/count anything still on the inactive list */
>  	cnt = 0;
> -	list_for_each_entry_safe(obj, next,
> -				 &dev_priv->mm.inactive_list,
> -				 mm_list) {
> -		if (nr_to_scan &&
> -		    i915_gem_object_unbind(obj) == 0)
> -			nr_to_scan--;
> -		else
> -			cnt++;
> -	}
> +	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
> +		cnt += obj->base.size >> PAGE_SHIFT;
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
> +		if (obj->pin_count == 0)
> +			cnt += obj->base.size >> PAGE_SHIFT;
>  
> -	if (nr_to_scan && i915_gpu_is_active(dev)) {
> -		/*
> -		 * We are desperate for pages, so as a last resort, wait
> -		 * for the GPU to finish and discard whatever we can.
> -		 * This has a dramatic impact to reduce the number of
> -		 * OOM-killer events whilst running the GPU aggressively.
> -		 */
> -		if (i915_gpu_idle(dev) == 0)
> -			goto rescan;
> -	}
>  	mutex_unlock(&dev->struct_mutex);
> -	return cnt / 100 * sysctl_vfs_cache_pressure;
> +	return cnt;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index ae7c24e..d951920 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -163,7 +163,7 @@ found:
>  }
>  
>  int
> -i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
> +i915_gem_evict_everything(struct drm_device *dev)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj, *next;
> @@ -176,7 +176,7 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
>  	if (lists_empty)
>  		return -ENOSPC;
>  
> -	trace_i915_gem_evict_everything(dev, purgeable_only);
> +	trace_i915_gem_evict_everything(dev);
>  
>  	/* The gpu_idle will flush everything in the write domain to the
>  	 * active list. Then we must move everything off the active list
> @@ -192,12 +192,9 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
>  
>  	/* Having flushed everything, unbind() should never raise an error */
>  	list_for_each_entry_safe(obj, next,
> -				 &dev_priv->mm.inactive_list, mm_list) {
> -		if (!purgeable_only || obj->madv != I915_MADV_WILLNEED) {
> -			if (obj->pin_count == 0)
> -				WARN_ON(i915_gem_object_unbind(obj));
> -		}
> -	}
> +				 &dev_priv->mm.inactive_list, mm_list)
> +		if (obj->pin_count == 0)
> +			WARN_ON(i915_gem_object_unbind(obj));
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 1b61762..018870c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -675,17 +675,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  			}
>  		}
>  
> -		if (ret != -ENOSPC || retry > 1)
> +		if (ret != -ENOSPC || retry++)
>  			return ret;
>  
> -		/* First attempt, just clear anything that is purgeable.
> -		 * Second attempt, clear the entire GTT.
> -		 */
> -		ret = i915_gem_evict_everything(ring->dev, retry == 0);
> +		ret = i915_gem_evict_everything(ring->dev);
>  		if (ret)
>  			return ret;
> -
> -		retry++;
>  	} while (1);
>  
>  err:
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 29d573c..6f08425 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -342,7 +342,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
>  			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
>  
> -	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
>  		i915_gem_clflush_object(obj);
>  		i915_gem_gtt_bind_object(obj, obj->cache_level);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 5134a62..f4b2281 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1125,7 +1125,7 @@ static void i915_capture_error_state(struct drm_device *dev)
>  	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list)
>  		i++;
>  	error->active_bo_count = i;
> -	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
>  		if (obj->pin_count)
>  			i++;
>  	error->pinned_bo_count = i - error->active_bo_count;
> @@ -1150,7 +1150,7 @@ static void i915_capture_error_state(struct drm_device *dev)
>  		error->pinned_bo_count =
>  			capture_pinned_bo(error->pinned_bo,
>  					  error->pinned_bo_count,
> -					  &dev_priv->mm.gtt_list);
> +					  &dev_priv->mm.bound_list);
>  
>  	do_gettimeofday(&error->time);
>  
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index dac7bba..b6ce36a 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -214,22 +214,18 @@ TRACE_EVENT(i915_gem_evict,
>  );
>  
>  TRACE_EVENT(i915_gem_evict_everything,
> -	    TP_PROTO(struct drm_device *dev, bool purgeable),
> -	    TP_ARGS(dev, purgeable),
> +	    TP_PROTO(struct drm_device *dev),
> +	    TP_ARGS(dev),
>  
>  	    TP_STRUCT__entry(
>  			     __field(u32, dev)
> -			     __field(bool, purgeable)
>  			    ),
>  
>  	    TP_fast_assign(
>  			   __entry->dev = dev->primary->index;
> -			   __entry->purgeable = purgeable;
>  			  ),
>  
> -	    TP_printk("dev=%d%s",
> -		      __entry->dev,
> -		      __entry->purgeable ? ", purgeable only" : "")
> +	    TP_printk("dev=%d", __entry->dev)
>  );
>  
>  TRACE_EVENT(i915_gem_ring_dispatch,
> -- 
> 1.7.10
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48



More information about the Intel-gfx mailing list