[Intel-gfx] [PATCH 4/6] drm/i915: Add support for stealing purgeable stolen pages

Daniel Vetter daniel at ffwll.ch
Fri Oct 9 01:11:21 PDT 2015


On Thu, Oct 08, 2015 at 11:43:29AM +0100, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 08/10/15 07:24, ankitprasad.r.sharma at intel.com wrote:
> >From: Chris Wilson <chris at chris-wilson.co.uk>
> >
> >If we run out of stolen memory when trying to allocate an object, see if
> >we can reap enough purgeable objects to free up enough contiguous free
> >space for the allocation. This is in principle very much like evicting
> >objects to free up enough contiguous space in the vma when binding
> >a new object - and you will be forgiven for thinking that the code looks
> >very similar.
> >
> >At the moment, we do not allow userspace to allocate objects in stolen,
> >so there is neither the memory pressure to trigger stolen eviction nor
> >any purgeable objects inside the stolen arena. However, this will change
> >in the near future, and so better management and defragmentation of
> >stolen memory will become a real issue.
> >
> >v2: Remember to remove the drm_mm_node.
> >
> >v3: Rebased to the latest drm-intel-nightly (Ankit)
> >
> >v4: corrected if-else braces format (Tvrtko/kerneldoc)
> >
> >v5: Rebased to the latest drm-intel-nightly (Ankit)
> >Added a separate list to maintain purgeable objects from stolen memory
> >region (Chris/Daniel)
> >
> >v6: Compiler optimization (merging 2 single loops into one for() loop),
> >corrected code for object eviction, retire_requests before starting
> >object eviction (Chris)
> >
> >v7: Added kernel doc for i915_gem_object_create_stolen()
> >
> >Testcase: igt/gem_stolen
> >
> >Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> >Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c    |   4 +-
> >  drivers/gpu/drm/i915/i915_drv.h        |  17 +++-
> >  drivers/gpu/drm/i915/i915_gem.c        |  16 ++++
> >  drivers/gpu/drm/i915/i915_gem_stolen.c | 169 +++++++++++++++++++++++++++++----
> >  drivers/gpu/drm/i915/intel_pm.c        |   4 +-
> >  5 files changed, 186 insertions(+), 24 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >index 8797717..13762c1 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -174,7 +174,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >  			seq_puts(m, ")");
> >  	}
> >  	if (obj->stolen)
> >-		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> >+		seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start);
> >  	if (obj->pin_display || obj->fault_mappable) {
> >  		char s[3], *t = s;
> >  		if (obj->pin_display)
> >@@ -253,7 +253,7 @@ static int obj_rank_by_stolen(void *priv,
> >  	struct drm_i915_gem_object *b =
> >  		container_of(B, struct drm_i915_gem_object, obj_exec_link);
> >
> >-	return a->stolen->start - b->stolen->start;
> >+	return a->stolen->base.start - b->stolen->base.start;
> >  }
> >
> >  static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index bca1572..5612df3 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -850,6 +850,12 @@ struct i915_ctx_hang_stats {
> >  	bool banned;
> >  };
> >
> >+struct i915_stolen_node {
> >+	struct drm_mm_node base;
> >+	struct list_head mm_link;
> >+	struct drm_i915_gem_object *obj;
> >+};
> >+
> >  /* This must match up with the value previously used for execbuf2.rsvd1. */
> >  #define DEFAULT_CONTEXT_HANDLE 0
> >
> >@@ -1279,6 +1285,13 @@ struct i915_gem_mm {
> >  	 */
> >  	struct list_head unbound_list;
> >
> >+	/**
> >+	 * List of stolen objects that have been marked as purgeable and
> >+	 * thus available for reaping if we need more space for a new
> >+	 * allocation. Ordered by time of marking purgeable.
> >+	 */
> >+	struct list_head stolen_list;
> >+
> >  	/** Usable portion of the GTT for GEM */
> >  	unsigned long stolen_base; /* limited to low memory (32-bit) */
> >
> >@@ -2047,7 +2060,7 @@ struct drm_i915_gem_object {
> >  	struct list_head vma_list;
> >
> >  	/** Stolen memory for this object, instead of being backed by shmem. */
> >-	struct drm_mm_node *stolen;
> >+	struct i915_stolen_node *stolen;
> >  	struct list_head global_list;
> >
> >  	struct list_head ring_list[I915_NUM_RINGS];
> >@@ -2055,6 +2068,7 @@ struct drm_i915_gem_object {
> >  	struct list_head obj_exec_link;
> >
> >  	struct list_head batch_pool_link;
> >+	struct list_head tmp_link;
> >
> >  	/**
> >  	 * This is set if the object is on the active lists (has pending
> >@@ -2171,6 +2185,7 @@ struct drm_i915_gem_object {
> >  	};
> >  };
> >  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
> >+#define I915_BO_IS_ACTIVE(__obj) (__obj->active)
> >
> >  void i915_gem_track_fb(struct drm_i915_gem_object *old,
> >  		       struct drm_i915_gem_object *new,
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 54f7df1..91a2e97 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -4233,6 +4233,20 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
> >  	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
> >  		i915_gem_object_truncate(obj);
> >
> >+	if (obj->stolen) {
> >+		switch (obj->madv) {
> >+		case I915_MADV_WILLNEED:
> >+			list_del_init(&obj->stolen->mm_link);
> >+			break;
> >+		case I915_MADV_DONTNEED:
> >+			list_move(&obj->stolen->mm_link,
> >+				  &dev_priv->mm.stolen_list);
> >+			break;
> >+		default:
> >+			break;
> >+		}
> >+	}
> >+
> >  	args->retained = obj->madv != __I915_MADV_PURGED;
> >
> >  out:
> >@@ -4253,6 +4267,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >  	INIT_LIST_HEAD(&obj->obj_exec_link);
> >  	INIT_LIST_HEAD(&obj->vma_list);
> >  	INIT_LIST_HEAD(&obj->batch_pool_link);
> >+	INIT_LIST_HEAD(&obj->tmp_link);
> >
> >  	obj->ops = ops;
> >
> >@@ -4895,6 +4910,7 @@ i915_gem_load(struct drm_device *dev)
> >  	INIT_LIST_HEAD(&dev_priv->context_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
> >+	INIT_LIST_HEAD(&dev_priv->mm.stolen_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> >  	for (i = 0; i < I915_NUM_RINGS; i++)
> >  		init_ring_lists(&dev_priv->ring[i]);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> >index d3e6813..bd5b2ea 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> >@@ -456,7 +456,8 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
> >  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> >
> >  	if (obj->stolen) {
> >-		i915_gem_stolen_remove_node(dev_priv, obj->stolen);
> >+		list_del(&obj->stolen->mm_link);
> >+		i915_gem_stolen_remove_node(dev_priv, &obj->stolen->base);
> >  		kfree(obj->stolen);
> >  		obj->stolen = NULL;
> >  	}
> >@@ -469,7 +470,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
> >
> >  static struct drm_i915_gem_object *
> >  _i915_gem_object_create_stolen(struct drm_device *dev,
> >-			       struct drm_mm_node *stolen)
> >+			       struct i915_stolen_node *stolen)
> >  {
> >  	struct drm_i915_gem_object *obj;
> >  	int ret = 0;
> >@@ -478,11 +479,12 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  	if (obj == NULL)
> >  		return ERR_PTR(-ENOMEM);
> >
> >-	drm_gem_private_object_init(dev, &obj->base, stolen->size);
> >+	drm_gem_private_object_init(dev, &obj->base, stolen->base.size);
> >  	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
> >
> >  	obj->pages = i915_pages_create_for_stolen(dev,
> >-						  stolen->start, stolen->size);
> >+						  stolen->base.start,
> >+						  stolen->base.size);
> >  	if (IS_ERR(obj->pages)) {
> >  		ret = PTR_ERR(obj->pages);
> >  		goto cleanup;
> >@@ -491,6 +493,9 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  	i915_gem_object_pin_pages(obj);
> >  	obj->stolen = stolen;
> >
> >+	stolen->obj = obj;
> >+	INIT_LIST_HEAD(&stolen->mm_link);
> >+
> >  	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
> >  	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
> >
> >@@ -501,18 +506,102 @@ cleanup:
> >  	return ERR_PTR(ret);
> >  }
> >
> >-struct drm_i915_gem_object *
> >-i915_gem_object_create_stolen(struct drm_device *dev, u64 size)
> >+static bool
> >+mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
> >+{
> >+	BUG_ON(obj->stolen == NULL);
> 
> I am fundamentally opposed to BUG_ONs which can be avoided. In this I see no
> value in hanging the machine while we could WARN_ON and return false.
> 
> >+	if (obj->madv != I915_MADV_DONTNEED)
> >+		return false;
> >+
> >+	if (obj->pin_display)
> >+		return false;
> >+
> >+	list_add(&obj->tmp_link, unwind);
> >+	return drm_mm_scan_add_block(&obj->stolen->base);
> >+}
> >+
> >+static int
> >+stolen_evict(struct drm_i915_private *dev_priv, u64 size)
> >  {
> >-	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_i915_gem_object *obj;
> >-	struct drm_mm_node *stolen;
> >-	int ret;
> >+	struct list_head unwind, evict;
> >+	struct i915_stolen_node *iter;
> >+	int ret, active;
> >
> >-	if (!drm_mm_initialized(&dev_priv->mm.stolen))
> >-		return ERR_PTR(-ENODEV);
> >+	drm_mm_init_scan(&dev_priv->mm.stolen, size, 0, 0);
> >+	INIT_LIST_HEAD(&unwind);
> >+
> >+	/* Retire all requests before creating the evict list */
> >+	i915_gem_retire_requests(dev_priv->dev);
> >+
> >+	for (active = 0; active <= 1; active++) {
> >+		list_for_each_entry(iter, &dev_priv->mm.stolen_list, mm_link) {
> >+			if (I915_BO_IS_ACTIVE(iter->obj) != active)
> >+				continue;
> >+
> >+			if (mark_free(iter->obj, &unwind))
> >+				goto found;
> >+		}
> >+	}
> >+
> >+found:
> >+	INIT_LIST_HEAD(&evict);
> >+	while (!list_empty(&unwind)) {
> >+		obj = list_first_entry(&unwind,
> >+				       struct drm_i915_gem_object,
> >+				       tmp_link);
> >+		list_del(&obj->tmp_link);
> >+
> >+		if (drm_mm_scan_remove_block(&obj->stolen->base)) {
> >+			list_add(&obj->tmp_link, &evict);
> >+			drm_gem_object_reference(&obj->base);
> >+		}
> >+	}
> >+
> >+	ret = 0;
> >+	while (!list_empty(&evict)) {
> >+		obj = list_first_entry(&evict,
> >+				       struct drm_i915_gem_object,
> >+				       tmp_link);
> >+		list_del(&obj->tmp_link);
> >+
> >+		if (ret == 0) {
> >+			struct i915_vma *vma, *vma_next;
> >+
> >+			list_for_each_entry_safe(vma, vma_next,
> >+						 &obj->vma_list,
> >+						 vma_link)
> >+				if (i915_vma_unbind(vma))
> >+					break;
> >+
> >+			/* Stolen pins its pages to prevent the
> >+			 * normal shrinker from processing stolen
> >+			 * objects.
> >+			 */
> >+			i915_gem_object_unpin_pages(obj);
> >+
> >+			ret = i915_gem_object_put_pages(obj);
> >+			if (ret == 0) {
> >+				i915_gem_object_release_stolen(obj);
> >+				obj->madv = __I915_MADV_PURGED;
> >+			} else {
> >+				i915_gem_object_pin_pages(obj);
> >+			}
> >+		}
> >+
> >+		drm_gem_object_unreference(&obj->base);
> >+	}
> >+
> >+	return ret;
> >+}
> >+
> >+static struct i915_stolen_node *
> >+stolen_alloc(struct drm_i915_private *dev_priv, u64 size)
> >+{
> >+	struct i915_stolen_node *stolen;
> >+	int ret;
> >
> >-	DRM_DEBUG_KMS("creating stolen object: size=%llx\n", size);
> >  	if (size == 0)
> >  		return ERR_PTR(-EINVAL);
> >
> >@@ -520,17 +609,59 @@ i915_gem_object_create_stolen(struct drm_device *dev, u64 size)
> >  	if (!stolen)
> >  		return ERR_PTR(-ENOMEM);
> >
> >-	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
> >+	ret = i915_gem_stolen_insert_node(dev_priv, &stolen->base, size, 4096);
> >+	if (ret == 0)
> >+		goto out;
> >+
> >+	/* No more stolen memory available, or too fragmented.
> >+	 * Try evicting purgeable objects and search again.
> >+	 */
> >+	ret = stolen_evict(dev_priv, size);
> 
> I have raised this question of struct_mutex in the previous round.
> 
> Correct me if I am wrong, but I thought there was some effort made to make
> stolen object allocation run without struct_mutex?
> 
> With this change it requires it again. At the moment callers seem to hold it
> anyway. But I think lockdep_assert_held is needed now at least to document
> the requirement, probably in top level i915_gem_object_create_stolen.

Please use WARN_ON(!mutex_is_locked()) because lockdep_assert_held is
compiled out when lockdep is disabled. And that's for almost everyone. The
reason for that is that the semantics aren't a perfect match -
lockdep_assert_held also makes sure that it's the calling process holding
the lock, not just that it's locked.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list