[Intel-gfx] [PATCH v3] drm/i915: Allocate active tracking nodes from a slabcache

Chris Wilson <chris@chris-wilson.co.uk>
Wed Jan 30 17:25:07 UTC 2019


Wrap the active tracking for GPU references in a slabcache for faster
allocations, and keep track of the inflight nodes so we can reap the
stale entries upon idling (thereby trimming our memory usage).

v2: Automatically discard the trees every time the tracker idles; they
should be rarely used and fast to allocate as required.
v3: Nothing device-specific is left; it's just a slabcache that we can
make global.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 61 ++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_active.h | 11 ++++--
 drivers/gpu/drm/i915/i915_gem.c    | 10 +++--
 drivers/gpu/drm/i915/i915_pci.c    |  3 ++
 4 files changed, 65 insertions(+), 20 deletions(-)
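
Aside, for review: the allocation half of this is the standard
module-lifetime kmem_cache pattern. A minimal, self-contained sketch of
that shape (the demo_* names below are illustrative, not the driver's):

  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/types.h>

  /* Stand-in for struct active_node. */
  struct demo_node {
  	u64 timeline;
  };

  static struct kmem_cache *demo_slab;

  /* One cache for the lifetime of the module; nodes come and go. */
  static int __init demo_init(void)
  {
  	demo_slab = KMEM_CACHE(demo_node, SLAB_HWCACHE_ALIGN);
  	if (!demo_slab)
  		return -ENOMEM;

  	return 0;
  }

  /* Every node must be back in the cache before it is destroyed. */
  static void __exit demo_exit(void)
  {
  	kmem_cache_destroy(demo_slab);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");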

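The reap-on-idle half leans on rbtree_postorder_for_each_entry_safe():
postorder visits children before their parent, so every node can be
freed during the walk without any rebalancing, and the root is then
reset wholesale. Again a sketch with assumed demo_* names:

  #include <linux/rbtree.h>
  #include <linux/slab.h>
  #include <linux/types.h>

  struct demo_node {
  	struct rb_node node;	/* Links the node into the tree. */
  	u64 timeline;
  };

  /* Free the whole tree in a single postorder pass, then reset it. */
  static void demo_park(struct rb_root *root, struct kmem_cache *slab)
  {
  	struct demo_node *it, *n;

  	rbtree_postorder_for_each_entry_safe(it, n, root, node)
  		kmem_cache_free(slab, it);

  	*root = RB_ROOT;
  }
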
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index e0182e19cb8b..d05fd92dbc82 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -9,6 +9,17 @@
 
 #define BKL(ref) (&(ref)->i915->drm.struct_mutex)
 
+/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees as
+ * they idle (when we know the active requests are inactive) and allocate the
+ * nodes from a local slab cache to hopefully reduce fragmentation.
+ */
+static struct i915_global_active {
+	struct kmem_cache *slab_cache;
+} global;
+
 struct active_node {
 	struct i915_gem_active base;
 	struct i915_active *ref;
@@ -16,12 +27,29 @@ struct active_node {
 	u64 timeline;
 };
 
+static void
+__active_park(struct i915_active *ref)
+{
+	struct active_node *it, *n;
+
+	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&it->base));
+		kmem_cache_free(global.slab_cache, it);
+	}
+	ref->tree = RB_ROOT;
+}
+
 static void
 __active_retire(struct i915_active *ref)
 {
 	GEM_BUG_ON(!ref->count);
-	if (!--ref->count)
-		ref->retire(ref);
+	if (--ref->count)
+		return;
+
+	/* return the unused nodes to our slabcache */
+	__active_park(ref);
+
+	ref->retire(ref);
 }
 
 static void
@@ -79,11 +107,11 @@ active_instance(struct i915_active *ref, u64 idx)
 			p = &parent->rb_left;
 	}
 
-	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
 
 	/* kmalloc may retire the ref->last (thanks shrinker)! */
 	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
-		kfree(node);
+		kmem_cache_free(global.slab_cache, node);
 		goto out;
 	}
 
@@ -174,7 +202,7 @@ int i915_active_wait(struct i915_active *ref)
 			return ret;
 
 		GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
+		kmem_cache_free(global.slab_cache, it);
 	}
 	ref->tree = RB_ROOT;
 
@@ -208,17 +236,26 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
-	struct active_node *it, *n;
-
 	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
+}
+#endif
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
-	}
-	ref->tree = RB_ROOT;
+int __init i915_global_active_init(void)
+{
+	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void __exit i915_global_active_exit(void)
+{
+	kmem_cache_destroy(global.slab_cache);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index c0729a046f98..48fdb1497883 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -9,10 +9,6 @@
 
 #include "i915_active_types.h"
 
-#include <linux/rbtree.h>
-
-#include "i915_request.h"
-
 /*
  * GPU activity tracking
  *
@@ -61,6 +57,13 @@ i915_active_is_idle(const struct i915_active *ref)
 	return !ref->count;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref);
+#else
+static inline void i915_active_fini(struct i915_active *ref) { }
+#endif
+
+int i915_global_active_init(void);
+void i915_global_active_exit(void);
 
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index caccff87a2a1..843b7fd93331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5002,11 +5002,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	ret = i915_gem_init_userptr(dev_priv);
 	if (ret)
-		return ret;
+		goto err_timelines;
 
 	ret = intel_uc_init_misc(dev_priv);
 	if (ret)
-		return ret;
+		goto err_userptr;
 
 	ret = intel_wopcm_init(&dev_priv->wopcm);
 	if (ret)
@@ -5122,10 +5122,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_uc_misc:
 	intel_uc_fini_misc(dev_priv);
 
-	if (ret != -EIO) {
+err_userptr:
+	if (ret != -EIO)
 		i915_gem_cleanup_userptr(dev_priv);
+err_timelines:
+	if (ret != -EIO)
 		i915_timelines_fini(dev_priv);
-	}
 
 	if (ret == -EIO) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 44c23ac60347..751a787c83d1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -793,6 +793,8 @@ static int __init i915_init(void)
 	bool use_kms = true;
 	int err;
 
+	i915_global_active_init();
+
 	err = i915_mock_selftests();
 	if (err)
 		return err > 0 ? 0 : err;
@@ -824,6 +826,7 @@ static void __exit i915_exit(void)
 		return;
 
 	pci_unregister_driver(&i915_pci_driver);
+	i915_global_active_exit();
 }
 
 module_init(i915_init);
-- 
2.20.1
