[Intel-gfx] [PATCH 06/11] drm/i915: Allocate active tracking nodes from a slabcache
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jan 30 15:57:06 UTC 2019
On 30/01/2019 02:19, Chris Wilson wrote:
> Wrap the active tracking for a GPU references in a slabcache for faster
> allocations, and keep track of inflight nodes so we can reap the
> stale entries upon parking (thereby trimming our memory usage).
I suggest a two staged approach. First patch add a slab cache (you can
also add kmem_cache_shrink on park as we do for other caches), then add
the parking/reaping bit.
Under what scenarios we end up not freeing active nodes sufficiently? It
would have to be some user which keeps many contexts around, having only
used them once?
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_active.c | 55 ++++++++++++++++---
> drivers/gpu/drm/i915/i915_active.h | 21 +++++--
> drivers/gpu/drm/i915/i915_active_types.h | 12 +++-
> drivers/gpu/drm/i915/i915_drv.h | 2 +
> drivers/gpu/drm/i915/i915_gem.c | 16 +++++-
> drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
> drivers/gpu/drm/i915/i915_vma.c | 3 +-
> drivers/gpu/drm/i915/selftests/i915_active.c | 3 +-
> .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ++
> 9 files changed, 100 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index e0182e19cb8b..3c7abbde42ac 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -7,7 +7,9 @@
> #include "i915_drv.h"
> #include "i915_active.h"
>
> -#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> +#define i915_from_gt(x) \
> + container_of(x, struct drm_i915_private, gt.active_refs)
> +#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex)
>
> struct active_node {
> struct i915_gem_active base;
> @@ -79,11 +81,11 @@ active_instance(struct i915_active *ref, u64 idx)
> p = &parent->rb_left;
> }
>
> - node = kmalloc(sizeof(*node), GFP_KERNEL);
> + node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL);
>
> /* kmalloc may retire the ref->last (thanks shrinker)! */
> if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
> - kfree(node);
> + kmem_cache_free(ref->gt->slab_cache, node);
> goto out;
> }
>
> @@ -94,6 +96,9 @@ active_instance(struct i915_active *ref, u64 idx)
> node->ref = ref;
> node->timeline = idx;
>
> + if (RB_EMPTY_ROOT(&ref->tree))
> + list_add(&ref->active_link, &ref->gt->active_refs);
> +
> rb_link_node(&node->node, parent, p);
> rb_insert_color(&node->node, &ref->tree);
>
> @@ -119,11 +124,11 @@ active_instance(struct i915_active *ref, u64 idx)
> return &ref->last;
> }
>
> -void i915_active_init(struct drm_i915_private *i915,
> +void i915_active_init(struct i915_gt_active *gt,
> struct i915_active *ref,
> void (*retire)(struct i915_active *ref))
> {
> - ref->i915 = i915;
> + ref->gt = gt;
> ref->retire = retire;
> ref->tree = RB_ROOT;
> init_request_active(&ref->last, last_retire);
> @@ -161,6 +166,7 @@ void i915_active_release(struct i915_active *ref)
>
> int i915_active_wait(struct i915_active *ref)
> {
> + struct kmem_cache *slab = ref->gt->slab_cache;
> struct active_node *it, *n;
> int ret;
>
> @@ -168,15 +174,19 @@ int i915_active_wait(struct i915_active *ref)
> if (ret)
> return ret;
>
> + if (RB_EMPTY_ROOT(&ref->tree))
> + return 0;
> +
> rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> ret = i915_gem_active_retire(&it->base, BKL(ref));
> if (ret)
> return ret;
>
> GEM_BUG_ON(i915_gem_active_isset(&it->base));
> - kfree(it);
> + kmem_cache_free(slab, it);
> }
> ref->tree = RB_ROOT;
> + list_del(&ref->active_link);
>
> return 0;
> }
> @@ -210,15 +220,46 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>
> void i915_active_fini(struct i915_active *ref)
> {
> + struct kmem_cache *slab = ref->gt->slab_cache;
> struct active_node *it, *n;
>
> + lockdep_assert_held(BKL(ref));
> GEM_BUG_ON(i915_gem_active_isset(&ref->last));
>
> + if (RB_EMPTY_ROOT(&ref->tree))
> + return;
> +
> rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> GEM_BUG_ON(i915_gem_active_isset(&it->base));
> - kfree(it);
> + kmem_cache_free(slab, it);
> }
> ref->tree = RB_ROOT;
> + list_del(&ref->active_link);
> +}
> +
> +int i915_gt_active_init(struct i915_gt_active *gt)
> +{
> + gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
> + if (!gt->slab_cache)
> + return -ENOMEM;
> +
> + INIT_LIST_HEAD(>->active_refs);
> +
> + return 0;
> +}
> +
> +void i915_gt_active_park(struct i915_gt_active *gt)
> +{
> + struct i915_active *it, *n;
> +
> + list_for_each_entry_safe(it, n, >->active_refs, active_link)
> + i915_active_fini(it);
> +}
> +
> +void i915_gt_active_fini(struct i915_gt_active *gt)
> +{
> + GEM_BUG_ON(!list_empty(>->active_refs));
> + kmem_cache_destroy(gt->slab_cache);
> }
>
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index c0729a046f98..41c4a5da84c8 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -9,10 +9,6 @@
>
> #include "i915_active_types.h"
>
> -#include <linux/rbtree.h>
> -
> -#include "i915_request.h"
> -
> /*
> * GPU activity tracking
> *
> @@ -39,7 +35,7 @@
> * synchronisation.
> */
>
> -void i915_active_init(struct drm_i915_private *i915,
> +void i915_active_init(struct i915_gt_active *gt,
> struct i915_active *ref,
> void (*retire)(struct i915_active *ref));
>
> @@ -63,4 +59,19 @@ i915_active_is_idle(const struct i915_active *ref)
>
> void i915_active_fini(struct i915_active *ref);
>
> +/*
> + * Active refs memory management
> + *
> + * To be more economical with memory, we reap all the i915_active trees on
> + * parking the GPU (when we know the GPU is inactive) and allocate the nodes
> + * from a local slab cache to hopefully reduce the fragmentation as we will
> + * then be able to free all pages en masse upon idling.
> + */
> +
> +int i915_gt_active_init(struct i915_gt_active *gt);
> +void i915_gt_active_park(struct i915_gt_active *gt);
> +void i915_gt_active_fini(struct i915_gt_active *gt);
> +
> +#define i915_gt_active(i915) (&(i915)->gt.active_refs)
> +
> #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index 411e502ed8dd..3d41c33ca78c 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -7,14 +7,17 @@
> #ifndef _I915_ACTIVE_TYPES_H_
> #define _I915_ACTIVE_TYPES_H_
>
> +#include <linux/list.h>
> #include <linux/rbtree.h>
>
> #include "i915_request.h"
>
> -struct drm_i915_private;
> +struct i915_gt_active;
> +struct kmem_cache;
>
> struct i915_active {
> - struct drm_i915_private *i915;
> + struct i915_gt_active *gt;
gt_active would be better - gt is to vague.
> + struct list_head active_link;
>
> struct rb_root tree;
> struct i915_gem_active last;
> @@ -23,4 +26,9 @@ struct i915_active {
> void (*retire)(struct i915_active *ref);
> };
>
> +struct i915_gt_active {
> + struct list_head active_refs;
> + struct kmem_cache *slab_cache;
> +};
> +
> #endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8ec28a7f5452..480ab3e00ba8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1984,6 +1984,8 @@ struct drm_i915_private {
> struct list_head hwsp_free_list;
> } timelines;
>
> + struct i915_gt_active active_refs;
> +
> struct list_head active_rings;
> struct list_head closed_vma;
> u32 active_requests;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index caccff87a2a1..2bc735df408b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -130,6 +130,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
>
> intel_engines_park(i915);
> i915_timelines_park(i915);
> + i915_gt_active_park(i915_gt_active(i915));
The i915_gt_active macro is just too horrible IMHO. Why? :)
Make i915_gt_active_park take i915, or i915->gt_active.
>
> i915_pmu_gt_parked(i915);
> i915_vma_parked(i915);
> @@ -4998,15 +4999,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> dev_priv->gt.cleanup_engine = intel_engine_cleanup;
> }
>
> + ret = i915_gt_active_init(i915_gt_active(dev_priv));
> + if (ret)
> + return ret;
> +
> i915_timelines_init(dev_priv);
>
> ret = i915_gem_init_userptr(dev_priv);
> if (ret)
> - return ret;
> + goto err_timelines;
>
> ret = intel_uc_init_misc(dev_priv);
> if (ret)
> - return ret;
> + goto err_userptr;
>
> ret = intel_wopcm_init(&dev_priv->wopcm);
> if (ret)
> @@ -5122,9 +5127,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> err_uc_misc:
> intel_uc_fini_misc(dev_priv);
>
> - if (ret != -EIO) {
> +err_userptr:
> + if (ret != -EIO)
> i915_gem_cleanup_userptr(dev_priv);
> +err_timelines:
> + if (ret != -EIO) {
> i915_timelines_fini(dev_priv);
> + i915_gt_active_fini(i915_gt_active(dev_priv));
> }
>
> if (ret == -EIO) {
> @@ -5177,6 +5186,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
> intel_uc_fini_misc(dev_priv);
> i915_gem_cleanup_userptr(dev_priv);
> i915_timelines_fini(dev_priv);
> + i915_gt_active_fini(i915_gt_active(dev_priv));
>
> i915_gem_drain_freed_objects(dev_priv);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index e625659c03a2..d8819de0d6ee 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> if (!vma)
> return ERR_PTR(-ENOMEM);
>
> - i915_active_init(i915, &vma->active, NULL);
> + i915_active_init(i915_gt_active(i915), &vma->active, NULL);
> init_request_active(&vma->last_fence, NULL);
>
> vma->vm = &ggtt->vm;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d4772061e642..2456bfb4877b 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj,
> if (vma == NULL)
> return ERR_PTR(-ENOMEM);
>
> - i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
> + i915_active_init(i915_gt_active(vm->i915),
> + &vma->active, __i915_vma_retire);
> init_request_active(&vma->last_fence, NULL);
>
> vma->vm = vm;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index 7c5c3068565b..0e923476920e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915,
> unsigned int count = 0;
> int err = 0;
>
> - i915_active_init(i915, &active->base, __live_active_retire);
> + i915_active_init(i915_gt_active(i915),
> + &active->base, __live_active_retire);
> active->retired = false;
>
> if (!i915_active_acquire(&active->base)) {
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 074a0d9cbf26..5b88f74c1677 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -69,6 +69,7 @@ static void mock_device_release(struct drm_device *dev)
> mutex_unlock(&i915->drm.struct_mutex);
>
> i915_timelines_fini(i915);
> + i915_gt_active_fini(i915_gt_active(i915));
>
> drain_workqueue(i915->wq);
> i915_gem_drain_freed_objects(i915);
> @@ -228,6 +229,9 @@ struct drm_i915_private *mock_gem_device(void)
> if (!i915->priorities)
> goto err_dependencies;
>
> + if (i915_gt_active_init(i915_gt_active(i915)))
> + goto err_priorities;
> +
> i915_timelines_init(i915);
>
> INIT_LIST_HEAD(&i915->gt.active_rings);
> @@ -257,6 +261,8 @@ struct drm_i915_private *mock_gem_device(void)
> err_unlock:
> mutex_unlock(&i915->drm.struct_mutex);
> i915_timelines_fini(i915);
> + i915_gt_active_fini(i915_gt_active(i915));
> +err_priorities:
> kmem_cache_destroy(i915->priorities);
> err_dependencies:
> kmem_cache_destroy(i915->dependencies);
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list