[PATCH] drm/i915: Replace vma parking with a clock aging algorithm

Chris Wilson chris at chris-wilson.co.uk
Sun Mar 22 14:55:04 UTC 2020


We cache the user's vma for a brief period of time after they close them
so that if they are immediately reopened we avoid having to unbind and
rebind them. This happens quite frequently for display servers which
only keep a client's frame open for as long as they are copying from it,
and so they open/close every vma about 30 Hz (every other frame for
double buffering).

Our current strategy is to keep the vma alive until the next global idle
point. However this cache should be purely temporal, so switch over from
using the parked notifier to using its own clock based aging algorithm:
if the closed vma is not reused within 2 clock ticks, it is destroyed.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/644
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/Makefile                |   1 +
 drivers/gpu/drm/i915/gt/intel_gt.c           |   5 +-
 drivers/gpu/drm/i915/gt/intel_gt.h           |   4 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.c        |   1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h     |   9 +-
 drivers/gpu/drm/i915/gt/intel_gt_vma_clock.c | 131 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_debugfs.c          |   3 +
 drivers/gpu/drm/i915/i915_drv.c              |   1 -
 drivers/gpu/drm/i915/i915_vma.c              |  55 ++------
 drivers/gpu/drm/i915/i915_vma.h              |   2 -
 10 files changed, 156 insertions(+), 56 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_vma_clock.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 99cd3d25f816..ada716f3f5fd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -98,6 +98,7 @@ gt-y += \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
 	gt/intel_gt_requests.o \
+	gt/intel_gt_vma_clock.o \
 	gt/intel_gtt.o \
 	gt/intel_llc.o \
 	gt/intel_lrc.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d09f7596cb98..840b6015144e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -23,12 +23,10 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	spin_lock_init(&gt->irq_lock);
 
-	INIT_LIST_HEAD(&gt->closed_vma);
-	spin_lock_init(&gt->closed_lock);
-
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_init_timelines(gt);
+	intel_gt_init_vma_clock(gt);
 	intel_gt_pm_init_early(gt);
 
 	intel_rps_init_early(&gt->rps);
@@ -671,6 +669,7 @@ void intel_gt_driver_late_release(struct intel_gt *gt)
 	rcu_barrier();
 
 	intel_uc_driver_late_release(&gt->uc);
+	intel_gt_fini_vma_clock(gt);
 	intel_gt_fini_requests(gt);
 	intel_gt_fini_reset(gt);
 	intel_gt_fini_timelines(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4fac043750aa..20664dd28c7e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -52,6 +52,10 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 void intel_gt_chipset_flush(struct intel_gt *gt);
 
+void intel_gt_init_vma_clock(struct intel_gt *gt);
+void intel_gt_flush_vma_clock(struct intel_gt *gt);
+void intel_gt_fini_vma_clock(struct intel_gt *gt);
+
 static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 					  enum intel_gt_scratch_field field)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 2e40400d1ecd..02635f2aa65c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -80,7 +80,6 @@ static int __gt_park(struct intel_wakeref *wf)
 
 	intel_gt_park_requests(gt);
 
-	i915_vma_parked(gt);
 	i915_pmu_gt_parked(i915);
 	intel_rps_park(&gt->rps);
 	intel_rc6_park(&gt->rc6);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 96890dd12b5f..950bd05b63e8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -58,9 +58,6 @@ struct intel_gt {
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
-	struct list_head closed_vma;
-	spinlock_t closed_lock; /* guards the list of closed_vma */
-
 	struct intel_reset reset;
 
 	/**
@@ -97,6 +94,12 @@ struct intel_gt {
 	 * Reserved for exclusive use by the kernel.
 	 */
 	struct i915_address_space *vm;
+
+	struct intel_gt_vma_clock {
+		spinlock_t lock;
+		struct list_head age[2];
+		struct delayed_work work;
+	} vma_clock;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_vma_clock.c b/drivers/gpu/drm/i915/gt/intel_gt_vma_clock.c
new file mode 100644
index 000000000000..5bf6d20ed087
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_vma_clock.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/workqueue.h>
+
+#include "intel_gt.h"
+
+static bool __vma_clock(struct intel_gt_vma_clock *clock)
+{
+	struct i915_vma *vma, *next;
+	bool alive = false;
+
+	/*
+	 * A very simple clock aging algorithm: we keep the user's closed
+	 * vma alive for a couple of timer ticks before destroying them.
+ * This serves as a short-lived cache so that frequently reused VMA
+ * are kept alive between frames and we skip having to rebind them.
+	 *
+	 * When closed, we insert the vma into age[0]. Upon completion of
+	 * a timer tick, it is moved to age[1]. At the start of each timer
+	 * tick, we destroy all the old vma that were accumulated into age[1]
+	 * and have not been reused. All destroyed vma have therefore been
+	 * unused for more than 1 tick (at least a second), and at most 2
+	 * ticks (we expect the average to be 1.5 ticks).
+	 */
+
+	spin_lock(&clock->lock);
+
+	list_for_each_entry_safe(vma, next, &clock->age[1], closed_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_address_space *vm = vma->vm;
+
+		if (i915_vma_is_active(vma))
+			continue;
+
+		/* XXX All to avoid keeping a reference on i915_vma itself */
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		if (i915_vm_tryopen(vm)) {
+			list_del_init(&vma->closed_link);
+		} else {
+			i915_gem_object_put(obj);
+			obj = NULL;
+		}
+
+		spin_unlock(&clock->lock);
+
+		if (obj) {
+			__i915_vma_put(vma);
+			i915_gem_object_put(obj);
+		}
+
+		i915_vm_close(vm);
+
+		/* Restart after dropping lock */
+		spin_lock(&clock->lock);
+		next = list_first_entry(&clock->age[1],
+					typeof(*next), closed_link);
+	}
+	list_splice_tail_init(&clock->age[0], &clock->age[1]);
+
+	if (!list_empty(&clock->age[1])) {
+		/* Keep active VMA around until second tick after idling */
+		list_for_each_entry_safe(vma, next,
+					 &clock->age[1], closed_link) {
+			if (i915_vma_is_active(vma))
+				list_move_tail(&vma->closed_link,
+					       &clock->age[0]);
+		}
+
+		alive = true;
+	}
+
+	spin_unlock(&clock->lock);
+
+	return alive;
+}
+
+static void vma_clock(struct work_struct *w)
+{
+	struct intel_gt_vma_clock *clock =
+		container_of(w, typeof(*clock), work.work);
+	struct intel_gt *gt = container_of(clock, typeof(*gt), vma_clock);
+	bool alive = true;
+
+	/*
+	 * This is crude. We want to use a simple clock mechanism to discard
+	 * unused VMA after a short period of inactivity, but currently we
+	 * rely on the engine/gt wakeref to serialise removal with new
+	 * execbuf. Once we have better control over the i915_vma lifetimes,
+	 * this should be possible to eliminate.
+	 */
+	intel_wakeref_lock(&gt->wakeref);
+	if (!intel_wakeref_is_active(&gt->wakeref))
+		alive = __vma_clock(clock);
+	intel_wakeref_unlock(&gt->wakeref);
+
+	if (alive)
+		schedule_delayed_work(&clock->work,
+				      round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_vma_clock(struct intel_gt *gt)
+{
+	struct intel_gt_vma_clock *clock = &gt->vma_clock;
+
+	spin_lock_init(&clock->lock);
+	INIT_LIST_HEAD(&clock->age[0]);
+	INIT_LIST_HEAD(&clock->age[1]);
+
+	INIT_DELAYED_WORK(&clock->work, vma_clock);
+}
+
+void intel_gt_flush_vma_clock(struct intel_gt *gt)
+{
+	struct intel_gt_vma_clock *clock = &gt->vma_clock;
+
+	do {
+		if (cancel_delayed_work_sync(&clock->work))
+			vma_clock(&clock->work.work);
+	} while (delayed_work_pending(&clock->work));
+}
+
+void intel_gt_fini_vma_clock(struct intel_gt *gt)
+{
+	intel_gt_flush_vma_clock(gt);
+}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 25bf997e2dd1..3b41b6e0fbbd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,7 @@
 #include <drm/drm_debugfs.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
@@ -1752,6 +1753,8 @@ gt_drop_caches(struct intel_gt *gt, u64 val)
 		ret = intel_gt_pm_wait_for_idle(gt);
 		if (ret)
 			return ret;
+
+		intel_gt_flush_vma_clock(gt);
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 48ba37e35bea..463286581b8d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -453,7 +453,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 	intel_wopcm_init_early(&dev_priv->wopcm);
 
 	intel_gt_init_early(&dev_priv->gt, dev_priv);
-
 	i915_gem_init_early(dev_priv);
 
 	/* This must be called before any calls to HAS_PCH_* */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 5b3efb43a8ef..b30fa5ff6807 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1028,8 +1028,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	struct intel_gt *gt = vma->vm->gt;
-	unsigned long flags;
+	struct intel_gt_vma_clock *clock = &vma->vm->gt->vma_clock;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 
@@ -1045,18 +1044,20 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	spin_lock_irqsave(&gt->closed_lock, flags);
-	list_add(&vma->closed_link, &gt->closed_vma);
-	spin_unlock_irqrestore(&gt->closed_lock, flags);
+	spin_lock(&clock->lock);
+	list_add(&vma->closed_link, &clock->age[0]);
+	spin_unlock(&clock->lock);
+
+	schedule_delayed_work(&clock->work, round_jiffies_up_relative(HZ));
 }
 
 static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	struct intel_gt *gt = vma->vm->gt;
+	struct intel_gt_vma_clock *clock = &vma->vm->gt->vma_clock;
 
-	spin_lock_irq(&gt->closed_lock);
+	spin_lock(&clock->lock);
 	list_del_init(&vma->closed_link);
-	spin_unlock_irq(&gt->closed_lock);
+	spin_unlock(&clock->lock);
 }
 
 void i915_vma_reopen(struct i915_vma *vma)
@@ -1094,44 +1095,6 @@ void i915_vma_release(struct kref *ref)
 	i915_vma_free(vma);
 }
 
-void i915_vma_parked(struct intel_gt *gt)
-{
-	struct i915_vma *vma, *next;
-
-	spin_lock_irq(&gt->closed_lock);
-	list_for_each_entry_safe(vma, next, &gt->closed_vma, closed_link) {
-		struct drm_i915_gem_object *obj = vma->obj;
-		struct i915_address_space *vm = vma->vm;
-
-		/* XXX All to avoid keeping a reference on i915_vma itself */
-
-		if (!kref_get_unless_zero(&obj->base.refcount))
-			continue;
-
-		if (i915_vm_tryopen(vm)) {
-			list_del_init(&vma->closed_link);
-		} else {
-			i915_gem_object_put(obj);
-			obj = NULL;
-		}
-
-		spin_unlock_irq(&gt->closed_lock);
-
-		if (obj) {
-			__i915_vma_put(vma);
-			i915_gem_object_put(obj);
-		}
-
-		i915_vm_close(vm);
-
-		/* Restart after dropping lock */
-		spin_lock_irq(&gt->closed_lock);
-		next = list_first_entry(&gt->closed_vma,
-					typeof(*next), closed_link);
-	}
-	spin_unlock_irq(&gt->closed_lock);
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index b958ad07f212..4fe084ddac26 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -351,8 +351,6 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 		__i915_vma_unpin_fence(vma);
 }
 
-void i915_vma_parked(struct intel_gt *gt);
-
 #define for_each_until(cond) if (cond) break; else
 
 /**
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list