[PATCH 80/94] drm/i915/gt: Defer the kmem_cache_free() until after the HW submit

Chris Wilson chris at chris-wilson.co.uk
Thu Jul 30 23:22:26 UTC 2020


Watching lock_stat, we noticed that kmem_cache_free() would cause
occasional multi-millisecond spikes (directly inflating max-holdtime and
hence max-waittime). Since such a spike delays our write of the next
ELSP by a millisecond or more — leaving the GPU idle in the meantime —
defer the kmem_cache_free() until after the HW submission.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c   | 10 +++++++++-
 drivers/gpu/drm/i915/i915_scheduler.c | 13 +++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h | 11 +++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bdea277723f8..0a51cf54cfa9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2031,6 +2031,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request **port = execlists->pending;
 	struct i915_request ** const last_port = port + execlists->port_mask;
 	struct i915_request *last = *execlists->active;
+	struct list_head *free = NULL;
 	struct virtual_engine *ve;
 	struct rb_node *rb;
 	bool submit = false;
@@ -2312,8 +2313,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			}
 		}
 
+		/* Remove the node, but defer the free for later */
 		rb_erase_cached(&p->node, &execlists->queue);
-		i915_priolist_free(p);
+		free = i915_priolist_free_defer(p, free);
 	}
 done:
 	*port++ = i915_request_get(last);
@@ -2341,6 +2343,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			i915_request_put(*port);
 		*execlists->pending = NULL;
 	}
+
+	/*
+	 * We noticed that kmem_cache_free() may cause 1ms+ latencies, so
+	 * we defer the frees until after we have submitted the ELSP.
+	 */
+	i915_priolist_free_many(free);
 }
 
 static inline void clear_ports(struct i915_request **ports, int count)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index cc8dcc9eb1f1..e39c80cabb92 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -179,6 +179,19 @@ void i915_priolist_free(struct i915_priolist *p)
 		kmem_cache_free(global.slab_priorities, p);
 }
 
+void i915_priolist_free_many(struct list_head *list) /* free the chain built by i915_priolist_free_defer(); NULL is a no-op */
+{
+	while (list) {
+		struct i915_priolist *p;
+
+		p = container_of(list, typeof(*p), requests); /* chain links are the embedded requests.next pointers */
+		list = p->requests.next; /* advance before freeing p */
+
+		GEM_BUG_ON(!p->deadline); /* NOTE(review): deadline==0 presumably marks a statically embedded node that must never be slab-freed — confirm */
+		kmem_cache_free(global.slab_priorities, p);
+	}
+}
+
 static bool kick_submission(const struct intel_engine_cs *engine, u64 deadline)
 {
 	const struct intel_engine_execlists *el = &engine->execlists;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index c1452b98642e..f3edf08b0d00 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -49,6 +49,17 @@ struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, u64 deadline);
 
 void i915_priolist_free(struct i915_priolist *p);
+void i915_priolist_free_many(struct list_head *list);
+
+static inline struct list_head * /* push p onto the singly linked deferred-free chain; returns the new chain head */
+i915_priolist_free_defer(struct i915_priolist *p, struct list_head *free)
+{
+	if (p->deadline) { /* only chain slab-allocated nodes; see the matching GEM_BUG_ON in i915_priolist_free_many() */
+		p->requests.next = free; /* reuse requests.next as the chain link — assumes p->requests holds no live requests by now; TODO confirm at caller */
+		free = &p->requests;
+	}
+	return free; /* unchanged if p was not chained */
+}
 
 static inline u64 i915_sched_to_ticks(ktime_t kt)
 {
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list