[PATCH 12/12] tl: factor out intel_timeline init/fini, add syncmap prefix helpers, mock a bare intel_timeline

Chris Wilson chris at chris-wilson.co.uk
Thu Apr 27 23:15:59 UTC 2017


---
 drivers/gpu/drm/i915/i915_gem_timeline.c           | 52 +++++++-----
 drivers/gpu/drm/i915/i915_syncmap.c                | 67 +++++++++++-----
 drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 93 +++++++++++-----------
 drivers/gpu/drm/i915/selftests/mock_timeline.c     | 27 +++----
 drivers/gpu/drm/i915/selftests/mock_timeline.h     |  4 +-
 5 files changed, 135 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index 8c9f403e804c..4b3933a1fba5 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -24,6 +24,31 @@
 
 #include "i915_drv.h"
 
+static void __intel_timeline_init(struct intel_timeline *tl,
+				  struct i915_gem_timeline *parent, /* stored in tl->common */
+				  u64 context, /* stored in tl->fence_context */
+				  struct lock_class_key *lockclass, /* CONFIG_DEBUG_SPINLOCK only */
+				  const char *lockname) /* CONFIG_DEBUG_SPINLOCK only */
+{ /* Set up one timeline: context id, parent link, lock, request tracking, syncmap. */
+	tl->fence_context = context;
+	tl->common = parent; /* mock_timeline() passes NULL here */
+#ifdef CONFIG_DEBUG_SPINLOCK
+	__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
+#else
+	spin_lock_init(&tl->lock); /* lockclass and lockname are unused on this path */
+#endif
+	init_request_active(&tl->last_request, NULL);
+	INIT_LIST_HEAD(&tl->requests); /* __intel_timeline_fini() asserts this is empty */
+	i915_syncmap_init(&tl->sync); /* paired with i915_syncmap_free() in __intel_timeline_fini() */
+}
+
+static void __intel_timeline_fini(struct intel_timeline *tl)
+{ /* Release a timeline's syncmap; does not free tl itself. */
+	GEM_BUG_ON(!list_empty(&tl->requests)); /* the request list must already be empty */
+
+	i915_syncmap_free(&tl->sync);
+}
+
 static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 				    struct i915_gem_timeline *timeline,
 				    const char *name,
@@ -49,20 +74,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 
 	/* Called during early_init before we know how many engines there are */
 	fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		tl->fence_context = fences++;
-		tl->common = timeline;
-#ifdef CONFIG_DEBUG_SPINLOCK
-		__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
-#else
-		spin_lock_init(&tl->lock);
-#endif
-		init_request_active(&tl->last_request, NULL);
-		INIT_LIST_HEAD(&tl->requests);
-		i915_syncmap_init(&tl->sync);
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_init(&timeline->engine[i],
+				      timeline, fences++,
+				      lockclass, lockname);
 
 	return 0;
 }
@@ -123,13 +138,8 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
 
 	lockdep_assert_held(&timeline->i915->drm.struct_mutex);
 
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		GEM_BUG_ON(!list_empty(&tl->requests));
-
-		i915_syncmap_free(&tl->sync);
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_fini(&timeline->engine[i]);
 
 	list_del(&timeline->link);
 	kfree(timeline->name);
diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c
index cc597fdcff51..395d10eaa7f9 100644
--- a/drivers/gpu/drm/i915/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/i915_syncmap.c
@@ -84,9 +84,20 @@ static inline unsigned int __sync_idx(const struct i915_syncmap *p, u64 id)
 	return (id >> p->height) & MASK;
 }
 
-static inline bool seqno_passed(u32 a, u32 b)
+static inline u64 __sync_prefix(const struct i915_syncmap *p, u64 id)
 {
-	return (s32)(b - a) < 0;
+	return id >> p->height >> SHIFT; /* the part of id that callers compare with p->prefix */
+}
+
+static inline u64 __sync_leaf(const struct i915_syncmap *p, u64 id)
+{ /* Leaf-only form of __sync_prefix(): the prefix of id for a height-0 node. */
+	GEM_BUG_ON(p->height); /* p must be a leaf (height 0) */
+	return id >> SHIFT; /* same value __sync_prefix() yields when height is 0 */
+}
+
+static inline bool seqno_later(u32 a, u32 b)
+{ /* Compares via the signed 32-bit difference; a == b counts as later. */
+	return (s32)(a - b) >= 0; /* the removed seqno_passed() was strict: (s32)(b - a) < 0 */
 }
 
 /** i915_syncmap_is_later -- compare against the last know sync point
@@ -111,7 +122,7 @@ bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
 	if (!p)
 		return false;
 
-	if (likely((id >> SHIFT) == p->prefix))
+	if (likely(__sync_leaf(p, id) == p->prefix))
 		goto found;
 
 	/* First climb the tree back to a parent branch */
@@ -120,7 +131,7 @@ bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
 		if (!p)
 			return false;
 
-		if ((id >> p->height >> SHIFT) == p->prefix)
+		if (__sync_prefix(p, id) == p->prefix)
 			break;
 	} while (1);
 
@@ -133,7 +144,7 @@ bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
 		if (!p)
 			return false;
 
-		if ((id >> p->height >> SHIFT) != p->prefix)
+		if (__sync_prefix(p, id) != p->prefix)
 			return false;
 	} while (1);
 
@@ -143,7 +154,23 @@ bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
 	if (!(p->bitmap & BIT(idx)))
 		return false;
 
-	return seqno_passed(__sync_seqno(p)[idx], seqno);
+	return seqno_later(__sync_seqno(p)[idx], seqno);
+}
+
+static struct i915_syncmap *
+__sync_alloc_leaf(struct i915_syncmap *parent, u64 id) /* returns NULL if kmalloc fails */
+{ /* Allocate a height-0 node for id; the caller links it into its parent's child array. */
+	struct i915_syncmap *p;
+
+	p = kmalloc(sizeof(*p) + NSYNCMAP * sizeof(u32), GFP_KERNEL); /* kmalloc: the u32 slots are not zeroed */
+	if (unlikely(!p))
+		return NULL;
+
+	p->parent = parent;
+	p->height = 0; /* set before __sync_leaf(), which asserts !p->height */
+	p->bitmap = 0; /* no slot bits set; lookups test the bitmap before reading a slot */
+	p->prefix = __sync_leaf(p, id);
+	return p;
 }
 
 static noinline int
@@ -153,11 +180,10 @@ __i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 	unsigned int idx;
 
 	if (!p) {
-		p = kzalloc(sizeof(*p) + NSYNCMAP * sizeof(seqno), GFP_KERNEL);
+		p = __sync_alloc_leaf(NULL, id);
 		if (unlikely(!p))
 			return -ENOMEM;
 
-		p->prefix = id >> SHIFT;
 		goto found;
 	}
 
@@ -168,7 +194,7 @@ __i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 
 		p = p->parent;
 
-		if ((id >> p->height >> SHIFT) == p->prefix)
+		if (__sync_prefix(p, id) == p->prefix)
 			break;
 	} while (1);
 
@@ -195,22 +221,25 @@ __i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 	do {
 		struct i915_syncmap *next;
 
-		if ((id >> p->height >> SHIFT) != p->prefix) {
+		if (__sync_prefix(p, id) != p->prefix) {
+			unsigned int above;
+
 			/* insert a join above the current layer */
 			next = kzalloc(sizeof(*next) + NSYNCMAP * sizeof(next),
 				       GFP_KERNEL);
 			if (unlikely(!next))
 				return -ENOMEM;
 
-			next->height = ALIGN(fls64((id >> p->height >> SHIFT) ^ p->prefix),
-					    SHIFT) + p->height;
-			next->prefix = id >> next->height >> SHIFT;
+			above = fls64(__sync_prefix(p, id) ^ p->prefix);
+			above = round_up(above, SHIFT);
+			next->height = above + p->height;
+			next->prefix = __sync_prefix(next, id);
 
 			if (p->parent)
 				__sync_child(p->parent)[__sync_idx(p->parent, id)] = next;
 			next->parent = p->parent;
 
-			idx = p->prefix >> (next->height - p->height - SHIFT) & MASK;
+			idx = p->prefix >> (above - SHIFT) & MASK;
 			__sync_child(next)[idx] = p;
 			next->bitmap |= BIT(idx);
 			p->parent = next;
@@ -227,15 +256,12 @@ __i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 		idx = __sync_idx(p, id);
 		next = __sync_child(p)[idx];
 		if (unlikely(!next)) {
-			next = kzalloc(sizeof(*next) + NSYNCMAP * sizeof(seqno),
-				       GFP_KERNEL);
+			next = __sync_alloc_leaf(p, id);
 			if (unlikely(!next))
 				return -ENOMEM;
 
 			__sync_child(p)[idx] = next;
 			p->bitmap |= BIT(idx);
-			next->parent = p;
-			next->prefix = id >> SHIFT;
 
 			p = next;
 			break;
@@ -245,8 +271,7 @@ __i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 	} while (1);
 
 found:
-	GEM_BUG_ON(p->height);
-	GEM_BUG_ON(p->prefix != id >> SHIFT);
+	GEM_BUG_ON(p->prefix != __sync_leaf(p, id));
 	idx = id & MASK;
 	__sync_seqno(p)[idx] = seqno;
 	p->bitmap |= BIT(idx);
@@ -271,7 +296,7 @@ int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
 	/* We expect to be called in sequence following a  _get(id), which
 	 * should have preloaded the tl->sync hint for us.
 	 */
-	if (likely(p && (id >> SHIFT) == p->prefix)) {
+	if (likely(p && __sync_leaf(p, id) == p->prefix)) {
 		unsigned int idx = id & MASK;
 
 		__sync_seqno(p)[idx] = seqno;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
index 35f7cdd6d6bb..2058e754c86d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
@@ -30,7 +30,6 @@
 
 static int igt_sync(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
 	const struct {
 		const char *name;
 		u32 seqno;
@@ -54,16 +53,14 @@ static int igt_sync(void *arg)
 		{ "unwrap", UINT_MAX, true, false },
 		{},
 	}, *p;
-	struct i915_gem_timeline *timeline;
 	struct intel_timeline *tl;
 	int order, offset;
 	int ret;
 
-	timeline = mock_timeline(i915);
-	if (!timeline)
+	tl = mock_timeline(0);
+	if (!tl)
 		return -ENOMEM;
 
-	tl = &timeline->engine[RCS];
 	for (p = pass; p->name; p++) {
 		for (order = 1; order < 64; order++) {
 			for (offset = -1; offset <= (order > 1); offset++) {
@@ -85,8 +82,12 @@ static int igt_sync(void *arg)
 			}
 		}
 	}
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
 
-	tl = &timeline->engine[BCS];
 	for (order = 1; order < 64; order++) {
 		for (offset = -1; offset <= (order > 1); offset++) {
 			u64 ctx = BIT_ULL(order) + offset;
@@ -110,7 +111,7 @@ static int igt_sync(void *arg)
 	}
 
 out:
-	mock_timeline_destroy(timeline);
+	mock_timeline_destroy(tl);
 	return ret;
 }
 
@@ -132,23 +133,18 @@ static unsigned int random_engine(struct rnd_state *rnd)
 
 static int bench_sync(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
 	struct rnd_state prng;
-	struct i915_gem_timeline *timeline;
 	struct intel_timeline *tl;
 	unsigned long end_time, count;
 	ktime_t kt;
-	int ret;
 
-	timeline = mock_timeline(i915);
-	if (!timeline)
+	tl = mock_timeline(0);
+	if (!tl)
 		return -ENOMEM;
 
-	tl = &timeline->engine[0];
-
 	prandom_seed_state(&prng, i915_selftest.random_seed);
 	count = 0;
-	kt = -ktime_get();
+	kt = ktime_get();
 	end_time = jiffies + HZ/10;
 	do {
 		u64 id = prandom_u64_state(&prng);
@@ -156,54 +152,64 @@ static int bench_sync(void *arg)
 		__intel_timeline_sync_set(tl, id, 0);
 		count++;
 	} while (!time_after(jiffies, end_time));
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu random insertions, %lluns/insert\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 
 	prandom_seed_state(&prng, i915_selftest.random_seed);
 	end_time = count;
-	kt = -ktime_get();
+	kt = ktime_get();
 	while (end_time--) {
 		u64 id = prandom_u64_state(&prng);
 
 		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
+			mock_timeline_destroy(tl);
 			pr_err("Lookup of %llu failed\n", id);
-			ret = -EINVAL;
-			goto out;
+			return -EINVAL;
 		}
 	}
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu random lookups, %lluns/lookup\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 
-	tl = &timeline->engine[1];
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
 	count = 0;
-	kt = -ktime_get();
+	kt = ktime_get();
 	end_time = jiffies + HZ/10;
 	do {
 		__intel_timeline_sync_set(tl, count++, 0);
 	} while (!time_after(jiffies, end_time));
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 
 	end_time = count;
-	kt = -ktime_get();
+	kt = ktime_get();
 	while (end_time--) {
 		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
 			pr_err("Lookup of %lu failed\n", end_time);
-			ret = -EINVAL;
-			goto out;
+			mock_timeline_destroy(tl);
+			return -EINVAL;
 		}
 	}
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
+
 	prandom_seed_state(&prng, i915_selftest.random_seed);
-	tl = &timeline->engine[2];
 	count = 0;
-	kt = -ktime_get();
+	kt = ktime_get();
 	end_time = jiffies + HZ/10;
 	do {
 		u32 id = random_engine(&prng);
@@ -214,13 +220,17 @@ static int bench_sync(void *arg)
 
 		count++;
 	} while (!time_after(jiffies, end_time));
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	mock_timeline_destroy(tl);
+
+	tl = mock_timeline(0);
+	if (!tl)
+		return -ENOMEM;
 
-	tl = &timeline->engine[3];
 	count = 0;
-	kt = -ktime_get();
+	kt = ktime_get();
 	end_time = jiffies + HZ/10;
 	do {
 		if (!__intel_timeline_sync_is_later(tl, count & 7, count >> 4))
@@ -228,14 +238,12 @@ static int bench_sync(void *arg)
 
 		count++;
 	} while (!time_after(jiffies, end_time));
-	kt = ktime_add(ktime_get(), kt);
+	kt = ktime_sub(ktime_get(), kt);
 	pr_info("%s: %lu cyclic insert/lookups, %lluns/op\n",
 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	mock_timeline_destroy(tl);
 
-	ret = 0;
-out:
-	mock_timeline_destroy(timeline);
-	return ret;
+	return 0;
 }
 
 int i915_gem_timeline_mock_selftests(void)
@@ -244,15 +252,6 @@ int i915_gem_timeline_mock_selftests(void)
 		SUBTEST(igt_sync),
 		SUBTEST(bench_sync),
 	};
-	struct drm_i915_private *i915;
-	int err;
-
-	i915 = mock_gem_device();
-	if (!i915)
-		return -ENOMEM;
-
-	err = i915_subtests(tests, i915);
-	drm_dev_unref(&i915->drm);
 
-	return err;
+	return i915_subtests(tests, NULL);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e8d62f5f6ed3..47b1f47c5812 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -24,29 +24,22 @@
 
 #include "mock_timeline.h"
 
-struct i915_gem_timeline *
-mock_timeline(struct drm_i915_private *i915)
+struct intel_timeline *mock_timeline(u64 context) /* context becomes tl->fence_context */
 {
-	struct i915_gem_timeline *timeline;
+	static struct lock_class_key class; /* static: one key shared by every mock timeline */
+	struct intel_timeline *tl;
 
-	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
-	if (!timeline)
+	tl = kzalloc(sizeof(*tl), GFP_KERNEL); /* released by mock_timeline_destroy() */
+	if (!tl)
 		return NULL;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_timeline_init(i915, timeline, "mock");
-	mutex_unlock(&i915->drm.struct_mutex);
+	__intel_timeline_init(tl, NULL, context, &class, "mock"); /* NULL: no parent i915_gem_timeline */
 
-	return timeline;
+	return tl;
 }
 
-void mock_timeline_destroy(struct i915_gem_timeline *timeline)
+void mock_timeline_destroy(struct intel_timeline *tl) /* counterpart of mock_timeline() */
 {
-	struct drm_i915_private *i915 = timeline->i915;
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_timeline_fini(timeline);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	kfree(timeline);
+	__intel_timeline_fini(tl); /* asserts the request list is empty, frees the syncmap */
+	kfree(tl); /* tl came from kzalloc() in mock_timeline() */
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
index b33dcd2151ef..c27ff4639b8b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
@@ -27,7 +27,7 @@
 
 #include "../i915_gem_timeline.h"
 
-struct i915_gem_timeline *mock_timeline(struct drm_i915_private *i915);
-void mock_timeline_destroy(struct i915_gem_timeline *timeline);
+struct intel_timeline *mock_timeline(u64 context); /* returns NULL if allocation fails */
+void mock_timeline_destroy(struct intel_timeline *tl); /* also frees tl */
 
 #endif /* !__MOCK_TIMELINE__ */
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list