[PATCH 42/49] drm/i915: Allocate a status page for each timeline

Chris Wilson chris at chris-wilson.co.uk
Sun Dec 30 14:47:06 UTC 2018


Allocate a page for use as a status page by a group of timelines, as we
only need a dword of storage for each (rounded up to the cacheline for
safety) we can pack multiple timelines into the same page. Each timeline
will then be able to track its own HW seqno.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h               |   4 +
 drivers/gpu/drm/i915/i915_gem.c               |   5 +-
 drivers/gpu/drm/i915/i915_timeline.c          | 117 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_timeline.h          |  12 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   8 +-
 5 files changed, 137 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 384eb8f6cc1a..0549efc088fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1958,6 +1958,10 @@ struct drm_i915_private {
 		struct mutex timeline_lock;
 		struct list_head timelines;
 
+		/* Pack multiple timelines' seqnos into the same page */
+		struct i915_vma *timeline_hwsp;
+		u64 timeline_free;
+
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d1735ba4c18c..2a231ed86c92 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5288,6 +5288,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
+	i915_timelines_fini(dev_priv);
 	i915_gem_drain_freed_objects(dev_priv);
 	return ret;
 }
@@ -5314,6 +5315,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 
 	intel_uc_fini_misc(dev_priv);
 	i915_gem_cleanup_userptr(dev_priv);
+	i915_timelines_fini(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
 
@@ -5416,8 +5418,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
-	mutex_init(&dev_priv->gt.timeline_lock);
-	INIT_LIST_HEAD(&dev_priv->gt.timelines);
+	i915_timelines_init(dev_priv);
 
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index c4e7ad179d86..64c93502f8a3 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -9,6 +9,61 @@
 #include "i915_timeline.h"
 #include "i915_syncmap.h"
 
+#define NBITS BITS_PER_TYPE(typeof(i915->gt.timeline_free))
+
+static int find_first_cacheline(struct drm_i915_private *i915)
+{
+	return find_first_bit((unsigned long *)&i915->gt.timeline_free, NBITS);
+}
+
+static void get_seqno_locked(struct drm_i915_private *i915,
+			     struct i915_timeline *timeline)
+{
+	struct i915_vma *vma;
+	void *vaddr;
+	int offset;
+
+	offset = find_first_cacheline(i915);
+	if (offset == NBITS && i915->gt.timeline_hwsp) {
+		i915_vma_put(i915->gt.timeline_hwsp);
+		i915->gt.timeline_hwsp = NULL;
+	}
+
+	vma = i915->gt.timeline_hwsp;
+	if (!vma) {
+		struct drm_i915_gem_object *bo;
+
+		BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
+		bo = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (!bo)
+			return;
+
+		i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
+
+		vma = i915_vma_instance(bo, &i915->ggtt.vm, NULL);
+		if (IS_ERR(vma))
+			return;
+
+		i915->gt.timeline_hwsp = vma;
+		i915->gt.timeline_free = ~0ull;
+		offset = 0;
+	}
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr))
+		return;
+
+	timeline->hwsp_ggtt = i915_vma_get(vma);
+	timeline->hwsp_offset = offset * CACHELINE_BYTES;
+
+	timeline->hwsp_seqno =
+		memset(vaddr + timeline->hwsp_offset,
+		       0,
+		       sizeof(*timeline->hwsp_seqno));
+
+	i915->gt.timeline_free &= ~BIT_ULL(offset);
+}
+
 void i915_timeline_init(struct drm_i915_private *i915,
 			struct i915_timeline *timeline,
 			const char *name)
@@ -23,8 +78,10 @@ void i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->i915 = i915;
 	timeline->name = name;
+	timeline->pin_count = 0;
 
 	mutex_lock(&i915->gt.timeline_lock);
+	get_seqno_locked(i915, timeline);
 	list_add(&timeline->link, &i915->gt.timelines);
 	mutex_unlock(&i915->gt.timeline_lock);
 
@@ -40,6 +97,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
 	i915_syncmap_init(&timeline->sync);
 }
 
+void i915_timelines_init(struct drm_i915_private *i915)
+{
+	mutex_init(&i915->gt.timeline_lock);
+	INIT_LIST_HEAD(&i915->gt.timelines);
+}
+
 /**
  * i915_timelines_park - called when the driver idles
  * @i915: the drm_i915_private device
@@ -69,13 +132,22 @@ void i915_timelines_park(struct drm_i915_private *i915)
 
 void i915_timeline_fini(struct i915_timeline *timeline)
 {
+	struct drm_i915_private *i915 = timeline->i915;
+
+	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
 	i915_syncmap_free(&timeline->sync);
 
-	mutex_lock(&timeline->i915->gt.timeline_lock);
+	mutex_lock(&i915->gt.timeline_lock);
 	list_del(&timeline->link);
-	mutex_unlock(&timeline->i915->gt.timeline_lock);
+	if (timeline->hwsp_ggtt == i915->gt.timeline_hwsp)
+		i915->gt.timeline_free |=
+			BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	mutex_unlock(&i915->gt.timeline_lock);
+
+	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+	i915_vma_put(timeline->hwsp_ggtt);
 }
 
 struct i915_timeline *
@@ -93,6 +165,34 @@ i915_timeline_create(struct drm_i915_private *i915, const char *name)
 	return timeline;
 }
 
+int i915_timeline_pin(struct i915_timeline *tl)
+{
+	int err;
+
+	if (tl->pin_count++)
+		return 0;
+	GEM_BUG_ON(!tl->pin_count);
+
+	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err)
+		goto unpin;
+
+	return 0;
+
+unpin:
+	tl->pin_count = 0;
+	return err;
+}
+
+void i915_timeline_unpin(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	if (--tl->pin_count)
+		return;
+
+	__i915_vma_unpin(tl->hwsp_ggtt);
+}
+
 void __i915_timeline_free(struct kref *kref)
 {
 	struct i915_timeline *timeline =
@@ -102,6 +202,19 @@ void __i915_timeline_free(struct kref *kref)
 	kfree(timeline);
 }
 
+void i915_timelines_fini(struct drm_i915_private *i915)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!list_empty(&i915->gt.timelines));
+
+	vma = fetch_and_zero(&i915->gt.timeline_hwsp);
+	if (vma)
+		i915_vma_put(vma);
+
+	mutex_destroy(&i915->gt.timeline_lock);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_timeline.c"
 #include "selftests/i915_timeline.c"
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index c6f051541a01..597cf4e9ca7a 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -32,6 +32,8 @@
 #include "i915_syncmap.h"
 #include "i915_utils.h"
 
+struct i915_vma;
+
 struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
@@ -40,6 +42,11 @@ struct i915_timeline {
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
 
+	unsigned int pin_count;
+	const u32 *hwsp_seqno;
+	struct i915_vma *hwsp_ggtt;
+	u32 hwsp_offset;
+
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
 	 * outstanding.
@@ -135,6 +142,11 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
 }
 
+int i915_timeline_pin(struct i915_timeline *tl);
+void i915_timeline_unpin(struct i915_timeline *tl);
+
+void i915_timelines_init(struct drm_i915_private *i915);
 void i915_timelines_park(struct drm_i915_private *i915);
+void i915_timelines_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index b2292eb609f8..40f3774b2666 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -68,6 +68,8 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_fini(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
+	i915_timelines_fini(i915);
+
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
@@ -75,9 +77,6 @@ static void mock_device_release(struct drm_device *dev)
 	mock_fini_ggtt(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	WARN_ON(!list_empty(&i915->gt.timelines));
-	mutex_destroy(&i915->gt.timeline_lock);
-
 	destroy_workqueue(i915->wq);
 
 	kmem_cache_destroy(i915->priorities);
@@ -229,8 +228,7 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->priorities)
 		goto err_dependencies;
 
-	mutex_init(&i915->gt.timeline_lock);
-	INIT_LIST_HEAD(&i915->gt.timelines);
+	i915_timelines_init(i915);
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list