[Intel-gfx] [PATCH 34/37] drm/i915/gt: Push the wait for the context to bound to the request

Chris Wilson chris at chris-wilson.co.uk
Wed Aug 5 12:22:28 UTC 2020


Rather than synchronously wait for the context to be bound, within the
intel_context_pin(), we can track the pending completion of the bind
fence and only submit requests along the context when signaled.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Thomas Hellström <thomas.hellstrom at intel.com>
---
 drivers/gpu/drm/i915/Makefile              |  1 +
 drivers/gpu/drm/i915/gt/intel_context.c    | 80 +++++++++++++---------
 drivers/gpu/drm/i915/gt/intel_context.h    |  6 ++
 drivers/gpu/drm/i915/i915_active.h         |  1 -
 drivers/gpu/drm/i915/i915_request.c        |  4 ++
 drivers/gpu/drm/i915/i915_sw_fence_await.c | 62 +++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence_await.h | 19 +++++
 7 files changed, 140 insertions(+), 33 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence_await.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence_await.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a3a4c8a555ec..2cf54db8b847 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -61,6 +61,7 @@ i915-y += \
 	i915_memcpy.o \
 	i915_mm.o \
 	i915_sw_fence.o \
+	i915_sw_fence_await.o \
 	i915_sw_fence_work.o \
 	i915_syncmap.o \
 	i915_user_extensions.o
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ff3f7580d1ca..04c2f207b11d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -10,6 +10,7 @@
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_sw_fence_await.h"
 
 #include "intel_context.h"
 #include "intel_engine.h"
@@ -140,31 +141,71 @@ intel_context_acquire_lock(struct intel_context *ce,
 	return 0;
 }
 
+static int await_bind(struct dma_fence_await *fence, struct i915_vma *vma)
+{
+	struct dma_fence *bind;
+	int err = 0;
+
+	bind = i915_active_fence_get(&vma->active.excl);
+	if (bind) {
+		err = i915_sw_fence_await_dma_fence(&fence->await, bind,
+						    0, GFP_KERNEL);
+		dma_fence_put(bind);
+	}
+
+	return err;
+}
+
 static int intel_context_active_locked(struct intel_context *ce)
 {
+	struct dma_fence_await *fence;
 	int err;
 
+	fence = dma_fence_await_create(GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
 	err = __ring_active_locked(ce->ring);
 	if (err)
-		return err;
+		goto out_fence;
+
+	err = await_bind(fence, ce->ring->vma);
+	if (err < 0)
+		goto err_ring;
 
 	err = intel_timeline_pin_locked(ce->timeline);
 	if (err)
 		goto err_ring;
 
-	if (!ce->state)
-		return 0;
-
-	err = __context_active_locked(ce->state);
-	if (err)
+	err = await_bind(fence, ce->timeline->hwsp_ggtt);
+	if (err < 0)
 		goto err_timeline;
 
-	return 0;
+	if (ce->state) {
+		err = __context_active_locked(ce->state);
+		if (err)
+			goto err_timeline;
+
+		err = await_bind(fence, ce->state);
+		if (err < 0)
+			goto err_state;
+	}
+
+	/* Must be the last action as it *releases* the ce->active */
+	if (atomic_read(&fence->await.pending) > 1)
+		i915_active_set_exclusive(&ce->active, &fence->dma);
 
+	err = 0;
+	goto out_fence;
+
+err_state:
+	__context_retire_state(ce->state);
 err_timeline:
 	intel_timeline_unpin(ce->timeline);
 err_ring:
 	__ring_retire(ce->ring);
+out_fence:
+	i915_sw_fence_commit(&fence->await);
 	return err;
 }
 
@@ -322,27 +363,6 @@ static void intel_context_active_release(struct intel_context *ce)
 	i915_active_release(&ce->active);
 }
 
-static int __intel_context_sync(struct intel_context *ce)
-{
-	int err;
-
-	err = i915_vma_wait_for_bind(ce->ring->vma);
-	if (err)
-		return err;
-
-	err = i915_vma_wait_for_bind(ce->timeline->hwsp_ggtt);
-	if (err)
-		return err;
-
-	if (ce->state) {
-		err = i915_vma_wait_for_bind(ce->state);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
@@ -368,10 +388,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 	}
 
 	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
-		err = __intel_context_sync(ce);
-		if (unlikely(err))
-			goto out_unlock;
-
 		err = intel_context_active_acquire(ce);
 		if (unlikely(err))
 			goto out_unlock;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 07be021882cc..f48df2784a6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -249,4 +249,10 @@ static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
 	return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
 }
 
+static inline int i915_request_await_context(struct i915_request *rq,
+					     struct intel_context *ce)
+{
+	return __i915_request_await_exclusive(rq, &ce->active);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 6df7e721616d..43efc06dcde8 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -207,7 +207,6 @@ void i915_active_release(struct i915_active *ref);
 
 static inline void __i915_active_acquire(struct i915_active *ref)
 {
-	GEM_BUG_ON(!atomic_read(&ref->count));
 	atomic_inc(&ref->count);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0208e917d14a..bb7e33b6314a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -890,6 +890,10 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	if (ret)
 		goto err_unwind;
 
+	ret = i915_request_await_context(rq, ce);
+	if (ret)
+		goto err_unwind;
+
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
 	intel_context_mark_active(ce);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_await.c b/drivers/gpu/drm/i915/i915_sw_fence_await.c
new file mode 100644
index 000000000000..431d324e5591
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence_await.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: MIT
+/*
+ * (C) Copyright 2020 Intel Corporation
+ */
+
+#include <linux/slab.h>
+#include <linux/dma-fence.h>
+
+#include "i915_sw_fence_await.h"
+
+static int __i915_sw_fence_call
+fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct dma_fence_await *f = container_of(fence, typeof(*f), await);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_signal(&f->dma);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&f->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static const char *fence_name(struct dma_fence *fence)
+{
+	return "dma-fence-await";
+}
+
+static void fence_release(struct dma_fence *fence)
+{
+	struct dma_fence_await *f = container_of(fence, typeof(*f), dma);
+
+	i915_sw_fence_fini(&f->await);
+
+	BUILD_BUG_ON(offsetof(typeof(*f), dma));
+	dma_fence_free(&f->dma);
+}
+
+static const struct dma_fence_ops fence_ops = {
+	.get_driver_name = fence_name,
+	.get_timeline_name = fence_name,
+	.release = fence_release,
+};
+
+struct dma_fence_await *dma_fence_await_create(gfp_t gfp)
+{
+	struct dma_fence_await *f;
+
+	f = kmalloc(sizeof(*f), gfp);
+	if (!f)
+		return NULL;
+
+	i915_sw_fence_init(&f->await, fence_notify);
+	dma_fence_init(&f->dma, &fence_ops, &f->await.wait.lock, 0, 0);
+
+	return f;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_await.h b/drivers/gpu/drm/i915/i915_sw_fence_await.h
new file mode 100644
index 000000000000..71882a5ed443
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence_await.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * (C) Copyright 2020 Intel Corporation
+ */
+
+#ifndef I915_SW_FENCE_AWAIT_H
+#define I915_SW_FENCE_AWAIT_H
+
+#include <linux/dma-fence.h>
+#include <linux/slab.h>
+
+#include "i915_sw_fence.h"
+
+struct dma_fence_await {
+	struct dma_fence dma;
+	struct i915_sw_fence await;
+} *dma_fence_await_create(gfp_t gfp);
+
+#endif /* I915_SW_FENCE_AWAIT_H */
-- 
2.20.1



More information about the Intel-gfx mailing list