[PATCH 2/3] drm/i915: Squash repeated awaits on the same fence

Chris Wilson chris at chris-wilson.co.uk
Sat Apr 8 21:42:07 UTC 2017


Track the latest fence waited upon on each context, and only add a new
asynchronous wait if the new fence is more recent than the recorded
fence for that context. This requires us to filter out unordered
timelines, which would ideally be noted by a common
DMA_FENCE_NO_CONTEXT marker; in the absence of such a universal
identifier, we have to use our own i915->mm.unordered_timeline token.
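
As a rough illustration of the idea (a minimal sketch, not the patch
itself; squash_await() and its parameters are hypothetical stand-ins
for the logic added to i915_gem_request_await_dma_fence() below):

#include <linux/dma-fence.h>
#include <linux/radix-tree.h>

/*
 * Sketch only: record the most recent fence per ordered timeline.
 * Returns 0 if the wait can be squashed (an equal or later fence from
 * the same timeline is already recorded), 1 if the caller should go
 * on to add the wait, or a negative errno on allocation failure.
 */
static int squash_await(struct radix_tree_root *waits,
			struct dma_fence *fence,
			u64 unordered_timeline)
{
	void __rcu **slot;
	struct dma_fence *old;

	/* Fences on an unordered timeline cannot be compared or squashed */
	if (fence->context == unordered_timeline)
		return 1;

	slot = radix_tree_lookup_slot(waits, fence->context);
	if (!slot)
		/* First fence seen on this timeline: record it */
		return radix_tree_insert(waits, fence->context, fence) ?: 1;

	old = rcu_dereference_protected(*slot, true);
	if (!dma_fence_is_later(fence, old))
		return 0; /* already waiting upon an equal or later fence */

	/* Newer fence on a known timeline: replace the recorded one */
	radix_tree_replace_slot(waits, slot, fence);
	return 1;
}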

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 33 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 lib/radix-tree.c                        |  1 +
 3 files changed, 36 insertions(+)
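
For illustration only (not part of the patch): fence_a and fence_b
below are hypothetical fences on the same ordered timeline, with
fence_b carrying the later seqno.

	err = i915_gem_request_await_dma_fence(req, fence_a); /* recorded in req->waits */
	err = i915_gem_request_await_dma_fence(req, fence_b); /* later: replaces fence_a, adds the wait */
	err = i915_gem_request_await_dma_fence(req, fence_a); /* not later: squashed, returns 0 */

Without the squash, each call would add another asynchronous wait to
the request, even when an earlier fence is already implied by a later
one on the same timeline.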

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 313cdff7c6dd..fc7bdbe3bf94 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -606,6 +606,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	i915_priotree_init(&req->priotree);
 
+	INIT_RADIX_TREE(&req->waits, GFP_KERNEL);
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
@@ -723,6 +724,27 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return 0;
 
+	/* Squash repeated waits to the same timelines, picking the latest */
+	if (fence->context != req->i915->mm.unordered_timeline) {
+		void __rcu **slot;
+
+		slot = radix_tree_lookup_slot(&req->waits, fence->context);
+		if (!slot) {
+			ret = radix_tree_insert(&req->waits,
+						fence->context, fence);
+			if (ret)
+				return ret;
+		} else {
+			struct dma_fence *old =
+				rcu_dereference_protected(*slot, true);
+
+			if (!dma_fence_is_later(fence, old))
+				return 0;
+
+			radix_tree_replace_slot(&req->waits, slot, fence);
+		}
+	}
+
 	if (dma_fence_is_i915(fence))
 		return i915_gem_request_await_request(req, to_request(fence));
 
@@ -843,6 +865,15 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 			   round_jiffies_up_relative(HZ));
 }
 
+static void free_radixtree(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0)
+		radix_tree_iter_delete(root, &iter, slot);
+}
+
 /*
 * NB: This function is not allowed to fail. Doing so would mean the
  * request is not being tracked for completion but the work itself is
@@ -943,6 +974,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	free_radixtree(&request->waits);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a211c53c813f..638899b9c170 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,6 +137,8 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	struct radix_tree_root waits;
+
 	/** GEM sequence number associated with this request on the
 	 * global execution timeline. It is zero when the request is not
 	 * on the HW queue (i.e. not on the engine timeline list).
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 691a9ad48497..84cccf7138c4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
-- 
2.11.0


