[Intel-gfx] [PATCH 3/3] drm/i915: Squash repeated awaits on the same fence
Chris Wilson
chris@chris-wilson.co.uk
Sat Apr 8 16:26:03 UTC 2017
Track the latest fence waited upon for each context, and only add a new
asynchronous wait if the new fence is more recent than the fence already
recorded for that context. This requires us to filter out unordered
timelines, which are identified by DMA_FENCE_NO_CONTEXT.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 33 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 lib/radix-tree.c                        |  1 +
 3 files changed, 36 insertions(+)
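The transformation in miniature, as a standalone sketch (illustrative
names only; the patch proper keys a radix tree by fence->context instead
of this fixed table):

/*
 * Standalone model of the squash: remember the latest seqno seen per
 * timeline and skip any wait already covered by a later one. Everything
 * here is hypothetical glue for illustration, not the driver code.
 */
#include <stdbool.h>
#include <stdio.h>

#define NO_CONTEXT 0 /* stand-in for DMA_FENCE_NO_CONTEXT */

static struct { unsigned long context; unsigned int seqno; } seen[16];
static int nseen;

/* wrap-safe "a is later than b on the same timeline" */
static bool is_later(unsigned int a, unsigned int b)
{
	return (int)(a - b) > 0;
}

/* returns true if an asynchronous wait must still be emitted */
static bool must_wait(unsigned long context, unsigned int seqno)
{
	int i;

	if (context == NO_CONTEXT) /* unordered timeline: cannot squash */
		return true;

	for (i = 0; i < nseen; i++) {
		if (seen[i].context != context)
			continue;
		if (!is_later(seqno, seen[i].seqno))
			return false; /* covered by an earlier await */
		seen[i].seqno = seqno;
		return true;
	}

	if (nseen < 16) { /* table full: just fall back to waiting */
		seen[nseen].context = context;
		seen[nseen].seqno = seqno;
		nseen++;
	}
	return true;
}

int main(void)
{
	printf("%d\n", must_wait(1, 1)); /* 1: first wait on timeline 1 */
	printf("%d\n", must_wait(1, 3)); /* 1: fence 3 is newer, wait again */
	printf("%d\n", must_wait(1, 2)); /* 0: squashed, 3 already awaited */
	return 0;
}

Fences on a single timeline signal in seqno order, so keeping just the
latest fence per timeline is enough: waiting on it implies waiting on
all of its predecessors.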
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 313cdff7c6dd..c184f1d26f25 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -606,6 +606,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	i915_priotree_init(&req->priotree);
 
+	INIT_RADIX_TREE(&req->waits, GFP_KERNEL);
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
@@ -723,6 +724,27 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return 0;
 
+	/* Squash repeated waits to the same timelines, picking the latest */
+	if (fence->context != DMA_FENCE_NO_CONTEXT) {
+		void __rcu **slot;
+
+		slot = radix_tree_lookup_slot(&req->waits, fence->context);
+		if (!slot) {
+			ret = radix_tree_insert(&req->waits,
+						fence->context, fence);
+			if (ret)
+				return ret;
+		} else {
+			struct dma_fence *old =
+				rcu_dereference_protected(*slot, true);
+
+			if (!dma_fence_is_later(fence, old))
+				return 0;
+
+			radix_tree_replace_slot(&req->waits, slot, fence);
+		}
+	}
+
 	if (dma_fence_is_i915(fence))
 		return i915_gem_request_await_request(req, to_request(fence));
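For reference, dma_fence_is_later() is only meaningful for two fences on
the same context, which is why the DMA_FENCE_NO_CONTEXT filter comes
first. Its contract amounts to a wrap-safe seqno comparison; a sketch of
that contract (not necessarily the exact dma-fence implementation):

/* within one timeline, "later" is a wrap-safe seqno comparison */
static bool later_on_same_timeline(unsigned int a_seqno, unsigned int b_seqno)
{
	return (int)(a_seqno - b_seqno) > 0;
}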
@@ -843,6 +865,15 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 			   round_jiffies_up_relative(HZ));
 }
 
+static void free_radixtree(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0)
+		radix_tree_iter_delete(root, &iter, slot);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the
  * request is not being tracked for completion but the work itself is
@@ -943,6 +974,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	free_radixtree(&request->waits);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
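Taken together, request->waits lives exactly as long as request
construction: INIT_RADIX_TREE() at allocation, a lookup/insert/replace
per await while the request is built, then free_radixtree() at
submission, after which no further awaits can arrive. Construction of a
single request is serialised (under struct_mutex in this era of the
driver), which is why rcu_dereference_protected(*slot, true) suffices
when inspecting a slot.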
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a211c53c813f..638899b9c170 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,6 +137,8 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	struct radix_tree_root waits;
+
 	/** GEM sequence number associated with this request on the
 	 * global execution timeline. It is zero when the request is not
 	 * on the HW queue (i.e. not on the engine timeline list).
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 691a9ad48497..84cccf7138c4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
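The lib/radix-tree.c export is required because i915 can be built as a
module; radix_tree_iter_delete() had no modular users before, so without
the EXPORT_SYMBOL the modpost stage would fail with an error along the
lines of:

	ERROR: "radix_tree_iter_delete" [drivers/gpu/drm/i915/i915.ko] undefined!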
--
2.11.0