[PATCH 3/3] common-tl-sync
Chris Wilson
chris at chris-wilson.co.uk
Sat Apr 8 21:42:08 UTC 2017
---
drivers/gpu/drm/i915/i915_gem_request.c | 71 ++++++++++++++++++++------------
drivers/gpu/drm/i915/i915_gem_timeline.c | 6 +++
drivers/gpu/drm/i915/i915_gem_timeline.h | 3 ++
3 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index fc7bdbe3bf94..3d96568a8152 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -717,9 +717,7 @@ int
i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
struct dma_fence *fence)
{
- struct dma_fence_array *array;
int ret;
- int i;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return 0;
@@ -727,6 +725,11 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
/* Squash repeated waits to the same timelines, picking the latest */
if (fence->context != req->i915->mm.unordered_timeline) {
void __rcu **slot;
+ void *p_seqno;
+
+ p_seqno = radix_tree_lookup(&req->timeline->sync, fence->context);
+ if ((int)((u32)(uintptr_t)p_seqno - fence->seqno) >= 0)
+ return 0;
slot = radix_tree_lookup_slot(&req->waits, fence->context);
if (!slot) {
@@ -745,37 +748,53 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
}
}
- if (dma_fence_is_i915(fence))
- return i915_gem_request_await_request(req, to_request(fence));
-
- if (!dma_fence_is_array(fence)) {
+ if (dma_fence_is_i915(fence)) {
+ ret = i915_gem_request_await_request(req, to_request(fence));
+ if (ret < 0)
+ return ret;
+ } else if (!dma_fence_is_array(fence)) {
ret = i915_sw_fence_await_dma_fence(&req->submit,
fence, I915_FENCE_TIMEOUT,
GFP_KERNEL);
- return ret < 0 ? ret : 0;
- }
+ if (ret < 0)
+ return ret;
+ } else {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
+ int i;
+
+ /* Note that if the fence-array was created in signal-on-any mode,
+ * we should *not* decompose it into its individual fences. However,
+ * we don't currently store which mode the fence-array is operating
+ * in. Fortunately, the only user of signal-on-any is private to
+ * amdgpu and we should not see any incoming fence-array from
+ * sync-file being in signal-on-any mode.
+ */
- /* Note that if the fence-array was created in signal-on-any mode,
- * we should *not* decompose it into its individual fences. However,
- * we don't currently store which mode the fence-array is operating
- * in. Fortunately, the only user of signal-on-any is private to
- * amdgpu and we should not see any incoming fence-array from
- * sync-file being in signal-on-any mode.
- */
+ for (i = 0; i < array->num_fences; i++) {
+ struct dma_fence *child = array->fences[i];
- array = to_dma_fence_array(fence);
- for (i = 0; i < array->num_fences; i++) {
- struct dma_fence *child = array->fences[i];
+ if (dma_fence_is_i915(child))
+ ret = i915_gem_request_await_request(req,
+ to_request(child));
+ else
+ ret = i915_sw_fence_await_dma_fence(&req->submit,
+ child, I915_FENCE_TIMEOUT,
+ GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (fence->context != req->i915->mm.unordered_timeline) {
+ struct radix_tree_root *root = &req->timeline->sync;
+ void *p_seqno = (void *)(uintptr_t)fence->seqno;
+ void __rcu **slot;
- if (dma_fence_is_i915(child))
- ret = i915_gem_request_await_request(req,
- to_request(child));
+ slot = radix_tree_lookup_slot(root, fence->context);
+ if (slot)
+ radix_tree_replace_slot(root, slot, p_seqno);
else
- ret = i915_sw_fence_await_dma_fence(&req->submit,
- child, I915_FENCE_TIMEOUT,
- GFP_KERNEL);
- if (ret < 0)
- return ret;
+ radix_tree_insert(root, fence->context, p_seqno);
}
return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index b596ca7ee058..e295aaebb8ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -56,6 +56,7 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,
#endif
init_request_active(&tl->last_request, NULL);
INIT_LIST_HEAD(&tl->requests);
+ INIT_RADIX_TREE(&tl->sync, GFP_KERNEL);
}
return 0;
@@ -89,6 +90,11 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
struct intel_timeline *tl = &timeline->engine[i];
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot(slot, &tl->sync, &iter, 0)
+ radix_tree_iter_delete(&tl->sync, &iter, slot);
GEM_BUG_ON(!list_empty(&tl->requests));
}
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 6c53e14cab2a..db0443e3ff22 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -26,6 +26,7 @@
#define I915_GEM_TIMELINE_H
#include <linux/list.h>
+#include <linux/radix-tree.h>
#include "i915_gem_request.h"
@@ -55,6 +56,8 @@ struct intel_timeline {
* struct_mutex.
*/
struct i915_gem_active last_request;
+ struct radix_tree_root sync;
+
u32 sync_seqno[I915_NUM_ENGINES];
struct i915_gem_timeline *common;
--
2.11.0
More information about the Intel-gfx-trybot mailing list