[PATCH 37/72] rt-active
Chris Wilson
chris@chris-wilson.co.uk
Tue Feb 6 20:57:37 UTC 2018
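
In outline (the trybot posting carries no commit message, so this summary is taken from the diff itself): track vma activity per timeline (fence context) rather than per engine. The fixed last_read[I915_NUM_ENGINES] array on each vma limits us to a single timeline per engine, which is what forced the inter-timeline ordering constraint removed below from eb_move_to_gpu(). Instead, keep a radix tree of small i915_vma_active trackers keyed by rq->fence.context: allocate a tracker the first time a timeline touches the vma, and free it again from i915_vma_retire() once its request is retired. With the tracking now private to i915_vma, i915_vma_move_to_active() moves from i915_gem_execbuffer.c into i915_vma.c, and the per-engine rseqno[] array disappears from the error state.

A minimal sketch of the lookup-or-create path, mirroring the new i915_vma_move_to_active() below; active_instance() is a name invented for this sketch (the patch open-codes the same logic inline), and error handling is trimmed to the essentials:

    #include <linux/err.h>
    #include <linux/radix-tree.h>
    #include <linux/slab.h>

    /* One tracker per timeline that has ever used this vma. */
    struct i915_vma_active {
            struct i915_gem_active base;
            struct i915_vma *vma;
    };

    static struct i915_vma_active *
    active_instance(struct i915_vma *vma, u64 context)
    {
            struct i915_vma_active *active;
            int err;

            /* Fast path: this timeline has used the vma before. */
            active = radix_tree_lookup(&vma->active, context);
            if (active)
                    return active;

            active = kmalloc(sizeof(*active), GFP_KERNEL);
            if (!active)
                    return ERR_PTR(-ENOMEM);

            /* Retiring the tracked request deletes and frees the node. */
            init_request_active(&active->base, i915_vma_retire);
            active->vma = vma;

            err = radix_tree_insert(&vma->active, context, active);
            if (err) {
                    kfree(active);
                    return ERR_PTR(err);
            }

            return active;
    }

The tree grows and shrinks with the number of timelines that have actually used the vma, so i915_vma_is_active() reduces to !radix_tree_empty(&vma->active) and the per-engine active bitmask goes away.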
---
drivers/gpu/drm/i915/i915_drv.h | 5 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 85 ------------------
drivers/gpu/drm/i915/i915_gpu_error.c | 14 +--
drivers/gpu/drm/i915/i915_vma.c | 137 ++++++++++++++++++++++++-----
drivers/gpu/drm/i915/i915_vma.h | 41 +++------
5 files changed, 128 insertions(+), 154 deletions(-)
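
For reference, i915_vma_unbind() now has to flush every per-timeline tracker rather than a fixed array. Below is a condensed sketch of that walk, simplified from the i915_vma.c hunk further down; retire_all() is a name invented for the sketch (the patch keeps the loop inline). The RCU read lock is dropped around i915_gem_active_retire() because retiring may sleep, while struct_mutex keeps the tree stable enough to resume the iteration:

    static int retire_all(struct i915_vma *vma, struct mutex *lock)
    {
            struct radix_tree_iter iter;
            void __rcu **slot;
            int err;

            rcu_read_lock();
            radix_tree_for_each_slot(slot, &vma->active, &iter, 0) {
                    struct i915_vma_active *active =
                            rcu_dereference_raw(*slot);

                    /* Retiring may sleep; cannot hold the RCU read lock. */
                    rcu_read_unlock();
                    err = i915_gem_active_retire(&active->base, lock);
                    if (err)
                            return err;

                    rcu_read_lock();
            }
            rcu_read_unlock();

            return 0;
    }

Each successful retire runs i915_vma_retire(), which deletes that context's slot and frees its tracker, so an error-free walk leaves the tree empty.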
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 10d77e831830..3b198567981b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -603,7 +603,7 @@ struct i915_gpu_state {
struct drm_i915_error_buffer {
u32 size;
u32 name;
- u32 rseqno[I915_NUM_ENGINES], wseqno;
+ u32 wseqno;
u64 gtt_offset;
u32 read_domains;
u32 write_domain;
@@ -3302,9 +3302,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags);
int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e1ca67c995c9..5cf9e3f78f14 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1734,7 +1734,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
unsigned int flags = eb->flags[i];
struct i915_vma *vma = eb->vma[i];
struct drm_i915_gem_object *obj = vma->obj;
- struct drm_i915_gem_request *order;
if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_gem_capture_list *capture;
@@ -1765,29 +1764,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
flags &= ~EXEC_OBJECT_ASYNC;
}
- /*
- * XXX As we allow multiple queues to share the vma, but
- * with different timelines, yet we rely on a single
- * timeline through the vm (for activity tracking
- * see i915_vma_move_to_active()/i915_vma_retire()) we impose
- * that ordering constraint on the different timelines here.
- *
- * Note that this ordering constraint is undesirable as we
- * want to keep our weakly ordered reads through the GEM
- * interface. That will require us to be able to track
- * multiple timelines (lifting the current limit of one
- * per engine), like struct reservation_object but coupled
- * into our activity tracking.
- */
- order = i915_gem_active_peek(&vma->last_read[eb->engine->id],
- &eb->i915->drm.struct_mutex);
- if (order) {
- err = i915_gem_request_await_dma_fence(eb->request,
- &order->fence);
- if (err)
- return err;
- }
-
if (flags & EXEC_OBJECT_ASYNC)
continue;
@@ -1845,67 +1821,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return true;
}
-static void export_fence(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags)
-{
- struct reservation_object *resv = vma->resv;
-
- /*
- * Ignore errors from failing to allocate the new fence, we can't
- * handle an error right now. Worst case should be missed
- * synchronisation leading to rendering corruption.
- */
- reservation_object_lock(resv, NULL);
- if (flags & EXEC_OBJECT_WRITE)
- reservation_object_add_excl_fence(resv, &req->fence);
- else if (reservation_object_reserve_shared(resv) == 0)
- reservation_object_add_shared_fence(resv, &req->fence);
- reservation_object_unlock(resv);
-}
-
-int i915_vma_move_to_active(struct i915_vma *vma,
- struct drm_i915_gem_request *req,
- unsigned int flags)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- const unsigned int idx = req->engine->id;
-
- lockdep_assert_held(&req->i915->drm.struct_mutex);
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
- /*
- * Add a reference if we're newly entering the active list.
- * The order in which we add operations to the retirement queue is
- * vital here: mark_active adds to the start of the callback list,
- * such that subsequent callbacks are called first. Therefore we
- * add the active reference first and queue for it to be dropped
- * *last*.
- */
- if (!i915_vma_is_active(vma))
- obj->active_count++;
- i915_vma_set_active(vma, idx);
- i915_gem_active_set(&vma->last_read[idx], req);
- list_move_tail(&vma->vm_link, &vma->vm->active_list);
-
- obj->base.write_domain = 0;
- if (flags & EXEC_OBJECT_WRITE) {
- obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
-
- if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
- i915_gem_active_set(&obj->frontbuffer_write, req);
-
- obj->base.read_domains = 0;
- }
- obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_gem_active_set(&vma->last_fence, req);
-
- export_fence(vma, req, flags);
- return 0;
-}
-
static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
u32 *cs;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7f82c6062c44..59d2c782204b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -334,21 +334,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
struct drm_i915_error_buffer *err,
int count)
{
- int i;
-
err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
- err_printf(m, " %08x_%08x %8u %02x %02x [ ",
+ err_printf(m, " %08x_%08x %8u %02x %02x %02x",
upper_32_bits(err->gtt_offset),
lower_32_bits(err->gtt_offset),
err->size,
err->read_domains,
- err->write_domain);
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err_printf(m, "%02x ", err->rseqno[i]);
-
- err_printf(m, "] %02x", err->wseqno);
+ err->write_domain,
+ err->wseqno);
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -1010,13 +1005,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- int i;
err->size = obj->base.size;
err->name = obj->base.name;
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
err->engine = __active_get_engine_id(&obj->frontbuffer_write);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e0e7c48f45dc..46d079afef0a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -30,18 +30,23 @@
#include <drm/drm_gem.h>
+struct i915_vma_active {
+ struct i915_gem_active base;
+ struct i915_vma *vma;
+};
+
static void
-i915_vma_retire(struct i915_gem_active *active,
+i915_vma_retire(struct i915_gem_active *base,
struct drm_i915_gem_request *rq)
{
- const unsigned int idx = rq->engine->id;
- struct i915_vma *vma =
- container_of(active, struct i915_vma, last_read[idx]);
+ struct i915_vma_active *active =
+ container_of(base, typeof(*active), base);
+ struct i915_vma *vma = active->vma;
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
+ radix_tree_delete(&vma->active, rq->fence.context);
+ kfree(active);
- i915_vma_clear_active(vma, idx);
if (i915_vma_is_active(vma))
return;
@@ -85,7 +90,6 @@ vma_create(struct drm_i915_gem_object *obj,
{
struct i915_vma *vma;
struct rb_node *rb, **p;
- int i;
/* The aliasing_ppgtt should never be used directly! */
GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
@@ -94,8 +98,8 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- init_request_active(&vma->last_read[i], i915_vma_retire);
+ INIT_RADIX_TREE(&vma->active, GFP_KERNEL);
+
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->obj = obj;
@@ -687,15 +691,11 @@ int __i915_vma_do_pin(struct i915_vma *vma,
static void i915_vma_destroy(struct i915_vma *vma)
{
- int i;
-
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(i915_vma_is_active(vma));
GEM_BUG_ON(!i915_vma_is_closed(vma));
GEM_BUG_ON(vma->fence);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
list_del(&vma->obj_link);
@@ -753,10 +753,93 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}
+static void export_fence(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags)
+{
+ struct reservation_object *resv = vma->resv;
+
+ /*
+ * Ignore errors from failing to allocate the new fence, we can't
+ * handle an error right now. Worst case should be missed
+ * synchronisation leading to rendering corruption.
+ */
+ reservation_object_lock(resv, NULL);
+ if (flags & EXEC_OBJECT_WRITE)
+ reservation_object_add_excl_fence(resv, &req->fence);
+ else if (reservation_object_reserve_shared(resv) == 0)
+ reservation_object_add_shared_fence(resv, &req->fence);
+ reservation_object_unlock(resv);
+}
+
+int i915_vma_move_to_active(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_vma_active *active;
+ bool inactive;
+
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+ inactive = !i915_vma_is_active(vma);
+
+ active = radix_tree_lookup(&vma->active, req->fence.context);
+ if (!active) {
+ int err;
+
+ active = kmalloc(sizeof(*active), GFP_KERNEL);
+ if (!active)
+ return -ENOMEM;
+
+ init_request_active(&active->base, i915_vma_retire);
+ active->vma = vma;
+
+ err = radix_tree_insert(&vma->active,
+ req->fence.context,
+ active);
+ if (err) {
+ kfree(active);
+ return err;
+ }
+ }
+ i915_gem_active_set(&active->base, req);
+
+ /*
+ * Add a reference if we're newly entering the active list.
+ * The order in which we add operations to the retirement queue is
+ * vital here: mark_active adds to the start of the callback list,
+ * such that subsequent callbacks are called first. Therefore we
+ * add the active reference first and queue for it to be dropped
+ * *last*.
+ */
+ if (inactive)
+ obj->active_count++;
+
+ list_move_tail(&vma->vm_link, &vma->vm->active_list);
+
+ obj->base.write_domain = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+
+ if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+ i915_gem_active_set(&obj->frontbuffer_write, req);
+
+ obj->base.read_domains = 0;
+ }
+ obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ i915_gem_active_set(&vma->last_fence, req);
+
+ export_fence(vma, req, flags);
+ return 0;
+}
+
int i915_vma_unbind(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- unsigned long active;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
@@ -765,9 +848,9 @@ int i915_vma_unbind(struct i915_vma *vma)
* have side-effects such as unpinning or even unbinding this vma.
*/
might_sleep();
- active = i915_vma_get_active(vma);
- if (active) {
- int idx;
+ if (i915_vma_is_active(vma)) {
+ struct radix_tree_iter iter;
+ void __rcu **slot;
/* When a closed VMA is retired, it is unbound - eek.
* In order to prevent it from being recursively closed,
@@ -783,18 +866,24 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
__i915_vma_pin(vma);
- for_each_active(active, idx) {
- ret = i915_gem_active_retire(&vma->last_read[idx],
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &vma->active, &iter, 0) {
+ struct i915_vma_active *active =
+ rcu_dereference_raw(*slot);
+ rcu_read_unlock();
+
+ ret = i915_gem_active_retire(&active->base,
&vma->vm->i915->drm.struct_mutex);
if (ret)
- break;
- }
+ goto unpin;
- if (!ret) {
- ret = i915_gem_active_retire(&vma->last_fence,
- &vma->vm->i915->drm.struct_mutex);
+ rcu_read_lock();
}
+ rcu_read_unlock();
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
+unpin:
__i915_vma_unpin(vma);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index fd5b84904f7c..d5e36b107ae4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
#define __I915_VMA_H__
#include <linux/io-mapping.h>
+#include <linux/radix-tree.h>
#include <drm/drm_mm.h>
@@ -92,8 +93,7 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(12)
- unsigned int active;
- struct i915_gem_active last_read[I915_NUM_ENGINES];
+ struct radix_tree_root active;
struct i915_gem_active last_fence;
/**
@@ -134,6 +134,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
void i915_vma_unpin_and_release(struct i915_vma **p_vma);
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+ return !radix_tree_empty(&vma->active);
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+ struct drm_i915_gem_request *req,
+ unsigned int flags);
+
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
@@ -183,34 +192,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
}
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
- return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
- return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
- unsigned int engine)
-{
- return vma->active & BIT(engine);
-}
-
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
--
2.16.1