[PATCH 27/27] drm/i915: Generalise GPU read locks
Chris Wilson
chris at chris-wilson.co.uk
Fri Jan 25 11:56:14 UTC 2019
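
Pull the per-timeline active request tracking currently open-coded in
i915_vma (an rbtree of timelines fronted by a last_active cache) out
into its own i915_read_lock, and convert i915_vma over to it, so that
the same mechanism can be reused by other GPU readers.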
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 -
drivers/gpu/drm/i915/i915_read_lock.c | 198 ++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_read_lock.h | 46 ++++++
drivers/gpu/drm/i915/i915_vma.c | 150 ++-----------------
drivers/gpu/drm/i915/i915_vma.h | 9 +-
6 files changed, 262 insertions(+), 144 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_read_lock.c
create mode 100644 drivers/gpu/drm/i915/i915_read_lock.h
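
As a quick orientation for review, a sketch of how a user of the new
interface is expected to look (illustrative only: "my_retire" and
"foo" are made-up names, mirroring the i915_vma conversion below):

	static void my_retire(struct i915_read_lock *lock,
			      struct i915_request *rq)
	{
		/* called once the last tracked read has been retired */
	}

	i915_read_lock_init(i915, &foo->read_lock, my_retire);

	/* under struct_mutex: track rq as reading foo on its timeline */
	err = i915_read_lock(&foo->read_lock, rq->fence.context, rq);
	if (err)
		return err; /* -ENOMEM, nothing was tracked */

	/* wait upon (and retire) all reads still in flight */
	err = i915_read_lock_wait(&foo->read_lock);
	if (err)
		return err;

	GEM_BUG_ON(!i915_read_lock_is_idle(&foo->read_lock));
	i915_read_lock_fini(&foo->read_lock);

i915_read_lock_acquire()/_release() additionally let a caller keep the
lock busy across an operation without a request; the retire callback is
then invoked with rq == NULL when the final release idles the lock.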
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8300efe60fe1..21880c26cb2e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -74,6 +74,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_tiling.o \
i915_gem_userptr.o \
i915_gemfs.o \
i915_query.o \
+ i915_read_lock.o \
i915_request.o \
i915_scheduler.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 49b00996a15e..89d96d4a7628 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1923,8 +1923,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
vma->ops = &pd_vma_ops;
vma->private = ppgtt;
- vma->active = RB_ROOT;
-
vma->size = size;
vma->fence_size = size;
vma->flags = I915_VMA_GGTT;
diff --git a/drivers/gpu/drm/i915/i915_read_lock.c b/drivers/gpu/drm/i915/i915_read_lock.c
new file mode 100644
index 000000000000..226ef7ed63f4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_read_lock.c
@@ -0,0 +1,198 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_read_lock.h"
+
+struct read_lock_active {
+ struct i915_gem_active base;
+ struct i915_read_lock *lock;
+ struct rb_node node;
+ u64 timeline;
+};
+
+static void
+__read_lock_retire(struct i915_read_lock *lock, struct i915_request *rq)
+{
+ if (--lock->count)
+ return;
+
+ lock->retire(lock, rq);
+}
+
+static void
+read_lock_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __read_lock_retire(container_of(base,
+ struct read_lock_active,
+ base)->lock,
+ rq);
+}
+
+static void
+read_lock_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __read_lock_retire(container_of(base,
+ struct i915_read_lock,
+ last_active),
+ rq);
+}
+
+static struct i915_gem_active *
+active_instance(struct i915_read_lock *lock, u64 idx)
+{
+ struct read_lock_active *active;
+ struct rb_node **p, *parent;
+ struct i915_request *old;
+
+ /*
+ * We track the most recently used timeline to skip an rbtree search
+ * for the common case; under typical loads we never need the rbtree
+ * at all. We can reuse the last_active slot if it is empty, that is
+ * after the previous activity has been retired, or if the active
+ * matches the current timeline.
+ *
+ * Note that we allow the timeline to be active simultaneously in
+ * the rbtree and the last_active cache. We do this to avoid having
+ * to search and replace the rbtree element for a new timeline, with
+ * the cost being that we must be aware that the lock may be retired
+ * twice for the same timeline (as the older rbtree element will be
+ * retired before the new request is added to last_active).
+ old = i915_gem_active_raw(&lock->last_active,
+ &lock->i915->drm.struct_mutex);
+ if (!old || old->fence.context == idx)
+ goto out;
+
+ /* Move the currently active fence into the rbtree */
+ idx = old->fence.context;
+
+ parent = NULL;
+ p = &lock->active.rb_node;
+ while (*p) {
+ parent = *p;
+
+ active = rb_entry(parent, struct read_lock_active, node);
+ if (active->timeline == idx)
+ goto replace;
+
+ if (active->timeline < idx)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ active = kmalloc(sizeof(*active), GFP_KERNEL);
+
+ /* kmalloc may retire the lock->last_active (thanks shrinker)! */
+ if (unlikely(!i915_gem_active_raw(&lock->last_active,
+ &lock->i915->drm.struct_mutex))) {
+ kfree(active);
+ goto out;
+ }
+
+ if (unlikely(!active))
+ return ERR_PTR(-ENOMEM);
+
+ init_request_active(&active->base, read_lock_retire);
+ active->lock = lock;
+ active->timeline = idx;
+
+ rb_link_node(&active->node, parent, p);
+ rb_insert_color(&active->node, &lock->active);
+
+replace:
+ /*
+ * Overwrite the previous active slot in the rbtree with last_active,
+ * leaving last_active zeroed. If the previous slot is still active,
+ * we must be careful as we now only expect to receive one retire
+ * callback, not two, and so must undo the active counting for the
+ * overwritten slot.
+ */
+ if (i915_gem_active_isset(&active->base)) {
+ /* Retire ourselves from the old rq->active_list */
+ __list_del_entry(&active->base.link);
+ lock->count--;
+ GEM_BUG_ON(!lock->count);
+ }
+ GEM_BUG_ON(list_empty(&lock->last_active.link));
+ list_replace_init(&lock->last_active.link, &active->base.link);
+ active->base.request = fetch_and_zero(&lock->last_active.request);
+
+out:
+ return &lock->last_active;
+}
+
+void i915_read_lock_init(struct drm_i915_private *i915,
+ struct i915_read_lock *lock,
+ void (*retire)(struct i915_read_lock *lock,
+ struct i915_request *rq))
+{
+ lock->i915 = i915;
+ lock->retire = retire;
+ lock->active = RB_ROOT;
+ init_request_active(&lock->last_active, read_lock_last_retire);
+}
+
+int i915_read_lock(struct i915_read_lock *lock,
+ u64 timeline, struct i915_request *rq)
+{
+ struct i915_gem_active *active;
+
+ active = active_instance(lock, timeline);
+ if (IS_ERR(active))
+ return PTR_ERR(active);
+
+ if (!i915_gem_active_isset(active))
+ lock->count++;
+ i915_gem_active_set(active, rq);
+
+ return 0;
+}
+
+void i915_read_lock_acquire(struct i915_read_lock *lock)
+{
+ lockdep_assert_held(&lock->i915->drm.struct_mutex);
+ lock->count++;
+}
+
+void i915_read_lock_release(struct i915_read_lock *lock)
+{
+ lockdep_assert_held(&lock->i915->drm.struct_mutex);
+ __read_lock_retire(lock, NULL);
+}
+
+int i915_read_lock_wait(struct i915_read_lock *lock)
+{
+ struct read_lock_active *iter, *n;
+ int ret;
+
+ ret = i915_gem_active_retire(&lock->last_active,
+ &lock->i915->drm.struct_mutex);
+ if (ret)
+ return ret;
+
+ rbtree_postorder_for_each_entry_safe(iter, n, &lock->active, node) {
+ ret = i915_gem_active_retire(&iter->base,
+ &lock->i915->drm.struct_mutex);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void i915_read_lock_fini(struct i915_read_lock *lock)
+{
+ struct read_lock_active *iter, *n;
+
+ GEM_BUG_ON(i915_gem_active_isset(&lock->last_active));
+
+ rbtree_postorder_for_each_entry_safe(iter, n, &lock->active, node) {
+ GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+ kfree(iter);
+ }
+}
diff --git a/drivers/gpu/drm/i915/i915_read_lock.h b/drivers/gpu/drm/i915/i915_read_lock.h
new file mode 100644
index 000000000000..010310f49c1a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_read_lock.h
@@ -0,0 +1,46 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_READ_LOCK_H_
+#define _I915_READ_LOCK_H_
+
+#include <linux/rbtree.h>
+
+#include "i915_request.h"
+
+struct drm_i915_private;
+
+struct i915_read_lock {
+ struct drm_i915_private *i915;
+
+ struct rb_root active;
+ struct i915_gem_active last_active;
+ unsigned int count;
+
+ void (*retire)(struct i915_read_lock *lock, struct i915_request *rq);
+};
+
+void i915_read_lock_init(struct drm_i915_private *i915,
+ struct i915_read_lock *lock,
+ void (*retire)(struct i915_read_lock *lock,
+ struct i915_request *rq));
+
+int i915_read_lock(struct i915_read_lock *lock,
+ u64 timeline,
+ struct i915_request *rq);
+int i915_read_lock_wait(struct i915_read_lock *lock);
+
+void i915_read_lock_acquire(struct i915_read_lock *lock);
+void i915_read_lock_release(struct i915_read_lock *lock);
+
+static inline bool i915_read_lock_is_idle(const struct i915_read_lock *lock)
+{
+ return !lock->count;
+}
+
+void i915_read_lock_fini(struct i915_read_lock *lock);
+
+#endif /* _I915_READ_LOCK_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d83b8ad5f859..ae74e2373458 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,22 +63,12 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
#endif
-struct i915_vma_active {
- struct i915_gem_active base;
- struct i915_vma *vma;
- struct rb_node node;
- u64 timeline;
-};
-
static void
-__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
+__i915_vma_retire(struct i915_read_lock *lock, struct i915_request *rq)
{
+ struct i915_vma *vma = container_of(lock, typeof(*vma), read_lock);
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_is_active(vma));
- if (--vma->active_count)
- return;
-
GEM_BUG_ON(!i915_gem_object_is_active(obj));
if (--obj->active_count)
return;
@@ -107,21 +97,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
}
}
-static void
-i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- struct i915_vma_active *active =
- container_of(base, typeof(*active), base);
-
- __i915_vma_retire(active->vma, rq);
-}
-
-static void
-i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
-}
-
static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -137,9 +112,8 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- vma->active = RB_ROOT;
+ i915_read_lock_init(vm->i915, &vma->read_lock, __i915_vma_retire);
- init_request_active(&vma->last_active, i915_vma_last_retire);
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->ops = &vm->vma_ops;
@@ -823,7 +797,6 @@ void i915_vma_reopen(struct i915_vma *vma)
static void __i915_vma_destroy(struct i915_vma *vma)
{
struct drm_i915_private *i915 = vma->vm->i915;
- struct i915_vma_active *iter, *n;
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence);
@@ -843,10 +816,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
}
- rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
- GEM_BUG_ON(i915_gem_active_isset(&iter->base));
- kfree(iter);
- }
+ i915_read_lock_fini(&vma->read_lock);
kmem_cache_free(i915->vmas, vma);
}
@@ -931,104 +901,15 @@ static void export_fence(struct i915_vma *vma,
reservation_object_unlock(resv);
}
-static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
-{
- struct i915_vma_active *active;
- struct rb_node **p, *parent;
- struct i915_request *old;
-
- /*
- * We track the most recently used timeline to skip a rbtree search
- * for the common case, under typical loads we never need the rbtree
- * at all. We can reuse the last_active slot if it is empty, that is
- * after the previous activity has been retired, or if the active
- * matches the current timeline.
- *
- * Note that we allow the timeline to be active simultaneously in
- * the rbtree and the last_active cache. We do this to avoid having
- * to search and replace the rbtree element for a new timeline, with
- * the cost being that we must be aware that the vma may be retired
- * twice for the same timeline (as the older rbtree element will be
- * retired before the new request added to last_active).
- */
- old = i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
- if (!old || old->fence.context == idx)
- goto out;
-
- /* Move the currently active fence into the rbtree */
- idx = old->fence.context;
-
- parent = NULL;
- p = &vma->active.rb_node;
- while (*p) {
- parent = *p;
-
- active = rb_entry(parent, struct i915_vma_active, node);
- if (active->timeline == idx)
- goto replace;
-
- if (active->timeline < idx)
- p = &parent->rb_right;
- else
- p = &parent->rb_left;
- }
-
- active = kmalloc(sizeof(*active), GFP_KERNEL);
-
- /* kmalloc may retire the vma->last_active request (thanks shrinker)! */
- if (unlikely(!i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex))) {
- kfree(active);
- goto out;
- }
-
- if (unlikely(!active))
- return ERR_PTR(-ENOMEM);
-
- init_request_active(&active->base, i915_vma_retire);
- active->vma = vma;
- active->timeline = idx;
-
- rb_link_node(&active->node, parent, p);
- rb_insert_color(&active->node, &vma->active);
-
-replace:
- /*
- * Overwrite the previous active slot in the rbtree with last_active,
- * leaving last_active zeroed. If the previous slot is still active,
- * we must be careful as we now only expect to receive one retire
- * callback not two, and so much undo the active counting for the
- * overwritten slot.
- */
- if (i915_gem_active_isset(&active->base)) {
- /* Retire ourselves from the old rq->active_list */
- __list_del_entry(&active->base.link);
- vma->active_count--;
- GEM_BUG_ON(!vma->active_count);
- }
- GEM_BUG_ON(list_empty(&vma->last_active.link));
- list_replace_init(&vma->last_active.link, &active->base.link);
- active->base.request = fetch_and_zero(&vma->last_active.request);
-
-out:
- return &vma->last_active;
-}
-
int i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
unsigned int flags)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct i915_gem_active *active;
lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- active = active_instance(vma, rq->fence.context);
- if (IS_ERR(active))
- return PTR_ERR(active);
-
/*
* Add a reference if we're newly entering the active list.
* The order in which we add operations to the retirement queue is
@@ -1037,9 +918,15 @@ int i915_vma_move_to_active(struct i915_vma *vma,
* add the active reference first and queue for it to be dropped
* *last*.
*/
- if (!i915_gem_active_isset(active) && !vma->active_count++)
+ if (!vma->read_lock.count)
obj->active_count++;
- i915_gem_active_set(active, rq);
+
+ if (unlikely(i915_read_lock(&vma->read_lock, rq->fence.context, rq))) {
+ if (!vma->read_lock.count)
+ obj->active_count--;
+ return -ENOMEM;
+ }
+
GEM_BUG_ON(!i915_vma_is_active(vma));
GEM_BUG_ON(!obj->active_count);
@@ -1073,8 +960,6 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
might_sleep();
if (i915_vma_is_active(vma)) {
- struct i915_vma_active *active, *n;
-
/*
* When a closed VMA is retired, it is unbound - eek.
* In order to prevent it from being recursively closed,
@@ -1090,19 +975,10 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
__i915_vma_pin(vma);
- ret = i915_gem_active_retire(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
+ ret = i915_read_lock_wait(&vma->read_lock);
if (ret)
goto unpin;
- rbtree_postorder_for_each_entry_safe(active, n,
- &vma->active, node) {
- ret = i915_gem_active_retire(&active->base,
- &vma->vm->i915->drm.struct_mutex);
- if (ret)
- goto unpin;
- }
-
ret = i915_gem_active_retire(&vma->last_fence,
&vma->vm->i915->drm.struct_mutex);
unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index b0f6b1d904a5..a4b06557ce2e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -34,6 +34,7 @@
#include "i915_gem_fence_reg.h"
#include "i915_gem_object.h"
+#include "i915_read_lock.h"
#include "i915_request.h"
enum i915_cache_level;
@@ -108,9 +109,7 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(15)
- unsigned int active_count;
- struct rb_root active;
- struct i915_gem_active last_active;
+ struct i915_read_lock read_lock;
struct i915_gem_active last_fence;
/**
@@ -154,9 +153,9 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
#define I915_VMA_RELEASE_MAP BIT(0)
-static inline bool i915_vma_is_active(struct i915_vma *vma)
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
- return vma->active_count;
+ return !i915_read_lock_is_idle(&vma->read_lock);
}
int __must_check i915_vma_move_to_active(struct i915_vma *vma,
--
2.20.1