[PATCH 30/30] bdw-idr-lut
Chris Wilson
chris at chris-wilson.co.uk
Mon May 29 16:10:30 UTC 2017
---
drivers/gpu/drm/i915/i915_debugfs.c | 6 --
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem.c | 32 ++++++++--
drivers/gpu/drm/i915/i915_gem_context.c | 83 +++++---------------------
drivers/gpu/drm/i915/i915_gem_context.h | 30 ++--------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 84 +++++++++------------------
drivers/gpu/drm/i915/i915_gem_object.h | 10 +++-
drivers/gpu/drm/i915/i915_vma.c | 22 -------
drivers/gpu/drm/i915/i915_vma.h | 4 --
drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
lib/radix-tree.c | 1 +
11 files changed, 87 insertions(+), 201 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0ef2d23c718e..1ea60b8e6ead 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1988,12 +1988,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, '\n');
}
- seq_printf(m,
- "\tvma hashtable size=%u (actual %lu), count=%u\n",
- ctx->vma_lut.ht_size,
- BIT(ctx->vma_lut.ht_bits),
- ctx->vma_lut.ht_count);
-
seq_putc(m, '\n');
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f6324d461484..aeaf81ab5eac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2089,6 +2089,7 @@ struct drm_i915_private {
struct kmem_cache *objects;
struct kmem_cache *vmas;
+ struct kmem_cache *luts;
struct kmem_cache *requests;
struct kmem_cache *dependencies;
struct kmem_cache *priorities;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a7e1caa6bacf..d034e72ff270 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3260,19 +3260,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
+ struct drm_i915_private *i915 = to_i915(gem->dev);
struct drm_i915_gem_object *obj = to_intel_bo(gem);
struct drm_i915_file_private *fpriv = file->driver_priv;
struct i915_vma *vma, *vn;
+ struct i915_lut_handle *lut, *ln;
mutex_lock(&obj->base.dev->struct_mutex);
+
+ list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+ struct i915_gem_context *ctx = lut->ctx;
+
+ if (ctx->file_priv != fpriv)
+ continue;
+
+ radix_tree_delete(&ctx->handles_vma, lut->handle);
+ i915_gem_object_put(obj);
+
+ list_del(&lut->obj_link);
+ list_del(&lut->ctx_link);
+
+ kmem_cache_free(i915->luts, lut);
+ }
+
list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
if (vma->vm->file == fpriv)
i915_vma_close(vma);
- vma = obj->vma_hashed;
- if (vma && vma->ctx->file_priv == fpriv)
- i915_vma_unlink_ctx(vma);
-
if (i915_gem_object_is_active(obj) &&
!i915_gem_object_has_active_reference(obj)) {
i915_gem_object_set_active_reference(obj);
@@ -4262,6 +4276,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->global_link);
INIT_LIST_HEAD(&obj->userfault_link);
INIT_LIST_HEAD(&obj->vma_list);
+ INIT_LIST_HEAD(&obj->lut_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
obj->ops = ops;
@@ -4899,12 +4914,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
if (!dev_priv->vmas)
goto err_objects;
+ dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
+ if (!dev_priv->luts)
+ goto err_vmas;
+
dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_TYPESAFE_BY_RCU);
if (!dev_priv->requests)
- goto err_vmas;
+ goto err_luts;
dev_priv->dependencies = KMEM_CACHE(i915_dependency,
SLAB_HWCACHE_ALIGN |
@@ -4950,6 +4969,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
kmem_cache_destroy(dev_priv->dependencies);
err_requests:
kmem_cache_destroy(dev_priv->requests);
+err_luts:
+ kmem_cache_destroy(dev_priv->luts);
err_vmas:
kmem_cache_destroy(dev_priv->vmas);
err_objects:
@@ -4972,6 +4993,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
kmem_cache_destroy(dev_priv->priorities);
kmem_cache_destroy(dev_priv->dependencies);
kmem_cache_destroy(dev_priv->requests);
+ kmem_cache_destroy(dev_priv->luts);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 1e51fd36f355..6cf4643c30f4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -138,66 +138,21 @@ void i915_gem_contexts_mark_idle(struct drm_i915_private *i915)
}
}
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
{
- struct i915_gem_context_vma_lut *lut =
- container_of(work, typeof(*lut), resize);
- unsigned int bits, new_bits, size, i;
- struct hlist_head *new_ht;
-
- GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
- bits = 1 + ilog2(4*lut->ht_count/3 + 1);
- new_bits = min_t(unsigned int,
- max(bits, VMA_HT_BITS),
- sizeof(unsigned int) * BITS_PER_BYTE - 1);
- if (new_bits == lut->ht_bits)
- goto out;
-
- new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
- if (!new_ht)
- new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
- if (!new_ht)
- /* Pretend resize succeeded and stop calling us for a bit! */
- goto out;
-
- size = BIT(lut->ht_bits);
- for (i = 0; i < size; i++) {
- struct i915_vma *vma;
- struct hlist_node *tmp;
+ struct i915_lut_handle *lut, *ln;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
- hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
- hlist_add_head(&vma->ctx_node,
- &new_ht[hash_32(vma->ctx_handle,
- new_bits)]);
+ list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+ list_del(&lut->obj_link);
+ kmem_cache_free(ctx->i915->luts, lut);
}
- kvfree(lut->ht);
- lut->ht = new_ht;
- lut->ht_bits = new_bits;
-out:
- smp_store_release(&lut->ht_size, BIT(bits));
- GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
-
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
- struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
- unsigned int i, size;
-
- if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
- cancel_work_sync(&lut->resize);
- size = BIT(lut->ht_bits);
- for (i = 0; i < size; i++) {
- struct i915_vma *vma;
-
- hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
- vma->obj->vma_hashed = NULL;
- vma->ctx = NULL;
- i915_vma_put(vma);
- }
+ radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+ i915_vma_put(rcu_dereference_raw(*slot));
+ radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
}
- kvfree(lut->ht);
}
static void i915_gem_context_free(struct i915_gem_context *ctx)
@@ -208,7 +163,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
- vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -271,8 +225,11 @@ void i915_gem_context_release(struct kref *ref)
static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
+
+ lut_close(ctx);
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);
+
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
}
@@ -345,16 +302,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
- ctx->vma_lut.ht_bits = VMA_HT_BITS;
- ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
- BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
- ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
- sizeof(*ctx->vma_lut.ht),
- GFP_KERNEL);
- if (!ctx->vma_lut.ht)
- goto err_out;
-
- INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ INIT_LIST_HEAD(&ctx->handles_list);
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
@@ -404,8 +353,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
- kvfree(ctx->vma_lut.ht);
-err_out:
context_close(ctx);
return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 4654caab9b6c..162125977b11 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -27,6 +27,7 @@
#include <linux/bitops.h>
#include <linux/list.h>
+#include <linux/radix-tree.h>
struct pid;
@@ -149,32 +150,6 @@ struct i915_gem_context {
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
- struct i915_gem_context_vma_lut {
- /** ht_size: last request size to allocate the hashtable for. */
- unsigned int ht_size;
-#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
- /** ht_bits: real log2(size) of hashtable. */
- unsigned int ht_bits;
- /** ht_count: current number of entries inside the hashtable */
- unsigned int ht_count;
-
- /** ht: the array of buckets comprising the simple hashtable */
- struct hlist_head *ht;
-
- /**
- * resize: After an execbuf completes, we check the load factor
- * of the hashtable. If the hashtable is too full, or too empty,
- * we schedule a task to resize the hashtable. During the
- * resize, the entries are moved between different buckets and
- * so we cannot simultaneously read the hashtable as it is
- * being resized (unlike rhashtable). Therefore we treat the
- * active work as a strong barrier, pausing a subsequent
- * execbuf to wait for the resize worker to complete, if
- * required.
- */
- struct work_struct resize;
- } vma_lut;
-
/** engine: per-engine logical HW state */
struct intel_context {
struct i915_vma *state;
@@ -205,6 +180,9 @@ struct i915_gem_context {
/** remap_slice: Bitmask of cache lines that need remapping */
u8 remap_slice;
+
+ struct radix_tree_root handles_vma;
+ struct list_head handles_list;
};
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b56397f872ea..2ac3475fcdd1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -642,19 +642,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
} while (1);
}
-static inline struct hlist_head *
-ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
-{
- return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
- return (4*lut->ht_count > 3*lut->ht_size ||
- 4*lut->ht_count + 1 < lut->ht_size);
-}
-
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
{
if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -690,75 +677,57 @@ static int eb_select_context(struct i915_execbuffer *eb)
static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
{
- struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
- struct drm_i915_gem_object *uninitialized_var(obj);
+ struct drm_i915_gem_object *obj;
+ struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
unsigned int i;
int err;
INIT_LIST_HEAD(&eb->relocs);
INIT_LIST_HEAD(&eb->unbound);
- if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
- flush_work(&lut->resize);
- GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
for (i = 0; i < eb->buffer_count; i++) {
u32 handle = eb->exec[i].handle;
- struct hlist_head *hl = ht_head(lut, handle);
struct i915_vma *vma;
+ struct i915_lut_handle *lut;
- hlist_for_each_entry(vma, hl, ctx_node) {
- GEM_BUG_ON(vma->ctx != eb->ctx);
-
- if (vma->ctx_handle != handle)
- continue;
-
+ vma = radix_tree_lookup(handles_vma, handle);
+ if (likely(vma))
goto add_vma;
- }
obj = i915_gem_object_lookup(eb->file, handle);
if (unlikely(!obj)) {
err = -ENOENT;
- goto err;
+ goto err_vma;
}
- flags |= __EXEC_OBJECT_HAS_REF;
-
vma = i915_vma_instance(obj, eb->vm, NULL);
if (unlikely(IS_ERR(vma))) {
err = PTR_ERR(vma);
- goto err;
+ goto err_obj;
}
- /* First come, first served */
- if (!vma->ctx) {
- vma->ctx = eb->ctx;
- vma->ctx_handle = handle;
- hlist_add_head(&vma->ctx_node, hl);
- lut->ht_count++;
- lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
- if (i915_vma_is_ggtt(vma)) {
- GEM_BUG_ON(obj->vma_hashed);
- obj->vma_hashed = vma;
- }
+ lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+ if (unlikely(!lut)) {
+ err = -ENOMEM;
+ goto err_obj;
+ }
- /* transfer ref to ctx */
- flags &= ~__EXEC_OBJECT_HAS_REF;
+ /* transfer ref to ctx */
+ err = radix_tree_insert(handles_vma, handle, vma);
+ if (unlikely(err)) {
+ kfree(lut);
+ goto err_obj;
}
+ list_add(&lut->obj_link, &obj->lut_list);
+ list_add(&lut->ctx_link, &eb->ctx->handles_list);
+ lut->ctx = eb->ctx;
+ lut->handle = handle;
+
add_vma:
err = eb_add_vma(eb, i, vma, flags);
if (unlikely(err))
- goto err;
-
- flags &= ~__EXEC_OBJECT_HAS_REF;
- }
-
- if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
- if (ht_needs_resize(lut))
- queue_work(system_highpri_wq, &lut->resize);
- else
- lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
+ goto err_vma;
}
/* take note of the batch buffer before we might reorder the lists */
@@ -782,10 +751,9 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
return eb_reserve(eb);
-err:
- if (flags & __EXEC_OBJECT_HAS_REF)
- i915_gem_object_put(obj);
- lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
+err_obj:
+ i915_gem_object_put(obj);
+err_vma:
eb->vma[i] = NULL;
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 826e1ad6d654..abb3df880a99 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -35,6 +35,13 @@
#include "i915_selftest.h"
+struct i915_lut_handle {
+ struct list_head obj_link;
+ struct list_head ctx_link;
+ struct i915_gem_context *ctx;
+ u32 handle;
+};
+
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -86,7 +93,8 @@ struct drm_i915_gem_object {
* They are also added to @vma_list for easy iteration.
*/
struct rb_root vma_tree;
- struct i915_vma *vma_hashed;
+
+ struct list_head lut_list;
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fc1a8412b0a0..4953fdfeb408 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -591,33 +591,11 @@ void i915_vma_destroy(struct i915_vma *vma)
kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}
-void i915_vma_unlink_ctx(struct i915_vma *vma)
-{
- struct i915_gem_context *ctx = vma->ctx;
-
- if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
- cancel_work_sync(&ctx->vma_lut.resize);
- ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
- }
-
- __hlist_del(&vma->ctx_node);
- ctx->vma_lut.ht_count--;
-
- if (i915_vma_is_ggtt(vma))
- vma->obj->vma_hashed = NULL;
- vma->ctx = NULL;
-
- i915_vma_put(vma);
-}
-
void i915_vma_close(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_closed(vma));
vma->flags |= I915_VMA_CLOSED;
- if (vma->ctx)
- i915_vma_unlink_ctx(vma);
-
list_del(&vma->obj_link);
rb_erase(&vma->obj_node, &vma->obj->vma_tree);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index cbf0795465bf..d9a6079b5e26 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -115,10 +115,6 @@ struct i915_vma {
unsigned int *exec_flags;
struct hlist_node exec_node;
u32 exec_handle;
-
- struct i915_gem_context *ctx;
- struct hlist_node ctx_node;
- u32 ctx_handle;
};
struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index f4972674c96e..b933fab2da78 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
INIT_LIST_HEAD(&ctx->link);
ctx->i915 = i915;
- ctx->vma_lut.ht_bits = VMA_HT_BITS;
- ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
- ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
- sizeof(*ctx->vma_lut.ht),
- GFP_KERNEL);
- if (!ctx->vma_lut.ht)
- goto err_free;
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ INIT_LIST_HEAD(&ctx->handles_list);
ret = ida_simple_get(&i915->contexts.hw_ida,
0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
if (ret < 0)
- goto err_vma_ht;
+ goto err_handles;
ctx->hw_id = ret;
if (name) {
@@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
return ctx;
-err_vma_ht:
- kvfree(ctx->vma_lut.ht);
-err_free:
+err_handles:
kfree(ctx);
return NULL;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 898e87998417..3527eb364964 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
if (__radix_tree_delete(root, iter->node, slot))
iter->index = iter->next_index;
}
+EXPORT_SYMBOL(radix_tree_iter_delete);
/**
* radix_tree_delete_item - delete an item from a radix tree
--
2.11.0
More information about the Intel-gfx-trybot
mailing list