[Intel-gfx] [PATCH 11/17] drm/i915: Store a direct lookup from object handle to vma

Mon Aug 22 08:03:44 UTC 2016

The advent of full-ppgtt led to an extra indirection between the object
and its binding. That extra indirection has a noticeable impact on how
fast we can convert from the user handles to our internal vma for
execbuffer. In order to bypass the extra indirection, we use a
resizable hashtable to jump from the object to the per-ctx vma.
rhashtable was considered but we don't need the online resizing feature
and the extra complexity proved to undermine its usefulness. Instead, we
simply reallocate the hashtable on demand in a background task and
serialize it before iterating.

In non-full-ppgtt modes, multiple files and multiple contexts can share
the same vma. This leads to having multiple possible handle->vma links,
so we only use the first to establish the fast path. The majority of
buffers are not shared and so we should still be able to realise
speedups with multiple clients.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  17 +-
 drivers/gpu/drm/i915/i915_drv.h            |  19 ++-
 drivers/gpu/drm/i915/i915_gem.c            |   5 +-
 drivers/gpu/drm/i915/i915_gem_context.c    |  73 +++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 243 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  20 +++
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   8 +-
 7 files changed, 269 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 996744708f31..d812157ed94c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -208,9 +208,9 @@ static int obj_rank_by_stolen(void *priv,
 			      struct list_head *A, struct list_head *B)
 {
 	struct drm_i915_gem_object *a =
-		container_of(A, struct drm_i915_gem_object, obj_exec_link);
+		container_of(A, struct drm_i915_gem_object, tmp_link);
 	struct drm_i915_gem_object *b =
-		container_of(B, struct drm_i915_gem_object, obj_exec_link);
+		container_of(B, struct drm_i915_gem_object, tmp_link);
 
 	if (a->stolen->start < b->stolen->start)
 		return -1;
@@ -238,7 +238,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		if (obj->stolen == NULL)
 			continue;
 
-		list_add(&obj->obj_exec_link, &stolen);
+		list_add(&obj->tmp_link, &stolen);
 
 		total_obj_size += obj->base.size;
 		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
@@ -248,7 +248,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 		if (obj->stolen == NULL)
 			continue;
 
-		list_add(&obj->obj_exec_link, &stolen);
+		list_add(&obj->tmp_link, &stolen);
 
 		total_obj_size += obj->base.size;
 		count++;
@@ -256,11 +256,11 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
 	list_sort(NULL, &stolen, obj_rank_by_stolen);
 	seq_puts(m, "Stolen:\n");
 	while (!list_empty(&stolen)) {
-		obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);
+		obj = list_first_entry(&stolen, typeof(*obj), tmp_link);
 		seq_puts(m, "   ");
 		describe_obj(m, obj);
 		seq_putc(m, '\n');
-		list_del_init(&obj->obj_exec_link);
+		list_del(&obj->tmp_link);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1996,6 +1996,11 @@ static int i915_context_status(struct seq_file *m, void *unused)
 			seq_putc(m, '\n');
 		}
 
+		seq_printf(m, "\tvma hashtable size=%u (actual %u), count=%u\n",
+			   ctx->vma.ht_size,
+			   1 << ctx->vma.ht_bits,
+			   ctx->vma.ht_count);
+
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 049ac53b05da..e7e7840d5a68 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,7 +37,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 #include <linux/backlight.h>
-#include <linux/hashtable.h>
+#include <linux/hash.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -889,8 +889,11 @@ struct i915_ctx_hang_stats {
  */
 struct i915_gem_context {
 	struct kref ref;
+
 	struct drm_i915_private *i915;
 	struct drm_i915_file_private *file_priv;
+	struct list_head link;
+
 	struct i915_hw_ppgtt *ppgtt;
 	struct pid *pid;
 
@@ -919,7 +922,13 @@ struct i915_gem_context {
 	struct atomic_notifier_head status_notifier;
 	bool execlists_force_single_submission;
 
-	struct list_head link;
+	struct {
+		struct work_struct resize;
+		struct hlist_head *ht;
+		unsigned int ht_bits;
+		unsigned int ht_size;
+		unsigned int ht_count;
+	} vma;
 
 	u8 remap_slice;
 	bool closed:1;
@@ -2153,15 +2162,14 @@ struct drm_i915_gem_object {
 
 	/** List of VMAs backed by this object */
 	struct list_head vma_list;
+	struct i915_vma *vma_hashed;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	/** Used in execbuf to temporarily hold a ref */
-	struct list_head obj_exec_link;
-
 	struct list_head batch_pool_link;
+	struct list_head tmp_link;
 
 	unsigned long flags;
 	/**
@@ -3123,6 +3131,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
+void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e432211e8b24..0ca3ef547136 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2679,6 +2679,10 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
 
+	vma = obj->vma_hashed;
+	if (vma && vma->ctx->file_priv == fpriv)
+		i915_vma_unlink_ctx(vma);
+
 	if (i915_gem_object_is_active(obj) &&
 	    !i915_gem_object_has_active_reference(obj)) {
 		i915_gem_object_set_active_reference(obj);
@@ -4065,7 +4069,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 				    i915_gem_object_retire__read);
 	init_request_active(&obj->last_write,
 			    i915_gem_object_retire__write);
-	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ab54f208a4a9..3b4eec676d66 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -85,6 +85,7 @@
  *
  */
 
+#include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -99,6 +100,9 @@
 #define GEN6_CONTEXT_ALIGN (64<<10)
 #define GEN7_CONTEXT_ALIGN 4096
 
+/* Initial size (as log2) to preallocate the handle->object hashtable */
+#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
+
 static size_t get_context_alignment(struct drm_i915_private *dev_priv)
 {
 	if (IS_GEN6(dev_priv))
@@ -134,6 +138,64 @@ static int get_context_size(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
+static void resize_vma_ht(struct work_struct *work)
+{
+	struct i915_gem_context *ctx =
+		container_of(work, typeof(*ctx), vma.resize);
+	unsigned int size, bits, new_bits, i;
+	struct hlist_head *new_ht;
+
+	bits = 1 + ilog2(4*ctx->vma.ht_count/3);
+	new_bits = min_t(unsigned int,
+			 max(bits, VMA_HT_BITS),
+			 sizeof(unsigned int)*8);
+	if (new_bits == ctx->vma.ht_bits)
+		goto out;
+
+	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
+	if (!new_ht)
+		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
+	if (!new_ht)
+		/* pretend the resize succeeded and stop calling us for a bit! */
+		goto out;
+
+	size = 1 << ctx->vma.ht_bits;
+	for (i = 0; i < size; i++) {
+		struct i915_vma *vma;
+		struct hlist_node *tmp;
+
+		hlist_for_each_entry_safe(vma, tmp, &ctx->vma.ht[i], ctx_node)
+			hlist_add_head(&vma->ctx_node,
+				       &new_ht[hash_32(vma->ctx_handle,
+						       new_bits)]);
+	}
+	kvfree(ctx->vma.ht);
+	ctx->vma.ht = new_ht;
+	ctx->vma.ht_bits = new_bits;
+	smp_wmb();
+out:
+	ctx->vma.ht_size = 1 << bits;
+}
+
+static void decouple_vma(struct i915_gem_context *ctx)
+{
+	unsigned int i, size;
+
+	if (ctx->vma.ht_size & 1)
+		cancel_work_sync(&ctx->vma.resize);
+
+	size = 1 << ctx->vma.ht_bits;
+	for (i = 0; i < size; i++) {
+		struct i915_vma *vma;
+
+		hlist_for_each_entry(vma, &ctx->vma.ht[i], ctx_node) {
+			vma->obj->vma_hashed = NULL;
+			vma->ctx = NULL;
+		}
+	}
+	kvfree(ctx->vma.ht);
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +205,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!ctx->closed);
 
+	decouple_vma(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -159,6 +222,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	}
 
 	put_pid(ctx->pid);
+
 	list_del(&ctx->link);
 
 	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
@@ -281,6 +345,15 @@ __create_hw_context(struct drm_device *dev,
 
 	ctx->ggtt_alignment = get_context_alignment(dev_priv);
 
+	ctx->vma.ht_bits = VMA_HT_BITS;
+	ctx->vma.ht_size = 1 << ctx->vma.ht_bits;
+	ctx->vma.ht = kzalloc(sizeof(*ctx->vma.ht)*ctx->vma.ht_size,
+			      GFP_KERNEL);
+	if (!ctx->vma.ht)
+		goto err_out;
+
+	INIT_WORK(&ctx->vma.resize, resize_vma_ht);
+
 	if (dev_priv->hw_context_size) {
 		struct drm_i915_gem_object *obj;
 		struct i915_vma *vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 82496dd73309..35751e855859 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -70,38 +70,33 @@ struct i915_execbuffer {
 		unsigned int page;
 		bool use_64bit_reloc;
 	} reloc_cache;
-	int and;
-	union {
-		struct i915_vma **lut;
-		struct hlist_head *buckets;
-	};
+	int lut_mask;
+	struct hlist_head *buckets;
 };
 
 static int
 eb_create(struct i915_execbuffer *eb)
 {
-	eb->lut = NULL;
-	if (eb->args->flags & I915_EXEC_HANDLE_LUT) {
-		unsigned int size = eb->args->buffer_count;
-		size *= sizeof(struct i915_vma *);
-		eb->lut = kmalloc(size,
-				  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
-	}
-
-	if (!eb->lut) {
-		unsigned int size = eb->args->buffer_count;
-		unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
-		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
-		while (count > 2*size)
-			count >>= 1;
-		eb->lut = kzalloc(count*sizeof(struct hlist_head),
-				  GFP_TEMPORARY);
-		if (!eb->lut)
-			return -ENOMEM;
-
-		eb->and = count - 1;
+	if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) {
+		unsigned int size = 1 + ilog2(eb->args->buffer_count);
+
+		do {
+			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+					     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+			if (eb->buckets)
+				break;
+		} while (--size);
+
+		if (unlikely(!eb->buckets)) {
+			eb->buckets = kzalloc(sizeof(struct hlist_head),
+					      GFP_TEMPORARY);
+			if (unlikely(!eb->buckets))
+				return -ENOMEM;
+		}
+
+		eb->lut_mask = size;
 	} else
-		eb->and = -eb->args->buffer_count;
+		eb->lut_mask = -eb->args->buffer_count;
 
 	return 0;
 }
@@ -136,72 +131,103 @@ eb_reset(struct i915_execbuffer *eb)
 		vma->exec_entry = NULL;
 	}
 
-	if (eb->and >= 0)
-		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
+	if (eb->lut_mask >= 0)
+		memset(eb->buckets, 0,
+		       (1<<eb->lut_mask)*sizeof(struct hlist_head));
 }
 
-static struct i915_vma *
-eb_get_batch(struct i915_execbuffer *eb)
+#define to_ptr(T, x) ((T *)(uintptr_t)(x))
+
+static bool
+eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i)
 {
-	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+	if (unlikely(vma->exec_entry)) {
+		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
+			  eb->exec[i].handle, i);
+		return false;
+	}
+	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	vma->exec_entry = &eb->exec[i];
+	if (eb->lut_mask >= 0) {
+		vma->exec_handle = eb->exec[i].handle;
+		hlist_add_head(&vma->exec_node,
+			       &eb->buckets[hash_32(vma->exec_handle,
+						    eb->lut_mask)]);
+	}
 
-	return vma;
+	eb->exec[i].rsvd2 = (uintptr_t)vma;
+	return true;
+}
+
+static inline struct hlist_head *ht_head(struct i915_gem_context *ctx,
+					 u32 handle)
+{
+	return &ctx->vma.ht[hash_32(handle, ctx->vma.ht_bits)];
 }
 
 static int
 eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct drm_i915_gem_object *obj;
-	struct list_head objects;
-	int i, ret;
+	const int count = eb->args->buffer_count;
+	struct i915_vma *vma;
+	int slow_pass = -1;
+	int i;
 
 	INIT_LIST_HEAD(&eb->vmas);
 
-	INIT_LIST_HEAD(&objects);
+	if (unlikely(eb->ctx->vma.ht_size & 1))
+		flush_work(&eb->ctx->vma.resize);
+	for (i = 0; i < count; i++) {
+		eb->exec[i].rsvd2 = 0;
+
+		hlist_for_each_entry(vma,
+				     ht_head(eb->ctx, eb->exec[i].handle),
+				     ctx_node) {
+			if (vma->ctx_handle != eb->exec[i].handle)
+				continue;
+
+			if (!eb_add_vma(eb, vma, i))
+				return -EINVAL;
+
+			goto next_vma;
+		}
+
+		if (slow_pass < 0)
+			slow_pass = i;
+next_vma: ;
+	}
+
+	if (slow_pass < 0)
+		return 0;
+
 	spin_lock(&eb->file->table_lock);
 	/* Grab a reference to the object and release the lock so we can lookup
 	 * or create the VMA without using GFP_ATOMIC */
-	for (i = 0; i < eb->args->buffer_count; i++) {
-		obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle));
-		if (obj == NULL) {
-			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Invalid object handle %d at index %d\n",
-				   eb->exec[i].handle, i);
-			ret = -ENOENT;
-			goto err;
-		}
+	for (i = slow_pass; i < count; i++) {
+		struct drm_i915_gem_object *obj;
+
+		if (eb->exec[i].rsvd2)
+			continue;
 
-		if (!list_empty(&obj->obj_exec_link)) {
+		obj = to_intel_bo(idr_find(&eb->file->object_idr,
+					   eb->exec[i].handle));
+		if (unlikely(!obj)) {
 			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
-				   obj, eb->exec[i].handle, i);
-			ret = -EINVAL;
-			goto err;
+			DRM_DEBUG("Invalid object handle %d at index %d\n",
+				  eb->exec[i].handle, i);
+			return -ENOENT;
 		}
 
-		list_add_tail(&obj->obj_exec_link, &objects);
+		eb->exec[i].rsvd2 = 1 | (uintptr_t)obj;
 	}
 	spin_unlock(&eb->file->table_lock);
 
-	i = 0;
-	while (!list_empty(&objects)) {
-		struct i915_vma *vma;
+	for (i = slow_pass; i < count; i++) {
+		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&objects,
-				       struct drm_i915_gem_object,
-				       obj_exec_link);
+		if ((eb->exec[i].rsvd2 & 1) == 0)
+			continue;
 
 		/*
 		 * NOTE: We can leak any vmas created here when something fails
@@ -211,61 +237,72 @@ eb_lookup_vmas(struct i915_execbuffer *eb)
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
+		obj = to_ptr(struct drm_i915_gem_object, eb->exec[i].rsvd2 & ~1);
 		vma = i915_gem_obj_lookup_or_create_vma(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
-			ret = PTR_ERR(vma);
-			goto err;
+			return PTR_ERR(vma);
 		}
 
-		/* Transfer ownership from the objects list to the vmas list. */
-		list_add_tail(&vma->exec_list, &eb->vmas);
-		list_del_init(&obj->obj_exec_link);
-
-		vma->exec_entry = &eb->exec[i];
-		if (eb->and < 0) {
-			eb->lut[i] = vma;
-		} else {
-			u32 handle =
-				eb->args->flags & I915_EXEC_HANDLE_LUT ?
-				i : eb->exec[i].handle;
-			vma->exec_handle = handle;
-			hlist_add_head(&vma->exec_node,
-				       &eb->buckets[handle & eb->and]);
+		/* First come, first served */
+		if (!vma->ctx) {
+			vma->ctx = eb->ctx;
+			vma->ctx_handle = eb->exec[i].handle;
+			hlist_add_head(&vma->ctx_node,
+				       ht_head(eb->ctx, eb->exec[i].handle));
+			eb->ctx->vma.ht_count++;
+			if (i915_vma_is_ggtt(vma)) {
+				GEM_BUG_ON(obj->vma_hashed);
+				obj->vma_hashed = vma;
+			}
 		}
-		++i;
+
+		if (!eb_add_vma(eb, vma, i))
+			return -EINVAL;
+	}
+	if (4*eb->ctx->vma.ht_count > 3*eb->ctx->vma.ht_size ||
+	    4*eb->ctx->vma.ht_count < eb->ctx->vma.ht_size) {
+		eb->ctx->vma.ht_size |= 1;
+		queue_work(system_highpri_wq, &eb->ctx->vma.resize);
 	}
 
 	return 0;
+}
+
+static struct i915_vma *
+eb_get_batch(struct i915_execbuffer *eb)
+{
+	struct i915_vma *vma;
 
+	vma = to_ptr(struct i915_vma, eb->exec[eb->args->buffer_count-1].rsvd2);
 
-err:
-	while (!list_empty(&objects)) {
-		obj = list_first_entry(&objects,
-				       struct drm_i915_gem_object,
-				       obj_exec_link);
-		list_del_init(&obj->obj_exec_link);
-		i915_gem_object_put(obj);
-	}
 	/*
-	 * Objects already transfered to the vmas list will be unreferenced by
-	 * eb_destroy.
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
 	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
-	return ret;
+	return vma;
 }
 
-static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
+static struct i915_vma *
+eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
 {
-	if (eb->and < 0) {
-		if (handle >= -eb->and)
+	if (eb->lut_mask < 0) {
+		if (handle >= -eb->lut_mask)
 			return NULL;
-		return eb->lut[handle];
+		return to_ptr(struct i915_vma, eb->exec[handle].rsvd2);
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
 
-		head = &eb->buckets[handle & eb->and];
+		head = &eb->buckets[hash_32(handle, eb->lut_mask)];
 		hlist_for_each_entry(vma, head, exec_node) {
 			if (vma->exec_handle == handle)
 				return vma;
@@ -288,7 +325,7 @@ static void eb_destroy(struct i915_execbuffer *eb)
 
 	i915_gem_context_put(eb->ctx);
 
-	if (eb->buckets)
+	if (eb->lut_mask >= 0)
 		kfree(eb->buckets);
 }
 
@@ -916,7 +953,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-			i915_gem_object_is_tiled(obj);
+			i915_gem_object_is_tiled(vma->obj);
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
 		if (entry->flags & EXEC_OBJECT_PINNED)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f51e483569b9..b07a84f2603f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3333,11 +3333,31 @@ void i915_vma_destroy(struct i915_vma *vma)
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
+void i915_vma_unlink_ctx(struct i915_vma *vma)
+{
+	struct i915_gem_context *ctx = vma->ctx;
+
+	if (ctx->vma.ht_size & 1) {
+		cancel_work_sync(&ctx->vma.resize);
+		ctx->vma.ht_size &= ~1;
+	}
+
+	__hlist_del(&vma->ctx_node);
+	ctx->vma.ht_count--;
+
+	if (i915_vma_is_ggtt(vma))
+		vma->obj->vma_hashed = NULL;
+	vma->ctx = NULL;
+}
+
 void i915_vma_close(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	vma->flags |= I915_VMA_CLOSED;
 
+	if (vma->ctx)
+		i915_vma_unlink_ctx(vma);
+
 	list_del_init(&vma->obj_link);
 	if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
 		WARN_ON(i915_vma_unbind(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a90999fe2e57..f9540683d2c0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -166,6 +166,7 @@ struct i915_ggtt_view {
 extern const struct i915_ggtt_view i915_ggtt_view_normal;
 extern const struct i915_ggtt_view i915_ggtt_view_rotated;
 
+struct i915_gem_context;
 enum i915_cache_level;
 
 /**
@@ -226,6 +227,7 @@ struct i915_vma {
 	struct list_head vm_link;
 
 	struct list_head obj_link; /* Link in the object's VMA list */
+	struct hlist_node obj_node;
 
 	/** This vma's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
@@ -234,8 +236,12 @@ struct i915_vma {
 	 * Used for performing relocations during execbuffer insertion.
 	 */
 	struct hlist_node exec_node;
-	unsigned long exec_handle;
 	struct drm_i915_gem_exec_object2 *exec_entry;
+	u32 exec_handle;
+
+	struct i915_gem_context *ctx;
+	struct hlist_node ctx_node;
+	u32 ctx_handle;
 };
 
 struct i915_vma *
-- 
2.9.3