[Intel-gfx] [PATCH 18/24] drm/i915: Convert execbuf to use struct-of-array packing for critical fields

Chris Wilson chris at chris-wilson.co.uk
Thu May 18 09:46:32 UTC 2017


When userspace is doing most of the work, avoiding relocs (using
NO_RELOC) and opting out of implicit synchronisation (using ASYNC), we
still spend a lot of time processing the arrays in execbuf, even though
we now should have nothing to do most of the time. One issue that
becomes readily apparent in profiling anv is that iterating over the
large execobj[] is unfriendly to the loop prefetchers of the CPU and it
much prefers iterating over a pair of arrays rather than one big array.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_evict.c      |   4 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 224 +++++++++++++++--------------
 drivers/gpu/drm/i915/i915_vma.h            |   2 +-
 3 files changed, 118 insertions(+), 112 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a193f1b36c67..4df039ef2ce3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		/* Overlap of objects in the same batch? */
 		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_entry &&
-			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+			if (vma->exec_flags &&
+			    *vma->exec_flags & EXEC_OBJECT_PINNED)
 				ret = -EINVAL;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f5a0e419bfec..d6eb38b12976 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -47,13 +47,13 @@ enum {
 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
 };
 
-#define  __EXEC_OBJECT_HAS_REF		BIT(31)
-#define  __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define  __EXEC_OBJECT_HAS_FENCE	BIT(29)
+#define  __LUT_HAS_REF		BIT(31)
+#define  __LUT_HAS_PIN		BIT(30)
+#define  __LUT_HAS_FENCE	BIT(29)
 #define  __EXEC_OBJECT_NEEDS_MAP	BIT(28)
 #define  __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
-#define  __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
-#define __EB_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+#define  __LUT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
+#define __LUT_RESERVED (__LUT_HAS_PIN | __LUT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
 #define __EXEC_VALIDATED	BIT(30)
@@ -191,6 +191,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -245,14 +247,6 @@ struct i915_execbuffer {
 };
 
 /*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
-
-/*
  * Used to convert any address to canonical form.
  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
@@ -316,7 +310,7 @@ static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 		 const struct i915_vma *vma)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*vma->exec_flags & __LUT_HAS_PIN))
 		return true;
 
 	if (vma->node.size < entry->pad_to_size)
@@ -342,7 +336,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 
 static inline void
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
 	u64 flags;
@@ -366,31 +360,28 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			*vma->exec_flags |= __LUT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags |= __LUT_HAS_PIN;
 }
 
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __LUT_HAS_PIN));
 
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __LUT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (entry->flags & __EXEC_OBJECT_HAS_PIN) {
-		__eb_unreserve_vma(vma, entry);
-		entry->flags &= ~__EB_RESERVED;
+	if (*flags & __LUT_HAS_PIN) {
+		__eb_unreserve_vma(vma, *flags);
+		*flags &= ~__LUT_RESERVED;
 	}
 }
 
@@ -430,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}
 
-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -440,10 +431,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -454,15 +444,6 @@ eb_add_vma(struct i915_execbuffer *eb,
 			return err;
 	}
 
-	/*
-	 * Stash a pointer from the vma to execobj, so we can query its flags,
-	 * size, alignment etc as provided by the user. Also we stash a pointer
-	 * to the vma inside the execobj so that we can use a direct lookup
-	 * to find the right target VMA when doing relocations.
-	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
-
 	if (eb->lut_size >= 0) {
 		vma->exec_handle = entry->handle;
 		hlist_add_head(&vma->exec_node,
@@ -485,10 +466,20 @@ eb_add_vma(struct i915_execbuffer *eb,
 	if (!(entry->flags & EXEC_OBJECT_PINNED))
 		entry->flags |= eb->context_flags;
 
+	/*
+	 * Stash a pointer from the vma to execobj, so we can query its flags,
+	 * size, alignment etc as provided by the user. Also we stash a pointer
+	 * to the vma inside the execobj so that we can use a direct lookup
+	 * to find the right target VMA when doing relocations.
+	 */
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
+
 	err = 0;
 	eb_pin_vma(eb, entry, vma);
 	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
+		eb_unreserve_vma(vma, vma->exec_flags);
 
 		list_add_tail(&vma->exec_link, &eb->unbound);
 		if (drm_mm_node_allocated(&vma->node))
@@ -522,7 +513,8 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 static int
 eb_reserve_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	u64 flags;
 	int err;
 
@@ -559,7 +551,7 @@ eb_reserve_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags |= __LUT_HAS_PIN;
 	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
 
 	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
@@ -570,7 +562,7 @@ eb_reserve_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			*vma->exec_flags |= __LUT_HAS_FENCE;
 	}
 
 	return 0;
@@ -613,18 +605,17 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
 		for (i = 0; i < count; i++) {
-			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
 
-			if (entry->flags & EXEC_OBJECT_PINNED &&
-			    entry->flags & __EXEC_OBJECT_HAS_PIN)
+			if (flags & EXEC_OBJECT_PINNED && flags & __LUT_HAS_PIN)
 				continue;
 
-			vma = exec_to_vma(entry);
-			eb_unreserve_vma(vma, entry);
+			eb_unreserve_vma(vma, &eb->flags[i]);
 
-			if (entry->flags & EXEC_OBJECT_PINNED)
+			if (flags & EXEC_OBJECT_PINNED)
 				list_add(&vma->exec_link, &eb->unbound);
-			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 				list_add_tail(&vma->exec_link, &eb->unbound);
 			else
 				list_add_tail(&vma->exec_link, &last);
@@ -712,23 +703,23 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
 	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
 		hlist_for_each_entry(vma,
 				     ht_head(lut, eb->exec[i].handle),
 				     ctx_node) {
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;
 
-			err = eb_add_vma(eb, &eb->exec[i], vma);
+			err = eb_add_vma(eb, i, vma);
 			if (unlikely(err))
 				return err;
-
 			goto next_vma;
 		}
 
 		if (slow_pass < 0)
 			slow_pass = i;
+
+		eb->vma[i] = NULL;
+
 next_vma: ;
 	}
 
@@ -744,7 +735,7 @@ next_vma: ;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
 
-		if (__exec_to_vma(&eb->exec[i]))
+		if (eb->vma[i])
 			continue;
 
 		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -756,14 +747,17 @@ next_vma: ;
 			goto err;
 		}
 
-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+		eb->vma[i] =
+			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
 	}
 	spin_unlock(&eb->file->table_lock);
 
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
+		unsigned int is_obj;
 
-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+		if (!is_obj)
 			continue;
 
 		/*
@@ -774,8 +768,6 @@ next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
@@ -799,14 +791,14 @@ next_vma: ;
 			i915_vma_get(vma);
 		}
 
-		err = eb_add_vma(eb, &eb->exec[i], vma);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err;
 
 		/* Only after we validated the user didn't use our bits */
 		if (vma->ctx != eb->ctx) {
 			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+			eb->flags[i] |= __LUT_HAS_REF;
 		}
 	}
 
@@ -820,7 +812,8 @@ next_vma: ;
 out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -841,8 +834,8 @@ next_vma: ;
 
 err:
 	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
+		if (ptr_unmask_bits(eb->vma[i], 1))
+			eb->vma[i] = NULL;
 	}
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
@@ -855,7 +848,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -875,13 +868,16 @@ static void eb_reset_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int *flags = &eb->flags[i];
 
-		eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(*flags & __LUT_HAS_REF)) {
 			i915_vma_put(vma);
+			*flags &= ~__LUT_HAS_REF;
+		}
 	}
 
 	if (eb->lut_size >= 0)
@@ -898,17 +894,18 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		return;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int *flags = &eb->flags[i];
 
-		if (!vma || !vma->exec_entry)
+		if (!vma)
 			continue;
 
-		GEM_BUG_ON(vma->exec_entry != entry);
-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+
+		eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(*flags & __LUT_HAS_REF))
 			i915_vma_put(vma);
 	}
 }
@@ -1390,7 +1387,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1442,7 +1439,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1453,7 +1450,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1536,7 +1534,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry =
+		&eb->exec[vma->exec_flags - eb->flags];
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1755,6 +1754,8 @@ static int eb_relocate_slow(struct i915_execbuffer *eb)
 	if (err)
 		goto err;
 
+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1828,11 +1829,10 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
-		struct drm_i915_gem_object *obj = vma->obj;
+		unsigned int flags = eb->flags[i];
+		struct drm_i915_gem_object *obj;
 
-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1840,33 +1840,34 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;
 
 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
 
-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
 
+		obj = eb->vma[i]->obj;
 		if (obj->cache_dirty & ~obj->cache_coherent)
 			i915_gem_clflush_object(obj, 0);
 
 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
+
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);
+
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
 
-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		if (unlikely(flags & __LUT_HAS_REF))
 			i915_vma_put(vma);
 	}
 	eb->exec = NULL;
@@ -1995,11 +1996,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;
 
-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] = __LUT_HAS_PIN | __LUT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
 
 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2132,7 +2132,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	int out_fence_fd = -1;
 	int err;
 
-	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
+	BUILD_BUG_ON(__LUT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
 
 	eb.i915 = to_i915(dev);
 	eb.file = file;
@@ -2141,6 +2141,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 	eb.exec = exec;
 	eb.ctx = NULL;
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2228,7 +2230,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err < 0)
 		goto err_vma;
 
-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2401,7 +2403,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 				   sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_TEMPORARY);
 	exec2_list = drm_malloc_gfp(args->buffer_count + 1,
-				    sizeof(*exec2_list),
+				    sizeof(*exec2_list) +
+				    sizeof(struct i915_vma *) +
+				    sizeof(unsigned int),
 				    __GFP_NOWARN | GFP_TEMPORARY);
 	if (exec_list == NULL || exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
@@ -2477,7 +2481,9 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 
 	/* Allocate an extra slot for use by the command parser */
 	exec2_list = drm_malloc_gfp(args->buffer_count + 1,
-				    sizeof(*exec2_list),
+				    sizeof(*exec2_list) +
+				    sizeof(struct i915_vma *) +
+				    sizeof(unsigned int),
 				    __GFP_NOWARN | GFP_TEMPORARY);
 	if (exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 3840b8cdc6b1..cbf0795465bf 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,7 +112,7 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct drm_i915_gem_exec_object2 *exec_entry;
+	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
 
-- 
2.11.0



More information about the Intel-gfx mailing list