[Intel-gfx] [PATCH 6/8] drm/i915: Prepare gen7 cmdparser for async execution

Chris Wilson chris at chris-wilson.co.uk
Sat Dec 7 17:01:08 UTC 2019


The gen7 cmdparser is primarily a promotion-based system: it allows
access to additional registers beyond the HW validation, and allows
fallback to normal execution of the user batch buffer if it is valid,
which requires chaining to the original batch. In the next patch, we
will do the cmdparser validation in the pipeline asynchronously, so at
the point of request construction we will not know whether we want to
execute the privileged and validated batch, or the original user batch.
The solution employed here is to execute both batches, one with raised
privileges and one as normal. This is because the gen7
MI_BATCH_BUFFER_START command cannot change privilege level within a
batch and must strictly use the current privilege level (or undefined
behaviour kills the GPU). So in order to execute the original batch, we
need a second non-privileged batch buffer chained from the ring, i.e.
we need to emit two batches for each user batch. Inside the two batches
we determine which one should actually execute, and provide a
conditional trampoline to call the original batch.
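
For reference, here is a rough sketch (illustrative only, not part of
the patch itself) of how the shadow object is laid out and consumed by
the two dispatches in the GGTT case:

	/*
	 * Shadow object, total length = batch_len + 8:
	 *
	 *    0                 batch_len      batch_len + 8
	 *    +-----------------+--------------+
	 *    | validated copy  | trampoline   |
	 *    | of user batch   | (2 dwords)   |
	 *    +-----------------+--------------+
	 *
	 * Dispatch #1 (secure)     starts at offset 0.
	 * Dispatch #2 (non-secure) starts at offset batch_len.
	 *
	 * Batch valid:    the trampoline holds MI_BATCH_BUFFER_END, so
	 *                 the non-secure dispatch terminates immediately
	 *                 and only the privileged shadow executes.
	 *
	 * Batch rejected (-EACCES): the first dword of the shadow is
	 *                 overwritten with MI_BATCH_BUFFER_END, cancelling
	 *                 the privileged dispatch, and the trampoline
	 *                 becomes a non-secure MI_BATCH_BUFFER_START into
	 *                 the original user batch.
	 */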

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 127 ++++++++++--------
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  27 ++++
 2 files changed, 100 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 690a3670ed08..5f4e460701ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -47,6 +47,8 @@ enum {
 #define __EXEC_INTERNAL_FLAGS	(~0u << 30)
 #define UPDATE			PIN_OFFSET_FIXED
 
+#define NUM_EXTRA 2
+
 #define BATCH_OFFSET_BIAS (256*1024)
 
 #define __I915_EXEC_ILLEGAL_FLAGS \
@@ -228,6 +230,7 @@ struct i915_execbuffer {
 
 	struct i915_request *request; /** our request to build */
 	struct i915_vma *batch; /** identity of the batch obj/vma */
+	struct i915_vma *trampoline; /** trampoline used for chaining */
 
 	/** actual size of execobj[] as we may extend it for the cmdparser */
 	unsigned int buffer_count;
@@ -1946,31 +1949,13 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 }
 
 static struct i915_vma *
-shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+shadow_batch_pin(struct drm_i915_gem_object *obj,
+		 struct i915_address_space *vm,
+		 unsigned int flags)
 {
-	struct i915_address_space *vm;
 	struct i915_vma *vma;
-	u64 flags;
 	int err;
 
-	/*
-	 * PPGTT backed shadow buffers must be mapped RO, to prevent
-	 * post-scan tampering
-	 */
-	if (CMDPARSER_USES_GGTT(eb->i915)) {
-		vm = &eb->engine->gt->ggtt->vm;
-		flags = PIN_GLOBAL;
-	} else {
-		vm = eb->context->vm;
-		if (!vm->has_read_only) {
-			DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
-			return ERR_PTR(-EINVAL);
-		}
-
-		i915_gem_object_set_readonly(obj);
-		flags = PIN_USER;
-	}
-
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma))
 		return vma;
@@ -1985,59 +1970,80 @@ shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
 static int eb_parse(struct i915_execbuffer *eb)
 {
 	struct intel_engine_pool_node *pool;
-	struct i915_vma *vma;
+	struct i915_vma *shadow, *trampoline;
+	unsigned int len;
 	int err;
 
 	if (!eb_use_cmdparser(eb))
 		return 0;
 
-	pool = intel_engine_get_pool(eb->engine, eb->batch_len);
+	len = eb->batch_len;
+	if (!CMDPARSER_USES_GGTT(eb->i915)) {
+		/*
+		 * PPGTT backed shadow buffers must be mapped RO, to prevent
+		 * post-scan tampering
+		 */
+		if (!eb->context->vm->has_read_only) {
+			DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+			return -EINVAL;
+		}
+	} else {
+		len += 8; /* reserve space for the trampoline at the tail */
+	}
+
+	pool = intel_engine_get_pool(eb->engine, len);
 	if (IS_ERR(pool))
 		return PTR_ERR(pool);
 
-	vma = shadow_batch_pin(eb, pool->obj);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
+	shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
+	if (IS_ERR(shadow)) {
+		err = PTR_ERR(shadow);
 		goto err;
 	}
+	i915_gem_object_set_readonly(shadow->obj);
+
+	trampoline = NULL;
+	if (CMDPARSER_USES_GGTT(eb->i915)) {
+		trampoline = shadow; /* the unprivileged binding becomes the trampoline */
+
+		shadow = shadow_batch_pin(pool->obj,
+					  &eb->engine->gt->ggtt->vm,
+					  PIN_GLOBAL);
+		if (IS_ERR(shadow)) {
+			err = PTR_ERR(shadow);
+			shadow = trampoline;
+			goto err_shadow;
+		}
+
+		eb->batch_flags |= I915_DISPATCH_SECURE;
+	}
 
 	err = intel_engine_cmd_parser(eb->engine,
 				      eb->batch,
 				      eb->batch_start_offset,
 				      eb->batch_len,
-				      vma);
-	if (err) {
-		/*
-		 * Unsafe GGTT-backed buffers can still be submitted safely
-		 * as non-secure.
-		 * For PPGTT backing however, we have no choice but to forcibly
-		 * reject unsafe buffers
-		 */
-		if (i915_vma_is_ggtt(vma) && err == -EACCES)
-			/* Execute original buffer non-secure */
-			err = 0;
-		goto err_unpin;
-	}
+				      shadow);
+	if (err)
+		goto err_trampoline;
 
-	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(shadow);
 	eb->flags[eb->buffer_count] =
 		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	vma->exec_flags = &eb->flags[eb->buffer_count];
+	shadow->exec_flags = &eb->flags[eb->buffer_count];
 	eb->buffer_count++;
 
+	eb->trampoline = trampoline;
 	eb->batch_start_offset = 0;
-	eb->batch = vma;
-
-	if (i915_vma_is_ggtt(vma))
-		eb->batch_flags |= I915_DISPATCH_SECURE;
-
-	/* eb->batch_len unchanged */
+	eb->batch = shadow;
 
-	vma->private = pool;
+	shadow->private = pool;
 	return 0;
 
-err_unpin:
-	i915_vma_unpin(vma);
+err_trampoline:
+	if (trampoline)
+		i915_vma_unpin(trampoline);
+err_shadow:
+	i915_vma_unpin(shadow);
 err:
 	intel_engine_pool_put(pool);
 	return err;
@@ -2089,6 +2095,16 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (err)
 		return err;
 
+	if (eb->trampoline) {
+		GEM_BUG_ON(eb->batch_start_offset);
+		err = eb->engine->emit_bb_start(eb->request,
+						eb->trampoline->node.start +
+						eb->batch_len,
+						8, 0);
+		if (err)
+			return err;
+	}
+
 	if (i915_gem_context_nopreempt(eb->gem_context))
 		eb->request->flags |= I915_REQUEST_NOPREEMPT;
 
@@ -2460,9 +2476,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 
 	eb.exec = exec;
-	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + NUM_EXTRA);
 	eb.vma[0] = NULL;
-	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + NUM_EXTRA);
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2470,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.buffer_count = args->buffer_count;
 	eb.batch_start_offset = args->batch_start_offset;
 	eb.batch_len = args->batch_len;
+	eb.trampoline = NULL;
 
 	eb.batch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
@@ -2667,6 +2684,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+	if (eb.trampoline)
+		i915_vma_unpin(eb.trampoline);
 	mutex_unlock(&dev->struct_mutex);
 err_engine:
 	eb_unpin_engine(&eb);
@@ -2742,7 +2761,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_KERNEL);
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count + NUM_EXTRA, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec_list == NULL || exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
@@ -2818,7 +2837,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	/* Allocate an extra slot for use by the command parser */
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count + NUM_EXTRA, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2977316d64ae..5c942a582b06 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1504,6 +1504,33 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 		}
 	} while (1);
 
+	if (!jump_whitelist) { /* set up the trampoline for chaining */
+		cmd = page_mask_bits(shadow->obj->mm.mapping);
+		if (!ret) { /* valid batch: terminate after the shadow */
+			cmd += batch_length / sizeof(*cmd);
+			*cmd = MI_BATCH_BUFFER_END;
+		} else { /* first cancel the shadow batch at its start */
+			*cmd = MI_BATCH_BUFFER_END;
+			cmd += batch_length / sizeof(*cmd);
+
+			if (ret == -EACCES) { /* chain to the unprivileged batch */
+				u32 bbs;
+
+				bbs = MI_BATCH_BUFFER_START;
+				bbs |= MI_BATCH_NON_SECURE_I965;
+				if (IS_HASWELL(engine->i915))
+					bbs |= MI_BATCH_NON_SECURE_HSW;
+
+				cmd[0] = bbs;
+				cmd[1] = batch_addr;
+
+				ret = 0;
+			} else { /* terminate the trampoline too */
+				*cmd = MI_BATCH_BUFFER_END;
+			}
+		}
+	}
+
 	if (needs_clflush_after) {
 		void *ptr = page_mask_bits(shadow->obj->mm.mapping);
 
-- 
2.24.0


