[Intel-gfx] [PATCH 43/68] drm/i915: Track page table reload need

Ben Widawsky benjamin.widawsky at intel.com
Fri Aug 22 05:12:06 CEST 2014


This patch was formerly known as, "Force pd restore when PDEs change,
gen6-7." I had to change the name because it is needed for GEN8 too.

The real issue this is trying to solve is when a new object is mapped
into the current address space. The GPU does not snoop the new mapping
so we must do the gen specific action to reload the page tables.

GEN8 and GEN7 do differ in the way they load page tables for the RCS.
GEN8 does so with the context restore, while GEN7 requires the proper
load commands in the command streamer. Non-render is similar for both.

Caveat for GEN7
The docs say you cannot change the PDEs of a currently running context.
We never map new PDEs of a running context, and expect them to be
present - so I think this is okay. (We can unmap, but this should also
be okay since we only unmap unreferenced objects that the GPU shouldn't
be tryingto va->pa xlate.) The MI_SET_CONTEXT command does have a flag
to signal that even if the context is the same, force a reload. It's
unclear exactly what this does, but I have a hunch it's the right thing
to do.

The logic assumes that we always emit a context switch after mapping new
PDEs, and before we submit a batch. This is the case today, and has been
the case since the inception of hardware contexts. A note in the comment
let's the user know.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>

squash! drm/i915: Force pd restore when PDEs change, gen6-7

It's not just for gen8. If the current context has mappings change, we
need a context reload to switch
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 55 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 16 ++++++++-
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  2 ++
 4 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 6d341ef..dade4e2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -693,6 +693,8 @@ static int do_switch_xcs(struct intel_engine_cs *ring,
 		ret = ppgtt->switch_mm(ppgtt, ring, false);
 		if (ret)
 			return ret;
+		/* Context switch on GEN8 will reload pds also */
+		clear_bit(ring->id, &to->vm->pd_reload_mask);
 	}
 
 	if (from)
@@ -719,6 +721,25 @@ static void remap_l3(struct intel_engine_cs *ring,
 	}
 }
 
+static inline bool should_skip_switch(struct intel_engine_cs *ring,
+				      struct intel_context *from,
+				      struct intel_context *to)
+{
+	if (to->remap_slice)
+		return false;
+
+	if (from == to && !test_bit(ring->id, &to->vm->pd_reload_mask))
+		return true;
+
+	return false;
+}
+
+static bool
+needs_pd_load(struct intel_engine_cs *ring, struct intel_context *to)
+{
+	return (USES_FULL_PPGTT(ring->dev) || &to->vm->pd_reload_mask);
+}
+
 static int do_switch_rcs(struct intel_engine_cs *ring,
 			 struct intel_context *from,
 			 struct intel_context *to)
@@ -727,7 +748,6 @@ static int do_switch_rcs(struct intel_engine_cs *ring,
 	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	bool uninitialized = false;
-	bool needs_pd_load = (INTEL_INFO(ring->dev)->gen < 8) && USES_FULL_PPGTT(ring->dev);
 	int ret;
 
 	if (from != NULL) {
@@ -752,18 +772,21 @@ static int do_switch_rcs(struct intel_engine_cs *ring,
 	 * context. The default context cannot end up in evict everything (as
 	 * commented above) because it is always pinned.
 	 */
-	if (WARN_ON(from == to)) {
+	if (WARN_ON(from == to && should_skip_switch(ring, from, to))) {
 		ret = -EPERM;
 		goto unpin_out;
 	}
 
-	if (needs_pd_load) {
+	if (INTEL_INFO(ring->dev)->gen < 8 && needs_pd_load(ring, to)) {
 		/* Older GENs still want the load first, "PP_DCLV followed by
 		 * PP_DIR_BASE register through Load Register Immediate commands
 		 * in Ring Buffer before submitting a context."*/
 		ret = ppgtt->switch_mm(ppgtt, ring, false);
 		if (ret)
 			goto unpin_out;
+
+		/* Doing a PD load always reloads the page dirs */
+		clear_bit(ring->id, &to->vm->pd_reload_mask);
 	}
 
 	/*
@@ -784,22 +807,28 @@ static int do_switch_rcs(struct intel_engine_cs *ring,
 		i915_gem_vma_bind(vma, to->legacy_hw_ctx.rcs_state->cache_level, GLOBAL_BIND);
 	}
 
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
+	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
-		needs_pd_load = USES_FULL_PPGTT(ring->dev) && IS_GEN8(ring->dev);
-	}
+	else if (test_and_clear_bit(ring->id, &to->vm->pd_reload_mask))
+		hw_flags |= MI_FORCE_RESTORE;
 
 	ret = mi_set_context(ring, to, hw_flags);
 	if (ret)
 		goto unpin_out;
 
+	if (IS_GEN8(ring->dev)) {
+		/* Setting the context is enough to reload pds on gen8 */
+		clear_bit(ring->id, &to->vm->pd_reload_mask);
+	}
+
 	/* GEN8 does *not* require an explicit reload if the PDPs have been
 	 * setup, and we do not wish to move them.
 	 *
 	 * XXX: If we implemented page directory eviction code, this
 	 * optimization needs to be removed.
 	 */
-	if (needs_pd_load) {
+	if (IS_GEN8(ring->dev) && needs_pd_load(ring, to)) {
+		/* Doing a PD load always reloads the page dirs */
 		ret = ppgtt->switch_mm(ppgtt, ring, false);
 		/* The hardware context switch is emitted, but we haven't
 		 * actually changed the state - so it's probably safe to bail
@@ -860,16 +889,6 @@ unpin_out:
 	return ret;
 }
 
-static inline bool should_skip_switch(struct intel_engine_cs *ring,
-				      struct intel_context *from,
-				      struct intel_context *to)
-{
-	if (from == to && !to->remap_slice)
-		return true;
-
-	return false;
-}
-
 /**
  * i915_switch_context() - perform a GPU context switch.
  * @ring: ring for which we'll execute the context switch
@@ -898,7 +917,7 @@ int i915_switch_context(struct intel_engine_cs *ring,
 		return 0;
 	}
 
-	if (from == to && !to->remap_slice)
+	if (should_skip_switch(ring, from, to))
 		return 0;
 
 	if (IS_GEN8(ring->dev))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index caccee9..fdd68d6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1096,6 +1096,8 @@ legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	if (ret)
 		goto error;
 
+	WARN(ctx->vm->pd_reload_mask & (1<<ring->id), "%s didn't clear reload\n", ring->name);
+
 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	instp_mask = I915_EXEC_CONSTANTS_MASK;
 	switch (instp_mode) {
@@ -1347,6 +1349,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
+	/* XXX: Reserve has possibly change PDEs which means we must do a
+	 * context switch before we can coherently read some of the reserved
+	 * VMAs. */
+
 	/* The objects are in their final locations, apply the relocations. */
 	if (need_relocs)
 		ret = i915_gem_execbuffer_relocate(eb);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e7adbff..c3df359 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1262,6 +1262,15 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 	return 0;
 }
 
+/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we
+ * are switching between contexts with the same LRCA, we also must do a force
+ * restore.
+ */
+#define ppgtt_invalidate_tlbs(vm) do {\
+	/* If current vm != vm, */ \
+	vm->pd_reload_mask = INTEL_INFO(vm->dev)->ring_mask; \
+} while(0)
+
 static int
 ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
@@ -1279,10 +1288,13 @@ ppgtt_bind_vma(struct i915_vma *vma,
 						 vma->node.size);
 		if (ret)
 			return ret;
+
+		ppgtt_invalidate_tlbs(vma->vm);
 	}
 
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
 				cache_level, flags);
+
 	return 0;
 }
 
@@ -1292,9 +1304,11 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
 			     vma->node.start,
 			     vma->obj->base.size,
 			     true);
-	if (vma->vm->teardown_va_range)
+	if (vma->vm->teardown_va_range) {
 		vma->vm->teardown_va_range(vma->vm,
 					   vma->node.start, vma->node.size);
+		ppgtt_invalidate_tlbs(vma->vm);
+	}
 }
 
 extern int intel_iommu_gfx_mapped;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3729222..c89930d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -215,6 +215,8 @@ struct i915_address_space {
 		struct page *page;
 	} scratch;
 
+	unsigned long pd_reload_mask;
+
 	/**
 	 * List of objects currently involved in rendering.
 	 *
-- 
2.0.4




More information about the Intel-gfx mailing list