[PATCH 2/2] barrier-after-pdp
Chris Wilson
chris at chris-wilson.co.uk
Wed Nov 28 09:28:59 UTC 2018
---
drivers/gpu/drm/i915/intel_lrc.c | 120 ++++++++++++++-----------------
1 file changed, 53 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08fd9b12e4d7..29dc34279ff9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,31 +363,12 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
trace_i915_request_out(rq);
}
-static void
-execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
-{
- ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-}
-
static u64 execlists_update_context(struct i915_request *rq)
{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
struct intel_context *ce = rq->hw_context;
- u32 *reg_state = ce->lrc_reg_state;
-
- reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
- /*
- * True 32b PPGTT with dynamic page allocation: update PDP
- * registers and point the unallocated PDPs to scratch page.
- * PML4 is allocated during ppgtt init, so this is not needed
- * in 48-bit mode.
- */
- if (!i915_vm_is_48bit(&ppgtt->vm))
- execlists_update_context_pdps(ppgtt, reg_state);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] =
+ intel_ring_set_tail(rq->ring, rq->tail);
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1233,6 +1214,38 @@ execlists_context_pin(struct intel_engine_cs *engine,
return __execlists_context_pin(engine, ctx, ce);
}
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+ const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
+ u32 *cs;
+ int i;
+
+ if (!(ppgtt->pd_dirty_rings & intel_engine_flag(engine)))
+ return 0;
+
+ cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+ return 0;
+}
+
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
@@ -1249,13 +1262,30 @@ static int execlists_request_alloc(struct i915_request *request)
if (ret)
return ret;
- /* Note that after this point, we have committed to using
+ /*
+ * Note that after this point, we have committed to using
* this request as it is being used to both track the
* state of engine initialisation and liveness of the
* golden renderstate above. Think twice before you try
* to cancel/unwind this request now.
*/
+ if (!i915_vm_is_48bit(&request->gem_context->ppgtt->vm)) {
+ GEM_BUG_ON(intel_vgpu_active(request->i915));
+
+ ret = request->engine->emit_flush(request, EMIT_FLUSH);
+ if (ret)
+ return ret;
+
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ if (ret)
+ return ret;
+ }
+
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
}
@@ -1836,56 +1866,11 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
atomic_read(&execlists->tasklet.count));
}
-static int intel_logical_ring_emit_pdps(struct i915_request *rq)
-{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
- struct intel_engine_cs *engine = rq->engine;
- const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
- u32 *cs;
- int i;
-
- cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
- for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
u32 *cs;
- int ret;
-
- /* Don't rely in hw updating PDPs, specially in lite-restore.
- * Ideally, we should set Force PD Restore in ctx descriptor,
- * but we can't. Force Restore would be a second option, but
- * it is unsafe in case of lite-restore (because the ctx is
- * not idle). PML4 is allocated during ppgtt init so this is
- * not needed in 48-bit.*/
- if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
- !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
- !intel_vgpu_active(rq->i915)) {
- ret = intel_logical_ring_emit_pdps(rq);
- if (ret)
- return ret;
-
- rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
- }
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1918,6 +1903,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
*cs++ = MI_NOOP;
+
intel_ring_advance(rq, cs);
return 0;
--
2.20.0.rc1
More information about the Intel-gfx-trybot
mailing list