[Intel-gfx] [PATCH 1/5] drm/i915: Amalgamate the parameters to ring flush
Chris Wilson
chris at chris-wilson.co.uk
Thu Jun 20 19:18:28 CEST 2013
Now that the invalidate and flush parameters are only ever used as
booleans, and we may want to extend the range of actions in the future,
consolidate the pair into a single action bitmask.
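For reference, a sketch of how the calling convention changes
(RING_FLUSH and RING_INVALIDATE are the new bits defined in
intel_ringbuffer.h below):

	/* before: separate invalidate and flush domain masks */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);

	/* after: a single action bitmask; the bits may be combined */
	ret = ring->flush(ring, RING_INVALIDATE);
	ret = ring->flush(ring, RING_INVALIDATE | RING_FLUSH);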
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
drivers/gpu/drm/i915/i915_trace.h | 11 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 284 +++++++++++++++++---------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-
4 files changed, 162 insertions(+), 142 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ff47145..540a9c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -360,7 +360,7 @@ mi_set_context(struct intel_ring_buffer *ring,
* itlb_before_ctx_switch.
*/
if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) {
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
+ ret = intel_ring_invalidate_all_caches(ring);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3db4a68..ce392eb 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -252,26 +252,23 @@ TRACE_EVENT(i915_gem_ring_dispatch,
);
TRACE_EVENT(i915_gem_ring_flush,
- TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
- TP_ARGS(ring, invalidate, flush),
+ TP_PROTO(struct intel_ring_buffer *ring, u32 flush),
+ TP_ARGS(ring, flush),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
- __field(u32, invalidate)
__field(u32, flush)
),
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
- __entry->invalidate = invalidate;
__entry->flush = flush;
),
- TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
- __entry->dev, __entry->ring,
- __entry->invalidate, __entry->flush)
+ TP_printk("dev=%u, ring=%x, flush=%04x",
+ __entry->dev, __entry->ring, __entry->flush)
);
DECLARE_EVENT_CLASS(i915_gem_request,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e51ab55..601e1eb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -52,38 +52,32 @@ static inline int ring_space(struct intel_ring_buffer *ring)
}
static int
-gen2_render_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate_domains,
- u32 flush_domains)
+gen2_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
- u32 cmd;
int ret;
- cmd = MI_FLUSH;
- if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
- cmd |= MI_NO_WRITE_FLUSH;
+ if (action & (RING_INVALIDATE | RING_FLUSH)) {
+ u32 cmd = MI_FLUSH;
- if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
- cmd |= MI_READ_FLUSH;
+ if (action & RING_INVALIDATE)
+ cmd |= MI_READ_FLUSH;
- ret = intel_ring_begin(ring, 2);
- if (ret)
- return ret;
+ ret = intel_ring_begin(ring, 2);
+ if (ret)
+ return ret;
- intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
return 0;
}
static int
-gen4_render_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate_domains,
- u32 flush_domains)
+gen4_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
struct drm_device *dev = ring->dev;
- u32 cmd;
int ret;
/*
@@ -114,23 +108,23 @@ gen4_render_ring_flush(struct intel_ring_buffer *ring,
* are flushed at any MI_FLUSH.
*/
- cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
- if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
- cmd &= ~MI_NO_WRITE_FLUSH;
- if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
- cmd |= MI_EXE_FLUSH;
+ if (action & (RING_FLUSH | RING_INVALIDATE)) {
+ u32 cmd = MI_FLUSH;
- if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
- (IS_G4X(dev) || IS_GEN5(dev)))
- cmd |= MI_INVALIDATE_ISP;
+ if (action & RING_INVALIDATE) {
+ cmd |= MI_EXE_FLUSH;
+ if (IS_G4X(dev) || IS_GEN5(dev))
+ cmd |= MI_INVALIDATE_ISP;
+ }
- ret = intel_ring_begin(ring, 2);
- if (ret)
- return ret;
+ ret = intel_ring_begin(ring, 2);
+ if (ret)
+ return ret;
- intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
return 0;
}
@@ -179,7 +173,6 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
-
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
@@ -209,24 +202,18 @@ intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
}
static int
-gen6_render_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
u32 flags = 0;
struct pipe_control *pc = ring->private;
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
- /* Force SNB workarounds for PIPE_CONTROL flushes */
- ret = intel_emit_post_sync_nonzero_flush(ring);
- if (ret)
- return ret;
-
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
*/
- if (flush_domains) {
+ if (action & RING_FLUSH) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/*
@@ -235,7 +222,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
*/
flags |= PIPE_CONTROL_CS_STALL;
}
- if (invalidate_domains) {
+ if (action & RING_INVALIDATE) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -248,15 +235,22 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
- ret = intel_ring_begin(ring, 4);
- if (ret)
- return ret;
+ if (flags) {
+ /* Force SNB workarounds for PIPE_CONTROL flushes */
+ ret = intel_emit_post_sync_nonzero_flush(ring);
+ if (ret)
+ return ret;
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
- intel_ring_emit(ring, flags);
- intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_advance(ring);
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, flags);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+ }
return 0;
}
@@ -302,33 +296,22 @@ static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
}
static int
-gen7_render_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate_domains, u32 flush_domains)
+gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
u32 flags = 0;
struct pipe_control *pc = ring->private;
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
- /*
- * Ensure that any following seqno writes only happen when the render
- * cache is indeed flushed.
- *
- * Workaround: 4th PIPE_CONTROL command (except the ones with only
- * read-cache invalidate bits set) must have the CS_STALL bit set. We
- * don't try to be clever and just set it unconditionally.
- */
- flags |= PIPE_CONTROL_CS_STALL;
-
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
*/
- if (flush_domains) {
+ if (action & RING_FLUSH) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
}
- if (invalidate_domains) {
+ if (action & RING_INVALIDATE) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
@@ -347,17 +330,30 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
gen7_render_ring_cs_stall_wa(ring);
}
- ret = intel_ring_begin(ring, 4);
- if (ret)
- return ret;
+ if (flags) {
+ /*
+ * Ensure that any following seqno writes only happen when the render
+ * cache is indeed flushed.
+ *
+ * Workaround: 4th PIPE_CONTROL command (except the ones with only
+ * read-cache invalidate bits set) must have the CS_STALL bit set. We
+ * don't try to be clever and just set it unconditionally.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
- intel_ring_emit(ring, flags);
- intel_ring_emit(ring, scratch_addr);
- intel_ring_emit(ring, 0);
- intel_ring_advance(ring);
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
- if (flush_domains)
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, flags);
+ intel_ring_emit(ring, scratch_addr);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+ }
+
+ if (action & RING_FLUSH)
return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
return 0;
@@ -956,19 +952,19 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
}
static int
-bsd_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate_domains,
- u32 flush_domains)
+bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
int ret;
- ret = intel_ring_begin(ring, 2);
- if (ret)
- return ret;
+ if (action & (RING_FLUSH | RING_INVALIDATE)) {
+ ret = intel_ring_begin(ring, 2);
+ if (ret)
+ return ret;
- intel_ring_emit(ring, MI_FLUSH);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
+ intel_ring_emit(ring, MI_FLUSH);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
return 0;
}
@@ -1636,31 +1632,34 @@ static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
_MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}
-static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate, u32 flush)
+static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
- uint32_t cmd;
int ret;
- ret = intel_ring_begin(ring, 4);
- if (ret)
- return ret;
+ if (action & (RING_FLUSH | RING_INVALIDATE)) {
+ u32 cmd = MI_FLUSH_DW;
+
+ /*
+ * Bspec vol 1c.5 - video engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
+ if (action & RING_INVALIDATE)
+ cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+ MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
- cmd = MI_FLUSH_DW;
- /*
- * Bspec vol 1c.5 - video engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- if (invalidate & I915_GEM_GPU_DOMAINS)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
- MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
- intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
return 0;
}
@@ -1708,34 +1707,37 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
/* Blitter support (SandyBridge+) */
-static int gen6_ring_flush(struct intel_ring_buffer *ring,
- u32 invalidate, u32 flush)
+static int gen6_ring_flush(struct intel_ring_buffer *ring, u32 action)
{
struct drm_device *dev = ring->dev;
uint32_t cmd;
int ret;
- ret = intel_ring_begin(ring, 4);
- if (ret)
- return ret;
+ if (action & (RING_FLUSH | RING_INVALIDATE)) {
+ cmd = MI_FLUSH_DW;
- cmd = MI_FLUSH_DW;
- /*
- * Bspec vol 1c.3 - blitter engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- if (invalidate & I915_GEM_DOMAIN_RENDER)
- cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
- MI_FLUSH_DW_OP_STOREDW;
- intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
+ if (action & RING_INVALIDATE)
+ cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW;
- if (IS_GEN7(dev) && flush)
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+ }
+
+ if (IS_GEN7(dev) && (action & RING_FLUSH))
return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
return 0;
@@ -2027,11 +2029,11 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
if (!ring->gpu_caches_dirty)
return 0;
- ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ ret = ring->flush(ring, RING_FLUSH);
if (ret)
return ret;
- trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ trace_i915_gem_ring_flush(ring, RING_FLUSH);
ring->gpu_caches_dirty = false;
return 0;
@@ -2040,18 +2042,36 @@ intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
- uint32_t flush_domains;
+ u32 action;
+ int ret;
+
+ action = RING_INVALIDATE;
+ if (ring->gpu_caches_dirty)
+ action |= RING_FLUSH;
+
+ ret = ring->flush(ring, action);
+ if (ret)
+ return ret;
+
+ trace_i915_gem_ring_flush(ring, action);
+
+ ring->gpu_caches_dirty = false;
+ return 0;
+}
+
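+/*
+ * Pass an arbitrary action bitmask to the ring, adding a flush of the
+ * GPU caches if they are known to be dirty.
+ */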
+int
+intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action)
+{
int ret;
- flush_domains = 0;
if (ring->gpu_caches_dirty)
- flush_domains = I915_GEM_GPU_DOMAINS;
+ action |= RING_FLUSH;
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ ret = ring->flush(ring, action);
if (ret)
return ret;
- trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ trace_i915_gem_ring_flush(ring, action);
ring->gpu_caches_dirty = false;
return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 799f04c..5066b3b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,8 +93,10 @@ struct intel_ring_buffer {
void (*write_tail)(struct intel_ring_buffer *ring,
u32 value);
int __must_check (*flush)(struct intel_ring_buffer *ring,
- u32 invalidate_domains,
- u32 flush_domains);
+ u32 action);
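+/* bits for the 'action' bitmask passed to the flush vfunc */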
+#define RING_FLUSH 0x1
+#define RING_INVALIDATE 0x2
+
int (*add_request)(struct intel_ring_buffer *ring);
/* Some chipsets are not quite as coherent as advertised and need
* an expensive kick to force a true read of the up-to-date seqno.
@@ -240,6 +242,7 @@ int __must_check intel_ring_idle(struct intel_ring_buffer *ring);
void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno);
int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_flush_internal(struct intel_ring_buffer *ring, u32 action);
int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
--
1.8.3.1