[Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables

Ville Syrjälä ville.syrjala at linux.intel.com
Wed May 7 13:49:31 CEST 2014


I quickly cobbled together a hsw version of this and gave it a whirl on
one machine. Seems to work just fine here, and no lockups when switching
between hw and sw binding tables. Did you get the lockups on hsw even
with rendercopy?

Here's my hsw version:

>From 17eeb8021815e2c18d6ba9b2185a37904296c2d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala at linux.intel.com>
Date: Wed, 7 May 2014 12:33:01 +0300
Subject: [PATCH] rendercopy: use resource streamer on hsw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
---
 lib/gen7_render.h     |  16 +++++++-
 lib/rendercopy_gen7.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/lib/gen7_render.h b/lib/gen7_render.h
index 1661d4c..58a88ef 100644
--- a/lib/gen7_render.h
+++ b/lib/gen7_render.h
@@ -155,8 +155,11 @@
 #define GEN7_PIPE_CONTROL_IS_FLUSH      (1 << 11)
 #define GEN7_PIPE_CONTROL_TC_FLUSH      (1 << 10)
 #define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN7_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN7_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
+#define GEN7_PIPE_CONTROL_FLUSH		(1 << 7)
+#define GEN7_PIPE_CONTROL_DC_FLUSH      (1 << 5)
+#define GEN7_PIPE_CONTROL_VF_INVALIDATE (1 << 4)
+#define GEN7_PIPE_CONTROL_CC_INVALIDATE (1 << 3) /* constant cache: bit 3, must not alias SC_INVALIDATE */
+#define GEN7_PIPE_CONTROL_SC_INVALIDATE (1 << 2) /* state cache: bit 2 */
 #define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD   (1 << 1)
 #define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
 
@@ -1361,4 +1364,13 @@ typedef enum {
 	EXTEND_COUNT
 } sampler_extend_t;
 
+/* HSW+ resource streamer */
+#define HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC	GEN7_3D(3, 1, 0x19)
+# define BINDING_TABLE_POOL_ENABLE		(1 << 11)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_VS	GEN7_3D(3, 0, 0x43)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_GS	GEN7_3D(3, 0, 0x44)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_HS	GEN7_3D(3, 0, 0x45)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_DS	GEN7_3D(3, 0, 0x46)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_PS	GEN7_3D(3, 0, 0x47)
+
 #endif
diff --git a/lib/rendercopy_gen7.c b/lib/rendercopy_gen7.c
index 5131d8f..4efccb9 100644
--- a/lib/rendercopy_gen7.c
+++ b/lib/rendercopy_gen7.c
@@ -21,6 +21,9 @@
 #include "gen7_render.h"
 #include "intel_reg.h"
 
+#ifndef I915_EXEC_RESOURCE_STREAMER
+#define I915_EXEC_RESOURCE_STREAMER (1<<13) /* fallback value, only used when the macro is not already defined; OR'd into the exec flags in gen7_render_flush() */
+#endif
 
 static const uint32_t ps_kernel[][4] = {
 	{ 0x0080005a, 0x2e2077bd, 0x000000c0, 0x008d0040 },
@@ -73,11 +76,14 @@ gen7_render_flush(struct intel_batchbuffer *batch,
 		  drm_intel_context *context, uint32_t batch_end)
 {
 	int ret;
+	uint32_t flags = I915_EXEC_RENDER;
 
 	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (batch->use_resource_streamer)
+		flags |= I915_EXEC_RESOURCE_STREAMER;
 	if (ret == 0)
 		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
-						    batch_end, 0);
+						    batch_end, flags);
 	assert(ret == 0);
 }
 
@@ -219,6 +225,75 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 	OUT_BATCH(0);
 }
 
+static void
+gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable) /* emit the batch commands that turn the binding table pool on (enable) or off (!enable) */
+{
+	if (!enable) {
+		OUT_BATCH(MI_RS_CONTROL | 0x0); /* same dword this patch emits ahead of STATE_BASE_ADDRESS; presumably "resource streamer off" -- confirm against the PRM */
+
+		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+		/* binding table pool base address: no bo and no BINDING_TABLE_POOL_ENABLE bit, unlike the enable path below */
+		OUT_BATCH(3 << 5);
+		/* Upper bound: zero on the disable path */
+		OUT_BATCH(0);
+
+		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); /* first PIPE_CONTROL: stall only */
+		OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+
+		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); /* second PIPE_CONTROL: SC_INVALIDATE only */
+		OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+
+		return; /* disable path done; everything below is the enable path */
+        }
+	OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+
+	/* binding table pool base address: relocation to hw_bt_pool_bo carrying the enable bit and the same 3 << 5 value as the disable path */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  BINDING_TABLE_POOL_ENABLE | (3 << 5));
+
+	/* Upper bound: relocation to the same bo with the bo's size as the last argument */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  batch->hw_bt_pool_bo->size);
+
+	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); /* same stall + SC_INVALIDATE PIPE_CONTROL pair as on the disable path */
+	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static uint32_t
+gen7_rs_bind_surfaces(struct intel_batchbuffer *batch, /* resource-streamer variant: binds dst and src, hands the results back through surf0/surf1 */
+		      struct igt_buf *src,
+		      struct igt_buf *dst,
+		      uint32_t *surf0, uint32_t *surf1) /* out: gen7_bind_buf() results for dst (*surf0) and src (*surf1) */
+{
+	*surf0 = gen7_bind_buf(batch, dst, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 1); /* dst; last argument 1 is presumably an is-destination flag -- confirm in gen7_bind_buf() */
+	*surf1 = gen7_bind_buf(batch, src, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 0); /* src; last argument 0 */
+
+	return 0; /* always 0; the caller emits this value as the BINDING_TABLE_POINTERS_PS dword */
+}
+
+static void
+gen7_rs_edit_surfaces(struct intel_batchbuffer *batch, /* emit one BINDING_TABLE_EDIT_PS command with two entries built from surf0 and surf1 */
+		      uint32_t surf0, uint32_t surf1)
+{
+	OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2)); /* four dwords are emitted in total: this header plus the three below */
+	OUT_BATCH(0x3); /* NOTE(review): meaning of 0x3 is not visible in this patch -- confirm against the PRM */
+	{
+		OUT_BATCH(0 << 16 | surf0 >> 5); /* entry with index 0 in bits 16 and up, surf0 >> 5 in the low bits */
+		OUT_BATCH(1 << 16 | surf1 >> 5); /* entry with index 1 in bits 16 and up, surf1 >> 5 in the low bits */
+	}
+}
+
 static uint32_t
 gen7_bind_surfaces(struct intel_batchbuffer *batch,
 		   struct igt_buf *src,
@@ -241,8 +316,19 @@ gen7_emit_binding_table(struct intel_batchbuffer *batch,
 			struct igt_buf *src,
 			struct igt_buf *dst)
 {
+	uint32_t surf0 = 0, surf1 = 1;
+	uint32_t binding_table;
+
+	if (batch->use_resource_streamer) {
+		binding_table = gen7_rs_bind_surfaces(batch, src, dst,
+							 &surf0, &surf1);
+		gen7_rs_edit_surfaces(batch, surf0, surf1);
+	} else {
+		binding_table = gen7_bind_surfaces(batch, src, dst);
+	}
+
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(gen7_bind_surfaces(batch, src, dst));
+	OUT_BATCH(binding_table);
 }
 
 static void
@@ -273,6 +359,9 @@ gen7_create_blend_state(struct intel_batchbuffer *batch)
 static void
 gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 {
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x0);
+
 	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
 	OUT_BATCH(0);
 	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
@@ -284,6 +373,9 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 	OUT_BATCH(0);
 	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x1);
 }
 
 static uint32_t
@@ -545,6 +637,10 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
 	gen7_emit_state_base_address(batch);
+
+	if (batch->use_resource_streamer)
+		gen7_hw_binding_table(batch, true);
+
 	gen7_emit_multisample(batch);
 	gen7_emit_urb(batch);
 	gen7_emit_vs(batch);
@@ -576,6 +672,9 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch,
         OUT_BATCH(0);   /* start instance location */
         OUT_BATCH(0);   /* index buffer offset, ignored */
 
+	if (batch->use_resource_streamer)
+		gen7_hw_binding_table(batch, false);
+
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch->ptr - batch->buffer;
-- 
1.8.3.2

-- 
Ville Syrjälä
Intel OTC



More information about the Intel-gfx mailing list