[igt-dev] [PATCH i-g-t] libs: Adding rendercopy support for gen11

Lukasz Kalamarz lukasz.kalamarz at intel.com
Tue Oct 9 14:45:25 UTC 2018


From: "Kalamarz, Lukasz" <lukasz.kalamarz at intel.com>

Gen11 introduced EU changes that caused the gem_render_copy test to
fail. The shader used for gen9 is no longer applicable after those
changes and had to be adapted for gen11. Since the shader is the only
difference, a wrapper around the gen9 function was created.

Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz at intel.com>

Cc: Michał Winiarski <michal.winiarski at intel.com>
Cc: Antonio Argenziano <antonio.argenziano at intel.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Katarzyna Dec <katarzyna.dec at intel.com>
---
 lib/i915/shaders/ps/blit.g11a | 22 ++++++++++
 lib/intel_batchbuffer.c       |  2 +
 lib/rendercopy.h              |  5 +++
 lib/rendercopy_gen9.c         | 97 ++++++++++++++++++++++++++++++++++---------
 4 files changed, 106 insertions(+), 20 deletions(-)
 create mode 100644 lib/i915/shaders/ps/blit.g11a

diff --git a/lib/i915/shaders/ps/blit.g11a b/lib/i915/shaders/ps/blit.g11a
new file mode 100644
index 00000000..15fe78af
--- /dev/null
+++ b/lib/i915/shaders/ps/blit.g11a
@@ -0,0 +1,22 @@
+/*  This is the same shader as for previous gens. On Gen 11 the pln instruction was removed and has to be replaced by mad.
+This shader was generated using the IGA tool (not the assembler integrated into IGT).
+*/
+
+(W) mad(8|M0) acc0.0<1>:nf r6.7<0;0>:f r2.0<8;1>:f r6.0<0>:f
+(W) mad(8|M0) r10.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.1<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.0<0>:f
+(W) mad(8|M0) r11.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.1<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.4<0;0>:f r2.0<8;1>:f r6.4<0>:f
+(W) mad(8|M0) r12.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.5<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.4<0>:f
+(W) mad(8|M0) r13.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.5<0>:f
+
+(W) send(16|M0) r112:f r10:ub 0x10000002 0x08840001 //  SAMPLER  wr:4, rd:8, fc: 0x40001
+    mov (16|M0)              r113.0<1>:f   r12.0<8;8,1>:f
+    mov (16|M0)              r115.0<1>:f   r14.0<8;8,1>:f
+    mov (16|M0)              r117.0<1>:f   r16.0<8;8,1>:f
+    mov (16|M0)              r119.0<1>:f   r18.0<8;8,1>:f
+(W) send(16|M0) null:f r112:ub 0x10000025 0x10031000 {EOT} //  DP_RC  wr:8, rd:0, Render Target Write msc:16, to #0
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 387404ff..c13b1dc4 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -843,6 +843,8 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
 		copy = gen8_render_copyfunc;
 	else if (IS_GEN9(devid) || IS_GEN10(devid))
 		copy = gen9_render_copyfunc;
+	else if (IS_GEN11(devid))
+		copy = gen11_render_copyfunc;
 
 	return copy;
 }
diff --git a/lib/rendercopy.h b/lib/rendercopy.h
index d1bb6284..35c28dd9 100644
--- a/lib/rendercopy.h
+++ b/lib/rendercopy.h
@@ -23,6 +23,11 @@ static inline void emit_vertex_normalized(struct intel_batchbuffer *batch,
 	OUT_BATCH(u.ui);
 }
 
+void gen11_render_copyfunc(struct intel_batchbuffer *batch,
+			  drm_intel_context *context,
+			  const struct igt_buf *src, unsigned src_x, unsigned src_y,
+			  unsigned width, unsigned height,
+			  const struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
 void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 			  drm_intel_context *context,
 			  const struct igt_buf *src, unsigned src_x, unsigned src_y,
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index f324fbd7..3bf18ae5 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -50,23 +50,53 @@ struct {
 } viewport;
 
 /* see lib/i915/shaders/ps/blit.g7a */
-static const uint32_t ps_kernel[][4] = {
+static const uint32_t ps_kernel_gen9[][4] = {
 #if 1
-   { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 },
-   { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 },
-   { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 },
-   { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 },
+	{ 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 },
+	{ 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 },
+	{ 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 },
+	{ 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 },
 #else
-   /* Write all -1 */
-   { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
-   { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
-   { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
+	/* Write all -1 */
+	{ 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
+	{ 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
+#endif
+};
+
+static const uint32_t ps_kernel_gen11[][4] = {
+#if 1
+	{ 0x0060005b, 0x2000c01c, 0x07206601, 0x01800404 },
+	{ 0x0060005b, 0x7100480c, 0x0722003b, 0x01880406 },
+	{ 0x0060005b, 0x2000c01c, 0x07206601, 0x01800408 },
+	{ 0x0060005b, 0x7200480c, 0x0722003b, 0x0188040a },
+	{ 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00404 },
+	{ 0x0060005b, 0x7300480c, 0x0722003b, 0x01a80406 },
+	{ 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00408 },
+	{ 0x0060005b, 0x7400480c, 0x0722003b, 0x01a8040a },
+	{ 0x02800031, 0x21804a4c, 0x06000e20, 0x08840001 },
+	{ 0x00800001, 0x2e204b28, 0x008d0180, 0x00000000 },
+	{ 0x00800001, 0x2e604b28, 0x008d01c0, 0x00000000 },
+	{ 0x00800001, 0x2ea04b28, 0x008d0200, 0x00000000 },
+	{ 0x00800001, 0x2ee04b28, 0x008d0240, 0x00000000 },
+	{ 0x05800031, 0x20004a44, 0x06000e20, 0x90031000 },
+#else
+	/* Write all -1 */
+	{ 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
+	{ 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
+	{ 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
 #endif
 };
 
@@ -907,11 +937,14 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
 
 #define BATCH_STATE_SPLIT 2048
 
-void gen9_render_copyfunc(struct intel_batchbuffer *batch,
+static
+void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 			  drm_intel_context *context,
-			  const struct igt_buf *src, unsigned src_x, unsigned src_y,
-			  unsigned width, unsigned height,
-			  const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+			  const struct igt_buf *src, unsigned src_x,
+			  unsigned src_y, unsigned width, unsigned height,
+			  const struct igt_buf *dst, unsigned dst_x,
+			  unsigned dst_y, const uint32_t ps_kernel[][4],
+			  uint32_t ps_kernel_size)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
@@ -928,7 +961,7 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 
 	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
 	ps_sampler_state  = gen8_create_sampler(batch);
-	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
+	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, ps_kernel_size);
 	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
 						     src_x, src_y,
 						     dst_x, dst_y,
@@ -1014,3 +1047,27 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	gen6_render_flush(batch, context, batch_end);
 	intel_batchbuffer_reset(batch);
 }
+
+void gen9_render_copyfunc(struct intel_batchbuffer *batch,
+			  drm_intel_context *context,
+			  const struct igt_buf *src, unsigned src_x, unsigned src_y,
+			  unsigned width, unsigned height,
+			  const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+
+{
+	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
+			  width, height, dst, dst_x, dst_y, ps_kernel_gen9,
+			  sizeof(ps_kernel_gen9));
+}
+
+void gen11_render_copyfunc(struct intel_batchbuffer *batch,
+			  drm_intel_context *context,
+			  const struct igt_buf *src, unsigned src_x, unsigned src_y,
+			  unsigned width, unsigned height,
+			  const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+
+{
+	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
+			  width, height, dst, dst_x, dst_y, ps_kernel_gen11,
+			  sizeof(ps_kernel_gen11));
+}
-- 
2.14.4



More information about the igt-dev mailing list