[igt-dev] [PATCH i-g-t] libs: Adding rendercopy support for gen11
Lukasz Kalamarz
lukasz.kalamarz at intel.com
Tue Oct 9 14:45:25 UTC 2018
From: "Kalamarz, Lukasz" <lukasz.kalamarz at intel.com>
Gen11 introduced EU changes that caused the gem_render_copy test to
fail. The shader used on gen9 is no longer applicable after those changes
and had to be adapted for gen11. Since the only difference is the
shader, add a gen11 wrapper around the gen9 function.
Signed-off-by: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
Cc: Michał Winiarski <michal.winiarski at intel.com>
Cc: Antonio Argenziano <antonio.argenziano at intel.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Katarzyna Dec <katarzyna.dec at intel.com>
---
lib/i915/shaders/ps/blit.g11a | 22 ++++++++++
lib/intel_batchbuffer.c | 2 +
lib/rendercopy.h | 5 +++
lib/rendercopy_gen9.c | 97 ++++++++++++++++++++++++++++++++++---------
4 files changed, 106 insertions(+), 20 deletions(-)
create mode 100644 lib/i915/shaders/ps/blit.g11a
diff --git a/lib/i915/shaders/ps/blit.g11a b/lib/i915/shaders/ps/blit.g11a
new file mode 100644
index 00000000..15fe78af
--- /dev/null
+++ b/lib/i915/shaders/ps/blit.g11a
@@ -0,0 +1,22 @@
+/* This is the same shader as for previous gens. On Gen11 the pln instruction was removed and has to be replaced by mad.
+This shader was generated using the IGA tool (not the assembler integrated into IGT).
+*/
+
+(W) mad(8|M0) acc0.0<1>:nf r6.7<0;0>:f r2.0<8;1>:f r6.0<0>:f
+(W) mad(8|M0) r10.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.1<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.0<0>:f
+(W) mad(8|M0) r11.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.1<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.4<0;0>:f r2.0<8;1>:f r6.4<0>:f
+(W) mad(8|M0) r12.0<1>:f acc0.0<8;1>:nf r3.0<8;1>:f r6.5<0>:f
+
+(W) mad(8|M0) acc0.0<1>:nf r6.0<0;0>:f r4.0<8;1>:f r6.4<0>:f
+(W) mad(8|M0) r13.0<1>:f acc0.0<8;1>:nf r5.0<8;1>:f r6.5<0>:f
+
+(W) send(16|M0) r112:f r10:ub 0x10000002 0x08840001 // SAMPLER wr:4, rd:8, fc: 0x40001
+ mov (16|M0) r113.0<1>:f r12.0<8;8,1>:f
+ mov (16|M0) r115.0<1>:f r14.0<8;8,1>:f
+ mov (16|M0) r117.0<1>:f r16.0<8;8,1>:f
+ mov (16|M0) r119.0<1>:f r18.0<8;8,1>:f
+(W) send(16|M0) null:f r112:ub 0x10000025 0x10031000 {EOT} // DP_RC wr:8, rd:0, Render Target Write msc:16, to #0
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 387404ff..c13b1dc4 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -843,6 +843,8 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
copy = gen8_render_copyfunc;
else if (IS_GEN9(devid) || IS_GEN10(devid))
copy = gen9_render_copyfunc;
+ else if (IS_GEN11(devid))
+ copy = gen11_render_copyfunc;
return copy;
}
diff --git a/lib/rendercopy.h b/lib/rendercopy.h
index d1bb6284..35c28dd9 100644
--- a/lib/rendercopy.h
+++ b/lib/rendercopy.h
@@ -23,6 +23,11 @@ static inline void emit_vertex_normalized(struct intel_batchbuffer *batch,
OUT_BATCH(u.ui);
}
+void gen11_render_copyfunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ const struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ const struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
void gen9_render_copyfunc(struct intel_batchbuffer *batch,
drm_intel_context *context,
const struct igt_buf *src, unsigned src_x, unsigned src_y,
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index f324fbd7..3bf18ae5 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -50,23 +50,53 @@ struct {
} viewport;
/* see lib/i915/shaders/ps/blit.g7a */
-static const uint32_t ps_kernel[][4] = {
+static const uint32_t ps_kernel_gen9[][4] = {
#if 1
- { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 },
- { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 },
- { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 },
- { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 },
+ { 0x0080005a, 0x2f403ae8, 0x3a0000c0, 0x008d0040 },
+ { 0x0080005a, 0x2f803ae8, 0x3a0000d0, 0x008d0040 },
+ { 0x02800031, 0x2e203a48, 0x0e8d0f40, 0x08840001 },
+ { 0x05800031, 0x20003a40, 0x0e8d0e20, 0x90031000 },
#else
- /* Write all -1 */
- { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
- { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
- { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
+ /* Write all -1 */
+ { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
+ { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
+#endif
+};
+
+static const uint32_t ps_kernel_gen11[][4] = {
+#if 1
+ { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800404 },
+ { 0x0060005b, 0x7100480c, 0x0722003b, 0x01880406 },
+ { 0x0060005b, 0x2000c01c, 0x07206601, 0x01800408 },
+ { 0x0060005b, 0x7200480c, 0x0722003b, 0x0188040a },
+ { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00404 },
+ { 0x0060005b, 0x7300480c, 0x0722003b, 0x01a80406 },
+ { 0x0060005b, 0x2000c01c, 0x07206e01, 0x01a00408 },
+ { 0x0060005b, 0x7400480c, 0x0722003b, 0x01a8040a },
+ { 0x02800031, 0x21804a4c, 0x06000e20, 0x08840001 },
+ { 0x00800001, 0x2e204b28, 0x008d0180, 0x00000000 },
+ { 0x00800001, 0x2e604b28, 0x008d01c0, 0x00000000 },
+ { 0x00800001, 0x2ea04b28, 0x008d0200, 0x00000000 },
+ { 0x00800001, 0x2ee04b28, 0x008d0240, 0x00000000 },
+ { 0x05800031, 0x20004a44, 0x06000e20, 0x90031000 },
+#else
+ /* Write all -1 */
+ { 0x00600001, 0x2e000608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e200608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e400608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e600608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2e800608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ea00608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ec00608, 0x00000000, 0x3f800000 },
+ { 0x00600001, 0x2ee00608, 0x00000000, 0x3f800000 },
+ { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
#endif
};
@@ -907,11 +937,14 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
#define BATCH_STATE_SPLIT 2048
-void gen9_render_copyfunc(struct intel_batchbuffer *batch,
+static
+void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
drm_intel_context *context,
- const struct igt_buf *src, unsigned src_x, unsigned src_y,
- unsigned width, unsigned height,
- const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+ const struct igt_buf *src, unsigned src_x,
+ unsigned src_y, unsigned width, unsigned height,
+ const struct igt_buf *dst, unsigned dst_x,
+ unsigned dst_y, const uint32_t ps_kernel[][4],
+ uint32_t ps_kernel_size)
{
uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
uint32_t scissor_state;
@@ -928,7 +961,7 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
ps_binding_table = gen8_bind_surfaces(batch, src, dst);
ps_sampler_state = gen8_create_sampler(batch);
- ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
+ ps_kernel_off = gen8_fill_ps(batch, ps_kernel, ps_kernel_size);
vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
src_x, src_y,
dst_x, dst_y,
@@ -1014,3 +1047,27 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
gen6_render_flush(batch, context, batch_end);
intel_batchbuffer_reset(batch);
}
+
+void gen9_render_copyfunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ const struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+
+{
+ _gen9_render_copyfunc(batch, context, src, src_x, src_y,
+ width, height, dst, dst_x, dst_y, ps_kernel_gen9,
+ sizeof(ps_kernel_gen9));
+}
+
+void gen11_render_copyfunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ const struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+
+{
+ _gen9_render_copyfunc(batch, context, src, src_x, src_y,
+ width, height, dst, dst_x, dst_y, ps_kernel_gen11,
+ sizeof(ps_kernel_gen11));
+}
--
2.14.4
More information about the igt-dev
mailing list