[Intel-gfx] [PATCH i-g-t 2/3] gem_render_copy: Add functions for performance testing
Antti Koskipaa
antti.koskipaa at linux.intel.com
Mon Oct 5 04:42:24 PDT 2015
gen8_render_writefunc does only constant writes to the
framebuffer, no texture reads. Used for testing write bandwidth.
gen8_render_readfunc does only reads from the sampler and
discards the result. Used for testing sampler read bandwidth.
Alpha blend tests and support for more Gens still to come.
Signed-off-by: Antti Koskipaa <antti.koskipaa at linux.intel.com>
---
lib/gen8_render.h | 3 ++
lib/intel_batchbuffer.c | 40 +++++++++++++++++++++++++++
lib/intel_batchbuffer.h | 2 ++
lib/rendercopy.h | 12 ++++++++
lib/rendercopy_gen8.c | 53 +++++++++++++++++++++++++++++++----
shaders/ps/discard.g7a | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
shaders/ps/fill.g7a | 6 ++++
7 files changed, 184 insertions(+), 5 deletions(-)
create mode 100644 shaders/ps/discard.g7a
create mode 100644 shaders/ps/fill.g7a
diff --git a/lib/gen8_render.h b/lib/gen8_render.h
index ba3f9f2..610a457 100644
--- a/lib/gen8_render.h
+++ b/lib/gen8_render.h
@@ -60,6 +60,9 @@
#define GEN8_3DSTATE_WM_DEPTH_STENCIL GEN6_3D(3, 0, 0x4e)
#define GEN8_3DSTATE_PS_EXTRA GEN6_3D(3,0, 0x4f)
# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
+# define GEN8_PSX_DONT_WRITE_RT (1 << 30)
+# define GEN8_PSX_OMASK_PRESENT (1 << 29)
+# define GEN8_PSX_KILLS_PIXEL (1 << 28)
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN6_3D(3, 0, 0x23)
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 692521f..e3cf622 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -748,6 +748,46 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
}
/**
+ * igt_get_render_writefunc:
+ * @devid: pci device id
+ *
+ * Returns:
+ *
+ * The platform-specific render write function pointer for the device
+ * specified with @devid. Will return NULL when no render write function is
+ * implemented.
+ */
+igt_render_copyfunc_t igt_get_render_writefunc(int devid)
+{
+ igt_render_copyfunc_t copy = NULL;
+
+ if (IS_GEN8(devid)) /* the only generation with a write function here */
+ copy = gen8_render_writefunc;
+
+ return copy;
+}
+
+/**
+ * igt_get_render_readfunc:
+ * @devid: pci device id
+ *
+ * Returns:
+ *
+ * The platform-specific render read function pointer for the device
+ * specified with @devid. Will return NULL when no render read function is
+ * implemented.
+ */
+igt_render_copyfunc_t igt_get_render_readfunc(int devid)
+{
+ igt_render_copyfunc_t copy = NULL;
+
+ if (IS_GEN8(devid)) /* the only generation with a read function here */
+ copy = gen8_render_readfunc;
+
+ return copy;
+}
+
+/**
* igt_get_media_fillfunc:
* @devid: pci device id
*
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 869747d..7d8f990 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -274,6 +274,8 @@ typedef void (*igt_render_copyfunc_t)(struct intel_batchbuffer *batch,
struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
igt_render_copyfunc_t igt_get_render_copyfunc(int devid);
+igt_render_copyfunc_t igt_get_render_writefunc(int devid);
+igt_render_copyfunc_t igt_get_render_readfunc(int devid);
/**
* igt_fillfunc_t:
diff --git a/lib/rendercopy.h b/lib/rendercopy.h
index fdc3cab..f4ec74b 100644
--- a/lib/rendercopy.h
+++ b/lib/rendercopy.h
@@ -53,3 +53,15 @@ void gen2_render_copyfunc(struct intel_batchbuffer *batch,
struct igt_buf *src, unsigned src_x, unsigned src_y,
unsigned width, unsigned height,
struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
+
+void gen8_render_writefunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
+
+void gen8_render_readfunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y);
diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
index 4a9a283..b243ed7 100644
--- a/lib/rendercopy_gen8.c
+++ b/lib/rendercopy_gen8.c
@@ -71,6 +71,28 @@ static const uint32_t ps_kernel_copy[][4] = {
#endif
};
+/* see shaders/ps/discard.g7a */
+static const uint32_t ps_kernel_read[][4] = {
+ { 0x0060005a, 0x21403ae8, 0x3a0000c0, 0x008d0040 },
+ { 0x0060005a, 0x21603ae8, 0x3a0000c0, 0x008d0080 },
+ { 0x0060005a, 0x21803ae8, 0x3a0000d0, 0x008d0040 },
+ { 0x0060005a, 0x21a03ae8, 0x3a0000d0, 0x008d0080 },
+ { 0x02800031, 0x2e4022e8, 0x0e000140, 0x08840001 },
+ { 0x00000001, 0x2e020e08, 0x08000000, 0x00000000 },
+ { 0x00000001, 0x2e000e08, 0x08000000, 0x00000000 },
+ { 0x00000001, 0x2e010e08, 0x08000000, 0x00000000 },
+ { 0x05800031, 0x200022e0, 0x0e000e00, 0x920b1000 },
+};
+
+/* see shaders/ps/fill.g7a */
+static const uint32_t ps_kernel_write[][4] = {
+ { 0x00800001, 0x2e003ee8, 0x38000000, 0x3f800000 },
+ { 0x00800001, 0x2e403ee8, 0x38000000, 0x3f7d70a4 },
+ { 0x00800001, 0x2e803ee8, 0x38000000, 0x3f000000 },
+ { 0x00800001, 0x2ec03ee8, 0x38000000, 0x3dcccccd },
+ { 0x05800031, 0x200022e0, 0x0e000e00, 0x90031000 },
+};
+
/* AUB annotation support */
#define MAX_ANNOTATIONS 33
struct annotations_context {
@@ -779,7 +801,7 @@ gen8_emit_sf(struct intel_batchbuffer *batch)
}
static void
-gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
+gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel, bool kills_pixel) {
const int max_threads = 63;
OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2));
@@ -819,7 +841,8 @@ gen8_emit_ps(struct intel_batchbuffer *batch, uint32_t kernel) {
OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2));
- OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE);
+ OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE |
+ (kills_pixel ? (GEN8_PSX_KILLS_PIXEL | GEN8_PSX_DONT_WRITE_RT) : 0));
}
static void
@@ -925,7 +948,7 @@ static void _gen8_render_func(struct intel_batchbuffer *batch,
struct igt_buf *src, unsigned src_x, unsigned src_y,
unsigned width, unsigned height,
struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
- const uint32_t ps_kernel[][4], int kernel_size)
+ const uint32_t ps_kernel[][4], int kernel_size, bool discard)
{
struct annotations_context aub_annotations;
uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
@@ -1001,7 +1024,7 @@ static void _gen8_render_func(struct intel_batchbuffer *batch,
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
OUT_BATCH(ps_sampler_state);
- gen8_emit_ps(batch, ps_kernel_off);
+ gen8_emit_ps(batch, ps_kernel_off, discard);
OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS);
OUT_BATCH(scissor_state);
@@ -1039,5 +1062,25 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
{
_gen8_render_func(batch, context, src, src_x, src_y, width, height,
- dst, dst_x, dst_y, ps_kernel_copy, sizeof(ps_kernel_copy));
+ dst, dst_x, dst_y, ps_kernel_copy, sizeof(ps_kernel_copy), false);
+}
+
+void gen8_render_writefunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+{ /* Runs _gen8_render_func with the ps_kernel_write shader and discard=false. */
+ _gen8_render_func(batch, context, src, src_x, src_y, width, height,
+ dst, dst_x, dst_y, ps_kernel_write, sizeof(ps_kernel_write), false);
+}
+
+void gen8_render_readfunc(struct intel_batchbuffer *batch,
+ drm_intel_context *context,
+ struct igt_buf *src, unsigned src_x, unsigned src_y,
+ unsigned width, unsigned height,
+ struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
+{ /* discard=true: gen8_emit_ps sets GEN8_PSX_KILLS_PIXEL | GEN8_PSX_DONT_WRITE_RT. */
+ _gen8_render_func(batch, context, src, src_x, src_y, width, height,
+ dst, dst_x, dst_y, ps_kernel_read, sizeof(ps_kernel_read), true);
}
diff --git a/shaders/ps/discard.g7a b/shaders/ps/discard.g7a
new file mode 100644
index 0000000..eafbd60
--- /dev/null
+++ b/shaders/ps/discard.g7a
@@ -0,0 +1,73 @@
+/* Assemble with ".../intel-gen4asm/src/intel-gen4asm -g 7" */
+
+
+/* Move pixels into g10-g13. The pixel shader does not load what you want. It
+ * loads the input data for a plane function to calculate what you want. The
+ * following is boiler plate code to move our normalized texture coordinates
+ * (u,v) into g10-g13. It does this 4 subspans (16 pixels) at a time.
+ *
+ * This should do the same thing, but it doesn't work for some reason.
+ * pln(16) g10 g6<0,1,0>F g2<8,8,1>F { align1 };
+ * pln(16) g12 g6.16<1>F g2<8,8,1>F { align1 };
+ */
+/* U */
+pln (8) g10<1>F g6.0<0,1,0>F g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
+pln (8) g11<1>F g6.0<0,1,0>F g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
+/* V */
+pln (8) g12<1>F g6.16<0,1,0> g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
+pln (8) g13<1>F g6.16<0,1,0> g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
+
+
+/* Next we want the sampler to fetch the src texture (ie. src buffer). This
+ * is done with a pretty simple send message. The output goes to g114; our
+ * final send message starts at g112, which holds its message header.
+ * In intel-gen4asm, we should end up parsed by the following rule:
+ * predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions
+ *
+ * Send message descriptor:
+ * 28:25 = message len = 4 // our 4 registers have 16 pixels
+ * 24:20 = response len = 8 // Each pixel is RGBA32, so we need 8 registers
+ * 19:19 = header present = 0
+ * 18:17 = SIMD16 = 2
+ * 16:12 = TYPE = 0 (regular sample)
+ * 11:08 = Sampler index = ignored/0
+ * 7:0 = binding table index = src = 1
+ * 0x8840001
+ *
+ * Send message extra descriptor
+ * 5:5 = End of Thread = 0
+ * 3:0 = Target Function ID = SFID_SAMPLER (2)
+ * 0x2
+ */
+
+send(16) g114 g10 0x2 0x8840001 { align1 };
+
+/* Next discard the result. This is done by using a send message to the pixel
+ * data port with all the output masks set to 0. These are in the message header,
+ * in dword g112.2.
+ */
+mov(1) g112.2<1>UD 0x00000000 { align1 };
+
+/* Set pixel offsets in the header to 0 */
+mov(1) g112.0<1>UD 0 { align1 };
+mov(1) g112.1<1>UD 0 { align1 };
+
+/* Send message descriptor:
+ * 28:25 = message len = 12 // 16 pixels RGBA32 + header
+ * 24:20 = response len = 0
+ * 19:19 = header present = 1
+ * 17:14 = message type = Render Target Write (12)
+ * 12:12 = Last Render Target Select = 1
+ * 10:08 = Message Type = SIMD16 (0)
+ * 07:00 = Binding Table Index = dest = 0
+ * 0x120B1000
+ *
+ * Send message extra descriptor
+ * 5:5 = End of Thread = 1
+ * 3:0 = Target Function ID = SFID_DP_RC (5)
+ * 0x25
+ */
+
+send(16) null g112 0x25 0x120B1000 { align1, EOT };
+
+/* vim: set ft=c ts=4 sw=2 tw=80 et: */
diff --git a/shaders/ps/fill.g7a b/shaders/ps/fill.g7a
new file mode 100644
index 0000000..89f130f
--- /dev/null
+++ b/shaders/ps/fill.g7a
@@ -0,0 +1,6 @@
+mov (16) g112<1>F 1.0F { align1 };
+mov (16) g114<1>F 0.99F { align1 };
+mov (16) g116<1>F 0.5F { align1 };
+mov (16) g118<1>F 0.1F { align1 };
+send (16) null g112 0x25 0x10031000 { align1, EOT };
+/* <8,8,1>F render RT write SIMD16 LastRT Surface = 0 mlen 8 rlen 0 { align1 1H EOT }; */
--
2.3.6
More information about the Intel-gfx
mailing list