[Piglit] [PATCH 5/8] arb_shader_image_load_store: add additional coherency test
Nicolai Hähnle
nhaehnle at gmail.com
Thu Apr 7 01:10:49 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The existing coherency test isn't a good match for the AMD GCN execution
model.
---
.../execution/coherency-extra.shader_test | 90 ++++++++++++++++++++++
1 file changed, 90 insertions(+)
create mode 100644 tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test
diff --git a/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test b/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test
new file mode 100644
index 0000000..f718cd2
--- /dev/null
+++ b/tests/spec/arb_shader_image_load_store/execution/coherency-extra.shader_test
@@ -0,0 +1,90 @@
+# Additional coherency test that can demonstrate failures in an incorrect
+# coherency implementation for AMD GCN, unlike arb_shader_image_load_store-coherency.
+#
+# The real problem with coherency in AMD GCN is separate, non-coherent L1
+# caches, i.e. when a shader execution writes to an image in a CU that uses
+# one L1 cache, and a different shader execution reads from the image
+# in a CU with a different L1 cache.
+#
+# This test uses atomic accesses to a control texture to select the very first
+# fragment shader thread as a writer thread which keeps changing a data
+# texture in a tight loop. All other threads become reader threads which
+# report success if they see two different values of the same texture.
+#
+# This test can produce a false negative (false failure) in two cases:
+# 1) The timeout value ITERS is too low,
+# 2) There is no (or insufficient) parallelism in the implementation, and
+# therefore the writer thread must finish before most of the reader threads
+# get a chance to run.
+#
+
+[require]
+GL >= 3.3
+GLSL >= 3.30
+GL_ARB_shader_image_load_store
+SIZE 256 256
+
+[vertex shader passthrough]
+
+[fragment shader]
+#version 330
+#extension GL_ARB_shader_image_load_store: enable
+
+// Change this to 0 to get a control test that should fail on hardware
+// without coherent L1 caches.
+//
+// Need volatile instead of just coherent to prevent overly smart compilers
+// from moving the imageLoad/imageStore out of the loop.
+#if 1
+volatile
+#endif
+layout(r32i) uniform iimage2D tex;
+volatile layout(r32i) uniform iimage2D ctrl;
+out vec4 outcolor;
+
+// Add a timeout so that an incorrect coherency implementation doesn't hang
+// the GPU. If this timeout is too low, you can get false negative results
+// because the writer thread quits before all reader threads have
+// executed.
+#define ITERS 100000
+
+void main() // The invocation with id 0 keeps rewriting tex; all others poll tex for a change.
+{
+ int id = imageAtomicAdd(ctrl, ivec2(0, 0), 1); // ticket from ctrl(0,0): atomic add yields the pre-add value (assumes ctrl starts zeroed -- TODO confirm)
+ int orig = imageLoad(tex, ivec2(0, 0)).x; // value of tex seen before the loop; readers compare against it
+ bool done = false;
+
+ outcolor = vec4(0.0, 0.0, 0.0, 1.0); // opaque black; one channel is raised at the end
+
+ for (int iter = 0; iter < ITERS && !done; ++iter) { // ITERS is the timeout; the loop also stops once done is set
+ if (id == 0) { // writer path
+ imageStore(tex, ivec2(0, 0), ivec4(iter)); // store a new value every iteration
+ if (imageLoad(ctrl, ivec2(0, 1)).x >= 256 * 256) // ctrl(0,1) counts check-ins; 256 * 256 matches SIZE 256 256
+ done = true;
+ } else { // reader path
+ int current = imageLoad(tex, ivec2(0, 0)).x;
+ if (current != orig) // tex changed since the initial load, i.e. a store became visible
+ done = true;
+ }
+
+ if (done || (id == 0 && iter == 0)) // writer checks in once at iter 0; every invocation checks in when it finishes
+ imageAtomicAdd(ctrl, ivec2(0, 1), 1);
+ }
+
+ if (done)
+ outcolor.y = 1.0; // green: finished before the timeout
+ else
+ outcolor.x = 1.0; // red: ITERS ran out without finishing
+}
+
+[test]
+texture integer 0 (1, 2) (0, 0) GL_R32I
+image texture 0 GL_R32I
+texture integer 1 (1, 1) (0, 0) GL_R32I
+image texture 1 GL_R32I
+
+uniform int ctrl 0
+uniform int tex 1
+draw rect -1 -1 2 2
+
+probe all rgba 0.0 1.0 0.0 1.0
--
2.5.0
More information about the Piglit
mailing list