Mesa (main): freedreno: Fix the uniform/nonuniform handling for cat5 bindful modes.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 10 18:09:05 UTC 2021


Module: Mesa
Branch: main
Commit: 9e04f97d8e9d7dd3ceb951e3dd0acb260b24e5b8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9e04f97d8e9d7dd3ceb951e3dd0acb260b24e5b8

Author: Emma Anholt <emma at anholt.net>
Date:   Fri Oct 29 15:00:10 2021 -0700

freedreno: Fix the uniform/nonuniform handling for cat5 bindful modes.

We can tell which mode is which from the dynamically_uniform (compiler
doesn't know if you're uniform or not) vs uniform (compiler can see it's
uniform) cases in the blob.  Now that we have the right names, also use the
nonunif flag for encoding the actual non-uniform mode (previously, we were
always setting it in a way that meant uniform).

I verified this behavior back to a418 with samplers.  The a3xx blob I have
only does GLES3, so we don't have the opaque_type_indexing tests to see.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13601>

---

 src/freedreno/ir3/instr-a3xx.h   | 13 ++++++-------
 src/freedreno/ir3/ir3_parser.y   |  1 +
 src/freedreno/ir3/tests/disasm.c | 19 ++++++++++++++++---
 src/freedreno/isa/encode.c       | 10 ++++------
 src/freedreno/isa/ir3-cat5.xml   | 20 +++++++++-----------
 5 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 9e708694076..8957182b2aa 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -470,12 +470,11 @@ typedef enum {
  * for the texture.
  */
 typedef enum {
-   /* Use traditional GL binding model, get texture and sampler index
-    * from src3 which is not presumed to be uniform. This is
-    * backwards-compatible with earlier generations, where this field was
-    * always 0 and nonuniform-indexed sampling always worked.
+   /* Use traditional GL binding model, get texture and sampler index from src3
+    * which is presumed to be uniform on a4xx+ (a3xx doesn't have the other
+    * modes, but does handle non-uniform indexing).
     */
-   CAT5_NONUNIFORM = 0,
+   CAT5_UNIFORM = 0,
 
    /* The sampler base comes from the low 3 bits of a1.x, and the sampler
     * and texture index come from src3 which is presumed to be uniform.
@@ -494,9 +493,9 @@ typedef enum {
    CAT5_BINDLESS_A1_NONUNIFORM = 3,
 
    /* Use traditional GL binding model, get texture and sampler index
-    * from src3 which is presumed to be uniform.
+    * from src3 which is *not* presumed to be uniform.
     */
-   CAT5_UNIFORM = 4,
+   CAT5_NONUNIFORM = 4,
 
    /* The texture and sampler share the same base, and the sampler and
     * texture index come from src3 which is presumed to be uniform.
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 450e7b267d6..d41c38bc681 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -980,6 +980,7 @@ cat5_flag:         '.' T_3D       { instr->flags |= IR3_INSTR_3D; }
 |                  '.' 'p'        { instr->flags |= IR3_INSTR_P; }
 |                  '.' 's'        { instr->flags |= IR3_INSTR_S; }
 |                  '.' T_S2EN     { instr->flags |= IR3_INSTR_S2EN; }
+|                  '.' T_UNIFORM  { }
 |                  '.' T_NONUNIFORM  { instr->flags |= IR3_INSTR_NONUNIF; }
 |                  '.' T_BASE     { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
 cat5_flags:
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 0be470cf80d..41c03b82d64 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -43,6 +43,8 @@
 #include "isa/isa.h"
 
 /* clang-format off */
+/* Note: @anholt's 4xx disasm was done on an a418 Nexus 5x */
+#define INSTR_4XX(i, d, ...) { .gpu_id = 420, .instr = #i, .expected = d, __VA_ARGS__ }
 #define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ }
 #define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ }
 /* clang-format on */
@@ -307,6 +309,13 @@ static const struct test {
    INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
    INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
 
+   /* a4xx-a5xx has the exact same instrs in
+    * dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.(dynamically_)uniform_fragment
+    * with no change based on the mode. Note that we can't decode this yet.
+    */
+   /* INSTR_4XX(c7860000_00810001), */ /* ldc.1 r0.x, g[r1.x], 0, r0.x */
+   /* INSTR_5XX(c7860000_00800000), */ /* ldc.a.1 r0.x, g[r0.x], 0, r0.x */
+
    /* custom */
    INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */
 
@@ -342,10 +351,14 @@ static const struct test {
 
    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
    INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
-   /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out */
-   INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
+
+   /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d */
+   INSTR_4XX(a0c81f02_00800001, "sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.mode0 (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
+   INSTR_6XX(a0c81f07_0100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
+
    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
-   INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
+   INSTR_4XX(a0c81f02_80800001, "sam.s2en.nonuniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
+   INSTR_6XX(a0c81f07_8100000b, "sam.s2en.nonuniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
 
    /* NonUniform: */
    /* dEQP-VK.descriptor_indexing.storage_buffer */
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index 5b89353ec23..ad9dbd2e39b 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -204,12 +204,10 @@ extract_cat5_DESC_MODE(struct ir3_instruction *instr)
 				return CAT5_BINDLESS_UNIFORM;
 			}
 		} else {
-			/* TODO: This should probably be CAT5_UNIFORM, at least on a6xx,
-			 * as this is what the blob does and it is presumably faster, but
-			 * first we should confirm it is actually nonuniform and figure
-			 * out when the whole descriptor mode mechanism was introduced.
-			 */
-			return CAT5_NONUNIFORM;
+			if (instr->flags & IR3_INSTR_NONUNIF)
+				return CAT5_NONUNIFORM;
+			else
+				return CAT5_UNIFORM;
 		}
 		assert(!(instr->cat5.samp | instr->cat5.tex));
 	} else if (instr->flags & IR3_INSTR_B) {
diff --git a/src/freedreno/isa/ir3-cat5.xml b/src/freedreno/isa/ir3-cat5.xml
index a129b75fb03..dc2ee6ac44e 100644
--- a/src/freedreno/isa/ir3-cat5.xml
+++ b/src/freedreno/isa/ir3-cat5.xml
@@ -509,12 +509,11 @@ SOFTWARE.
 		display strings, but which have 'C' names that can be used
 		to generate header that the compiler can use
 	</doc>
-	<value val="0" display="CAT5_NONUNIFORM">
+	<value val="0" display="CAT5_UNIFORM">
 		<doc>
 			Use traditional GL binding model, get texture and sampler index
-			from src3 which is not presumed to be uniform. This is
-			backwards-compatible with earlier generations, where this field was
-			always 0 and nonuniform-indexed sampling always worked.
+			from src3 which is presumed to be uniform on a4xx+ (a3xx doesn't
+			have the other modes, but does handle non-uniform indexing).
 		</doc>
 	</value>
 	<value val="1" display="CAT5_BINDLESS_A1_UNIFORM">
@@ -536,10 +535,10 @@ SOFTWARE.
 			uniform.
 		</doc>
 	</value>
-	<value val="4" display="CAT5_UNIFORM">
+	<value val="4" display="CAT5_NONUNIFORM">
 		<doc>
 			Use traditional GL binding model, get texture and sampler index
-			from src3 which is presumed to be uniform.
+			from src3 which is *not* presumed to be uniform.
 		</doc>
 	</value>
 	<value val="5" display="CAT5_BINDLESS_UNIFORM">
@@ -587,17 +586,16 @@ SOFTWARE.
 
 <!-- Helper to map s2en/bindless DESC_MODE to whether it is uniform (flow control) mode -->
 <expr name="#cat5-s2enb-is-uniform">
+	({DESC_MODE} == 0) /* CAT5_UNIFORM */ ||
 	({DESC_MODE} == 1) /* CAT5_BINDLESS_A1_UNIFORM */ ||
-	({DESC_MODE} == 4) /* CAT5_UNIFORM */ ||
 	({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */
 </expr>
 
-<!-- Helper to map s2en/bindless DESC_MODE to whether it is non-uniform mode
-	 Note that it returns only for bindless for now, since we need to figure out bindful
-	 uniform/nonuniform mode correctly. See TODO in extract_cat5_DESC_MODE in encode.c -->
+<!-- Helper to map s2en/bindless DESC_MODE to whether it is non-uniform mode. -->
 <expr name="#cat5-s2enb-is-nonuniform">
 	({DESC_MODE} == 2) /* CAT5_BINDLESS_NONUNIFORM */ ||
-	({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */
+	({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */ ||
+	({DESC_MODE} == 4) /* CAT5_NONUNIFORM */
 </expr>
 
 <bitset name="#cat5-src3" size="8">



More information about the mesa-commit mailing list