[Mesa-dev] [PATCH 2/2] freedreno/ir3: add a pass to lower tg4 to txl, enable gather on a4xx

Ilia Mirkin imirkin at alum.mit.edu
Sun Nov 19 19:54:16 UTC 2017


Unfortunately Adreno A4xx hardware returns incorrect results with the
GATHER4 opcodes. As a result, we have to lower to 4 individual texture
calls (txl since we have to force lod to 0). We achieve this using
offsets, including on cube maps which normally never have offsets.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---

This pass relies on the hw doing the "right thing": working with non-constant
offsets, and not enforcing the usual offset limits (since the gather offset
will in effect get offset by another 1).

It fails two tests out of all the gather ones:

bin/zero-tex-coord textureGather
tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-textureGatherOffset-uniform-array-offset.shader_test

We haven't fully investigated why yet, but this is a good start.

Note that the blob does this differently - they modify the source coordinate.
However this seems unnecessary given that the hw can be made to use the
offsets.

Also please note that my knowledge of nir is minimal. Please carefully check
that I used the right helpers/etc. The current code is largely the result of
trial and error: keeping whatever did not trigger assertions.

 docs/features.txt                                  |   4 +-
 src/gallium/drivers/freedreno/Makefile.sources     |   1 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |   2 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       |   7 +-
 src/gallium/drivers/freedreno/ir3/ir3_nir.c        |   2 +
 src/gallium/drivers/freedreno/ir3/ir3_nir.h        |   1 +
 .../freedreno/ir3/ir3_nir_lower_tg4_to_tex.c       | 139 +++++++++++++++++++++
 src/gallium/drivers/freedreno/meson.build          |   1 +
 8 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c

diff --git a/docs/features.txt b/docs/features.txt
index 633d2593738..99fb1715e0b 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -130,7 +130,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi
   GL_ARB_tessellation_shader                            DONE (i965/gen7+)
   GL_ARB_texture_buffer_object_rgb32                    DONE (freedreno, i965/gen6+, llvmpipe, softpipe, swr)
   GL_ARB_texture_cube_map_array                         DONE (i965/gen6+, nv50, llvmpipe, softpipe)
-  GL_ARB_texture_gather                                 DONE (freedreno/a5xx, i965/gen6+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_texture_gather                                 DONE (freedreno, i965/gen6+, nv50, llvmpipe, softpipe, swr)
   GL_ARB_texture_query_lod                              DONE (freedreno, i965, nv50, llvmpipe, softpipe)
   GL_ARB_transform_feedback2                            DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
   GL_ARB_transform_feedback3                            DONE (i965/gen7+, llvmpipe, softpipe, swr)
@@ -256,7 +256,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
   GL_ARB_texture_multisample (Multisample textures)     DONE (i965/gen7+, nv50, r600, llvmpipe, softpipe)
   GL_ARB_texture_storage_multisample                    DONE (all drivers that support GL_ARB_texture_multisample)
   GL_ARB_vertex_attrib_binding                          DONE (all drivers)
-  GS5 Enhanced textureGather                            DONE (i965/gen7+, r600)
+  GS5 Enhanced textureGather                            DONE (freedreno, i965/gen7+, r600)
   GS5 Packing/bitfield/conversion functions             DONE (i965/gen6+, r600)
   GL_EXT_shader_integer_mix                             DONE (all drivers that support GLSL)
 
diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index b109a5a7a21..40c2eff0455 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -168,6 +168,7 @@ ir3_SOURCES := \
 	ir3/ir3_nir.c \
 	ir3/ir3_nir.h \
 	ir3/ir3_nir_lower_if_else.c \
+	ir3/ir3_nir_lower_tg4_to_tex.c \
 	ir3/ir3_print.c \
 	ir3/ir3_ra.c \
 	ir3/ir3_sched.c \
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index e61344fd104..62e4a574b90 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -264,7 +264,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 		return 0;
 
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-		if (is_a5xx(screen))
+		if (is_a4xx(screen) || is_a5xx(screen))
 			return 4;
 		return 0;
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index da4aeaa7acb..c97df4f1d63 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2399,9 +2399,12 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
 	 */
 	if (has_off | has_lod | has_bias) {
 		if (has_off) {
-			for (i = 0; i < coords; i++)
+			unsigned off_coords = coords;
+			if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+				off_coords--;
+			for (i = 0; i < off_coords; i++)
 				src1[nsrc1++] = off[i];
-			if (coords < 2)
+			if (off_coords < 2)
 				src1[nsrc1++] = create_immed(b, fui(0.0));
 			flags |= IR3_INSTR_O;
 		}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 7dd24e5f4ee..39d3bf15fde 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -188,6 +188,8 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
 	OPT_V(s, nir_lower_tex, &tex_options);
 	OPT_V(s, nir_lower_load_const_to_scalar);
+	if (shader->compiler->gpu_id < 500)
+		OPT_V(s, ir3_nir_lower_tg4_to_tex);
 
 	ir3_optimize_loop(s);
 
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
index e0e3108e328..ca873407fae 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h
@@ -38,6 +38,7 @@ void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layo
 
 bool ir3_nir_lower_if_else(nir_shader *shader);
 bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
+bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
 
 struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens);
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c
new file mode 100644
index 00000000000..087688cd234
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "ir3_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the
+ * gather results, rather than before. As a result, it must be emulated with
+ * direct texture calls.
+ *
+ * Replaces every tg4 in the block with four txl fetches at lod 0 and builds
+ * the vec4 result from channel tg4->component of each fetch.  Returns true if
+ * any tg4 was lowered.  mem_ctx is currently unused.
+ */
+static bool
+lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx)
+{
+	bool progress = false;
+	/* Extra texel offset for results 0..2; result 3 gets no extra offset. */
+	static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} };
+
+	nir_foreach_instr_safe(instr, block) {
+		if (instr->type != nir_instr_type_tex)
+			continue;
+
+		nir_tex_instr *tg4 = nir_instr_as_tex(instr);
+		if (tg4->op != nir_texop_tg4)
+			continue;
+
+		b->cursor = nir_before_instr(&tg4->instr);
+
+		nir_ssa_def *results[4];
+		int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset);
+		for (int i = 0; i < 4; i++) {
+			int num_srcs = tg4->num_srcs + 1 /* lod */;
+			if (offset_index < 0 && i < 3)
+				num_srcs++; /* offset, when tg4 had none */
+
+			nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs);
+			tex->op = nir_texop_txl;
+			tex->sampler_dim = tg4->sampler_dim;
+			tex->coord_components = tg4->coord_components;
+			tex->is_array = tg4->is_array;
+			tex->is_shadow = tg4->is_shadow;
+			tex->is_new_style_shadow = tg4->is_new_style_shadow;
+			tex->texture_index = tg4->texture_index;
+			tex->texture = nir_deref_var_clone(tg4->texture, tex);
+			tex->sampler_index = tg4->sampler_index;
+			tex->sampler = nir_deref_var_clone(tg4->sampler, tex);
+			tex->dest_type = tg4->dest_type;
+
+			for (int j = 0; j < tg4->num_srcs; j++) {
+				nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex);
+				tex->src[j].src_type = tg4->src[j].src_type;
+			}
+			if (i != 3) {
+				nir_ssa_def *offset =
+					nir_vec2(b, nir_imm_int(b, offsets[i][0]),
+							 nir_imm_int(b, offsets[i][1]));
+				if (offset_index < 0) {
+					tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset);
+					tex->src[tg4->num_srcs].src_type = nir_tex_src_offset;
+				} else {
+					/* Do not assume an SSA source; fetch it as a vec2 def. */
+					nir_ssa_def *orig =
+						nir_ssa_for_src(b, tex->src[offset_index].src, 2);
+					tex->src[offset_index].src =
+						nir_src_for_ssa(nir_iadd(b, orig, offset));
+				}
+			}
+			tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
+			tex->src[num_srcs - 1].src_type = nir_tex_src_lod;
+
+			nir_ssa_dest_init(&tex->instr, &tex->dest,
+							  nir_tex_instr_dest_size(tex), 32, NULL);
+			nir_builder_instr_insert(b, &tex->instr);
+			results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
+		}
+
+		nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]);
+		nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result));
+		nir_instr_remove(&tg4->instr);
+		progress = true;
+	}
+
+	return progress;
+}
+
+/* Lowers tg4 in every block of impl; returns true if anything changed. */
+static bool
+lower_tg4_func(nir_function_impl *impl)
+{
+	void *mem_ctx = ralloc_parent(impl);
+	bool progress = false;
+	nir_builder b;
+	nir_builder_init(&b, impl);
+	nir_foreach_block_safe(block, impl) {
+		progress |= lower_tg4(block, &b, mem_ctx);
+	}
+	/* Only instructions change, never the CFG, so keep CFG metadata. */
+	if (progress)
+		nir_metadata_preserve(impl, nir_metadata_block_index |
+									nir_metadata_dominance);
+	return progress;
+}
+
+/* Pass entry point: lowers tg4 in every function whose impl is non-NULL. */
+bool
+ir3_nir_lower_tg4_to_tex(nir_shader *shader)
+{
+	bool changed = false;
+
+	nir_foreach_function(func, shader) {
+		if (func->impl && lower_tg4_func(func->impl))
+			changed = true;
+	}
+	return changed;
+}
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index d2b901334d0..01b5836be29 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -188,6 +188,7 @@ files_libfreedreno = files(
   'ir3/ir3_nir.c',
   'ir3/ir3_nir.h',
   'ir3/ir3_nir_lower_if_else.c',
+  'ir3/ir3_nir_lower_tg4_to_tex.c',
   'ir3/ir3_print.c',
   'ir3/ir3_ra.c',
   'ir3/ir3_sched.c',
-- 
2.13.6



More information about the mesa-dev mailing list