Mesa (master): freedreno/ir3: rework varying packing

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Mar 30 17:07:16 UTC 2019


Module: Mesa
Branch: master
Commit: 831f1a05c0de3f9623115d17bf0904ebe1c4a74e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=831f1a05c0de3f9623115d17bf0904ebe1c4a74e

Author: Rob Clark <robdclark at gmail.com>
Date:   Sun Mar 24 11:58:21 2019 -0400

freedreno/ir3: rework varying packing

Originally we kept track of a table of inputs.  But with new-style frag
inputs this becomes awkward.  Re-work it so that initially we assigned
un-packed varying locations, and then after the shader is compiled scan
to find actual used inputs, and re-pack.

Signed-off-by: Rob Clark <robdclark at gmail.com>

---

 src/freedreno/ir3/ir3_compiler_nir.c | 128 +++++++++++++++++++++++++++--------
 1 file changed, 98 insertions(+), 30 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index afab76ab8c8..ec741ae92d9 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -83,12 +83,12 @@ create_input(struct ir3_context *ctx, unsigned n)
 }
 
 static struct ir3_instruction *
-create_frag_input(struct ir3_context *ctx, bool use_ldlv)
+create_frag_input(struct ir3_context *ctx, bool use_ldlv, unsigned n)
 {
 	struct ir3_block *block = ctx->block;
 	struct ir3_instruction *instr;
-	/* actual inloc is assigned and fixed up later: */
-	struct ir3_instruction *inloc = create_immed(block, 0);
+	/* packed inloc is fixed up later: */
+	struct ir3_instruction *inloc = create_immed(block, n);
 
 	if (use_ldlv) {
 		instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
@@ -2275,7 +2275,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 				 */
 				so->inputs[n].slot = VARYING_SLOT_VAR8;
 				so->inputs[n].bary = true;
-				instr = create_frag_input(ctx, false);
+				instr = create_frag_input(ctx, false, idx);
 			} else {
 				bool use_ldlv = false;
 
@@ -2304,7 +2304,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 
 				so->inputs[n].bary = true;
 
-				instr = create_frag_input(ctx, use_ldlv);
+				instr = create_frag_input(ctx, use_ldlv, idx);
 			}
 
 			compile_assert(ctx, idx < ctx->ir->ninputs);
@@ -2326,6 +2326,92 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 	}
 }
 
+/* Initially we assign non-packed inloc's for varyings, as we don't really
+ * know up-front which components will be unused.  After all the compilation
+ * stages we scan the shader to see which components are actually used, and
+ * re-pack the inlocs to eliminate unneeded varyings.
+ */
+static void
+pack_inlocs(struct ir3_context *ctx)
+{
+	struct ir3_shader_variant *so = ctx->so;
+	uint8_t used_components[so->inputs_count];
+
+	memset(used_components, 0, sizeof(used_components));
+
+	/*
+	 * First Step: scan shader to find which bary.f/ldlv remain:
+	 */
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			if (is_input(instr)) {
+				unsigned inloc = instr->regs[1]->iim_val;
+				unsigned i = inloc / 4;
+				unsigned j = inloc % 4;
+
+				compile_assert(ctx, instr->regs[1]->flags & IR3_REG_IMMED);
+				compile_assert(ctx, i < so->inputs_count);
+
+				used_components[i] |= 1 << j;
+			}
+		}
+	}
+
+	/*
+	 * Second Step: reassign varying inloc/slots:
+	 */
+
+	unsigned actual_in = 0;
+	unsigned inloc = 0;
+
+	for (unsigned i = 0; i < so->inputs_count; i++) {
+		unsigned compmask = 0, maxcomp = 0;
+
+		so->inputs[i].ncomp = 0;
+		so->inputs[i].inloc = inloc;
+		so->inputs[i].bary = false;
+
+		for (unsigned j = 0; j < 4; j++) {
+			if (!(used_components[i] & (1 << j)))
+				continue;
+
+			compmask |= (1 << j);
+			actual_in++;
+			so->inputs[i].ncomp++;
+			maxcomp = j + 1;
+
+			/* at this point, since used_components[i] mask is only
+			 * considering varyings (ie. not sysvals) we know this
+			 * is a varying:
+			 */
+			so->inputs[i].bary = true;
+		}
+
+		if (so->inputs[i].bary) {
+			so->varying_in++;
+			so->inputs[i].compmask = (1 << maxcomp) - 1;
+			inloc += maxcomp;
+		}
+	}
+
+	/*
+	 * Third Step: reassign packed inloc's:
+	 */
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			if (is_input(instr)) {
+				unsigned inloc = instr->regs[1]->iim_val;
+				unsigned i = inloc / 4;
+				unsigned j = inloc % 4;
+
+				instr->regs[1]->iim_val = so->inputs[i].inloc + j;
+			}
+		}
+	}
+}
+
 static void
 setup_output(struct ir3_context *ctx, nir_variable *out)
 {
@@ -2596,7 +2682,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	struct ir3_context *ctx;
 	struct ir3 *ir;
 	struct ir3_instruction **inputs;
-	unsigned i, actual_in, inloc;
+	unsigned i;
 	int ret = 0, max_bary;
 
 	assert(!so->ir);
@@ -2741,6 +2827,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		ir3_print(ir);
 	}
 
+	if (so->type == MESA_SHADER_FRAGMENT)
+		pack_inlocs(ctx);
+
 	/* fixup input/outputs: */
 	for (i = 0; i < so->outputs_count; i++) {
 		/* sometimes we get outputs that don't write the .x coord, like:
@@ -2761,34 +2850,15 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	}
 
 	/* Note that some or all channels of an input may be unused: */
-	actual_in = 0;
-	inloc = 0;
 	for (i = 0; i < so->inputs_count; i++) {
-		unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0;
-		so->inputs[i].ncomp = 0;
-		so->inputs[i].inloc = inloc;
+		unsigned j, reg = regid(63,0);
 		for (j = 0; j < 4; j++) {
 			struct ir3_instruction *in = inputs[(i*4) + j];
+
 			if (in && !(in->flags & IR3_INSTR_UNUSED)) {
-				compmask |= (1 << j);
 				reg = in->regs[0]->num - j;
-				actual_in++;
-				so->inputs[i].ncomp++;
-				if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) {
-					/* assign inloc: */
-					assert(in->regs[1]->flags & IR3_REG_IMMED);
-					in->regs[1]->iim_val = inloc + j;
-					maxcomp = j + 1;
-				}
 			}
 		}
-		if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) {
-			so->varying_in++;
-			so->inputs[i].compmask = (1 << maxcomp) - 1;
-			inloc += maxcomp;
-		} else if (!so->inputs[i].sysval) {
-			so->inputs[i].compmask = compmask;
-		}
 		so->inputs[i].regid = reg;
 	}
 
@@ -2808,9 +2878,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	so->branchstack = ctx->max_stack;
 
 	/* Note that actual_in counts inputs that are not bary.f'd for FS: */
-	if (so->type == MESA_SHADER_VERTEX)
-		so->total_in = actual_in;
-	else
+	if (so->type == MESA_SHADER_FRAGMENT)
 		so->total_in = max_bary + 1;
 
 	so->max_sun = ir->max_sun;




More information about the mesa-commit mailing list