Mesa (master): freedreno/ir3: sample-shading support

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Apr 25 22:12:34 UTC 2019


Module: Mesa
Branch: master
Commit: ee2e3a07bb1d58d761bf7250e88822b6955d13bf
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee2e3a07bb1d58d761bf7250e88822b6955d13bf

Author: Rob Clark <robdclark at chromium.org>
Date:   Thu Apr 25 12:25:02 2019 -0700

freedreno/ir3: sample-shading support

Add the compiler support for:

  OES_sample_shading
  OES_sample_variables
  OES_shader_multisample_interpolation

Signed-off-by: Rob Clark <robdclark at chromium.org>

---

 src/freedreno/ir3/ir3.h              |  3 ++
 src/freedreno/ir3/ir3_compiler_nir.c | 97 ++++++++++++++++++++++++++++++++++--
 src/freedreno/ir3/ir3_context.h      |  2 +-
 src/freedreno/ir3/ir3_shader.c       | 13 ++++-
 src/freedreno/ir3/ir3_shader.h       |  6 ++-
 5 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 5b3544c3542..f3c25ea2792 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1311,6 +1311,9 @@ INSTR1(SQRT)
 /* cat5 instructions: */
 INSTR1(DSX)
 INSTR1(DSY)
+INSTR1F(3D, DSX)
+INSTR1F(3D, DSY)
+INSTR1(RGETPOS)
 
 static inline struct ir3_instruction *
 ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 73e7dd6b973..34fb7b0969d 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1127,6 +1127,55 @@ static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot,
 	add_sysval_input_compmask(ctx, slot, 0x1, instr);
 }
 
+static struct ir3_instruction *
+get_barycentric_centroid(struct ir3_context *ctx)
+{
+	if (!ctx->ij_centroid) {
+		struct ir3_instruction *xy[2];
+		struct ir3_instruction *ij;
+
+		ij = create_input_compmask(ctx, 0, 0x3);
+		ir3_split_dest(ctx->block, xy, ij, 0, 2);
+
+		ctx->ij_centroid = ir3_create_collect(ctx, xy, 2);
+
+		add_sysval_input_compmask(ctx,
+				SYSTEM_VALUE_BARYCENTRIC_CENTROID,
+				0x3, ij);
+	}
+
+	return ctx->ij_centroid;
+}
+
+static struct ir3_instruction *
+get_barycentric_sample(struct ir3_context *ctx)
+{
+	if (!ctx->ij_sample) {
+		struct ir3_instruction *xy[2];
+		struct ir3_instruction *ij;
+
+		ij = create_input_compmask(ctx, 0, 0x3);
+		ir3_split_dest(ctx->block, xy, ij, 0, 2);
+
+		ctx->ij_sample = ir3_create_collect(ctx, xy, 2);
+
+		add_sysval_input_compmask(ctx,
+				SYSTEM_VALUE_BARYCENTRIC_SAMPLE,
+				0x3, ij);
+	}
+
+	return ctx->ij_sample;
+}
+
+static struct ir3_instruction  *
+get_barycentric_pixel(struct ir3_context *ctx)
+{
+	/* TODO when tgsi_to_nir supports "new-style" FS inputs switch
+	 * this to create ij_pixel only on demand:
+	 */
+	return ctx->ij_pixel;
+}
+
 static void
 emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 {
@@ -1168,13 +1217,40 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	case nir_intrinsic_load_ubo:
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
+	case nir_intrinsic_load_sample_pos_from_id: {
+		/* NOTE: blob seems to always use TYPE_F16 and then cov.f16f32,
+		 * but that doesn't seem necessary.
+		 */
+		struct ir3_instruction *offset =
+			ir3_RGETPOS(b, ir3_get_src(ctx, &intr->src[0])[0], 0);
+		offset->regs[0]->wrmask = 0x3;
+		offset->cat5.type = TYPE_F32;
+
+		ir3_split_dest(b, dst, offset, 0, 2);
+
+		break;
+	}
+	case nir_intrinsic_load_size_ir3:
+		if (!ctx->ij_size) {
+			ctx->ij_size = create_input(ctx, 0);
+
+			add_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SIZE,
+					ctx->ij_size);
+		}
+		dst[0] = ctx->ij_size;
+		break;
 	case nir_intrinsic_load_barycentric_centroid:
+		ir3_split_dest(b, dst, get_barycentric_centroid(ctx), 0, 2);
+		break;
+	case nir_intrinsic_load_barycentric_sample:
+		if (ctx->so->key.msaa) {
+			ir3_split_dest(b, dst, get_barycentric_sample(ctx), 0, 2);
+		} else {
+			ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2);
+		}
+		break;
 	case nir_intrinsic_load_barycentric_pixel:
-		/* NOTE: we still pre-create ij_pixel just to keep things working with
-		 * nir producers that create "old style" frag shader inputs (ie. just
-		 * load_input, vs load_barycentric_* + load_interpolated_input)
-		 */
-		ir3_split_dest(b, dst, ctx->ij_pixel, 0, 2);
+		ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2);
 		break;
 	case nir_intrinsic_load_interpolated_input:
 		idx = nir_intrinsic_base(intr);
@@ -1345,6 +1421,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		dst[0] = ctx->instance_id;
 		break;
 	case nir_intrinsic_load_sample_id:
+		ctx->so->per_samp = true;
+		/* fall-thru */
 	case nir_intrinsic_load_sample_id_no_per_sample:
 		if (!ctx->samp_id) {
 			ctx->samp_id = create_input(ctx, 0);
@@ -2282,6 +2360,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 	so->inputs[n].interpolate = in->data.interpolation;
 
 	if (ctx->so->type == MESA_SHADER_FRAGMENT) {
+
+		/* if any varyings have 'sample' qualifier, that triggers us
+		 * to run in per-sample mode:
+		 */
+		so->per_samp |= in->data.sample;
+
 		for (int i = 0; i < ncomp; i++) {
 			struct ir3_instruction *instr = NULL;
 			unsigned idx = (n * 4) + i + frac;
@@ -2457,6 +2541,9 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
 		case FRAG_RESULT_COLOR:
 			so->color0_mrt = 1;
 			break;
+		case FRAG_RESULT_SAMPLE_MASK:
+			so->writes_smask = true;
+			break;
 		default:
 			if (slot >= FRAG_RESULT_DATA0)
 				break;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 8c1dc45e42b..c3e16ba37fa 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -65,7 +65,7 @@ struct ir3_context {
 	 * inputs.  So we do all the input tracking normally and fix
 	 * things up after compile_instructions()
 	 */
-	struct ir3_instruction *ij_pixel;
+	struct ir3_instruction *ij_pixel, *ij_sample, *ij_centroid, *ij_size;
 
 	/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
 	struct ir3_instruction *frag_face, *frag_coord;
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index fa4f432e606..3f8e8abdc08 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -262,8 +262,15 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir)
 	NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
 			   (nir_lower_io_options)0);
 
-	if (nir->info.stage == MESA_SHADER_FRAGMENT)
+	if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+		/* NOTE: lower load_barycentric_at_sample first, since it
+		 * produces load_barycentric_at_offset:
+		 */
+		NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
+		NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);
+
 		NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
+	}
 
 	NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 
@@ -409,6 +416,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
 	case MESA_SHADER_FRAGMENT:
 		dump_reg(out, "pos (ij_pixel)",
 			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL));
+		dump_reg(out, "pos (ij_centroid)",
+			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID));
+		dump_reg(out, "pos (ij_size)",
+			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE));
 		dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
 		if (so->color0_mrt) {
 			dump_output(out, so, FRAG_RESULT_COLOR, "color");
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index b3481c12990..7f09ee5312f 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -154,6 +154,8 @@ struct ir3_shader_key {
 			/*
 			 * Fragment shader variant parameters:
 			 */
+			unsigned sample_shading : 1;
+			unsigned msaa           : 1;
 			unsigned color_two_side : 1;
 			unsigned half_precision : 1;
 			/* used when shader needs to handle flat varyings (a4xx)
@@ -389,7 +391,7 @@ struct ir3_shader_variant {
 		uint8_t slot;
 		uint8_t regid;
 	} outputs[16 + 2];  /* +POSITION +PSIZE */
-	bool writes_pos, writes_psize;
+	bool writes_pos, writes_smask, writes_psize;
 
 	/* attributes (VS) / varyings (FS):
 	 * Note that sysval's should come *after* normal inputs.
@@ -439,6 +441,8 @@ struct ir3_shader_variant {
 	/* do we have kill, image write, etc (which prevents early-z): */
 	bool no_earlyz;
 
+	bool per_samp;
+
 	/* Layout of constant registers, each section (in vec4). Pointer size
 	 * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
 	 * UBO and stream-out consts.




More information about the mesa-commit mailing list