Mesa (master): freedreno/a6xx: sample-shading support

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Apr 25 22:12:34 UTC 2019


Module: Mesa
Branch: master
Commit: 7a57cfbed695c48915fdb3d7bec37505d3f18e81
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a57cfbed695c48915fdb3d7bec37505d3f18e81

Author: Rob Clark <robdclark at chromium.org>
Date:   Thu Apr 25 12:28:35 2019 -0700

freedreno/a6xx: sample-shading support

Enables:

  OES_sample_shading
  OES_sample_variables
  OES_shader_multisample_interpolation

Signed-off-by: Rob Clark <robdclark at chromium.org>

---

 docs/features.txt                                |  6 +-
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c    |  4 +-
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c    |  4 +-
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 76 ++++++++++++++++++------
 src/gallium/drivers/freedreno/freedreno_screen.c |  4 ++
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index f9aa5f06d89..c63afeacaba 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -282,11 +282,11 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+, radeonsi, virgl
   GL_OES_geometry_shader                                DONE (i965/hsw+, nvc0)
   GL_OES_gpu_shader5                                    DONE (freedreno/a6xx, all drivers that support GL_ARB_gpu_shader5)
   GL_OES_primitive_bounding_box                         DONE (freedreno/a5xx+, i965/gen7+, nvc0)
-  GL_OES_sample_shading                                 DONE (i965, nvc0, r600)
-  GL_OES_sample_variables                               DONE (i965, nvc0, r600)
+  GL_OES_sample_shading                                 DONE (freedreno/a6xx, i965, nvc0, r600)
+  GL_OES_sample_variables                               DONE (freedreno/a6xx, i965, nvc0, r600)
   GL_OES_shader_image_atomic                            DONE (all drivers that support GL_ARB_shader_image_load_store)
   GL_OES_shader_io_blocks                               DONE (All drivers that support GLES 3.1)
-  GL_OES_shader_multisample_interpolation               DONE (i965, nvc0, r600)
+  GL_OES_shader_multisample_interpolation               DONE (freedreno/a6xx, i965, nvc0, r600)
   GL_OES_tessellation_shader                            DONE (all drivers that support GL_ARB_tessellation_shader)
   GL_OES_texture_border_clamp                           DONE (all drivers)
   GL_OES_texture_buffer                                 DONE (freedreno, i965, nvc0, softpipe)
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 5cd619acc19..767312cc3f2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -167,7 +167,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
 				.fsaturate_r = fd6_ctx->fsaturate_r,
 				.vsamples = ctx->tex[PIPE_SHADER_VERTEX].samples,
 				.fsamples = ctx->tex[PIPE_SHADER_FRAGMENT].samples,
-			}
+				.sample_shading = (ctx->min_samples > 1),
+				.msaa = (ctx->framebuffer.samples > 1),
+			},
 		},
 		.rasterflat = ctx->rasterizer->flatshade,
 		.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index c9cf26721b4..7b8184d2993 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -868,7 +868,9 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 			nr = 0;
 
 		OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
-		OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z));
+		OUT_RING(ring, COND(fp->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
+				COND(fp->writes_smask && pfb->samples > 1,
+						A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK));
 		OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));
 
 		OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 94c725f738c..3f8cdb3cc38 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -300,19 +300,23 @@ next_regid(uint32_t reg, uint32_t increment)
 #define CONDREG(r, val)  COND(VALIDREG(r), (val))
 
 static void
-setup_stateobj(struct fd_ringbuffer *ring,
-               struct fd6_program_state *state, bool binning_pass)
+setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state,
+		const struct ir3_shader_key *key, bool binning_pass)
 {
 	struct stage s[MAX_STAGES];
 	uint32_t pos_regid, psize_regid, color_regid[8], posz_regid;
-	uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, samp_mask_regid;
-	uint32_t vcoord_regid, vertex_regid, instance_regid;
+	uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
+	uint32_t smask_in_regid, smask_regid;
+	uint32_t vertex_regid, instance_regid;
+	uint32_t ij_pix_regid, ij_samp_regid, ij_cent_regid, ij_size_regid;
 	enum a3xx_threadsize fssz;
 	uint8_t psize_loc = ~0;
 	int i, j;
 
 	setup_stages(state, s, binning_pass);
 
+	bool sample_shading = s[FS].v->per_samp | key->sample_shading;
+
 	fssz = FOUR_QUADS;
 
 	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
@@ -336,12 +340,22 @@ setup_stateobj(struct fd_ringbuffer *ring,
 	}
 
 	samp_id_regid   = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID);
-	samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN);
+	smask_in_regid  = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN);
 	face_regid      = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
 	coord_regid     = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
 	zwcoord_regid   = next_regid(coord_regid, 2);
-	vcoord_regid    = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PIXEL);
+	ij_pix_regid    = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PIXEL);
+	ij_samp_regid   = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_SAMPLE);
+	ij_cent_regid   = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_CENTROID);
+	ij_size_regid   = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_SIZE);
 	posz_regid      = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+	smask_regid     = ir3_find_output_regid(s[FS].v, FRAG_RESULT_SAMPLE_MASK);
+
+	/* We can't write gl_SampleMask when MSAA is disabled: if bit 0 (b0)
+	 * of the mask is zero, we end up masking out the single sample!
+	 */
+	if (!key->msaa)
+		smask_regid = regid(63, 0);
 
 	/* we could probably divide this up into things that need to be
 	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
@@ -390,7 +404,8 @@ setup_stateobj(struct fd_ringbuffer *ring,
 
 	OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1);
 	OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
-			 0xfcfc0000);
+			 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
+			 0xfc000000);
 
 	OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
 	OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(s[VS].constlen) |
@@ -510,13 +525,15 @@ setup_stateobj(struct fd_ringbuffer *ring,
 	OUT_RING(ring, 0x7);                /* XXX */
 	OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
 			 A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
-			 A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samp_mask_regid) |
-			 0xfc000000);               /* XXX */
-	OUT_RING(ring, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(vcoord_regid) |
-			0xfcfcfc00);               /* XXX */
+			 A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
+			 A6XX_HLSQ_CONTROL_2_REG_SIZE(ij_size_regid));
+	OUT_RING(ring, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(ij_pix_regid) |
+			 A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(ij_cent_regid) |
+			 0xfc00fc00);               /* XXX */
 	OUT_RING(ring, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
-			A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
-			0x0000fcfc);               /* XXX */
+			 A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
+			 A6XX_HLSQ_CONTROL_4_REG_BARY_IJ_PIXEL_PERSAMP(ij_samp_regid) |
+			 0x0000fc00);               /* XXX */
 	OUT_RING(ring, 0xfc);              /* XXX */
 
 	OUT_PKT4(ring, REG_A6XX_HLSQ_UNKNOWN_B980, 1);
@@ -547,7 +564,12 @@ setup_stateobj(struct fd_ringbuffer *ring,
 #endif
 
 	OUT_PKT4(ring, REG_A6XX_GRAS_CNTL, 1);
-	OUT_RING(ring, COND(enable_varyings, A6XX_GRAS_CNTL_VARYING) |
+	OUT_RING(ring,
+			CONDREG(ij_pix_regid, A6XX_GRAS_CNTL_VARYING) |
+			CONDREG(ij_cent_regid, A6XX_GRAS_CNTL_CENTROID) |
+			CONDREG(ij_samp_regid, A6XX_GRAS_CNTL_PERSAMP_VARYING) |
+			COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_GRAS_CNTL_SIZE) |
+			COND(VALIDREG(ij_size_regid) &&  sample_shading, A6XX_GRAS_CNTL_SIZE_PERSAMP) |
 			COND(s[FS].v->frag_coord,
 					A6XX_GRAS_CNTL_SIZE |
 					A6XX_GRAS_CNTL_XCOORD |
@@ -557,8 +579,13 @@ setup_stateobj(struct fd_ringbuffer *ring,
 			COND(s[FS].v->frag_face, A6XX_GRAS_CNTL_SIZE));
 
 	OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2);
-	OUT_RING(ring, COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_VARYING |
-			A6XX_RB_RENDER_CONTROL0_UNK10) |
+	OUT_RING(ring,
+			CONDREG(ij_pix_regid, A6XX_RB_RENDER_CONTROL0_VARYING) |
+			CONDREG(ij_cent_regid, A6XX_RB_RENDER_CONTROL0_CENTROID) |
+			CONDREG(ij_samp_regid, A6XX_RB_RENDER_CONTROL0_PERSAMP_VARYING) |
+			COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
+			COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE) |
+			COND(VALIDREG(ij_size_regid) &&  sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) |
 			COND(s[FS].v->frag_coord,
 					A6XX_RB_RENDER_CONTROL0_SIZE |
 					A6XX_RB_RENDER_CONTROL0_XCOORD |
@@ -568,10 +595,21 @@ setup_stateobj(struct fd_ringbuffer *ring,
 			COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL0_SIZE));
 
 	OUT_RING(ring,
-			CONDREG(samp_mask_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
+			CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
+			COND(sample_shading, A6XX_RB_RENDER_CONTROL1_UNK4 | A6XX_RB_RENDER_CONTROL1_UNK5) |
 			CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
+			CONDREG(ij_size_regid, A6XX_RB_RENDER_CONTROL1_SIZE) |
 			COND(s[FS].v->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
 
+	OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_CNTL, 1);
+	OUT_RING(ring, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
+
+	OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8101, 1);
+	OUT_RING(ring, COND(sample_shading, 0x6));  // XXX
+
+	OUT_PKT4(ring, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
+	OUT_RING(ring, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
+
 	OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
 	for (i = 0; i < 8; i++) {
 		// TODO we could have a mix of half and full precision outputs,
@@ -743,8 +781,8 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
 	state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 	state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 
-	setup_stateobj(state->binning_stateobj, state, true);
-	setup_stateobj(state->stateobj, state, false);
+	setup_stateobj(state->binning_stateobj, state, key, true);
+	setup_stateobj(state->stateobj, state, key, false);
 
 	return &state->base;
 }
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 58640a82b0b..d1edf82592c 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -307,6 +307,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
 		return 0;
 
+	case PIPE_CAP_SAMPLE_SHADING:
+		if (is_a6xx(screen)) return 1;
+		return 0;
+
 	case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 		return 0;
 




More information about the mesa-commit mailing list