[Mesa-dev] [PATCH 2/7] radeonsi: export SampleMask from pixel shaders at full rate

Marek Olšák maraeo at gmail.com
Mon Sep 12 22:44:38 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

Heaven and Valley write gl_SampleMask and not Z.
Use UINT16_ABGR instead of 32_ABGR (or 32_GR for stencil only) if Z isn't written.
---
 src/gallium/drivers/radeonsi/si_shader.c        | 63 ++++++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_shader.h        |  2 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  7 ++-
 3 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6801722..6d30d1c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2921,57 +2921,96 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
 
 	si_llvm_export_vs(bld_base, outputs, i);
 	FREE(outputs);
 }
 
 struct si_ps_exports {
 	unsigned num;
 	LLVMValueRef args[10][9];
 };
 
+unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+				    bool writes_samplemask)
+{
+	if (writes_z) {
+		/* Z needs 32 bits. */
+		if (writes_samplemask)
+			return V_028710_SPI_SHADER_32_ABGR;
+		else if (writes_stencil)
+			return V_028710_SPI_SHADER_32_GR;
+		else
+			return V_028710_SPI_SHADER_32_R;
+	} else if (writes_stencil || writes_samplemask) {
+		/* Both stencil and sample mask need only 16 bits. */
+		return V_028710_SPI_SHADER_UINT16_ABGR;
+	} else {
+		return V_028710_SPI_SHADER_ZERO;
+	}
+}
+
 static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
 			    LLVMValueRef depth, LLVMValueRef stencil,
 			    LLVMValueRef samplemask, struct si_ps_exports *exp)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *base = &bld_base->base;
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	LLVMValueRef args[9];
 	unsigned mask = 0;
+	unsigned format = si_get_spi_shader_z_format(depth != NULL,
+						     stencil != NULL,
+						     samplemask != NULL);
 
 	assert(depth || stencil || samplemask);
 
 	args[1] = uint->one; /* whether the EXEC mask is valid */
 	args[2] = uint->one; /* DONE bit */
 
 	/* Specify the target we are exporting */
 	args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
 
 	args[4] = uint->zero; /* COMP flag */
 	args[5] = base->undef; /* R, depth */
 	args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */
 	args[7] = base->undef; /* B, sample mask */
 	args[8] = base->undef; /* A, alpha to mask */
 
-	if (depth) {
-		args[5] = depth;
-		mask |= 0x1;
-	}
-
-	if (stencil) {
-		args[6] = stencil;
-		mask |= 0x2;
-	}
+	if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+		assert(!depth);
+		args[4] = uint->one; /* COMPR flag */
 
-	if (samplemask) {
-		args[7] = samplemask;
-		mask |= 0x4;
+		if (stencil) {
+			/* Stencil should be in X[23:16]. */
+			stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil);
+			stencil = LLVMBuildShl(base->gallivm->builder, stencil,
+					       LLVMConstInt(ctx->i32, 16, 0), "");
+			args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil);
+			mask |= 0x3;
+		}
+		if (samplemask) {
+			/* SampleMask should be in Y[15:0]. */
+			args[6] = samplemask;
+			mask |= 0xc;
+		}
+	} else {
+		if (depth) {
+			args[5] = depth;
+			mask |= 0x1;
+		}
+		if (stencil) {
+			args[6] = stencil;
+			mask |= 0x2;
+		}
+		if (samplemask) {
+			args[7] = samplemask;
+			mask |= 0x4;
+		}
 	}
 
 	/* SI (except OLAND) has a bug that it only looks
 	 * at the X writemask component. */
 	if (ctx->screen->b.chip_class == SI &&
 	    ctx->screen->b.family != CHIP_OLAND)
 		mask |= 0x1;
 
 	/* Specify which components to enable */
 	args[0] = lp_build_const_int32(base->gallivm, mask);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fc1b22d..de4705d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -511,12 +511,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
 			struct si_shader *shader,
 			struct si_shader_config *config,
 			uint64_t scratch_va);
 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
 				  struct si_shader_config *conf,
 				  unsigned symbol_offset);
+unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+				    bool writes_samplemask);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b4f19fe..816aadc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -741,24 +741,23 @@ static void si_shader_ps(struct si_shader *shader)
 		       shader->config.spi_ps_input_addr);
 
 	/* Set interpolation controls. */
 	spi_ps_in_control = S_0286D8_NUM_INTERP(si_get_ps_num_interp(shader));
 
 	/* Set registers. */
 	si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
 	si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
 	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
-		       info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
-		       info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
-		       info->writes_z ? V_028710_SPI_SHADER_32_R :
-		       V_028710_SPI_SHADER_ZERO);
+		       si_get_spi_shader_z_format(info->writes_z,
+						  info->writes_stencil,
+						  info->writes_samplemask));
 
 	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, spi_shader_col_format);
 	si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
 	si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
 
 	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
-- 
2.7.4



More information about the mesa-dev mailing list