[Mesa-dev] [PATCH] radv: export SampleMask from pixel shaders at full rate

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Dec 12 17:08:09 UTC 2017


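Use the compressed UINT16_ABGR export format instead of a 32-bit one
(32_ABGR or 32_GR) when the pixel shader writes stencil and/or the
sample mask but not Z.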

Ported from RadeonSI.

No CTS regressions on Polaris.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 65 ++++++++++++++++++++++++++++++++++-------
 src/amd/vulkan/radv_pipeline.c  | 29 ++++++++++++++----
 2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 663b27d265..5916619e97 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -6166,6 +6166,26 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx,
 	return true;
 }
 
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+			   bool writes_samplemask)
+{
+	if (writes_z) {
+		/* Z needs 32 bits. */
+		if (writes_samplemask)
+			return V_028710_SPI_SHADER_32_ABGR;
+		else if (writes_stencil)
+			return V_028710_SPI_SHADER_32_GR;
+		else
+			return V_028710_SPI_SHADER_32_R;
+	} else if (writes_stencil || writes_samplemask) {
+		/* Both stencil and sample mask need only 16 bits. */
+		return V_028710_SPI_SHADER_UINT16_ABGR;
+	} else {
+		return V_028710_SPI_SHADER_ZERO;
+	}
+}
+
 static void
 si_export_mrt_z(struct nir_to_llvm_context *ctx,
 		LLVMValueRef depth, LLVMValueRef stencil,
@@ -6184,19 +6204,42 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
 	args.out[2] = LLVMGetUndef(ctx->ac.f32); /* B, sample mask */
 	args.out[3] = LLVMGetUndef(ctx->ac.f32); /* A, alpha to mask */
 
-	if (depth) {
-		args.out[0] = depth;
-		args.enabled_channels |= 0x1;
-	}
+	unsigned format = si_get_spi_shader_z_format(depth != NULL,
+						     stencil != NULL,
+						     samplemask != NULL);
+
+	if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+		assert(!depth);
+		args.compr = 1; /* COMPR flag */
+
+		if (stencil) {
+			/* Stencil should be in X[23:16]. */
+			stencil = ac_to_integer(&ctx->ac, stencil);
+			stencil = LLVMBuildShl(ctx->builder, stencil,
+					       LLVMConstInt(ctx->ac.i32, 16, 0), "");
+			args.out[0] = ac_to_float(&ctx->ac, stencil);
+			args.enabled_channels |= 0x3;
+		}
+		if (samplemask) {
+			/* SampleMask should be in Y[15:0]. */
+			args.out[1] = samplemask;
+			args.enabled_channels |= 0xc;
+		}
+	} else {
+		if (depth) {
+			args.out[0] = depth;
+			args.enabled_channels |= 0x1;
+		}
 
-	if (stencil) {
-		args.out[1] = stencil;
-		args.enabled_channels |= 0x2;
-	}
+		if (stencil) {
+			args.out[1] = stencil;
+			args.enabled_channels |= 0x2;
+		}
 
-	if (samplemask) {
-		args.out[2] = samplemask;
-		args.enabled_channels |= 0x4;
+		if (samplemask) {
+			args.out[2] = samplemask;
+			args.enabled_channels |= 0x4;
+		}
 	}
 
 	/* SI (except OLAND and HAINAN) has a bug that it only looks
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0146d6935e..baaf5c4c77 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2013,6 +2013,25 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
 	}
 }
 
+static unsigned
+si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+			   bool writes_samplemask)
+{
+	if (writes_z) {
+		/* Z needs 32 bits. */
+		if (writes_samplemask)
+			return V_028710_SPI_SHADER_32_ABGR;
+		else if (writes_stencil)
+			return V_028710_SPI_SHADER_32_GR;
+		else
+			return V_028710_SPI_SHADER_32_R;
+	} else if (writes_stencil || writes_samplemask) {
+		/* Both stencil and sample mask need only 16 bits. */
+		return V_028710_SPI_SHADER_UINT16_ABGR;
+	} else {
+		return V_028710_SPI_SHADER_ZERO;
+	}
+}
 
 static VkResult
 radv_pipeline_init(struct radv_pipeline *pipeline,
@@ -2108,11 +2127,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	if (pipeline->device->physical_device->has_rbplus)
 		pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
-	pipeline->graphics.shader_z_format =
-		ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
-		ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
-		ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
-		V_028710_SPI_SHADER_ZERO;
+	unsigned shader_z_format =
+		si_get_spi_shader_z_format(ps->info.fs.writes_z,
+					   ps->info.fs.writes_stencil,
+					   ps->info.fs.writes_sample_mask);
+	pipeline->graphics.shader_z_format = shader_z_format;
 
 	calculate_vgt_gs_mode(pipeline);
 	calculate_vs_outinfo(pipeline);
-- 
2.15.1



More information about the mesa-dev mailing list