Mesa (master): radeonsi: Use FP16 shader export format when necessary / possible.

Michel Dänzer daenzer at kemper.freedesktop.org
Mon Aug 27 10:13:57 UTC 2012


Module: Mesa
Branch: master
Commit: f402acdbe244e5de9b2b616e0a908f5d1416ce89
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f402acdbe244e5de9b2b616e0a908f5d1416ce89

Author: Michel Dänzer <michel.daenzer at amd.com>
Date:   Wed Aug 22 18:15:36 2012 +0200

radeonsi: Use FP16 shader export format when necessary / possible.

Fixes piglit fbo-blending-formats.

Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

---

 src/gallium/drivers/radeon/SIInstructions.td   |    4 +-
 src/gallium/drivers/radeon/SIIntrinsics.td     |    1 +
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   |    3 +-
 src/gallium/drivers/radeonsi/radeonsi_shader.c |   51 ++++++++++++++----
 src/gallium/drivers/radeonsi/si_state.c        |   69 +++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_state_draw.c   |    4 --
 6 files changed, 114 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index f09d604..3047321 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
 ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
 ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
 ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+>;
 ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
 ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
 def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
index 6eadc94..b9544f1 100644
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -14,6 +14,7 @@
 
 let TargetPrefix = "SI", isTarget = 1 in {
 
+  def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
   /* XXX: We may need a seperate intrinsic here for loading integer values */
   def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 989bb49..099b509 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -134,7 +134,8 @@ struct r600_context {
 	unsigned			saved_render_cond_mode;
 	/* shader information */
 	unsigned			sprite_coord_enable;
-	boolean				export_16bpc;
+	unsigned			export_16bpc;
+	unsigned			spi_shader_col_format;
 	unsigned			alpha_ref;
 	boolean				alpha_ref_dirty;
 	struct r600_textures_info	vs_samplers;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index fd614dd..98866c4 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 	unsigned compressed = 0;
 	unsigned chan;
 
-	for (chan = 0; chan < 4; chan++ ) {
-		LLVMValueRef out_ptr =
-			si_shader_ctx->radeon_bld.soa.outputs[index][chan];
-		/* +5 because the first output value will be
-		 * the 6th argument to the intrinsic. */
-		args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
-					       out_ptr, "");
+	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+		int cbuf = target - V_008DFC_SQ_EXP_MRT;
+
+		if (cbuf >= 0 && cbuf < 8) {
+			struct r600_context *rctx = si_shader_ctx->rctx;
+			compressed = (rctx->export_16bpc >> cbuf) & 0x1;
+		}
+	}
+
+	if (compressed) {
+		/* Pixel shader needs to pack output values before export */
+		for (chan = 0; chan < 2; chan++ ) {
+			LLVMValueRef *out_ptr =
+				si_shader_ctx->radeon_bld.soa.outputs[index];
+			args[0] = LLVMBuildLoad(base->gallivm->builder,
+						out_ptr[2 * chan], "");
+			args[1] = LLVMBuildLoad(base->gallivm->builder,
+						out_ptr[2 * chan + 1], "");
+			args[chan + 5] =
+				build_intrinsic(base->gallivm->builder,
+						"llvm.SI.packf16",
+						LLVMInt32TypeInContext(base->gallivm->context),
+						args, 2,
+						LLVMReadNoneAttribute);
+			args[chan + 7] = args[chan + 5];
+		}
+
+		/* Set COMPR flag */
+		args[4] = uint->one;
+	} else {
+		for (chan = 0; chan < 4; chan++ ) {
+			LLVMValueRef out_ptr =
+				si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+			/* +5 because the first output value will be
+			 * the 6th argument to the intrinsic. */
+			args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
+						       out_ptr, "");
+		}
+
+		/* Clear COMPR flag */
+		args[4] = uint->zero;
 	}
 
 	/* XXX: This controls which components of the output
@@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 	/* Specify the target we are exporting */
 	args[3] = lp_build_const_int32(base->gallivm, target);
 
-	/* Set COMPR flag */
-	args[4] = uint->zero;
-
 	/* XXX: We probably need to keep track of the output
 	 * values, so we know what we are passing to the next
 	 * stage. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5c2e743..fced24c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
 	}
 }
 
+/* Returns the size in bits of the widest component of a CB color format (a V_028C70_COLOR_* value); a format not listed below trips the assert and yields 0. */
+static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
+{
+	switch(colorformat) {
+	case V_028C70_COLOR_4_4_4_4:
+		return 4;
+
+	case V_028C70_COLOR_1_5_5_5:
+	case V_028C70_COLOR_5_5_5_1:
+		return 5;
+
+	case V_028C70_COLOR_5_6_5:
+		return 6;
+
+	case V_028C70_COLOR_8:
+	case V_028C70_COLOR_8_8:
+	case V_028C70_COLOR_8_8_8_8:
+		return 8;
+
+	case V_028C70_COLOR_10_10_10_2:
+	case V_028C70_COLOR_2_10_10_10:
+		return 10;
+
+	case V_028C70_COLOR_10_11_11:
+	case V_028C70_COLOR_11_11_10:
+		return 11;
+
+	case V_028C70_COLOR_16:
+	case V_028C70_COLOR_16_16:
+	case V_028C70_COLOR_16_16_16_16:
+		return 16;
+
+	case V_028C70_COLOR_8_24:
+	case V_028C70_COLOR_24_8:
+		return 24;
+
+	case V_028C70_COLOR_32:
+	case V_028C70_COLOR_32_32:
+	case V_028C70_COLOR_32_32_32_32:
+	case V_028C70_COLOR_X24_8_32_FLOAT:
+		return 32;
+	}
+
+	assert(!"Unknown maximum component size");
+	return 0;
+}
+
 static uint32_t si_translate_dbformat(enum pipe_format format)
 {
 	switch (format) {
@@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 	const struct util_format_description *desc;
 	int i;
 	unsigned blend_clamp = 0, blend_bypass = 0;
+	unsigned max_comp_size;
 
 	surf = (struct r600_surface *)state->cbufs[cb];
 	rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
@@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 	}
 	si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
 	si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
+
+	/* Determine pixel shader export format */
+	max_comp_size = si_colorformat_max_comp_size(format);
+	if (ntype == V_028C70_NUMBER_SRGB ||
+	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
+	     max_comp_size <= 10) ||
+	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
+		rctx->export_16bpc |= 1 << cb;
+		rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * cb);
+	} else
+		rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * cb);
 }
 
 static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
@@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 	/* build states */
 	rctx->have_depth_fb = 0;
+	rctx->export_16bpc = 0;
+	rctx->spi_shader_col_format = 0;
 	for (int i = 0; i < state->nr_cbufs; i++) {
 		si_cb(rctx, pm4, state, i);
 	}
+	assert(!(rctx->export_16bpc & ~0xff));
 	si_db(rctx, pm4, state);
 
 	shader_mask = 0;
@@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
 	si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask);
+	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
+		       rctx->spi_shader_col_format);
 	si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
 
 	si_pm4_set_state(rctx, framebuffer, pm4);
@@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
 	if (sel->type == PIPE_SHADER_FRAGMENT) {
 		if (sel->fs_write_all)
 			key |= rctx->framebuffer.nr_cbufs;
+		key |= rctx->export_16bpc << 4;
 		/*if (rctx->queued.named.rasterizer)
-			  key |= rctx->queued.named.rasterizer->flatshade << 4;*/
-		/*key |== rctx->two_side << 5;*/
+			  key |= rctx->queued.named.rasterizer->flatshade << 12;*/
+		/*key |== rctx->two_side << 13;*/
 	}
 
 	return key;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 95821dc..5f8e211 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
 	/* XXX: Depends on Z buffer format? */
 	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0);
 
-	/* XXX: Depends on color buffer format? */
-	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
-		       S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR));
-
 	va = r600_resource_va(ctx->screen, (void *)shader->bo);
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
 	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);




More information about the mesa-commit mailing list