[Mesa-dev] [PATCH 2/6] radeonsi: implement 32-bit SNORM/UNORM/SSCALED/USCALED vertex formats

Marek Olšák maraeo at gmail.com
Mon Jan 16 02:00:02 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/si_shader.h | 12 ++++++--
 src/gallium/drivers/radeonsi/si_state.c  | 44 ++++++++++++++++++---------
 3 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dfba9d4..5fb5f43 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -427,21 +427,25 @@ static void declare_input_vs(
 		LP_FUNC_ATTR_READNONE);
 
 	/* Break up the vec4 into individual components */
 	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
 		out[chan] = LLVMBuildExtractElement(gallivm->builder,
 						    input, llvm_chan, "");
 	}
 
 	fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
-	if (fix_fetch) {
+
+	switch (fix_fetch) {
+	case SI_FIX_FETCH_A2_SNORM:
+	case SI_FIX_FETCH_A2_SSCALED:
+	case SI_FIX_FETCH_A2_SINT: {
 		/* The hardware returns an unsigned value; convert it to a
 		 * signed one.
 		 */
 		LLVMValueRef tmp = out[3];
 		LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
 
 		/* First, recover the sign-extended signed integer value. */
 		if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
 			tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
 		else
@@ -463,20 +467,66 @@ static void declare_input_vs(
 			LLVMValueRef clamp;
 			LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
 			tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
 			clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
 			tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
 		} else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
 			tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
 		}
 
 		out[3] = tmp;
+		break;
+	}
+	case SI_FIX_FETCH_RGBA_32_UNORM:
+	case SI_FIX_FETCH_RGBX_32_UNORM:
+		for (chan = 0; chan < 4; chan++) {
+			out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
+						     ctx->i32, "");
+			out[chan] = LLVMBuildUIToFP(gallivm->builder,
+						    out[chan], ctx->f32, "");
+			out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
+						  LLVMConstReal(ctx->f32, 1.0 / UINT_MAX), "");
+		}
+		/* RGBX is fetched as UINT with alpha = 1, which normalizing would scale to ~0, so force alpha to 1.0. */
+		if (fix_fetch == SI_FIX_FETCH_RGBX_32_UNORM)
+			out[3] = LLVMConstReal(ctx->f32, 1);
+		break;
+	case SI_FIX_FETCH_RGBA_32_SNORM:
+	case SI_FIX_FETCH_RGBX_32_SNORM:
+		for (chan = 0; chan < 4; chan++) {
+			out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
+						     ctx->i32, "");
+			out[chan] = LLVMBuildSIToFP(gallivm->builder,
+						    out[chan], ctx->f32, "");
+			out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
+						  LLVMConstReal(ctx->f32, 1.0 / INT_MAX), "");
+		}
+		/* RGBX is fetched as SINT with alpha = 1, which normalizing would scale to ~0, so force alpha to 1.0. */
+		if (fix_fetch == SI_FIX_FETCH_RGBX_32_SNORM)
+			out[3] = LLVMConstReal(ctx->f32, 1);
+		break;
+	case SI_FIX_FETCH_RGBA_32_USCALED:
+		for (chan = 0; chan < 4; chan++) {
+			out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
+						     ctx->i32, "");
+			out[chan] = LLVMBuildUIToFP(gallivm->builder,
+						    out[chan], ctx->f32, "");
+		}
+		break;
+	case SI_FIX_FETCH_RGBA_32_SSCALED:
+		for (chan = 0; chan < 4; chan++) {
+			out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
+						     ctx->i32, "");
+			out[chan] = LLVMBuildSIToFP(gallivm->builder,
+						    out[chan], ctx->f32, "");
+		}
+		break;
 	}
 }
 
 static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
 				     unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	if (swizzle > 0)
 		return bld_base->uint_bld.zero;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 89f9628..5e554d9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -229,23 +229,29 @@ enum {
 
 /* SI-specific system values. */
 enum {
 	TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
 	TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
 };
 
 /* For VS shader key fix_fetch. */
 enum {
 	SI_FIX_FETCH_NONE = 0,
-	SI_FIX_FETCH_A2_SNORM = 1,
-	SI_FIX_FETCH_A2_SSCALED = 2,
-	SI_FIX_FETCH_A2_SINT = 3,
+	SI_FIX_FETCH_A2_SNORM, /* 2_10_10_10 SNORM: VS converts the unsigned alpha to signed */
+	SI_FIX_FETCH_A2_SSCALED, /* 2_10_10_10 SSCALED: same alpha fix */
+	SI_FIX_FETCH_A2_SINT, /* 2_10_10_10 SINT: same alpha fix */
+	SI_FIX_FETCH_RGBA_32_UNORM, /* 32-bit UNORM: VS does uint -> float, then * 1/UINT_MAX */
+	SI_FIX_FETCH_RGBX_32_UNORM, /* as RGBA_32_UNORM, but alpha is forced to 1.0 */
+	SI_FIX_FETCH_RGBA_32_SNORM, /* 32-bit SNORM: VS does int -> float, then * 1/INT_MAX */
+	SI_FIX_FETCH_RGBX_32_SNORM, /* as RGBA_32_SNORM, but alpha is forced to 1.0 */
+	SI_FIX_FETCH_RGBA_32_USCALED, /* 32-bit USCALED: VS does uint -> float */
+	SI_FIX_FETCH_RGBA_32_SSCALED, /* 32-bit SSCALED: VS does int -> float; values are packed 4 bits per input, so must stay <= 0xf */
 };
 
 struct si_shader;
 
 /* A shader selector is a gallium CSO and contains shader variants and
  * binaries for one TGSI program. This can be shared by multiple contexts.
  */
 struct si_shader_selector {
 	struct si_screen	*screen;
 	struct util_queue_fence ready;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fa78a56..c8d1099 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1739,28 +1739,20 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
 		case 1:
 			return V_008F0C_BUF_DATA_FORMAT_16;
 		case 2:
 			return V_008F0C_BUF_DATA_FORMAT_16_16;
 		case 3:
 		case 4:
 			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
 		}
 		break;
 	case 32:
-		/* From the Southern Islands ISA documentation about MTBUF:
-		 * 'Memory reads of data in memory that is 32 or 64 bits do not
-		 * undergo any format conversion.'
-		 */
-		if (type != UTIL_FORMAT_TYPE_FLOAT &&
-		    !desc->channel[first_non_void].pure_integer)
-			return V_008F0C_BUF_DATA_FORMAT_INVALID;
-
 		switch (desc->nr_channels) {
 		case 1:
 			return V_008F0C_BUF_DATA_FORMAT_32;
 		case 2:
 			return V_008F0C_BUF_DATA_FORMAT_32_32;
 		case 3:
 			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
 		case 4:
 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
 		}
@@ -1774,32 +1766,34 @@ static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
 					      const struct util_format_description *desc,
 					      int first_non_void)
 {
 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
 
 	assert(first_non_void >= 0);
 
 	switch (desc->channel[first_non_void].type) {
 	case UTIL_FORMAT_TYPE_SIGNED:
-		if (desc->channel[first_non_void].normalized)
-			return V_008F0C_BUF_NUM_FORMAT_SNORM;
-		else if (desc->channel[first_non_void].pure_integer)
+		if (desc->channel[first_non_void].size >= 32 ||
+		    desc->channel[first_non_void].pure_integer)
 			return V_008F0C_BUF_NUM_FORMAT_SINT;
+		else if (desc->channel[first_non_void].normalized)
+			return V_008F0C_BUF_NUM_FORMAT_SNORM;
 		else
 			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
 		break;
 	case UTIL_FORMAT_TYPE_UNSIGNED:
-		if (desc->channel[first_non_void].normalized)
-			return V_008F0C_BUF_NUM_FORMAT_UNORM;
-		else if (desc->channel[first_non_void].pure_integer)
+		if (desc->channel[first_non_void].size >= 32 ||
+		    desc->channel[first_non_void].pure_integer)
 			return V_008F0C_BUF_NUM_FORMAT_UINT;
+		else if (desc->channel[first_non_void].normalized)
+			return V_008F0C_BUF_NUM_FORMAT_UNORM;
 		else
 			return V_008F0C_BUF_NUM_FORMAT_USCALED;
 		break;
 	case UTIL_FORMAT_TYPE_FLOAT:
 	default:
 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
 	}
 }
 
 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
@@ -3335,27 +3329,29 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
 	int i;
 
 	assert(count <= SI_MAX_ATTRIBS);
 	if (!v)
 		return NULL;
 
 	v->count = count;
 	for (i = 0; i < count; ++i) {
 		const struct util_format_description *desc;
+		const struct util_format_channel_description *channel;
 		unsigned data_format, num_format;
 		int first_non_void;
 
 		desc = util_format_description(elements[i].src_format);
 		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
 		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+		channel = &desc->channel[first_non_void];
 
 		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
 				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
 				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
 				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
 				   S_008F0C_NUM_FORMAT(num_format) |
 				   S_008F0C_DATA_FORMAT(data_format);
 		v->format_size[i] = desc->block.bits / 8;
 
 		/* The hardware always treats the 2-bit alpha channel as
@@ -3363,20 +3359,40 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 		 */
 		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
 			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
 				/* This isn't actually used in OpenGL. */
 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
 			}
+		} else if (channel->size == 32 && !channel->pure_integer) {
+			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
+				if (channel->normalized) {
+					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
+					else
+						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
+				} else {
+					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
+				}
+			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
+				if (channel->normalized) {
+					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
+						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
+					else
+						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
+				} else {
+					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
+				}
+			}
 		}
 
 		/* We work around the fact that 8_8_8 and 16_16_16 data formats
 		 * do not exist by using the corresponding 4-component formats.
 		 * This requires a fixup of the descriptor for bounds checks.
 		 */
 		if (desc->block.bits == 3 * 8 ||
 		    desc->block.bits == 3 * 16) {
 			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
 		}
-- 
2.7.4



More information about the mesa-dev mailing list