[Mesa-dev] [PATCH 02/18] radeonsi: add a workaround for clamping unaligned RGB 8 & 16-bit vertex loads

Thu Feb 16 12:52:54 UTC 2017

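From: Marek Olšák <marek.olsak at amd.com>

[Reviewer summary, not part of the original message: for 3-channel vertex
formats with 8-bit or 16-bit channels, si_translate_buffer_dataformat now
returns the single-channel data format (BUF_DATA_FORMAT_8 / BUF_DATA_FORMAT_16)
instead of the 4-channel one, and si_create_vertex_elements tags the element
with SI_FIX_FETCH_RGB_8[_INT] or SI_FIX_FETCH_RGB_16[_INT]. In the vertex
shader, declare_input_vs then issues three loads at offsets 0/1/2 (8-bit) or
0/2/4 (16-bit), takes element 0 of each result as X, Y and Z, and sets W to
1.0 for the non-integer variants or to integer 1 for the _INT variants.]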
---
 src/gallium/drivers/radeonsi/si_shader.c           | 38 +++++++++++++++++++---
 src/gallium/drivers/radeonsi/si_shader.h           |  4 +++
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  3 ++
 .../drivers/radeonsi/si_shader_tgsi_setup.c        |  3 ++
 src/gallium/drivers/radeonsi/si_state.c            | 18 ++++++++--
 5 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8b9fed9..1829e3e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -361,28 +361,41 @@ static void declare_input_vs(
 	t_offset = lp_build_const_int32(gallivm, input_index);
 
 	t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
 
 	vertex_index = LLVMGetParam(ctx->main_fn,
 				    ctx->param_vertex_index0 +
 				    input_index);
 
 	fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
 
-	/* Do multiple loads for double formats. */
-	if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
+	/* Do multiple loads for special formats. */
+	switch (fix_fetch) {
+	case SI_FIX_FETCH_RGB_64_FLOAT:
 		num_fetches = 3; /* 3 2-dword loads */
 		fetch_stride = 8;
-	} else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
+		break;
+	case SI_FIX_FETCH_RGBA_64_FLOAT:
 		num_fetches = 2; /* 2 4-dword loads */
 		fetch_stride = 16;
-	} else {
+		break;
+	case SI_FIX_FETCH_RGB_8:
+	case SI_FIX_FETCH_RGB_8_INT:
+		num_fetches = 3;
+		fetch_stride = 1;
+		break;
+	case SI_FIX_FETCH_RGB_16:
+	case SI_FIX_FETCH_RGB_16_INT:
+		num_fetches = 3;
+		fetch_stride = 2;
+		break;
+	default:
 		num_fetches = 1;
 		fetch_stride = 0;
 	}
 
 	args[0] = t_list;
 	args[2] = vertex_index;
 
 	for (unsigned i = 0; i < num_fetches; i++) {
 		args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
 
@@ -505,20 +518,37 @@ static void declare_input_vs(
 			out[chan] = extract_double_to_float(ctx, input[chan], 0);
 
 		out[3] = LLVMConstReal(ctx->f32, 1);
 		break;
 	case SI_FIX_FETCH_RGBA_64_FLOAT:
 		for (chan = 0; chan < 4; chan++) {
 			out[chan] = extract_double_to_float(ctx, input[chan / 2],
 							    chan % 2);
 		}
 		break;
+	case SI_FIX_FETCH_RGB_8:
+	case SI_FIX_FETCH_RGB_8_INT:
+	case SI_FIX_FETCH_RGB_16:
+	case SI_FIX_FETCH_RGB_16_INT:
+		for (chan = 0; chan < 3; chan++) {
+			out[chan] = LLVMBuildExtractElement(gallivm->builder,
+							    input[chan],
+							    ctx->i32_0, "");
+		}
+		if (fix_fetch == SI_FIX_FETCH_RGB_8 ||
+		    fix_fetch == SI_FIX_FETCH_RGB_16) {
+			out[3] = LLVMConstReal(ctx->f32, 1);
+		} else {
+			out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1,
+						  ctx->f32, "");
+		}
+		break;
 	}
 }
 
 static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
 				     unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	if (swizzle > 0)
 		return bld_base->uint_bld.zero;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 4616190..da88df0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -243,20 +243,24 @@ enum {
 	SI_FIX_FETCH_RGBX_32_UNORM,
 	SI_FIX_FETCH_RGBA_32_SNORM,
 	SI_FIX_FETCH_RGBX_32_SNORM,
 	SI_FIX_FETCH_RGBA_32_USCALED,
 	SI_FIX_FETCH_RGBA_32_SSCALED,
 	SI_FIX_FETCH_RGBA_32_FIXED,
 	SI_FIX_FETCH_RGBX_32_FIXED,
 	SI_FIX_FETCH_RG_64_FLOAT,
 	SI_FIX_FETCH_RGB_64_FLOAT,
 	SI_FIX_FETCH_RGBA_64_FLOAT,
+	SI_FIX_FETCH_RGB_8,	/* A = 1.0 */
+	SI_FIX_FETCH_RGB_8_INT,	/* A = 1 */
+	SI_FIX_FETCH_RGB_16,
+	SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
 	/* Should only be used by si_init_shader_selector_async and
 	 * si_build_shader_variant if thread_index == -1 (non-threaded). */
 	LLVMTargetMachineRef		tm;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 26cc28d..8fde6c2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -142,20 +142,23 @@ struct si_shader_context {
 	LLVMTypeRef i32;
 	LLVMTypeRef i64;
 	LLVMTypeRef i128;
 	LLVMTypeRef f32;
 	LLVMTypeRef v16i8;
 	LLVMTypeRef v2i32;
 	LLVMTypeRef v4i32;
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;
 
+	LLVMValueRef i32_0;
+	LLVMValueRef i32_1;
+
 	LLVMValueRef shared_memory;
 };
 
 static inline struct si_shader_context *
 si_shader_context(struct lp_build_tgsi_context *bld_base)
 {
 	return (struct si_shader_context*)bld_base;
 }
 
 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index c7445e0..c7019c1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1359,20 +1359,23 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 	ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
 	ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
 	ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
 	ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
 	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
 }
 
 void si_llvm_create_func(struct si_shader_context *ctx,
 			 const char *name,
 			 LLVMTypeRef *return_types, unsigned num_return_elems,
 			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
 {
 	LLVMTypeRef main_fn_type, ret_type;
 	LLVMBasicBlockRef main_fn_body;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index d9b9f83..024de8b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1725,35 +1725,35 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
 	/* See whether the components are of the same size. */
 	for (i = 0; i < desc->nr_channels; i++) {
 		if (desc->channel[first_non_void].size != desc->channel[i].size)
 			return V_008F0C_BUF_DATA_FORMAT_INVALID;
 	}
 
 	switch (desc->channel[first_non_void].size) {
 	case 8:
 		switch (desc->nr_channels) {
 		case 1:
+		case 3: /* 3 loads */
 			return V_008F0C_BUF_DATA_FORMAT_8;
 		case 2:
 			return V_008F0C_BUF_DATA_FORMAT_8_8;
-		case 3:
 		case 4:
 			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
 		}
 		break;
 	case 16:
 		switch (desc->nr_channels) {
 		case 1:
+		case 3: /* 3 loads */
 			return V_008F0C_BUF_DATA_FORMAT_16;
 		case 2:
 			return V_008F0C_BUF_DATA_FORMAT_16_16;
-		case 3:
 		case 4:
 			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
 		}
 		break;
 	case 32:
 		switch (desc->nr_channels) {
 		case 1:
 			return V_008F0C_BUF_DATA_FORMAT_32;
 		case 2:
 			return V_008F0C_BUF_DATA_FORMAT_32_32;
@@ -3452,20 +3452,34 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 			case 4:
 				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
 				swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
 				swizzle[1] = PIPE_SWIZZLE_Y;
 				swizzle[2] = PIPE_SWIZZLE_Z;
 				swizzle[3] = PIPE_SWIZZLE_W;
 				break;
 			default:
 				assert(0);
 			}
+		} else if (channel && desc->nr_channels == 3) {
+			assert(desc->swizzle[0] == PIPE_SWIZZLE_X);
+
+			if (channel->size == 8) {
+				if (channel->pure_integer)
+					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT;
+				else
+					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8;
+			} else if (channel->size == 16) {
+				if (channel->pure_integer)
+					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT;
+				else
+					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16;
+			}
 		}
 
 		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
 				   S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
 				   S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
 				   S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
 				   S_008F0C_NUM_FORMAT(num_format) |
 				   S_008F0C_DATA_FORMAT(data_format);
 
 		/* We work around the fact that 8_8_8 and 16_16_16 data formats
-- 
2.7.4