Mesa (master): radeonsi: load the right number of components for VS inputs and TBOs

Marek Olšák mareko at kemper.freedesktop.org
Thu Feb 1 15:22:45 UTC 2018


Module: Mesa
Branch: master
Commit: be973ed21f6e456ebd753f26a99151d9ea6e765c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=be973ed21f6e456ebd753f26a99151d9ea6e765c

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Jan 30 18:34:25 2018 +0100

radeonsi: load the right number of components for VS inputs and TBOs

The supported counts are 1, 2, 4. (3=4)

The following snippet loads float, vec2, vec3, and vec4:

Before:
    buffer_load_format_x v9, v4, s[0:3], 0 idxen          ; E0002000 80000904
    buffer_load_format_xyzw v[0:3], v5, s[8:11], 0 idxen  ; E00C2000 80020005
    s_waitcnt vmcnt(0)                                    ; BF8C0F70
    buffer_load_format_xyzw v[2:5], v6, s[12:15], 0 idxen ; E00C2000 80030206
    s_waitcnt vmcnt(0)                                    ; BF8C0F70
    buffer_load_format_xyzw v[5:8], v7, s[4:7], 0 idxen   ; E00C2000 80010507

After:
    buffer_load_format_x v10, v4, s[0:3], 0 idxen         ; E0002000 80000A04
    buffer_load_format_xy v[8:9], v5, s[8:11], 0 idxen    ; E0042000 80020805
    buffer_load_format_xyzw v[0:3], v6, s[12:15], 0 idxen ; E00C2000 80030006
    s_waitcnt vmcnt(0)                                    ; BF8C0F70
    buffer_load_format_xyzw v[3:6], v7, s[4:7], 0 idxen   ; E00C2000 80010307

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

---

 src/amd/common/ac_llvm_build.c                    | 35 +++++++++++++++++++++++
 src/amd/common/ac_llvm_build.h                    |  3 ++
 src/gallium/drivers/radeonsi/si_shader.c          | 13 +++++++--
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  8 ++++--
 4 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6afe7f9723..a5cb72daed 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -461,6 +461,41 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
 	return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
 }
 
+/* Expand a scalar or vector to <4 x type> by filling the remaining channels
+ * with undef. Extract at most num_channels components from the input.
+ */
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+				     LLVMValueRef value,
+				     unsigned num_channels)
+{
+	LLVMTypeRef elemtype;
+	LLVMValueRef chan[4];
+
+	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+		num_channels = MIN2(num_channels, vec_size);
+
+		if (num_channels >= 4)
+			return value;
+
+		for (unsigned i = 0; i < num_channels; i++)
+			chan[i] = ac_llvm_extract_elem(ctx, value, i);
+
+		elemtype = LLVMGetElementType(LLVMTypeOf(value));
+	} else {
+		if (num_channels) {
+			assert(num_channels == 1);
+			chan[0] = value;
+		}
+		elemtype = LLVMTypeOf(value);
+	}
+
+	while (num_channels < 4)
+		chan[num_channels++] = LLVMGetUndef(elemtype);
+
+	return ac_build_gather_values(ctx, chan, 4);
+}
+
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 78437d603e..3ae96781b6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -141,6 +141,9 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
 		       LLVMValueRef *values,
 		       unsigned value_count);
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+				     LLVMValueRef value,
+				     unsigned num_channels);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 9458920b9d..78156f1ede 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -480,8 +480,8 @@ void si_llvm_load_input_vs(
 	unsigned input_index,
 	LLVMValueRef out[4])
 {
-	unsigned vs_blit_property =
-		ctx->shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
+	unsigned vs_blit_property = info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
 
 	if (vs_blit_property) {
 		LLVMValueRef vertex_id = ctx->abi.vertex_id;
@@ -555,6 +555,7 @@ void si_llvm_load_input_vs(
 	unsigned fix_fetch;
 	unsigned num_fetches;
 	unsigned fetch_stride;
+	unsigned num_channels;
 
 	LLVMValueRef t_list_ptr;
 	LLVMValueRef t_offset;
@@ -580,24 +581,29 @@ void si_llvm_load_input_vs(
 	case SI_FIX_FETCH_RGB_64_FLOAT:
 		num_fetches = 3; /* 3 2-dword loads */
 		fetch_stride = 8;
+		num_channels = 2;
 		break;
 	case SI_FIX_FETCH_RGBA_64_FLOAT:
 		num_fetches = 2; /* 2 4-dword loads */
 		fetch_stride = 16;
+		num_channels = 4;
 		break;
 	case SI_FIX_FETCH_RGB_8:
 	case SI_FIX_FETCH_RGB_8_INT:
 		num_fetches = 3;
 		fetch_stride = 1;
+		num_channels = 1;
 		break;
 	case SI_FIX_FETCH_RGB_16:
 	case SI_FIX_FETCH_RGB_16_INT:
 		num_fetches = 3;
 		fetch_stride = 2;
+		num_channels = 1;
 		break;
 	default:
 		num_fetches = 1;
 		fetch_stride = 0;
+		num_channels = util_last_bit(info->input_usage_mask[input_index]);
 	}
 
 	for (unsigned i = 0; i < num_fetches; i++) {
@@ -605,7 +611,8 @@ void si_llvm_load_input_vs(
 
 		input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
 						       vertex_index, voffset,
-						       4, true);
+						       num_channels, true);
+		input[i] = ac_build_expand_to_vec4(&ctx->ac, input[i], num_channels);
 	}
 
 	/* Break up the vec4 into individual components */
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index c80dc8ef0e..42cb3251e7 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1821,12 +1821,16 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	unsigned target = inst->Texture.Texture;
 
 	if (target == TGSI_TEXTURE_BUFFER) {
-		emit_data->output[emit_data->chan] =
+		unsigned num_channels =
+			util_last_bit(inst->Dst[0].Register.WriteMask);
+		LLVMValueRef result =
 			ac_build_buffer_load_format(&ctx->ac,
 						    emit_data->args[0],
 						    emit_data->args[2],
 						    emit_data->args[1],
-						    4, true);
+						    num_channels, true);
+		emit_data->output[emit_data->chan] =
+			ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
 		return;
 	}
 




More information about the mesa-commit mailing list