[Mesa-dev] [PATCH 05/11] gallivm: add dst register index to lp_build_tgsi_context::emit_store

Nicolai Hähnle nhaehnle at gmail.com
Sat Sep 16 11:23:47 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c         |  2 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h         |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c     | 15 +++++++--------
 src/gallium/drivers/radeonsi/si_shader.c            | 14 ++++++++------
 src/gallium/drivers/radeonsi/si_shader_internal.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 14 +++++++++-----
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e450092a82c..66f752989ab 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -306,21 +306,21 @@ lp_build_tgsi_inst_llvm(
       if (info->output_mode == TGSI_OUTPUT_REPLICATE && bld_base->soa) {
          val = emit_data.output[0];
          memset(emit_data.output, 0, sizeof(emit_data.output));
          TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
             emit_data.output[chan_index] = val;
          }
       }
    }
 
    if (info->num_dst > 0 && info->opcode != TGSI_OPCODE_STORE) {
-      bld_base->emit_store(bld_base, inst, info, emit_data.output);
+      bld_base->emit_store(bld_base, inst, info, 0, emit_data.output);
    }
    return TRUE;
 }
 
 
 LLVMValueRef
 lp_build_emit_fetch_src(
    struct lp_build_tgsi_context *bld_base,
    const struct tgsi_full_src_register *reg,
    enum tgsi_opcode_type stype,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index eb632b700ab..eeeea507810 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -363,20 +363,21 @@ struct lp_build_tgsi_context
                          LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
 
 
    void (*emit_debug)(struct lp_build_tgsi_context *,
                       const struct tgsi_full_instruction *,
                       const struct tgsi_opcode_info *);
 
    void (*emit_store)(struct lp_build_tgsi_context *,
                       const struct tgsi_full_instruction *,
                       const struct tgsi_opcode_info *,
+                      unsigned index,
                       LLVMValueRef dst[4]);
 
    void (*emit_declaration)(struct lp_build_tgsi_context *,
                              const struct tgsi_full_declaration *decl);
 
    void (*emit_immediate)(struct lp_build_tgsi_context *,
                           const struct tgsi_full_immediate *imm);
 
 
    /* Allow the user to store data in this structure rather than passing it
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index f16c579f38d..45110e8b9fe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1906,33 +1906,32 @@ emit_debug(
          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
       }
    }
 }
 
 static void
 emit_store(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_instruction * inst,
    const struct tgsi_opcode_info * info,
+   unsigned index,
    LLVMValueRef dst[4])
 
 {
-   unsigned chan_index;
    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 
-   if(info->num_dst) {
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-
-         if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
-             continue;
-         emit_store_chan(bld_base, inst, 0, chan_index, dst[chan_index]);
-      }
+   unsigned writemask = inst->Dst[index].Register.WriteMask;
+   while (writemask) {
+      unsigned chan_index = u_bit_scan(&writemask);
+      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
+          continue;
+      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
    }
 }
 
 static unsigned
 tgsi_to_pipe_tex_target(unsigned tgsi_target)
 {
    switch (tgsi_target) {
    case TGSI_TEXTURE_BUFFER:
       return PIPE_BUFFER;
    case TGSI_TEXTURE_1D:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c4e7f225a8f..1a1a70e23ea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1132,39 +1132,40 @@ static LLVMValueRef fetch_input_tes(
 
 	base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
 	addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
 
 	return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
 }
 
 static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 			     const struct tgsi_full_instruction *inst,
 			     const struct tgsi_opcode_info *info,
+			     unsigned index,
 			     LLVMValueRef dst[4])
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = &ctx->gallivm;
-	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 	const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
 	unsigned chan_index;
 	LLVMValueRef dw_addr, stride;
 	LLVMValueRef buffer, base, buf_addr;
 	LLVMValueRef values[4];
 	bool skip_lds_store;
 	bool is_tess_factor = false, is_tess_inner = false;
 
 	/* Only handle per-patch and per-vertex outputs here.
 	 * Vectors will be lowered to scalars and this function will be called again.
 	 */
 	if (reg->Register.File != TGSI_FILE_OUTPUT ||
 	    (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
-		si_llvm_emit_store(bld_base, inst, info, dst);
+		si_llvm_emit_store(bld_base, inst, info, index, dst);
 		return;
 	}
 
 	if (reg->Register.Dimension) {
 		stride = get_tcs_out_vertex_dw_stride(ctx);
 		dw_addr = get_tcs_out_current_patch_offset(ctx);
 		dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
 		skip_lds_store = !sh_info->reads_pervertex_outputs;
 	} else {
 		dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -1184,54 +1185,55 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 				is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
 			}
 		}
 	}
 
 	buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
 
 	base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
 	buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
 
-
-	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+	uint32_t writemask = reg->Register.WriteMask;
+	while (writemask) {
+		chan_index = u_bit_scan(&writemask);
 		LLVMValueRef value = dst[chan_index];
 
 		if (inst->Instruction.Saturate)
 			value = ac_build_clamp(&ctx->ac, value);
 
 		/* Skip LDS stores if there is no LDS read of this output. */
 		if (!skip_lds_store)
 			lds_store(bld_base, chan_index, dw_addr, value);
 
 		value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
 		values[chan_index] = value;
 
-		if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
+		if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
 			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
 						    buf_addr, base,
 						    4 * chan_index, 1, 0, true, false);
 		}
 
 		/* Write tess factors into VGPRs for the epilog. */
 		if (is_tess_factor &&
 		    ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
 			if (!is_tess_inner) {
 				LLVMBuildStore(gallivm->builder, value, /* outer */
 					       ctx->invoc0_tess_factors[chan_index]);
 			} else if (chan_index < 2) {
 				LLVMBuildStore(gallivm->builder, value, /* inner */
 					       ctx->invoc0_tess_factors[4 + chan_index]);
 			}
 		}
 	}
 
-	if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
+	if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
 		LLVMValueRef value = lp_build_gather_values(gallivm,
 		                                            values, 4);
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
 					    base, 0, 1, 0, true, false);
 	}
 }
 
 static LLVMValueRef fetch_input_gs(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 023f9a6a093..141dd34be3d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -282,20 +282,21 @@ LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 				      LLVMValueRef ptr2);
 
 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 				const struct tgsi_full_src_register *reg,
 				enum tgsi_opcode_type type,
 				unsigned swizzle);
 
 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 			const struct tgsi_full_instruction *inst,
 			const struct tgsi_opcode_info *info,
+			unsigned index,
 			LLVMValueRef dst[4]);
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
 void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16);
 
 LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 231f16f049d..67172729bb6 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -874,48 +874,52 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 		break;
 
 	default:
 		break;
 	}
 }
 
 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 			const struct tgsi_full_instruction *inst,
 			const struct tgsi_opcode_info *info,
+			unsigned index,
 			LLVMValueRef dst[4])
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = &ctx->gallivm;
-	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
-	unsigned chan, chan_index;
 	bool is_vec_store = false;
 	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 
 	if (dst[0]) {
 		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 		is_vec_store = (k == LLVMVectorTypeKind);
 	}
 
 	if (is_vec_store) {
 		LLVMValueRef values[4] = {};
-		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
+		uint32_t writemask = reg->Register.WriteMask;
+		while (writemask) {
+			unsigned chan = u_bit_scan(&writemask);
 			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 			values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 							dst[0], index, "");
 		}
-		bld_base->emit_store(bld_base, inst, info, values);
+		bld_base->emit_store(bld_base, inst, info, index, values);
 		return;
 	}
 
-	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+	uint32_t writemask = reg->Register.WriteMask;
+	while (writemask) {
+		unsigned chan_index = u_bit_scan(&writemask);
 		LLVMValueRef value = dst[chan_index];
 
 		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 			continue;
 		if (inst->Instruction.Saturate)
 			value = ac_build_clamp(&ctx->ac, value);
 
 		if (reg->Register.File == TGSI_FILE_ADDRESS) {
 			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 			LLVMBuildStore(builder, value, temp_ptr);
-- 
2.11.0



More information about the mesa-dev mailing list