[Mesa-dev] [PATCH 04/20] ac: add emit_vertex to the abi

Fri Nov 10 03:13:38 UTC 2017

---
 src/amd/common/ac_nir_to_llvm.c          | 11 +++++-----
 src/amd/common/ac_shader_abi.h           |  4 ++++
 src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++-------------
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2ae656693f..36f471dcc7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
 							      LLVMConstInt(ctx->ac.i32, 2, false),
 							      llvm_chan, attr_number,
 							      ctx->prim_mask);
 		}
 	}
 	return build_varying_gather_values(&ctx->ac, result, instr->num_components,
 					   instr->variables[0]->var->data.location_frac);
 }
 
 static void
-visit_emit_vertex(struct nir_to_llvm_context *ctx,
-		  const nir_intrinsic_instr *instr)
+visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
 {
 	LLVMValueRef gs_next_vertex;
 	LLVMValueRef can_emit;
 	int idx;
+	struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
 
-	assert(instr->const_index[0] == 0);
 	/* Write vertex attribute values to GSVS ring */
 	gs_next_vertex = LLVMBuildLoad(ctx->builder,
 				       ctx->gs_next_vertex,
 				       "");
 
 	/* If this thread has already emitted the declared maximum number of
 	 * vertices, kill it: excessive vertex emissions are not supposed to
 	 * have any effect, and GS threads have no externally observable
 	 * effects other than emitting vertices.
 	 */
 	can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
 				 LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
 	ac_build_kill_if_false(&ctx->ac, can_emit);
 
 	/* loop num outputs */
 	idx = 0;
 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
-		LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
+		LLVMValueRef *out_ptr = &addrs[i * 4];
 		int length = 4;
 		int slot = idx;
 		int slot_inc = 1;
 
 		if (!(ctx->output_mask & (1ull << i)))
 			continue;
 
 		if (i == VARYING_SLOT_CLIP_DIST0) {
 			/* pack clip and cull into a single set of slots */
 			length = ctx->num_output_clips + ctx->num_output_culls;
@@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_var_atomic_exchange:
 	case nir_intrinsic_var_atomic_comp_swap:
 		result = visit_var_atomic(ctx->nctx, instr);
 		break;
 	case nir_intrinsic_interp_var_at_centroid:
 	case nir_intrinsic_interp_var_at_sample:
 	case nir_intrinsic_interp_var_at_offset:
 		result = visit_interp(ctx->nctx, instr);
 		break;
 	case nir_intrinsic_emit_vertex:
-		visit_emit_vertex(ctx->nctx, instr);
+		assert(instr->const_index[0] == 0);
+		ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
 		break;
 	case nir_intrinsic_end_primitive:
 		visit_end_primitive(ctx->nctx, instr);
 		break;
 	case nir_intrinsic_load_tess_coord:
 		result = visit_load_tess_coord(ctx->nctx, instr);
 		break;
 	case nir_intrinsic_load_patch_vertices_in:
 		result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false);
 		break;
@@ -6490,20 +6490,21 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 		ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
 		                              ac_nir_get_max_workgroup_size(ctx.options->chip_class,
 		                                                            shaders[i]));
 	}
 
 	create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
 	                shader_count >= 2 ? shaders[shader_count - 2]->info.stage  : MESA_SHADER_VERTEX);
 
 	ctx.abi.inputs = &ctx.inputs[0];
 	ctx.abi.emit_outputs = handle_shader_outputs_post;
+	ctx.abi.emit_vertex = visit_emit_vertex;
 	ctx.abi.load_ssbo = radv_load_ssbo;
 	ctx.abi.load_sampler_desc = radv_get_sampler_desc;
 	ctx.abi.clamp_shadow_reference = false;
 
 	if (shader_count >= 2)
 		ac_init_exec_full_mask(&ctx.ac);
 
 	if (ctx.ac.chip_class == GFX9 &&
 	    shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
 		ac_nir_fixup_ls_hs_input_vgprs(&ctx);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 14517d5570..27586d0212 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -51,20 +51,24 @@ struct ac_shader_abi {
 	 *
 	 * Currently only used for NIR shaders; indexed by variables'
 	 * driver_location.
 	 */
 	LLVMValueRef *inputs;
 
 	void (*emit_outputs)(struct ac_shader_abi *abi,
 			     unsigned max_outputs,
 			     LLVMValueRef *addrs);
 
+	void (*emit_vertex)(struct ac_shader_abi *abi,
+			    unsigned stream,
+			    LLVMValueRef *addrs);
+
 	LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
 
 	/**
 	 * Load the descriptor for the given buffer.
 	 *
 	 * \param buffer the buffer as presented in NIR: this is the descriptor
 	 *               in Vulkan, and the buffer index in OpenGL/Gallium
 	 * \param write whether buffer contents will be written
 	 */
 	LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d234e08071..47ca64fdea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
 	unsigned stream;
 
 	assert(src0.File == TGSI_FILE_IMMEDIATE);
 
 	imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
 	stream = LLVMConstIntGetZExtValue(imm) & 0x3;
 	return stream;
 }
 
 /* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(
-	const struct lp_build_tgsi_action *action,
-	struct lp_build_tgsi_context *bld_base,
-	struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
+				unsigned stream,
+				LLVMValueRef *addrs)
 {
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct lp_build_context *uint = &bld_base->uint_bld;
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	struct tgsi_shader_info *info = &ctx->shader->selector->info;
+	struct lp_build_context *uint = &ctx->bld_base.uint_bld;
 	struct si_shader *shader = ctx->shader;
-	struct tgsi_shader_info *info = &shader->selector->info;
 	struct lp_build_if_state if_state;
 	LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
 					    ctx->param_gs2vs_offset);
 	LLVMValueRef gs_next_vertex;
 	LLVMValueRef can_emit;
 	unsigned chan, offset;
 	int i;
-	unsigned stream;
-
-	stream = si_llvm_get_stream(bld_base, emit_data);
 
 	/* Write vertex attribute values to GSVS ring */
 	gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
 				       ctx->gs_next_vertex[stream],
 				       "");
 
 	/* If this thread has already emitted the declared maximum number of
 	 * vertices, skip the write: excessive vertex emissions are not
 	 * supposed to have any effect.
 	 *
@@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex(
 
 	bool use_kill = !info->writes_memory;
 	if (use_kill) {
 		ac_build_kill_if_false(&ctx->ac, can_emit);
 	} else {
 		lp_build_if(&if_state, &ctx->gallivm, can_emit);
 	}
 
 	offset = 0;
 	for (i = 0; i < info->num_outputs; i++) {
-		LLVMValueRef *out_ptr = ctx->outputs[i];
-
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)) ||
 			    ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
 				continue;
 
-			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
 			LLVMValueRef voffset =
 				LLVMConstInt(ctx->i32, offset *
 					     shader->selector->gs_max_out_vertices, 0);
 			offset++;
 
 			voffset = lp_build_add(uint, voffset, gs_next_vertex);
 			voffset = lp_build_mul_imm(uint, voffset, 4);
 
 			out_val = ac_to_integer(&ctx->ac, out_val);
 
@@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex(
 
 	LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
 
 	/* Signal vertex emission */
 	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
 			 si_get_gs_wave_id(ctx));
 	if (!use_kill)
 		lp_build_endif(&if_state);
 }
 
+/* Emit one vertex from the geometry shader */
+static void si_tgsi_emit_vertex(
+	const struct lp_build_tgsi_action *action,
+	struct lp_build_tgsi_context *bld_base,
+	struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	unsigned stream = si_llvm_get_stream(bld_base, emit_data);
+
+	si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
+}
+
 /* Cut one primitive from the geometry shader */
 static void si_llvm_emit_primitive(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	unsigned stream;
 
 	/* Signal primitive cut */
@@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 	bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
 	bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
 	bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
 	bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
 	bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
 	bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
 	bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
 	bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
 
-	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
+	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
 	bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
 	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 static void si_optimize_vs_outputs(struct si_shader_context *ctx)
 {
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 
 	if ((ctx->type != PIPE_SHADER_VERTEX &&
@@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
 		if (shader->key.as_es)
 			bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
 		else {
 			ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
 			bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		}
 		break;
 	case PIPE_SHADER_GEOMETRY:
 		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+		ctx->abi.emit_vertex = si_llvm_emit_vertex;
 		bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
 		break;
 	case PIPE_SHADER_FRAGMENT:
 		ctx->load_input = declare_input_fs;
 		ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
 		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
 		break;
 	case PIPE_SHADER_COMPUTE:
 		break;
 	default:
-- 
2.14.3