[Mesa-dev] [PATCH 04/20] ac: add emit_vertex to the abi
Timothy Arceri
tarceri at itsqueeze.com
Fri Nov 10 03:13:38 UTC 2017
---
src/amd/common/ac_nir_to_llvm.c | 11 +++++-----
src/amd/common/ac_shader_abi.h | 4 ++++
src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++-------------
3 files changed, 31 insertions(+), 19 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 2ae656693f..36f471dcc7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
LLVMConstInt(ctx->ac.i32, 2, false),
llvm_chan, attr_number,
ctx->prim_mask);
}
}
return build_varying_gather_values(&ctx->ac, result, instr->num_components,
instr->variables[0]->var->data.location_frac);
}
static void
-visit_emit_vertex(struct nir_to_llvm_context *ctx,
- const nir_intrinsic_instr *instr)
+visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
{
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit;
int idx;
+ struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
- assert(instr->const_index[0] == 0);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->builder,
ctx->gs_next_vertex,
"");
/* If this thread has already emitted the declared maximum number of
* vertices, kill it: excessive vertex emissions are not supposed to
* have any effect, and GS threads have no externally observable
* effects other than emitting vertices.
*/
can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
ac_build_kill_if_false(&ctx->ac, can_emit);
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
int length = 4;
int slot = idx;
int slot_inc = 1;
if (!(ctx->output_mask & (1ull << i)))
continue;
if (i == VARYING_SLOT_CLIP_DIST0) {
/* pack clip and cull into a single set of slots */
length = ctx->num_output_clips + ctx->num_output_culls;
@@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_var_atomic_exchange:
case nir_intrinsic_var_atomic_comp_swap:
result = visit_var_atomic(ctx->nctx, instr);
break;
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
result = visit_interp(ctx->nctx, instr);
break;
case nir_intrinsic_emit_vertex:
- visit_emit_vertex(ctx->nctx, instr);
+ assert(instr->const_index[0] == 0);
+ ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
break;
case nir_intrinsic_end_primitive:
visit_end_primitive(ctx->nctx, instr);
break;
case nir_intrinsic_load_tess_coord:
result = visit_load_tess_coord(ctx->nctx, instr);
break;
case nir_intrinsic_load_patch_vertices_in:
result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false);
break;
@@ -6490,20 +6490,21 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
ac_nir_get_max_workgroup_size(ctx.options->chip_class,
shaders[i]));
}
create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
ctx.abi.inputs = &ctx.inputs[0];
ctx.abi.emit_outputs = handle_shader_outputs_post;
+ ctx.abi.emit_vertex = visit_emit_vertex;
ctx.abi.load_ssbo = radv_load_ssbo;
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.clamp_shadow_reference = false;
if (shader_count >= 2)
ac_init_exec_full_mask(&ctx.ac);
if (ctx.ac.chip_class == GFX9 &&
shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index 14517d5570..27586d0212 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -51,20 +51,24 @@ struct ac_shader_abi {
*
* Currently only used for NIR shaders; indexed by variables'
* driver_location.
*/
LLVMValueRef *inputs;
void (*emit_outputs)(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs);
+ void (*emit_vertex)(struct ac_shader_abi *abi,
+ unsigned stream,
+ LLVMValueRef *addrs);
+
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
/**
* Load the descriptor for the given buffer.
*
* \param buffer the buffer as presented in NIR: this is the descriptor
* in Vulkan, and the buffer index in OpenGL/Gallium
* \param write whether buffer contents will be written
*/
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d234e08071..47ca64fdea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
unsigned stream;
assert(src0.File == TGSI_FILE_IMMEDIATE);
imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
stream = LLVMConstIntGetZExtValue(imm) & 0x3;
return stream;
}
/* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
+ unsigned stream,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *uint = &bld_base->uint_bld;
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ struct lp_build_context *uint = &ctx->bld_base.uint_bld;
struct si_shader *shader = ctx->shader;
- struct tgsi_shader_info *info = &shader->selector->info;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_gs2vs_offset);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit;
unsigned chan, offset;
int i;
- unsigned stream;
-
- stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
ctx->gs_next_vertex[stream],
"");
/* If this thread has already emitted the declared maximum number of
* vertices, skip the write: excessive vertex emissions are not
* supposed to have any effect.
*
@@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex(
bool use_kill = !info->writes_memory;
if (use_kill) {
ac_build_kill_if_false(&ctx->ac, can_emit);
} else {
lp_build_if(&if_state, &ctx->gallivm, can_emit);
}
offset = 0;
for (i = 0; i < info->num_outputs; i++) {
- LLVMValueRef *out_ptr = ctx->outputs[i];
-
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
continue;
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
LLVMValueRef voffset =
LLVMConstInt(ctx->i32, offset *
shader->selector->gs_max_out_vertices, 0);
offset++;
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
out_val = ac_to_integer(&ctx->ac, out_val);
@@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex(
LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
/* Signal vertex emission */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
si_get_gs_wave_id(ctx));
if (!use_kill)
lp_build_endif(&if_state);
}
+/* Emit one vertex from the geometry shader */
+static void si_tgsi_emit_vertex(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ unsigned stream = si_llvm_get_stream(bld_base, emit_data);
+
+ si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
+}
+
/* Cut one primitive from the geometry shader */
static void si_llvm_emit_primitive(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
unsigned stream;
/* Signal primitive cut */
@@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
- bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
+ bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
}
static void si_optimize_vs_outputs(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
if ((ctx->type != PIPE_SHADER_VERTEX &&
@@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
if (shader->key.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
else {
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
}
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+ ctx->abi.emit_vertex = si_llvm_emit_vertex;
bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
break;
case PIPE_SHADER_FRAGMENT:
ctx->load_input = declare_input_fs;
ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_COMPUTE:
break;
default:
--
2.14.3
More information about the mesa-dev
mailing list