[Mesa-dev] [PATCH 04/20] ac: add emit_vertex to the abi
Marek Olšák
maraeo at gmail.com
Fri Nov 10 16:55:45 UTC 2017
Patches 2 - 4:
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Fri, Nov 10, 2017 at 4:13 AM, Timothy Arceri <tarceri at itsqueeze.com> wrote:
> ---
> src/amd/common/ac_nir_to_llvm.c | 11 +++++-----
> src/amd/common/ac_shader_abi.h | 4 ++++
> src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++-------------
> 3 files changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2ae656693f..36f471dcc7 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
> LLVMConstInt(ctx->ac.i32, 2, false),
> llvm_chan, attr_number,
> ctx->prim_mask);
> }
> }
> return build_varying_gather_values(&ctx->ac, result, instr->num_components,
> instr->variables[0]->var->data.location_frac);
> }
>
> static void
> -visit_emit_vertex(struct nir_to_llvm_context *ctx,
> - const nir_intrinsic_instr *instr)
> +visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
> {
> LLVMValueRef gs_next_vertex;
> LLVMValueRef can_emit;
> int idx;
> + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
>
> - assert(instr->const_index[0] == 0);
> /* Write vertex attribute values to GSVS ring */
> gs_next_vertex = LLVMBuildLoad(ctx->builder,
> ctx->gs_next_vertex,
> "");
>
> /* If this thread has already emitted the declared maximum number of
> * vertices, kill it: excessive vertex emissions are not supposed to
> * have any effect, and GS threads have no externally observable
> * effects other than emitting vertices.
> */
> can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
> LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
> ac_build_kill_if_false(&ctx->ac, can_emit);
>
> /* loop num outputs */
> idx = 0;
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> - LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
> + LLVMValueRef *out_ptr = &addrs[i * 4];
> int length = 4;
> int slot = idx;
> int slot_inc = 1;
>
> if (!(ctx->output_mask & (1ull << i)))
> continue;
>
> if (i == VARYING_SLOT_CLIP_DIST0) {
> /* pack clip and cull into a single set of slots */
> length = ctx->num_output_clips + ctx->num_output_culls;
> @@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
> case nir_intrinsic_var_atomic_exchange:
> case nir_intrinsic_var_atomic_comp_swap:
> result = visit_var_atomic(ctx->nctx, instr);
> break;
> case nir_intrinsic_interp_var_at_centroid:
> case nir_intrinsic_interp_var_at_sample:
> case nir_intrinsic_interp_var_at_offset:
> result = visit_interp(ctx->nctx, instr);
> break;
> case nir_intrinsic_emit_vertex:
> - visit_emit_vertex(ctx->nctx, instr);
> + assert(instr->const_index[0] == 0);
> + ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
> break;
> case nir_intrinsic_end_primitive:
> visit_end_primitive(ctx->nctx, instr);
> break;
> case nir_intrinsic_load_tess_coord:
> result = visit_load_tess_coord(ctx->nctx, instr);
> break;
> case nir_intrinsic_load_patch_vertices_in:
> result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false);
> break;
> @@ -6490,20 +6490,21 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
> ac_nir_get_max_workgroup_size(ctx.options->chip_class,
> shaders[i]));
> }
>
> create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
> shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
>
> ctx.abi.inputs = &ctx.inputs[0];
> ctx.abi.emit_outputs = handle_shader_outputs_post;
> + ctx.abi.emit_vertex = visit_emit_vertex;
> ctx.abi.load_ssbo = radv_load_ssbo;
> ctx.abi.load_sampler_desc = radv_get_sampler_desc;
> ctx.abi.clamp_shadow_reference = false;
>
> if (shader_count >= 2)
> ac_init_exec_full_mask(&ctx.ac);
>
> if (ctx.ac.chip_class == GFX9 &&
> shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
> ac_nir_fixup_ls_hs_input_vgprs(&ctx);
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 14517d5570..27586d0212 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -51,20 +51,24 @@ struct ac_shader_abi {
> *
> * Currently only used for NIR shaders; indexed by variables'
> * driver_location.
> */
> LLVMValueRef *inputs;
>
> void (*emit_outputs)(struct ac_shader_abi *abi,
> unsigned max_outputs,
> LLVMValueRef *addrs);
>
> + void (*emit_vertex)(struct ac_shader_abi *abi,
> + unsigned stream,
> + LLVMValueRef *addrs);
> +
> LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
>
> /**
> * Load the descriptor for the given buffer.
> *
> * \param buffer the buffer as presented in NIR: this is the descriptor
> * in Vulkan, and the buffer index in OpenGL/Gallium
> * \param write whether buffer contents will be written
> */
> LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index d234e08071..47ca64fdea 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
> unsigned stream;
>
> assert(src0.File == TGSI_FILE_IMMEDIATE);
>
> imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
> stream = LLVMConstIntGetZExtValue(imm) & 0x3;
> return stream;
> }
>
> /* Emit one vertex from the geometry shader */
> -static void si_llvm_emit_vertex(
> - const struct lp_build_tgsi_action *action,
> - struct lp_build_tgsi_context *bld_base,
> - struct lp_build_emit_data *emit_data)
> +static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
> + unsigned stream,
> + LLVMValueRef *addrs)
> {
> - struct si_shader_context *ctx = si_shader_context(bld_base);
> - struct lp_build_context *uint = &bld_base->uint_bld;
> + struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> + struct tgsi_shader_info *info = &ctx->shader->selector->info;
> + struct lp_build_context *uint = &ctx->bld_base.uint_bld;
> struct si_shader *shader = ctx->shader;
> - struct tgsi_shader_info *info = &shader->selector->info;
> struct lp_build_if_state if_state;
> LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
> ctx->param_gs2vs_offset);
> LLVMValueRef gs_next_vertex;
> LLVMValueRef can_emit;
> unsigned chan, offset;
> int i;
> - unsigned stream;
> -
> - stream = si_llvm_get_stream(bld_base, emit_data);
>
> /* Write vertex attribute values to GSVS ring */
> gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
> ctx->gs_next_vertex[stream],
> "");
>
> /* If this thread has already emitted the declared maximum number of
> * vertices, skip the write: excessive vertex emissions are not
> * supposed to have any effect.
> *
> @@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex(
>
> bool use_kill = !info->writes_memory;
> if (use_kill) {
> ac_build_kill_if_false(&ctx->ac, can_emit);
> } else {
> lp_build_if(&if_state, &ctx->gallivm, can_emit);
> }
>
> offset = 0;
> for (i = 0; i < info->num_outputs; i++) {
> - LLVMValueRef *out_ptr = ctx->outputs[i];
> -
> for (chan = 0; chan < 4; chan++) {
> if (!(info->output_usagemask[i] & (1 << chan)) ||
> ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
> continue;
>
> - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
> + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
> LLVMValueRef voffset =
> LLVMConstInt(ctx->i32, offset *
> shader->selector->gs_max_out_vertices, 0);
> offset++;
>
> voffset = lp_build_add(uint, voffset, gs_next_vertex);
> voffset = lp_build_mul_imm(uint, voffset, 4);
>
> out_val = ac_to_integer(&ctx->ac, out_val);
>
> @@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex(
>
> LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
>
> /* Signal vertex emission */
> ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
> si_get_gs_wave_id(ctx));
> if (!use_kill)
> lp_build_endif(&if_state);
> }
>
> +/* Emit one vertex from the geometry shader */
> +static void si_tgsi_emit_vertex(
> + const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct si_shader_context *ctx = si_shader_context(bld_base);
> + unsigned stream = si_llvm_get_stream(bld_base, emit_data);
> +
> + si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
> +}
> +
> /* Cut one primitive from the geometry shader */
> static void si_llvm_emit_primitive(
> const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> struct si_shader_context *ctx = si_shader_context(bld_base);
> unsigned stream;
>
> /* Signal primitive cut */
> @@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
> bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
> bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
> bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
> bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
> bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
> bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
> bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
> bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
> bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
>
> - bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
> + bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
> bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
> bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
> }
>
> static void si_optimize_vs_outputs(struct si_shader_context *ctx)
> {
> struct si_shader *shader = ctx->shader;
> struct tgsi_shader_info *info = &shader->selector->info;
>
> if ((ctx->type != PIPE_SHADER_VERTEX &&
> @@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
> bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
> if (shader->key.as_es)
> bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
> else {
> ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
> bld_base->emit_epilogue = si_tgsi_emit_epilogue;
> }
> break;
> case PIPE_SHADER_GEOMETRY:
> bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
> + ctx->abi.emit_vertex = si_llvm_emit_vertex;
> bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
> break;
> case PIPE_SHADER_FRAGMENT:
> ctx->load_input = declare_input_fs;
> ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
> bld_base->emit_epilogue = si_tgsi_emit_epilogue;
> break;
> case PIPE_SHADER_COMPUTE:
> break;
> default:
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list