[Mesa-dev] [PATCH 04/20] ac: add emit_vertex to the abi

Fri Nov 10 16:55:45 UTC 2017

Patches 2 - 4:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Fri, Nov 10, 2017 at 4:13 AM, Timothy Arceri <tarceri at itsqueeze.com> wrote:
> ---
>  src/amd/common/ac_nir_to_llvm.c          | 11 +++++-----
>  src/amd/common/ac_shader_abi.h           |  4 ++++
>  src/gallium/drivers/radeonsi/si_shader.c | 35 +++++++++++++++++++-------------
>  3 files changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2ae656693f..36f471dcc7 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
>                                                               LLVMConstInt(ctx->ac.i32, 2, false),
>                                                               llvm_chan, attr_number,
>                                                               ctx->prim_mask);
>                 }
>         }
>         return build_varying_gather_values(&ctx->ac, result, instr->num_components,
>                                            instr->variables[0]->var->data.location_frac);
>  }
>
>  static void
> -visit_emit_vertex(struct nir_to_llvm_context *ctx,
> -                 const nir_intrinsic_instr *instr)
> +visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
>  {
>         LLVMValueRef gs_next_vertex;
>         LLVMValueRef can_emit;
>         int idx;
> +       struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
>
> -       assert(instr->const_index[0] == 0);
>         /* Write vertex attribute values to GSVS ring */
>         gs_next_vertex = LLVMBuildLoad(ctx->builder,
>                                        ctx->gs_next_vertex,
>                                        "");
>
>         /* If this thread has already emitted the declared maximum number of
>          * vertices, kill it: excessive vertex emissions are not supposed to
>          * have any effect, and GS threads have no externally observable
>          * effects other than emitting vertices.
>          */
>         can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
>                                  LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
>         ac_build_kill_if_false(&ctx->ac, can_emit);
>
>         /* loop num outputs */
>         idx = 0;
>         for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> -               LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
> +               LLVMValueRef *out_ptr = &addrs[i * 4];
>                 int length = 4;
>                 int slot = idx;
>                 int slot_inc = 1;
>
>                 if (!(ctx->output_mask & (1ull << i)))
>                         continue;
>
>                 if (i == VARYING_SLOT_CLIP_DIST0) {
>                         /* pack clip and cull into a single set of slots */
>                         length = ctx->num_output_clips + ctx->num_output_culls;
> @@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
>         case nir_intrinsic_var_atomic_exchange:
>         case nir_intrinsic_var_atomic_comp_swap:
>                 result = visit_var_atomic(ctx->nctx, instr);
>                 break;
>         case nir_intrinsic_interp_var_at_centroid:
>         case nir_intrinsic_interp_var_at_sample:
>         case nir_intrinsic_interp_var_at_offset:
>                 result = visit_interp(ctx->nctx, instr);
>                 break;
>         case nir_intrinsic_emit_vertex:
> -               visit_emit_vertex(ctx->nctx, instr);
> +               assert(instr->const_index[0] == 0);
> +               ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
>                 break;
>         case nir_intrinsic_end_primitive:
>                 visit_end_primitive(ctx->nctx, instr);
>                 break;
>         case nir_intrinsic_load_tess_coord:
>                 result = visit_load_tess_coord(ctx->nctx, instr);
>                 break;
>         case nir_intrinsic_load_patch_vertices_in:
>                 result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false);
>                 break;
> @@ -6490,20 +6490,21 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>                 ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
>                                               ac_nir_get_max_workgroup_size(ctx.options->chip_class,
>                                                                             shaders[i]));
>         }
>
>         create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
>                         shader_count >= 2 ? shaders[shader_count - 2]->info.stage  : MESA_SHADER_VERTEX);
>
>         ctx.abi.inputs = &ctx.inputs[0];
>         ctx.abi.emit_outputs = handle_shader_outputs_post;
> +       ctx.abi.emit_vertex = visit_emit_vertex;
>         ctx.abi.load_ssbo = radv_load_ssbo;
>         ctx.abi.load_sampler_desc = radv_get_sampler_desc;
>         ctx.abi.clamp_shadow_reference = false;
>
>         if (shader_count >= 2)
>                 ac_init_exec_full_mask(&ctx.ac);
>
>         if (ctx.ac.chip_class == GFX9 &&
>             shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
>                 ac_nir_fixup_ls_hs_input_vgprs(&ctx);
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 14517d5570..27586d0212 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -51,20 +51,24 @@ struct ac_shader_abi {
>          *
>          * Currently only used for NIR shaders; indexed by variables'
>          * driver_location.
>          */
>         LLVMValueRef *inputs;
>
>         void (*emit_outputs)(struct ac_shader_abi *abi,
>                              unsigned max_outputs,
>                              LLVMValueRef *addrs);
>
> +       void (*emit_vertex)(struct ac_shader_abi *abi,
> +                           unsigned stream,
> +                           LLVMValueRef *addrs);
> +
>         LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
>
>         /**
>          * Load the descriptor for the given buffer.
>          *
>          * \param buffer the buffer as presented in NIR: this is the descriptor
>          *               in Vulkan, and the buffer index in OpenGL/Gallium
>          * \param write whether buffer contents will be written
>          */
>         LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index d234e08071..47ca64fdea 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
>         unsigned stream;
>
>         assert(src0.File == TGSI_FILE_IMMEDIATE);
>
>         imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
>         stream = LLVMConstIntGetZExtValue(imm) & 0x3;
>         return stream;
>  }
>
>  /* Emit one vertex from the geometry shader */
> -static void si_llvm_emit_vertex(
> -       const struct lp_build_tgsi_action *action,
> -       struct lp_build_tgsi_context *bld_base,
> -       struct lp_build_emit_data *emit_data)
> +static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
> +                               unsigned stream,
> +                               LLVMValueRef *addrs)
>  {
> -       struct si_shader_context *ctx = si_shader_context(bld_base);
> -       struct lp_build_context *uint = &bld_base->uint_bld;
> +       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +       struct tgsi_shader_info *info = &ctx->shader->selector->info;
> +       struct lp_build_context *uint = &ctx->bld_base.uint_bld;
>         struct si_shader *shader = ctx->shader;
> -       struct tgsi_shader_info *info = &shader->selector->info;
>         struct lp_build_if_state if_state;
>         LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
>                                             ctx->param_gs2vs_offset);
>         LLVMValueRef gs_next_vertex;
>         LLVMValueRef can_emit;
>         unsigned chan, offset;
>         int i;
> -       unsigned stream;
> -
> -       stream = si_llvm_get_stream(bld_base, emit_data);
>
>         /* Write vertex attribute values to GSVS ring */
>         gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
>                                        ctx->gs_next_vertex[stream],
>                                        "");
>
>         /* If this thread has already emitted the declared maximum number of
>          * vertices, skip the write: excessive vertex emissions are not
>          * supposed to have any effect.
>          *
> @@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex(
>
>         bool use_kill = !info->writes_memory;
>         if (use_kill) {
>                 ac_build_kill_if_false(&ctx->ac, can_emit);
>         } else {
>                 lp_build_if(&if_state, &ctx->gallivm, can_emit);
>         }
>
>         offset = 0;
>         for (i = 0; i < info->num_outputs; i++) {
> -               LLVMValueRef *out_ptr = ctx->outputs[i];
> -
>                 for (chan = 0; chan < 4; chan++) {
>                         if (!(info->output_usagemask[i] & (1 << chan)) ||
>                             ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
>                                 continue;
>
> -                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
> +                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
>                         LLVMValueRef voffset =
>                                 LLVMConstInt(ctx->i32, offset *
>                                              shader->selector->gs_max_out_vertices, 0);
>                         offset++;
>
>                         voffset = lp_build_add(uint, voffset, gs_next_vertex);
>                         voffset = lp_build_mul_imm(uint, voffset, 4);
>
>                         out_val = ac_to_integer(&ctx->ac, out_val);
>
> @@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex(
>
>         LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
>
>         /* Signal vertex emission */
>         ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
>                          si_get_gs_wave_id(ctx));
>         if (!use_kill)
>                 lp_build_endif(&if_state);
>  }
>
> +/* Emit one vertex from the geometry shader */
> +static void si_tgsi_emit_vertex(
> +       const struct lp_build_tgsi_action *action,
> +       struct lp_build_tgsi_context *bld_base,
> +       struct lp_build_emit_data *emit_data)
> +{
> +       struct si_shader_context *ctx = si_shader_context(bld_base);
> +       unsigned stream = si_llvm_get_stream(bld_base, emit_data);
> +
> +       si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
> +}
> +
>  /* Cut one primitive from the geometry shader */
>  static void si_llvm_emit_primitive(
>         const struct lp_build_tgsi_action *action,
>         struct lp_build_tgsi_context *bld_base,
>         struct lp_build_emit_data *emit_data)
>  {
>         struct si_shader_context *ctx = si_shader_context(bld_base);
>         unsigned stream;
>
>         /* Signal primitive cut */
> @@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
>         bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
>         bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
>         bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
>         bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
>         bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
>         bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
>         bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
>         bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
>         bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
>
> -       bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
> +       bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
>         bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
>         bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
>  }
>
>  static void si_optimize_vs_outputs(struct si_shader_context *ctx)
>  {
>         struct si_shader *shader = ctx->shader;
>         struct tgsi_shader_info *info = &shader->selector->info;
>
>         if ((ctx->type != PIPE_SHADER_VERTEX &&
> @@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
>                 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
>                 if (shader->key.as_es)
>                         bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
>                 else {
>                         ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
>                         bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>                 }
>                 break;
>         case PIPE_SHADER_GEOMETRY:
>                 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
> +               ctx->abi.emit_vertex = si_llvm_emit_vertex;
>                 bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
>                 break;
>         case PIPE_SHADER_FRAGMENT:
>                 ctx->load_input = declare_input_fs;
>                 ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
>                 bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>                 break;
>         case PIPE_SHADER_COMPUTE:
>                 break;
>         default:
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev