[Mesa-dev] [PATCH 17/20] ac: add si_nir_load_input_gs() to the abi

Wed Nov 15 11:23:22 UTC 2017

On 15/11/17 22:17, Nicolai Hähnle wrote:
> On 15.11.2017 12:09, Timothy Arceri wrote:
>>
>>
>> On 15/11/17 21:56, Nicolai Hähnle wrote:
>>> On 10.11.2017 04:13, Timothy Arceri wrote:
>>>> ---
>>>>   src/amd/common/ac_nir_to_llvm.c                   | 24 
>>>> ++++++++++++---------
>>>>   src/amd/common/ac_shader_abi.h                    |  7 ++++++
>>>>   src/gallium/drivers/radeonsi/si_shader.c          |  1 +
>>>>   src/gallium/drivers/radeonsi/si_shader_internal.h |  5 +++++
>>>>   src/gallium/drivers/radeonsi/si_shader_nir.c      | 26 
>>>> +++++++++++++++++++++++
>>>>   5 files changed, 53 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>>>> b/src/amd/common/ac_nir_to_llvm.c
>>>> index 158e954fa8..483dd52b36 100644
>>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>>> @@ -2854,32 +2854,31 @@ load_tes_input(struct nir_to_llvm_context *ctx,
>>>>       buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
>>>>       result = ac_build_buffer_load(&ctx->ac, 
>>>> ctx->hs_ring_tess_offchip, instr->num_components, NULL,
>>>>                         buf_addr, ctx->oc_lds, is_compact ? (4 * 
>>>> const_index) : 0, 1, 0, true, false);
>>>>       result = trim_vector(&ctx->ac, result, instr->num_components);
>>>>       result = LLVMBuildBitCast(ctx->builder, result, 
>>>> get_def_type(ctx->nir, &instr->dest.ssa), "");
>>>>       return result;
>>>>   }
>>>>   static LLVMValueRef
>>>> -load_gs_input(struct nir_to_llvm_context *ctx,
>>>> -          nir_intrinsic_instr *instr)
>>>> +load_gs_input(struct ac_shader_abi *abi,
>>>> +          nir_intrinsic_instr *instr,
>>>> +          unsigned vertex_index,
>>>> +          unsigned const_index)
>>>>   {
>>>> -    LLVMValueRef indir_index, vtx_offset;
>>>> -    unsigned const_index;
>>>> +    struct nir_to_llvm_context *ctx = 
>>>> nir_to_llvm_context_from_abi(abi);
>>>> +    LLVMValueRef vtx_offset;
>>>>       LLVMValueRef args[9];
>>>>       unsigned param, vtx_offset_param;
>>>>       LLVMValueRef value[4], result;
>>>> -    unsigned vertex_index;
>>>> -    get_deref_offset(ctx->nir, instr->variables[0],
>>>> -             false, &vertex_index, NULL,
>>>> -             &const_index, &indir_index);
>>>> +
>>>>       vtx_offset_param = vertex_index;
>>>>       assert(vtx_offset_param < 6);
>>>>       vtx_offset = LLVMBuildMul(ctx->builder, 
>>>> ctx->gs_vtx_offset[vtx_offset_param],
>>>>                     LLVMConstInt(ctx->ac.i32, 4, false), "");
>>>>       param = 
>>>> shader_io_get_unique_index(instr->variables[0]->var->data.location);
>>>>       unsigned comp = instr->variables[0]->var->data.location_frac;
>>>>       for (unsigned i = comp; i < instr->num_components + comp; i++) {
>>>>           if (ctx->ac.chip_class >= GFX9) {
>>>> @@ -2966,21 +2965,26 @@ static LLVMValueRef visit_load_var(struct 
>>>> ac_nir_context *ctx,
>>>>       if (instr->dest.ssa.bit_size == 64)
>>>>           ve *= 2;
>>>>       switch (instr->variables[0]->var->data.mode) {
>>>>       case nir_var_shader_in:
>>>>           if (ctx->stage == MESA_SHADER_TESS_CTRL)
>>>>               return load_tcs_input(ctx->nctx, instr);
>>>>           if (ctx->stage == MESA_SHADER_TESS_EVAL)
>>>>               return load_tes_input(ctx->nctx, instr);
>>>>           if (ctx->stage == MESA_SHADER_GEOMETRY) {
>>>> -            return load_gs_input(ctx->nctx, instr);
>>>> +                LLVMValueRef indir_index;
>>>> +                unsigned const_index, vertex_index;
>>>> +                get_deref_offset(ctx, instr->variables[0],
>>>> +                         false, &vertex_index, NULL,
>>>> +                         &const_index, &indir_index);
>>>
>>> The indentation looks wrong here.
>>>
>>>
>>>> +            return ctx->abi->load_inputs(ctx->abi, instr, 
>>>> vertex_index, const_index);
>>>>           }
>>>>           for (unsigned chan = comp; chan < ve + comp; chan++) {
>>>>               if (indir_index) {
>>>>                   unsigned count = glsl_count_attribute_slots(
>>>>                           instr->variables[0]->var->type,
>>>>                           ctx->stage == MESA_SHADER_VERTEX);
>>>>                   count -= chan / 4;
>>>>                   LLVMValueRef tmp_vec = 
>>>> ac_build_gather_values_extended(
>>>>                           &ctx->ac, ctx->abi->inputs + idx + chan, 
>>>> count,
>>>> @@ -6489,22 +6493,22 @@ LLVMModuleRef 
>>>> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>>>       for(int i = 0; i < shader_count; ++i) {
>>>>           ctx.stage = shaders[i]->info.stage;
>>>>           ctx.output_mask = 0;
>>>>           ctx.tess_outputs_written = 0;
>>>>           ctx.num_output_clips = 
>>>> shaders[i]->info.clip_distance_array_size;
>>>>           ctx.num_output_culls = 
>>>> shaders[i]->info.cull_distance_array_size;
>>>>           if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
>>>>               ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, 
>>>> ctx.ac.i32, "gs_next_vertex");
>>>> -
>>>>               ctx.gs_max_out_vertices = 
>>>> shaders[i]->info.gs.vertices_out;
>>>> +            ctx.abi.load_inputs = load_gs_input;
>>>>           } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
>>>>               ctx.tes_primitive_mode = 
>>>> shaders[i]->info.tess.primitive_mode;
>>>>           } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
>>>>               if (shader_info->info.vs.needs_instance_id) {
>>>>                   ctx.shader_info->vs.vgpr_comp_cnt =
>>>>                       MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
>>>>               }
>>>>           } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
>>>>               shader_info->fs.can_discard = 
>>>> shaders[i]->info.fs.uses_discard;
>>>>           }
>>>> diff --git a/src/amd/common/ac_shader_abi.h 
>>>> b/src/amd/common/ac_shader_abi.h
>>>> index 27586d0212..6ba1a51e07 100644
>>>> --- a/src/amd/common/ac_shader_abi.h
>>>> +++ b/src/amd/common/ac_shader_abi.h
>>>> @@ -19,20 +19,22 @@
>>>>    * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
>>>> TORT OR
>>>>    * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
>>>> SOFTWARE OR THE
>>>>    * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>    */
>>>>   #ifndef AC_SHADER_ABI_H
>>>>   #define AC_SHADER_ABI_H
>>>>   #include <llvm-c/Core.h>
>>>> +#include "nir.h" >
>>>> +
>>>>   enum ac_descriptor_type {
>>>>       AC_DESC_IMAGE,
>>>>       AC_DESC_FMASK,
>>>>       AC_DESC_SAMPLER,
>>>>       AC_DESC_BUFFER,
>>>>   };
>>>>   /* Document the shader ABI during compilation. This is what allows 
>>>> radeonsi and
>>>>    * radv to share a compiler backend.
>>>>    */
>>>> @@ -55,20 +57,25 @@ struct ac_shader_abi {
>>>>       LLVMValueRef *inputs;
>>>>       void (*emit_outputs)(struct ac_shader_abi *abi,
>>>>                    unsigned max_outputs,
>>>>                    LLVMValueRef *addrs);
>>>>       void (*emit_vertex)(struct ac_shader_abi *abi,
>>>>                   unsigned stream,
>>>>                   LLVMValueRef *addrs);
>>>> +    LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
>>>> +                    nir_intrinsic_instr *instr,
>>>> +                    unsigned vertex_index,
>>>> +                    unsigned const_index);
>>>> +
>>>>       LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, 
>>>> LLVMValueRef index);
>>>>       /**
>>>>        * Load the descriptor for the given buffer.
>>>>        *
>>>>        * \param buffer the buffer as presented in NIR: this is the 
>>>> descriptor
>>>>        *               in Vulkan, and the buffer index in 
>>>> OpenGL/Gallium
>>>>        * \param write whether buffer contents will be written
>>>>        */
>>>>       LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
>>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>>> index 06e3d0f9f1..746816d6a3 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>>> @@ -5814,20 +5814,21 @@ static bool si_compile_tgsi_main(struct 
>>>> si_shader_context *ctx,
>>>>           if (shader->key.as_es) {
>>>>               ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
>>>>               bld_base->emit_epilogue = si_tgsi_emit_es_epilogue;
>>>>           } else {
>>>>               ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
>>>>               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>>>           }
>>>>           break;
>>>>       case PIPE_SHADER_GEOMETRY:
>>>>           bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
>>>> +        ctx->abi.load_inputs = si_nir_load_input_gs;
>>>>           ctx->abi.emit_vertex = si_llvm_emit_vertex;
>>>>           ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
>>>>           bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
>>>>           break;
>>>>       case PIPE_SHADER_FRAGMENT:
>>>>           ctx->load_input = declare_input_fs;
>>>>           ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
>>>>           bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>>>           break;
>>>>       case PIPE_SHADER_COMPUTE:
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
>>>> b/src/gallium/drivers/radeonsi/si_shader_internal.h
>>>> index 58413e9947..afb723d2ef 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
>>>> @@ -332,11 +332,16 @@ void si_llvm_load_input_vs(
>>>>       struct si_shader_context *ctx,
>>>>       unsigned input_index,
>>>>       LLVMValueRef out[4]);
>>>>   void si_llvm_load_input_fs(
>>>>       struct si_shader_context *ctx,
>>>>       unsigned input_index,
>>>>       LLVMValueRef out[4]);
>>>>   bool si_nir_build_llvm(struct si_shader_context *ctx, struct 
>>>> nir_shader *nir);
>>>> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
>>>> +                  nir_intrinsic_instr *instr,
>>>> +                  unsigned vertex_index,
>>>> +                  unsigned const_index);
>>>> +
>>>>   #endif
>>>> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
>>>> b/src/gallium/drivers/radeonsi/si_shader_nir.c
>>>> index fca16f46cf..5b68ff2a07 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
>>>> @@ -480,20 +480,46 @@ static void declare_nir_input_fs(struct 
>>>> si_shader_context *ctx,
>>>>           out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
>>>>           out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
>>>>                   LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
>>>>           return;
>>>>       }
>>>>       si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
>>>>       (*fs_attr_idx)++;
>>>>   }
>>>> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
>>>> +                  nir_intrinsic_instr *instr,
>>>> +                  unsigned vertex_index,
>>>> +                  unsigned const_index)
>>>> +{
>>>> +    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>>>> +
>>>> +    nir_shader *nir = ctx->shader->selector->nir;
>>>> +    unsigned input_index = 0;
>>>> +    nir_foreach_variable(var, &nir->inputs) {
>>>> +        if (instr->variables[0]->var->data.location == 
>>>> var->data.location)
>>>> +            break;
>>>> +        input_index++;
>>>> +    }
>>>
>>> This whole approach here seems wrong to me. The order in which input 
>>> variables are declared shouldn't matter.
>>>
>>> The TGSI code uses si_shader_io_get_unique_index, and I think the NIR 
>>> path should do the same (as does the NIR-path for radv, by the way...). 
>>
>> This code does use si_shader_io_get_unique_index() the input_index is 
>> used as a lookup e.g info->input_semantic_name[input_index] so that we 
>> can get the parms to pass into si_shader_io_get_unique_index().
>>
>> TGSI stores this input_index for later use, it's a TGSIism that we 
>> need to work around.
> 
> Could we use var->data.driver_location like for vertex and fragment I/O?

Hmm...maybe.

> 
> Then the interface could be reduced to
> 
> load_input(abi, vertex_index, driver_location, const_index, component);
> 
> (not sure if const_index is still needed, actually)

It will also require some shuffling on the radv side to get to this but 
it might be doable. I'll give it a try tomorrow. Thanks!

> 
> Cheers,
> Nicolai
> 
> 
>>
>>
>>> That way, the dependency of ac_shader_abi on NIR can also be removed, 
>>> which would be preferable.
>>>
>>> Cheers,
>>> Nicolai
>>>
>>>
>>>> +
>>>> +    LLVMValueRef value[4];
>>>> +    unsigned comp = instr->variables[0]->var->data.location_frac;
>>>> +    for (unsigned i = comp; i < instr->num_components + comp; i++) {
>>>> +        value[i] = si_llvm_load_input_gs(&ctx->abi, input_index, 
>>>> vertex_index,
>>>> +                         nir2llvmtype(ctx, 
>>>> instr->variables[0]->var->type),
>>>> +                         i);
>>>> +    }
>>>> +
>>>> +    return ac_build_varying_gather_values(&ctx->ac, value, 
>>>> instr->num_components, comp);
>>>> +}
>>>> +
>>>>   static LLVMValueRef
>>>>   si_nir_load_sampler_desc(struct ac_shader_abi *abi,
>>>>                    unsigned descriptor_set, unsigned base_index,
>>>>                    unsigned constant_index, LLVMValueRef dynamic_index,
>>>>                    enum ac_descriptor_type desc_type, bool image,
>>>>                bool write)
>>>>   {
>>>>       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>>>>       LLVMBuilderRef builder = ctx->ac.builder;
>>>>       LLVMValueRef list = LLVMGetParam(ctx->main_fn, 
>>>> ctx->param_samplers_and_images);
>>>>
>>>
>>>
> 
>