[Mesa-dev] [PATCH 03/18] ac: add load_tes_inputs() to the abi

Tue Dec 12 10:21:48 UTC 2017

On 12/12/17 20:42, Nicolai Hähnle wrote:
> On 11.12.2017 03:43, Timothy Arceri wrote:
>> ---
>>   src/amd/common/ac_nir_to_llvm.c          | 66 
>> +++++++++++++++++++++-----------
>>   src/amd/common/ac_shader_abi.h           | 12 ++++++
>>   src/gallium/drivers/radeonsi/si_shader.c |  1 +
>>   3 files changed, 57 insertions(+), 22 deletions(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>> b/src/amd/common/ac_nir_to_llvm.c
>> index 663b27d265a..78ae25ee147 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -2840,53 +2840,51 @@ store_tcs_output(struct nir_to_llvm_context *ctx,
>>       }
>>       if (writemask == 0xF) {
>>           ac_build_buffer_store_dword(&ctx->ac, 
>> ctx->hs_ring_tess_offchip, src, 4,
>>                           buf_addr, ctx->oc_lds,
>>                           (base * 4), 1, 0, true, false);
>>       }
>>   }
>>   static LLVMValueRef
>> -load_tes_input(struct nir_to_llvm_context *ctx,
>> -           const nir_intrinsic_instr *instr)
>> +load_tes_input(struct ac_shader_abi *abi,
>> +           LLVMValueRef vertex_index,
>> +           LLVMValueRef param_index,
>> +           unsigned const_index,
>> +           unsigned location,
>> +           unsigned driver_location,
>> +           LLVMTypeRef type,
>> +           unsigned component,
>> +           unsigned num_components,
>> +           bool is_patch,
>> +           bool is_compact)
>>   {
>> +    struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
>>       LLVMValueRef buf_addr;
>>       LLVMValueRef result;
>> -    LLVMValueRef vertex_index = NULL;
>> -    LLVMValueRef indir_index = NULL;
>> -    unsigned const_index = 0;
>> -    unsigned param;
>> -    const bool per_vertex = 
>> nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
>> -    const bool is_compact = instr->variables[0]->var->data.compact;
>> +    unsigned param = shader_io_get_unique_index(location);
>> -    get_deref_offset(ctx->nir, instr->variables[0],
>> -             false, NULL, per_vertex ? &vertex_index : NULL,
>> -             &const_index, &indir_index);
>> -    param = 
>> shader_io_get_unique_index(instr->variables[0]->var->data.location);
>> -    if (instr->variables[0]->var->data.location == 
>> VARYING_SLOT_CLIP_DIST0 &&
>> -        is_compact && const_index > 3) {
>> +    if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && 
>> const_index > 3) {
>>           const_index -= 3;
> 
> So... this is in the original code as well, but why is that -= 3 instead 
> of 4?

Honestly since I didn't have to touch this code I haven't really tried 
to understand it. Seems like a classic example of why you shouldn't use 
magic numbers without a comment.

> 
> 
>>           param++;
>>       }
>> -    unsigned comp = instr->variables[0]->var->data.location_frac;
>>       buf_addr = get_tcs_tes_buffer_address_params(ctx, param, 
>> const_index,
>> -                             is_compact, vertex_index, indir_index);
>> +                             is_compact, vertex_index, param_index);
>> -    LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, comp * 4, 
>> false);
>> +    LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 
>> 4, false);
>>       buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
>> -    result = ac_build_buffer_load(&ctx->ac, 
>> ctx->hs_ring_tess_offchip, instr->num_components, NULL,
>> +    result = ac_build_buffer_load(&ctx->ac, 
>> ctx->hs_ring_tess_offchip, num_components, NULL,
>>                         buf_addr, ctx->oc_lds, is_compact ? (4 * 
>> const_index) : 0, 1, 0, true, false);
>> -    result = trim_vector(&ctx->ac, result, instr->num_components);
>> -    result = LLVMBuildBitCast(ctx->builder, result, 
>> get_def_type(ctx->nir, &instr->dest.ssa), "");
>> +    result = trim_vector(&ctx->ac, result, num_components);
>>       return result;
>>   }
>>   static LLVMValueRef
>>   load_gs_input(struct ac_shader_abi *abi,
>>             unsigned location,
>>             unsigned driver_location,
>>             unsigned component,
>>             unsigned num_components,
>>             unsigned vertex_index,
>> @@ -2988,22 +2986,45 @@ static LLVMValueRef visit_load_var(struct 
>> ac_nir_context *ctx,
>>       get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
>>                         &const_index, &indir_index);
>>       if (instr->dest.ssa.bit_size == 64)
>>           ve *= 2;
>>       switch (instr->variables[0]->var->data.mode) {
>>       case nir_var_shader_in:
>>           if (ctx->stage == MESA_SHADER_TESS_CTRL)
>>               return load_tcs_input(ctx->nctx, instr);
>> -        if (ctx->stage == MESA_SHADER_TESS_EVAL)
>> -            return load_tes_input(ctx->nctx, instr);
>> +        if (ctx->stage == MESA_SHADER_TESS_EVAL) {
>> +            LLVMValueRef result;
>> +            LLVMValueRef vertex_index = NULL;
>> +            LLVMValueRef indir_index = NULL;
>> +            unsigned const_index = 0;
>> +            unsigned location = instr->variables[0]->var->data.location;
>> +            unsigned driver_location = 
>> instr->variables[0]->var->data.driver_location;
>> +            const bool is_patch =  instr->variables[0]->var->data.patch;
>> +            const bool is_compact = 
>> instr->variables[0]->var->data.compact;
>> +
>> +            get_deref_offset(ctx, instr->variables[0],
>> +                     false, NULL, is_patch ? NULL : &vertex_index,
>> +                     &const_index, &indir_index);
>> +
>> +            result = ctx->abi->load_tess_inputs(ctx->abi, 
>> vertex_index, indir_index,
>> +                                const_index, location, driver_location,
>> +                                nir2llvmtype(ctx, 
>> instr->variables[0]->var->type),
>> +                                
>> instr->variables[0]->var->data.location_frac,
>> +                                instr->num_components,
>> +                                is_patch, is_compact);
>> +            return ctx->nctx ?
>> +                LLVMBuildBitCast(ctx->nctx->builder, result, 
>> get_def_type(ctx, &instr->dest.ssa), "") :
>> +                result;
> 
> Why is this conditional on ctx->nctx? Could and should probably just use 
> ctx->ac.builder...

I'm fairly certain the bitcast caused problems for the radeonsi path. 
Maybe this is not the best solution but this is one of a couple of 
places where the backends need to be massaged in slightly different 
ways, checking for ctx->nctx is a convenient way to switch paths.

> 
> Cheers,
> Nicolai
> 
> 
>> +        }
>> +
>>           if (ctx->stage == MESA_SHADER_GEOMETRY) {
>>                   LLVMValueRef indir_index;
>>                   unsigned const_index, vertex_index;
>>                   get_deref_offset(ctx, instr->variables[0],
>>                            false, &vertex_index, NULL,
>>                            &const_index, &indir_index);
>>               return ctx->abi->load_inputs(ctx->abi, 
>> instr->variables[0]->var->data.location,
>>                                
>> instr->variables[0]->var->data.driver_location,
>>                                
>> instr->variables[0]->var->data.location_frac, ve,
>>                                vertex_index, const_index,
>> @@ -6561,20 +6582,21 @@ LLVMModuleRef 
>> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>           if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
>>               ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, 
>> ctx.ac.i32, "gs_next_vertex");
>>               ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
>>               ctx.abi.load_inputs = load_gs_input;
>>           } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
>>               ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
>>               ctx.tcs_patch_outputs_read = 
>> shaders[i]->info.patch_outputs_read;
>>           } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
>>               ctx.tes_primitive_mode = 
>> shaders[i]->info.tess.primitive_mode;
>> +            ctx.abi.load_tess_inputs = load_tes_input;
>>           } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
>>               if (shader_info->info.vs.needs_instance_id) {
>>                   ctx.shader_info->vs.vgpr_comp_cnt =
>>                       MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
>>               }
>>           } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
>>               shader_info->fs.can_discard = 
>> shaders[i]->info.fs.uses_discard;
>>           }
>>           if (i)
>> diff --git a/src/amd/common/ac_shader_abi.h 
>> b/src/amd/common/ac_shader_abi.h
>> index 68fc431d426..f22a6a001e5 100644
>> --- a/src/amd/common/ac_shader_abi.h
>> +++ b/src/amd/common/ac_shader_abi.h
>> @@ -66,20 +66,32 @@ struct ac_shader_abi {
>>       LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
>>                       unsigned location,
>>                       unsigned driver_location,
>>                       unsigned component,
>>                       unsigned num_components,
>>                       unsigned vertex_index,
>>                       unsigned const_index,
>>                       LLVMTypeRef type);
>> +    LLVMValueRef (*load_tess_inputs)(struct ac_shader_abi *abi,
>> +                     LLVMValueRef vertex_index,
>> +                     LLVMValueRef param_index,
>> +                     unsigned const_index,
>> +                     unsigned location,
>> +                     unsigned driver_location,
>> +                     LLVMTypeRef type,
>> +                     unsigned component,
>> +                     unsigned num_components,
>> +                     bool is_patch,
>> +                     bool is_compact);
>> +
>>       LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef 
>> index);
>>       /**
>>        * Load the descriptor for the given buffer.
>>        *
>>        * \param buffer the buffer as presented in NIR: this is the 
>> descriptor
>>        *               in Vulkan, and the buffer index in OpenGL/Gallium
>>        * \param write whether buffer contents will be written
>>        */
>>       LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index df77b28c577..96488ca4e29 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -5857,20 +5857,21 @@ static bool si_compile_tgsi_main(struct 
>> si_shader_context *ctx,
>>           bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>           break;
>>       case PIPE_SHADER_TESS_CTRL:
>>           bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
>>           bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = 
>> fetch_output_tcs;
>>           bld_base->emit_store = store_output_tcs;
>>           bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
>>           break;
>>       case PIPE_SHADER_TESS_EVAL:
>>           bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
>> +        ctx->abi.load_tess_inputs = si_nir_load_input_tes;
>>           if (shader->key.as_es)
>>               ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
>>           else
>>               ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
>>           bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>           break;
>>       case PIPE_SHADER_GEOMETRY:
>>           bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
>>           ctx->abi.load_inputs = si_nir_load_input_gs;
>>           ctx->abi.emit_vertex = si_llvm_emit_vertex;
>>
> 
>