[Mesa-dev] [PATCH 17/20] ac: add si_nir_load_input_gs() to the abi
Nicolai Hähnle
nhaehnle at gmail.com
Wed Nov 15 11:17:47 UTC 2017
On 15.11.2017 12:09, Timothy Arceri wrote:
>
>
> On 15/11/17 21:56, Nicolai Hähnle wrote:
>> On 10.11.2017 04:13, Timothy Arceri wrote:
>>> ---
>>> src/amd/common/ac_nir_to_llvm.c | 24
>>> ++++++++++++---------
>>> src/amd/common/ac_shader_abi.h | 7 ++++++
>>> src/gallium/drivers/radeonsi/si_shader.c | 1 +
>>> src/gallium/drivers/radeonsi/si_shader_internal.h | 5 +++++
>>> src/gallium/drivers/radeonsi/si_shader_nir.c | 26
>>> +++++++++++++++++++++++
>>> 5 files changed, 53 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index 158e954fa8..483dd52b36 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -2854,32 +2854,31 @@ load_tes_input(struct nir_to_llvm_context *ctx,
>>> buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
>>> result = ac_build_buffer_load(&ctx->ac,
>>> ctx->hs_ring_tess_offchip, instr->num_components, NULL,
>>> buf_addr, ctx->oc_lds, is_compact ? (4 *
>>> const_index) : 0, 1, 0, true, false);
>>> result = trim_vector(&ctx->ac, result, instr->num_components);
>>> result = LLVMBuildBitCast(ctx->builder, result,
>>> get_def_type(ctx->nir, &instr->dest.ssa), "");
>>> return result;
>>> }
>>> static LLVMValueRef
>>> -load_gs_input(struct nir_to_llvm_context *ctx,
>>> - nir_intrinsic_instr *instr)
>>> +load_gs_input(struct ac_shader_abi *abi,
>>> + nir_intrinsic_instr *instr,
>>> + unsigned vertex_index,
>>> + unsigned const_index)
>>> {
>>> - LLVMValueRef indir_index, vtx_offset;
>>> - unsigned const_index;
>>> + struct nir_to_llvm_context *ctx =
>>> nir_to_llvm_context_from_abi(abi);
>>> + LLVMValueRef vtx_offset;
>>> LLVMValueRef args[9];
>>> unsigned param, vtx_offset_param;
>>> LLVMValueRef value[4], result;
>>> - unsigned vertex_index;
>>> - get_deref_offset(ctx->nir, instr->variables[0],
>>> - false, &vertex_index, NULL,
>>> - &const_index, &indir_index);
>>> +
>>> vtx_offset_param = vertex_index;
>>> assert(vtx_offset_param < 6);
>>> vtx_offset = LLVMBuildMul(ctx->builder,
>>> ctx->gs_vtx_offset[vtx_offset_param],
>>> LLVMConstInt(ctx->ac.i32, 4, false), "");
>>> param =
>>> shader_io_get_unique_index(instr->variables[0]->var->data.location);
>>> unsigned comp = instr->variables[0]->var->data.location_frac;
>>> for (unsigned i = comp; i < instr->num_components + comp; i++) {
>>> if (ctx->ac.chip_class >= GFX9) {
>>> @@ -2966,21 +2965,26 @@ static LLVMValueRef visit_load_var(struct
>>> ac_nir_context *ctx,
>>> if (instr->dest.ssa.bit_size == 64)
>>> ve *= 2;
>>> switch (instr->variables[0]->var->data.mode) {
>>> case nir_var_shader_in:
>>> if (ctx->stage == MESA_SHADER_TESS_CTRL)
>>> return load_tcs_input(ctx->nctx, instr);
>>> if (ctx->stage == MESA_SHADER_TESS_EVAL)
>>> return load_tes_input(ctx->nctx, instr);
>>> if (ctx->stage == MESA_SHADER_GEOMETRY) {
>>> - return load_gs_input(ctx->nctx, instr);
>>> + LLVMValueRef indir_index;
>>> + unsigned const_index, vertex_index;
>>> + get_deref_offset(ctx, instr->variables[0],
>>> + false, &vertex_index, NULL,
>>> + &const_index, &indir_index);
>>
>> The indentation looks wrong here.
>>
>>
>>> + return ctx->abi->load_inputs(ctx->abi, instr,
>>> vertex_index, const_index);
>>> }
>>> for (unsigned chan = comp; chan < ve + comp; chan++) {
>>> if (indir_index) {
>>> unsigned count = glsl_count_attribute_slots(
>>> instr->variables[0]->var->type,
>>> ctx->stage == MESA_SHADER_VERTEX);
>>> count -= chan / 4;
>>> LLVMValueRef tmp_vec =
>>> ac_build_gather_values_extended(
>>> &ctx->ac, ctx->abi->inputs + idx + chan,
>>> count,
>>> @@ -6489,22 +6493,22 @@ LLVMModuleRef
>>> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>>> for(int i = 0; i < shader_count; ++i) {
>>> ctx.stage = shaders[i]->info.stage;
>>> ctx.output_mask = 0;
>>> ctx.tess_outputs_written = 0;
>>> ctx.num_output_clips =
>>> shaders[i]->info.clip_distance_array_size;
>>> ctx.num_output_culls =
>>> shaders[i]->info.cull_distance_array_size;
>>> if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
>>> ctx.gs_next_vertex = ac_build_alloca(&ctx.ac,
>>> ctx.ac.i32, "gs_next_vertex");
>>> -
>>> ctx.gs_max_out_vertices =
>>> shaders[i]->info.gs.vertices_out;
>>> + ctx.abi.load_inputs = load_gs_input;
>>> } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
>>> ctx.tes_primitive_mode =
>>> shaders[i]->info.tess.primitive_mode;
>>> } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
>>> if (shader_info->info.vs.needs_instance_id) {
>>> ctx.shader_info->vs.vgpr_comp_cnt =
>>> MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
>>> }
>>> } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
>>> shader_info->fs.can_discard =
>>> shaders[i]->info.fs.uses_discard;
>>> }
>>> diff --git a/src/amd/common/ac_shader_abi.h
>>> b/src/amd/common/ac_shader_abi.h
>>> index 27586d0212..6ba1a51e07 100644
>>> --- a/src/amd/common/ac_shader_abi.h
>>> +++ b/src/amd/common/ac_shader_abi.h
>>> @@ -19,20 +19,22 @@
>>> * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>> TORT OR
>>> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>> SOFTWARE OR THE
>>> * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> */
>>> #ifndef AC_SHADER_ABI_H
>>> #define AC_SHADER_ABI_H
>>> #include <llvm-c/Core.h>
>>> +#include "nir.h" >
>>> +
>>> enum ac_descriptor_type {
>>> AC_DESC_IMAGE,
>>> AC_DESC_FMASK,
>>> AC_DESC_SAMPLER,
>>> AC_DESC_BUFFER,
>>> };
>>> /* Document the shader ABI during compilation. This is what allows
>>> radeonsi and
>>> * radv to share a compiler backend.
>>> */
>>> @@ -55,20 +57,25 @@ struct ac_shader_abi {
>>> LLVMValueRef *inputs;
>>> void (*emit_outputs)(struct ac_shader_abi *abi,
>>> unsigned max_outputs,
>>> LLVMValueRef *addrs);
>>> void (*emit_vertex)(struct ac_shader_abi *abi,
>>> unsigned stream,
>>> LLVMValueRef *addrs);
>>> + LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
>>> + nir_intrinsic_instr *instr,
>>> + unsigned vertex_index,
>>> + unsigned const_index);
>>> +
>>> LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi,
>>> LLVMValueRef index);
>>> /**
>>> * Load the descriptor for the given buffer.
>>> *
>>> * \param buffer the buffer as presented in NIR: this is the
>>> descriptor
>>> * in Vulkan, and the buffer index in OpenGL/Gallium
>>> * \param write whether buffer contents will be written
>>> */
>>> LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>>> b/src/gallium/drivers/radeonsi/si_shader.c
>>> index 06e3d0f9f1..746816d6a3 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>>> @@ -5814,20 +5814,21 @@ static bool si_compile_tgsi_main(struct
>>> si_shader_context *ctx,
>>> if (shader->key.as_es) {
>>> ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
>>> bld_base->emit_epilogue = si_tgsi_emit_es_epilogue;
>>> } else {
>>> ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
>>> bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>> }
>>> break;
>>> case PIPE_SHADER_GEOMETRY:
>>> bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
>>> + ctx->abi.load_inputs = si_nir_load_input_gs;
>>> ctx->abi.emit_vertex = si_llvm_emit_vertex;
>>> ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
>>> bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
>>> break;
>>> case PIPE_SHADER_FRAGMENT:
>>> ctx->load_input = declare_input_fs;
>>> ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
>>> bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>>> break;
>>> case PIPE_SHADER_COMPUTE:
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h
>>> b/src/gallium/drivers/radeonsi/si_shader_internal.h
>>> index 58413e9947..afb723d2ef 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
>>> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
>>> @@ -332,11 +332,16 @@ void si_llvm_load_input_vs(
>>> struct si_shader_context *ctx,
>>> unsigned input_index,
>>> LLVMValueRef out[4]);
>>> void si_llvm_load_input_fs(
>>> struct si_shader_context *ctx,
>>> unsigned input_index,
>>> LLVMValueRef out[4]);
>>> bool si_nir_build_llvm(struct si_shader_context *ctx, struct
>>> nir_shader *nir);
>>> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
>>> + nir_intrinsic_instr *instr,
>>> + unsigned vertex_index,
>>> + unsigned const_index);
>>> +
>>> #endif
>>> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
>>> b/src/gallium/drivers/radeonsi/si_shader_nir.c
>>> index fca16f46cf..5b68ff2a07 100644
>>> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
>>> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
>>> @@ -480,20 +480,46 @@ static void declare_nir_input_fs(struct
>>> si_shader_context *ctx,
>>> out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
>>> out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
>>> LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
>>> return;
>>> }
>>> si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
>>> (*fs_attr_idx)++;
>>> }
>>> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
>>> + nir_intrinsic_instr *instr,
>>> + unsigned vertex_index,
>>> + unsigned const_index)
>>> +{
>>> + struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>>> +
>>> + nir_shader *nir = ctx->shader->selector->nir;
>>> + unsigned input_index = 0;
>>> + nir_foreach_variable(var, &nir->inputs) {
>>> + if (instr->variables[0]->var->data.location ==
>>> var->data.location)
>>> + break;
>>> + input_index++;
>>> + }
>>
>> This whole approach here seems wrong to me. The order in which input
>> variables are declared shouldn't matter.
>>
>> The TGSI code uses si_shader_io_get_unique_index, and I think the NIR
>> path should do the same (as does the NIR-path for radv, by the way...).
>
> This code does use si_shader_io_get_unique_index() the input_index is
> used as a lookup e.g info->input_semantic_name[input_index] so that we
> can get the parms to pass into si_shader_io_get_unique_index().
>
> TGSI stores this input_index for later use, it's a TGSIism that we need
> to work around.
Could we use var->data.driver_location like for vertex and fragment I/O?
Then the interface could be reduced to
load_input(abi, vertex_index, driver_location, const_index, component);
(not sure if const_index is still needed, actually)
Cheers,
Nicolai
>
>
>> That way, the dependency of ac_shader_abi on NIR can also be removed,
>> which would be preferable.
>>
>> Cheers,
>> Nicolai
>>
>>
>>> +
>>> + LLVMValueRef value[4];
>>> + unsigned comp = instr->variables[0]->var->data.location_frac;
>>> + for (unsigned i = comp; i < instr->num_components + comp; i++) {
>>> + value[i] = si_llvm_load_input_gs(&ctx->abi, input_index,
>>> vertex_index,
>>> + nir2llvmtype(ctx,
>>> instr->variables[0]->var->type),
>>> + i);
>>> + }
>>> +
>>> + return ac_build_varying_gather_values(&ctx->ac, value,
>>> instr->num_components, comp);
>>> +}
>>> +
>>> static LLVMValueRef
>>> si_nir_load_sampler_desc(struct ac_shader_abi *abi,
>>> unsigned descriptor_set, unsigned base_index,
>>> unsigned constant_index, LLVMValueRef dynamic_index,
>>> enum ac_descriptor_type desc_type, bool image,
>>> bool write)
>>> {
>>> struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>>> LLVMBuilderRef builder = ctx->ac.builder;
>>> LLVMValueRef list = LLVMGetParam(ctx->main_fn,
>>> ctx->param_samplers_and_images);
>>>
>>
>>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list