[Mesa-dev] [PATCH 17/20] ac: add si_nir_load_input_gs() to the abi

Wed Nov 15 10:56:34 UTC 2017

On 10.11.2017 04:13, Timothy Arceri wrote:
> ---
>   src/amd/common/ac_nir_to_llvm.c                   | 24 ++++++++++++---------
>   src/amd/common/ac_shader_abi.h                    |  7 ++++++
>   src/gallium/drivers/radeonsi/si_shader.c          |  1 +
>   src/gallium/drivers/radeonsi/si_shader_internal.h |  5 +++++
>   src/gallium/drivers/radeonsi/si_shader_nir.c      | 26 +++++++++++++++++++++++
>   5 files changed, 53 insertions(+), 10 deletions(-)
> 
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 158e954fa8..483dd52b36 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2854,32 +2854,31 @@ load_tes_input(struct nir_to_llvm_context *ctx,
>   	buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
>   
>   	result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
>   				      buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
>   	result = trim_vector(&ctx->ac, result, instr->num_components);
>   	result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
>   	return result;
>   }
>   
>   static LLVMValueRef
> -load_gs_input(struct nir_to_llvm_context *ctx,
> -	      nir_intrinsic_instr *instr)
> +load_gs_input(struct ac_shader_abi *abi,
> +	      nir_intrinsic_instr *instr,
> +	      unsigned vertex_index,
> +	      unsigned const_index)
>   {
> -	LLVMValueRef indir_index, vtx_offset;
> -	unsigned const_index;
> +	struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
> +	LLVMValueRef vtx_offset;
>   	LLVMValueRef args[9];
>   	unsigned param, vtx_offset_param;
>   	LLVMValueRef value[4], result;
> -	unsigned vertex_index;
> -	get_deref_offset(ctx->nir, instr->variables[0],
> -			 false, &vertex_index, NULL,
> -			 &const_index, &indir_index);
> +
>   	vtx_offset_param = vertex_index;
>   	assert(vtx_offset_param < 6);
>   	vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
>   				  LLVMConstInt(ctx->ac.i32, 4, false), "");
>   
>   	param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
>   
>   	unsigned comp = instr->variables[0]->var->data.location_frac;
>   	for (unsigned i = comp; i < instr->num_components + comp; i++) {
>   		if (ctx->ac.chip_class >= GFX9) {
> @@ -2966,21 +2965,26 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
>   	if (instr->dest.ssa.bit_size == 64)
>   		ve *= 2;
>   
>   	switch (instr->variables[0]->var->data.mode) {
>   	case nir_var_shader_in:
>   		if (ctx->stage == MESA_SHADER_TESS_CTRL)
>   			return load_tcs_input(ctx->nctx, instr);
>   		if (ctx->stage == MESA_SHADER_TESS_EVAL)
>   			return load_tes_input(ctx->nctx, instr);
>   		if (ctx->stage == MESA_SHADER_GEOMETRY) {
> -			return load_gs_input(ctx->nctx, instr);
> +				LLVMValueRef indir_index;
> +				unsigned const_index, vertex_index;
> +				get_deref_offset(ctx, instr->variables[0],
> +						 false, &vertex_index, NULL,
> +						 &const_index, &indir_index);

The indentation looks wrong here.

> +			return ctx->abi->load_inputs(ctx->abi, instr, vertex_index, const_index);
>   		}
>   
>   		for (unsigned chan = comp; chan < ve + comp; chan++) {
>   			if (indir_index) {
>   				unsigned count = glsl_count_attribute_slots(
>   						instr->variables[0]->var->type,
>   						ctx->stage == MESA_SHADER_VERTEX);
>   				count -= chan / 4;
>   				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
>   						&ctx->ac, ctx->abi->inputs + idx + chan, count,
> @@ -6489,22 +6493,22 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
>   
>   	for(int i = 0; i < shader_count; ++i) {
>   		ctx.stage = shaders[i]->info.stage;
>   		ctx.output_mask = 0;
>   		ctx.tess_outputs_written = 0;
>   		ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
>   		ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
>   
>   		if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
>   			ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
> -
>   			ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
> +			ctx.abi.load_inputs = load_gs_input;
>   		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
>   			ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
>   		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
>   			if (shader_info->info.vs.needs_instance_id) {
>   				ctx.shader_info->vs.vgpr_comp_cnt =
>   					MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
>   			}
>   		} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
>   			shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
>   		}
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 27586d0212..6ba1a51e07 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -19,20 +19,22 @@
>    * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>    * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>    * USE OR OTHER DEALINGS IN THE SOFTWARE.
>    */
>   
>   #ifndef AC_SHADER_ABI_H
>   #define AC_SHADER_ABI_H
>   
>   #include <llvm-c/Core.h>
>   
> +#include "nir.h" >
> +
>   enum ac_descriptor_type {
>   	AC_DESC_IMAGE,
>   	AC_DESC_FMASK,
>   	AC_DESC_SAMPLER,
>   	AC_DESC_BUFFER,
>   };
>   
>   /* Document the shader ABI during compilation. This is what allows radeonsi and
>    * radv to share a compiler backend.
>    */
> @@ -55,20 +57,25 @@ struct ac_shader_abi {
>   	LLVMValueRef *inputs;
>   
>   	void (*emit_outputs)(struct ac_shader_abi *abi,
>   			     unsigned max_outputs,
>   			     LLVMValueRef *addrs);
>   
>   	void (*emit_vertex)(struct ac_shader_abi *abi,
>   			    unsigned stream,
>   			    LLVMValueRef *addrs);
>   
> +	LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
> +				    nir_intrinsic_instr *instr,
> +				    unsigned vertex_index,
> +				    unsigned const_index);
> +
>   	LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
>   
>   	/**
>   	 * Load the descriptor for the given buffer.
>   	 *
>   	 * \param buffer the buffer as presented in NIR: this is the descriptor
>   	 *               in Vulkan, and the buffer index in OpenGL/Gallium
>   	 * \param write whether buffer contents will be written
>   	 */
>   	LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 06e3d0f9f1..746816d6a3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -5814,20 +5814,21 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
>   		if (shader->key.as_es) {
>   			ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
>   			bld_base->emit_epilogue = si_tgsi_emit_es_epilogue;
>   		} else {
>   			ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
>   			bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>   		}
>   		break;
>   	case PIPE_SHADER_GEOMETRY:
>   		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
> +		ctx->abi.load_inputs = si_nir_load_input_gs;
>   		ctx->abi.emit_vertex = si_llvm_emit_vertex;
>   		ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
>   		bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
>   		break;
>   	case PIPE_SHADER_FRAGMENT:
>   		ctx->load_input = declare_input_fs;
>   		ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
>   		bld_base->emit_epilogue = si_tgsi_emit_epilogue;
>   		break;
>   	case PIPE_SHADER_COMPUTE:
> diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
> index 58413e9947..afb723d2ef 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_internal.h
> +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
> @@ -332,11 +332,16 @@ void si_llvm_load_input_vs(
>   	struct si_shader_context *ctx,
>   	unsigned input_index,
>   	LLVMValueRef out[4]);
>   void si_llvm_load_input_fs(
>   	struct si_shader_context *ctx,
>   	unsigned input_index,
>   	LLVMValueRef out[4]);
>   
>   bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
>   
> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
> +				  nir_intrinsic_instr *instr,
> +				  unsigned vertex_index,
> +				  unsigned const_index);
> +
>   #endif
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index fca16f46cf..5b68ff2a07 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -480,20 +480,46 @@ static void declare_nir_input_fs(struct si_shader_context *ctx,
>   		out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
>   		out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
>   				LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
>   		return;
>   	}
>   
>   	si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
>   	(*fs_attr_idx)++;
>   }
>   
> +LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
> +				  nir_intrinsic_instr *instr,
> +				  unsigned vertex_index,
> +				  unsigned const_index)
> +{
> +	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +
> +	nir_shader *nir = ctx->shader->selector->nir;
> +	unsigned input_index = 0;
> +	nir_foreach_variable(var, &nir->inputs) {
> +		if (instr->variables[0]->var->data.location == var->data.location)
> +			break;
> +		input_index++;
> +	}

This whole approach here seems wrong to me. The order in which input 
variables are declared shouldn't matter.

The TGSI code uses si_shader_io_get_unique_index, and I think the NIR 
path should do the same (as does the NIR-path for radv, by the way...). 
That way, the dependency of ac_shader_abi on NIR can also be removed, 
which would be preferable.

Cheers,
Nicolai

> +
> +	LLVMValueRef value[4];
> +	unsigned comp = instr->variables[0]->var->data.location_frac;
> +	for (unsigned i = comp; i < instr->num_components + comp; i++) {
> +		value[i] = si_llvm_load_input_gs(&ctx->abi, input_index, vertex_index,
> +						 nir2llvmtype(ctx, instr->variables[0]->var->type),
> +						 i);
> +	}
> +
> +	return ac_build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
> +}
> +
>   static LLVMValueRef
>   si_nir_load_sampler_desc(struct ac_shader_abi *abi,
>   		         unsigned descriptor_set, unsigned base_index,
>   		         unsigned constant_index, LLVMValueRef dynamic_index,
>   		         enum ac_descriptor_type desc_type, bool image,
>   			 bool write)
>   {
>   	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
>   	LLVMBuilderRef builder = ctx->ac.builder;
>   	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
> 

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.