[Mesa-dev] [PATCH 12/14] radeonsi/nir: add support for packed inputs

Wed Nov 29 11:53:23 UTC 2017

On 21.11.2017 04:37, Timothy Arceri wrote:
> Because NIR can create non-vec4 variables when implementing component
> packing, we need to make sure not to reprocess the same slot again.
> 
> Also we can drop the fs_attr_idx counter and just use driver_location.
> ---
>   src/gallium/drivers/radeonsi/si_shader_nir.c | 46 +++++++++++++++-------------
>   1 file changed, 25 insertions(+), 21 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 5d82715f7a..ec748c9679 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -437,46 +437,42 @@ si_lower_nir(struct si_shader_selector* sel)
>   
>   		NIR_PASS(progress, sel->nir, nir_opt_undef);
>   		NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
>   		if (sel->nir->options->max_unroll_iterations) {
>   			NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
>   		}
>   	} while (progress);
>   }
>   
>   static void declare_nir_input_vs(struct si_shader_context *ctx,
> -				 struct nir_variable *variable, unsigned rel,
> +				 struct nir_variable *variable,
>   				 LLVMValueRef out[4])
>   {
> -	si_llvm_load_input_vs(ctx, variable->data.driver_location / 4 + rel, out);
> +	si_llvm_load_input_vs(ctx, variable->data.driver_location / 4, out);
>   }
>   
>   static void declare_nir_input_fs(struct si_shader_context *ctx,
> -				 struct nir_variable *variable, unsigned rel,
> -				 unsigned *fs_attr_idx,
> +				 struct nir_variable *variable,
> +				 unsigned input_index,
>   				 LLVMValueRef out[4])
>   {
> -	unsigned slot = variable->data.location + rel;
> -
> -	assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
> -
> +	unsigned slot = variable->data.location;
>   	if (slot == VARYING_SLOT_POS) {
>   		out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
>   		out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
>   		out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
>   		out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
>   				LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
>   		return;
>   	}
>   
> -	si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
> -	(*fs_attr_idx)++;
> +	si_llvm_load_input_fs(ctx, input_index, out);
>   }
>   
>   static LLVMValueRef
>   si_nir_load_sampler_desc(struct ac_shader_abi *abi,
>   		         unsigned descriptor_set, unsigned base_index,
>   		         unsigned constant_index, LLVMValueRef dynamic_index,
>   		         enum ac_descriptor_type desc_type, bool image,
>   			 bool write)
>   {
>   	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> @@ -516,39 +512,47 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
>   	index = LLVMBuildAdd(ctx->gallivm.builder, index,
>   			     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
>   
>   	return si_load_sampler_desc(ctx, list, index, desc_type);
>   }
>   
>   bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
>   {
>   	struct tgsi_shader_info *info = &ctx->shader->selector->info;
>   
> -	unsigned fs_attr_idx = 0;
> +	uint64_t processed_outputs = 0;

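s/processed_outputs/processed_inputs/ -- this mask tracks locations of
variables from nir->inputs, so the name should say "inputs". The rename
also applies to the test and the update of the mask in the loop below.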

Cheers,
Nicolai


>   	nir_foreach_variable(variable, &nir->inputs) {
>   		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
>   								   nir->info.stage == MESA_SHADER_VERTEX);
>   		unsigned input_idx = variable->data.driver_location;
>   
> -		for (unsigned i = 0; i < attrib_count; ++i) {
> -			LLVMValueRef data[4];
> +		assert(attrib_count == 1);
>   
> -			if (nir->info.stage == MESA_SHADER_VERTEX)
> -				declare_nir_input_vs(ctx, variable, i, data);
> -			else if (nir->info.stage == MESA_SHADER_FRAGMENT)
> -				declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data);
> +		LLVMValueRef data[4];
> +		unsigned loc = variable->data.location;
>   
> -			for (unsigned chan = 0; chan < 4; chan++) {
> -				ctx->inputs[input_idx + chan] =
> -					LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
> -			}
> +		/* Packed components share the same location so skip
> +		 * them if we have already processed the location.
> +		 */
> +		if (processed_outputs & ((uint64_t)1 << loc))
> +			continue;
> +
> +		if (nir->info.stage == MESA_SHADER_VERTEX)
> +			declare_nir_input_vs(ctx, variable, data);
> +		else if (nir->info.stage == MESA_SHADER_FRAGMENT)
> +			declare_nir_input_fs(ctx, variable, input_idx / 4, data);
> +
> +		for (unsigned chan = 0; chan < 4; chan++) {
> +			ctx->inputs[input_idx + chan] =
> +				LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
>   		}
> +		processed_outputs |= ((uint64_t)1 << loc);
>   	}
>   
>   	ctx->abi.inputs = &ctx->inputs[0];
>   	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
>   	ctx->abi.clamp_shadow_reference = true;
>   
>   	ctx->num_samplers = util_last_bit(info->samplers_declared);
>   	ctx->num_images = util_last_bit(info->images_declared);
>   
>   	ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.