[Mesa-dev] [PATCH 4/4] radeonsi/nir: gather buffers declared more accurately and use const fast path

Timothy Arceri tarceri at itsqueeze.com
Tue Mar 27 05:23:40 UTC 2018


On 27/03/18 15:19, Timothy Arceri wrote:
> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
> setting the more accurate masks for some builtin uniforms for now as
> it causes some piglit regressions.
> ---
>   src/gallium/drivers/radeonsi/si_shader.c     |  8 +++
>   src/gallium/drivers/radeonsi/si_shader_nir.c | 82 ++++++++++++++++++++++++++--
>   2 files changed, 84 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 62cb7ea7eb5..9a12f9ee8f2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
>   static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
>   {
>   	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
> +	struct si_shader_selector *sel = ctx->shader->selector;
> +
>   	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
>   
> +	if (sel->info.const_buffers_declared == 1 &&
> +	    sel->info.shader_buffers_declared == 0 &&
> +	    !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) {
> +		return load_const_buffer_desc_fast_path(ctx);
> +	}
> +
>   	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
>   	index = LLVMBuildAdd(ctx->ac.builder, index,
>   			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
> diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
> index 52950668714..595f376f6a2 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
> @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>   
>   	info->num_outputs = num_outputs;
>   
> +	struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
> +					       _mesa_key_pointer_equal);
> +
> +	unsigned ubo_idx = 1;
>   	nir_foreach_variable(variable, &nir->uniforms) {
>   		const struct glsl_type *type = variable->type;
>   		enum glsl_base_type base_type =
>   			glsl_get_base_type(glsl_without_array(type));
>   		unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
>   
> +		/* Gather buffers declared bitmasks. Note: radeonsi doesn't
> +		 * really use the mask (other than ubo_idx == 1 for regular
> +		 * uniforms) its really only used for getting the buffer count
> +		 * so we don't need to worry about the ordering.
> +		 */
> +		if (variable->interface_type != NULL) {
> +			if (variable->data.mode == nir_var_uniform) {
> +
> +				unsigned block_size;
> +				if (base_type != GLSL_TYPE_INTERFACE) {
> +					struct set_entry *entry =
> +						_mesa_set_search(ubo_set, variable->interface_type);
> +
> +					/* Check if we have already processed
> +					 * a member from this ubo.
> +					 */
> +					if (entry)
> +						continue;
> +
> +					block_size = 1;
> +				} else {
> +					block_size = aoa_size;
> +				}
> +
> +				info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_size);
> +				ubo_idx += block_size;
> +
> +				_mesa_set_add(ubo_set, variable->interface_type);
> +			}
> +
> +			if (variable->data.mode == nir_var_shader_storage) {
> +				/* TODO: make this more accurate */
> +				info->shader_buffers_declared =
> +					u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
> +			}
> +
> +			continue;
> +		}
> +
>   		/* We rely on the fact that nir_lower_samplers_as_deref has
>   		 * eliminated struct dereferences.
>   		 */
> -		if (base_type == GLSL_TYPE_SAMPLER)
> +		if (base_type == GLSL_TYPE_SAMPLER) {
>   			info->samplers_declared |=
>   				u_bit_consecutive(variable->data.binding, aoa_size);
> -		else if (base_type == GLSL_TYPE_IMAGE)
> +
> +			if (variable->data.bindless)
> +				info->const_buffers_declared |= 1;

Whoops this if should also do:

				info->const_file_max[0] +=
					glsl_count_attribute_slots(type, false);


> +		} else if (base_type == GLSL_TYPE_IMAGE) {
>   			info->images_declared |=
>   				u_bit_consecutive(variable->data.binding, aoa_size);
> +
> +			if (variable->data.bindless)
> +				info->const_buffers_declared |= 1;


As should this one. These are fixed locally.


> +		} else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
> +			if (strncmp(variable->name, "state.", 6) == 0) {
> +				/* FIXME: figure out why piglit tests with builtin
> +				 * uniforms are failing without this.
> +				 */
> +				info->const_buffers_declared =
> +					u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
> +			} else {
> +				info->const_buffers_declared |= 1;
> +				info->const_file_max[0] +=
> +					glsl_count_attribute_slots(type, false);
> +			}
> +		}
>   	}
>   
> +	_mesa_set_destroy(ubo_set, NULL);
> +
> +	/* This is the max index not max count so we adjust it here */
> +	if (info->const_file_max[0] != 0)
> +		info->const_file_max[0] -= 1;
> +
>   	info->num_written_clipdistance = nir->info.clip_distance_array_size;
>   	info->num_written_culldistance = nir->info.cull_distance_array_size;
>   	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
> @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>   	if (info->processor == PIPE_SHADER_FRAGMENT)
>   		info->uses_kill = nir->info.fs.uses_discard;
>   
> -	/* TODO make this more accurate */
> -	info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
> -	info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
> -
>   	func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
>   	nir_foreach_block(block, func->impl) {
>   		nir_foreach_instr(instr, block)
> @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader *nir,
>   void
>   si_lower_nir(struct si_shader_selector* sel)
>   {
> +	/* Disable const buffer fast path for old LLVM versions */
> +	if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) {
> +		sel->info.const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
> +		sel->info.shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
> +	}
> +
>   	/* Adjust the driver location of inputs and outputs. The state tracker
>   	 * interprets them as slots, while the ac/nir backend interprets them
>   	 * as individual components.
> 


More information about the mesa-dev mailing list