[Mesa-dev] [PATCH] r600g: merge the TXQ and BUFFER constant buffers

Glenn Kennard glenn.kennard at gmail.com
Wed Nov 26 15:39:13 PST 2014


On Mon, 24 Nov 2014 04:36:06 +0100, Dave Airlie <airlied at gmail.com> wrote:

> From: Dave Airlie <airlied at redhat.com>
>
> We are using 1 more buffer than we have, although in the future the
> driver should just end up using one buffer in total probably, this
> is a good first step, it merges the txq cube array and buffer info
> constants on r600 and evergreen.
>
> this also most likely breaks llvm backend, I've changed it,
> but it definitely needs fixes for this.
>
> this fixes a bunch of geom shader textureSize tests on rv635
> from gpu reset to pass.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_llvm.c         |  3 +-
>  src/gallium/drivers/r600/r600_pipe.h         |  8 +--
>  src/gallium/drivers/r600/r600_shader.c       | 18 ++++--
>  src/gallium/drivers/r600/r600_state_common.c | 92  
> +++++++++++-----------------
>  4 files changed, 54 insertions(+), 67 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_llvm.c  
> b/src/gallium/drivers/r600/r600_llvm.c
> index c19693a..470c65f 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -23,7 +23,6 @@
> #define CONSTANT_BUFFER_0_ADDR_SPACE 8
>  #define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE +  
> R600_UCP_CONST_BUFFER)
> -#define CONSTANT_TXQ_BUFFER (CONSTANT_BUFFER_0_ADDR_SPACE +  
> R600_TXQ_CONST_BUFFER)
>  #define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
>  	(CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
> @@ -690,7 +689,7 @@ static void llvm_emit_tex(
>  		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
>  			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,  
> 0);
>  			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
> -				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
> +				llvm_load_const_buffer(bld_base, offset,  
> LLVM_R600_BUFFER_INFO_CONST_BUFFER,
>  				lp_build_const_int32(gallivm, 0), "");

I don't see how llvm_emit_tex can possibly work in its current condition.  
This patch breaks it a little more...

> 			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,  
> emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
> diff --git a/src/gallium/drivers/r600/r600_pipe.h  
> b/src/gallium/drivers/r600/r600_pipe.h
> index 40b0328..e27e877 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -44,14 +44,13 @@
>  #define R600_TRACE_CS_DWORDS		7
> #define R600_MAX_USER_CONST_BUFFERS 13
> -#define R600_MAX_DRIVER_CONST_BUFFERS 4
> +#define R600_MAX_DRIVER_CONST_BUFFERS 3
>  #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS +  
> R600_MAX_DRIVER_CONST_BUFFERS)
> /* start driver buffers after user buffers */
>  #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
> -#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
> -#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
> -#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
> +#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
> +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
>  /* Currently R600_MAX_CONST_BUFFERS is too large, the hardware only has  
> 16 buffers, but the driver is
>   * trying to use 17. Avoid accidentally aliasing with user UBOs for

The comment here needs updating, its should be down to 16 buffers total  
now ie within hardware limits.

D3D 11 does require 15 to be available for application use though so  
eventually all the driver buffers ought to be merged, but outside of scope  
of this patch.

> SAMPLE_POSITIONS by using an id<16.
>   * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they  
> can use the same id.
> @@ -316,7 +315,6 @@ struct r600_samplerview_state {
>  	uint32_t			dirty_mask;
>  	uint32_t			compressed_depthtex_mask; /* which textures are depth */
>  	uint32_t			compressed_colortex_mask;
> -	boolean                         dirty_txq_constants;
>  	boolean				dirty_buffer_constants;
>  };
> diff --git a/src/gallium/drivers/r600/r600_shader.c  
> b/src/gallium/drivers/r600/r600_shader.c
> index a772dee..61ff162 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -5035,8 +5035,9 @@ static int r600_do_buffer_txq(struct  
> r600_shader_ctx *ctx)
>  	alu.op = ALU_OP1_MOV;
> 	if (ctx->bc->chip_class >= EVERGREEN) {
> -		alu.src[0].sel = 512 + (id / 4);
> -		alu.src[0].chan = id % 4;
> +		/* channel 0 or 2 of each word */
> +		alu.src[0].sel = 512 + (id / 2);
> +		alu.src[0].chan = (id % 2) * 2;
>  	} else {
>  		/* r600 we have them at channel 2 of the second dword */
>  		alu.src[0].sel = 512 + (id * 2) + 1;
> @@ -5697,9 +5698,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>  		alu.op = ALU_OP1_MOV;
> -		alu.src[0].sel = 512 + (id / 4);
> -		alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
> -		alu.src[0].chan = id % 4;
> +		if (ctx->bc->chip_class >= EVERGREEN) {
> +			/* channel 1 or 3 of each word */
> +			alu.src[0].sel = 512 + (id / 2);
> +			alu.src[0].chan = ((id % 2) * 2) + 1;
> +		} else {
> +			/* r600 we have them at channel 2 of the second dword */
> +			alu.src[0].sel = 512 + (id * 2) + 1;
> +			alu.src[0].chan = 2;
> +		}
> +		alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
>  		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
>  		alu.last = 1;
>  		r = r600_bytecode_add_alu(ctx->bc, &alu);
> diff --git a/src/gallium/drivers/r600/r600_state_common.c  
> b/src/gallium/drivers/r600/r600_state_common.c
> index c3f21cb..ff94454 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -649,7 +649,6 @@ static void r600_set_sampler_views(struct  
> pipe_context *pipe, unsigned shader,
>  	dst->views.dirty_mask |= new_mask;
>  	dst->views.compressed_depthtex_mask &= dst->views.enabled_mask;
>  	dst->views.compressed_colortex_mask &= dst->views.enabled_mask;
> -	dst->views.dirty_txq_constants = TRUE;
>  	dst->views.dirty_buffer_constants = TRUE;
>  	r600_sampler_views_dirty(rctx, &dst->views);
> @@ -984,6 +983,7 @@ static void r600_set_sample_mask(struct pipe_context  
> *pipe, unsigned sample_mask
>   * then in the shader, we AND the 4 components with 0xffffffff or 0,
>   * then OR the alpha with the value given here.
>   * We use a 6th constant to store the txq buffer size in
> + * we use 7th slot for cube map

The comment neglects to tell what value is placed in this 7th slot?

>   */
>  static void r600_setup_buffer_constants(struct r600_context *rctx, int  
> shader_type)
>  {
> @@ -1022,6 +1022,7 @@ static void r600_setup_buffer_constants(struct  
> r600_context *rctx, int shader_ty
>  				samplers->buffer_constants[offset + 4] = 0;
> 			samplers->buffer_constants[offset + 5] =  
> samplers->views.views[i]->base.texture->width0 /  
> util_format_get_blocksize(samplers->views.views[i]->base.format);
> +			samplers->buffer_constants[offset + 6] =  
> samplers->views.views[i]->base.texture->array_size / 6;
>  		}
>  	}
> @@ -1048,12 +1049,16 @@ static void eg_setup_buffer_constants(struct  
> r600_context *rctx, int shader_type

Comment for this function is out of date and needs updating.

>  	samplers->views.dirty_buffer_constants = FALSE;
> 	bits = util_last_bit(samplers->views.enabled_mask);
> -	array_size = bits * sizeof(uint32_t) * 4;
> +	array_size = bits * 2 * sizeof(uint32_t) * 4;
>  	samplers->buffer_constants = realloc(samplers->buffer_constants,  
> array_size);
>  	memset(samplers->buffer_constants, 0, array_size);
> -	for (i = 0; i < bits; i++)
> -		if (samplers->views.enabled_mask & (1 << i))
> -		   samplers->buffer_constants[i] =  
> samplers->views.views[i]->base.texture->width0 /  
> util_format_get_blocksize(samplers->views.views[i]->base.format);
> +	for (i = 0; i < bits; i++) {
> +		if (samplers->views.enabled_mask & (1 << i)) {
> +			uint32_t offset = i * 2;
> +			samplers->buffer_constants[offset] =  
> samplers->views.views[i]->base.texture->width0 /  
> util_format_get_blocksize(samplers->views.views[i]->base.format);
> +			samplers->buffer_constants[offset + 1] =  
> samplers->views.views[i]->base.texture->array_size / 6;
> +		}
> +	}
> 	cb.buffer = NULL;
>  	cb.user_buffer = samplers->buffer_constants;
> @@ -1063,35 +1068,6 @@ static void eg_setup_buffer_constants(struct  
> r600_context *rctx, int shader_type
>  	pipe_resource_reference(&cb.buffer, NULL);
>  }
> -static void r600_setup_txq_cube_array_constants(struct r600_context  
> *rctx, int shader_type)
> -{
> -	struct r600_textures_info *samplers = &rctx->samplers[shader_type];
> -	int bits;
> -	uint32_t array_size;
> -	struct pipe_constant_buffer cb;
> -	int i;
> -
> -	if (!samplers->views.dirty_txq_constants)
> -		return;
> -
> -	samplers->views.dirty_txq_constants = FALSE;
> -
> -	bits = util_last_bit(samplers->views.enabled_mask);
> -	array_size = bits * sizeof(uint32_t) * 4;
> -	samplers->txq_constants = realloc(samplers->txq_constants, array_size);
> -	memset(samplers->txq_constants, 0, array_size);
> -	for (i = 0; i < bits; i++)
> -		if (samplers->views.enabled_mask & (1 << i))
> -			samplers->txq_constants[i] =  
> samplers->views.views[i]->base.texture->array_size / 6;
> -
> -	cb.buffer = NULL;
> -	cb.user_buffer = samplers->txq_constants;
> -	cb.buffer_offset = 0;
> -	cb.buffer_size = array_size;
> -	rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type,  
> R600_TXQ_CONST_BUFFER, &cb);
> -	pipe_resource_reference(&cb.buffer, NULL);
> -}
> -
>  /* set sample xy locations as array of fragment shader constants */
>  void r600_set_sample_locations_constant_buffer(struct r600_context  
> *rctx)
>  {
> @@ -1175,7 +1151,7 @@ static bool r600_update_derived_state(struct  
> r600_context *rctx)
>  	struct pipe_context * ctx = (struct pipe_context*)rctx;
>  	bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
>  	bool blend_disable;
> -
> +	bool need_buf_const;
>  	if (!rctx->blitter->running) {
>  		unsigned i;
> @@ -1296,29 +1272,35 @@ static bool r600_update_derived_state(struct  
> r600_context *rctx)
> 	/* on R600 we stuff masks + txq info into one constant buffer */
>  	/* on evergreen we only need a txq info one */
> -	if (rctx->b.chip_class < EVERGREEN) {
> -		if (rctx->ps_shader &&  
> rctx->ps_shader->current->shader.uses_tex_buffers)
> -			r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
> -		if (rctx->vs_shader &&  
> rctx->vs_shader->current->shader.uses_tex_buffers)
> -			r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
> -		if (rctx->gs_shader &&  
> rctx->gs_shader->current->shader.uses_tex_buffers)
> -			r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
> -	} else {
> -		if (rctx->ps_shader &&  
> rctx->ps_shader->current->shader.uses_tex_buffers)
> -			eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
> -		if (rctx->vs_shader &&  
> rctx->vs_shader->current->shader.uses_tex_buffers)
> -			eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
> -		if (rctx->gs_shader &&  
> rctx->gs_shader->current->shader.uses_tex_buffers)
> -			eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
> +	if (rctx->ps_shader) {
> +		need_buf_const = rctx->ps_shader->current->shader.uses_tex_buffers ||  
> rctx->ps_shader->current->shader.has_txq_cube_array_z_comp;
> +		if (need_buf_const) {
> +			if (rctx->b.chip_class < EVERGREEN)
> +				r600_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
> +			else
> +				eg_setup_buffer_constants(rctx, PIPE_SHADER_FRAGMENT);
> +		}
>  	}
> +	if (rctx->vs_shader) {
> +		need_buf_const = rctx->vs_shader->current->shader.uses_tex_buffers ||  
> rctx->vs_shader->current->shader.has_txq_cube_array_z_comp;
> +		if (need_buf_const) {
> +			if (rctx->b.chip_class < EVERGREEN)
> +				r600_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
> +			else
> +				eg_setup_buffer_constants(rctx, PIPE_SHADER_VERTEX);
> +		}
> +	}
> -	if (rctx->ps_shader &&  
> rctx->ps_shader->current->shader.has_txq_cube_array_z_comp)
> -		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_FRAGMENT);
> -	if (rctx->vs_shader &&  
> rctx->vs_shader->current->shader.has_txq_cube_array_z_comp)
> -		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_VERTEX);
> -	if (rctx->gs_shader &&  
> rctx->gs_shader->current->shader.has_txq_cube_array_z_comp)
> -		r600_setup_txq_cube_array_constants(rctx, PIPE_SHADER_GEOMETRY);
> +	if (rctx->gs_shader) {
> +		need_buf_const = rctx->gs_shader->current->shader.uses_tex_buffers ||  
> rctx->gs_shader->current->shader.has_txq_cube_array_z_comp;
> +		if (need_buf_const) {
> +			if (rctx->b.chip_class < EVERGREEN)
> +				r600_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
> +			else
> +				eg_setup_buffer_constants(rctx, PIPE_SHADER_GEOMETRY);
> +		}
> +	}
> 	if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader &&  
> rctx->vs_shader) {
>  		if (!r600_adjust_gprs(rctx)) {


Passes piglits on a Turks with no obvious regressions, so with nits above  
fixed, consider it
Reviewed-by: Glenn Kennard <glenn.kennard at gmail.com>


More information about the mesa-dev mailing list