[Mesa-dev] [PATCH 17/17] radeonsi: Put GS ring buffer descriptors with streamout buffer descriptors
Marek Olšák
maraeo at gmail.com
Tue Jan 28 04:44:11 PST 2014
For the series:
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Tue, Jan 28, 2014 at 10:46 AM, Michel Dänzer <michel at daenzer.net> wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> src/gallium/drivers/radeonsi/si_descriptors.c | 93 ++++++++++++++++-----------
> src/gallium/drivers/radeonsi/si_pipe.h | 6 +-
> src/gallium/drivers/radeonsi/si_shader.c | 22 ++++---
> src/gallium/drivers/radeonsi/si_shader.h | 72 +++++++++++----------
> src/gallium/drivers/radeonsi/si_state.h | 6 +-
> 5 files changed, 115 insertions(+), 84 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 2a54fcb..9078c6c 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -516,7 +516,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
> unsigned element_size, unsigned index_stride)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> - struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
> + struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
>
> if (shader >= SI_NUM_SHADERS)
> return;
> @@ -608,9 +608,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
> unsigned append_bitmask)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> - struct si_buffer_resources *buffers = &sctx->streamout_buffers;
> + struct si_buffer_resources *buffers = &sctx->rw_buffers[PIPE_SHADER_VERTEX];
> unsigned old_num_targets = sctx->b.streamout.num_targets;
> - unsigned i;
> + unsigned i, bufidx;
>
> /* Streamout buffers must be bound in 2 places:
> * 1) in VGT by setting the VGT_STRMOUT registers
> @@ -622,12 +622,14 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
>
> /* Set the shader resources.*/
> for (i = 0; i < num_targets; i++) {
> + bufidx = SI_RW_SO + i;
> +
> if (targets[i]) {
> struct pipe_resource *buffer = targets[i]->buffer;
> uint64_t va = r600_resource_va(ctx->screen, buffer);
>
> /* Set the descriptor. */
> - uint32_t *desc = buffers->desc_data[i];
> + uint32_t *desc = buffers->desc_data[bufidx];
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
> desc[2] = 0xffffffff;
> @@ -637,25 +639,29 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
> S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
>
> /* Set the resource. */
> - pipe_resource_reference(&buffers->buffers[i], buffer);
> + pipe_resource_reference(&buffers->buffers[bufidx],
> + buffer);
> r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
> (struct r600_resource*)buffer,
> buffers->shader_usage);
> - buffers->desc.enabled_mask |= 1 << i;
> + buffers->desc.enabled_mask |= 1 << bufidx;
> } else {
> /* Clear the descriptor and unset the resource. */
> - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
> - pipe_resource_reference(&buffers->buffers[i], NULL);
> - buffers->desc.enabled_mask &= ~(1 << i);
> + memset(buffers->desc_data[bufidx], 0,
> + sizeof(uint32_t) * 4);
> + pipe_resource_reference(&buffers->buffers[bufidx],
> + NULL);
> + buffers->desc.enabled_mask &= ~(1 << bufidx);
> }
> - buffers->desc.dirty_mask |= 1 << i;
> + buffers->desc.dirty_mask |= 1 << bufidx;
> }
> for (; i < old_num_targets; i++) {
> + bufidx = SI_RW_SO + i;
> /* Clear the descriptor and unset the resource. */
> - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4);
> - pipe_resource_reference(&buffers->buffers[i], NULL);
> - buffers->desc.enabled_mask &= ~(1 << i);
> - buffers->desc.dirty_mask |= 1 << i;
> + memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
> + pipe_resource_reference(&buffers->buffers[bufidx], NULL);
> + buffers->desc.enabled_mask &= ~(1 << bufidx);
> + buffers->desc.dirty_mask |= 1 << bufidx;
> }
>
> si_update_descriptors(sctx, &buffers->desc);
> @@ -712,25 +718,37 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
> /* Vertex buffers. */
> /* Nothing to do. Vertex buffer bindings are updated before every draw call. */
>
> - /* Streamout buffers. */
> - for (i = 0; i < sctx->streamout_buffers.num_buffers; i++) {
> - if (sctx->streamout_buffers.buffers[i] == buf) {
> - /* Update the descriptor. */
> - si_desc_reset_buffer_offset(ctx, sctx->streamout_buffers.desc_data[i],
> - old_va, buf);
> + /* Read/Write buffers. */
> + for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> + struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
> + bool found = false;
> + uint32_t mask = buffers->desc.enabled_mask;
>
> - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
> - (struct r600_resource*)buf,
> - sctx->streamout_buffers.shader_usage);
> - sctx->streamout_buffers.desc.dirty_mask |= 1 << i;
> - si_update_descriptors(sctx, &sctx->streamout_buffers.desc);
> -
> - /* Update the streamout state. */
> - if (sctx->b.streamout.begin_emitted) {
> - r600_emit_streamout_end(&sctx->b);
> + while (mask) {
> + i = u_bit_scan(&mask);
> + if (buffers->buffers[i] == buf) {
> + si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
> + old_va, buf);
> +
> + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
> + rbuffer, buffers->shader_usage);
> +
> + buffers->desc.dirty_mask |= 1 << i;
> + found = true;
> +
> + if (i >= SI_RW_SO && shader == PIPE_SHADER_VERTEX) {
> + /* Update the streamout state. */
> + if (sctx->b.streamout.begin_emitted) {
> + r600_emit_streamout_end(&sctx->b);
> + }
> + sctx->b.streamout.append_bitmask =
> + sctx->b.streamout.enabled_mask;
> + r600_streamout_buffers_dirty(&sctx->b);
> + }
> }
> - sctx->b.streamout.append_bitmask = sctx->b.streamout.enabled_mask;
> - r600_streamout_buffers_dirty(&sctx->b);
> + }
> + if (found) {
> + si_update_descriptors(sctx, &buffers->desc);
> }
> }
>
> @@ -936,17 +954,20 @@ void si_init_all_descriptors(struct si_context *sctx)
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> si_init_buffer_resources(sctx, &sctx->const_buffers[i],
> NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
> + RADEON_USAGE_READ);
> + si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
> + i == PIPE_SHADER_VERTEX ?
> + SI_RW_SO + 4 : SI_RW_SO,
> + i, SI_SGPR_RW_BUFFERS,
> RADEON_USAGE_READWRITE);
>
> si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
>
> sctx->atoms.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
> + sctx->atoms.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
> sctx->atoms.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
> }
>
> - si_init_buffer_resources(sctx, &sctx->streamout_buffers, 4, PIPE_SHADER_VERTEX,
> - SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE);
> - sctx->atoms.streamout_buffers = &sctx->streamout_buffers.desc.atom;
>
> /* Set pipe_context functions. */
> sctx->b.b.set_constant_buffer = si_set_constant_buffer;
> @@ -961,9 +982,9 @@ void si_release_all_descriptors(struct si_context *sctx)
>
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> si_release_buffer_resources(&sctx->const_buffers[i]);
> + si_release_buffer_resources(&sctx->rw_buffers[i]);
> si_release_sampler_views(&sctx->samplers[i].views);
> }
> - si_release_buffer_resources(&sctx->streamout_buffers);
> }
>
> void si_all_descriptors_begin_new_cs(struct si_context *sctx)
> @@ -972,7 +993,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
>
> for (i = 0; i < SI_NUM_SHADERS; i++) {
> si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
> + si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
> si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
> }
> - si_buffer_resources_begin_new_cs(sctx, &sctx->streamout_buffers);
> }
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index d63a52b..f97feb0 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -78,6 +78,8 @@ struct si_surface {
>
> #define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
>
> +#define SI_RW_SO 2 /* Streamout buffer descriptors after ring buffers */
> +
> struct si_context {
> struct r600_common_context b;
> struct blitter_context *blitter;
> @@ -93,8 +95,8 @@ struct si_context {
> struct {
> /* The order matters. */
> struct r600_atom *const_buffers[SI_NUM_SHADERS];
> + struct r600_atom *rw_buffers[SI_NUM_SHADERS];
> struct r600_atom *sampler_views[SI_NUM_SHADERS];
> - struct r600_atom *streamout_buffers;
> /* Caches must be flushed after resource descriptors are
> * updated in memory. */
> struct r600_atom *cache_flush;
> @@ -120,7 +122,7 @@ struct si_context {
> unsigned sprite_coord_enable;
> unsigned export_16bpc;
> struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
> - struct si_buffer_resources streamout_buffers;
> + struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
> struct si_textures_info samplers[SI_NUM_SHADERS];
> struct r600_resource *border_color_table;
> unsigned border_color_offset;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 5b95c11..54270cd 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -319,7 +319,8 @@ static LLVMValueRef fetch_input_gs(
> 4);
>
> /* Load the ESGS ring resource descriptor */
> - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
> + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> + SI_PARAM_RW_BUFFERS);
> t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
> lp_build_const_int32(gallivm, SI_RING_ESGS));
>
> @@ -1202,7 +1203,8 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
> }
>
> /* Load the ESGS ring resource descriptor */
> - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
> + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> + SI_PARAM_RW_BUFFERS);
> t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
> lp_build_const_int32(gallivm, SI_RING_ESGS));
>
> @@ -1910,7 +1912,8 @@ static void si_llvm_emit_vertex(
> int i;
>
> /* Load the GSVS ring resource descriptor */
> - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
> + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> + SI_PARAM_RW_BUFFERS);
> t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
> lp_build_const_int32(gallivm, SI_RING_GSVS));
>
> @@ -2038,7 +2041,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
> struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
> struct gallivm_state *gallivm = bld_base->base.gallivm;
> struct si_pipe_shader *shader = si_shader_ctx->shader;
> - LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32;
> + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
> unsigned i, last_sgpr, num_params;
>
> i8 = LLVMInt8TypeInContext(gallivm->context);
> @@ -2049,6 +2052,8 @@ static void create_function(struct si_shader_context *si_shader_ctx)
>
> params[SI_PARAM_CONST] = LLVMPointerType(
> LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
> + params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
> +
> /* We assume at most 16 textures per program at the moment.
> * This need probably need to be changed to support bindless textures */
> params[SI_PARAM_SAMPLER] = LLVMPointerType(
> @@ -2059,7 +2064,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
> switch (si_shader_ctx->type) {
> case TGSI_PROCESSOR_VERTEX:
> params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
> - params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
> params[SI_PARAM_START_INSTANCE] = i32;
> num_params = SI_PARAM_START_INSTANCE+1;
> if (shader->key.vs.as_es) {
> @@ -2257,12 +2261,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
> return;
>
> LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> - SI_PARAM_SO_BUFFER);
> + SI_PARAM_RW_BUFFERS);
>
> /* Load the resources, we rely on the code sinking to do the rest */
> for (i = 0; i < 4; ++i) {
> if (si_shader_ctx->shader->selector->so.stride[i]) {
> - LLVMValueRef offset = lp_build_const_int32(gallivm, i);
> + LLVMValueRef offset = lp_build_const_int32(gallivm,
> + SI_RW_SO + i);
>
> si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset);
> }
> @@ -2371,7 +2376,8 @@ static int si_generate_gs_copy_shader(struct si_context *sctx,
> preload_streamout_buffers(si_shader_ctx);
>
> /* Load the GSVS ring resource descriptor */
> - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
> + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> + SI_PARAM_RW_BUFFERS);
> t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
> lp_build_const_int32(gallivm, SI_RING_GSVS));
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 63c19ec..d667baf 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -34,23 +34,23 @@
> #define SI_SGPR_CONST 0
> #define SI_SGPR_SAMPLER 2
> #define SI_SGPR_RESOURCE 4
> -#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */
> -#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */
> +#define SI_SGPR_RW_BUFFERS 6 /* rings (& stream-out, VS only) */
> +#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
> #define SI_SGPR_START_INSTANCE 10 /* VS only */
> -#define SI_SGPR_ALPHA_REF 6 /* PS only */
> +#define SI_SGPR_ALPHA_REF 8 /* PS only */
>
> #define SI_VS_NUM_USER_SGPR 11
> -#define SI_GS_NUM_USER_SGPR 6
> -#define SI_PS_NUM_USER_SGPR 7
> +#define SI_GS_NUM_USER_SGPR 8
> +#define SI_PS_NUM_USER_SGPR 9
>
> /* LLVM function parameter indices */
> #define SI_PARAM_CONST 0
> #define SI_PARAM_SAMPLER 1
> #define SI_PARAM_RESOURCE 2
> +#define SI_PARAM_RW_BUFFERS 3
>
> /* VS only parameters */
> -#define SI_PARAM_VERTEX_BUFFER 3
> -#define SI_PARAM_SO_BUFFER 4
> +#define SI_PARAM_VERTEX_BUFFER 4
> #define SI_PARAM_START_INSTANCE 5
> /* the other VS parameters are assigned dynamically */
>
> @@ -58,36 +58,38 @@
> #define SI_PARAM_ES2GS_OFFSET 6
>
> /* GS only parameters */
> -#define SI_PARAM_GS2VS_OFFSET 3
> -#define SI_PARAM_GS_WAVE_ID 4
> -#define SI_PARAM_VTX0_OFFSET 5
> -#define SI_PARAM_VTX1_OFFSET 6
> -#define SI_PARAM_PRIMITIVE_ID 7
> -#define SI_PARAM_VTX2_OFFSET 8
> -#define SI_PARAM_VTX3_OFFSET 9
> -#define SI_PARAM_VTX4_OFFSET 10
> -#define SI_PARAM_VTX5_OFFSET 11
> -#define SI_PARAM_GS_INSTANCE_ID 12
> +#define SI_PARAM_GS2VS_OFFSET 4
> +#define SI_PARAM_GS_WAVE_ID 5
> +#define SI_PARAM_VTX0_OFFSET 6
> +#define SI_PARAM_VTX1_OFFSET 7
> +#define SI_PARAM_PRIMITIVE_ID 8
> +#define SI_PARAM_VTX2_OFFSET 9
> +#define SI_PARAM_VTX3_OFFSET 10
> +#define SI_PARAM_VTX4_OFFSET 11
> +#define SI_PARAM_VTX5_OFFSET 12
> +#define SI_PARAM_GS_INSTANCE_ID 13
>
> /* PS only parameters */
> -#define SI_PARAM_ALPHA_REF 3
> -#define SI_PARAM_PRIM_MASK 4
> -#define SI_PARAM_PERSP_SAMPLE 5
> -#define SI_PARAM_PERSP_CENTER 6
> -#define SI_PARAM_PERSP_CENTROID 7
> -#define SI_PARAM_PERSP_PULL_MODEL 8
> -#define SI_PARAM_LINEAR_SAMPLE 9
> -#define SI_PARAM_LINEAR_CENTER 10
> -#define SI_PARAM_LINEAR_CENTROID 11
> -#define SI_PARAM_LINE_STIPPLE_TEX 12
> -#define SI_PARAM_POS_X_FLOAT 13
> -#define SI_PARAM_POS_Y_FLOAT 14
> -#define SI_PARAM_POS_Z_FLOAT 15
> -#define SI_PARAM_POS_W_FLOAT 16
> -#define SI_PARAM_FRONT_FACE 17
> -#define SI_PARAM_ANCILLARY 18
> -#define SI_PARAM_SAMPLE_COVERAGE 19
> -#define SI_PARAM_POS_FIXED_PT 20
> +#define SI_PARAM_ALPHA_REF 4
> +#define SI_PARAM_PRIM_MASK 5
> +#define SI_PARAM_PERSP_SAMPLE 6
> +#define SI_PARAM_PERSP_CENTER 7
> +#define SI_PARAM_PERSP_CENTROID 8
> +#define SI_PARAM_PERSP_PULL_MODEL 9
> +#define SI_PARAM_LINEAR_SAMPLE 10
> +#define SI_PARAM_LINEAR_CENTER 11
> +#define SI_PARAM_LINEAR_CENTROID 12
> +#define SI_PARAM_LINE_STIPPLE_TEX 13
> +#define SI_PARAM_POS_X_FLOAT 14
> +#define SI_PARAM_POS_Y_FLOAT 15
> +#define SI_PARAM_POS_Z_FLOAT 16
> +#define SI_PARAM_POS_W_FLOAT 17
> +#define SI_PARAM_FRONT_FACE 18
> +#define SI_PARAM_ANCILLARY 19
> +#define SI_PARAM_SAMPLE_COVERAGE 20
> +#define SI_PARAM_POS_FIXED_PT 21
> +
> +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
>
> struct si_shader_input {
> unsigned name;
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index f7082f5..6922c88 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -115,10 +115,10 @@ union si_state {
> #define NUM_SAMPLER_STATES NUM_TEX_UNITS
>
> #define NUM_PIPE_CONST_BUFFERS 16
> -#define SI_RING_ESGS 17
> -#define SI_RING_GSVS 18
> -#define NUM_CONST_BUFFERS (SI_RING_GSVS + 1)
> +#define NUM_CONST_BUFFERS (NUM_PIPE_CONST_BUFFERS + 1)
>
> +#define SI_RING_ESGS 0
> +#define SI_RING_GSVS 1
>
> /* This represents resource descriptors in memory, such as buffer resources,
> * image resources, and sampler states.
> --
> 1.8.5.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list