[Mesa-dev] [PATCH 02/11] radeonsi: implement set_shader_buffers
Nicolai Hähnle
nhaehnle at gmail.com
Mon Mar 21 23:21:54 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/drivers/radeonsi/si_descriptors.c | 61 +++++++++++++-
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_shader.c | 5 +-
src/gallium/drivers/radeonsi/si_shader.h | 114 +++++++++++++-------------
4 files changed, 123 insertions(+), 58 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c7c30bf..72bd50f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.list_dirty = true;
}
+/* SHADER BUFFERS */
+
+static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_shader_buffer *sbuffers)
+{ /* Fill or clear the shader-buffer descriptors for slots [start_slot, start_slot + count) of stage `shader`. */
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
+ unsigned i;
+
+ assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); /* the whole slot range must fit in the per-stage table */
+
+ for (i = 0; i < count; ++i) {
+ struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; /* a NULL array means every slot in the range is cleared */
+ struct r600_resource *buf;
+ unsigned slot = start_slot + i;
+ uint32_t *desc = buffers->desc.list + slot * 4; /* each slot's descriptor occupies 4 dwords */
+ uint64_t va;
+
+ if (!sbuffer || !sbuffer->buffer) { /* nothing to bind: clear the slot and move on */
+ pipe_resource_reference(&buffers->buffers[slot], NULL);
+ memset(desc, 0, sizeof(uint32_t) * 4); /* zero all 4 descriptor dwords */
+ buffers->desc.enabled_mask &= ~(1llu << slot); /* 64-bit shift so the mask bit is computed in 64 bits */
+ continue;
+ }
+
+ buf = (struct r600_resource *)sbuffer->buffer;
+ va = buf->gpu_address + sbuffer->buffer_offset; /* address of the first bound byte */
+
+ desc[0] = va; /* implicit truncation: keeps the low 32 bits of va */
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | /* remaining upper address bits */
+ S_008F04_STRIDE(0);
+ desc[2] = sbuffer->buffer_size; /* third dword carries the bound range size as given by the caller */
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | /* X/Y/Z/W selects map each component to itself */
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); /* record buf as the resource tracked for this slot */
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf, /* register buf with the gfx buffer list using this table's usage/priority */
+ buffers->shader_usage, buffers->priority);
+ buffers->desc.enabled_mask |= 1llu << slot;
+ }
+
+ buffers->desc.list_dirty = true; /* set unconditionally, even when count is 0 */
+}
+
/* RING BUFFERS */
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -1072,10 +1121,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
- /* Constant buffers. */
+ /* Constant and shader buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
buf, old_va);
+ si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
+ buf, old_va);
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1255,6 +1306,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
}
@@ -1274,6 +1326,9 @@ void si_init_all_descriptors(struct si_context *sctx)
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
+ si_init_buffer_resources(&sctx->shader_buffers[i],
+ SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER);
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
@@ -1291,6 +1346,7 @@ void si_init_all_descriptors(struct si_context *sctx)
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.set_shader_images = si_set_shader_images;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
+ sctx->b.b.set_shader_buffers = si_set_shader_buffers;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.invalidate_buffer = si_invalidate_buffer;
@@ -1313,6 +1369,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
!si_upload_descriptors(sctx, &sctx->images[i].desc))
return false;
@@ -1327,6 +1384,7 @@ void si_release_all_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
+ si_release_buffer_resources(&sctx->shader_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
si_release_image_views(&sctx->images[i]);
}
@@ -1340,6 +1398,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
+ si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6d0d687..dfdb8bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -256,6 +256,7 @@ struct si_context {
struct si_descriptors vertex_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
+ struct si_buffer_resources shader_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
struct si_images_info images[SI_NUM_SHADERS];
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1e4bf82..efc00f3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4408,7 +4408,8 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
- last_array_pointer = SI_PARAM_IMAGES;
+ params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+ last_array_pointer = SI_PARAM_SHADER_BUFFERS;
switch (ctx->type) {
case TGSI_PROCESSOR_VERTEX:
@@ -5988,6 +5989,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
+ params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -6238,6 +6240,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
+ params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_ALPHA_REF] = ctx.f32;
last_array_pointer = -1;
last_sgpr = SI_PARAM_ALPHA_REF;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8059edf..013c8a2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -81,95 +81,97 @@ struct radeon_shader_reloc;
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
#define SI_SGPR_IMAGES 6
-#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
-#define SI_SGPR_BASE_VERTEX 10 /* VS only */
-#define SI_SGPR_START_INSTANCE 11 /* VS only */
-#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */
-#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
-#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
-#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
-#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
-#define SI_SGPR_ALPHA_REF 8 /* PS only */
-
-#define SI_VS_NUM_USER_SGPR 13 /* API VS */
-#define SI_ES_NUM_USER_SGPR 12 /* API VS */
-#define SI_LS_NUM_USER_SGPR 13 /* API VS */
-#define SI_TCS_NUM_USER_SGPR 11
-#define SI_TES_NUM_USER_SGPR 10
-#define SI_GS_NUM_USER_SGPR 8
+#define SI_SGPR_SHADER_BUFFERS 8
+#define SI_SGPR_VERTEX_BUFFERS 10 /* VS only */
+#define SI_SGPR_BASE_VERTEX 12 /* VS only */
+#define SI_SGPR_START_INSTANCE 13 /* VS only */
+#define SI_SGPR_VS_STATE_BITS 14 /* VS(VS) only */
+#define SI_SGPR_LS_OUT_LAYOUT 14 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS 10 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 11 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT 12 /* TCS only */
+#define SI_SGPR_ALPHA_REF 10 /* PS only */
+
+#define SI_VS_NUM_USER_SGPR 15 /* API VS */
+#define SI_ES_NUM_USER_SGPR 14 /* API VS */
+#define SI_LS_NUM_USER_SGPR 15 /* API VS */
+#define SI_TCS_NUM_USER_SGPR 13
+#define SI_TES_NUM_USER_SGPR 12
+#define SI_GS_NUM_USER_SGPR 10
#define SI_GSCOPY_NUM_USER_SGPR 4
-#define SI_PS_NUM_USER_SGPR 9
+#define SI_PS_NUM_USER_SGPR 11
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
#define SI_PARAM_SAMPLERS 2
#define SI_PARAM_IMAGES 3
+#define SI_PARAM_SHADER_BUFFERS 4
/* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFERS 4
-#define SI_PARAM_BASE_VERTEX 5
-#define SI_PARAM_START_INSTANCE 6
+#define SI_PARAM_VERTEX_BUFFERS 5
+#define SI_PARAM_BASE_VERTEX 6
+#define SI_PARAM_START_INSTANCE 7
/* [0] = clamp vertex color */
-#define SI_PARAM_VS_STATE_BITS 7
+#define SI_PARAM_VS_STATE_BITS 8
/* the other VS parameters are assigned dynamically */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
*/
-#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
-#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_LAYOUT 6 /* for TCS & TES */
/* Layout of LS outputs / TCS inputs
* [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
*/
-#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
-#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+#define SI_PARAM_TCS_IN_LAYOUT 7 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 8 /* same value as TCS_IN_LAYOUT, LS only */
/* TCS only parameters. */
-#define SI_PARAM_TESS_FACTOR_OFFSET 7
-#define SI_PARAM_PATCH_ID 8
-#define SI_PARAM_REL_IDS 9
+#define SI_PARAM_TESS_FACTOR_OFFSET 8
+#define SI_PARAM_PATCH_ID 9
+#define SI_PARAM_REL_IDS 10
/* GS only parameters */
-#define SI_PARAM_GS2VS_OFFSET 4
-#define SI_PARAM_GS_WAVE_ID 5
-#define SI_PARAM_VTX0_OFFSET 6
-#define SI_PARAM_VTX1_OFFSET 7
-#define SI_PARAM_PRIMITIVE_ID 8
-#define SI_PARAM_VTX2_OFFSET 9
-#define SI_PARAM_VTX3_OFFSET 10
-#define SI_PARAM_VTX4_OFFSET 11
-#define SI_PARAM_VTX5_OFFSET 12
-#define SI_PARAM_GS_INSTANCE_ID 13
+#define SI_PARAM_GS2VS_OFFSET 5
+#define SI_PARAM_GS_WAVE_ID 6
+#define SI_PARAM_VTX0_OFFSET 7
+#define SI_PARAM_VTX1_OFFSET 8
+#define SI_PARAM_PRIMITIVE_ID 9
+#define SI_PARAM_VTX2_OFFSET 10
+#define SI_PARAM_VTX3_OFFSET 11
+#define SI_PARAM_VTX4_OFFSET 12
+#define SI_PARAM_VTX5_OFFSET 13
+#define SI_PARAM_GS_INSTANCE_ID 14
/* PS only parameters */
-#define SI_PARAM_ALPHA_REF 4
-#define SI_PARAM_PRIM_MASK 5
-#define SI_PARAM_PERSP_SAMPLE 6
-#define SI_PARAM_PERSP_CENTER 7
-#define SI_PARAM_PERSP_CENTROID 8
-#define SI_PARAM_PERSP_PULL_MODEL 9
-#define SI_PARAM_LINEAR_SAMPLE 10
-#define SI_PARAM_LINEAR_CENTER 11
-#define SI_PARAM_LINEAR_CENTROID 12
-#define SI_PARAM_LINE_STIPPLE_TEX 13
-#define SI_PARAM_POS_X_FLOAT 14
-#define SI_PARAM_POS_Y_FLOAT 15
-#define SI_PARAM_POS_Z_FLOAT 16
-#define SI_PARAM_POS_W_FLOAT 17
-#define SI_PARAM_FRONT_FACE 18
-#define SI_PARAM_ANCILLARY 19
-#define SI_PARAM_SAMPLE_COVERAGE 20
-#define SI_PARAM_POS_FIXED_PT 21
+#define SI_PARAM_ALPHA_REF 5
+#define SI_PARAM_PRIM_MASK 6
+#define SI_PARAM_PERSP_SAMPLE 7
+#define SI_PARAM_PERSP_CENTER 8
+#define SI_PARAM_PERSP_CENTROID 9
+#define SI_PARAM_PERSP_PULL_MODEL 10
+#define SI_PARAM_LINEAR_SAMPLE 11
+#define SI_PARAM_LINEAR_CENTER 12
+#define SI_PARAM_LINEAR_CENTROID 13
+#define SI_PARAM_LINE_STIPPLE_TEX 14
+#define SI_PARAM_POS_X_FLOAT 15
+#define SI_PARAM_POS_Y_FLOAT 16
+#define SI_PARAM_POS_Z_FLOAT 17
+#define SI_PARAM_POS_W_FLOAT 18
+#define SI_PARAM_FRONT_FACE 19
+#define SI_PARAM_ANCILLARY 20
+#define SI_PARAM_SAMPLE_COVERAGE 21
+#define SI_PARAM_POS_FIXED_PT 22
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
--
2.5.0
More information about the mesa-dev
mailing list