[Mesa-dev] [PATCH 02/11] radeonsi: implement set_shader_buffers

Nicolai Hähnle nhaehnle at gmail.com
Mon Mar 21 23:21:54 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c |  61 +++++++++++++-
 src/gallium/drivers/radeonsi/si_pipe.h        |   1 +
 src/gallium/drivers/radeonsi/si_shader.c      |   5 +-
 src/gallium/drivers/radeonsi/si_shader.h      | 114 +++++++++++++-------------
 4 files changed, 123 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c7c30bf..72bd50f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 	buffers->desc.list_dirty = true;
 }
 
+/* SHADER BUFFERS */
+
+static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader, /* binds/unbinds `count` shader-buffer slots of stage `shader`, starting at `start_slot` */
+				  unsigned start_slot, unsigned count,
+				  struct pipe_shader_buffer *sbuffers) /* may be NULL: every slot in the range is then unbound */
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; /* per-stage shader-buffer state */
+	unsigned i;
+
+	assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); /* the slot range is only checked by this assert */
+
+	for (i = 0; i < count; ++i) {
+		struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; /* NULL array => NULL entry => unbind path below */
+		struct r600_resource *buf;
+		unsigned slot = start_slot + i;
+		uint32_t *desc = buffers->desc.list + slot * 4; /* each slot's descriptor is 4 dwords */
+		uint64_t va;
+
+		if (!sbuffer || !sbuffer->buffer) { /* unbind: drop the slot's resource, zero its descriptor, clear its enabled bit */
+			pipe_resource_reference(&buffers->buffers[slot], NULL);
+			memset(desc, 0, sizeof(uint32_t) * 4);
+			buffers->desc.enabled_mask &= ~(1llu << slot);
+			continue;
+		}
+
+		buf = (struct r600_resource *)sbuffer->buffer;
+		va = buf->gpu_address + sbuffer->buffer_offset; /* GPU address of the start of the bound range */
+
+		desc[0] = va; /* implicit truncation: low 32 bits of va */
+		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | /* upper address bits */
+			  S_008F04_STRIDE(0); /* stride field set to 0 */
+		desc[2] = sbuffer->buffer_size; /* range size exactly as passed by the caller */
+		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | /* X/Y/Z/W selects map each component to itself */
+			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+			  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | /* FLOAT number format, per the macro name */
+			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); /* "32" data format, per the macro name */
+
+		pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); /* store buf as the slot's resource */
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf, /* register buf with the gfx buffer list */
+				      buffers->shader_usage, buffers->priority); /* usage and priority come from this buffer set */
+		buffers->desc.enabled_mask |= 1llu << slot; /* mark the slot as bound */
+	}
+
+	buffers->desc.list_dirty = true; /* set unconditionally, even when count == 0 */
+}
+
 /* RING BUFFERS */
 
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -1072,10 +1121,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 		}
 	}
 
-	/* Constant buffers. */
+	/* Constant and shader buffers. */
 	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 		si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
 					  buf, old_va);
+		si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
+					  buf, old_va);
 	}
 
 	/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1255,6 +1306,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
 			si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
 
 		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+		si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false);
 		si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
 		si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
 	}
@@ -1274,6 +1326,9 @@ void si_init_all_descriptors(struct si_context *sctx)
 		si_init_buffer_resources(&sctx->rw_buffers[i],
 					 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
 					 RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
+		si_init_buffer_resources(&sctx->shader_buffers[i],
+					 SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER);
 
 		si_init_descriptors(&sctx->samplers[i].views.desc,
 				    SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
@@ -1291,6 +1346,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_shader_images = si_set_shader_images;
 	sctx->b.b.set_constant_buffer = si_set_constant_buffer;
+	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
@@ -1313,6 +1369,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
 		    !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+		    !si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) ||
 		    !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
 		    !si_upload_descriptors(sctx, &sctx->images[i].desc))
 			return false;
@@ -1327,6 +1384,7 @@ void si_release_all_descriptors(struct si_context *sctx)
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		si_release_buffer_resources(&sctx->const_buffers[i]);
 		si_release_buffer_resources(&sctx->rw_buffers[i]);
+		si_release_buffer_resources(&sctx->shader_buffers[i]);
 		si_release_sampler_views(&sctx->samplers[i].views);
 		si_release_image_views(&sctx->images[i]);
 	}
@@ -1340,6 +1398,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
 		si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
+		si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
 		si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
 		si_image_views_begin_new_cs(sctx, &sctx->images[i]);
 	}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6d0d687..dfdb8bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -256,6 +256,7 @@ struct si_context {
 	struct si_descriptors		vertex_buffers;
 	struct si_buffer_resources	const_buffers[SI_NUM_SHADERS];
 	struct si_buffer_resources	rw_buffers[SI_NUM_SHADERS];
+	struct si_buffer_resources	shader_buffers[SI_NUM_SHADERS];
 	struct si_textures_info		samplers[SI_NUM_SHADERS];
 	struct si_images_info		images[SI_NUM_SHADERS];
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1e4bf82..efc00f3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4408,7 +4408,8 @@ static void create_function(struct si_shader_context *ctx)
 	params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
 	params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
 	params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
-	last_array_pointer = SI_PARAM_IMAGES;
+	params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+	last_array_pointer = SI_PARAM_SHADER_BUFFERS;
 
 	switch (ctx->type) {
 	case TGSI_PROCESSOR_VERTEX:
@@ -5988,6 +5989,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
 	params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
 	params[SI_PARAM_SAMPLERS] = ctx.i64;
 	params[SI_PARAM_IMAGES] = ctx.i64;
+	params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
 	params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
 	params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
 	params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -6238,6 +6240,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
 	params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
 	params[SI_PARAM_SAMPLERS] = ctx.i64;
 	params[SI_PARAM_IMAGES] = ctx.i64;
+	params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
 	params[SI_PARAM_ALPHA_REF] = ctx.f32;
 	last_array_pointer = -1;
 	last_sgpr = SI_PARAM_ALPHA_REF;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8059edf..013c8a2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -81,95 +81,97 @@ struct radeon_shader_reloc;
 #define SI_SGPR_CONST_BUFFERS	2
 #define SI_SGPR_SAMPLERS	4  /* images & sampler states interleaved */
 #define SI_SGPR_IMAGES		6
-#define SI_SGPR_VERTEX_BUFFERS	8  /* VS only */
-#define SI_SGPR_BASE_VERTEX	10 /* VS only */
-#define SI_SGPR_START_INSTANCE	11 /* VS only */
-#define SI_SGPR_VS_STATE_BITS	12 /* VS(VS) only */
-#define SI_SGPR_LS_OUT_LAYOUT	12 /* VS(LS) only */
-#define SI_SGPR_TCS_OUT_OFFSETS	8  /* TCS & TES only */
-#define SI_SGPR_TCS_OUT_LAYOUT	9  /* TCS & TES only */
-#define SI_SGPR_TCS_IN_LAYOUT	10 /* TCS only */
-#define SI_SGPR_ALPHA_REF	8  /* PS only */
-
-#define SI_VS_NUM_USER_SGPR	13 /* API VS */
-#define SI_ES_NUM_USER_SGPR	12 /* API VS */
-#define SI_LS_NUM_USER_SGPR	13 /* API VS */
-#define SI_TCS_NUM_USER_SGPR	11
-#define SI_TES_NUM_USER_SGPR	10
-#define SI_GS_NUM_USER_SGPR	8
+#define SI_SGPR_SHADER_BUFFERS	8
+#define SI_SGPR_VERTEX_BUFFERS	10  /* VS only */
+#define SI_SGPR_BASE_VERTEX	12 /* VS only */
+#define SI_SGPR_START_INSTANCE	13 /* VS only */
+#define SI_SGPR_VS_STATE_BITS	14 /* VS(VS) only */
+#define SI_SGPR_LS_OUT_LAYOUT	14 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS	10 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT	11 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT	12 /* TCS only */
+#define SI_SGPR_ALPHA_REF	10 /* PS only */
+
+#define SI_VS_NUM_USER_SGPR	15 /* API VS */
+#define SI_ES_NUM_USER_SGPR	14 /* API VS */
+#define SI_LS_NUM_USER_SGPR	15 /* API VS */
+#define SI_TCS_NUM_USER_SGPR	13
+#define SI_TES_NUM_USER_SGPR	12
+#define SI_GS_NUM_USER_SGPR	10
 #define SI_GSCOPY_NUM_USER_SGPR	4
-#define SI_PS_NUM_USER_SGPR	9
+#define SI_PS_NUM_USER_SGPR	11
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS	0
 #define SI_PARAM_CONST_BUFFERS	1
 #define SI_PARAM_SAMPLERS	2
 #define SI_PARAM_IMAGES		3
+#define SI_PARAM_SHADER_BUFFERS	4
 
 /* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFERS	4
-#define SI_PARAM_BASE_VERTEX	5
-#define SI_PARAM_START_INSTANCE	6
+#define SI_PARAM_VERTEX_BUFFERS	5
+#define SI_PARAM_BASE_VERTEX	6
+#define SI_PARAM_START_INSTANCE	7
 /* [0] = clamp vertex color */
-#define SI_PARAM_VS_STATE_BITS	7
+#define SI_PARAM_VS_STATE_BITS	8
 /* the other VS parameters are assigned dynamically */
 
 /* Offsets where TCS outputs and TCS patch outputs live in LDS:
  *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
  *   [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
  */
-#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */
 
 /* Layout of TCS outputs / TES inputs:
  *   [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
  *   [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
  *   [26:31] = gl_PatchVerticesIn, max = 32
  */
-#define SI_PARAM_TCS_OUT_LAYOUT	5 /* for TCS & TES */
+#define SI_PARAM_TCS_OUT_LAYOUT	6 /* for TCS & TES */
 
 /* Layout of LS outputs / TCS inputs
  *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
  *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
  */
-#define SI_PARAM_TCS_IN_LAYOUT	6 /* TCS only */
-#define SI_PARAM_LS_OUT_LAYOUT	7 /* same value as TCS_IN_LAYOUT, LS only */
+#define SI_PARAM_TCS_IN_LAYOUT	7 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT	8 /* same value as TCS_IN_LAYOUT, LS only */
 
 /* TCS only parameters. */
-#define SI_PARAM_TESS_FACTOR_OFFSET 7
-#define SI_PARAM_PATCH_ID	8
-#define SI_PARAM_REL_IDS	9
+#define SI_PARAM_TESS_FACTOR_OFFSET 8
+#define SI_PARAM_PATCH_ID	9
+#define SI_PARAM_REL_IDS	10
 
 /* GS only parameters */
-#define SI_PARAM_GS2VS_OFFSET	4
-#define SI_PARAM_GS_WAVE_ID	5
-#define SI_PARAM_VTX0_OFFSET	6
-#define SI_PARAM_VTX1_OFFSET	7
-#define SI_PARAM_PRIMITIVE_ID	8
-#define SI_PARAM_VTX2_OFFSET	9
-#define SI_PARAM_VTX3_OFFSET	10
-#define SI_PARAM_VTX4_OFFSET	11
-#define SI_PARAM_VTX5_OFFSET	12
-#define SI_PARAM_GS_INSTANCE_ID	13
+#define SI_PARAM_GS2VS_OFFSET	5
+#define SI_PARAM_GS_WAVE_ID	6
+#define SI_PARAM_VTX0_OFFSET	7
+#define SI_PARAM_VTX1_OFFSET	8
+#define SI_PARAM_PRIMITIVE_ID	9
+#define SI_PARAM_VTX2_OFFSET	10
+#define SI_PARAM_VTX3_OFFSET	11
+#define SI_PARAM_VTX4_OFFSET	12
+#define SI_PARAM_VTX5_OFFSET	13
+#define SI_PARAM_GS_INSTANCE_ID	14
 
 /* PS only parameters */
-#define SI_PARAM_ALPHA_REF		4
-#define SI_PARAM_PRIM_MASK		5
-#define SI_PARAM_PERSP_SAMPLE		6
-#define SI_PARAM_PERSP_CENTER		7
-#define SI_PARAM_PERSP_CENTROID		8
-#define SI_PARAM_PERSP_PULL_MODEL	9
-#define SI_PARAM_LINEAR_SAMPLE		10
-#define SI_PARAM_LINEAR_CENTER		11
-#define SI_PARAM_LINEAR_CENTROID	12
-#define SI_PARAM_LINE_STIPPLE_TEX	13
-#define SI_PARAM_POS_X_FLOAT		14
-#define SI_PARAM_POS_Y_FLOAT		15
-#define SI_PARAM_POS_Z_FLOAT		16
-#define SI_PARAM_POS_W_FLOAT		17
-#define SI_PARAM_FRONT_FACE		18
-#define SI_PARAM_ANCILLARY		19
-#define SI_PARAM_SAMPLE_COVERAGE	20
-#define SI_PARAM_POS_FIXED_PT		21
+#define SI_PARAM_ALPHA_REF		5
+#define SI_PARAM_PRIM_MASK		6
+#define SI_PARAM_PERSP_SAMPLE		7
+#define SI_PARAM_PERSP_CENTER		8
+#define SI_PARAM_PERSP_CENTROID		9
+#define SI_PARAM_PERSP_PULL_MODEL	10
+#define SI_PARAM_LINEAR_SAMPLE		11
+#define SI_PARAM_LINEAR_CENTER		12
+#define SI_PARAM_LINEAR_CENTROID	13
+#define SI_PARAM_LINE_STIPPLE_TEX	14
+#define SI_PARAM_POS_X_FLOAT		15
+#define SI_PARAM_POS_Y_FLOAT		16
+#define SI_PARAM_POS_Z_FLOAT		17
+#define SI_PARAM_POS_W_FLOAT		18
+#define SI_PARAM_FRONT_FACE		19
+#define SI_PARAM_ANCILLARY		20
+#define SI_PARAM_SAMPLE_COVERAGE	21
+#define SI_PARAM_POS_FIXED_PT		22
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
 
-- 
2.5.0



More information about the mesa-dev mailing list