[Mesa-dev] [PATCH v2 14/25] radeonsi: group streamout writes by vertex stream
Nicolai Hähnle
nhaehnle at gmail.com
Tue Dec 6 10:48:25 UTC 2016
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/gallium/drivers/radeonsi/si_shader.c | 32 ++++++++++++++++++++++----------
1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index f1a4946..1dca958 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2137,20 +2137,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
unsigned start = stream_out->start_component;
unsigned num_comps = stream_out->num_components;
LLVMValueRef out[4];
assert(num_comps && num_comps <= 4);
if (!num_comps || num_comps > 4)
return;
/* Load the output as int. */
for (int j = 0; j < num_comps; j++) {
+ assert(stream_out->stream == shader_out->vertex_stream[start + j]);
+
out[j] = LLVMBuildBitCast(builder,
shader_out->values[start + j],
ctx->i32, "");
}
/* Pack the output. */
LLVMValueRef vdata = NULL;
switch (num_comps) {
case 1: /* as i32 */
@@ -2173,21 +2175,22 @@ static void emit_streamout_output(struct si_shader_context *ctx,
LLVMConstInt(ctx->i32, 0, 0),
stream_out->dst_offset * 4);
}
/* On SI, the vertex shader is responsible for writing streamout data
* to buffers. */
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
struct si_shader_output_values *outputs,
unsigned noutput)
{
- struct pipe_stream_output_info *so = &ctx->shader->selector->so;
+ struct si_shader_selector *sel = ctx->shader->selector;
+ struct pipe_stream_output_info *so = &sel->so;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
int i;
struct lp_build_if_state if_ctx;
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count =
unpack_param(ctx, ctx->param_streamout_config, 16, 7);
LLVMValueRef tid = get_thread_id(ctx);
@@ -2196,21 +2199,34 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
LLVMValueRef stream_id =
unpack_param(ctx, ctx->param_streamout_config, 24, 2);
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data. */
lp_build_if(&if_ctx, gallivm, can_emit);
- {
+
+ for (int stream = 0; stream < 4; ++stream) {
+ struct lp_build_if_state if_ctx_stream;
+
+ if (!sel->info.num_stream_output_components[stream])
+ continue;
+
+ LLVMValueRef is_stream =
+ LLVMBuildICmp(builder, LLVMIntEQ,
+ stream_id,
+ lp_build_const_int32(gallivm, stream), "");
+
+ lp_build_if(&if_ctx_stream, gallivm, is_stream);
+
/* The buffer offset is computed as follows:
* ByteOffset = streamout_offset[buffer_id]*4 +
* (streamout_write_index + thread_id)*stride[buffer_id] +
* attrib_offset
*/
LLVMValueRef so_write_index =
LLVMGetParam(ctx->main_fn,
ctx->param_streamout_write_index);
@@ -2238,36 +2254,32 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");
so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
}
/* Write streamout data. */
for (i = 0; i < so->num_outputs; i++) {
unsigned reg = so->output[i].register_index;
- unsigned stream = so->output[i].stream;
- struct lp_build_if_state if_ctx_stream;
if (reg >= noutput)
continue;
- LLVMValueRef can_emit_stream =
- LLVMBuildICmp(builder, LLVMIntEQ,
- stream_id,
- lp_build_const_int32(gallivm, stream), "");
+ if (stream != so->output[i].stream)
+ continue;
- lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
emit_streamout_output(ctx, so_buffers, so_write_offset,
&so->output[i], &outputs[reg]);
- lp_build_endif(&if_ctx_stream);
}
+
+ lp_build_endif(&if_ctx_stream);
}
lp_build_endif(&if_ctx);
}
/* Generate export instructions for hardware VS shader stage */
static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
struct si_shader_output_values *outputs,
unsigned noutput)
{
--
2.7.4
More information about the mesa-dev
mailing list