[Mesa-dev] [PATCH] gallium: improve the pipe_stream_output_info struct
Jose Fonseca
jfonseca at vmware.com
Mon Jan 9 10:31:51 PST 2012
----- Original Message -----
> There are 3 changes:
>
> 1) stride is specified for each buffer, not just one, so that drivers
> don't
> have to derive it from the outputs
>
> 2) new per-output property dst_offset, which specifies the offset
> into the buffer in dwords where the output should be stored,
> so that drivers don't have to compute the offsets manually;
> this will also be useful for gl_SkipComponents
> from ARB_transform_feedback3
>
> 3) register_mask is removed, instead, there is start_component
> and num_components; register_mask with non-consecutive 1s
> doesn't make much sense (some hardware cannot do packing of
> components)
>
> Christoph Bumiller: fixed nvc0.
> ---
> src/gallium/auxiliary/draw/draw_pt_so_emit.c | 4 ++-
> src/gallium/auxiliary/util/u_blitter.c | 4 +-
> src/gallium/auxiliary/util/u_dump_state.c | 6 ++-
> src/gallium/drivers/llvmpipe/lp_state_so.c | 2 +-
> src/gallium/drivers/nvc0/nvc0_program.c | 11 ++----
> src/gallium/drivers/r600/r600.h | 4 +-
> src/gallium/drivers/r600/r600_hw_context.c | 8 ++--
> src/gallium/drivers/r600/r600_pipe.h | 1 -
> src/gallium/drivers/r600/r600_shader.c | 46
> +++++++-------------------
> src/gallium/drivers/r600/r600_state_common.c | 2 +-
> src/gallium/drivers/softpipe/sp_state_so.c | 2 +-
> src/gallium/drivers/trace/tr_dump_state.c | 6 ++-
> src/gallium/include/pipe/p_state.h | 13 ++++---
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 ++++------
> 14 files changed, 53 insertions(+), 74 deletions(-)
>
> diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> index 2dc9e29..7dc6937 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> @@ -144,7 +144,9 @@ static void so_emit_prim(struct pt_so_emit *so,
> (const char *)input_ptr + (indices[i] *
> input_vertex_stride));
> for (slot = 0; slot < state->num_outputs; ++slot) {
> unsigned idx = state->output[slot].register_index;
> - unsigned writemask = state->output[slot].register_mask;
> + unsigned writemask =
> + ((1 << state->output[slot].num_components) - 1) <<
> + state->output[slot].start_component;
> unsigned written_compos = 0;
> unsigned compo;
>
> diff --git a/src/gallium/auxiliary/util/u_blitter.c
> b/src/gallium/auxiliary/util/u_blitter.c
> index 80fdfe0..94012c5 100644
> --- a/src/gallium/auxiliary/util/u_blitter.c
> +++ b/src/gallium/auxiliary/util/u_blitter.c
> @@ -266,8 +266,8 @@ struct blitter_context
> *util_blitter_create(struct pipe_context *pipe)
>
> memset(&so, 0, sizeof(so));
> so.num_outputs = 1;
> - so.output[0].register_mask = TGSI_WRITEMASK_XYZW;
> - so.stride = 4;
> + so.output[0].num_components = 4;
> + so.stride[0] = 4;
>
> ctx->vs_pos_only =
> util_make_vertex_passthrough_shader_with_so(pipe, 1,
> semantic_names,
> diff --git a/src/gallium/auxiliary/util/u_dump_state.c
> b/src/gallium/auxiliary/util/u_dump_state.c
> index e44c619..4b5a042 100644
> --- a/src/gallium/auxiliary/util/u_dump_state.c
> +++ b/src/gallium/auxiliary/util/u_dump_state.c
> @@ -441,13 +441,15 @@ util_dump_shader_state(FILE *stream, const
> struct pipe_shader_state *state)
> util_dump_member_begin(stream, "stream_output");
> util_dump_struct_begin(stream, "pipe_stream_output_info");
> util_dump_member(stream, uint, &state->stream_output,
> num_outputs);
> - util_dump_member(stream, uint, &state->stream_output, stride);
> + util_dump_array(stream, uint, state->stream_output.stride,
> + Elements(state->stream_output.stride));
> util_dump_array_begin(stream);
> for(i = 0; i < state->stream_output.num_outputs; ++i) {
> util_dump_elem_begin(stream);
> util_dump_struct_begin(stream, ""); /* anonymous */
> util_dump_member(stream, uint,
> &state->stream_output.output[i], register_index);
> - util_dump_member(stream, uint,
> &state->stream_output.output[i], register_mask);
> + util_dump_member(stream, uint,
> &state->stream_output.output[i], start_component);
> + util_dump_member(stream, uint,
> &state->stream_output.output[i], num_components);
> util_dump_member(stream, uint,
> &state->stream_output.output[i], output_buffer);
> util_dump_struct_end(stream);
> util_dump_elem_end(stream);
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c
> b/src/gallium/drivers/llvmpipe/lp_state_so.c
> index 108f3aa..ed2272d 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_so.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
> @@ -42,7 +42,7 @@ llvmpipe_create_stream_output_state(struct
> pipe_context *pipe,
>
> if (so) {
> so->base.num_outputs = templ->num_outputs;
> - so->base.stride = templ->stride;
> + memcpy(so->base.stride, templ->stride, sizeof(templ->stride));
> memcpy(so->base.output, templ->output,
> templ->num_outputs * sizeof(templ->output[0]));
> }
> diff --git a/src/gallium/drivers/nvc0/nvc0_program.c
> b/src/gallium/drivers/nvc0/nvc0_program.c
> index 605bca5..499d20e 100644
> --- a/src/gallium/drivers/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nvc0/nvc0_program.c
> @@ -496,20 +496,17 @@ nvc0_program_create_tfb_state(const struct
> nv50_ir_prog_info *info,
> tfb->varying_count[b] = 0;
>
> for (i = 0; i < pso->num_outputs; ++i) {
> + unsigned startc = pso->output[i].start_component;
> if (pso->output[i].output_buffer != b)
> continue;
> - for (c = 0; c < 4; ++c) {
> - if (!(pso->output[i].register_mask & (1 << c)))
> - continue;
> + for (c = 0; c < pso->output[i].num_components; ++c) {
> tfb->varying_count[b]++;
> tfb->varying_index[n++] =
> - info->out[pso->output[i].register_index].slot[c];
> + info->out[pso->output[i].register_index].slot[startc
> + c];
> }
> }
> - tfb->stride[b] = tfb->varying_count[b] * 4;
> + tfb->stride[b] = pso->stride[b] * 4;
> }
> - if (pso->stride)
> - tfb->stride[0] = pso->stride;
>
> return tfb;
> }
> diff --git a/src/gallium/drivers/r600/r600.h
> b/src/gallium/drivers/r600/r600.h
> index 4bfb5a9..baf09c1 100644
> --- a/src/gallium/drivers/r600/r600.h
> +++ b/src/gallium/drivers/r600/r600.h
> @@ -196,7 +196,7 @@ struct r600_so_target {
>
> /* The buffer where BUFFER_FILLED_SIZE is stored. */
> struct r600_resource *filled_size;
> - unsigned stride;
> + unsigned stride_in_dw;
> unsigned so_index;
> };
>
> @@ -248,7 +248,7 @@ struct r600_context {
> struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
> boolean streamout_start;
> unsigned streamout_append_bitmask;
> - unsigned *vs_shader_so_strides;
> + unsigned *vs_so_stride_in_dw;
> };
>
> struct r600_draw {
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 1dba966..09bc5a7 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -2012,7 +2012,7 @@ static void r600_set_streamout_enable(struct
> r600_context *ctx, unsigned buffer_
> void r600_context_streamout_begin(struct r600_context *ctx)
> {
> struct r600_so_target **t = ctx->so_targets;
> - unsigned *strides = ctx->vs_shader_so_strides;
> + unsigned *stride_in_dw = ctx->vs_so_stride_in_dw;
> unsigned buffer_en, i, update_flags = 0;
>
> buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
> @@ -2043,7 +2043,7 @@ void r600_context_streamout_begin(struct
> r600_context *ctx)
>
> for (i = 0; i < ctx->num_so_targets; i++) {
> if (t[i]) {
> - t[i]->stride = strides[i];
> + t[i]->stride_in_dw = stride_in_dw[i];
> t[i]->so_index = i;
>
> update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
> @@ -2053,7 +2053,7 @@ void r600_context_streamout_begin(struct
> r600_context *ctx)
> 16*i - R600_CONTEXT_REG_OFFSET) >> 2;
> ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset +
> t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
> - ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2; /* VTX_STRIDE
> (in DW) */
> + ctx->pm4[ctx->pm4_cdwords++] = stride_in_dw[i]; /* VTX_STRIDE
> (in DW) */
> ctx->pm4[ctx->pm4_cdwords++] = 0; /* BUFFER_BASE */
>
> ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
> @@ -2165,7 +2165,7 @@ void r600_context_draw_opaque_count(struct
> r600_context *ctx, struct r600_so_tar
>
> ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
> ctx->pm4[ctx->pm4_cdwords++] =
> (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE -
> R600_CONTEXT_REG_OFFSET) >> 2;
> - ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2;
> + ctx->pm4[ctx->pm4_cdwords++] = t->stride_in_dw;
>
> ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
> ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM |
> COPY_DW_DST_IS_REG;
> diff --git a/src/gallium/drivers/r600/r600_pipe.h
> b/src/gallium/drivers/r600/r600_pipe.h
> index 447b9dc..91eb0e8 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -147,7 +147,6 @@ struct r600_pipe_shader {
> struct tgsi_token *tokens;
> unsigned sprite_coord_enable;
> struct pipe_stream_output_info so;
> - unsigned so_strides[4];
> };
>
> struct r600_pipe_sampler_state {
> diff --git a/src/gallium/drivers/r600/r600_shader.c
> b/src/gallium/drivers/r600/r600_shader.c
> index ad4aded..9dbbbd8 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -124,12 +124,14 @@ int r600_pipe_shader_create(struct pipe_context
> *ctx, struct r600_pipe_shader *s
> unsigned i;
> fprintf(stderr, "STREAMOUT\n");
> for (i = 0; i < shader->so.num_outputs; i++) {
> + unsigned mask = ((1 << shader->so.output[i].num_components) - 1)
> <<
> + shader->so.output[i].start_component;
> fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
> shader->so.output[i].output_buffer,
> shader->so.output[i].register_index,
> - shader->so.output[i].register_mask & 1 ? "x" : "_",
> - (shader->so.output[i].register_mask >> 1) & 1 ? "y" :
> "_",
> - (shader->so.output[i].register_mask >> 2) & 1 ? "z" :
> "_",
> - (shader->so.output[i].register_mask >> 3) & 1 ? "w" :
> "_");
> + mask & 1 ? "x" : "_",
> + (mask >> 1) & 1 ? "y" : "_",
> + (mask >> 2) & 1 ? "z" : "_",
> + (mask >> 3) & 1 ? "w" : "_");
> }
> }
> }
> @@ -863,11 +865,8 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
>
> /* Add stream outputs. */
> if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
> - unsigned buffer_offset[PIPE_MAX_SO_BUFFERS] = {0};
> -
> for (i = 0; i < so.num_outputs; i++) {
> struct r600_bytecode_output output;
> - unsigned comps;
>
> if (so.output[i].output_buffer >= 4) {
> R600_ERR("exceeded the max number of stream output buffers, got:
> %d\n",
> @@ -875,36 +874,21 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
> r = -EINVAL;
> goto out_err;
> }
> -
> - switch (so.output[i].register_mask) {
> - case TGSI_WRITEMASK_XYZW:
> - comps = 4;
> - break;
> - case TGSI_WRITEMASK_XYZ:
> - comps = 3;
> - break;
> - case TGSI_WRITEMASK_XY:
> - comps = 2;
> - break;
> - case TGSI_WRITEMASK_X:
> - comps = 1;
> - break;
> - default:
> - R600_ERR("streamout: invalid register_mask, got: %x\n",
> - so.output[i].register_mask);
> - r = -EINVAL;
> - goto out_err;
> + if (so.output[i].start_component) {
> + R600_ERR("stream_output - start_component cannot be
> non-zero\n");
> + r = -EINVAL;
> + goto out_err;
> }
>
> memset(&output, 0, sizeof(struct r600_bytecode_output));
> output.gpr = shader->output[so.output[i].register_index].gpr;
> output.elem_size = 0;
> - output.array_base = buffer_offset[so.output[i].output_buffer];
> + output.array_base = so.output[i].dst_offset;
> output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
> output.burst_count = 1;
> output.barrier = 1;
> output.array_size = 0;
> - output.comp_mask = so.output[i].register_mask;
> + output.comp_mask = (1 << so.output[i].num_components) - 1;
> if (ctx.bc->chip_class >= EVERGREEN) {
> switch (so.output[i].output_buffer) {
> case 0:
> @@ -939,12 +923,6 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
> r = r600_bytecode_add_output(ctx.bc, &output);
> if (r)
> goto out_err;
> -
> - buffer_offset[so.output[i].output_buffer] += comps;
> - }
> -
> - for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
> - pipeshader->so_strides[i] = buffer_offset[i] * 4;
> }
> }
>
> diff --git a/src/gallium/drivers/r600/r600_state_common.c
> b/src/gallium/drivers/r600/r600_state_common.c
> index 054ab90..d96857e 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -645,7 +645,7 @@ void r600_draw_vbo(struct pipe_context *ctx,
> const struct pipe_draw_info *dinfo)
> }
> }
>
> - rctx->ctx.vs_shader_so_strides = rctx->vs_shader->so_strides;
> + rctx->ctx.vs_so_stride_in_dw = rctx->vs_shader->so.stride;
>
> mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
>
> diff --git a/src/gallium/drivers/softpipe/sp_state_so.c
> b/src/gallium/drivers/softpipe/sp_state_so.c
> index 31ef384..f4b5cdf 100644
> --- a/src/gallium/drivers/softpipe/sp_state_so.c
> +++ b/src/gallium/drivers/softpipe/sp_state_so.c
> @@ -43,7 +43,7 @@ softpipe_create_stream_output_state(struct
> pipe_context *pipe,
>
> if (so) {
> so->base.num_outputs = templ->num_outputs;
> - so->base.stride = templ->stride;
> + memcpy(so->base.stride, templ->stride, sizeof(templ->stride));
> memcpy(so->base.output, templ->output,
> templ->num_outputs * sizeof(templ->output[0]));
> }
> diff --git a/src/gallium/drivers/trace/tr_dump_state.c
> b/src/gallium/drivers/trace/tr_dump_state.c
> index 038a80e..7ce477e 100644
> --- a/src/gallium/drivers/trace/tr_dump_state.c
> +++ b/src/gallium/drivers/trace/tr_dump_state.c
> @@ -271,14 +271,16 @@ void trace_dump_shader_state(const struct
> pipe_shader_state *state)
> trace_dump_member_begin("stream_output");
> trace_dump_struct_begin("pipe_stream_output_info");
> trace_dump_member(uint, &state->stream_output, num_outputs);
> - trace_dump_member(uint, &state->stream_output, stride);
> + trace_dump_array(uint, state->stream_output.stride,
> PIPE_MAX_SO_BUFFERS);
> trace_dump_array_begin();
> for(i = 0; i < state->stream_output.num_outputs; ++i) {
> trace_dump_elem_begin();
> trace_dump_struct_begin(""); /* anonymous */
> trace_dump_member(uint, &state->stream_output.output[i],
> register_index);
> - trace_dump_member(uint, &state->stream_output.output[i],
> register_mask);
> + trace_dump_member(uint, &state->stream_output.output[i],
> start_component);
> + trace_dump_member(uint, &state->stream_output.output[i],
> num_components);
> trace_dump_member(uint, &state->stream_output.output[i],
> output_buffer);
> + trace_dump_member(uint, &state->stream_output.output[i],
> dst_offset);
> trace_dump_struct_end();
> trace_dump_elem_end();
> }
> diff --git a/src/gallium/include/pipe/p_state.h
> b/src/gallium/include/pipe/p_state.h
> index f943ca5..15cc001 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -176,16 +176,19 @@ struct pipe_clip_state
> struct pipe_stream_output_info
> {
> unsigned num_outputs;
> - /** stride for an entire vertex, only used if all output_buffers
> are 0 */
> - unsigned stride;
> + /** stride for an entire vertex for each buffer in dwords */
> + unsigned stride[PIPE_MAX_SO_BUFFERS];
If it per output buffer then please move it inside the array below. Otherwise no objection.
BTW, why stride in dwords? Elsewhere in gallium we always use bytes.
Jose
> +
> /**
> * Array of stream outputs, in the order they are to be written
> in.
> * Selected components are tightly packed into the output buffer.
> */
> struct {
> - unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS
> */
> - unsigned register_mask:4; /**< TGSI_WRITEMASK_x */
> - unsigned output_buffer:4; /**< 0 to PIPE_MAX_SO_BUFFERS */
> + unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS
> */
> + unsigned start_component:2; /** 0 to 3 */
> + unsigned num_components:3; /** 1 to 4 */
> + unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
> + unsigned dst_offset:16; /**< offset into the buffer in
> dwords */
> } output[PIPE_MAX_SHADER_OUTPUTS];
> };
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 3b8e2fe..1613bf8 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -5091,25 +5091,21 @@ st_translate_stream_output_info(struct
> glsl_to_tgsi_visitor *glsl_to_tgsi,
> const GLuint outputMapping[],
> struct pipe_stream_output_info *so)
> {
> - static unsigned comps_to_mask[] = {
> - 0,
> - TGSI_WRITEMASK_X,
> - TGSI_WRITEMASK_XY,
> - TGSI_WRITEMASK_XYZ,
> - TGSI_WRITEMASK_XYZW
> - };
> unsigned i;
> struct gl_transform_feedback_info *info =
> &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
>
> for (i = 0; i < info->NumOutputs; i++) {
> - assert(info->Outputs[i].NumComponents <
> Elements(comps_to_mask));
> so->output[i].register_index =
> outputMapping[info->Outputs[i].OutputRegister];
> - so->output[i].register_mask =
> - comps_to_mask[info->Outputs[i].NumComponents]
> - << info->Outputs[i].ComponentOffset;
> + so->output[i].start_component =
> info->Outputs[i].ComponentOffset;
> + so->output[i].num_components = info->Outputs[i].NumComponents;
> so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
> + so->output[i].dst_offset = info->Outputs[i].DstOffset;
> + }
> +
> + for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
> + so->stride[i] = info->BufferStride[i];
> }
> so->num_outputs = info->NumOutputs;
> }
> --
> 1.7.5.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
More information about the mesa-dev
mailing list