[Mesa-dev] [PATCH] gallium: improve the pipe_stream_output_info struct

Jose Fonseca jfonseca at vmware.com
Mon Jan 9 10:31:51 PST 2012



----- Original Message -----
> There are 3 changes:
> 
> 1) stride is specified for each buffer, not just one, so that drivers
> don't
>    have to derive it from the outputs
> 
> 2) new per-output property dst_offset, which specifies the offset
>    into the buffer in dwords where the output should be stored,
>    so that drivers don't have to compute the offsets manually;
>    this will also be useful for gl_SkipComponents
>    from ARB_transform_feedback3
> 
> 3) register_mask is removed, instead, there is start_component
>    and num_components; register_mask with non-consecutive 1s
>    doesn't make much sense (some hardware cannot do packing of
>    components)
> 
> Christoph Bumiller: fixed nvc0.
> ---
>  src/gallium/auxiliary/draw/draw_pt_so_emit.c |    4 ++-
>  src/gallium/auxiliary/util/u_blitter.c       |    4 +-
>  src/gallium/auxiliary/util/u_dump_state.c    |    6 ++-
>  src/gallium/drivers/llvmpipe/lp_state_so.c   |    2 +-
>  src/gallium/drivers/nvc0/nvc0_program.c      |   11 ++----
>  src/gallium/drivers/r600/r600.h              |    4 +-
>  src/gallium/drivers/r600/r600_hw_context.c   |    8 ++--
>  src/gallium/drivers/r600/r600_pipe.h         |    1 -
>  src/gallium/drivers/r600/r600_shader.c       |   46
>  +++++++-------------------
>  src/gallium/drivers/r600/r600_state_common.c |    2 +-
>  src/gallium/drivers/softpipe/sp_state_so.c   |    2 +-
>  src/gallium/drivers/trace/tr_dump_state.c    |    6 ++-
>  src/gallium/include/pipe/p_state.h           |   13 ++++---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |   18 ++++------
>  14 files changed, 53 insertions(+), 74 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> index 2dc9e29..7dc6937 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> @@ -144,7 +144,9 @@ static void so_emit_prim(struct pt_so_emit *so,
>           (const char *)input_ptr + (indices[i] *
>           input_vertex_stride));
>        for (slot = 0; slot < state->num_outputs; ++slot) {
>           unsigned idx = state->output[slot].register_index;
> -         unsigned writemask = state->output[slot].register_mask;
> +         unsigned writemask =
> +               ((1 << state->output[slot].num_components) - 1) <<
> +               state->output[slot].start_component;
>           unsigned written_compos = 0;
>           unsigned compo;
>  
> diff --git a/src/gallium/auxiliary/util/u_blitter.c
> b/src/gallium/auxiliary/util/u_blitter.c
> index 80fdfe0..94012c5 100644
> --- a/src/gallium/auxiliary/util/u_blitter.c
> +++ b/src/gallium/auxiliary/util/u_blitter.c
> @@ -266,8 +266,8 @@ struct blitter_context
> *util_blitter_create(struct pipe_context *pipe)
>  
>        memset(&so, 0, sizeof(so));
>        so.num_outputs = 1;
> -      so.output[0].register_mask = TGSI_WRITEMASK_XYZW;
> -      so.stride = 4;
> +      so.output[0].num_components = 4;
> +      so.stride[0] = 4;
>  
>        ctx->vs_pos_only =
>           util_make_vertex_passthrough_shader_with_so(pipe, 1,
>           semantic_names,
> diff --git a/src/gallium/auxiliary/util/u_dump_state.c
> b/src/gallium/auxiliary/util/u_dump_state.c
> index e44c619..4b5a042 100644
> --- a/src/gallium/auxiliary/util/u_dump_state.c
> +++ b/src/gallium/auxiliary/util/u_dump_state.c
> @@ -441,13 +441,15 @@ util_dump_shader_state(FILE *stream, const
> struct pipe_shader_state *state)
>     util_dump_member_begin(stream, "stream_output");
>     util_dump_struct_begin(stream, "pipe_stream_output_info");
>     util_dump_member(stream, uint, &state->stream_output,
>     num_outputs);
> -   util_dump_member(stream, uint, &state->stream_output, stride);
> +   util_dump_array(stream, uint, state->stream_output.stride,
> +                   Elements(state->stream_output.stride));
>     util_dump_array_begin(stream);
>     for(i = 0; i < state->stream_output.num_outputs; ++i) {
>        util_dump_elem_begin(stream);
>        util_dump_struct_begin(stream, ""); /* anonymous */
>        util_dump_member(stream, uint,
>        &state->stream_output.output[i], register_index);
> -      util_dump_member(stream, uint,
> &state->stream_output.output[i], register_mask);
> +      util_dump_member(stream, uint,
> &state->stream_output.output[i], start_component);
> +      util_dump_member(stream, uint,
> &state->stream_output.output[i], num_components);
>        util_dump_member(stream, uint,
>        &state->stream_output.output[i], output_buffer);
>        util_dump_struct_end(stream);
>        util_dump_elem_end(stream);
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c
> b/src/gallium/drivers/llvmpipe/lp_state_so.c
> index 108f3aa..ed2272d 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_so.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
> @@ -42,7 +42,7 @@ llvmpipe_create_stream_output_state(struct
> pipe_context *pipe,
>  
>     if (so) {
>        so->base.num_outputs = templ->num_outputs;
> -      so->base.stride = templ->stride;
> +      memcpy(so->base.stride, templ->stride, sizeof(templ->stride));
>        memcpy(so->base.output, templ->output,
>               templ->num_outputs * sizeof(templ->output[0]));
>     }
> diff --git a/src/gallium/drivers/nvc0/nvc0_program.c
> b/src/gallium/drivers/nvc0/nvc0_program.c
> index 605bca5..499d20e 100644
> --- a/src/gallium/drivers/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nvc0/nvc0_program.c
> @@ -496,20 +496,17 @@ nvc0_program_create_tfb_state(const struct
> nv50_ir_prog_info *info,
>        tfb->varying_count[b] = 0;
>  
>        for (i = 0; i < pso->num_outputs; ++i) {
> +         unsigned startc = pso->output[i].start_component;
>           if (pso->output[i].output_buffer != b)
>              continue;
> -         for (c = 0; c < 4; ++c) {
> -            if (!(pso->output[i].register_mask & (1 << c)))
> -               continue;
> +         for (c = 0; c < pso->output[i].num_components; ++c) {
>              tfb->varying_count[b]++;
>              tfb->varying_index[n++] =
> -               info->out[pso->output[i].register_index].slot[c];
> +               info->out[pso->output[i].register_index].slot[startc
> + c];
>           }
>        }
> -      tfb->stride[b] = tfb->varying_count[b] * 4;
> +      tfb->stride[b] = pso->stride[b] * 4;
>     }
> -   if (pso->stride)
> -      tfb->stride[0] = pso->stride;
>  
>     return tfb;
>  }
> diff --git a/src/gallium/drivers/r600/r600.h
> b/src/gallium/drivers/r600/r600.h
> index 4bfb5a9..baf09c1 100644
> --- a/src/gallium/drivers/r600/r600.h
> +++ b/src/gallium/drivers/r600/r600.h
> @@ -196,7 +196,7 @@ struct r600_so_target {
>  
>  	/* The buffer where BUFFER_FILLED_SIZE is stored. */
>  	struct r600_resource	*filled_size;
> -	unsigned		stride;
> +	unsigned		stride_in_dw;
>  	unsigned		so_index;
>  };
>  
> @@ -248,7 +248,7 @@ struct r600_context {
>  	struct r600_so_target		*so_targets[PIPE_MAX_SO_BUFFERS];
>  	boolean				streamout_start;
>  	unsigned			streamout_append_bitmask;
> -	unsigned			*vs_shader_so_strides;
> +	unsigned			*vs_so_stride_in_dw;
>  };
>  
>  struct r600_draw {
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 1dba966..09bc5a7 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -2012,7 +2012,7 @@ static void r600_set_streamout_enable(struct
> r600_context *ctx, unsigned buffer_
>  void r600_context_streamout_begin(struct r600_context *ctx)
>  {
>  	struct r600_so_target **t = ctx->so_targets;
> -	unsigned *strides = ctx->vs_shader_so_strides;
> +	unsigned *stride_in_dw = ctx->vs_so_stride_in_dw;
>  	unsigned buffer_en, i, update_flags = 0;
>  
>  	buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
> @@ -2043,7 +2043,7 @@ void r600_context_streamout_begin(struct
> r600_context *ctx)
>  
>  	for (i = 0; i < ctx->num_so_targets; i++) {
>  		if (t[i]) {
> -			t[i]->stride = strides[i];
> +			t[i]->stride_in_dw = stride_in_dw[i];
>  			t[i]->so_index = i;
>  
>  			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
> @@ -2053,7 +2053,7 @@ void r600_context_streamout_begin(struct
> r600_context *ctx)
>  							16*i - R600_CONTEXT_REG_OFFSET) >> 2;
>  			ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset +
>  							t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
> -			ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2;		   /* VTX_STRIDE
> (in DW) */
> +			ctx->pm4[ctx->pm4_cdwords++] = stride_in_dw[i];		   /* VTX_STRIDE
> (in DW) */
>  			ctx->pm4[ctx->pm4_cdwords++] = 0;			   /* BUFFER_BASE */
>  
>  			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
> @@ -2165,7 +2165,7 @@ void r600_context_draw_opaque_count(struct
> r600_context *ctx, struct r600_so_tar
>  
>  	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
>  	ctx->pm4[ctx->pm4_cdwords++] =
>  	(R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE -
>  	R600_CONTEXT_REG_OFFSET) >> 2;
> -	ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2;
> +	ctx->pm4[ctx->pm4_cdwords++] = t->stride_in_dw;
>  
>  	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
>  	ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM |
>  	COPY_DW_DST_IS_REG;
> diff --git a/src/gallium/drivers/r600/r600_pipe.h
> b/src/gallium/drivers/r600/r600_pipe.h
> index 447b9dc..91eb0e8 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -147,7 +147,6 @@ struct r600_pipe_shader {
>  	struct tgsi_token		*tokens;
>  	unsigned	sprite_coord_enable;
>  	struct pipe_stream_output_info	so;
> -	unsigned			so_strides[4];
>  };
>  
>  struct r600_pipe_sampler_state {
> diff --git a/src/gallium/drivers/r600/r600_shader.c
> b/src/gallium/drivers/r600/r600_shader.c
> index ad4aded..9dbbbd8 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -124,12 +124,14 @@ int r600_pipe_shader_create(struct pipe_context
> *ctx, struct r600_pipe_shader *s
>  			unsigned i;
>  			fprintf(stderr, "STREAMOUT\n");
>  			for (i = 0; i < shader->so.num_outputs; i++) {
> +				unsigned mask = ((1 << shader->so.output[i].num_components) - 1)
> <<
> +						shader->so.output[i].start_component;
>  				fprintf(stderr, "  %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
>  					shader->so.output[i].output_buffer,
>  					shader->so.output[i].register_index,
> -				        shader->so.output[i].register_mask & 1 ? "x" : "_",
> -				        (shader->so.output[i].register_mask >> 1) & 1 ? "y" :
> "_",
> -				        (shader->so.output[i].register_mask >> 2) & 1 ? "z" :
> "_",
> -				        (shader->so.output[i].register_mask >> 3) & 1 ? "w" :
> "_");
> +				        mask & 1 ? "x" : "_",
> +				        (mask >> 1) & 1 ? "y" : "_",
> +				        (mask >> 2) & 1 ? "z" : "_",
> +				        (mask >> 3) & 1 ? "w" : "_");
>  			}
>  		}
>  	}
> @@ -863,11 +865,8 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
>  
>  	/* Add stream outputs. */
>  	if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
> -		unsigned buffer_offset[PIPE_MAX_SO_BUFFERS] = {0};
> -
>  		for (i = 0; i < so.num_outputs; i++) {
>  			struct r600_bytecode_output output;
> -			unsigned comps;
>  
>  			if (so.output[i].output_buffer >= 4) {
>  				R600_ERR("exceeded the max number of stream output buffers, got:
>  				%d\n",
> @@ -875,36 +874,21 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
>  				r = -EINVAL;
>  				goto out_err;
>  			}
> -
> -			switch (so.output[i].register_mask) {
> -			case TGSI_WRITEMASK_XYZW:
> -				comps = 4;
> -				break;
> -			case TGSI_WRITEMASK_XYZ:
> -				comps = 3;
> -				break;
> -			case TGSI_WRITEMASK_XY:
> -				comps = 2;
> -				break;
> -			case TGSI_WRITEMASK_X:
> -				comps = 1;
> -				break;
> -			default:
> -				R600_ERR("streamout: invalid register_mask, got: %x\n",
> -					 so.output[i].register_mask);
> -				r = -EINVAL;
> -				goto out_err;
> +			if (so.output[i].start_component) {
> +			   R600_ERR("stream_output - start_component cannot be
> non-zero\n");
> +			   r = -EINVAL;
> +			   goto out_err;
>  			}
>  
>  			memset(&output, 0, sizeof(struct r600_bytecode_output));
>  			output.gpr = shader->output[so.output[i].register_index].gpr;
>  			output.elem_size = 0;
> -			output.array_base = buffer_offset[so.output[i].output_buffer];
> +			output.array_base = so.output[i].dst_offset;
>  			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
>  			output.burst_count = 1;
>  			output.barrier = 1;
>  			output.array_size = 0;
> -			output.comp_mask = so.output[i].register_mask;
> +			output.comp_mask = (1 << so.output[i].num_components) - 1;
>  			if (ctx.bc->chip_class >= EVERGREEN) {
>  				switch (so.output[i].output_buffer) {
>  				case 0:
> @@ -939,12 +923,6 @@ static int r600_shader_from_tgsi(struct
> r600_pipe_context * rctx, struct r600_pi
>  			r = r600_bytecode_add_output(ctx.bc, &output);
>  			if (r)
>  				goto out_err;
> -
> -			buffer_offset[so.output[i].output_buffer] += comps;
> -		}
> -
> -		for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
> -			pipeshader->so_strides[i] = buffer_offset[i] * 4;
>  		}
>  	}
>  
> diff --git a/src/gallium/drivers/r600/r600_state_common.c
> b/src/gallium/drivers/r600/r600_state_common.c
> index 054ab90..d96857e 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -645,7 +645,7 @@ void r600_draw_vbo(struct pipe_context *ctx,
> const struct pipe_draw_info *dinfo)
>  		}
>  	}
>  
> -	rctx->ctx.vs_shader_so_strides = rctx->vs_shader->so_strides;
> +	rctx->ctx.vs_so_stride_in_dw = rctx->vs_shader->so.stride;
>  
>  	mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
>  
> diff --git a/src/gallium/drivers/softpipe/sp_state_so.c
> b/src/gallium/drivers/softpipe/sp_state_so.c
> index 31ef384..f4b5cdf 100644
> --- a/src/gallium/drivers/softpipe/sp_state_so.c
> +++ b/src/gallium/drivers/softpipe/sp_state_so.c
> @@ -43,7 +43,7 @@ softpipe_create_stream_output_state(struct
> pipe_context *pipe,
>  
>     if (so) {
>        so->base.num_outputs = templ->num_outputs;
> -      so->base.stride = templ->stride;
> +      memcpy(so->base.stride, templ->stride, sizeof(templ->stride));
>        memcpy(so->base.output, templ->output,
>               templ->num_outputs * sizeof(templ->output[0]));
>     }
> diff --git a/src/gallium/drivers/trace/tr_dump_state.c
> b/src/gallium/drivers/trace/tr_dump_state.c
> index 038a80e..7ce477e 100644
> --- a/src/gallium/drivers/trace/tr_dump_state.c
> +++ b/src/gallium/drivers/trace/tr_dump_state.c
> @@ -271,14 +271,16 @@ void trace_dump_shader_state(const struct
> pipe_shader_state *state)
>     trace_dump_member_begin("stream_output");
>     trace_dump_struct_begin("pipe_stream_output_info");
>     trace_dump_member(uint, &state->stream_output, num_outputs);
> -   trace_dump_member(uint, &state->stream_output, stride);
> +   trace_dump_array(uint, state->stream_output.stride,
> PIPE_MAX_SO_BUFFERS);
>     trace_dump_array_begin();
>     for(i = 0; i < state->stream_output.num_outputs; ++i) {
>        trace_dump_elem_begin();
>        trace_dump_struct_begin(""); /* anonymous */
>        trace_dump_member(uint, &state->stream_output.output[i],
>        register_index);
> -      trace_dump_member(uint, &state->stream_output.output[i],
> register_mask);
> +      trace_dump_member(uint, &state->stream_output.output[i],
> start_component);
> +      trace_dump_member(uint, &state->stream_output.output[i],
> num_components);
>        trace_dump_member(uint, &state->stream_output.output[i],
>        output_buffer);
> +      trace_dump_member(uint, &state->stream_output.output[i],
> dst_offset);
>        trace_dump_struct_end();
>        trace_dump_elem_end();
>     }
> diff --git a/src/gallium/include/pipe/p_state.h
> b/src/gallium/include/pipe/p_state.h
> index f943ca5..15cc001 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -176,16 +176,19 @@ struct pipe_clip_state
>  struct pipe_stream_output_info
>  {
>     unsigned num_outputs;
> -   /** stride for an entire vertex, only used if all output_buffers
> are 0 */
> -   unsigned stride;
> +   /** stride for an entire vertex for each buffer in dwords */
> +   unsigned stride[PIPE_MAX_SO_BUFFERS];

If it per output buffer then please move it inside the array below. Otherwise no objection.


BTW, why stride in dwords? Elsewhere in gallium we always use bytes.


Jose

> +
>     /**
>      * Array of stream outputs, in the order they are to be written
>      in.
>      * Selected components are tightly packed into the output buffer.
>      */
>     struct {
> -      unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS
> */
> -      unsigned register_mask:4;  /**< TGSI_WRITEMASK_x */
> -      unsigned output_buffer:4;  /**< 0 to PIPE_MAX_SO_BUFFERS */
> +      unsigned register_index:8;  /**< 0 to PIPE_MAX_SHADER_OUTPUTS
> */
> +      unsigned start_component:2; /** 0 to 3 */
> +      unsigned num_components:3;  /** 1 to 4 */
> +      unsigned output_buffer:3;   /**< 0 to PIPE_MAX_SO_BUFFERS */
> +      unsigned dst_offset:16;     /**< offset into the buffer in
> dwords */
>     } output[PIPE_MAX_SHADER_OUTPUTS];
>  };
>  
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 3b8e2fe..1613bf8 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -5091,25 +5091,21 @@ st_translate_stream_output_info(struct
> glsl_to_tgsi_visitor *glsl_to_tgsi,
>                                  const GLuint outputMapping[],
>                                  struct pipe_stream_output_info *so)
>  {
> -   static unsigned comps_to_mask[] = {
> -      0,
> -      TGSI_WRITEMASK_X,
> -      TGSI_WRITEMASK_XY,
> -      TGSI_WRITEMASK_XYZ,
> -      TGSI_WRITEMASK_XYZW
> -   };
>     unsigned i;
>     struct gl_transform_feedback_info *info =
>        &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
>  
>     for (i = 0; i < info->NumOutputs; i++) {
> -      assert(info->Outputs[i].NumComponents <
> Elements(comps_to_mask));
>        so->output[i].register_index =
>           outputMapping[info->Outputs[i].OutputRegister];
> -      so->output[i].register_mask =
> -         comps_to_mask[info->Outputs[i].NumComponents]
> -         << info->Outputs[i].ComponentOffset;
> +      so->output[i].start_component =
> info->Outputs[i].ComponentOffset;
> +      so->output[i].num_components = info->Outputs[i].NumComponents;
>        so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
> +      so->output[i].dst_offset = info->Outputs[i].DstOffset;
> +   }
> +
> +   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
> +      so->stride[i] = info->BufferStride[i];
>     }
>     so->num_outputs = info->NumOutputs;
>  }
> --
> 1.7.5.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 


More information about the mesa-dev mailing list