[Mesa-stable] [Mesa-dev] [PATCH v2] swr: fix transform feedback logic
Cherniak, Bruce
bruce.cherniak at intel.com
Thu Jul 27 18:20:09 UTC 2017
Reviewed-by: Bruce Cherniak <bruce.cherniak at intel.com>
> On Jul 26, 2017, at 5:27 PM, George Kyriazis <george.kyriazis at intel.com> wrote:
>
> The shader that is used to copy vertex data out of the vs/gs shaders to
> the user-specified buffer (streamout or SO shader) was not using the
> correct offsets.
>
> Adjust the offsets that are used just for the SO shader:
> - Make sure that position is handled in the same special way
> as in the vs/gs shaders
> - Use the correct offset to be passed in the core
> - consolidate register slot mapping logic into one function, since it's
> been calculated in 2 different places (one for calculating the slot mask,
> and one for the register offsets themselves)
>
> Also make room for all attributes in the backend vertex area.
>
> Fixes:
> - all vtk GL2PS tests
> - 18 piglit tests (16 ext_transform_feedback tests,
> arb-quads-follow-provoking-vertex and primitive-type gl_points)
>
> v2:
>
> - take care of more SGV slots in slot mapping logic
> - trim feState.vsVertexSize
> - fix GS interface and incorporate GS while calculating vsVertexSize
>
> Note that vsVertexSize is used in the core as the one parameter that
> controls vertex size between all stages, so it has to be adjusted appropriately
> for the whole vs/gs/fs pipeline.
>
> fixes:
> - fixes total of 20 piglit tests
>
> CC: 17.2 <mesa-stable at lists.freedesktop.org>
> ---
> src/gallium/drivers/swr/swr_draw.cpp | 38 +++++++++++++++++++++++++++++-----
> src/gallium/drivers/swr/swr_shader.cpp | 32 +++++++++++++++++++++++++++-
> src/gallium/drivers/swr/swr_shader.h | 3 +++
> src/gallium/drivers/swr/swr_state.cpp | 5 +++--
> 4 files changed, 70 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
> index 62ad3f7..38a711e 100644
> --- a/src/gallium/drivers/swr/swr_draw.cpp
> +++ b/src/gallium/drivers/swr/swr_draw.cpp
> @@ -81,8 +81,11 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
> offsets[output_buffer] = so->output[i].dst_offset;
> }
>
> + unsigned attrib_slot = so->output[i].register_index;
> + attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
> +
> state.stream.decl[num].bufferIndex = output_buffer;
> - state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
> + state.stream.decl[num].attribSlot = attrib_slot;
> state.stream.decl[num].componentMask =
> ((1 << so->output[i].num_components) - 1)
> << so->output[i].start_component;
> @@ -129,10 +132,35 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
> * XXX setup provokingVertex & topologyProvokingVertex */
> SWR_FRONTEND_STATE feState = {0};
>
> - feState.vsVertexSize =
> - VERTEX_ATTRIB_START_SLOT +
> - + ctx->vs->info.base.num_outputs
> - - (ctx->vs->info.base.writes_position ? 1 : 0);
> + // feState.vsVertexSize seeds the PA size that is used as an interface
> + // between all the shader stages, so it has to be large enough to
> + // incorporate all interfaces between stages
> +
> + // max of gs and vs num_outputs
> + feState.vsVertexSize = ctx->vs->info.base.num_outputs;
> + if (ctx->gs &&
> + ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
> + feState.vsVertexSize = ctx->gs->info.base.num_outputs;
> + }
> +
> + if (ctx->vs->info.base.num_outputs)
> + // gs does not adjust for position in SGV slot at input from vs
> + if (!ctx->gs)
> + feState.vsVertexSize--;
> +
> + // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
> + feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
> +
> + // The PA in the clipper does not handle BE vertex sizes
> + // different from FE. Increase vertexsize only for the cases that needed it
> +
> + // primid needs a slot
> + if (ctx->fs->info.base.uses_primid)
> + feState.vsVertexSize++;
> + // sprite coord enable
> + if (ctx->rasterizer->sprite_coord_enable)
> + feState.vsVertexSize++;
> +
>
> if (ctx->rasterizer->flatshade_first) {
> feState.provokingVertex = {1, 0, 0};
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
> index 83b49c4..0a81eaa 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -414,7 +414,10 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
> } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
> attribSlot = VERTEX_POSITION_SLOT;
> } else {
> - attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
> + attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
> + if (iface->info->writes_position) {
> + attribSlot--;
> + }
> }
>
> #if USE_SIMD16_FRONTEND
> @@ -923,6 +926,33 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
> return func;
> }
>
> +unsigned
> +swr_so_adjust_attrib(unsigned in_attrib,
> + swr_vertex_shader *swr_vs)
> +{
> + ubyte semantic_name;
> + unsigned attrib;
> +
> + attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
> +
> + if (swr_vs) {
> + semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
> + if (semantic_name == TGSI_SEMANTIC_POSITION) {
> + attrib = VERTEX_POSITION_SLOT;
> + } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
> + attrib = VERTEX_SGV_SLOT;
> + } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
> + attrib = VERTEX_SGV_SLOT;
> + } else {
> + if (swr_vs->info.base.writes_position) {
> + attrib--;
> + }
> + }
> + }
> +
> + return attrib;
> +}
> +
> static unsigned
> locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
> {
> diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
> index 1ab6846..6468874 100644
> --- a/src/gallium/drivers/swr/swr_shader.h
> +++ b/src/gallium/drivers/swr/swr_shader.h
> @@ -30,6 +30,9 @@ struct swr_jit_fs_key;
> struct swr_jit_vs_key;
> struct swr_jit_gs_key;
>
> +unsigned swr_so_adjust_attrib(unsigned in_attrib,
> + swr_vertex_shader *swr_vs);
> +
> PFN_VERTEX_FUNC
> swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);
>
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index 501fdea..47ab445 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -345,13 +345,14 @@ swr_create_vs_state(struct pipe_context *pipe,
> // soState.streamToRasterizer not used
>
> for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
> + unsigned attrib_slot = stream_output->output[i].register_index;
> + attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
> swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
> - 1 << (stream_output->output[i].register_index - 1);
> + (1 << attrib_slot);
> }
> for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
> swr_vs->soState.streamNumEntries[i] =
> _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
> - swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
> }
> }
>
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-stable
mailing list