[Mesa-dev] [PATCH 8/8] draw/gs: make sure geometry shaders don't overflow
Jose Fonseca
jfonseca at vmware.com
Wed Apr 17 05:52:48 PDT 2013
Series looks good to me.
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
----- Original Message -----
> The specification says that the geometry shader should exit if the
> number of emitted vertices is bigger or equal to max_output_vertices and
> we can't do that because we're running in the SoA mode, which means that
> our storing routines will keep getting called on channels that have
> overflown (even though they will be masked out, but we just can't skip
> them).
> So we need some scratch area where we can keep writing the overflown
> vertices without overwriting anything important or crashing.
> ---
> src/gallium/auxiliary/draw/draw_gs.c | 50
> +++++++++++++++++++----
> src/gallium/auxiliary/draw/draw_gs.h | 1 +
> src/gallium/auxiliary/draw/draw_llvm.c | 6 +--
> src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 +
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 34 ++++++++++++++-
> 5 files changed, 81 insertions(+), 11 deletions(-)
>
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c
> b/src/gallium/auxiliary/draw/draw_gs.c
> index 5771aad..73917be 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader
> *shader,
> int max_prims_per_invocation = 0;
> char *output_ptr = (char*)shader->gs_output;
> int i, j, prim_idx;
> + unsigned next_prim_boundary = shader->primitive_boundary;
>
> for (i = 0; i < shader->vector_length; ++i) {
> int prims = shader->llvm_emitted_primitives[i];
> @@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader
> *shader,
> total_verts += shader->llvm_emitted_vertices[i];
> }
>
> -
> output_ptr += shader->emitted_vertices * shader->vertex_size;
> for (i = 0; i < shader->vector_length - 1; ++i) {
> int current_verts = shader->llvm_emitted_vertices[i];
> -
> - if (current_verts != shader->max_output_vertices) {
> - memcpy(output_ptr + (vertex_count + current_verts) *
> shader->vertex_size,
> - output_ptr + (vertex_count + shader->max_output_vertices) *
> shader->vertex_size,
> - shader->vertex_size * (total_verts - vertex_count));
> + int next_verts = shader->llvm_emitted_vertices[i + 1];
> +#if 0
> + int j;
> + for (j = 0; j < current_verts; ++j) {
> + struct vertex_header *vh = (struct vertex_header *)
> + (output_ptr + shader->vertex_size * (i * next_prim_boundary +
> j));
> + debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
> + vh->data[0][0], vh->data[0][1], vh->data[0][2],
> vh->data[0][3]);
> +
> + }
> +#endif
> + debug_assert(current_verts <= shader->max_output_vertices);
> + debug_assert(next_verts <= shader->max_output_vertices);
> + if (next_verts) {
> + memmove(output_ptr + (vertex_count + current_verts) *
> shader->vertex_size,
> + output_ptr + ((i + 1) * next_prim_boundary) *
> shader->vertex_size,
> + shader->vertex_size * next_verts);
> }
> vertex_count += current_verts;
> }
>
> +#if 0
> + {
> + int i;
> + for (i = 0; i < total_verts; ++i) {
> + struct vertex_header *vh = (struct vertex_header *)(output_ptr +
> shader->vertex_size * i);
> + debug_printf("%d) [%f, %f, %f, %f]\n", i,
> + vh->data[0][0], vh->data[0][1], vh->data[0][2],
> vh->data[0][3]);
> +
> + }
> + }
> +#endif
> +
> prim_idx = 0;
> for (i = 0; i < shader->vector_length; ++i) {
> int num_prims = shader->llvm_emitted_primitives[i];
> @@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct
> draw_geometry_shader *shader,
>
> output_verts->vertex_size = vertex_size;
> output_verts->stride = output_verts->vertex_size;
> + /* we allocate exactly one extra vertex per primitive to allow the GS to
> emit
> + * overflown vertices into some area where they won't harm anyone */
> output_verts->verts =
> (struct vertex_header *)MALLOC(output_verts->vertex_size *
> max_out_prims *
> - shader->max_output_vertices);
> + shader->primitive_boundary);
>
> #if 0
> debug_printf("%s count = %d (in prims # = %d)\n",
> @@ -723,6 +749,16 @@ draw_create_geometry_shader(struct draw_context *draw,
> TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
> gs->max_output_vertices = gs->info.properties[i].data[0];
> }
> + /* Primitive boundary is bigger than max_output_vertices by one, because
> + * the specification says that the geometry shader should exit if the
> + * number of emitted vertices is bigger or equal to max_output_vertices
> and
> + * we can't do that because we're running in the SoA mode, which means
> that
> + * our storing routines will keep getting called on channels that have
> + * overflown.
> + * So we need some scratch area where we can keep writing the overflown
> + * vertices without overwriting anything important or crashing.
> + */
> + gs->primitive_boundary = gs->max_output_vertices + 1;
>
> for (i = 0; i < gs->info.num_outputs; i++) {
> if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
> diff --git a/src/gallium/auxiliary/draw/draw_gs.h
> b/src/gallium/auxiliary/draw/draw_gs.h
> index 7c84139..ca744ce 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.h
> +++ b/src/gallium/auxiliary/draw/draw_gs.h
> @@ -68,6 +68,7 @@ struct draw_geometry_shader {
> unsigned position_output;
>
> unsigned max_output_vertices;
> + unsigned primitive_boundary;
> unsigned input_primitive;
> unsigned output_primitive;
>
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c
> b/src/gallium/auxiliary/draw/draw_llvm.c
> index 5100ce0..8ecbc88 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct
> lp_build_tgsi_gs_iface *gs_base,
> LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
> lp_int_type(gs_type), 0);
> LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
> - LLVMValueRef max_output_vertices =
> - lp_build_const_int32(gallivm,
> variant->shader->base.max_output_vertices);
> + LLVMValueRef next_prim_offset =
> + lp_build_const_int32(gallivm,
> variant->shader->base.primitive_boundary);
> LLVMValueRef io = variant->io_ptr;
> unsigned i;
> const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
> @@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct
> lp_build_tgsi_gs_iface *gs_base,
> LLVMValueRef ind = lp_build_const_int32(gallivm, i);
> LLVMValueRef currently_emitted =
> LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
> - indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
> + indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
> indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
> }
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index f1b1d79..0fbb8aa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context
> LLVMValueRef emitted_prims_vec_ptr;
> LLVMValueRef total_emitted_vertices_vec_ptr;
> LLVMValueRef emitted_vertices_vec_ptr;
> + LLVMValueRef max_output_vertices_vec;
>
> LLVMValueRef consts_ptr;
> const LLVMValueRef *pos;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 299d7eb..7cff5fa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -828,7 +828,6 @@ emit_fetch_gs_input(
> vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
> }
>
> -
> res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
> vertex_index, attrib_index,
> swizzle_index);
> @@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct
> lp_build_tgsi_context * bld_base,
> LLVMBuildStore(builder, current_vec, ptr);
> }
>
> +static LLVMValueRef
> +clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
> + LLVMValueRef current_mask_vec,
> + LLVMValueRef total_emitted_vertices_vec)
> +{
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> + struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
> + LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
> + total_emitted_vertices_vec,
> + bld->max_output_vertices_vec);
> +
> + return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
> +}
> +
> static void
> emit_vertex(
> const struct lp_build_tgsi_action * action,
> @@ -2270,6 +2283,8 @@ emit_vertex(
> LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
> LLVMValueRef total_emitted_vertices_vec =
> LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
> + masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
> +
> total_emitted_vertices_vec);
> gather_outputs(bld);
> bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
> bld->outputs,
> @@ -2794,12 +2809,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
> bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
>
> if (gs_iface) {
> + /* There's no specific value for this because it should always
> + * be set, but apps using ext_geometry_shader4 quite often
> + * were forgetting so we're using MAX_VERTEX_VARYING from
> + * that spec even though we could debug_assert if it's not
> + * set, but that's a lot uglier. */
> + uint max_output_vertices = 32;
> + uint i = 0;
> /* inputs are always indirect with gs */
> bld.indirect_files |= (1 << TGSI_FILE_INPUT);
> bld.gs_iface = gs_iface;
> bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
> bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
> bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
> +
> + for (i = 0; i < info->num_properties; ++i) {
> + if (info->properties[i].name ==
> + TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
> + max_output_vertices = info->properties[i].data[0];
> + }
> + }
> + bld.max_output_vertices_vec =
> + lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
> + max_output_vertices);
> }
>
> lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
> --
> 1.7.10.4
>
>
More information about the mesa-dev
mailing list