[Mesa-dev] [PATCH 8/8] draw/gs: make sure geometry shaders don't overflow

Wed Apr 17 05:52:48 PDT 2013

Series looks good to me.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

----- Original Message -----
> The specification says that the geometry shader should exit if the
> number of emitted vertices is bigger or equal to max_output_vertices and
> we can't do that because we're running in the SoA mode, which means that
> our storing routines will keep getting called on channels that have
> overflown (even though they will be masked out, but we just can't skip
> them).
> So we need some scratch area where we can keep writing the overflown
> vertices without overwriting anything important or crashing.
> ---
>  src/gallium/auxiliary/draw/draw_gs.c            |   50
>  +++++++++++++++++++----
>  src/gallium/auxiliary/draw/draw_gs.h            |    1 +
>  src/gallium/auxiliary/draw/draw_llvm.c          |    6 +--
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h     |    1 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   34 ++++++++++++++-
>  5 files changed, 81 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c
> b/src/gallium/auxiliary/draw/draw_gs.c
> index 5771aad..73917be 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -280,6 +280,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader
> *shader,
>     int max_prims_per_invocation = 0;
>     char *output_ptr = (char*)shader->gs_output;
>     int i, j, prim_idx;
> +   unsigned next_prim_boundary = shader->primitive_boundary;
>  
>     for (i = 0; i < shader->vector_length; ++i) {
>        int prims = shader->llvm_emitted_primitives[i];
> @@ -290,19 +291,42 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader
> *shader,
>        total_verts += shader->llvm_emitted_vertices[i];
>     }
>  
> -
>     output_ptr += shader->emitted_vertices * shader->vertex_size;
>     for (i = 0; i < shader->vector_length - 1; ++i) {
>        int current_verts = shader->llvm_emitted_vertices[i];
> -
> -      if (current_verts != shader->max_output_vertices) {
> -         memcpy(output_ptr + (vertex_count + current_verts) *
> shader->vertex_size,
> -                output_ptr + (vertex_count + shader->max_output_vertices) *
> shader->vertex_size,
> -                shader->vertex_size * (total_verts - vertex_count));
> +      int next_verts = shader->llvm_emitted_vertices[i + 1];
> +#if 0
> +      int j;
> +      for (j = 0; j < current_verts; ++j) {
> +         struct vertex_header *vh = (struct vertex_header *)
> +            (output_ptr + shader->vertex_size * (i * next_prim_boundary +
> j));
> +         debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
> +                      vh->data[0][0], vh->data[0][1], vh->data[0][2],
> vh->data[0][3]);
> +
> +      }
> +#endif
> +      debug_assert(current_verts <= shader->max_output_vertices);
> +      debug_assert(next_verts <= shader->max_output_vertices);
> +      if (next_verts) {
> +         memmove(output_ptr + (vertex_count + current_verts) *
> shader->vertex_size,
> +                 output_ptr + ((i + 1) * next_prim_boundary) *
> shader->vertex_size,
> +                 shader->vertex_size * next_verts);
>        }
>        vertex_count += current_verts;
>     }
>  
> +#if 0
> +   {
> +      int i;
> +      for (i = 0; i < total_verts; ++i) {
> +         struct vertex_header *vh = (struct vertex_header *)(output_ptr +
> shader->vertex_size * i);
> +         debug_printf("%d) [%f, %f, %f, %f]\n", i,
> +                      vh->data[0][0], vh->data[0][1], vh->data[0][2],
> vh->data[0][3]);
> +
> +      }
> +   }
> +#endif
> +
>     prim_idx = 0;
>     for (i = 0; i < shader->vector_length; ++i) {
>        int num_prims = shader->llvm_emitted_primitives[i];
> @@ -513,10 +537,12 @@ int draw_geometry_shader_run(struct
> draw_geometry_shader *shader,
>  
>     output_verts->vertex_size = vertex_size;
>     output_verts->stride = output_verts->vertex_size;
> +   /* we allocate exactly one extra vertex per primitive to allow the GS to
> emit
> +    * overflown vertices into some area where they won't harm anyone */
>     output_verts->verts =
>        (struct vertex_header *)MALLOC(output_verts->vertex_size *
>                                       max_out_prims *
> -                                     shader->max_output_vertices);
> +                                     shader->primitive_boundary);
>  
>  #if 0
>     debug_printf("%s count = %d (in prims # = %d)\n",
> @@ -723,6 +749,16 @@ draw_create_geometry_shader(struct draw_context *draw,
>                 TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
>           gs->max_output_vertices = gs->info.properties[i].data[0];
>     }
> +   /* Primitive boundary is bigger than max_output_vertices by one, because
> +    * the specification says that the geometry shader should exit if the
> +    * number of emitted vertices is bigger or equal to max_output_vertices
> and
> +    * we can't do that because we're running in the SoA mode, which means
> that
> +    * our storing routines will keep getting called on channels that have
> +    * overflown.
> +    * So we need some scratch area where we can keep writing the overflown
> +    * vertices without overwriting anything important or crashing.
> +    */
> +   gs->primitive_boundary = gs->max_output_vertices + 1;
>  
>     for (i = 0; i < gs->info.num_outputs; i++) {
>        if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
> diff --git a/src/gallium/auxiliary/draw/draw_gs.h
> b/src/gallium/auxiliary/draw/draw_gs.h
> index 7c84139..ca744ce 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.h
> +++ b/src/gallium/auxiliary/draw/draw_gs.h
> @@ -68,6 +68,7 @@ struct draw_geometry_shader {
>     unsigned position_output;
>  
>     unsigned max_output_vertices;
> +   unsigned primitive_boundary;
>     unsigned input_primitive;
>     unsigned output_primitive;
>  
> diff --git a/src/gallium/auxiliary/draw/draw_llvm.c
> b/src/gallium/auxiliary/draw/draw_llvm.c
> index 5100ce0..8ecbc88 100644
> --- a/src/gallium/auxiliary/draw/draw_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_llvm.c
> @@ -1287,8 +1287,8 @@ draw_gs_llvm_emit_vertex(const struct
> lp_build_tgsi_gs_iface *gs_base,
>     LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
>                                                    lp_int_type(gs_type), 0);
>     LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
> -   LLVMValueRef max_output_vertices =
> -      lp_build_const_int32(gallivm,
> variant->shader->base.max_output_vertices);
> +   LLVMValueRef next_prim_offset =
> +      lp_build_const_int32(gallivm,
> variant->shader->base.primitive_boundary);
>     LLVMValueRef io = variant->io_ptr;
>     unsigned i;
>     const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
> @@ -1297,7 +1297,7 @@ draw_gs_llvm_emit_vertex(const struct
> lp_build_tgsi_gs_iface *gs_base,
>        LLVMValueRef ind = lp_build_const_int32(gallivm, i);
>        LLVMValueRef currently_emitted =
>           LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
> -      indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
> +      indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
>        indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
>     }
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index f1b1d79..0fbb8aa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -396,6 +396,7 @@ struct lp_build_tgsi_soa_context
>     LLVMValueRef emitted_prims_vec_ptr;
>     LLVMValueRef total_emitted_vertices_vec_ptr;
>     LLVMValueRef emitted_vertices_vec_ptr;
> +   LLVMValueRef max_output_vertices_vec;
>  
>     LLVMValueRef consts_ptr;
>     const LLVMValueRef *pos;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 299d7eb..7cff5fa 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -828,7 +828,6 @@ emit_fetch_gs_input(
>        vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
>     }
>  
> -
>     res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
>                                      vertex_index, attrib_index,
>                                      swizzle_index);
> @@ -2257,6 +2256,20 @@ clear_uint_vec_ptr_from_mask(struct
> lp_build_tgsi_context * bld_base,
>     LLVMBuildStore(builder, current_vec, ptr);
>  }
>  
> +static LLVMValueRef
> +clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
> +                                  LLVMValueRef current_mask_vec,
> +                                  LLVMValueRef total_emitted_vertices_vec)
> +{
> +   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> +   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
> +   LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
> +                                        total_emitted_vertices_vec,
> +                                        bld->max_output_vertices_vec);
> +
> +   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
> +}
> +
>  static void
>  emit_vertex(
>     const struct lp_build_tgsi_action * action,
> @@ -2270,6 +2283,8 @@ emit_vertex(
>        LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
>        LLVMValueRef total_emitted_vertices_vec =
>           LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
> +      masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
> +
> total_emitted_vertices_vec);
>        gather_outputs(bld);
>        bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
>                                   bld->outputs,
> @@ -2794,12 +2809,29 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
>     bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
>  
>     if (gs_iface) {
> +      /* There's no specific value for this because it should always
> +       * be set, but apps using ext_geometry_shader4 quite often
> +       * were forgetting so we're using MAX_VERTEX_VARYING from
> +       * that spec even though we could debug_assert if it's not
> +       * set, but that's a lot uglier. */
> +      uint max_output_vertices = 32;
> +      uint i = 0;
>        /* inputs are always indirect with gs */
>        bld.indirect_files |= (1 << TGSI_FILE_INPUT);
>        bld.gs_iface = gs_iface;
>        bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
>        bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
>        bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
> +
> +      for (i = 0; i < info->num_properties; ++i) {
> +         if (info->properties[i].name ==
> +             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
> +            max_output_vertices = info->properties[i].data[0];
> +         }
> +      }
> +      bld.max_output_vertices_vec =
> +         lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
> +                                max_output_vertices);
>     }
>  
>     lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
> --
> 1.7.10.4
> 
>