[Mesa-dev] [PATCH 5/7] draw: add support to tgsi paths for geometry streams.

Roland Scheidegger sroland at vmware.com
Wed May 27 20:01:46 PDT 2015


Am 27.05.2015 um 09:45 schrieb Dave Airlie:
> This hooks up the geometry shader processing to the TGSI
> support added in the previous commits.
> 
> It doesn't change the llvm interface other than to
> keep things building.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/auxiliary/draw/draw_gs.c               | 195 +++++++++++++--------
>  src/gallium/auxiliary/draw/draw_gs.h               |  21 ++-
>  src/gallium/auxiliary/draw/draw_pt.h               |   1 +
>  .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |  16 +-
>  .../draw/draw_pt_fetch_shade_pipeline_llvm.c       |  14 +-
>  src/gallium/auxiliary/draw/draw_pt_so_emit.c       |  64 ++++---
>  6 files changed, 192 insertions(+), 119 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
> index 755e527..9798518 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.c
> +++ b/src/gallium/auxiliary/draw/draw_gs.c
> @@ -75,6 +75,7 @@ draw_gs_should_flush(struct draw_geometry_shader *shader)
>  /*#define DEBUG_OUTPUTS 1*/
>  static void
>  tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
> +                      unsigned stream,
>                        unsigned num_primitives,
>                        float (**p_output)[4])
>  {
> @@ -89,14 +90,16 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
>      */
>  
>     for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
> -      unsigned num_verts_per_prim = machine->Primitives[prim_idx];
> -      shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
> -         machine->Primitives[prim_idx];
> -      shader->emitted_vertices += num_verts_per_prim;
> +      unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx];
> +
> +      shader->stream[stream].primitive_lengths[prim_idx + shader->stream[stream].emitted_primitives] =
I'm not really trying to enforce strict 80-column lines, but over 100 is
definitely too much. I know there are some other ridiculously long lines
in some draw and tgsi code, but please try to avoid new ones.

> +         machine->Primitives[stream][prim_idx];
> +      shader->stream[stream].emitted_vertices += num_verts_per_prim;
Did things actually still build/work around here before this patch (that
is, if you just applied patches up to 3/7)? It looks to me like they might
not; if so, it would be nice to get this fixed.

>        for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
> -         int idx = current_idx * shader->info.num_outputs;
> +         int idx = machine->PrimitiveOffsets[stream][prim_idx] + current_idx * shader->info.num_outputs;
Line length: this line is too long.

>  #ifdef DEBUG_OUTPUTS
> -         debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
> +         debug_printf("%d/%d) Output vert:\n", stream, idx / shader->info.num_outputs);
>  #endif
>           for (slot = 0; slot < shader->info.num_outputs; slot++) {
>              output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
> @@ -115,7 +118,7 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
>        }
>     }
>     *p_output = output;
> -   shader->emitted_primitives += num_primitives;
> +   shader->stream[stream].emitted_primitives += num_primitives;
>  }
>  
>  /*#define DEBUG_INPUTS 1*/
> @@ -201,11 +204,12 @@ static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
>     }
>  }
>  
> -static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
> -                            unsigned input_primitives)
> +static void tgsi_gs_run(struct draw_geometry_shader *shader,
> +                            unsigned input_primitives,
> +                            unsigned *out_prims)
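Whitespace? The continuation lines no longer line up with the opening
parenthesis now that the return type is shorter.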

>  {
>     struct tgsi_exec_machine *machine = shader->machine;
> -
> +   int i;
>     tgsi_set_exec_mask(machine,
>                        1,
>                        input_primitives > 1,
> @@ -215,8 +219,30 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
>     /* run interpreter */
>     tgsi_exec_machine_run(machine);
>  
> -   return
> -      machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
> +   for (i = 0; i < 4; i++) {
> +      int prim_i;
> +      int prim_c;
> +      switch (i) {
> +      case 0:
> +         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_I;
> +         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_C;
> +         break;
> +      case 1:
> +         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S1_I;
> +         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S1_C;
> +         break;
> +      case 2:
> +         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S2_I;
> +         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S2_C;
> +         break;
> +      case 3:
> +         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S3_I;
> +         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S3_C;
> +         break;
> +      };
> +
> +      out_prims[i] = machine->Temps[prim_i].xyzw[prim_c].u[0];
> +   }
>  }
>  
>  #ifdef HAVE_LLVM
> @@ -293,6 +319,7 @@ llvm_fetch_gs_input(struct draw_geometry_shader *shader,
>  
>  static void
>  llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
> +                      unsigned stream,
>                        unsigned num_primitives,
>                        float (**p_output)[4])
>  {
> @@ -313,7 +340,7 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
>        total_verts += shader->llvm_emitted_vertices[i];
>     }
>  
> -   output_ptr += shader->emitted_vertices * shader->vertex_size;
> +   output_ptr += shader->stream[0].emitted_vertices * shader->vertex_size;
>     for (i = 0; i < shader->vector_length - 1; ++i) {
>        int current_verts = shader->llvm_emitted_vertices[i];
>        int next_verts = shader->llvm_emitted_vertices[i + 1];
> @@ -360,14 +387,14 @@ llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
>        for (j = 0; j < num_prims; ++j) {
>           int prim_length =
>              shader->llvm_prim_lengths[j][i];
> -         shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
> +         shader->stream[0].primitive_lengths[shader->stream[0].emitted_primitives + prim_idx] =
>              prim_length;
>           ++prim_idx;
>        }
>     }
>  
> -   shader->emitted_primitives += total_prims;
> -   shader->emitted_vertices += total_verts;
> +   shader->stream[0].emitted_primitives += total_prims;
> +   shader->stream[0].emitted_vertices += total_verts;
>  }
>  
>  static void
> @@ -377,14 +404,14 @@ llvm_gs_prepare(struct draw_geometry_shader *shader,
>  {
>  }
>  
> -static unsigned
> +static void
>  llvm_gs_run(struct draw_geometry_shader *shader,
> -            unsigned input_primitives)
> +            unsigned input_primitives, unsigned *out_prims)
>  {
>     unsigned ret;
>     char *input = (char*)shader->gs_output;
>  
> -   input += (shader->emitted_vertices * shader->vertex_size);
> +   input += (shader->stream[0].emitted_vertices * shader->vertex_size);
>  
>     ret = shader->current_variant->jit_func(
>        shader->jit_context, shader->gs_input->data,
> @@ -393,15 +420,15 @@ llvm_gs_run(struct draw_geometry_shader *shader,
>        shader->draw->instance_id,
>        shader->llvm_prim_ids);
>  
> -   return ret;
> +   *out_prims = ret;
>  }
>  
>  #endif
>  
>  static void gs_flush(struct draw_geometry_shader *shader)
>  {
> -   unsigned out_prim_count;
> -
> +   unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS];
> +   unsigned i;
>     unsigned input_primitives = shader->fetched_prim_count;
>  
>     if (shader->draw->collect_statistics) {
> @@ -411,14 +438,19 @@ static void gs_flush(struct draw_geometry_shader *shader)
>     debug_assert(input_primitives > 0 &&
>                  input_primitives <= 4);
>  
> -   out_prim_count = shader->run(shader, input_primitives);
> -   shader->fetch_outputs(shader, out_prim_count,
> -                         &shader->tmp_output);
> +   shader->run(shader, input_primitives, out_prim_count);
> +   for (i = 0; i < shader->num_vertex_streams; i++) {
> +      shader->fetch_outputs(shader, i, out_prim_count[i],
> +                            &shader->stream[i].tmp_output);
> +   }
>  
>  #if 0
> -   debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
> -                shader->emitted_primitives, shader->emitted_vertices,
> -                out_prim_count);
> +   for (i = 0; i < shader->num_vertex_streams; i++) {
> +      debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
> +                   i,
> +                   shader->stream[i].emitted_primitives, shader->stream[i].emitted_vertices,
> +                   out_prim_count[i]);
> +   }
>  #endif
>  
>     shader->fetched_prim_count = 0;
> @@ -562,16 +594,19 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
>     unsigned total_verts_per_buffer = shader->primitive_boundary *
>        num_in_primitives;
>     unsigned invocation;
> +   int i;
>     //Assume at least one primitive
>     max_out_prims = MAX2(max_out_prims, 1);
>  
> -
> -   output_verts->vertex_size = vertex_size;
> -   output_verts->stride = output_verts->vertex_size;
> -   output_verts->verts =
> -      (struct vertex_header *)MALLOC(output_verts->vertex_size *
> -                                     total_verts_per_buffer * shader->num_invocations);
> -   debug_assert(output_verts->verts);
> +   for (i = 0; i < shader->num_vertex_streams; i++) {
> +      /* write all the vertex data into all the streams */
> +      output_verts[i].vertex_size = vertex_size;
> +      output_verts[i].stride = output_verts[i].vertex_size;
> +      output_verts[i].verts =
> +         (struct vertex_header *)MALLOC(output_verts[i].vertex_size *
> +                                        total_verts_per_buffer * shader->num_invocations);
> +      debug_assert(output_verts[i].verts);
> +   }
>  
>  #if 0
>     debug_printf("%s count = %d (in prims # = %d)\n",
> @@ -589,21 +624,22 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
>                  total_verts_per_buffer);
>  #endif
>  
> -   shader->emitted_vertices = 0;
> -   shader->emitted_primitives = 0;
> +   for (i = 0; i < shader->num_vertex_streams; i++) {
> +      shader->stream[i].emitted_vertices = 0;
> +      shader->stream[i].emitted_primitives = 0;
> +      FREE(shader->stream[i].primitive_lengths);
> +      shader->stream[i].primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations);
Line length: this line is too long.

> +      shader->stream[i].tmp_output = (float (*)[4])output_verts[i].verts->data;
> +   }
>     shader->vertex_size = vertex_size;
> -   shader->tmp_output = (float (*)[4])output_verts->verts->data;
>     shader->fetched_prim_count = 0;
>     shader->input_vertex_stride = input_stride;
>     shader->input = input;
>     shader->input_info = input_info;
> -   FREE(shader->primitive_lengths);
> -   shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations);
> -
>  
>  #ifdef HAVE_LLVM
>     if (shader->draw->llvm) {
> -      shader->gs_output = output_verts->verts;
> +      shader->gs_output = output_verts[0].verts;
>        if (max_out_prims > shader->max_out_prims) {
>           unsigned i;
>           if (shader->llvm_prim_lengths) {
> @@ -651,32 +687,34 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
>  
>     /* Update prim_info:
>      */
> -   output_prims->linear = TRUE;
> -   output_prims->elts = NULL;
> -   output_prims->start = 0;
> -   output_prims->count = shader->emitted_vertices;
> -   output_prims->prim = shader->output_primitive;
> -   output_prims->flags = 0x0;
> -   output_prims->primitive_lengths = shader->primitive_lengths;
> -   output_prims->primitive_count = shader->emitted_primitives;
> -   output_verts->count = shader->emitted_vertices;
> -
> -   if (shader->draw->collect_statistics) {
> -      unsigned i;
> -      for (i = 0; i < shader->emitted_primitives; ++i) {
> -         shader->draw->statistics.gs_primitives +=
> -            u_decomposed_prims_for_vertices(shader->output_primitive,
> -                                            shader->primitive_lengths[i]);
> +   for (i = 0; i < shader->num_vertex_streams; i++) {
> +      output_prims[i].linear = TRUE;
> +      output_prims[i].elts = NULL;
> +      output_prims[i].start = 0;
> +      output_prims[i].count = shader->stream[i].emitted_vertices;
> +      output_prims[i].prim = shader->output_primitive;
> +      output_prims[i].flags = 0x0;
> +      output_prims[i].primitive_lengths = shader->stream[i].primitive_lengths;
> +      output_prims[i].primitive_count = shader->stream[i].emitted_primitives;
> +      output_verts[i].count = shader->stream[i].emitted_vertices;
> +
> +      if (shader->draw->collect_statistics) {
> +         unsigned i;
> +         for (i = 0; i < shader->stream[i].emitted_primitives; ++i) {
> +            shader->draw->statistics.gs_primitives +=
> +               u_decomposed_prims_for_vertices(shader->output_primitive,
> +                                               shader->stream[i].primitive_lengths[i]);
> +         }
>        }
>     }
>  
>  #if 0
> -   debug_printf("GS finished, prims = %d, verts = %d\n",
> -                output_prims->primitive_count,
> -                output_verts->count);
> +   debug_printf("GS finished\n");
> +   for (i = 0; i < 4; i++)
> +      debug_printf("stream %d: prims = %d verts = %d\n", i, output_prims[i].primitive_count, output_verts[i].count);
This must be a new record for line length.

>  #endif
>  
> -   return shader->emitted_vertices;
> +   return 0;
>  }
>  
>  void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
> @@ -695,16 +733,23 @@ boolean
>  draw_gs_init( struct draw_context *draw )
>  {
>     if (!draw->llvm) {
> +      int i;
>        draw->gs.tgsi.machine = tgsi_exec_machine_create();
>        if (!draw->gs.tgsi.machine)
>           return FALSE;
>  
> -      draw->gs.tgsi.machine->Primitives = align_malloc(
> -         MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
> -      if (!draw->gs.tgsi.machine->Primitives)
> -         return FALSE;
> -      memset(draw->gs.tgsi.machine->Primitives, 0,
> -             MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
> +      for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
> +         draw->gs.tgsi.machine->Primitives[i] = align_malloc(
> +            MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
> +         draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc(
> +            MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
> +         if (!draw->gs.tgsi.machine->Primitives[i] || !draw->gs.tgsi.machine->PrimitiveOffsets)
> +            return FALSE;
> +         memset(draw->gs.tgsi.machine->Primitives[i], 0,
> +                MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
> +         memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0,
> +                MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
> +      }
>     }
>  
>     return TRUE;
> @@ -712,8 +757,10 @@ draw_gs_init( struct draw_context *draw )
>  
>  void draw_gs_destroy( struct draw_context *draw )
>  {
> +   int i;
>     if (draw->gs.tgsi.machine) {
> -      align_free(draw->gs.tgsi.machine->Primitives);
> +      for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++)
> +         align_free(draw->gs.tgsi.machine->Primitives[i]);
>        tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
>     }
>  }
> @@ -817,6 +864,12 @@ draw_create_geometry_shader(struct draw_context *draw,
>  
>     gs->machine = draw->gs.tgsi.machine;
>  
> +   gs->num_vertex_streams = 1;
> +   for (i = 0; i < gs->state.stream_output.num_outputs; i++) {
> +      if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams)
> +         gs->num_vertex_streams = gs->state.stream_output.output[i].stream + 1;
> +   }
> +
>  #ifdef HAVE_LLVM
>     if (use_llvm) {
>        int vector_size = gs->vector_length * sizeof(float);
> @@ -872,6 +925,7 @@ void draw_bind_geometry_shader(struct draw_context *draw,
>  void draw_delete_geometry_shader(struct draw_context *draw,
>                                   struct draw_geometry_shader *dgs)
>  {
> +   int i;
>     if (!dgs) {
>        return;
>     }
> @@ -904,7 +958,8 @@ void draw_delete_geometry_shader(struct draw_context *draw,
>     }
>  #endif
>  
> -   FREE(dgs->primitive_lengths);
> +   for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++)
> +      FREE(dgs->stream[i].primitive_lengths);
>     FREE((void*) dgs->state.tokens);
>     FREE(dgs);
>  }
> diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
> index 663ba84..c79c6d7 100644
> --- a/src/gallium/auxiliary/draw/draw_gs.h
> +++ b/src/gallium/auxiliary/draw/draw_gs.h
> @@ -56,6 +56,13 @@ struct draw_gs_inputs {
>  /**
>   * Private version of the compiled geometry shader
>   */
> +struct draw_vertex_stream {
> +   unsigned *primitive_lengths;
> +   unsigned emitted_vertices;
> +   unsigned emitted_primitives;
> +   float (*tmp_output)[4];
> +};
> +
>  struct draw_geometry_shader {
>     struct draw_context *draw;
>  
> @@ -74,14 +81,11 @@ struct draw_geometry_shader {
>     unsigned primitive_boundary;
>     unsigned input_primitive;
>     unsigned output_primitive;
> -
> -   unsigned *primitive_lengths;
> -   unsigned emitted_vertices;
> -   unsigned emitted_primitives;
> -
> -   float (*tmp_output)[4];
>     unsigned vertex_size;
>  
> +   struct draw_vertex_stream stream[TGSI_MAX_VERTEX_STREAMS];
> +   unsigned num_vertex_streams;
> +
>     unsigned in_prim_idx;
>     unsigned input_vertex_stride;
>     unsigned fetched_prim_count;
> @@ -109,14 +113,15 @@ struct draw_geometry_shader {
>                          unsigned num_vertices,
>                          unsigned prim_idx);
>     void (*fetch_outputs)(struct draw_geometry_shader *shader,
> +                         unsigned vertex_stream,
>                           unsigned num_primitives,
>                           float (**p_output)[4]);
>  
>     void     (*prepare)(struct draw_geometry_shader *shader,
>                         const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 
>                         const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]);
> -   unsigned (*run)(struct draw_geometry_shader *shader,
> -                   unsigned input_primitives);
> +   void (*run)(struct draw_geometry_shader *shader,
> +               unsigned input_primitives, unsigned *out_prims);
>  };
>  
>  void draw_geometry_shader_new_instance(struct draw_geometry_shader *gs);
> diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
> index cb9a1b3..0052752 100644
> --- a/src/gallium/auxiliary/draw/draw_pt.h
> +++ b/src/gallium/auxiliary/draw/draw_pt.h
> @@ -188,6 +188,7 @@ struct pt_so_emit;
>  void draw_pt_so_emit_prepare(struct pt_so_emit *emit, boolean use_pre_clip_pos);
>  
>  void draw_pt_so_emit( struct pt_so_emit *emit,
> +                      int num_vertex_streams,
>                        const struct draw_vertex_info *vert_info,
>                        const struct draw_prim_info *prim_info );
>  
> diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> index 5af845f..5c74455 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
> @@ -235,16 +235,17 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
>     struct draw_context *draw = fpme->draw;
>     struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
>     struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
> -   struct draw_prim_info gs_prim_info;
> +   struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS];
>     struct draw_vertex_info fetched_vert_info;
>     struct draw_vertex_info vs_vert_info;
> -   struct draw_vertex_info gs_vert_info;
> +   struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS];
>     struct draw_vertex_info *vert_info;
>     struct draw_prim_info ia_prim_info;
>     struct draw_vertex_info ia_vert_info;
>     const struct draw_prim_info *prim_info = in_prim_info;
>     boolean free_prim_info = FALSE;
>     unsigned opt = fpme->opt;
> +   int num_vertex_streams = 1;
>  
>     fetched_vert_info.count = fetch_info->count;
>     fetched_vert_info.vertex_size = fpme->vertex_size;
> @@ -293,12 +294,13 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
>                                 vert_info,
>                                 prim_info,
>                                 &vshader->info,
> -                               &gs_vert_info,
> -                               &gs_prim_info);
> +                               gs_vert_info,
> +                               gs_prim_info);
>  
>        FREE(vert_info->verts);
> -      vert_info = &gs_vert_info;
> -      prim_info = &gs_prim_info;
> +      vert_info = &gs_vert_info[0];
> +      prim_info = &gs_prim_info[0];
> +      num_vertex_streams = TGSI_MAX_VERTEX_STREAMS;
>     } else {
>        if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
>           draw_prim_assembler_run(draw, prim_info, vert_info,
> @@ -328,7 +330,7 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
>      * XXX: Stream output surely needs to respect the prim_info->elt
>      *      lists.
>      */
> -   draw_pt_so_emit( fpme->so_emit, vert_info, prim_info );
> +   draw_pt_so_emit( fpme->so_emit, num_vertex_streams, vert_info, prim_info );
>  
>     draw_stats_clipper_primitives(draw, prim_info);
>  
> diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
> index d17d695..31097e0 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
> @@ -344,9 +344,9 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
>     struct llvm_middle_end *fpme = llvm_middle_end(middle);
>     struct draw_context *draw = fpme->draw;
>     struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
> -   struct draw_prim_info gs_prim_info;
> +   struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS];
>     struct draw_vertex_info llvm_vert_info;
> -   struct draw_vertex_info gs_vert_info;
> +   struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS];
>     struct draw_vertex_info *vert_info;
>     struct draw_prim_info ia_prim_info;
>     struct draw_vertex_info ia_vert_info;
> @@ -410,12 +410,12 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
>                                 vert_info,
>                                 prim_info,
>                                 &vshader->info,
> -                               &gs_vert_info,
> -                               &gs_prim_info);
> +                               gs_vert_info,
> +                               gs_prim_info);
>  
>        FREE(vert_info->verts);
> -      vert_info = &gs_vert_info;
> -      prim_info = &gs_prim_info;
> +      vert_info = &gs_vert_info[0];
> +      prim_info = &gs_prim_info[0];
>     } else {
>        if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
>           draw_prim_assembler_run(draw, prim_info, vert_info,
> @@ -440,7 +440,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
>     }
>  
>     /* stream output needs to be done before clipping */
> -   draw_pt_so_emit( fpme->so_emit, vert_info, prim_info );
> +   draw_pt_so_emit( fpme->so_emit, 1, vert_info, prim_info );
>  
>     draw_stats_clipper_primitives(draw, prim_info);
>  
> diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> index 581e2d6..08121a3 100644
> --- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
> @@ -49,6 +49,7 @@ struct pt_so_emit {
>     int pos_idx;
>     unsigned emitted_primitives;
>     unsigned generated_primitives;
> +   unsigned stream;
>  };
>  
>  static const struct pipe_stream_output_info *
> @@ -144,6 +145,9 @@ static void so_emit_prim(struct pt_so_emit *so,
>           int ob = state->output[slot].output_buffer;
>           unsigned dst_offset = state->output[slot].dst_offset * sizeof(float);
>           unsigned write_size = num_comps * sizeof(float);
> +
> +         if (state->output[slot].stream != so->stream)
> +            continue;
>           /* If a buffer is missing then that's equivalent to
>            * an overflow */
>           if (!draw->so.targets[ob]) {
> @@ -175,7 +179,10 @@ static void so_emit_prim(struct pt_so_emit *so,
>           unsigned idx = state->output[slot].register_index;
>           unsigned start_comp = state->output[slot].start_component;
>           unsigned num_comps = state->output[slot].num_components;
> +         unsigned stream = state->output[slot].stream;
>  
> +         if (stream != so->stream)
> +            continue;
>           ob = state->output[slot].output_buffer;
>           buffer_written[ob] = TRUE;
>  
> @@ -184,7 +191,7 @@ static void so_emit_prim(struct pt_so_emit *so,
>                              draw->so.targets[ob]->internal_offset) +
>              state->output[slot].dst_offset;
>           
> -         if (idx == so->pos_idx && pcp_ptr)
> +         if (idx == so->pos_idx && pcp_ptr && so->stream == 0)
>              memcpy(buffer, &pre_clip_pos[start_comp],
>                     num_comps * sizeof(float));
>           else
> @@ -193,8 +200,8 @@ static void so_emit_prim(struct pt_so_emit *so,
>  #if 0
>           {
>              int j;
> -            debug_printf("VERT[%d], offset = %d, slot[%d] sc = %d, num_c = %d, idx = %d = [",
> -                         i,
> +            debug_printf("VERT[%d], stream = %d, offset = %d, slot[%d] sc = %d, num_c = %d, idx = %d = [",
> +                         i, stream,
>                           draw->so.targets[ob]->internal_offset,
>                           slot, start_comp, num_comps, idx);
>              for (j = 0; j < num_comps; ++j) {
> @@ -258,12 +265,13 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
>  
>  
>  void draw_pt_so_emit( struct pt_so_emit *emit,
> +                      int num_vertex_streams,
>                        const struct draw_vertex_info *input_verts,
>                        const struct draw_prim_info *input_prims )
>  {
>     struct draw_context *draw = emit->draw;
>     struct vbuf_render *render = draw->render;
> -   unsigned start, i;
> +   unsigned start, i, stream;
>  
>     if (!emit->has_so)
>        return;
> @@ -271,34 +279,36 @@ void draw_pt_so_emit( struct pt_so_emit *emit,
>     if (!draw->so.num_targets)
>        return;
>  
> -   emit->emitted_primitives = 0;
> -   emit->generated_primitives = 0;
> -   emit->input_vertex_stride = input_verts->stride;
> -   if (emit->use_pre_clip_pos)
> -      emit->pre_clip_pos = input_verts->verts->pre_clip_pos;
> -
> -   emit->inputs = (const float (*)[4])input_verts->verts->data;
> -
>     /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/
>     draw_do_flush( draw, DRAW_FLUSH_BACKEND );
>  
> -   for (start = i = 0; i < input_prims->primitive_count;
> -        start += input_prims->primitive_lengths[i], i++)
> -   {
> -      unsigned count = input_prims->primitive_lengths[i];
> -
> -      if (input_prims->linear) {
> -         so_run_linear(emit, input_prims, input_verts,
> -                       start, count);
> -      } else {
> -         so_run_elts(emit, input_prims, input_verts,
> -                     start, count);
> +   for (stream = 0; stream < num_vertex_streams; stream++) {
> +      emit->emitted_primitives = 0;
> +      emit->generated_primitives = 0;
> +      if (emit->use_pre_clip_pos)
> +         emit->pre_clip_pos = input_verts[stream].verts->pre_clip_pos;
> +
> +      emit->input_vertex_stride = input_verts[stream].stride;
> +      emit->inputs = (const float (*)[4])input_verts[stream].verts->data;
> +      emit->stream = stream;
> +      for (start = i = 0; i < input_prims[stream].primitive_count;
> +           start += input_prims[stream].primitive_lengths[i], i++)
> +      {
> +         unsigned count = input_prims[stream].primitive_lengths[i];
> +
> +         if (input_prims->linear) {
> +            so_run_linear(emit, &input_prims[stream], &input_verts[stream],
> +                          start, count);
> +         } else {
> +            so_run_elts(emit, &input_prims[stream], &input_verts[stream],
> +                        start, count);
> +         }
>        }
> +      render->set_stream_output_info(render,
> +                                     stream,
> +                                     emit->emitted_primitives,
> +                                     emit->generated_primitives);
>     }
> -
> -   render->set_stream_output_info(render, 0,
> -                                  emit->emitted_primitives,
> -                                  emit->generated_primitives);
>  }
>  
>  
> 

Overall, this doesn't look particularly elegant, but I can't think of
anything better; it's probably inherent to how multiple streams work.

So, other than the things I mentioned, for the series:
Reviewed-by: Roland Scheidegger <sroland at vmware.com>



More information about the mesa-dev mailing list