[Mesa-dev] [PATCH] nv50: add edgeflag support when using inline vertex data submit

Sun Apr 3 14:08:07 UTC 2016

Ping? (~4 months later). Ilia? :)

On 11/23/2015 09:36 PM, Samuel Pitoiset wrote:
> Fixes the following piglit tests:
>   - gl-1.0-edgeflag
>   - gl-1.0-edgeflag-quads
>   - gl-2.0-edgeflag
>   - gl-2.0-edgeflag-immediate
>   - point-vertex-id divisor
>   - point-vertex-id gl_InstanceID
>   - point-vertex-id gl_InstanceID divisor
>
> I still need to figure out why point-vertex-id fails with gl_VertexID.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>   src/gallium/drivers/nouveau/nv50/nv50_program.c |   5 +-
>   src/gallium/drivers/nouveau/nv50/nv50_push.c    | 246 ++++++++++++++++++++----
>   src/gallium/drivers/nouveau/nv50/nv50_surface.c |   2 +-
>   src/gallium/drivers/nouveau/nv50/nv50_vbo.c     |   3 +-
>   4 files changed, 210 insertions(+), 46 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
> index a4b8ddf..b904581 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
> @@ -97,9 +97,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
>         case TGSI_SEMANTIC_CLIPDIST:
>            prog->vp.clpd[info->out[i].si] = n;
>            break;
> -      case TGSI_SEMANTIC_EDGEFLAG:
> -         prog->vp.edgeflag = i;
> -         break;
>         case TGSI_SEMANTIC_BCOLOR:
>            prog->vp.bfc[info->out[i].si] = i;
>            break;
> @@ -349,7 +346,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
>
>      prog->vp.bfc[0] = 0xff;
>      prog->vp.bfc[1] = 0xff;
> -   prog->vp.edgeflag = 0xff;
>      prog->vp.clpd[0] = map_undef;
>      prog->vp.clpd[1] = map_undef;
>      prog->vp.psiz = map_undef;
> @@ -383,6 +379,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
>      prog->tls_space = info->bin.tlsSpace;
>
>      prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
> +   prog->vp.edgeflag = info->io.edgeFlagIn;
>
>      if (prog->type == PIPE_SHADER_FRAGMENT) {
>         if (info->prop.fp.writesDepth) {
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
> index cbef95d..dae1e34 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
> @@ -15,9 +15,6 @@ struct push_context {
>
>      const void *idxbuf;
>
> -   float edgeflag;
> -   int edgeflag_attr;
> -
>      uint32_t vertex_words;
>      uint32_t packet_vertex_limit;
>
> @@ -31,8 +28,40 @@ struct push_context {
>      uint32_t prim;
>      uint32_t restart_index;
>      uint32_t instance_id;
> +
> +   struct {
> +      bool enabled;
> +      bool value;
> +      uint8_t width;
> +      unsigned stride;
> +      const uint8_t *data;
> +   } edgeflag;
>   };
>
> +static inline void
> +nv50_push_map_edgeflag(struct push_context *ctx, struct nv50_context *nv50,
> +                       int32_t index_bias)
> +{
> +
> +   unsigned attr = nv50->vertprog->vp.edgeflag;
> +   struct pipe_vertex_element *ve = &nv50->vertex->element[attr].pipe;
> +   struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
> +   struct nv04_resource *buf = nv04_resource(vb->buffer);
> +
> +   ctx->edgeflag.stride = vb->stride;
> +   ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
> +   if (buf) {
> +      unsigned offset = vb->buffer_offset + ve->src_offset;
> +      ctx->edgeflag.data =
> +         nouveau_resource_map_offset(&nv50->base, buf, offset, NOUVEAU_BO_RD);
> +   } else {
> +      ctx->edgeflag.data = (const uint8_t *)vb->user_buffer + ve->src_offset;
> +   }
> +
> +   if (index_bias)
> +      ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
> +}
> +
>   static inline unsigned
>   prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
>   {
> @@ -63,6 +92,75 @@ prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
>      return i;
>   }
>
> +static inline bool
> +ef_value_8(const struct push_context *ctx, uint32_t index)
> +{
> +   uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
> +   return !!*pf;
> +}
> +
> +static inline bool
> +ef_value_32(const struct push_context *ctx, uint32_t index)
> +{
> +   uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
> +   return !!*pf;
> +}
> +
> +static inline bool
> +ef_toggle(struct push_context *ctx)
> +{
> +   ctx->edgeflag.value = !ctx->edgeflag.value;
> +   return ctx->edgeflag.value;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
> +{
> +   unsigned i;
> +   bool ef = ctx->edgeflag.value;
> +   if (ctx->edgeflag.width == 1)
> +      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> +   else
> +      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> +   return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
> +{
> +   unsigned i;
> +   bool ef = ctx->edgeflag.value;
> +   if (ctx->edgeflag.width == 1)
> +      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> +   else
> +      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> +   return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
> +{
> +   unsigned i;
> +   bool ef = ctx->edgeflag.value;
> +   if (ctx->edgeflag.width == 1)
> +      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> +   else
> +      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> +   return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
> +{
> +   unsigned i;
> +   bool ef = ctx->edgeflag.value;
> +   if (ctx->edgeflag.width == 1)
> +      for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
> +   else
> +      for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
> +   return i;
> +}
> +
>   static void
>   emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
>   {
> @@ -70,29 +168,43 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
>
>      while (count) {
>         unsigned push = MIN2(count, ctx->packet_vertex_limit);
> +      bool need_restart_idx = false;
>         unsigned size, nr;
>
>         nr = push;
>         if (ctx->primitive_restart)
>            nr = prim_restart_search_i08(elts, push, ctx->restart_index);
> +      need_restart_idx = nr != push;
>
> -      size = ctx->vertex_words * nr;
> +      while (nr) {
> +         unsigned ne = nr;
>
> -      if (unlikely(ctx->need_vertex_id)) {
> -         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> -         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> -      }
> +         if (unlikely(ctx->edgeflag.enabled))
> +             ne = ef_toggle_search_i08(ctx, elts, nr);
> +         size = ctx->vertex_words * ne;
>
> -      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
> +         if (unlikely(ctx->need_vertex_id)) {
> +            BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> +            PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> +         }
>
> -      ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
> -                                ctx->push->cur);
> +         BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> -      ctx->push->cur += size;
> -      count -= nr;
> -      elts += nr;
> +         ctx->translate->run_elts8(ctx->translate, elts, ne, 0, ctx->instance_id,
> +                                   ctx->push->cur);
> +         ctx->push->cur += size;
> +
> +         if (unlikely(ne != nr)) {
> +             BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> +             PUSH_DATA (ctx->push, ef_toggle(ctx));
> +         }
> +
> +         count -= ne;
> +         elts += ne;
> +         nr -= ne;
> +      }
>
> -      if (nr != push) {
> +      if (need_restart_idx) {
>            count--;
>            elts++;
>            BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -108,29 +220,44 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
>
>      while (count) {
>         unsigned push = MIN2(count, ctx->packet_vertex_limit);
> +      bool need_restart_idx = false;
>         unsigned size, nr;
>
>         nr = push;
>         if (ctx->primitive_restart)
>            nr = prim_restart_search_i16(elts, push, ctx->restart_index);
> +      need_restart_idx = nr != push;
>
> -      size = ctx->vertex_words * nr;
> +      while (nr) {
> +         unsigned ne= nr;
>
> -      if (unlikely(ctx->need_vertex_id)) {
> -         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> -         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> -      }
> +         if (unlikely(ctx->edgeflag.enabled))
> +             ne = ef_toggle_search_i16(ctx, elts, nr);
> +         size = ctx->vertex_words * ne;
>
> -      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> -      ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
> -                                 ctx->push->cur);
> +         if (unlikely(ctx->need_vertex_id)) {
> +            BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> +            PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> +         }
> +
> +         BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> -      ctx->push->cur += size;
> -      count -= nr;
> -      elts += nr;
> +         ctx->translate->run_elts16(ctx->translate, elts, ne, 0, ctx->instance_id,
> +                                    ctx->push->cur);
> +         ctx->push->cur += size;
>
> -      if (nr != push) {
> +         if (unlikely(ne != nr)) {
> +             BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> +             PUSH_DATA (ctx->push, ef_toggle(ctx));
> +         }
> +
> +         count -= ne;
> +         elts += ne;
> +         nr -= ne;
> +      }
> +
> +      if (need_restart_idx) {
>            count--;
>            elts++;
>            BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -146,29 +273,43 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
>
>      while (count) {
>         unsigned push = MIN2(count, ctx->packet_vertex_limit);
> +      bool need_restart_idx = false;
>         unsigned size, nr;
>
>         nr = push;
>         if (ctx->primitive_restart)
>            nr = prim_restart_search_i32(elts, push, ctx->restart_index);
> +      need_restart_idx = nr != push;
>
> -      size = ctx->vertex_words * nr;
> +      while (nr) {
> +         unsigned ne = nr;
>
> -      if (unlikely(ctx->need_vertex_id)) {
> -         BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> -         PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> -      }
> +         if (unlikely(ctx->edgeflag.enabled))
> +             ne = ef_toggle_search_i32(ctx, elts, nr);
> +         size = ctx->vertex_words * ne;
>
> -      BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
> +         if (unlikely(ctx->need_vertex_id)) {
> +            BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> +            PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> +         }
>
> -      ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
> -                               ctx->push->cur);
> +         BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> -      ctx->push->cur += size;
> -      count -= nr;
> -      elts += nr;
> +         ctx->translate->run_elts(ctx->translate, elts, ne, 0, ctx->instance_id,
> +                                  ctx->push->cur);
> +         ctx->push->cur += size;
> +
> +         if (unlikely(ne != nr)) {
> +             BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> +             PUSH_DATA (ctx->push, ef_toggle(ctx));
> +         }
> +
> +         count -= ne;
> +         elts += ne;
> +         nr -= ne;
> +      }
>
> -      if (nr != push) {
> +      if (need_restart_idx) {
>            count--;
>            elts++;
>            BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -184,7 +325,7 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
>
>      while (count) {
>         unsigned push = MIN2(count, ctx->packet_vertex_limit);
> -      unsigned size = ctx->vertex_words * push;
> +      unsigned size;
>
>         if (unlikely(ctx->need_vertex_id)) {
>            /* For non-indexed draws, gl_VertexID goes up after each vertex. */
> @@ -192,11 +333,22 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
>            PUSH_DATA (ctx->push, elts++);
>         }
>
> +      if (unlikely(ctx->edgeflag.enabled))
> +         push = ef_toggle_search_seq(ctx, start, push);
> +      size = ctx->vertex_words * push;
> +
>         BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
>         ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
>                             ctx->push->cur);
>         ctx->push->cur += size;
> +
> +      if (unlikely(push != count)) {
> +         PUSH_SPACE(ctx->push, 2);
> +         BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> +         PUSH_DATA (ctx->push, ef_toggle(ctx));
> +      }
> +
>         count -= push;
>         start += push;
>      }
> @@ -253,6 +405,12 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
>         ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
>      ctx.vertex_words = nv50->vertex->vertex_size;
>
> +   ctx.edgeflag.value = true;
> +   ctx.edgeflag.enabled = nv50->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
> +   ctx.edgeflag.data = NULL;
> +   ctx.edgeflag.stride = 0;
> +   ctx.edgeflag.width = 0;
> +
>      assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
>      for (i = 0; i < nv50->num_vtxbufs; ++i) {
>         const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
> @@ -270,6 +428,9 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
>         ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
>      }
>
> +   if (unlikely(ctx.edgeflag.enabled))
> +      nv50_push_map_edgeflag(&ctx, nv50, info->index_bias);
> +
>      if (info->indexed) {
>         if (nv50->idxbuf.buffer) {
>            ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
> @@ -347,4 +508,9 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
>         BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
>         PUSH_DATA (ctx.push, nv50->state.index_bias);
>      }
> +
> +   if (unlikely(!ctx.edgeflag.value)) {
> +      BEGIN_NV04(ctx.push, NV50_3D(EDGEFLAG), 1);
> +      PUSH_DATA (ctx.push, 0);
> +   }
>   }
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> index 8ba19d2..a854669 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> @@ -779,7 +779,7 @@ nv50_blitter_make_vp(struct nv50_blitter *blit)
>      blit->vp.out[1].si = 0;
>      blit->vp.vp.attrs[0] = 0x73;
>      blit->vp.vp.psiz = 0x40;
> -   blit->vp.vp.edgeflag = 0x40;
> +   blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
>   }
>
>   void *
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> index 85878d5..ad131d5 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> @@ -302,7 +302,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
>      unsigned i;
>      const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
>
> -   if (unlikely(vertex->need_conversion))
> +   if (unlikely(vertex->need_conversion) ||
> +       unlikely(nv50->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS))
>         nv50->vbo_fifo = ~0;
>      else
>      if (nv50->vbo_user & ~nv50->vbo_constant)
>