[Mesa-dev] [PATCH] nv50: add edgeflag support when using inline vertex data submit
Samuel Pitoiset
samuel.pitoiset at gmail.com
Sun Apr 3 14:08:07 UTC 2016
Ping? (~4 months later). Ilia? :)
On 11/23/2015 09:36 PM, Samuel Pitoiset wrote:
> Fixes the following piglit tests:
> - gl-1.0-edgeflag
> - gl-1.0-edgeflag-quads
> - gl-2.0-edgeflag
> - gl-2.0-edgeflag-immediate
> - point-vertex-id divisor
> - point-vertex-id gl_InstanceID
> - point-vertex-id gl_InstanceID divisor
>
> I still need to figure out why point-vertex-id fails with gl_VertexID.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
> src/gallium/drivers/nouveau/nv50/nv50_program.c | 5 +-
> src/gallium/drivers/nouveau/nv50/nv50_push.c | 246 ++++++++++++++++++++----
> src/gallium/drivers/nouveau/nv50/nv50_surface.c | 2 +-
> src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 3 +-
> 4 files changed, 210 insertions(+), 46 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
> index a4b8ddf..b904581 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
> @@ -97,9 +97,6 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
> case TGSI_SEMANTIC_CLIPDIST:
> prog->vp.clpd[info->out[i].si] = n;
> break;
> - case TGSI_SEMANTIC_EDGEFLAG:
> - prog->vp.edgeflag = i;
> - break;
> case TGSI_SEMANTIC_BCOLOR:
> prog->vp.bfc[info->out[i].si] = i;
> break;
> @@ -349,7 +346,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
>
> prog->vp.bfc[0] = 0xff;
> prog->vp.bfc[1] = 0xff;
> - prog->vp.edgeflag = 0xff;
> prog->vp.clpd[0] = map_undef;
> prog->vp.clpd[1] = map_undef;
> prog->vp.psiz = map_undef;
> @@ -383,6 +379,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
> prog->tls_space = info->bin.tlsSpace;
>
> prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
> + prog->vp.edgeflag = info->io.edgeFlagIn;
>
> if (prog->type == PIPE_SHADER_FRAGMENT) {
> if (info->prop.fp.writesDepth) {
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
> index cbef95d..dae1e34 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
> @@ -15,9 +15,6 @@ struct push_context {
>
> const void *idxbuf;
>
> - float edgeflag;
> - int edgeflag_attr;
> -
> uint32_t vertex_words;
> uint32_t packet_vertex_limit;
>
> @@ -31,8 +28,40 @@ struct push_context {
> uint32_t prim;
> uint32_t restart_index;
> uint32_t instance_id;
> +
> + struct {
> + bool enabled;
> + bool value;
> + uint8_t width;
> + unsigned stride;
> + const uint8_t *data;
> + } edgeflag;
> };
>
> +static inline void
> +nv50_push_map_edgeflag(struct push_context *ctx, struct nv50_context *nv50,
> + int32_t index_bias)
> +{
> +
> + unsigned attr = nv50->vertprog->vp.edgeflag;
> + struct pipe_vertex_element *ve = &nv50->vertex->element[attr].pipe;
> + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
> + struct nv04_resource *buf = nv04_resource(vb->buffer);
> +
> + ctx->edgeflag.stride = vb->stride;
> + ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
> + if (buf) {
> + unsigned offset = vb->buffer_offset + ve->src_offset;
> + ctx->edgeflag.data =
> + nouveau_resource_map_offset(&nv50->base, buf, offset, NOUVEAU_BO_RD);
> + } else {
> + ctx->edgeflag.data = (const uint8_t *)vb->user_buffer + ve->src_offset;
> + }
> +
> + if (index_bias)
> + ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
> +}
> +
> static inline unsigned
> prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
> {
> @@ -63,6 +92,75 @@ prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
> return i;
> }
>
> +static inline bool
> +ef_value_8(const struct push_context *ctx, uint32_t index)
> +{
> + uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
> + return !!*pf;
> +}
> +
> +static inline bool
> +ef_value_32(const struct push_context *ctx, uint32_t index)
> +{
> + uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
> + return !!*pf;
> +}
> +
> +static inline bool
> +ef_toggle(struct push_context *ctx)
> +{
> + ctx->edgeflag.value = !ctx->edgeflag.value;
> + return ctx->edgeflag.value;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
> +{
> + unsigned i;
> + bool ef = ctx->edgeflag.value;
> + if (ctx->edgeflag.width == 1)
> + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> + else
> + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> + return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
> +{
> + unsigned i;
> + bool ef = ctx->edgeflag.value;
> + if (ctx->edgeflag.width == 1)
> + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> + else
> + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> + return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
> +{
> + unsigned i;
> + bool ef = ctx->edgeflag.value;
> + if (ctx->edgeflag.width == 1)
> + for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
> + else
> + for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
> + return i;
> +}
> +
> +static inline unsigned
> +ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
> +{
> + unsigned i;
> + bool ef = ctx->edgeflag.value;
> + if (ctx->edgeflag.width == 1)
> + for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
> + else
> + for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
> + return i;
> +}
> +
> static void
> emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
> {
> @@ -70,29 +168,43 @@ emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
>
> while (count) {
> unsigned push = MIN2(count, ctx->packet_vertex_limit);
> + bool need_restart_idx = false;
> unsigned size, nr;
>
> nr = push;
> if (ctx->primitive_restart)
> nr = prim_restart_search_i08(elts, push, ctx->restart_index);
> + need_restart_idx = nr != push;
>
> - size = ctx->vertex_words * nr;
> + while (nr) {
> + unsigned ne = nr;
>
> - if (unlikely(ctx->need_vertex_id)) {
> - BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> - PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> - }
> + if (unlikely(ctx->edgeflag.enabled))
> + ne = ef_toggle_search_i08(ctx, elts, nr);
> + size = ctx->vertex_words * ne;
>
> - BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
> + if (unlikely(ctx->need_vertex_id)) {
> + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> + PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> + }
>
> - ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
> - ctx->push->cur);
> + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> - ctx->push->cur += size;
> - count -= nr;
> - elts += nr;
> + ctx->translate->run_elts8(ctx->translate, elts, ne, 0, ctx->instance_id,
> + ctx->push->cur);
> + ctx->push->cur += size;
> +
> + if (unlikely(ne != nr)) {
> + BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> + PUSH_DATA (ctx->push, ef_toggle(ctx));
> + }
> +
> + count -= ne;
> + elts += ne;
> + nr -= ne;
> + }
>
> - if (nr != push) {
> + if (need_restart_idx) {
> count--;
> elts++;
> BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -108,29 +220,44 @@ emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
>
> while (count) {
> unsigned push = MIN2(count, ctx->packet_vertex_limit);
> + bool need_restart_idx = false;
> unsigned size, nr;
>
> nr = push;
> if (ctx->primitive_restart)
> nr = prim_restart_search_i16(elts, push, ctx->restart_index);
> + need_restart_idx = nr != push;
>
> - size = ctx->vertex_words * nr;
> + while (nr) {
> + unsigned ne= nr;
>
> - if (unlikely(ctx->need_vertex_id)) {
> - BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> - PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> - }
> + if (unlikely(ctx->edgeflag.enabled))
> + ne = ef_toggle_search_i16(ctx, elts, nr);
> + size = ctx->vertex_words * ne;
>
> - BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> - ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
> - ctx->push->cur);
> + if (unlikely(ctx->need_vertex_id)) {
> + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> + PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> + }
> +
> + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> - ctx->push->cur += size;
> - count -= nr;
> - elts += nr;
> + ctx->translate->run_elts16(ctx->translate, elts, ne, 0, ctx->instance_id,
> + ctx->push->cur);
> + ctx->push->cur += size;
>
> - if (nr != push) {
> + if (unlikely(ne != nr)) {
> + BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> + PUSH_DATA (ctx->push, ef_toggle(ctx));
> + }
> +
> + count -= ne;
> + elts += ne;
> + nr -= ne;
> + }
> +
> + if (need_restart_idx) {
> count--;
> elts++;
> BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -146,29 +273,43 @@ emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
>
> while (count) {
> unsigned push = MIN2(count, ctx->packet_vertex_limit);
> + bool need_restart_idx = false;
> unsigned size, nr;
>
> nr = push;
> if (ctx->primitive_restart)
> nr = prim_restart_search_i32(elts, push, ctx->restart_index);
> + need_restart_idx = nr != push;
>
> - size = ctx->vertex_words * nr;
> + while (nr) {
> + unsigned ne = nr;
>
> - if (unlikely(ctx->need_vertex_id)) {
> - BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> - PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> - }
> + if (unlikely(ctx->edgeflag.enabled))
> + ne = ef_toggle_search_i32(ctx, elts, nr);
> + size = ctx->vertex_words * ne;
>
> - BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
> + if (unlikely(ctx->need_vertex_id)) {
> + BEGIN_NV04(ctx->push, NV84_3D(VERTEX_ID_BASE), 1);
> + PUSH_DATA (ctx->push, *elts + ctx->index_bias);
> + }
>
> - ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
> - ctx->push->cur);
> + BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> - ctx->push->cur += size;
> - count -= nr;
> - elts += nr;
> + ctx->translate->run_elts(ctx->translate, elts, ne, 0, ctx->instance_id,
> + ctx->push->cur);
> + ctx->push->cur += size;
> +
> + if (unlikely(ne != nr)) {
> + BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> + PUSH_DATA (ctx->push, ef_toggle(ctx));
> + }
> +
> + count -= ne;
> + elts += ne;
> + nr -= ne;
> + }
>
> - if (nr != push) {
> + if (need_restart_idx) {
> count--;
> elts++;
> BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
> @@ -184,7 +325,7 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
>
> while (count) {
> unsigned push = MIN2(count, ctx->packet_vertex_limit);
> - unsigned size = ctx->vertex_words * push;
> + unsigned size;
>
> if (unlikely(ctx->need_vertex_id)) {
> /* For non-indexed draws, gl_VertexID goes up after each vertex. */
> @@ -192,11 +333,22 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
> PUSH_DATA (ctx->push, elts++);
> }
>
> + if (unlikely(ctx->edgeflag.enabled))
> + push = ef_toggle_search_seq(ctx, start, push);
> + size = ctx->vertex_words * push;
> +
> BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
>
> ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
> ctx->push->cur);
> ctx->push->cur += size;
> +
> + if (unlikely(push != count)) {
> + PUSH_SPACE(ctx->push, 2);
> + BEGIN_NV04(ctx->push, NV50_3D(EDGEFLAG), 1);
> + PUSH_DATA (ctx->push, ef_toggle(ctx));
> + }
> +
> count -= push;
> start += push;
> }
> @@ -253,6 +405,12 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
> ctx.need_vertex_id ? 1 : nv50->vertex->packet_vertex_limit;
> ctx.vertex_words = nv50->vertex->vertex_size;
>
> + ctx.edgeflag.value = true;
> + ctx.edgeflag.enabled = nv50->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
> + ctx.edgeflag.data = NULL;
> + ctx.edgeflag.stride = 0;
> + ctx.edgeflag.width = 0;
> +
> assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
> for (i = 0; i < nv50->num_vtxbufs; ++i) {
> const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
> @@ -270,6 +428,9 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
> ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
> }
>
> + if (unlikely(ctx.edgeflag.enabled))
> + nv50_push_map_edgeflag(&ctx, nv50, info->index_bias);
> +
> if (info->indexed) {
> if (nv50->idxbuf.buffer) {
> ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
> @@ -347,4 +508,9 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
> BEGIN_NV04(ctx.push, NV84_3D(VERTEX_ID_BASE), 1);
> PUSH_DATA (ctx.push, nv50->state.index_bias);
> }
> +
> + if (unlikely(!ctx.edgeflag.value)) {
> + BEGIN_NV04(ctx.push, NV50_3D(EDGEFLAG), 1);
> + PUSH_DATA (ctx.push, 0);
> + }
> }
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> index 8ba19d2..a854669 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
> @@ -779,7 +779,7 @@ nv50_blitter_make_vp(struct nv50_blitter *blit)
> blit->vp.out[1].si = 0;
> blit->vp.vp.attrs[0] = 0x73;
> blit->vp.vp.psiz = 0x40;
> - blit->vp.vp.edgeflag = 0x40;
> + blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
> }
>
> void *
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> index 85878d5..ad131d5 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> @@ -302,7 +302,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
> unsigned i;
> const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
>
> - if (unlikely(vertex->need_conversion))
> + if (unlikely(vertex->need_conversion) ||
> + unlikely(nv50->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS))
> nv50->vbo_fifo = ~0;
> else
> if (nv50->vbo_user & ~nv50->vbo_constant)
>
More information about the mesa-dev mailing list