[Mesa-dev] [PATCH v3] nv50, nvc0: optimize coherent buffer checking at draw time

Ilia Mirkin imirkin at alum.mit.edu
Wed Dec 16 13:01:28 PST 2015


On Thu, Dec 10, 2015 at 4:58 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> Instead of iterating over all the buffer resources looking for coherent
> buffers, we keep track of a context-wide count. This will save some
> iterations (and CPU cycles) in 99.99% case because usually coherent
> buffers are not so used.
>
> Changes from v3:
>  - check if views[i] and views[i]->texture are not NULL
>  - fix use of nv50->textures_coherent
>  - check if vb[i].buffer is not NULL
>  - clear out the flag for UBO
>
> Changes from v2:
>  - forgot to apply some changes for nv50 (texture/vertex bufs)
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_context.h |  3 ++
>  src/gallium/drivers/nouveau/nv50/nv50_state.c   | 23 ++++++++++++++
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c     | 42 +++++--------------------
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  3 ++
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c   | 32 +++++++++++++++++++
>  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c     | 41 +++++-------------------
>  6 files changed, 76 insertions(+), 68 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> index 2cebcd9..712d00e 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> @@ -134,9 +134,11 @@ struct nv50_context {
>     struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
>     uint16_t constbuf_dirty[3];
>     uint16_t constbuf_valid[3];
> +   uint16_t constbuf_coherent[3];
>
>     struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
>     unsigned num_vtxbufs;
> +   uint32_t vtxbufs_coherent;
>     struct pipe_index_buffer idxbuf;
>     uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
>     uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
> @@ -148,6 +150,7 @@ struct nv50_context {
>
>     struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
>     unsigned num_textures[3];
> +   uint32_t textures_coherent[3];
>     struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
>     unsigned num_samplers[3];
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> index fd7c7cd..0cd1e2d 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> @@ -664,6 +664,15 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
>        if (old)
>           nv50_screen_tic_unlock(nv50->screen, old);
>
> +      if (views[i] && views[i]->texture) {
> +         struct pipe_resource *res = views[i]->texture;
> +         if (res->target == PIPE_BUFFER &&
> +             (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
> +            nv50->textures_coherent[s] |= 1 << i;
> +         else
> +            nv50->textures_coherent[s] &= ~(1 << i);
> +      }

;(

> +
>        pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
>     }
>
> @@ -847,13 +856,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
>        nv50->constbuf[s][i].u.data = cb->user_buffer;
>        nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
>        nv50->constbuf_valid[s] |= 1 << i;
> +      nv50->constbuf_coherent[s] &= ~(1 << i);
>     } else
>     if (res) {
>        nv50->constbuf[s][i].offset = cb->buffer_offset;
>        nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
>        nv50->constbuf_valid[s] |= 1 << i;
> +      if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> +         nv50->constbuf_coherent[s] |= 1 << i;
> +      else
> +         nv50->constbuf_coherent[s] &= ~(1 << i);
>     } else {
>        nv50->constbuf_valid[s] &= ~(1 << i);
> +      nv50->constbuf_coherent[s] &= ~(1 << i);
>     }
>     nv50->constbuf_dirty[s] |= 1 << i;
>
> @@ -1000,6 +1015,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
>     if (!vb) {
>        nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
>        nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
> +      nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
>        return;
>     }
>
> @@ -1012,9 +1028,16 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
>              nv50->vbo_constant |= 1 << dst_index;
>           else
>              nv50->vbo_constant &= ~(1 << dst_index);
> +         nv50->vtxbufs_coherent &= ~(1 << dst_index);
>        } else {
>           nv50->vbo_user &= ~(1 << dst_index);
>           nv50->vbo_constant &= ~(1 << dst_index);
> +
> +         if (vb[i].buffer &&
> +             vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> +            nv50->vtxbufs_coherent |= (1 << dst_index);
> +         else
> +            nv50->vtxbufs_coherent &= ~(1 << dst_index);
>        }
>     }
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> index 85878d5..0f6804d 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> @@ -762,7 +762,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>     struct nv50_context *nv50 = nv50_context(pipe);
>     struct nouveau_pushbuf *push = nv50->base.pushbuf;
>     bool tex_dirty = false;
> -   int i, s;
> +   int s;
>
>     /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
>     nv50->vb_elt_first = info->min_index + info->index_bias;
> @@ -791,27 +791,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>
>     push->kick_notify = nv50_draw_vbo_kick_notify;
>
> -   /* TODO: Instead of iterating over all the buffer resources looking for
> -    * coherent buffers, keep track of a context-wide count.
> -    */
>     for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
> -      uint32_t valid = nv50->constbuf_valid[s];
> -
> -      while (valid && !nv50->cb_dirty) {
> -         const unsigned i = ffs(valid) - 1;
> -         struct pipe_resource *res;
> -
> -         valid &= ~(1 << i);
> -         if (nv50->constbuf[s][i].user)
> -            continue;
> -
> -         res = nv50->constbuf[s][i].u.buf;
> -         if (!res)
> -            continue;
> -
> -         if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> -            nv50->cb_dirty = true;
> -      }
> +      if (nv50->constbuf_coherent[s])
> +         nv50->cb_dirty = true;
>     }
>
>     /* If there are any coherent constbufs, flush the cache */
> @@ -822,15 +804,10 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>     }
>
>     for (s = 0; s < 3 && !tex_dirty; ++s) {
> -      for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
> -         if (!nv50->textures[s][i] ||
> -             nv50->textures[s][i]->texture->target != PIPE_BUFFER)
> -            continue;
> -         if (nv50->textures[s][i]->texture->flags &
> -             PIPE_RESOURCE_FLAG_MAP_COHERENT)
> -            tex_dirty = true;
> -      }
> +      if (nv50->textures_coherent[s])
> +         tex_dirty = true;
>     }
> +
>     if (tex_dirty) {
>        BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
>        PUSH_DATA (push, 0x20);
> @@ -850,12 +827,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>        PUSH_DATA (push, info->start_instance);
>     }
>
> -   for (i = 0; i < nv50->num_vtxbufs && !nv50->base.vbo_dirty; ++i) {
> -      if (!nv50->vtxbuf[i].buffer)
> -         continue;
> -      if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> -         nv50->base.vbo_dirty = true;
> -   }
> +   nv50->base.vbo_dirty |= !!nv50->vtxbufs_coherent;
>
>     if (nv50->base.vbo_dirty) {
>        BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 39b73ec..1219548 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -134,10 +134,12 @@ struct nvc0_context {
>     struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
>     uint16_t constbuf_dirty[6];
>     uint16_t constbuf_valid[6];
> +   uint16_t constbuf_coherent[6];
>     bool cb_dirty;
>
>     struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
>     unsigned num_vtxbufs;
> +   uint32_t vtxbufs_coherent;
>     struct pipe_index_buffer idxbuf;
>     uint32_t constant_vbos;
>     uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
> @@ -149,6 +151,7 @@ struct nvc0_context {
>     struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
>     unsigned num_textures[6];
>     uint32_t textures_dirty[6];
> +   uint32_t textures_coherent[6];
>     struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
>     unsigned num_samplers[6];
>     uint16_t samplers_dirty[6];
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index 5da0ea8..a1fc401 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -554,6 +554,15 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
>           continue;
>        nvc0->textures_dirty[s] |= 1 << i;
>
> +      if (views[i] && views[i]->texture) {
> +         struct pipe_resource *res = views[i]->texture;
> +         if (res->target == PIPE_BUFFER &&
> +             (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
> +            nvc0->textures_coherent[s] |= 1 << i;
> +         else
> +            nvc0->textures_coherent[s] &= ~(1 << i);
> +      }

;(

> +
>        if (old) {
>           nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
>           nvc0_screen_tic_unlock(nvc0->screen, old);
> @@ -596,6 +605,15 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
>              continue;
>           nvc0->textures_dirty[s] |= 1 << i;
>
> +         if (views[p] && views[p]->texture) {
> +            struct pipe_resource *res = views[p]->texture;
> +            if (res->target == PIPE_BUFFER &&
> +                (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
> +               nvc0->textures_coherent[s] |= 1 << i;
> +            else
> +               nvc0->textures_coherent[s] &= ~(1 << i);
> +         }

;(

> +
>           if (nvc0->textures[s][i]) {
>              struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
>              nouveau_bufctx_reset(bctx, bin + i);
> @@ -842,14 +860,20 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
>        nvc0->constbuf[s][i].u.data = cb->user_buffer;
>        nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
>        nvc0->constbuf_valid[s] |= 1 << i;
> +      nvc0->constbuf_coherent[s] &= ~(1 << i);
>     } else
>     if (cb) {
>        nvc0->constbuf[s][i].offset = cb->buffer_offset;
>        nvc0->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
>        nvc0->constbuf_valid[s] |= 1 << i;
> +      if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> +         nvc0->constbuf_coherent[s] |= 1 << i;
> +      else
> +         nvc0->constbuf_coherent[s] &= ~(1 << i);
>     }
>     else {
>        nvc0->constbuf_valid[s] &= ~(1 << i);
> +      nvc0->constbuf_coherent[s] &= ~(1 << i);
>     }
>  }
>
> @@ -1006,6 +1030,7 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
>      if (!vb) {
>         nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot);
>         nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot);
> +       nvc0->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
>         return;
>      }
>
> @@ -1018,9 +1043,16 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
>               nvc0->constant_vbos |= 1 << dst_index;
>            else
>               nvc0->constant_vbos &= ~(1 << dst_index);
> +          nvc0->vtxbufs_coherent &= ~(1 << dst_index);
>         } else {
>            nvc0->vbo_user &= ~(1 << dst_index);
>            nvc0->constant_vbos &= ~(1 << dst_index);
> +
> +          if (vb[i].buffer &&
> +              vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> +             nvc0->vtxbufs_coherent |= (1 << dst_index);
> +          else
> +             nvc0->vtxbufs_coherent &= ~(1 << dst_index);
>         }
>      }
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
> index c464904..585f786 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
> @@ -861,7 +861,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>  {
>     struct nvc0_context *nvc0 = nvc0_context(pipe);
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> -   int i, s;
> +   int s;
>
>     /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
>     nvc0->vb_elt_first = info->min_index + info->index_bias;
> @@ -900,27 +900,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>
>     push->kick_notify = nvc0_draw_vbo_kick_notify;
>
> -   /* TODO: Instead of iterating over all the buffer resources looking for
> -    * coherent buffers, keep track of a context-wide count.
> -    */
>     for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
> -      uint32_t valid = nvc0->constbuf_valid[s];
> -
> -      while (valid && !nvc0->cb_dirty) {
> -         const unsigned i = ffs(valid) - 1;
> -         struct pipe_resource *res;
> -
> -         valid &= ~(1 << i);
> -         if (nvc0->constbuf[s][i].user)
> -            continue;
> -
> -         res = nvc0->constbuf[s][i].u.buf;
> -         if (!res)
> -            continue;
> -
> -         if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> -            nvc0->cb_dirty = true;
> -      }
> +      if (nvc0->constbuf_coherent[s])
> +         nvc0->cb_dirty = true;
>     }
>
>     if (nvc0->cb_dirty) {
> @@ -929,14 +911,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>     }
>
>     for (s = 0; s < 5; ++s) {
> +      if (!nvc0->textures_coherent[s])
> +         continue;
> +
>        for (int i = 0; i < nvc0->num_textures[s]; ++i) {
>           struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
> -         struct pipe_resource *res;
> -         if (!tic)
> -            continue;
> -         res = nvc0->textures[s][i]->texture;
> -         if (res->target != PIPE_BUFFER ||
> -             !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
> +         if (!(nvc0->textures_coherent[s] & (1 << i)))
>              continue;
>
>           BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
> @@ -962,12 +942,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
>        PUSH_DATA (push, info->start_instance);
>     }
>
> -   for (i = 0; i < nvc0->num_vtxbufs && !nvc0->base.vbo_dirty; ++i) {
> -      if (!nvc0->vtxbuf[i].buffer)
> -         continue;
> -      if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> -         nvc0->base.vbo_dirty = true;
> -   }
> +   nvc0->base.vbo_dirty |= !!nvc0->vtxbufs_coherent;
>
>     if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
>         nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)

Sadness in your textures_coherent setting, but otherwise looks good.


More information about the mesa-dev mailing list