Mesa (master): nvc0: improve vertex state validation

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Fri Apr 13 22:16:06 UTC 2012


Module: Mesa
Branch: master
Commit: edbfeed56f1ebd8517840ef48f8c87e24bb98157
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=edbfeed56f1ebd8517840ef48f8c87e24bb98157

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Fri Mar 16 17:37:32 2012 +0100

nvc0: improve vertex state validation

Now updating vertex attribute format only when necessary.

---

 src/gallium/drivers/nvc0/nvc0_3d.xml.h       |    2 +
 src/gallium/drivers/nvc0/nvc0_context.h      |    5 +-
 src/gallium/drivers/nvc0/nvc0_graph_macros.h |   14 ++
 src/gallium/drivers/nvc0/nvc0_screen.c       |    1 +
 src/gallium/drivers/nvc0/nvc0_state.c        |   45 +++++--
 src/gallium/drivers/nvc0/nvc0_vbo.c          |  189 ++++++++++++++------------
 6 files changed, 159 insertions(+), 97 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 05f9673..61ca1b8 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -1307,6 +1307,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVC0_3D_TFB_VARYING_LOCS__ESIZE				0x00000004
 #define NVC0_3D_TFB_VARYING_LOCS__LEN				0x00000020
 
+#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE		0x00003800
+
 #define NVC0_3D_VERTEX_ARRAY_SELECT				0x00003820
 
 #define NVC0_3D_BLEND_ENABLES					0x00003858
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index fdb5889..8b80f2f 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -84,6 +84,8 @@ struct nvc0_context {
       boolean prim_restart;
       uint32_t instance_elts; /* bitmask of per-instance elements */
       uint32_t instance_base;
+      uint32_t constant_vbos;
+      uint32_t constant_elts;
       int32_t index_bias;
       uint16_t scissor;
       uint8_t num_vtxbufs;
@@ -115,6 +117,7 @@ struct nvc0_context {
    struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
    unsigned num_vtxbufs;
    struct pipe_index_buffer idxbuf;
+   uint32_t constant_vbos;
    uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
    uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
    unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
@@ -240,7 +243,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
 void
 nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
 
-void nvc0_vertex_arrays_validate(struct nvc0_context *nvc0);
+void nvc0_vertex_arrays_validate(struct nvc0_context *);
 
 void nvc0_idxbuf_validate(struct nvc0_context *);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
index a0a875f..f009980 100644
--- a/src/gallium/drivers/nvc0/nvc0_graph_macros.h
+++ b/src/gallium/drivers/nvc0/nvc0_graph_macros.h
@@ -8,6 +8,20 @@
  * bra(n)z annul: no delay slot
  */
 
+/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[]: sends one bit per array, from array 0 up.
+ * Args: size (number of bits to send; presumably arrives in $r1), bitfield (bit i is for array i)
+ */
+static const uint32_t nvc0_9097_per_instance_bf[] =
+{
+   0x00000301, /* parm $r3 (the bitfield) */
+   0x00000211, /* mov $r2 0 (index of the bit to send next) */
+   0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */
+   0xffffc911, /* mov $r1 (add $r1 -0x1) (one fewer bit left to send) */
+   0x0040d043, /* send (extrshl $r3 $r2 0x1 0) (presumably: the single bit $r2 of $r3) */
+   0xffff8897, /* exit branz $r1 0x3 (keeps looping while $r1 is non-zero) */
+   0x00005211  /* mov $r2 (add $r2 0x1) (next bit; NOTE(review): likely a delay slot - confirm) */
+};
+
 /* The comments above the macros describe what they *should* be doing,
  * but we use less functionality for now.
  */
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 52cc001..949443d 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -623,6 +623,7 @@ nvc0_screen_create(struct nouveau_device *dev)
 #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
 
    i = 0;
+   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf);
    MK_MACRO(NVC0_3D_BLEND_ENABLES, nvc0_9097_blend_enables);
    MK_MACRO(NVC0_3D_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
    MK_MACRO(NVC0_3D_TEP_SELECT, nvc0_9097_tep_select);
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index d493f6e..eb71e6b 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -746,19 +746,44 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
                         const struct pipe_vertex_buffer *vb)
 {
     struct nvc0_context *nvc0 = nvc0_context(pipe);
+    uint32_t constant_vbos = 0;
     unsigned i;
 
-    for (i = 0; i < count; ++i)
-       pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
-    for (; i < nvc0->num_vtxbufs; ++i)
-       pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
-
-    memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
-    nvc0->num_vtxbufs = count;
-
-    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+    if (count != nvc0->num_vtxbufs) {
+       for (i = 0; i < count; ++i) {
+          pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
+          nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+          nvc0->vtxbuf[i].stride = vb[i].stride;
+          if (!vb[i].stride)
+             constant_vbos |= 1 << i;
+       }
+       for (; i < nvc0->num_vtxbufs; ++i)
+          pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+       nvc0->num_vtxbufs = count;
+       nvc0->dirty |= NVC0_NEW_ARRAYS;
+    } else {
+       for (i = 0; i < count; ++i) {
+          if (nvc0->vtxbuf[i].buffer == vb[i].buffer &&
+              nvc0->vtxbuf[i].buffer_offset == vb[i].buffer_offset &&
+              nvc0->vtxbuf[i].stride == vb[i].stride)
+             continue;
+          pipe_resource_reference(&nvc0->vtxbuf[i].buffer, vb[i].buffer);
+          nvc0->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
+          nvc0->vtxbuf[i].stride = vb[i].stride;
+          if (likely(vb[i].stride))
+             nvc0->dirty |= NVC0_NEW_ARRAYS;
+          else
+             constant_vbos |= 1 << i;
+       }
+    }
+    if (constant_vbos != nvc0->constant_vbos) {
+       nvc0->constant_vbos = constant_vbos;
+       nvc0->dirty |= NVC0_NEW_ARRAYS;
+    }
 
-    nvc0->dirty |= NVC0_NEW_ARRAYS;
+    if (nvc0->dirty & NVC0_NEW_ARRAYS)
+       nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
 }
 
 static void
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index 9c13adf..7cb1e0a 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -126,26 +126,44 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
     ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
 
 static void
-nvc0_emit_vtxattr(struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb,
-                  struct pipe_vertex_element *ve, unsigned attr)
+nvc0_update_constant_vertex_attribs(struct nvc0_context *nvc0)
 {
-   const void *data;
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   struct nv04_resource *res = nv04_resource(vb->buffer);
-   float v[4];
-   int i;
-   const unsigned nc = util_format_get_nr_components(ve->src_format);
+   uint32_t mask = nvc0->state.constant_elts;
 
-   data = nouveau_resource_map_offset(&nvc0->base, res, vb->buffer_offset +
-                                      ve->src_offset, NOUVEAU_BO_RD);
+   while (unlikely(mask)) {
+      const int i = ffs(mask) - 1;
+      uint32_t mode;
+      struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+      struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+      const struct util_format_description *desc;
+      void *dst;
+      const void *src = nouveau_resource_map_offset(&nvc0->base,
+         nv04_resource(vb->buffer),
+         vb->buffer_offset + ve->src_offset, NOUVEAU_BO_RD);
 
-   util_format_read_4f(ve->src_format, v, 0, data, 0, 0, 0, 1, 1);
+      mask &= ~(1 << i);
 
-   PUSH_SPACE(push, 6);
-   BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), nc + 1);
-   PUSH_DATA (push, VTX_ATTR(attr, nc, FLOAT, 32));
-   for (i = 0; i < nc; ++i)
-      PUSH_DATAf(push, v[i]);
+      desc = util_format_description(ve->src_format);
+
+      PUSH_SPACE(push, 6);
+      BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5);
+      dst = push->cur + 1;
+      if (desc->channel[0].pure_integer) {
+         if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+            mode = VTX_ATTR(i, 4, SINT, 32);
+            desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1);
+         } else {
+            mode = VTX_ATTR(i, 4, UINT, 32);
+            desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1);
+         }
+      } else {
+         mode = VTX_ATTR(i, 4, FLOAT, 32);
+         desc->unpack_rgba_float(dst, 0, src, 0, 1, 1);
+      }
+      *push->cur = mode;
+      push->cur += 5;
+   }
 }
 
 static INLINE void
@@ -225,13 +243,8 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
       struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
       struct nv04_resource *buf = nv04_resource(vb->buffer);
 
-      if (!(nvc0->vbo_user & (1 << b)))
-         continue;
-
-      if (!vb->stride) {
-         nvc0_emit_vtxattr(nvc0, vb, ve, i);
+      if (!(nvc0->vbo_user & (1 << b)) || !vb->stride)
          continue;
-      }
       nvc0_vbuf_range(nvc0, b, &base, &size);
 
       if (!(written & (1 << b))) {
@@ -268,83 +281,88 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
    struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
    struct pipe_vertex_buffer *vb;
    struct nvc0_vertex_element *ve;
+   uint32_t const_vbos;
    unsigned i;
+   boolean update_vertex;
 
    if (unlikely(vertex->need_conversion) ||
        unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
-      nvc0->vbo_fifo = ~0;
       nvc0->vbo_user = 0;
+      nvc0->vbo_fifo = ~nvc0->constant_vbos;
    } else {
       nvc0_prevalidate_vbufs(nvc0);
+      nvc0->vbo_fifo &= ~nvc0->constant_vbos;
    }
+   const_vbos = nvc0->vbo_fifo ? 0 : nvc0->constant_vbos;
+
+   update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
+      (const_vbos != nvc0->state.constant_vbos);
+   if (update_vertex) {
+      uint32_t *restrict data;
+      const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
+
+      if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
+         nvc0->state.instance_elts = vertex->instance_elts;
+         assert(n); /* if (n == 0), both masks should be 0 */
+         PUSH_SPACE(push, 3);
+         BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+         PUSH_DATA (push, n);
+         PUSH_DATA (push, vertex->instance_elts);
+      }
 
-   PUSH_SPACE(push, vertex->num_elements + 1);
-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), vertex->num_elements);
-   for (i = 0; i < vertex->num_elements; ++i) {
-      ve = &vertex->element[i];
-      vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
-
-      if (likely(vb->stride) || nvc0->vbo_fifo) {
-         PUSH_DATA(push, ve->state);
-      } else {
-         PUSH_DATA(push, ve->state | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
-         nvc0->vbo_fifo &= ~(1 << i);
+      nvc0->state.num_vtxelts = vertex->num_elements;
+      nvc0->state.constant_vbos = const_vbos;
+      nvc0->state.constant_elts = 0;
+
+      PUSH_SPACE(push, n * 2 + 1);
+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+      data = push->cur;
+      push->cur += n;
+      for (i = 0; i < vertex->num_elements; ++data, ++i) {
+         ve = &vertex->element[i];
+         *data = ve->state;
+         if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
+            *data |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
+            nvc0->state.constant_elts |= 1 << i;
+            IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+         }
+      }
+      for (; i < n; ++data, ++i) {
+         IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+         *data = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
       }
    }
 
-   PUSH_SPACE(push, vertex->num_elements * 16);
+   PUSH_SPACE(push, vertex->num_elements * 8);
    for (i = 0; i < vertex->num_elements; ++i) {
       struct nv04_resource *res;
       unsigned size, offset;
-      
+
+      if (nvc0->state.constant_elts & (1 << i))
+         continue;
       ve = &vertex->element[i];
       vb = &nvc0->vtxbuf[ve->pipe.vertex_buffer_index];
 
-      if (unlikely(ve->pipe.instance_divisor)) {
-         if (!(nvc0->state.instance_elts & (1 << i))) {
-            IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
-         }
-         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
-         PUSH_DATA (push, ve->pipe.instance_divisor);
-      } else
-      if (unlikely(nvc0->state.instance_elts & (1 << i))) {
-         IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
-      }
-
       res = nv04_resource(vb->buffer);
-
-      if (nvc0->vbo_fifo || unlikely(vb->stride == 0)) {
-         if (!nvc0->vbo_fifo)
-            nvc0_emit_vtxattr(nvc0, vb, &ve->pipe, i);
-         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
-         PUSH_DATA (push, 0);
-         continue;
-      }
-
-      size = vb->buffer->width0;
       offset = ve->pipe.src_offset + vb->buffer_offset;
+      size = vb->buffer->width0;
 
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
-      PUSH_DATA (push, (1 << 12) | vb->stride);
-      BEGIN_1IC0(push, NVC0_3D(VERTEX_ARRAY_SELECT), 5);
-      PUSH_DATA (push, i);
+      if (unlikely(ve->pipe.instance_divisor)) {
+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
+         PUSH_DATA (push, (1 << 12) | vb->stride);
+         PUSH_DATAh(push, res->address + offset);
+         PUSH_DATA (push, res->address + offset);
+         PUSH_DATA (push, ve->pipe.instance_divisor);
+      } else {
+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
+         PUSH_DATA (push, (1 << 12) | vb->stride);
+         PUSH_DATAh(push, res->address + offset);
+         PUSH_DATA (push, res->address + offset);
+      }
+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
       PUSH_DATAh(push, res->address + size - 1);
       PUSH_DATA (push, res->address + size - 1);
-      PUSH_DATAh(push, res->address + offset);
-      PUSH_DATA (push, res->address + offset);
    }
-   for (; i < nvc0->state.num_vtxelts; ++i) {
-      PUSH_SPACE(push, 5);
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(i)), 1);
-      PUSH_DATA (push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
-      if (unlikely(nvc0->state.instance_elts & (1 << i)))
-         IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 0);
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
-      PUSH_DATA (push, 0);
-   }
-
-   nvc0->state.num_vtxelts = vertex->num_elements;
-   nvc0->state.instance_elts = vertex->instance_elts;
 }
 
 void
@@ -393,7 +411,6 @@ nvc0_prim_gl(unsigned prim)
    NVC0_PRIM_GL_CASE(PATCHES); */
    default:
       return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
-      break;
    }
 }
 
@@ -666,6 +683,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       push->kick_notify = nvc0_default_kick_notify;
       return;
    }
+   nvc0_update_constant_vertex_attribs(nvc0);
 
    /* space for base instance, flush, and prim restart */
    PUSH_SPACE(push, 8);
@@ -678,19 +696,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    }
 
    if (nvc0->base.vbo_dirty) {
-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 1);
-      PUSH_DATA (push, 0);
+      IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
       nvc0->base.vbo_dirty = FALSE;
    }
 
-   if (unlikely(info->count_from_stream_output)) {
-      nvc0_draw_stream_output(nvc0, info);
-   } else
-   if (!info->indexed) {
-      nvc0_draw_arrays(nvc0,
-                       info->mode, info->start, info->count,
-                       info->instance_count);
-   } else {
+   if (info->indexed) {
       boolean shorten = info->max_index <= 65535;
 
       assert(nvc0->idxbuf.buffer);
@@ -719,6 +729,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nvc0_draw_elements(nvc0, shorten,
                          info->mode, info->start, info->count,
                          info->instance_count, info->index_bias);
+   } else
+   if (unlikely(info->count_from_stream_output)) {
+      nvc0_draw_stream_output(nvc0, info);
+   } else {
+      nvc0_draw_arrays(nvc0,
+                       info->mode, info->start, info->count,
+                       info->instance_count);
    }
    push->kick_notify = nvc0_default_kick_notify;
 




More information about the mesa-commit mailing list