[Mesa-dev] [PATCH v3] nvc0: serialize before updating some constant buffer bindings

Rhys Perry pendingchaos02 at gmail.com
Thu Jul 26 14:24:49 UTC 2018


To avoid serializing, this has the user constant buffer always be 65536
bytes and enabled unless it's required that something else is used for
constant buffer 0.

Fixes artifacts with at least XCOM: Enemy Within, 0 A.D. and Unigine
Valley, Heaven and Superposition.

v2: changed uniform_buffer_bound to be bool instead of a uint32_t
v3: remove magic constants
v3: remove pointless code in nvc0_validate_driverconst

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=100177
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 13 +++---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     | 48 ++++++++++++++++++---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     | 17 +++++++-
 .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 50 ++++++++--------------
 4 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 11635c9465..4963493877 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -181,7 +181,7 @@ nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
    /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
    for (s = 0; s < 5; s++) {
       nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
-      nvc0->state.uniform_buffer_bound[s] = 0;
+      nvc0->state.uniform_buffer_bound[s] = false;
    }
    nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
 }
@@ -203,19 +203,18 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
          assert(i == 0); /* we really only want OpenGL uniforms here */
          assert(nvc0->constbuf[s][0].u.data);
 
-         if (nvc0->state.uniform_buffer_bound[s] < size) {
-            nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+         if (!nvc0->state.uniform_buffer_bound[s]) {
+            nvc0->state.uniform_buffer_bound[s] = true;
 
             BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
-            PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+            PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
             PUSH_DATAh(push, bo->offset + base);
             PUSH_DATA (push, bo->offset + base);
             BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
             PUSH_DATA (push, (0 << 8) | 1);
          }
          nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
-                         base, nvc0->state.uniform_buffer_bound[s],
-                         0, (size + 3) / 4,
+                         base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
       } else {
          struct nv04_resource *res =
@@ -236,7 +235,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
             PUSH_DATA (push, (i << 8) | 0);
          }
          if (i == 0)
-            nvc0->state.uniform_buffer_bound[s] = 0;
+            nvc0->state.uniform_buffer_bound[s] = false;
       }
    }
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 37e10dcd07..bcd30b560b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -382,7 +382,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_MAX_OUTPUTS:
       return 32;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-      return 65536;
+      return NVC0_MAX_CONSTBUF_SIZE;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
       return NVC0_MAX_PIPE_CONSTBUFS;
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
@@ -829,6 +829,40 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
    return 0;
 }
 
+void
+nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
+                       int stage, int index, int size, uint64_t addr)
+{
+   assert(stage != 5);
+
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+   if (screen->base.class_3d >= GM107_3D_CLASS) {
+      struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index];
+
+      // TODO: Better figure out the conditions in which this is needed
+      bool serialize = binding->addr == addr && binding->size != size;
+      if (can_serialize)
+         serialize = serialize && *can_serialize;
+      if (serialize) {
+         IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+         if (can_serialize)
+            *can_serialize = false;
+      }
+
+      binding->addr = addr;
+      binding->size = size;
+   }
+
+   if (size >= 0) {
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, size);
+      PUSH_DATAh(push, addr);
+      PUSH_DATA (push, addr);
+   }
+   IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
+}
+
 #define FAIL_SCREEN_INIT(str, err)                    \
    do {                                               \
       NOUVEAU_ERR(str, err);                          \
@@ -1272,14 +1306,14 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    /* XXX: Compute and 3D are somehow aliased on Fermi. */
    for (i = 0; i < 5; ++i) {
+      unsigned j = 0;
+      for (j = 0; j < 16; j++)
+         screen->cb_bindings[i][j].size = -1;
+
       /* TIC and TSC entries for each unit (nve4+ only) */
       /* auxiliary constants (6 user clip planes, base instance id) */
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
+      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
+                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
       if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
          unsigned j;
          BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index efd62a8a41..d8223ba9d5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -16,7 +16,9 @@
 #define NVE4_IMG_MAX_HANDLES 512
 
 /* doesn't count driver-reserved slot */
-#define NVC0_MAX_PIPE_CONSTBUFS         15
+#define NVC0_MAX_PIPE_CONSTBUFS 15
+#define NVC0_MAX_CONST_BUFFERS  16
+#define NVC0_MAX_CONSTBUF_SIZE  65536
 
 #define NVC0_MAX_SURFACE_SLOTS 16
 
@@ -53,12 +55,17 @@ struct nvc0_graph_state {
    uint8_t tls_required; /* bitmask of shader types using l[] */
    uint8_t clip_enable;
    uint32_t clip_mode;
-   uint32_t uniform_buffer_bound[6];
+   bool uniform_buffer_bound[6];
    struct nvc0_transform_feedback_state *tfb;
    bool seamless_cube_map;
    bool post_depth_coverage;
 };
 
+struct nvc0_cb_binding {
+   uint64_t addr;
+   int size;
+};
+
 struct nvc0_screen {
    struct nouveau_screen base;
 
@@ -114,6 +121,9 @@ struct nvc0_screen {
       bool mp_counters_enabled;
    } pm;
 
+   /* only maintained on Maxwell+ */
+   struct nvc0_cb_binding cb_bindings[5][NVC0_MAX_CONST_BUFFERS];
+
    struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
    struct nouveau_object *eng2d;
    struct nouveau_object *m2mf;
@@ -146,6 +156,9 @@ int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 
 int nvc0_screen_resize_text_area(struct nvc0_screen *, uint64_t);
 
+// 3D Only
+void nvc0_screen_bind_cb_3d(struct nvc0_screen *, bool *, int, int, int, uint64_t);
+
 static inline void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index cc18f41c4b..4f004a4f70 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -565,9 +565,10 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0)
 static void
 nvc0_constbufs_validate(struct nvc0_context *nvc0)
 {
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    unsigned s;
 
+   bool can_serialize = true;
+
    for (s = 0; s < 5; ++s) {
       while (nvc0->constbuf_dirty[s]) {
          int i = ffs(nvc0->constbuf_dirty[s]) - 1;
@@ -580,41 +581,34 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
             assert(i == 0); /* we really only want OpenGL uniforms here */
             assert(nvc0->constbuf[s][0].u.data);
 
-            if (nvc0->state.uniform_buffer_bound[s] < size) {
-               nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+            if (!nvc0->state.uniform_buffer_bound[s]) {
+               nvc0->state.uniform_buffer_bound[s] = true;
 
-               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-               PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
-               PUSH_DATAh(push, bo->offset + base);
-               PUSH_DATA (push, bo->offset + base);
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (0 << 4) | 1);
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
+                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
             }
             nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
-                         base, nvc0->state.uniform_buffer_bound[s],
+                         base, NVC0_MAX_CONSTBUF_SIZE,
                          0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
          } else {
             struct nv04_resource *res =
                nv04_resource(nvc0->constbuf[s][i].u.buf);
             if (res) {
-               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-               PUSH_DATA (push, nvc0->constbuf[s][i].size);
-               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
-               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (i << 4) | 1);
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
+                                      nvc0->constbuf[s][i].size,
+                                      res->address + nvc0->constbuf[s][i].offset);
 
                BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
 
                nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
                res->cb_bindings[s] |= 1 << i;
-            } else {
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (i << 4) | 0);
+
+               if (i == 0)
+                  nvc0->state.uniform_buffer_bound[s] = false;
+            } else if (i != 0) {
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
             }
-            if (i == 0)
-               nvc0->state.uniform_buffer_bound[s] = 0;
          }
       }
    }
@@ -623,7 +617,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
       /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
       nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
       nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
-      nvc0->state.uniform_buffer_bound[5] = 0;
+      nvc0->state.uniform_buffer_bound[5] = false;
    }
 }
 
@@ -710,18 +704,12 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
 static void
 nvc0_validate_driverconst(struct nvc0_context *nvc0)
 {
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_screen *screen = nvc0->screen;
    int i;
 
-   for (i = 0; i < 5; ++i) {
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
-   }
+   for (i = 0; i < 5; ++i)
+      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
+                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
 
    nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
 }
-- 
2.14.4



More information about the mesa-dev mailing list