[Mesa-dev] [PATCH] nvc0: serialize before updating some constant buffer bindings

Ilia Mirkin imirkin at alum.mit.edu
Wed Jul 25 13:55:31 UTC 2018


On Wed, Jul 25, 2018 at 9:13 AM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> To avoid serializing, this has the user constant buffer always be 65536
> bytes and enabled unless it's required that something else is used for
> constant buffer 0.
>
> Fixes artifacts with at least XCOM: Enemy Within, 0 A.D. and Unigine
> Valley, Heaven and Superposition.
>
> v2: changed uniform_buffer_bound to be bool instead of a uint32_t
>
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=100177
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 13 +++---
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     | 46 +++++++++++++++++++---
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     | 13 +++++-
>  .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 40 +++++++++----------
>  4 files changed, 76 insertions(+), 36 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 11635c9465..578bf11d70 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -181,7 +181,7 @@ nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
>     /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
>     for (s = 0; s < 5; s++) {
>        nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
> -      nvc0->state.uniform_buffer_bound[s] = 0;
> +      nvc0->state.uniform_buffer_bound[s] = false;
>     }
>     nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
>  }
> @@ -203,19 +203,18 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
>           assert(i == 0); /* we really only want OpenGL uniforms here */
>           assert(nvc0->constbuf[s][0].u.data);
>
> -         if (nvc0->state.uniform_buffer_bound[s] < size) {
> -            nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
> +         if (!nvc0->state.uniform_buffer_bound[s]) {

Above:

   const int s = 5;

That won't end well. Bump the allocation to 6?

> +            nvc0->state.uniform_buffer_bound[s] = true;
>
>              BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
> -            PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
> +            PUSH_DATA (push, 65536);

Please add a #define NVC0_MAX_CONSTBUF_SIZE 65536 and usage thereof
instead of the magic constant, and use here and below.

>              PUSH_DATAh(push, bo->offset + base);
>              PUSH_DATA (push, bo->offset + base);
>              BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
>              PUSH_DATA (push, (0 << 8) | 1);
>           }
>           nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
> -                         base, nvc0->state.uniform_buffer_bound[s],
> -                         0, (size + 3) / 4,
> +                         base, 65536, 0, (size + 3) / 4,
>                           nvc0->constbuf[s][0].u.data);
>        } else {
>           struct nv04_resource *res =
> @@ -236,7 +235,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
>              PUSH_DATA (push, (i << 8) | 0);
>           }
>           if (i == 0)
> -            nvc0->state.uniform_buffer_bound[s] = 0;
> +            nvc0->state.uniform_buffer_bound[s] = false;
>        }
>     }
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 37e10dcd07..197b0c1911 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -829,6 +829,40 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
>     return 0;
>  }
>
> +void
> +nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
> +                       int stage, int index, int size, uint64_t addr)
> +{
> +   assert(stage != 5);
> +
> +   struct nouveau_pushbuf *push = screen->base.pushbuf;
> +
> +   if (screen->base.class_3d >= GM107_3D_CLASS) {
> +      struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index];
> +
> +      // TODO: Better figure out the conditions in which this is needed
> +      bool serialize = binding->addr == addr && binding->size != size;
> +      if (can_serialize)
> +         serialize = serialize && *can_serialize;
> +      if (serialize) {
> +         IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
> +         if (can_serialize)
> +            *can_serialize = false;
> +      }
> +
> +      binding->addr = addr;
> +      binding->size = size;
> +   }
> +
> +   if (size >= 0) {
> +      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +      PUSH_DATA (push, size);
> +      PUSH_DATAh(push, addr);
> +      PUSH_DATA (push, addr);
> +   }
> +   IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
> +}
> +
>  #define FAIL_SCREEN_INIT(str, err)                    \
>     do {                                               \
>        NOUVEAU_ERR(str, err);                          \
> @@ -1272,14 +1306,14 @@ nvc0_screen_create(struct nouveau_device *dev)
>
>     /* XXX: Compute and 3D are somehow aliased on Fermi. */
>     for (i = 0; i < 5; ++i) {
> +      unsigned j = 0;
> +      for (j = 0; j < 16; j++)
> +         screen->cb_bindings[i][j].size = -1;
> +
>        /* TIC and TSC entries for each unit (nve4+ only) */
>        /* auxiliary constants (6 user clip planes, base instance id) */
> -      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> -      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> -      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
> -      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
> -      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
> -      PUSH_DATA (push, (15 << 4) | 1);
> +      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
> +                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
>        if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
>           unsigned j;
>           BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> index efd62a8a41..d088db984a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> @@ -53,12 +53,17 @@ struct nvc0_graph_state {
>     uint8_t tls_required; /* bitmask of shader types using l[] */
>     uint8_t clip_enable;
>     uint32_t clip_mode;
> -   uint32_t uniform_buffer_bound[6];
> +   bool uniform_buffer_bound[6];
>     struct nvc0_transform_feedback_state *tfb;
>     bool seamless_cube_map;
>     bool post_depth_coverage;
>  };
>
> +struct nvc0_cb_binding {
> +   uint64_t addr;
> +   int size;
> +};
> +
>  struct nvc0_screen {
>     struct nouveau_screen base;
>
> @@ -114,6 +119,9 @@ struct nvc0_screen {
>        bool mp_counters_enabled;
>     } pm;
>
> +   /* only maintained on Maxwell+ */
> +   struct nvc0_cb_binding cb_bindings[5][16];

I hope that there's a define for 16.... NVC0_MAX_CONST_BUFFERS or
something along those lines. Use it.

> +
>     struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
>     struct nouveau_object *eng2d;
>     struct nouveau_object *m2mf;
> @@ -146,6 +154,9 @@ int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
>
>  int nvc0_screen_resize_text_area(struct nvc0_screen *, uint64_t);
>
> +// 3D Only
> +void nvc0_screen_bind_cb_3d(struct nvc0_screen *, bool *, int, int, int, uint64_t);
> +
>  static inline void
>  nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
>  {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index cc18f41c4b..d95522a1d3 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -565,9 +565,10 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0)
>  static void
>  nvc0_constbufs_validate(struct nvc0_context *nvc0)
>  {
> -   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     unsigned s;
>
> +   bool can_serialize = true;
> +
>     for (s = 0; s < 5; ++s) {
>        while (nvc0->constbuf_dirty[s]) {
>           int i = ffs(nvc0->constbuf_dirty[s]) - 1;
> @@ -580,41 +581,34 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
>              assert(i == 0); /* we really only want OpenGL uniforms here */
>              assert(nvc0->constbuf[s][0].u.data);
>
> -            if (nvc0->state.uniform_buffer_bound[s] < size) {
> -               nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
> +            if (!nvc0->state.uniform_buffer_bound[s]) {
> +               nvc0->state.uniform_buffer_bound[s] = true;
>
> -               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> -               PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
> -               PUSH_DATAh(push, bo->offset + base);
> -               PUSH_DATA (push, bo->offset + base);
> -               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
> -               PUSH_DATA (push, (0 << 4) | 1);
> +               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
> +                                      65536, bo->offset + base);

Same comment as above about the magic 65536.

>              }
>              nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
> -                         base, nvc0->state.uniform_buffer_bound[s],
> +                         base, 65536,
>                           0, (size + 3) / 4,
>                           nvc0->constbuf[s][0].u.data);
>           } else {
>              struct nv04_resource *res =
>                 nv04_resource(nvc0->constbuf[s][i].u.buf);
>              if (res) {
> -               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> -               PUSH_DATA (push, nvc0->constbuf[s][i].size);
> -               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
> -               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
> -               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
> -               PUSH_DATA (push, (i << 4) | 1);
> +               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
> +                                      nvc0->constbuf[s][i].size,
> +                                      res->address + nvc0->constbuf[s][i].offset);
>
>                 BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
>
>                 nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
>                 res->cb_bindings[s] |= 1 << i;
> -            } else {
> -               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
> -               PUSH_DATA (push, (i << 4) | 0);
> +
> +               if (i == 0)
> +                  nvc0->state.uniform_buffer_bound[s] = false;
> +            } else if (i != 0) {
> +               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
>              }
> -            if (i == 0)
> -               nvc0->state.uniform_buffer_bound[s] = 0;
>           }
>        }
>     }
> @@ -623,7 +617,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
>        /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
>        nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
>        nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
> -      nvc0->state.uniform_buffer_bound[5] = 0;
> +      nvc0->state.uniform_buffer_bound[5] = false;
>     }
>  }
>
> @@ -721,6 +715,8 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0)
>        PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
>        BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
>        PUSH_DATA (push, (15 << 4) | 1);
> +      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
> +                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));

Doesn't the bind_cb_3d call mean you can remove some of the above lines?

>     }
>
>     nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
> --
> 2.14.4
>


More information about the mesa-dev mailing list