[Mesa-dev] [PATCH 3/3] nvc0: refactor textures/samplers validation

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Oct 26 20:00:11 UTC 2016


The first goal is to reduce code duplication between 3d and
compute and increase readability of that area.

This refactoring also tries to reduce the number of commands
sent through the pushbuffer and to avoid invalidating all caches
when binding new textures/samplers. I don't see any improvement
with Elemental, but this might help in some cases.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c |  12 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h |   7 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c     | 159 ++++++++++++++----------
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c |  98 ++-------------
 4 files changed, 113 insertions(+), 163 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 11635c9..041cf1c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -143,11 +143,7 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
 static void
 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
 {
-   bool need_flush = nvc0_validate_tsc(nvc0, 5);
-   if (need_flush) {
-      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
-      PUSH_DATA (nvc0->base.pushbuf, 0);
-   }
+   nvc0_validate_tsc(nvc0, 5);
 
    /* Invalidate all 3D samplers because they are aliased. */
    for (int s = 0; s < 5; s++)
@@ -158,11 +154,7 @@ nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
 static void
 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
 {
-   bool need_flush = nvc0_validate_tic(nvc0, 5);
-   if (need_flush) {
-      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
-      PUSH_DATA (nvc0->base.pushbuf, 0);
-   }
+   nvc0_validate_tic(nvc0, 5);
 
    /* Invalidate all 3D textures because they are aliased. */
    for (int s = 0; s < 5; s++) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 37aecae..8750edc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -330,9 +330,10 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
 extern void nvc0_init_surface_functions(struct nvc0_context *);
 
 /* nvc0_tex.c */
-bool nvc0_validate_tic(struct nvc0_context *nvc0, int s);
-bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s);
-bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+void nvc0_validate_tic(struct nvc0_context *nvc0, int s);
+void nvc0_validate_tsc(struct nvc0_context *nvc0, int s);
+void nve4_validate_tic(struct nvc0_context *nvc0, int s);
+void nve4_validate_tsc(struct nvc0_context *nvc0, int s);
 void nvc0_validate_suf(struct nvc0_context *nvc0, int s);
 void nvc0_validate_textures(struct nvc0_context *);
 void nvc0_validate_samplers(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 23c9daa..4f6788c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -24,6 +24,7 @@
 #include "nvc0/nvc0_resource.h"
 #include "nvc0/gm107_texture.xml.h"
 #include "nvc0/nvc0_compute.xml.h"
+#include "nvc0/nve4_compute.xml.h"
 #include "nv50/g80_texture.xml.h"
 #include "nv50/g80_defs.xml.h"
 
@@ -468,14 +469,13 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
    tic->tic[2] |= address >> 32;
 }
 
-bool
+void
 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 {
-   uint32_t commands[32];
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint32_t commands[3][16];
+   unsigned n[3] = { 0, 0, 0 };
    unsigned i;
-   unsigned n = 0;
-   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
@@ -484,7 +484,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 
       if (!tic) {
          if (dirty)
-            commands[n++] = (i << 1) | 0;
+            commands[0][n[0]++] = (i << 1) | 0;
          continue;
       }
       res = nv04_resource(tic->pipe.texture);
@@ -496,15 +496,11 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                                tic->tic);
-         need_flush = true;
+
+         commands[1][n[1]++] = (tic->id << 4) | 1;
       } else
       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
-         if (unlikely(s == 5))
-            BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
-         else
-            BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
-         PUSH_DATA (push, (tic->id << 4) | 1);
-         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+         commands[2][n[2]++] = (tic->id << 4) | 1;
       }
       nvc0_screen_tic_lock(nvc0->screen, tic);
 
@@ -513,7 +509,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 
       if (!dirty)
          continue;
-      commands[n++] = (tic->id << 9) | (i << 1) | 1;
+      commands[0][n[0]++] = (tic->id << 9) | (i << 1) | 1;
 
       if (unlikely(s == 5))
          BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
@@ -521,28 +517,41 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
    }
    for (; i < nvc0->state.num_textures[s]; ++i)
-      commands[n++] = (i << 1) | 0;
+      commands[0][n[0]++] = (i << 1) | 0;
 
    nvc0->state.num_textures[s] = nvc0->num_textures[s];
 
-   if (n) {
+   if (n[0]) {
       if (unlikely(s == 5))
-         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
+         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n[0]);
       else
-         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
-      PUSH_DATAp(push, commands, n);
+         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n[0]);
+      PUSH_DATAp(push, commands[0], n[0]);
+   }
+   if (n[1]) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVC0_CP(TIC_FLUSH), n[1]);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TIC_FLUSH), n[1]);
+      PUSH_DATAp(push, commands[1], n[1]);
+   }
+   if (n[2]) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVC0_CP(TEX_CACHE_CTL), n[2]);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TEX_CACHE_CTL), n[2]);
+      PUSH_DATAp(push, commands[2], n[2]);
    }
    nvc0->textures_dirty[s] = 0;
-
-   return need_flush;
 }
 
-static bool
-nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
+void
+nve4_validate_tic(struct nvc0_context *nvc0, int s)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint32_t commands[2][PIPE_MAX_SAMPLERS];
+   unsigned n[2] = { 0, 0 };
    unsigned i;
-   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
@@ -562,11 +571,11 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                                tic->tic);
-         need_flush = true;
+
+         commands[0][n[0]++] = (tic->id << 4) | 1;
       } else
       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
-         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
-         PUSH_DATA (push, (tic->id << 4) | 1);
+         commands[1][n[1]++] = (tic->id << 4) | 1;
       }
       nvc0_screen_tic_lock(nvc0->screen, tic);
 
@@ -575,34 +584,46 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
 
       nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
       nvc0->tex_handles[s][i] |= tic->id;
-      if (dirty)
-         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
+
+      if (dirty) {
+         if (unlikely(s == 5))
+            BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+         else
+            BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
+      }
    }
    for (; i < nvc0->state.num_textures[s]; ++i) {
       nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
       nvc0->textures_dirty[s] |= 1 << i;
    }
 
-   nvc0->state.num_textures[s] = nvc0->num_textures[s];
+   if (n[0]) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TIC_FLUSH), n[0]);
+      PUSH_DATAp(push, commands[0], n[0]);
+   }
+   if (n[1]) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TEX_CACHE_CTL), n[1]);
+      PUSH_DATAp(push, commands[1], n[1]);
+   }
 
-   return need_flush;
+   nvc0->state.num_textures[s] = nvc0->num_textures[s];
 }
 
 void nvc0_validate_textures(struct nvc0_context *nvc0)
 {
-   bool need_flush = false;
    int i;
 
    for (i = 0; i < 5; i++) {
       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
-         need_flush |= nve4_validate_tic(nvc0, i);
+         nve4_validate_tic(nvc0, i);
       else
-         need_flush |= nvc0_validate_tic(nvc0, i);
-   }
-
-   if (need_flush) {
-      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
-      PUSH_DATA (nvc0->base.pushbuf, 0);
+         nvc0_validate_tic(nvc0, i);
    }
 
    /* Invalidate all CP textures because they are aliased. */
@@ -612,14 +633,13 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
    nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
 }
 
-bool
+void
 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
 {
-   uint32_t commands[16];
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint32_t commands[2][16];
+   unsigned n[2] = { 0, 0 };
    unsigned i;
-   unsigned n = 0;
-   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -627,7 +647,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
       if (!(nvc0->samplers_dirty[s] & (1 << i)))
          continue;
       if (!tsc) {
-         commands[n++] = (i << 4) | 0;
+         commands[0][n[0]++] = (i << 4) | 0;
          continue;
       }
       nvc0->seamless_cube_map = tsc->seamless_cube_map;
@@ -637,34 +657,42 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                                65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                                32, tsc->tsc);
-         need_flush = true;
+
+         commands[1][n[1]++] = (tsc->id << 4) | 1;
       }
       nvc0_screen_tsc_lock(nvc0->screen, tsc);
 
-      commands[n++] = (tsc->id << 12) | (i << 4) | 1;
+      commands[0][n[0]++] = (tsc->id << 12) | (i << 4) | 1;
    }
    for (; i < nvc0->state.num_samplers[s]; ++i)
-      commands[n++] = (i << 4) | 0;
+      commands[0][n[0]++] = (i << 4) | 0;
 
    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
 
-   if (n) {
+   if (n[0]) {
       if (unlikely(s == 5))
-         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
+         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n[0]);
       else
-         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
-      PUSH_DATAp(push, commands, n);
+         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n[0]);
+      PUSH_DATAp(push, commands[0], n[0]);
+   }
+   if (n[1]) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVC0_CP(TSC_FLUSH), n[1]);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TSC_FLUSH), n[1]);
+      PUSH_DATAp(push, commands[1], n[1]);
    }
    nvc0->samplers_dirty[s] = 0;
-
-   return need_flush;
 }
 
-bool
+void
 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
 {
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint32_t commands[PIPE_MAX_SAMPLERS];
+   unsigned n = 0;
    unsigned i;
-   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -680,7 +708,8 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
                                65536 + tsc->id * 32,
                                NV_VRAM_DOMAIN(&nvc0->screen->base),
                                32, tsc->tsc);
-         need_flush = true;
+
+         commands[n++] = (tsc->id << 4) | 1;
       }
       nvc0_screen_tsc_lock(nvc0->screen, tsc);
 
@@ -692,26 +721,26 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
       nvc0->samplers_dirty[s] |= 1 << i;
    }
 
-   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+   if (n) {
+      if (unlikely(s == 5))
+         BEGIN_NIC0(push, NVE4_CP(TSC_FLUSH), n);
+      else
+         BEGIN_NIC0(push, NVC0_3D(TSC_FLUSH), n);
+      PUSH_DATAp(push, commands, n);
+   }
 
-   return need_flush;
+   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
 }
 
 void nvc0_validate_samplers(struct nvc0_context *nvc0)
 {
-   bool need_flush = false;
    int i;
 
    for (i = 0; i < 5; i++) {
       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
-         need_flush |= nve4_validate_tsc(nvc0, i);
+         nve4_validate_tsc(nvc0, i);
       else
-         need_flush |= nvc0_validate_tsc(nvc0, i);
-   }
-
-   if (need_flush) {
-      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
-      PUSH_DATA (nvc0->base.pushbuf, 0);
+         nvc0_validate_tsc(nvc0, i);
    }
 
    /* Invalidate all CP samplers because they are aliased. */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 26732a1..9947b37 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -276,15 +276,10 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
    }
 }
 
-/* Thankfully, textures with samplers follow the normal rules. */
 static void
 nve4_compute_validate_samplers(struct nvc0_context *nvc0)
 {
-   bool need_flush = nve4_validate_tsc(nvc0, 5);
-   if (need_flush) {
-      BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
-      PUSH_DATA (nvc0->base.pushbuf, 0);
-   }
+   nve4_validate_tsc(nvc0, 5);
 
    /* Invalidate all 3D samplers because they are aliased. */
    for (int s = 0; s < 5; s++)
@@ -292,13 +287,23 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
    nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
 }
 
-/* (Code duplicated at bottom for various non-convincing reasons.
- *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
- *  entries to avoid a subchannel switch.
- *  Same for texture cache flushes.
- *  Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
- */
-static void nve4_compute_validate_textures(struct nvc0_context *);
+/* Validate the compute texture bindings (stage index 5) through the shared
+ * nve4_validate_tic() path, then mark every 3D texture binding dirty. */
+static void
+nve4_compute_validate_textures(struct nvc0_context *nvc0)
+{
+   nve4_validate_tic(nvc0, 5);
+
+   /* Invalidate all 3D textures because they are aliased. */
+   for (int s = 0; s < 5; s++) {
+      for (int i = 0; i < nvc0->num_textures[s]; i++)
+         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+      nvc0->textures_dirty[s] = ~0;
+   }
+   /* Keep raising the 3D dirty bit, as the implementation this replaces did
+    * and as nve4_compute_validate_samplers() does for samplers. */
+   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
+}
 
 static void
 nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
@@ -660,78 +660,6 @@ out:
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
 }
 
-
-#define NVE4_TIC_ENTRY_INVALID 0x000fffff
-
-static void
-nve4_compute_validate_textures(struct nvc0_context *nvc0)
-{
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   const unsigned s = 5;
-   unsigned i;
-   uint32_t commands[2][32];
-   unsigned n[2] = { 0, 0 };
-
-   for (i = 0; i < nvc0->num_textures[s]; ++i) {
-      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
-      struct nv04_resource *res;
-      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
-
-      if (!tic) {
-         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
-         continue;
-      }
-      res = nv04_resource(tic->pipe.texture);
-      nvc0_update_tic(nvc0, tic, res);
-
-      if (tic->id < 0) {
-         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
-
-         nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
-                               NV_VRAM_DOMAIN(&nvc0->screen->base),
-                               32, tic->tic);
-
-         commands[0][n[0]++] = (tic->id << 4) | 1;
-      } else
-      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
-         commands[1][n[1]++] = (tic->id << 4) | 1;
-      }
-      nvc0_screen_tic_lock(nvc0->screen, tic);
-
-      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
-      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
-
-      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
-      nvc0->tex_handles[s][i] |= tic->id;
-      if (dirty)
-         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
-   }
-   for (; i < nvc0->state.num_textures[s]; ++i) {
-      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
-      nvc0->textures_dirty[s] |= 1 << i;
-   }
-
-   if (n[0]) {
-      BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
-      PUSH_DATAp(push, commands[0], n[0]);
-   }
-   if (n[1]) {
-      BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
-      PUSH_DATAp(push, commands[1], n[1]);
-   }
-
-   nvc0->state.num_textures[s] = nvc0->num_textures[s];
-
-   /* Invalidate all 3D textures because they are aliased. */
-   for (int s = 0; s < 5; s++) {
-      for (int i = 0; i < nvc0->num_textures[s]; i++)
-         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
-      nvc0->textures_dirty[s] = ~0;
-   }
-   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
-}
-
-
 #ifdef DEBUG
 static const char *nve4_cache_split_name(unsigned value)
 {
-- 
2.10.1



More information about the mesa-dev mailing list