Mesa (master): radeonsi: kill 16-bit VS outputs if PS doesn't use them or doing Z-only draw

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 14 01:25:54 UTC 2021


Module: Mesa
Branch: master
Commit: c53f25b66834cc4357325b7d18ab61002bcc8946
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c53f25b66834cc4357325b7d18ab61002bcc8946

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Mon Mar 29 07:54:11 2021 -0400

radeonsi: kill 16-bit VS outputs if PS doesn't use them or doing Z-only draw

The kill_outputs logic uses our internal IO indices, so just add indices for
16-bit varyings. There are not enough free indices left (each index is a bit
position in a 64-bit mask), but we can reuse the indices of the varyings that
GLES doesn't have, which are all the legacy desktop GL varyings.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9051>

---

 src/gallium/drivers/radeonsi/si_shader.c         | 50 +++++++++++++-----------
 src/gallium/drivers/radeonsi/si_shader.h         |  5 ---
 src/gallium/drivers/radeonsi/si_shader_llvm_vs.c |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c  |  6 +--
 4 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 3cb7c8fb856..121feb6fbd7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -88,34 +88,41 @@ unsigned si_shader_io_get_unique_index(unsigned semantic, bool is_varying)
    case VARYING_SLOT_POS:
       return 0;
    default:
-      /* Since some shader stages use the the highest used IO index
+      /* Since some shader stages use the highest used IO index
        * to determine the size to allocate for inputs/outputs
        * (in LDS, tess and GS rings). GENERIC should be placed right
        * after POSITION to make that size as small as possible.
        */
-      if (semantic >= VARYING_SLOT_VAR0 &&
-          semantic < VARYING_SLOT_VAR0 + SI_MAX_IO_GENERIC)
-         return 1 + (semantic - VARYING_SLOT_VAR0);
+      if (semantic >= VARYING_SLOT_VAR0 && semantic <= VARYING_SLOT_VAR31)
+         return 1 + (semantic - VARYING_SLOT_VAR0); /* 1..32 */
+
+      /* Put 16-bit GLES varyings after 32-bit varyings. They can use the same indices as
+       * legacy desktop GL varyings because they are mutually exclusive.
+       */
+      if (semantic >= VARYING_SLOT_VAR0_16BIT && semantic <= VARYING_SLOT_VAR15_16BIT)
+         return 33 + (semantic - VARYING_SLOT_VAR0_16BIT); /* 33..48 */
 
       assert(!"invalid generic index");
       return 0;
+
+   /* Legacy desktop GL varyings. */
    case VARYING_SLOT_FOGC:
-      return SI_MAX_IO_GENERIC + 1;
+      return 33;
    case VARYING_SLOT_COL0:
-      return SI_MAX_IO_GENERIC + 2;
+      return 34;
    case VARYING_SLOT_COL1:
-      return SI_MAX_IO_GENERIC + 3;
+      return 35;
    case VARYING_SLOT_BFC0:
       /* If it's a varying, COLOR and BCOLOR alias. */
       if (is_varying)
-         return SI_MAX_IO_GENERIC + 2;
+         return 34;
       else
-         return SI_MAX_IO_GENERIC + 4;
+         return 36;
    case VARYING_SLOT_BFC1:
       if (is_varying)
-         return SI_MAX_IO_GENERIC + 3;
+         return 35;
       else
-         return SI_MAX_IO_GENERIC + 5;
+         return 37;
    case VARYING_SLOT_TEX0:
    case VARYING_SLOT_TEX1:
    case VARYING_SLOT_TEX2:
@@ -124,26 +131,25 @@ unsigned si_shader_io_get_unique_index(unsigned semantic, bool is_varying)
    case VARYING_SLOT_TEX5:
    case VARYING_SLOT_TEX6:
    case VARYING_SLOT_TEX7:
-      return SI_MAX_IO_GENERIC + 6 + (semantic - VARYING_SLOT_TEX0);
+      return 38 + (semantic - VARYING_SLOT_TEX0);
+   case VARYING_SLOT_CLIP_VERTEX:
+      return 46;
 
-   /* These are rarely used between LS and HS or ES and GS. */
+   /* Varyings present in both GLES and desktop GL must start at 49 after 16-bit varyings. */
    case VARYING_SLOT_CLIP_DIST0:
-      return SI_MAX_IO_GENERIC + 6 + 8;
+      return 49;
    case VARYING_SLOT_CLIP_DIST1:
-      return SI_MAX_IO_GENERIC + 6 + 8 + 1;
-   case VARYING_SLOT_CLIP_VERTEX:
-      return SI_MAX_IO_GENERIC + 6 + 8 + 2;
+      return 50;
    case VARYING_SLOT_PSIZ:
-      return SI_MAX_IO_GENERIC + 6 + 8 + 3;
+      return 51;
 
    /* These can't be written by LS, HS, and ES. */
    case VARYING_SLOT_LAYER:
-      return SI_MAX_IO_GENERIC + 6 + 8 + 4;
+      return 52;
    case VARYING_SLOT_VIEWPORT:
-      return SI_MAX_IO_GENERIC + 6 + 8 + 5;
+      return 53;
    case VARYING_SLOT_PRIMITIVE_ID:
-      STATIC_ASSERT(SI_MAX_IO_GENERIC + 6 + 8 + 6 <= 63);
-      return SI_MAX_IO_GENERIC + 6 + 8 + 6;
+      return 54;
    }
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 5346dbb5cf9..c58388ee325 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -156,11 +156,6 @@ struct si_context;
 #define SI_MAX_ATTRIBS    16
 #define SI_MAX_VS_OUTPUTS 40
 
-/* Shader IO unique indices are supported for VARYING_SLOT_VARn with an
- * index smaller than this.
- */
-#define SI_MAX_IO_GENERIC 32
-
 #define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))
 
 /* SGPR user data indices */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index 1b1067621cc..6b073eca3d9 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -476,7 +476,7 @@ static void si_build_param_exports(struct si_shader_context *ctx,
             continue;
       }
 
-      if (semantic < VARYING_SLOT_VAR0 + SI_MAX_IO_GENERIC &&
+      if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
           shader->key.opt.kill_outputs &
              (1ull << si_shader_io_get_unique_index(semantic, true)))
          continue;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 90ad60851a0..edb5d306014 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2565,7 +2565,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
             unsigned semantic = sel->info.output_semantic[i];
             unsigned id;
 
-            if (semantic < VARYING_SLOT_MAX &&
+            if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
                 semantic != VARYING_SLOT_POS &&
                 semantic != VARYING_SLOT_PSIZ &&
                 semantic != VARYING_SLOT_CLIP_VERTEX &&
@@ -2734,7 +2734,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
              semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
              (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
             sel->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
-         } else if (semantic < VARYING_SLOT_MAX &&
+         } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
                     semantic != VARYING_SLOT_EDGE) {
             sel->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
             sel->outputs_written_before_ps |= 1ull
@@ -2807,7 +2807,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
       for (i = 0; i < sel->info.num_inputs; i++) {
          unsigned semantic = sel->info.input_semantic[i];
 
-         if (semantic < VARYING_SLOT_MAX &&
+         if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
              semantic != VARYING_SLOT_PNTC) {
             sel->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
          }



More information about the mesa-commit mailing list