Mesa (main): zink: remove gfx program slot mapping

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jul 6 18:15:32 UTC 2021


Module: Mesa
Branch: main
Commit: 61f2667cf5da6fd1a6d957761c8e244c1bc0da35
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=61f2667cf5da6fd1a6d957761c8e244c1bc0da35

Author: Mike Blumenkrantz <michael.blumenkrantz at gmail.com>
Date:   Fri May 14 13:29:48 2021 -0400

zink: remove gfx program slot mapping

If shaders aren't being shared between programs, io can instead be assigned
directly between the shader stages, such that only each producer/consumer
pair needs to have its slots match up.

This requires that each gfx program struct store its own base nir for each
shader, in order to avoid breaking io for other programs which reuse shader states.

Reviewed-by: Dave Airlie <airlied at redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11691>

---

 src/gallium/drivers/zink/zink_compiler.c | 188 ++++++++++++++++++++++---------
 src/gallium/drivers/zink/zink_compiler.h |   6 +-
 src/gallium/drivers/zink/zink_program.c  | 109 +++++-------------
 src/gallium/drivers/zink/zink_program.h  |   3 +-
 4 files changed, 169 insertions(+), 137 deletions(-)

diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 02f44f72a76..40ca2c62a20 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -524,69 +524,155 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_
 }
 
 static void
-assign_io_locations(nir_shader *nir, unsigned char *shader_slot_map,
-                    unsigned char *shader_slots_reserved)
+assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
 {
-   unsigned reserved = shader_slots_reserved ? *shader_slots_reserved : 0;
-   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
-      if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) ||
-          (nir->info.stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out))
-         continue;
+   unsigned slot = var->data.location;
+   switch (var->data.location) {
+   case VARYING_SLOT_POS:
+   case VARYING_SLOT_PNTC:
+   case VARYING_SLOT_PSIZ:
+   case VARYING_SLOT_LAYER:
+   case VARYING_SLOT_PRIMITIVE_ID:
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CULL_DIST0:
+   case VARYING_SLOT_VIEWPORT:
+   case VARYING_SLOT_FACE:
+   case VARYING_SLOT_TESS_LEVEL_OUTER:
+   case VARYING_SLOT_TESS_LEVEL_INNER:
+      /* use a sentinel value to avoid counting later */
+      var->data.driver_location = UINT_MAX;
+      break;
+
+   default:
+      if (var->data.patch) {
+         assert(var->data.location >= VARYING_SLOT_PATCH0);
+         slot = var->data.location - VARYING_SLOT_PATCH0;
+      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
+                 var->data.mode == nir_var_shader_in &&
+                  stage == MESA_SHADER_TESS_EVAL) {
+         slot = var->data.location - VARYING_SLOT_VAR0;
+      } else {
+         if (slot_map[var->data.location] == 0xff) {
+            assert(*reserved < MAX_VARYING);
+            slot_map[var->data.location] = *reserved;
+            *reserved += glsl_count_vec4_slots(var->type, false, false);
+         }
+         slot = slot_map[var->data.location];
+         assert(slot < MAX_VARYING);
+      }
+      var->data.driver_location = slot;
+   }
+}
 
-      unsigned slot = var->data.location;
-      switch (var->data.location) {
-      case VARYING_SLOT_POS:
-      case VARYING_SLOT_PNTC:
-      case VARYING_SLOT_PSIZ:
-      case VARYING_SLOT_LAYER:
-      case VARYING_SLOT_PRIMITIVE_ID:
-      case VARYING_SLOT_CLIP_DIST0:
-      case VARYING_SLOT_CULL_DIST0:
-      case VARYING_SLOT_VIEWPORT:
-      case VARYING_SLOT_FACE:
-      case VARYING_SLOT_TESS_LEVEL_OUTER:
-      case VARYING_SLOT_TESS_LEVEL_INNER:
-         /* use a sentinel value to avoid counting later */
-         var->data.driver_location = UINT_MAX;
-         break;
+ALWAYS_INLINE static bool
+is_texcoord(gl_shader_stage stage, const nir_variable *var)
+{
+   if (stage != MESA_SHADER_FRAGMENT)
+      return false;
+   return var->data.location >= VARYING_SLOT_TEX0 && 
+          var->data.location <= VARYING_SLOT_TEX7;
+}
 
-      default:
-         if (var->data.patch) {
-            assert(var->data.location >= VARYING_SLOT_PATCH0);
-            slot = var->data.location - VARYING_SLOT_PATCH0;
-         } else if (var->data.location >= VARYING_SLOT_VAR0 &&
-                     ((var->data.mode == nir_var_shader_out &&
-                     nir->info.stage == MESA_SHADER_TESS_CTRL) ||
-                    (var->data.mode != nir_var_shader_out &&
-                     nir->info.stage == MESA_SHADER_TESS_EVAL))) {
-            slot = var->data.location - VARYING_SLOT_VAR0;
-         } else {
-            if (shader_slot_map[var->data.location] == 0xff) {
-               assert(reserved < MAX_VARYING);
-               shader_slot_map[var->data.location] = reserved;
-               if (nir->info.stage == MESA_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
-                  reserved += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
-               else
-                  reserved += glsl_count_vec4_slots(var->type, false, false);
-            }
-            slot = shader_slot_map[var->data.location];
-            assert(slot < MAX_VARYING);
+static bool
+assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
+{
+   switch (var->data.location) {
+   case VARYING_SLOT_POS:
+   case VARYING_SLOT_PNTC:
+   case VARYING_SLOT_PSIZ:
+   case VARYING_SLOT_LAYER:
+   case VARYING_SLOT_PRIMITIVE_ID:
+   case VARYING_SLOT_CLIP_DIST0:
+   case VARYING_SLOT_CULL_DIST0:
+   case VARYING_SLOT_VIEWPORT:
+   case VARYING_SLOT_FACE:
+   case VARYING_SLOT_TESS_LEVEL_OUTER:
+   case VARYING_SLOT_TESS_LEVEL_INNER:
+      /* use a sentinel value to avoid counting later */
+      var->data.driver_location = UINT_MAX;
+      break;
+   default:
+      if (var->data.patch) {
+         assert(var->data.location >= VARYING_SLOT_PATCH0);
+         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
+          stage == MESA_SHADER_TESS_CTRL &&
+          var->data.mode == nir_var_shader_out)
+         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
+      else {
+         if (slot_map[var->data.location] == (unsigned char)-1) {
+            if (!is_texcoord(stage, var))
+               /* dead io */
+               return false;
+            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
+            slot_map[var->data.location] = (*reserved)++;
          }
-         var->data.driver_location = slot;
+         var->data.driver_location = slot_map[var->data.location];
       }
    }
+   return true;
+}
 
-   if (shader_slots_reserved)
-      *shader_slots_reserved = reserved;
+
+static bool
+rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
+{
+   nir_variable *var = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_deref)
+      return false;
+   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
+   if (deref_var != var)
+      return false;
+   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
+   return true;
+}
+
+void
+zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
+{
+   unsigned reserved = 0;
+   unsigned char slot_map[VARYING_SLOT_MAX];
+   memset(slot_map, -1, sizeof(slot_map));
+   bool do_fixup = false;
+   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
+   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
+      /* never assign from tcs -> tes, always invert */
+      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
+         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
+      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
+         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
+            /* this is an output, nothing more needs to be done for it to be dropped */
+            do_fixup = true;
+      }
+   } else {
+      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
+         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
+      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
+         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
+            do_fixup = true;
+            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
+            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
+         }
+      }
+   }
+   if (!do_fixup)
+      return;
+   nir_fixup_deref_modes(nir);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+   optimize_nir(nir);
 }
 
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
-                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved)
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, struct zink_shader_key *key)
 {
    VkShaderModule mod = VK_NULL_HANDLE;
    void *streamout = NULL;
-   nir_shader *nir = nir_shader_clone(NULL, zs->nir);
+   nir_shader *nir = nir_shader_clone(NULL, base_nir);
 
    if (key) {
       if (key->inline_uniforms) {
@@ -640,8 +726,6 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z
    }
    NIR_PASS_V(nir, nir_convert_from_ssa, true);
 
-   assign_io_locations(nir, shader_slot_map, shader_slots_reserved);
-
    struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
    if (!spirv)
       goto done;
diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h
index 7191f3a8412..4f6a4ddfa77 100644
--- a/src/gallium/drivers/zink/zink_compiler.h
+++ b/src/gallium/drivers/zink/zink_compiler.h
@@ -93,10 +93,10 @@ struct zink_shader {
 
 void
 zink_screen_init_compiler(struct zink_screen *screen);
-
+void
+zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer);
 VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key,
-                    unsigned char *shader_slot_map, unsigned char *shader_slots_reserved);
+zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, struct zink_shader_key *key);
 
 struct zink_shader *
 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c
index f22481093a7..9fd07a4010d 100644
--- a/src/gallium/drivers/zink/zink_program.c
+++ b/src/gallium/drivers/zink/zink_program.c
@@ -249,8 +249,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_shader *zs, st
          return NULL;
       }
       pipe_reference_init(&zm->reference, 1);
-      mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, &key,
-                                prog->shader_slot_map, &prog->shader_slots_reserved);
+      mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, prog->nir[stage], &key);
       if (!mod) {
          ralloc_free(keybox);
          FREE(zm);
@@ -370,82 +369,6 @@ equals_gfx_pipeline_state(const void *a, const void *b)
           !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash));
 }
 
-static void
-init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog)
-{
-   unsigned existing_shaders = 0;
-   bool needs_new_map = false;
-
-   /* if there's a case where we'll be reusing any shaders, we need to (maybe) reuse the slot map too */
-   if (ctx->curr_program) {
-      for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
-          if (ctx->curr_program->shaders[i])
-             existing_shaders |= 1 << i;
-      }
-      /* if there's reserved slots, check whether we have enough remaining slots */
-      if (ctx->curr_program->shader_slots_reserved) {
-         uint64_t max_outputs = 0;
-         uint32_t num_xfb_outputs = 0;
-         for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
-            if (i != PIPE_SHADER_TESS_CTRL &&
-                i != PIPE_SHADER_FRAGMENT &&
-                ctx->gfx_stages[i]) {
-               uint32_t user_outputs = ctx->gfx_stages[i]->nir->info.outputs_written >> 32;
-               uint32_t builtin_outputs = ctx->gfx_stages[i]->nir->info.outputs_written;
-               num_xfb_outputs = MAX2(num_xfb_outputs, ctx->gfx_stages[i]->streamout.so_info.num_outputs);
-               unsigned user_outputs_count = 0;
-               /* check builtins first */
-               u_foreach_bit(slot, builtin_outputs) {
-                  switch (slot) {
-                  /* none of these require slot map entries */
-                  case VARYING_SLOT_POS:
-                  case VARYING_SLOT_PSIZ:
-                  case VARYING_SLOT_LAYER:
-                  case VARYING_SLOT_PRIMITIVE_ID:
-                  case VARYING_SLOT_CULL_DIST0:
-                  case VARYING_SLOT_CLIP_DIST0:
-                  case VARYING_SLOT_VIEWPORT:
-                  case VARYING_SLOT_TESS_LEVEL_INNER:
-                  case VARYING_SLOT_TESS_LEVEL_OUTER:
-                     break;
-                  default:
-                     /* remaining legacy builtins only require 1 slot each */
-                     if (ctx->curr_program->shader_slot_map[slot] == -1)
-                        user_outputs_count++;
-                     break;
-                  }
-               }
-               u_foreach_bit(slot, user_outputs) {
-                  if (ctx->curr_program->shader_slot_map[slot] == -1) {
-                     /* user variables can span multiple slots */
-                     nir_variable *var = nir_find_variable_with_location(ctx->gfx_stages[i]->nir,
-                                                                         nir_var_shader_out, slot);
-                     assert(var);
-                     if (i == PIPE_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0)
-                        user_outputs_count += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/);
-                     else
-                        user_outputs_count += glsl_count_vec4_slots(var->type, false, false);
-                  }
-               }
-               max_outputs = MAX2(max_outputs, user_outputs_count);
-            }
-         }
-         /* slot map can only hold 32 entries, so dump this one if we'll exceed that */
-         if (ctx->curr_program->shader_slots_reserved + max_outputs + num_xfb_outputs > 32)
-            needs_new_map = true;
-      }
-   }
-
-   if (needs_new_map || ctx->dirty_shader_stages == existing_shaders || !existing_shaders) {
-      /* all shaders are being recompiled: new slot map */
-      memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map));
-   } else {
-      /* at least some shaders are being reused: use existing slot map so locations match up */
-      memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map));
-      prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved;
-   }
-}
-
 void
 zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
 {
@@ -489,6 +412,32 @@ zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg)
    return layout;
 }
 
+static void
+assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT])
+{
+   struct zink_shader *shaders[PIPE_SHADER_TYPES];
+
+   /* build array in pipeline order */
+   for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++)
+      shaders[tgsi_processor_to_shader_stage(i)] = stages[i];
+
+   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
+      nir_shader *producer = shaders[i]->nir;
+      for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) {
+         struct zink_shader *consumer = shaders[j];
+         if (!consumer)
+            continue;
+         if (!prog->nir[producer->info.stage])
+            prog->nir[producer->info.stage] = nir_shader_clone(prog, producer);
+         if (!prog->nir[j])
+            prog->nir[j] = nir_shader_clone(prog, consumer->nir);
+         zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]);
+         i = j;
+         break;
+      }
+   }
+}
+
 struct zink_gfx_program *
 zink_create_gfx_program(struct zink_context *ctx,
                         struct zink_shader *stages[ZINK_SHADER_COUNT])
@@ -513,7 +462,7 @@ zink_create_gfx_program(struct zink_context *ctx,
       ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL);
    }
 
-   init_slot_map(ctx, prog);
+   assign_io(prog, prog->shaders);
 
    update_shader_modules(ctx, prog->shaders, prog, false);
 
@@ -604,7 +553,7 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader
    comp->module = CALLOC_STRUCT(zink_shader_module);
    assert(comp->module);
    pipe_reference_init(&comp->module->reference, 1);
-   comp->module->shader = zink_shader_compile(screen, shader, NULL, NULL, NULL);
+   comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL);
    assert(comp->module->shader);
    _mesa_hash_table_insert(&comp->base.shader_cache[0], &shader->shader_id, comp->module);
 
diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h
index 89aabdb65bf..087a0fa0124 100644
--- a/src/gallium/drivers/zink/zink_program.h
+++ b/src/gallium/drivers/zink/zink_program.h
@@ -90,13 +90,12 @@ struct zink_program {
 struct zink_gfx_program {
    struct zink_program base;
 
+   struct nir_shader *nir[ZINK_SHADER_COUNT];
    struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here
 
    struct zink_shader_module *default_variants[ZINK_SHADER_COUNT][2]; //[default, no streamout]
    const void *default_variant_key[ZINK_SHADER_COUNT];
    struct zink_shader *shaders[ZINK_SHADER_COUNT];
-   unsigned char shader_slot_map[VARYING_SLOT_MAX];
-   unsigned char shader_slots_reserved;
    struct hash_table *pipelines[11]; // number of draw modes we support
 };
 



More information about the mesa-commit mailing list