Mesa (main): intel/fs,vec4: Drop uniform compaction and pull constant support

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 10 21:36:55 UTC 2021


Module: Mesa
Branch: main
Commit: 8f3c100d61c673115880cc4c2d9a3d5d2ad7db3d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8f3c100d61c673115880cc4c2d9a3d5d2ad7db3d

Author: Jason Ekstrand <jason at jlekstrand.net>
Date:   Fri Dec  3 21:34:06 2021 -0600

intel/fs,vec4: Drop uniform compaction and pull constant support

The only driver that used uniform compaction and pull constants was i965,
which has since been removed, so all of this is now dead code.

Reviewed-by: Caio Oliveira <caio.oliveira at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14056>

---

 src/gallium/drivers/crocus/crocus_disk_cache.c    |   3 -
 src/gallium/drivers/crocus/crocus_program_cache.c |   4 +-
 src/gallium/drivers/crocus/crocus_screen.c        |   2 -
 src/gallium/drivers/iris/iris_disk_cache.c        |   3 -
 src/gallium/drivers/iris/iris_program.c           |   1 -
 src/gallium/drivers/iris/iris_screen.c            |   2 -
 src/intel/compiler/brw_compiler.h                 |  14 -
 src/intel/compiler/brw_fs.cpp                     | 331 +---------------------
 src/intel/compiler/brw_fs.h                       |   6 -
 src/intel/compiler/brw_fs_nir.cpp                 |   4 +-
 src/intel/compiler/brw_fs_visitor.cpp             |   2 -
 src/intel/compiler/brw_vec4.cpp                   | 289 +------------------
 src/intel/compiler/brw_vec4.h                     |   7 -
 src/intel/compiler/brw_vec4_gs_visitor.cpp        |   1 -
 src/intel/compiler/brw_vec4_visitor.cpp           | 140 ---------
 src/intel/vulkan/anv_device.c                     |   2 -
 16 files changed, 20 insertions(+), 791 deletions(-)

diff --git a/src/gallium/drivers/crocus/crocus_disk_cache.c b/src/gallium/drivers/crocus/crocus_disk_cache.c
index 037136ec43d..3a3d302da88 100644
--- a/src/gallium/drivers/crocus/crocus_disk_cache.c
+++ b/src/gallium/drivers/crocus/crocus_disk_cache.c
@@ -181,9 +181,6 @@ crocus_disk_cache_retrieve(struct crocus_context *ice,
    }
 
    prog_data->param = NULL;
-   prog_data->pull_param = NULL;
-   assert(prog_data->nr_pull_params == 0);
-
    if (prog_data->nr_params) {
       prog_data->param = ralloc_array(NULL, uint32_t, prog_data->nr_params);
       blob_copy_bytes(&blob, prog_data->param,
diff --git a/src/gallium/drivers/crocus/crocus_program_cache.c b/src/gallium/drivers/crocus/crocus_program_cache.c
index 52d8bbf0b4c..11c3e400290 100644
--- a/src/gallium/drivers/crocus/crocus_program_cache.c
+++ b/src/gallium/drivers/crocus/crocus_program_cache.c
@@ -224,10 +224,8 @@ crocus_upload_shader(struct crocus_context *ice,
    shader->bt = *bt;
 
    ralloc_steal(shader, shader->prog_data);
-   if (prog_data_size > 16) {
+   if (prog_data_size > 16)
       ralloc_steal(shader->prog_data, prog_data->param);
-      ralloc_steal(shader->prog_data, prog_data->pull_param);
-   }
    ralloc_steal(shader, shader->streamout);
    ralloc_steal(shader, shader->system_values);
 
diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c
index f4e37bcdcee..9e2a7004d42 100644
--- a/src/gallium/drivers/crocus/crocus_screen.c
+++ b/src/gallium/drivers/crocus/crocus_screen.c
@@ -779,9 +779,7 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config)
    screen->compiler = brw_compiler_create(screen, &screen->devinfo);
    screen->compiler->shader_debug_log = crocus_shader_debug_log;
    screen->compiler->shader_perf_log = crocus_shader_perf_log;
-   screen->compiler->supports_pull_constants = false;
    screen->compiler->supports_shader_constants = false;
-   screen->compiler->compact_params = false;
    screen->compiler->constant_buffer_0_is_relative = true;
 
    if (screen->devinfo.ver >= 7) {
diff --git a/src/gallium/drivers/iris/iris_disk_cache.c b/src/gallium/drivers/iris/iris_disk_cache.c
index 2ad12002a61..520bfd83397 100644
--- a/src/gallium/drivers/iris/iris_disk_cache.c
+++ b/src/gallium/drivers/iris/iris_disk_cache.c
@@ -207,9 +207,6 @@ iris_disk_cache_retrieve(struct iris_screen *screen,
    }
 
    prog_data->param = NULL;
-   prog_data->pull_param = NULL;
-   assert(prog_data->nr_pull_params == 0);
-
    if (prog_data->nr_params) {
       prog_data->param = ralloc_array(NULL, uint32_t, prog_data->nr_params);
       blob_copy_bytes(&blob, prog_data->param,
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 2e5227ad33e..5a21228a8a7 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -90,7 +90,6 @@ iris_finalize_program(struct iris_compiled_shader *shader,
    ralloc_steal(shader, shader->prog_data);
    ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
    ralloc_steal(shader->prog_data, prog_data->param);
-   ralloc_steal(shader->prog_data, prog_data->pull_param);
    ralloc_steal(shader, shader->streamout);
    ralloc_steal(shader, shader->system_values);
 }
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 870bb24aaff..09c69594578 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -839,9 +839,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
    screen->compiler = brw_compiler_create(screen, &screen->devinfo);
    screen->compiler->shader_debug_log = iris_shader_debug_log;
    screen->compiler->shader_perf_log = iris_shader_perf_log;
-   screen->compiler->supports_pull_constants = false;
    screen->compiler->supports_shader_constants = true;
-   screen->compiler->compact_params = false;
    screen->compiler->indirect_ubos_use_sampler = screen->devinfo.ver < 12;
 
    screen->l3_config_3d = iris_get_default_l3_config(&screen->devinfo, false);
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 4c932fb46a4..113c00eb7d1 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -91,24 +91,12 @@ struct brw_compiler {
     */
    bool constant_buffer_0_is_relative;
 
-   /**
-    * Whether or not the driver supports pull constants.  If not, the compiler
-    * will attempt to push everything.
-    */
-   bool supports_pull_constants;
-
    /**
     * Whether or not the driver supports NIR shader constants.  This controls
     * whether nir_opt_large_constants will be run.
     */
    bool supports_shader_constants;
 
-   /**
-    * Whether or not the driver wants uniform params to be compacted by the
-    * back-end compiler.
-    */
-   bool compact_params;
-
    /**
     * Whether or not the driver wants variable group size to be lowered by the
     * back-end compiler.
@@ -775,7 +763,6 @@ struct brw_stage_prog_data {
    struct brw_ubo_range ubo_ranges[4];
 
    GLuint nr_params;       /**< number of float params/constants */
-   GLuint nr_pull_params;
 
    gl_shader_stage stage;
 
@@ -822,7 +809,6 @@ struct brw_stage_prog_data {
     * above.
     */
    uint32_t *param;
-   uint32_t *pull_param;
 
    /* Whether shader uses atomic operations. */
    bool uses_atomic_load_store;
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 99c6979dbdf..3325bac0025 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1234,7 +1234,6 @@ void
 fs_visitor::import_uniforms(fs_visitor *v)
 {
    this->push_constant_loc = v->push_constant_loc;
-   this->pull_constant_loc = v->pull_constant_loc;
    this->uniforms = v->uniforms;
    this->subgroup_id = v->subgroup_id;
    for (unsigned i = 0; i < ARRAY_SIZE(this->group_size); i++)
@@ -1801,7 +1800,6 @@ fs_visitor::assign_curb_setup()
 
    uint64_t want_zero = used & stage_prog_data->zero_push_reg;
    if (want_zero) {
-      assert(!compiler->compact_params);
       fs_builder ubld = bld.exec_all().group(8, 0).at(
          cfg->first_block(), cfg->first_block()->start());
 
@@ -2396,109 +2394,6 @@ get_subgroup_id_param_index(const intel_device_info *devinfo,
    return -1;
 }
 
-/**
- * Struct for handling complex alignments.
- *
- * A complex alignment is stored as multiplier and an offset.  A value is
- * considered to be aligned if it is {offset} larger than a multiple of {mul}.
- * For instance, with an alignment of {8, 2}, cplx_align_apply would do the
- * following:
- *
- *  N  | cplx_align_apply({8, 2}, N)
- * ----+-----------------------------
- *  4  | 6
- *  6  | 6
- *  8  | 14
- *  10 | 14
- *  12 | 14
- *  14 | 14
- *  16 | 22
- */
-struct cplx_align {
-   unsigned mul:4;
-   unsigned offset:4;
-};
-
-#define CPLX_ALIGN_MAX_MUL 8
-
-static void
-cplx_align_assert_sane(struct cplx_align a)
-{
-   assert(a.mul > 0 && util_is_power_of_two_nonzero(a.mul));
-   assert(a.offset < a.mul);
-}
-
-/**
- * Combines two alignments to produce a least multiple of sorts.
- *
- * The returned alignment is the smallest (in terms of multiplier) such that
- * anything aligned to both a and b will be aligned to the new alignment.
- * This function will assert-fail if a and b are not compatible, i.e. if the
- * offset parameters are such that no common alignment is possible.
- */
-static struct cplx_align
-cplx_align_combine(struct cplx_align a, struct cplx_align b)
-{
-   cplx_align_assert_sane(a);
-   cplx_align_assert_sane(b);
-
-   /* Assert that the alignments agree. */
-   assert((a.offset & (b.mul - 1)) == (b.offset & (a.mul - 1)));
-
-   return a.mul > b.mul ? a : b;
-}
-
-/**
- * Apply a complex alignment
- *
- * This function will return the smallest number greater than or equal to
- * offset that is aligned to align.
- */
-static unsigned
-cplx_align_apply(struct cplx_align align, unsigned offset)
-{
-   return ALIGN(offset - align.offset, align.mul) + align.offset;
-}
-
-#define UNIFORM_SLOT_SIZE 4
-
-struct uniform_slot_info {
-   /** True if the given uniform slot is live */
-   unsigned is_live:1;
-
-   /** True if this slot and the next slot must remain contiguous */
-   unsigned contiguous:1;
-
-   struct cplx_align align;
-};
-
-static void
-mark_uniform_slots_read(struct uniform_slot_info *slots,
-                        unsigned num_slots, unsigned alignment)
-{
-   assert(alignment > 0 && util_is_power_of_two_nonzero(alignment));
-   assert(alignment <= CPLX_ALIGN_MAX_MUL);
-
-   /* We can't align a slot to anything less than the slot size */
-   alignment = MAX2(alignment, UNIFORM_SLOT_SIZE);
-
-   struct cplx_align align = {alignment, 0};
-   cplx_align_assert_sane(align);
-
-   for (unsigned i = 0; i < num_slots; i++) {
-      slots[i].is_live = true;
-      if (i < num_slots - 1)
-         slots[i].contiguous = true;
-
-      align.offset = (i * UNIFORM_SLOT_SIZE) & (align.mul - 1);
-      if (slots[i].align.mul == 0) {
-         slots[i].align = align;
-      } else {
-         slots[i].align = cplx_align_combine(slots[i].align, align);
-      }
-   }
-}
-
 /**
  * Assign UNIFORM file registers to either push constants or pull constants.
  *
@@ -2512,197 +2407,12 @@ void
 fs_visitor::assign_constant_locations()
 {
    /* Only the first compile gets to decide on locations. */
-   if (push_constant_loc) {
-      assert(pull_constant_loc);
+   if (push_constant_loc)
       return;
-   }
-
-   if (compiler->compact_params) {
-      struct uniform_slot_info slots[uniforms + 1];
-      memset(slots, 0, sizeof(slots));
-
-      foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
-         for (int i = 0 ; i < inst->sources; i++) {
-            if (inst->src[i].file != UNIFORM)
-               continue;
-
-            /* NIR tightly packs things so the uniform number might not be
-             * aligned (if we have a double right after a float, for
-             * instance).  This is fine because the process of re-arranging
-             * them will ensure that things are properly aligned.  The offset
-             * into that uniform, however, must be aligned.
-             *
-             * In Vulkan, we have explicit offsets but everything is crammed
-             * into a single "variable" so inst->src[i].nr will always be 0.
-             * Everything will be properly aligned relative to that one base.
-             */
-            assert(inst->src[i].offset % type_sz(inst->src[i].type) == 0);
-
-            unsigned u = inst->src[i].nr +
-                         inst->src[i].offset / UNIFORM_SLOT_SIZE;
-
-            if (u >= uniforms)
-               continue;
-
-            unsigned slots_read;
-            if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
-               slots_read = DIV_ROUND_UP(inst->src[2].ud, UNIFORM_SLOT_SIZE);
-            } else {
-               unsigned bytes_read = inst->components_read(i) *
-                                     type_sz(inst->src[i].type);
-               slots_read = DIV_ROUND_UP(bytes_read, UNIFORM_SLOT_SIZE);
-            }
-
-            assert(u + slots_read <= uniforms);
-            mark_uniform_slots_read(&slots[u], slots_read,
-                                    type_sz(inst->src[i].type));
-         }
-      }
 
-      int subgroup_id_index = get_subgroup_id_param_index(devinfo,
-                                                          stage_prog_data);
-
-      /* Only allow 16 registers (128 uniform components) as push constants.
-       *
-       * Just demote the end of the list.  We could probably do better
-       * here, demoting things that are rarely used in the program first.
-       *
-       * If changing this value, note the limitation about total_regs in
-       * brw_curbe.c.
-       */
-      unsigned int max_push_components = 16 * 8;
-      if (subgroup_id_index >= 0)
-         max_push_components--; /* Save a slot for the thread ID */
-
-      /* We push small arrays, but no bigger than 16 floats.  This is big
-       * enough for a vec4 but hopefully not large enough to push out other
-       * stuff.  We should probably use a better heuristic at some point.
-       */
-      const unsigned int max_chunk_size = 16;
-
-      unsigned int num_push_constants = 0;
-      unsigned int num_pull_constants = 0;
-
-      push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-      pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-
-      /* Default to -1 meaning no location */
-      memset(push_constant_loc, -1, uniforms * sizeof(*push_constant_loc));
-      memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
-
-      int chunk_start = -1;
-      struct cplx_align align;
-      for (unsigned u = 0; u < uniforms; u++) {
-         if (!slots[u].is_live) {
-            assert(chunk_start == -1);
-            continue;
-         }
-
-         /* Skip subgroup_id_index to put it in the last push register. */
-         if (subgroup_id_index == (int)u)
-            continue;
-
-         if (chunk_start == -1) {
-            chunk_start = u;
-            align = slots[u].align;
-         } else {
-            /* Offset into the chunk */
-            unsigned chunk_offset = (u - chunk_start) * UNIFORM_SLOT_SIZE;
-
-            /* Shift the slot alignment down by the chunk offset so it is
-             * comparable with the base chunk alignment.
-             */
-            struct cplx_align slot_align = slots[u].align;
-            slot_align.offset =
-               (slot_align.offset - chunk_offset) & (align.mul - 1);
-
-            align = cplx_align_combine(align, slot_align);
-         }
-
-         /* Sanity check the alignment */
-         cplx_align_assert_sane(align);
-
-         if (slots[u].contiguous)
-            continue;
-
-         /* Adjust the alignment to be in terms of slots, not bytes */
-         assert((align.mul & (UNIFORM_SLOT_SIZE - 1)) == 0);
-         assert((align.offset & (UNIFORM_SLOT_SIZE - 1)) == 0);
-         align.mul /= UNIFORM_SLOT_SIZE;
-         align.offset /= UNIFORM_SLOT_SIZE;
-
-         unsigned push_start_align = cplx_align_apply(align, num_push_constants);
-         unsigned chunk_size = u - chunk_start + 1;
-         if ((!compiler->supports_pull_constants && u < UBO_START) ||
-             (chunk_size < max_chunk_size &&
-              push_start_align + chunk_size <= max_push_components)) {
-            /* Align up the number of push constants */
-            num_push_constants = push_start_align;
-            for (unsigned i = 0; i < chunk_size; i++)
-               push_constant_loc[chunk_start + i] = num_push_constants++;
-         } else {
-            /* We need to pull this one */
-            num_pull_constants = cplx_align_apply(align, num_pull_constants);
-            for (unsigned i = 0; i < chunk_size; i++)
-               pull_constant_loc[chunk_start + i] = num_pull_constants++;
-         }
-
-         /* Reset the chunk and start again */
-         chunk_start = -1;
-      }
-
-      /* Add the CS local thread ID uniform at the end of the push constants */
-      if (subgroup_id_index >= 0)
-         push_constant_loc[subgroup_id_index] = num_push_constants++;
-
-      /* As the uniforms are going to be reordered, stash the old array and
-       * create two new arrays for push/pull params.
-       */
-      uint32_t *param = stage_prog_data->param;
-      stage_prog_data->nr_params = num_push_constants;
-      if (num_push_constants) {
-         stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t,
-                                                num_push_constants);
-      } else {
-         stage_prog_data->param = NULL;
-      }
-      assert(stage_prog_data->nr_pull_params == 0);
-      assert(stage_prog_data->pull_param == NULL);
-      if (num_pull_constants > 0) {
-         stage_prog_data->nr_pull_params = num_pull_constants;
-         stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t,
-                                                     num_pull_constants);
-      }
-
-      /* Up until now, the param[] array has been indexed by reg + offset
-       * of UNIFORM registers.  Move pull constants into pull_param[] and
-       * condense param[] to only contain the uniforms we chose to push.
-       *
-       * NOTE: Because we are condensing the params[] array, we know that
-       * push_constant_loc[i] <= i and we can do it in one smooth loop without
-       * having to make a copy.
-       */
-      for (unsigned int i = 0; i < uniforms; i++) {
-         uint32_t value = param[i];
-         if (pull_constant_loc[i] != -1) {
-            stage_prog_data->pull_param[pull_constant_loc[i]] = value;
-         } else if (push_constant_loc[i] != -1) {
-            stage_prog_data->param[push_constant_loc[i]] = value;
-         }
-      }
-      ralloc_free(param);
-   } else {
-      /* If we don't want to compact anything, just set up dummy push/pull
-       * arrays.  All the rest of the compiler cares about are these arrays.
-       */
-      push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-      pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
-
-      for (unsigned u = 0; u < uniforms; u++)
-         push_constant_loc[u] = u;
-
-      memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
-   }
+   push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
+   for (unsigned u = 0; u < uniforms; u++)
+      push_constant_loc[u] = u;
 
    /* Now that we know how many regular uniforms we'll push, reduce the
     * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
@@ -2733,33 +2443,22 @@ fs_visitor::get_pull_locs(const fs_reg &src,
 {
    assert(src.file == UNIFORM);
 
-   if (src.nr >= UBO_START) {
-      const struct brw_ubo_range *range =
-         &prog_data->ubo_ranges[src.nr - UBO_START];
-
-      /* If this access is in our (reduced) range, use the push data. */
-      if (src.offset / 32 < range->length)
-         return false;
+   if (src.nr < UBO_START)
+      return false;
 
-      *out_surf_index = prog_data->binding_table.ubo_start + range->block;
-      *out_pull_index = (32 * range->start + src.offset) / 4;
+   const struct brw_ubo_range *range =
+      &prog_data->ubo_ranges[src.nr - UBO_START];
 
-      prog_data->has_ubo_pull = true;
-      return true;
-   }
-
-   const unsigned location = src.nr + src.offset / 4;
+   /* If this access is in our (reduced) range, use the push data. */
+   if (src.offset / 32 < range->length)
+      return false;
 
-   if (location < uniforms && pull_constant_loc[location] != -1) {
-      /* A regular uniform push constant */
-      *out_surf_index = stage_prog_data->binding_table.pull_constants_start;
-      *out_pull_index = pull_constant_loc[location];
+   *out_surf_index = prog_data->binding_table.ubo_start + range->block;
+   *out_pull_index = (32 * range->start + src.offset) / 4;
 
-      prog_data->has_ubo_pull = true;
-      return true;
-   }
+   prog_data->has_ubo_pull = true;
 
-   return false;
+   return true;
 }
 
 /**
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 160a1e4d952..1faee57f23a 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -369,12 +369,6 @@ public:
    /** Byte-offset for the next available spot in the scratch space buffer. */
    unsigned last_scratch;
 
-   /**
-    * Array mapping UNIFORM register numbers to the pull parameter index,
-    * or -1 if this uniform register isn't being uploaded as a pull constant.
-    */
-   int *pull_constant_loc;
-
    /**
     * Array mapping UNIFORM register numbers to the push parameter index,
     * or -1 if this uniform register isn't being uploaded as a push constant.
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 2d3b81a2363..671dfa8e2ce 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -103,10 +103,8 @@ void
 fs_visitor::nir_setup_uniforms()
 {
    /* Only the first compile gets to set up uniforms. */
-   if (push_constant_loc) {
-      assert(pull_constant_loc);
+   if (push_constant_loc)
       return;
-   }
 
    uniforms = nir->num_uniforms / 4;
 
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 4de37671b90..7f8d69a7d0a 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -126,7 +126,6 @@ fs_visitor::emit_dummy_fs()
 
    /* We don't have any uniforms. */
    stage_prog_data->nr_params = 0;
-   stage_prog_data->nr_pull_params = 0;
    stage_prog_data->curb_read_length = 0;
    stage_prog_data->dispatch_grf_start_reg = 2;
    wm_prog_data->dispatch_grf_start_reg_16 = 2;
@@ -1192,7 +1191,6 @@ fs_visitor::init()
 
    this->uniforms = 0;
    this->last_scratch = 0;
-   this->pull_constant_loc = NULL;
    this->push_constant_loc = NULL;
 
    this->shader_stats.scheduler_mode = NULL;
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 72165932c55..3d70f920500 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -604,194 +604,6 @@ vec4_visitor::split_uniform_registers()
    }
 }
 
-/* This function returns the register number where we placed the uniform */
-static int
-set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
-                      const int src, const int size, const int channel_size,
-                      int *new_loc, int *new_chan,
-                      int *new_chans_used)
-{
-   int dst;
-   /* Find the lowest place we can slot this uniform in. */
-   for (dst = 0; dst < nr_uniforms; dst++) {
-      if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
-         break;
-   }
-
-   assert(dst < nr_uniforms);
-
-   new_loc[src] = dst;
-   new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
-   new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
-
-   *new_uniform_count = MAX2(*new_uniform_count, dst + 1);
-   return dst;
-}
-
-void
-vec4_visitor::pack_uniform_registers()
-{
-   if (!compiler->compact_params)
-      return;
-
-   uint8_t chans_used[this->uniforms];
-   int new_loc[this->uniforms];
-   int new_chan[this->uniforms];
-   bool is_aligned_to_dvec4[this->uniforms];
-   int new_chans_used[this->uniforms];
-   int channel_sizes[this->uniforms];
-
-   memset(chans_used, 0, sizeof(chans_used));
-   memset(new_loc, 0, sizeof(new_loc));
-   memset(new_chan, 0, sizeof(new_chan));
-   memset(new_chans_used, 0, sizeof(new_chans_used));
-   memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
-   memset(channel_sizes, 0, sizeof(channel_sizes));
-
-   /* Find which uniform vectors are actually used by the program.  We
-    * expect unused vector elements when we've moved array access out
-    * to pull constants, and from some GLSL code generators like wine.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      unsigned readmask;
-      switch (inst->opcode) {
-      case VEC4_OPCODE_PACK_BYTES:
-      case BRW_OPCODE_DP4:
-      case BRW_OPCODE_DPH:
-         readmask = 0xf;
-         break;
-      case BRW_OPCODE_DP3:
-         readmask = 0x7;
-         break;
-      case BRW_OPCODE_DP2:
-         readmask = 0x3;
-         break;
-      default:
-         readmask = inst->dst.writemask;
-         break;
-      }
-
-      for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
-            continue;
-
-         assert(type_sz(inst->src[i].type) % 4 == 0);
-         int channel_size = type_sz(inst->src[i].type) / 4;
-
-         int reg = inst->src[i].nr;
-         for (int c = 0; c < 4; c++) {
-            if (!(readmask & (1 << c)))
-               continue;
-
-            unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
-            unsigned used = MAX2(chans_used[reg], channel * channel_size);
-            if (used <= 4) {
-               chans_used[reg] = used;
-               channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
-            } else {
-               is_aligned_to_dvec4[reg] = true;
-               is_aligned_to_dvec4[reg + 1] = true;
-               chans_used[reg + 1] = used - 4;
-               channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
-            }
-         }
-      }
-
-      if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
-          inst->src[0].file == UNIFORM) {
-         assert(inst->src[2].file == BRW_IMMEDIATE_VALUE);
-         assert(inst->src[0].subnr == 0);
-
-         unsigned bytes_read = inst->src[2].ud;
-         assert(bytes_read % 4 == 0);
-         unsigned vec4s_read = DIV_ROUND_UP(bytes_read, 16);
-
-         /* We just mark every register touched by a MOV_INDIRECT as being
-          * fully used.  This ensures that it doesn't broken up piecewise by
-          * the next part of our packing algorithm.
-          */
-         int reg = inst->src[0].nr;
-         int channel_size = type_sz(inst->src[0].type) / 4;
-         for (unsigned i = 0; i < vec4s_read; i++) {
-            chans_used[reg + i] = 4;
-            channel_sizes[reg + i] = MAX2(channel_sizes[reg + i], channel_size);
-         }
-      }
-   }
-
-   int new_uniform_count = 0;
-
-   /* As the uniforms are going to be reordered, take the data from a temporary
-    * copy of the original param[].
-    */
-   uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params);
-   memcpy(param, stage_prog_data->param,
-          sizeof(uint32_t) * stage_prog_data->nr_params);
-
-   /* Now, figure out a packing of the live uniform vectors into our
-    * push constants. Start with dvec{3,4} because they are aligned to
-    * dvec4 size (2 vec4).
-    */
-   for (int src = 0; src < uniforms; src++) {
-      int size = chans_used[src];
-
-      if (size == 0 || !is_aligned_to_dvec4[src])
-         continue;
-
-      /* dvec3 are aligned to dvec4 size, apply the alignment of the size
-       * to 4 to avoid moving last component of a dvec3 to the available
-       * location at the end of a previous dvec3. These available locations
-       * could be filled by smaller variables in next loop.
-       */
-      size = ALIGN(size, 4);
-      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
-                                      src, size, channel_sizes[src],
-                                      new_loc, new_chan,
-                                      new_chans_used);
-      /* Move the references to the data */
-      for (int j = 0; j < size; j++) {
-         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-            param[src * 4 + j];
-      }
-   }
-
-   /* Continue with the rest of data, which is aligned to vec4. */
-   for (int src = 0; src < uniforms; src++) {
-      int size = chans_used[src];
-
-      if (size == 0 || is_aligned_to_dvec4[src])
-         continue;
-
-      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
-                                      src, size, channel_sizes[src],
-                                      new_loc, new_chan,
-                                      new_chans_used);
-      /* Move the references to the data */
-      for (int j = 0; j < size; j++) {
-         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-            param[src * 4 + j];
-      }
-   }
-
-   ralloc_free(param);
-   this->uniforms = new_uniform_count;
-   stage_prog_data->nr_params = new_uniform_count * 4;
-
-   /* Now, update the instructions for our repacked uniforms. */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      for (int i = 0 ; i < 3; i++) {
-         int src = inst->src[i].nr;
-
-         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
-            continue;
-
-         int chan = new_chan[src] / channel_sizes[src];
-         inst->src[i].nr = new_loc[src];
-         inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
-      }
-   }
-}
-
 /**
  * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
  *
@@ -910,97 +722,6 @@ vec4_visitor::opt_algebraic()
    return progress;
 }
 
-/**
- * Only a limited number of hardware registers may be used for push
- * constants, so this turns access to the overflowed constants into
- * pull constants.
- */
-void
-vec4_visitor::move_push_constants_to_pull_constants()
-{
-   int pull_constant_loc[this->uniforms];
-
-   const int max_uniform_components = push_length * 8;
-
-   if (this->uniforms * 4 <= max_uniform_components)
-      return;
-
-   assert(compiler->supports_pull_constants);
-   assert(compiler->compact_params);
-
-   /* If we got here, we also can't have any push ranges */
-   for (unsigned i = 0; i < 4; i++)
-      assert(prog_data->base.ubo_ranges[i].length == 0);
-
-   /* Make some sort of choice as to which uniforms get sent to pull
-    * constants.  We could potentially do something clever here like
-    * look for the most infrequently used uniform vec4s, but leave
-    * that for later.
-    */
-   for (int i = 0; i < this->uniforms * 4; i += 4) {
-      pull_constant_loc[i / 4] = -1;
-
-      if (i >= max_uniform_components) {
-         uint32_t *values = &stage_prog_data->param[i];
-
-         /* Try to find an existing copy of this uniform in the pull
-          * constants if it was part of an array access already.
-          */
-         for (unsigned int j = 0; j < stage_prog_data->nr_pull_params; j += 4) {
-            int matches;
-
-            for (matches = 0; matches < 4; matches++) {
-               if (stage_prog_data->pull_param[j + matches] != values[matches])
-                  break;
-            }
-
-            if (matches == 4) {
-               pull_constant_loc[i / 4] = j / 4;
-               break;
-            }
-         }
-
-         if (pull_constant_loc[i / 4] == -1) {
-            assert(stage_prog_data->nr_pull_params % 4 == 0);
-            pull_constant_loc[i / 4] = stage_prog_data->nr_pull_params / 4;
-
-            for (int j = 0; j < 4; j++) {
-               stage_prog_data->pull_param[stage_prog_data->nr_pull_params++] =
-                  values[j];
-            }
-         }
-      }
-   }
-
-   /* Now actually rewrite usage of the things we've moved to pull
-    * constants.
-    */
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      for (int i = 0 ; i < 3; i++) {
-         if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START ||
-             pull_constant_loc[inst->src[i].nr] == -1)
-            continue;
-
-         int uniform = inst->src[i].nr;
-
-         const glsl_type *temp_type = type_sz(inst->src[i].type) == 8 ?
-            glsl_type::dvec4_type : glsl_type::vec4_type;
-         dst_reg temp = dst_reg(this, temp_type);
-
-         emit_pull_constant_load(block, inst, temp, inst->src[i],
-                                 pull_constant_loc[uniform], src_reg());
-
-         inst->src[i].file = temp.file;
-         inst->src[i].nr = temp.nr;
-         inst->src[i].offset %= 16;
-         inst->src[i].reladdr = NULL;
-      }
-   }
-
-   /* Repack push constants to remove the now-unused ones. */
-   pack_uniform_registers();
-}
-
 /* Conditions for which we want to avoid setting the dependency control bits */
 bool
 vec4_visitor::is_dep_ctrl_unsafe(const vec4_instruction *inst)
@@ -1842,15 +1563,13 @@ vec4_visitor::setup_uniforms(int reg)
    /* It's possible that uniform compaction will shrink further than expected
     * so we re-compute the layout and set up our UBO push starts.
     */
-   const unsigned old_push_length = push_length;
+   ASSERTED const unsigned old_push_length = push_length;
    push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
    for (unsigned i = 0; i < 4; i++) {
       ubo_push_start[i] = push_length;
       push_length += stage_prog_data->ubo_ranges[i].length;
    }
-   assert(push_length <= old_push_length);
-   if (push_length < old_push_length)
-      assert(compiler->compact_params);
+   assert(push_length == old_push_length);
 
    /* The pre-gfx6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
@@ -2738,10 +2457,8 @@ vec4_visitor::run()
     * often do repeated subexpressions for those.
     */
    move_grf_array_access_to_scratch();
-   move_uniform_array_access_to_pull_constants();
+   split_uniform_registers();
 
-   pack_uniform_registers();
-   move_push_constants_to_pull_constants();
    split_virtual_grfs();
 
 #define OPT(pass, args...) ({                                          \
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index f27e3d3c4ad..2529d69c974 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -138,9 +138,7 @@ public:
    void spill_reg(unsigned spill_reg);
    void move_grf_array_access_to_scratch();
    void move_uniform_array_access_to_pull_constants();
-   void move_push_constants_to_pull_constants();
    void split_uniform_registers();
-   void pack_uniform_registers();
    void setup_push_ranges();
    virtual void invalidate_analysis(brw::analysis_dependency_class c);
    void split_virtual_grfs();
@@ -292,11 +290,6 @@ public:
 			  int base_offset);
    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
 			   int base_offset);
-   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
-				dst_reg dst,
-				src_reg orig_src,
-                                int base_offset,
-                                src_reg indirect);
    void emit_pull_constant_load_reg(dst_reg dst,
                                     src_reg surf_index,
                                     src_reg offset,
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 1b55e9234e2..aa396eaab70 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -889,7 +889,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
             memcpy(prog_data->base.base.param, param,
                    sizeof(uint32_t) * param_count);
             prog_data->base.base.nr_params = param_count;
-            prog_data->base.base.nr_pull_params = 0;
             ralloc_free(param);
          }
       }
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index 3ad8868ac5f..8bfb7ee872a 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1592,146 +1592,6 @@ vec4_visitor::move_grf_array_access_to_scratch()
    }
 }
 
-/**
- * Emits an instruction before @inst to load the value named by @orig_src
- * from the pull constant buffer (surface) at @base_offset to @temp.
- */
-void
-vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
-                                      dst_reg temp, src_reg orig_src,
-                                      int base_offset, src_reg indirect)
-{
-   assert(orig_src.offset % 16 == 0);
-   const unsigned index = prog_data->base.binding_table.pull_constants_start;
-
-   /* For 64bit loads we need to emit two 32-bit load messages and we also
-    * we need to shuffle the 32-bit data result into proper 64-bit data. To do
-    * that we emit the 32-bit loads into a temporary and we shuffle the result
-    * into the original destination.
-    */
-   dst_reg orig_temp = temp;
-   bool is_64bit = type_sz(orig_src.type) == 8;
-   if (is_64bit) {
-      assert(type_sz(temp.type) == 8);
-      dst_reg temp_df = dst_reg(this, glsl_type::dvec4_type);
-      temp = retype(temp_df, BRW_REGISTER_TYPE_F);
-   }
-
-   src_reg src = orig_src;
-   for (int i = 0; i < (is_64bit ? 2 : 1); i++) {
-      int reg_offset = base_offset + src.offset / 16;
-
-      src_reg offset;
-      if (indirect.file != BAD_FILE) {
-         offset = src_reg(this, glsl_type::uint_type);
-         emit_before(block, inst, ADD(dst_reg(offset), indirect,
-                                      brw_imm_ud(reg_offset * 16)));
-      } else {
-         offset = brw_imm_d(reg_offset * 16);
-      }
-
-      emit_pull_constant_load_reg(byte_offset(temp, i * REG_SIZE),
-                                  brw_imm_ud(index),
-                                  offset,
-                                  block, inst);
-
-      src = byte_offset(src, 16);
-   }
-
-   if (is_64bit) {
-      temp = retype(temp, BRW_REGISTER_TYPE_DF);
-      shuffle_64bit_data(orig_temp, src_reg(temp), false, false, block, inst);
-   }
-}
-
-/**
- * Implements array access of uniforms by inserting a
- * PULL_CONSTANT_LOAD instruction.
- *
- * Unlike temporary GRF array access (where we don't support it due to
- * the difficulty of doing relative addressing on instruction
- * destinations), we could potentially do array access of uniforms
- * that were loaded in GRF space as push constants.  In real-world
- * usage we've seen, though, the arrays being used are always larger
- * than we could load as push constants, so just always move all
- * uniform array access out to a pull constant buffer.
- */
-void
-vec4_visitor::move_uniform_array_access_to_pull_constants()
-{
-   /* The vulkan dirver doesn't support pull constants other than UBOs so
-    * everything has to be pushed regardless.
-    */
-   if (!compiler->supports_pull_constants) {
-      split_uniform_registers();
-      return;
-   }
-
-   /* Allocate the pull_params array */
-   assert(stage_prog_data->nr_pull_params == 0);
-   stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
-                                              this->uniforms * 4);
-
-   int pull_constant_loc[this->uniforms];
-   memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-
-   /* First, walk through the instructions and determine which things need to
-    * be pulled.  We mark something as needing to be pulled by setting
-    * pull_constant_loc to 0.
-    */
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      /* We only care about MOV_INDIRECT of a uniform */
-      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
-          inst->src[0].file != UNIFORM)
-         continue;
-
-      int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
-
-      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
-         pull_constant_loc[uniform_nr + j] = 0;
-   }
-
-   /* Next, we walk the list of uniforms and assign real pull constant
-    * locations and set their corresponding entries in pull_param.
-    */
-   for (int j = 0; j < this->uniforms; j++) {
-      if (pull_constant_loc[j] < 0)
-         continue;
-
-      pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
-
-      for (int i = 0; i < 4; i++) {
-         stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
-            = stage_prog_data->param[j * 4 + i];
-      }
-   }
-
-   /* Finally, we can walk through the instructions and lower MOV_INDIRECT
-    * instructions to actual uniform pulls.
-    */
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      /* We only care about MOV_INDIRECT of a uniform */
-      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
-          inst->src[0].file != UNIFORM)
-         continue;
-
-      int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
-
-      assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
-
-      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
-                              pull_constant_loc[uniform_nr], inst->src[1]);
-      inst->remove(block);
-   }
-
-   /* Now there are no accesses of the UNIFORM file with a reladdr, so
-    * no need to track them as larger-than-vec4 objects.  This will be
-    * relied on in cutting out unused uniform vectors from push
-    * constants.
-    */
-   split_uniform_registers();
-}
-
 void
 vec4_visitor::resolve_ud_negate(src_reg *reg)
 {
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index bcaddf88a99..939bca12986 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -974,11 +974,9 @@ anv_physical_device_try_create(struct anv_instance *instance,
    }
    device->compiler->shader_debug_log = compiler_debug_log;
    device->compiler->shader_perf_log = compiler_perf_log;
-   device->compiler->supports_pull_constants = false;
    device->compiler->constant_buffer_0_is_relative =
       device->info.ver < 8 || !device->has_context_isolation;
    device->compiler->supports_shader_constants = true;
-   device->compiler->compact_params = false;
    device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
 
    isl_device_init(&device->isl_dev, &device->info);



More information about the mesa-commit mailing list