Mesa (master): v3d: Upload all of UBO[0] if any indirect load occurs.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Mar 21 21:29:45 UTC 2019


Module: Mesa
Branch: master
Commit: 16f2770eb40e7e11d149b4551de27d8d663f4e22
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=16f2770eb40e7e11d149b4551de27d8d663f4e22

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Mar 19 09:58:14 2019 -0700

v3d: Upload all of UBO[0] if any indirect load occurs.

The idea was that we could skip uploading the constant-indexed uniform
data and just upload the uniforms that are variably-indexed.  However,
since the VS bin and render shaders may have a different set of uniforms
used, this meant that we had to upload the UBO for each of them.  The
savings from the former are generally fairly small (usually the uniform
array takes up the most space, other than in a couple of FSes in
shader-db), while the cost of the latter is larger: 3DMMES2 was
uploading 38k/frame of uniforms instead of 18k.

Given that the optimization is of dubious value, has a big downside, and
is quite a bit of code, just drop it.  No change in shader-db.  No change
on 3DMMES2 (n=15).

---

 src/broadcom/compiler/nir_to_vir.c     | 65 +---------------------------------
 src/broadcom/compiler/v3d_compiler.h   | 29 ---------------
 src/broadcom/compiler/vir.c            | 36 -------------------
 src/gallium/drivers/v3d/v3d_uniforms.c | 57 ++++++++++-------------------
 4 files changed, 20 insertions(+), 167 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 846a2a704af..2c411b86ed1 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -231,31 +231,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
 
         struct qreg offset;
         if (instr->intrinsic == nir_intrinsic_load_uniform) {
-                /* Find what variable in the default uniform block this
-                 * uniform load is coming from.
-                 */
-                uint32_t base = nir_intrinsic_base(instr);
-                int i;
-                struct v3d_ubo_range *range = NULL;
-                for (i = 0; i < c->num_ubo_ranges; i++) {
-                        range = &c->ubo_ranges[i];
-                        if (base >= range->src_offset &&
-                            base < range->src_offset + range->size) {
-                                break;
-                        }
-                }
-                /* The driver-location-based offset always has to be within a
-                 * declared uniform range.
-                 */
-                assert(i != c->num_ubo_ranges);
-                if (!c->ubo_range_used[i]) {
-                        c->ubo_range_used[i] = true;
-                        range->dst_offset = c->next_ubo_dst_offset;
-                        c->next_ubo_dst_offset += range->size;
-                }
-
-                const_offset += base - range->src_offset + range->dst_offset;
-
+                const_offset += nir_intrinsic_base(instr);
                 offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
                                      v3d_unit_data_create(0, const_offset));
                 const_offset = 0;
@@ -668,27 +644,6 @@ add_output(struct v3d_compile *c,
                 v3d_slot_from_slot_and_component(slot, swizzle);
 }
 
-static void
-declare_uniform_range(struct v3d_compile *c, uint32_t start, uint32_t size)
-{
-        unsigned array_id = c->num_ubo_ranges++;
-        if (array_id >= c->ubo_ranges_array_size) {
-                c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2,
-                                                array_id + 1);
-                c->ubo_ranges = reralloc(c, c->ubo_ranges,
-                                         struct v3d_ubo_range,
-                                         c->ubo_ranges_array_size);
-                c->ubo_range_used = reralloc(c, c->ubo_range_used,
-                                             bool,
-                                             c->ubo_ranges_array_size);
-        }
-
-        c->ubo_ranges[array_id].dst_offset = 0;
-        c->ubo_ranges[array_id].src_offset = start;
-        c->ubo_ranges[array_id].size = size;
-        c->ubo_range_used[array_id] = false;
-}
-
 /**
  * If compare_instr is a valid comparison instruction, emits the
  * compare_instr's comparison and returns the sel_instr's return value based
@@ -1536,23 +1491,6 @@ ntq_setup_outputs(struct v3d_compile *c)
         }
 }
 
-static void
-ntq_setup_uniforms(struct v3d_compile *c)
-{
-        nir_foreach_variable(var, &c->s->uniforms) {
-                uint32_t vec4_count = glsl_count_attribute_slots(var->type,
-                                                                 false);
-                unsigned vec4_size = 4 * sizeof(float);
-
-                if (var->data.mode != nir_var_uniform)
-                        continue;
-
-                declare_uniform_range(c, var->data.driver_location * vec4_size,
-                                      vec4_count * vec4_size);
-
-        }
-}
-
 /**
  * Sets up the mapping from nir_register to struct qreg *.
  *
@@ -2361,7 +2299,6 @@ nir_to_vir(struct v3d_compile *c)
                 ntq_setup_vpm_inputs(c);
 
         ntq_setup_outputs(c);
-        ntq_setup_uniforms(c);
         ntq_setup_registers(c, &c->s->registers);
 
         /* Find the main function and emit the body. */
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index d8d916fb14b..155e112d8cd 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -318,25 +318,6 @@ static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
         return slot.slot_and_component & 3;
 }
 
-struct v3d_ubo_range {
-        /**
-         * offset in bytes from the start of the ubo where this range is
-         * uploaded.
-         *
-         * Only set once used is set.
-         */
-        uint32_t dst_offset;
-
-        /**
-         * offset in bytes from the start of the gallium uniforms where the
-         * data comes from.
-         */
-        uint32_t src_offset;
-
-        /** size in bytes of this ubo range */
-        uint32_t size;
-};
-
 struct v3d_key {
         void *shader_state;
         struct {
@@ -533,13 +514,6 @@ struct v3d_compile {
         bool uses_center_w;
         bool writes_z;
 
-        struct v3d_ubo_range *ubo_ranges;
-        bool *ubo_range_used;
-        uint32_t ubo_ranges_array_size;
-        /** Number of uniform areas tracked in ubo_ranges. */
-        uint32_t num_ubo_ranges;
-        uint32_t next_ubo_dst_offset;
-
         /* State for whether we're executing on each channel currently.  0 if
          * yes, otherwise a block number + 1 that the channel jumped to.
          */
@@ -674,9 +648,6 @@ struct v3d_uniform_list {
 struct v3d_prog_data {
         struct v3d_uniform_list uniforms;
 
-        struct v3d_ubo_range *ubo_ranges;
-        uint32_t num_ubo_ranges;
-        uint32_t ubo_size;
         uint32_t spill_size;
 
         uint8_t threads;
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index bb04c82d777..8963dd1e350 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -582,41 +582,6 @@ v3d_set_prog_data_uniforms(struct v3d_compile *c,
                count * sizeof(*ulist->contents));
 }
 
-/* Copy the compiler UBO range state to the compiled shader, dropping out
- * arrays that were never referenced by an indirect load.
- *
- * (Note that QIR dead code elimination of an array access still leaves that
- * array alive, though)
- */
-static void
-v3d_set_prog_data_ubo(struct v3d_compile *c,
-                      struct v3d_prog_data *prog_data)
-{
-        if (!c->num_ubo_ranges)
-                return;
-
-        prog_data->num_ubo_ranges = 0;
-        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
-                                             c->num_ubo_ranges);
-        for (int i = 0; i < c->num_ubo_ranges; i++) {
-                if (!c->ubo_range_used[i])
-                        continue;
-
-                struct v3d_ubo_range *range = &c->ubo_ranges[i];
-                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
-                prog_data->ubo_size += range->size;
-        }
-
-        if (prog_data->ubo_size) {
-                if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
-                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
-                                vir_get_stage_name(c),
-                                c->program_id, c->variant_id,
-                                prog_data->ubo_size / 4);
-                }
-        }
-}
-
 static void
 v3d_vs_set_prog_data(struct v3d_compile *c,
                      struct v3d_vs_prog_data *prog_data)
@@ -713,7 +678,6 @@ v3d_set_prog_data(struct v3d_compile *c,
         prog_data->spill_size = c->spill_size;
 
         v3d_set_prog_data_uniforms(c, prog_data);
-        v3d_set_prog_data_ubo(c, prog_data);
 
         if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c
index b48f6526d61..a5532bdf2b4 100644
--- a/src/gallium/drivers/v3d/v3d_uniforms.c
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@@ -22,6 +22,7 @@
  */
 
 #include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
 #include "util/format_srgb.h"
 
 #include "v3d_context.h"
@@ -95,28 +96,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg,
         }
 }
 
-static struct v3d_bo *
-v3d_upload_ubo(struct v3d_context *v3d,
-               struct v3d_compiled_shader *shader,
-               const uint32_t *gallium_uniforms)
-{
-        if (!shader->prog_data.base->ubo_size)
-                return NULL;
-
-        struct v3d_bo *ubo = v3d_bo_alloc(v3d->screen,
-                                          shader->prog_data.base->ubo_size,
-                                          "ubo");
-        void *data = v3d_bo_map(ubo);
-        for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
-                memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset,
-                       ((const void *)gallium_uniforms +
-                        shader->prog_data.base->ubo_ranges[i].src_offset),
-                       shader->prog_data.base->ubo_ranges[i].size);
-        }
-
-        return ubo;
-}
-
 /**
  *  Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter.
  *
@@ -235,7 +214,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
         struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
         struct v3d_job *job = v3d->job;
         const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
-        struct v3d_bo *ubo = v3d_upload_ubo(v3d, shader, gallium_uniforms);
 
         /* We always need to return some space for uniforms, because the HW
          * will be prefetching, even if we don't read any in the program.
@@ -329,21 +307,26 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                                      v3d->zsa->base.alpha.ref_value);
                         break;
 
-                case QUNIFORM_UBO_ADDR:
-                        if (data == 0) {
-                                cl_aligned_reloc(&job->indirect, &uniforms,
-                                                 ubo, 0);
-                        } else {
-                                int ubo_index = v3d_unit_data_get_unit(data);
-                                struct v3d_resource *rsc =
-                                        v3d_resource(cb->cb[ubo_index].buffer);
-
-                                cl_aligned_reloc(&job->indirect, &uniforms,
-                                                 rsc->bo,
-                                                 cb->cb[ubo_index].buffer_offset +
-                                                 v3d_unit_data_get_offset(data));
+                case QUNIFORM_UBO_ADDR: {
+                        uint32_t unit = v3d_unit_data_get_unit(data);
+                        /* Constant buffer 0 may be a system memory pointer,
+                         * in which case we want to upload a shadow copy to
+                         * the GPU.
+                        */
+                        if (!cb->cb[unit].buffer) {
+                                u_upload_data(v3d->uploader, 0,
+                                              cb->cb[unit].buffer_size, 16,
+                                              cb->cb[unit].user_buffer,
+                                              &cb->cb[unit].buffer_offset,
+                                              &cb->cb[unit].buffer);
                         }
+
+                        cl_aligned_reloc(&job->indirect, &uniforms,
+                                         v3d_resource(cb->cb[unit].buffer)->bo,
+                                         cb->cb[unit].buffer_offset +
+                                         v3d_unit_data_get_offset(data));
                         break;
+                }
 
                 case QUNIFORM_SSBO_OFFSET: {
                         struct pipe_shader_buffer *sb =
@@ -397,8 +380,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
 
         cl_end(&job->indirect, uniforms);
 
-        v3d_bo_unreference(&ubo);
-
         return uniform_stream;
 }
 




More information about the mesa-commit mailing list