Mesa (master): vc4: Emit resets of the uniform stream at the starts of blocks.

Eric Anholt anholt at kemper.freedesktop.org
Thu Jul 14 07:01:58 UTC 2016


Module: Mesa
Branch: master
Commit: 9194473dd260fe72042807a97be0072c6f0537da
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9194473dd260fe72042807a97be0072c6f0537da

Author: Eric Anholt <eric at anholt.net>
Date:   Thu May  5 18:11:04 2016 -0700

vc4: Emit resets of the uniform stream at the starts of blocks.

If a block might be entered from multiple locations, then the uniform
stream will (probably) be at different points, and we need to make sure
that it's pointing where we expect it to be.  The kernel also enforces
that any block reading a uniform resets uniforms, to prevent reading
outside of the uniform stream by using looping.

---

 src/gallium/drivers/vc4/Makefile.sources           |   1 +
 src/gallium/drivers/vc4/vc4_program.c              |   1 +
 src/gallium/drivers/vc4/vc4_qir.c                  |   1 +
 src/gallium/drivers/vc4/vc4_qir.h                  |  12 +++
 .../vc4/vc4_qir_emit_uniform_stream_resets.c       | 101 +++++++++++++++++++++
 src/gallium/drivers/vc4/vc4_qir_schedule.c         |  16 ++++
 src/gallium/drivers/vc4/vc4_qpu_emit.c             |   8 ++
 src/gallium/drivers/vc4/vc4_qpu_schedule.c         |  21 +++++
 src/gallium/drivers/vc4/vc4_uniforms.c             |   6 ++
 9 files changed, 167 insertions(+)

diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 76e46f5..76e52ce 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -31,6 +31,7 @@ C_SOURCES := \
 	vc4_opt_vpm.c \
 	vc4_program.c \
 	vc4_qir.c \
+	vc4_qir_emit_uniform_stream_resets.c \
 	vc4_qir_live_variables.c \
 	vc4_qir_lower_uniforms.c \
 	vc4_qir_schedule.c \
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 465e052..521f971 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2114,6 +2114,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         qir_lower_uniforms(c);
 
         qir_schedule_instructions(c);
+        qir_emit_uniform_stream_resets(c);
 
         if (vc4_debug & VC4_DEBUG_QIR) {
                 fprintf(stderr, "%s prog %d/%d QIR:\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index e1d663d..9ff1561 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -85,6 +85,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_LOAD_IMM] = { "load_imm", 0, 1 },
 
         [QOP_BRANCH] = { "branch", 0, 0, true },
+        [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
 };
 
 static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index e7ddfaa..88eda22 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -162,6 +162,12 @@ enum qop {
          * that block->successor[1] may be unset if the condition is ALWAYS.
          */
         QOP_BRANCH,
+
+        /* Emits an ADD of src[0] and src[1], where src[0] must be a
+         * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,
+         * required by the kernel as part of its branch validation.
+         */
+        QOP_UNIFORMS_RESET,
 };
 
 struct queued_qpu_inst {
@@ -260,6 +266,11 @@ enum quniform_contents {
 
         QUNIFORM_ALPHA_REF,
         QUNIFORM_SAMPLE_MASK,
+
+        /* Placeholder uniform that will be updated by the kernel when used by
+         * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
+         */
+        QUNIFORM_UNIFORMS_ADDRESS,
 };
 
 struct vc4_varying_slot {
@@ -521,6 +532,7 @@ struct qreg qir_uniform(struct vc4_compile *c,
                         uint32_t data);
 void qir_schedule_instructions(struct vc4_compile *c);
 void qir_reorder_uniforms(struct vc4_compile *c);
+void qir_emit_uniform_stream_resets(struct vc4_compile *c);
 
 struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
 struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
new file mode 100644
index 0000000..3fd6358
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_qir_emit_uniform_stream_resets.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_emit_uniform_stream_resets.c
+ *
+ * Adds updates to the uniform stream address at the start of each basic block
+ * that uses uniforms.
+ *
+ * This is done just before the translation to QPU instructions, once we have
+ * performed optimization and know how many uniforms are used in each block.
+ */
+
+#include "vc4_qir.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+
+static bool
+inst_reads_a_uniform(struct qinst *inst)
+{
+        if (qir_is_tex(inst))
+                return true;
+
+        for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                if (inst->src[i].file == QFILE_UNIF)
+                        return true;
+        }
+
+        return false;
+}
+
+static bool
+block_reads_any_uniform(struct qblock *block)
+{
+        qir_for_each_inst(inst, block) {
+                if (inst_reads_a_uniform(inst))
+                        return true;
+        }
+
+        return false;
+}
+
+void
+qir_emit_uniform_stream_resets(struct vc4_compile *c)
+{
+        uint32_t uniform_count = 0;
+
+        qir_for_each_block(block, c) {
+                if (block != qir_entry_block(c) &&
+                    (block_reads_any_uniform(block) ||
+                     block == qir_exit_block(c))) {
+                        struct qreg t = qir_get_temp(c);
+                        struct qreg uni_addr =
+                                qir_uniform(c, QUNIFORM_UNIFORMS_ADDRESS, 0);
+
+                        /* Load the offset of the next uniform in the stream
+                         * after the one we're generating here.
+                         */
+                        struct qinst *load_imm =
+                                qir_inst(QOP_LOAD_IMM,
+                                         t,
+                                         qir_reg(QFILE_LOAD_IMM,
+                                                 (uniform_count + 1) * 4),
+                                         c->undef);
+                        struct qinst *add =
+                                qir_inst(QOP_UNIFORMS_RESET, c->undef,
+                                         t, uni_addr);
+
+                        /* Pushes to the top of the block, so in reverse
+                         * order.
+                         */
+                        list_add(&add->link, &block->instructions);
+                        list_add(&load_imm->link, &block->instructions);
+                }
+
+                qir_for_each_inst(inst, block) {
+                        if (inst_reads_a_uniform(inst))
+                                uniform_count++;
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 903c610..69bd0dd 100644
--- a/src/gallium/drivers/vc4/vc4_qir_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -138,6 +138,7 @@ struct schedule_setup_state {
         struct schedule_node *last_tex_coord;
         struct schedule_node *last_tex_result;
         struct schedule_node *last_tlb;
+        struct schedule_node *last_uniforms_reset;
         enum direction dir;
 
 	/**
@@ -280,6 +281,16 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
 
                 calculate_deps(&state, n);
 
+                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                        switch (inst->src[i].file) {
+                        case QFILE_UNIF:
+                                add_dep(state.dir, state.last_uniforms_reset, n);
+                                break;
+                        default:
+                                break;
+                        }
+                }
+
                 switch (inst->op) {
                 case QOP_TEX_S:
                 case QOP_TEX_T:
@@ -324,6 +335,11 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
                         memset(&state.tex_fifo[state.tex_fifo_pos], 0,
                                sizeof(state.tex_fifo[0]));
                         break;
+
+                case QOP_UNIFORMS_RESET:
+                        add_write_dep(state.dir, &state.last_uniforms_reset, n);
+                        break;
+
                 default:
                         assert(!qir_is_tex(inst));
                         break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 9001643..6a10e1b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -427,6 +427,14 @@ vc4_generate_code_block(struct vc4_compile *c,
                         handled_qinst_cond = true;
                         break;
 
+                case QOP_UNIFORMS_RESET:
+                        fixup_raddr_conflict(block, dst, &src[0], &src[1],
+                                             qinst, &unpack);
+
+                        queue(block, qpu_a_ADD(qpu_ra(QPU_W_UNIFORMS_ADDRESS),
+                                               src[0], src[1]));
+                        break;
+
                 default:
                         assert(qinst->op < ARRAY_SIZE(translate));
                         assert(translate[qinst->op].op != 0); /* NOPs */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index a55b035..1caee51 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -92,6 +92,7 @@ struct schedule_state {
         struct schedule_node *last_tmu_write;
         struct schedule_node *last_tlb;
         struct schedule_node *last_vpm;
+        struct schedule_node *last_uniforms_reset;
         enum direction dir;
         /* Estimated cycle when the current instruction would start. */
         uint32_t time;
@@ -184,6 +185,9 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
                 break;
 
         case QPU_R_UNIF:
+                add_read_dep(state, state->last_uniforms_reset, n);
+                break;
+
         case QPU_R_NOP:
         case QPU_R_ELEM_QPU:
         case QPU_R_XY_PIXEL_COORD:
@@ -259,6 +263,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                 }
         } else if (is_tmu_write(waddr)) {
                 add_write_dep(state, &state->last_tmu_write, n);
+                add_read_dep(state, state->last_uniforms_reset, n);
         } else if (qpu_waddr_is_tlb(waddr) ||
                    waddr == QPU_W_MS_FLAGS) {
                 add_write_dep(state, &state->last_tlb, n);
@@ -305,6 +310,10 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                         add_write_dep(state, &state->last_tlb, n);
                         break;
 
+                case QPU_W_UNIFORMS_ADDRESS:
+                        add_write_dep(state, &state->last_uniforms_reset, n);
+                        break;
+
                 case QPU_W_NOP:
                         break;
 
@@ -442,6 +451,7 @@ calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list)
 struct choose_scoreboard {
         int tick;
         int last_sfu_write_tick;
+        int last_uniforms_reset_tick;
         uint32_t last_waddr_a, last_waddr_b;
 };
 
@@ -476,6 +486,11 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, uint64_t inst)
                 }
         }
 
+        if (reads_uniform(inst) &&
+            scoreboard->tick - scoreboard->last_uniforms_reset_tick <= 2) {
+                return true;
+        }
+
         return false;
 }
 
@@ -614,6 +629,11 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
             (waddr_mul >= QPU_W_SFU_RECIP && waddr_mul <= QPU_W_SFU_LOG)) {
                 scoreboard->last_sfu_write_tick = scoreboard->tick;
         }
+
+        if (waddr_add == QPU_W_UNIFORMS_ADDRESS ||
+            waddr_mul == QPU_W_UNIFORMS_ADDRESS) {
+                scoreboard->last_uniforms_reset_tick = scoreboard->tick;
+        }
 }
 
 static void
@@ -971,6 +991,7 @@ qpu_schedule_instructions(struct vc4_compile *c)
         scoreboard.last_waddr_a = ~0;
         scoreboard.last_waddr_b = ~0;
         scoreboard.last_sfu_write_tick = -10;
+        scoreboard.last_uniforms_reset_tick = -10;
 
         if (debug) {
                 fprintf(stderr, "Pre-schedule instructions\n");
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 4715a7f..ee21771 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -324,6 +324,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                 case QUNIFORM_SAMPLE_MASK:
                         cl_aligned_u32(&uniforms, vc4->sample_mask);
                         break;
+
+                case QUNIFORM_UNIFORMS_ADDRESS:
+                        /* This will be filled in by the kernel. */
+                        cl_aligned_u32(&uniforms, 0xd0d0d0d0);
+                        break;
                 }
 #if 0
                 uint32_t written_val = *((uint32_t *)uniforms - 1);
@@ -345,6 +350,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
         for (int i = 0; i < shader->uniforms.count; i++) {
                 switch (shader->uniforms.contents[i]) {
                 case QUNIFORM_CONSTANT:
+                case QUNIFORM_UNIFORMS_ADDRESS:
                         break;
                 case QUNIFORM_UNIFORM:
                 case QUNIFORM_UBO_ADDR:




More information about the mesa-commit mailing list