Mesa (master): v3d: Don't rely on sorting input vars for VPM read setup.

Tue Oct 30 17:58:28 UTC 2018

Module: Mesa
Branch: master
Commit: fc85f7cfdc154e6c2f29445b6023b379c3c18864
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc85f7cfdc154e6c2f29445b6023b379c3c18864

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Sep 18 11:56:22 2018 -0700

v3d: Don't rely on sorting input vars for VPM read setup.

To support scalar VPM I/O at the NIR level, we need to do a pass over
the vars to figure out how big each attribute is after dead-code
elimination (DCE).  Once we've done that, we can just walk over
c->vattr_sizes[] instead of bothering with sorting and re-walking the vars.

---

 src/broadcom/compiler/nir_to_vir.c | 48 ++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 9bcca9dfe7..4becc972a6 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1276,35 +1276,36 @@ ntq_emit_vpm_read(struct v3d_compile *c,
 static void
 ntq_setup_vpm_inputs(struct v3d_compile *c)
 {
-        unsigned num_entries = 0;
-        unsigned num_components = 0;
+        /* Figure out how many components of each vertex attribute the shader
+         * uses.  Each variable should have been split to individual
+         * components and unused ones DCEed.  The vertex fetcher will load
+         * from the start of the attribute to the number of components we
+         * declare we need in c->vattr_sizes[].
+         */
         nir_foreach_variable(var, &c->s->inputs) {
-                num_entries++;
-                num_components += glsl_get_components(var->type);
-        }
-
-        nir_variable *vars[num_entries];
+                /* No VS attribute array support. */
+                assert(MAX2(glsl_get_length(var->type), 1) == 1);
 
-        unsigned i = 0;
-        nir_foreach_variable(var, &c->s->inputs)
-                vars[i++] = var;
+                unsigned loc = var->data.driver_location;
+                int start_component = var->data.location_frac;
+                int num_components = glsl_get_components(var->type);
 
-        /* Sort the variables so that we emit the input setup in
-         * driver_location order.  This is required for VPM reads, whose data
-         * is fetched into the VPM in driver_location (TGSI register index)
-         * order.
-         */
-        qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
+                c->vattr_sizes[loc] = MAX2(c->vattr_sizes[loc],
+                                           start_component + num_components);
+        }
 
+        unsigned num_components = 0;
         uint32_t vpm_components_queued = 0;
         bool uses_iid = c->s->info.system_values_read &
                 (1ull << SYSTEM_VALUE_INSTANCE_ID);
         bool uses_vid = c->s->info.system_values_read &
                 (1ull << SYSTEM_VALUE_VERTEX_ID);
-
         num_components += uses_iid;
         num_components += uses_vid;
 
+        for (int i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
+                num_components += c->vattr_sizes[i];
+
         if (uses_iid) {
                 c->iid = ntq_emit_vpm_read(c, &vpm_components_queued,
                                            &num_components, ~0);
@@ -1315,19 +1316,11 @@ ntq_setup_vpm_inputs(struct v3d_compile *c)
                                            &num_components, ~0);
         }
 
-        for (unsigned i = 0; i < num_entries; i++) {
-                nir_variable *var = vars[i];
-                unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-                unsigned loc = var->data.driver_location;
-
-                assert(array_len == 1);
-                (void)array_len;
+        for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) {
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                   (loc + 1) * 4);
 
-                int var_components = glsl_get_components(var->type);
-
-                for (int i = 0; i < var_components; i++) {
+                for (int i = 0; i < c->vattr_sizes[loc]; i++) {
                         c->inputs[loc * 4 + i] =
                                 ntq_emit_vpm_read(c,
                                                   &vpm_components_queued,
@@ -1335,7 +1328,6 @@ ntq_setup_vpm_inputs(struct v3d_compile *c)
                                                   loc * 4 + i);
 
                 }
-                c->vattr_sizes[loc] = var_components;
         }
 
         if (c->devinfo->ver >= 40) {