Mesa (master): i965: enable component packing for vs and fs

Timothy Arceri tarceri at kemper.freedesktop.org
Wed Jul 20 23:11:09 UTC 2016


Module: Mesa
Branch: master
Commit: 7f53fead5cf9a85c74a94d359dd5fccfbb87856c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f53fead5cf9a85c74a94d359dd5fccfbb87856c

Author: Timothy Arceri <timothy.arceri at collabora.com>
Date:   Mon May 23 16:48:05 2016 +1000

i965: enable component packing for vs and fs

Rather than trying to work out the total number of components
used at a location we simply treat all outputs as vec4s. This
removes the need for complex code looping over varyings to match
packed locations and the need for storing the total number of
components used at each location.

Reviewed-by: Alejandro Piñeiro <apinheiro at igalia.com>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_fs.h           |  1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 22 ++++++++++------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++--------
 src/mesa/drivers/dri/i965/brw_nir.c          |  8 ++++----
 4 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 574475f..fc1e1c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -317,7 +317,6 @@ public:
    fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
    fs_reg dual_src_output;
    bool do_dual_src;
    int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 4d4c94a..65bca6d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
       }
    } else {
       assert(type->is_scalar() || type->is_vector());
-      unsigned num_elements = type->vector_elements;
+      unsigned num_iter = 1;
       if (type->is_double())
-         num_elements *= 2;
-      for (unsigned count = 0; count < num_elements; count += 4) {
+         num_iter = 2;
+      for (unsigned count = 0; count < num_iter; count++) {
          this->outputs[*location] = *reg;
-         this->output_components[*location] = MIN2(4, num_elements - count);
-         *reg = offset(*reg, bld, this->output_components[*location]);
+         *reg = offset(*reg, bld, 4);
          (*location)++;
       }
    }
@@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
             /* Writing gl_FragColor outputs to all color regions. */
             for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
                this->outputs[i] = reg;
-               this->output_components[i] = 4;
             }
          } else if (var->data.location == FRAG_RESULT_DEPTH) {
             this->frag_depth = reg;
@@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
          } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
             this->sample_mask = reg;
          } else {
-            int vector_elements = var->type->without_array()->vector_elements;
-
             /* gl_FragData or a user-defined FS output */
             assert(var->data.location >= FRAG_RESULT_DATA0 &&
                    var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
@@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
             /* General color output. */
             for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
                int output = var->data.location - FRAG_RESULT_DATA0 + i;
-               this->outputs[output] = offset(reg, bld, vector_elements * i);
-               this->output_components[output] = vector_elements;
+               this->outputs[output] = offset(reg, bld, 4 * i);
             }
          }
          break;
@@ -2360,6 +2355,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
 
    case nir_intrinsic_load_input: {
       fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned num_components = instr->num_components;
       enum brw_reg_type type = dest.type;
 
@@ -2368,7 +2364,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
       src = offset(src, bld, const_offset->u32[0]);
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
       }
 
       if (type == BRW_REGISTER_TYPE_DF) {
@@ -4103,6 +4099,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       new_dest = offset(new_dest, bld, const_offset->u32[0]);
 
       unsigned num_components = instr->num_components;
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned bit_size = instr->src[0].is_ssa ?
          instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
       if (bit_size == 64) {
@@ -4116,7 +4113,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       }
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(new_dest, bld, j + first_component),
+                 offset(src, bld, j));
       }
       break;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 156a630..6d84374 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -459,8 +459,7 @@ fs_visitor::emit_fb_writes()
             src0_alpha = offset(outputs[0], bld, 3);
 
          inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
-                                     src0_alpha,
-                                     this->output_components[target]);
+                                     src0_alpha, 4);
          inst->target = target;
       }
    }
@@ -545,9 +544,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
    const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       fs_reg u = userplane[i];
@@ -724,10 +721,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
                sources[length++] = reg;
             }
          } else {
-            for (unsigned i = 0; i < output_components[varying]; i++)
+            for (unsigned i = 0; i < 4; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
-            for (unsigned i = output_components[varying]; i < 4; i++)
-               sources[length++] = brw_imm_d(0);
          }
          break;
       }
@@ -901,7 +896,6 @@ fs_visitor::init()
    this->nir_ssa_values = NULL;
 
    memset(&this->payload, 0, sizeof(this->payload));
-   memset(this->output_components, 0, sizeof(this->output_components));
    this->source_depth_to_render_target = false;
    this->runtime_check_aads_emit = false;
    this->first_non_payload_grf = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index fe76531..388ae9e 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -302,8 +302,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
    if (is_scalar) {
       nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                                VARYING_SLOT_VAR0,
-                               type_size_scalar);
-      nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
+                               type_size_vec4_times_4);
+      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
    } else {
       nir_foreach_variable(var, &nir->outputs)
          var->data.driver_location = var->data.location;
@@ -340,8 +340,8 @@ void
 brw_nir_lower_fs_outputs(nir_shader *nir)
 {
    nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                            FRAG_RESULT_DATA0, type_size_scalar);
-   nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
+                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
+   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
 }
 
 void




More information about the mesa-commit mailing list