[Mesa-dev] [PATCH 02/15] i965: enable component packing for vs and fs

Timothy Arceri timothy.arceri at collabora.com
Tue Jul 19 06:33:14 UTC 2016


Rather than trying to work out the total number of components
used at a location, we simply treat all outputs as vec4s.
---
 src/mesa/drivers/dri/i965/brw_fs.h           |  1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 22 ++++++++++------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++--------
 src/mesa/drivers/dri/i965/brw_nir.c          |  8 ++++----
 4 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 574475f..fc1e1c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -317,7 +317,6 @@ public:
    fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
    fs_reg dual_src_output;
    bool do_dual_src;
    int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 610c151..395594f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -67,13 +67,12 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
       }
    } else {
       assert(type->is_scalar() || type->is_vector());
-      unsigned num_elements = type->vector_elements;
+      unsigned num_iter = 1;
       if (type->is_double())
-         num_elements *= 2;
-      for (unsigned count = 0; count < num_elements; count += 4) {
+         num_iter = 2;
+      for (unsigned count = 0; count < num_iter; count++) {
          this->outputs[*location] = *reg;
-         this->output_components[*location] = MIN2(4, num_elements - count);
-         *reg = offset(*reg, bld, this->output_components[*location]);
+         *reg = offset(*reg, bld, 4);
          (*location)++;
       }
    }
@@ -114,7 +113,6 @@ fs_visitor::nir_setup_outputs()
             /* Writing gl_FragColor outputs to all color regions. */
             for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
                this->outputs[i] = reg;
-               this->output_components[i] = 4;
             }
          } else if (var->data.location == FRAG_RESULT_DEPTH) {
             this->frag_depth = reg;
@@ -123,8 +121,6 @@ fs_visitor::nir_setup_outputs()
          } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
             this->sample_mask = reg;
          } else {
-            int vector_elements = var->type->without_array()->vector_elements;
-
             /* gl_FragData or a user-defined FS output */
             assert(var->data.location >= FRAG_RESULT_DATA0 &&
                    var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
@@ -132,8 +128,7 @@ fs_visitor::nir_setup_outputs()
             /* General color output. */
             for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
                int output = var->data.location - FRAG_RESULT_DATA0 + i;
-               this->outputs[output] = offset(reg, bld, vector_elements * i);
-               this->output_components[output] = vector_elements;
+               this->outputs[output] = offset(reg, bld, 4 * i);
             }
          }
          break;
@@ -3892,6 +3887,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_load_input: {
       fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned num_components = instr->num_components;
       enum brw_reg_type type = dest.type;
 
@@ -3900,7 +3896,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       src = offset(src, bld, const_offset->u32[0]);
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
       }
 
       if (type == BRW_REGISTER_TYPE_DF) {
@@ -4026,6 +4022,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       new_dest = offset(new_dest, bld, const_offset->u32[0]);
 
       unsigned num_components = instr->num_components;
+      unsigned first_component = nir_intrinsic_component(instr);
       unsigned bit_size = instr->src[0].is_ssa ?
          instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
       if (bit_size == 64) {
@@ -4039,7 +4036,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       }
 
       for (unsigned j = 0; j < num_components; j++) {
-         bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
+         bld.MOV(offset(new_dest, bld, j + first_component),
+                 offset(src, bld, j));
       }
       break;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 156a630..6d84374 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -459,8 +459,7 @@ fs_visitor::emit_fb_writes()
             src0_alpha = offset(outputs[0], bld, 3);
 
          inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
-                                     src0_alpha,
-                                     this->output_components[target]);
+                                     src0_alpha, 4);
          inst->target = target;
       }
    }
@@ -545,9 +544,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
    const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST0] = 4;
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
-   this->output_components[VARYING_SLOT_CLIP_DIST1] = 4;
 
    for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
       fs_reg u = userplane[i];
@@ -724,10 +721,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
                sources[length++] = reg;
             }
          } else {
-            for (unsigned i = 0; i < output_components[varying]; i++)
+            for (unsigned i = 0; i < 4; i++)
                sources[length++] = offset(this->outputs[varying], bld, i);
-            for (unsigned i = output_components[varying]; i < 4; i++)
-               sources[length++] = brw_imm_d(0);
          }
          break;
       }
@@ -901,7 +896,6 @@ fs_visitor::init()
    this->nir_ssa_values = NULL;
 
    memset(&this->payload, 0, sizeof(this->payload));
-   memset(this->output_components, 0, sizeof(this->output_components));
    this->source_depth_to_render_target = false;
    this->runtime_check_aads_emit = false;
    this->first_non_payload_grf = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index d1a823a..8f68c39 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -302,8 +302,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
    if (is_scalar) {
       nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                                VARYING_SLOT_VAR0,
-                               type_size_scalar);
-      nir_lower_io(nir, nir_var_shader_out, type_size_scalar, false);
+                               type_size_vec4_times_4);
+      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4, false);
    } else {
       nir_foreach_variable(var, &nir->outputs)
          var->data.driver_location = var->data.location;
@@ -340,8 +340,8 @@ void
 brw_nir_lower_fs_outputs(nir_shader *nir)
 {
    nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                            FRAG_RESULT_DATA0, type_size_scalar);
-   nir_lower_io(nir, nir_var_shader_out, type_size_scalar, false);
+                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
+   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4, false);
 }
 
 void
-- 
2.7.4



More information about the mesa-dev mailing list