[Mesa-dev] [PATCH v2 13/15] SQUASH: i965/fs: Simplify setup_color_payload

Tue May 5 18:28:16 PDT 2015

Previously, setup_color_payload was a a big helper function that did a lot
of gen-specific special casing for setting up the color sources of the
LOAD_PAYLOAD instruction.  Now that lower_load_payload is much more sane,
most of that complexity isn't needed anymore.  Instead, we can do a simple
fixup pass for color clamps and then just stash sources directly in the
LOAD_PAYLOAD.  We can trust lower_load_payload to do the right thing with
respect to COMPR4.
---
 src/mesa/drivers/dri/i965/brw_fs.h           |   4 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 155 +++++++--------------------
 2 files changed, 43 insertions(+), 116 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 30cefe4..1d7de2e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -394,8 +394,8 @@ public:
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
                                      const fs_reg &result);
 
-   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                           bool use_2nd_half);
+   void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
+                            unsigned exec_size, bool use_2nd_half);
    void emit_alpha_test();
    fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 772285e..80ca1b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3558,105 +3558,30 @@ fs_visitor::emit_interpolation_setup_gen6()
    this->current_annotation = NULL;
 }
 
-int
+void
 fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                                bool use_2nd_half)
+                                unsigned exec_size, bool use_2nd_half)
 {
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
    fs_inst *inst;
 
-   if (color.file == BAD_FILE) {
-      return 4 * (dispatch_width / 8);
-   }
-
-   uint8_t colors_enabled;
-   if (components == 0) {
-      /* We want to write one component to the alpha channel */
-      colors_enabled = 0x8;
-   } else {
-      /* Enable the first components-many channels */
-      colors_enabled = (1 << components) - 1;
+   if (key->clamp_fragment_color) {
+      fs_reg tmp = vgrf(glsl_type::vec4_type);
+      assert(color.type == BRW_REGISTER_TYPE_F);
+      for (unsigned i = 0; i < components; i++) {
+         inst = emit(MOV(offset(tmp, i), offset(color, i)));
+         inst->saturate = true;
+      }
+      color = tmp;
    }
 
-   if (dispatch_width == 8 || (devinfo->gen >= 6 && !do_dual_src)) {
-      /* SIMD8 write looks like:
-       * m + 0: r0
-       * m + 1: r1
-       * m + 2: g0
-       * m + 3: g1
-       *
-       * gen6 SIMD16 DP write looks like:
-       * m + 0: r0
-       * m + 1: r1
-       * m + 2: g0
-       * m + 3: g1
-       * m + 4: b0
-       * m + 5: b1
-       * m + 6: a0
-       * m + 7: a1
-       */
-      int len = 0;
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
-                              color.type, color.width);
-            inst = emit(MOV(dst[len], offset(color, i)));
-            inst->saturate = key->clamp_fragment_color;
-         }
-         len++;
-      }
-      return len;
-   } else if (devinfo->gen >= 6 && do_dual_src) {
-      /* SIMD16 dual source blending for gen6+.
-       *
-       * From the SNB PRM, volume 4, part 1, page 193:
-       *
-       * "The dual source render target messages only have SIMD8 forms due to
-       *  maximum message length limitations. SIMD16 pixel shaders must send two
-       *  of these messages to cover all of the pixels. Each message contains
-       *  two colors (4 channels each) for each pixel in the message payload."
-       *
-       * So in SIMD16 dual source blending we will send 2 SIMD8 messages,
-       * each one will call this function twice (one for each color involved),
-       * so in each pass we only write 4 registers. Notice that the second
-       * SIMD8 message needs to read color data from the 2nd half of the color
-       * registers, so it needs to call this with use_2nd_half = true.
-       */
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i], half(offset(color, i),
-                                         use_2nd_half ? 1 : 0)));
-            inst->saturate = key->clamp_fragment_color;
-            if (use_2nd_half)
-               inst->force_sechalf = true;
-         }
-      }
-      return 4;
+   if (exec_size < dispatch_width) {
+      unsigned half_idx = use_2nd_half ? 1 : 0;
+      for (unsigned i = 0; i < components; i++)
+         dst[i] = half(offset(color, i), half_idx);
    } else {
-      /* pre-gen6 SIMD16 single source DP write looks like:
-       * m + 0: r0
-       * m + 1: g0
-       * m + 2: b0
-       * m + 3: a0
-       * m + 4: r1
-       * m + 5: g1
-       * m + 6: b1
-       * m + 7: a1
-       */
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i], half(offset(color, i), 0)));
-            inst->saturate = key->clamp_fragment_color;
-
-            dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
-            inst->saturate = key->clamp_fragment_color;
-            inst->force_sechalf = true;
-         }
-      }
-      return 8;
+      for (unsigned i = 0; i < components; i++)
+         dst[i] = offset(color, i);
    }
 }
 
@@ -3725,7 +3650,6 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 
    this->current_annotation = "FB write header";
    int header_size = 2, payload_header_size;
-   int reg_size = exec_size / 8;
 
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3781,24 +3705,26 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
        */
-      length += setup_color_payload(sources + length, this->outputs[0], 0,
-                                    false);
+      if (this->outputs[0].file != BAD_FILE)
+         setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3),
+                             1, exec_size, false);
+      length += 4;
    } else if (color1.file == BAD_FILE) {
       if (src0_alpha.file != BAD_FILE) {
-         sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
-                                  src0_alpha.type, src0_alpha.width);
-         fs_inst *inst = emit(MOV(sources[length], src0_alpha));
-         inst->saturate = key->clamp_fragment_color;
+         setup_color_payload(&sources[length], src0_alpha, 1, exec_size, false);
          length++;
       }
 
-      length += setup_color_payload(sources + length, color0, components,
-                                    false);
+      setup_color_payload(&sources[length], color0, components,
+                          exec_size, use_2nd_half);
+      length += 4;
    } else {
-      length += setup_color_payload(sources + length, color0, components,
-                                    use_2nd_half);
-      length += setup_color_payload(sources + length, color1, components,
-                                    use_2nd_half);
+      setup_color_payload(&sources[length], color0, components,
+                          exec_size, use_2nd_half);
+      length += 4;
+      setup_color_payload(&sources[length], color1, components,
+                          exec_size, use_2nd_half);
+      length += 4;
    }
 
    if (source_depth_to_render_target) {
@@ -3811,25 +3737,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 	 no16("Missing support for simd16 depth writes on gen6\n");
       }
 
-      sources[length] = vgrf(glsl_type::float_type);
       if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 	 /* Hand over gl_FragDepth. */
 	 assert(this->frag_depth.file != BAD_FILE);
-	 emit(MOV(sources[length], this->frag_depth));
+         sources[length] = this->frag_depth;
       } else {
 	 /* Pass through the payload depth. */
-	 emit(MOV(sources[length],
-                  fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+         sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
       }
       length++;
    }
 
-   if (payload.dest_depth_reg) {
-      sources[length] = vgrf(glsl_type::float_type);
-      emit(MOV(sources[length],
-               fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
-      length++;
-   }
+   if (payload.dest_depth_reg)
+      sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
 
    fs_inst *load;
    fs_inst *write;
@@ -3845,6 +3765,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
       /* Send from the MRF */
       load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
                                sources, length, payload_header_size));
+
+      /* On pre-SNB, we have to interlace the color values.  LOAD_PAYLOAD
+       * will do this for us if we just give it a COMPR4 destination.
+       */
+      if (brw->gen < 6 && exec_size == 16)
+         load->dst.reg |= BRW_MRF_COMPR4;
+
       write = emit(FS_OPCODE_FB_WRITE);
       write->exec_size = exec_size;
       write->base_mrf = 1;
-- 
2.3.6