<div dir="ltr"><div><div><div>This looks fine to me.  I just kicked off a build on our test farm and, assuming that looks good (I'll send another e-mail in the morning if it does),<br><br></div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>><br><br></div><div>I ran shader-db on the change and I was kind of surprised to see that it doesn't really do anything.<br><br>GAINED: shaders/dolphin/smg.1.shader_test FS SIMD16<br><br>total instructions in shared programs: 5769629 -> 5769629 (0.00%)<br>instructions in affected programs:     0 -> 0<br>helped:                                0<br>HURT:                                  0<br>GAINED:                                1<br>LOST:                                  0<br><br></div><div>Perhaps shader-db doesn't account for some other GL state required for dual-source because I doubt only one shader uses it.  Ken?<br></div><div><br></div></div>--Jason<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Mar 5, 2015 at 3:21 AM, Iago Toral Quiroga <span dir="ltr"><<a href="mailto:itoral@igalia.com" target="_blank">itoral@igalia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From the SNB PRM, volume 4, part 1, page 193:<br>
<br>
"The dual source render target messages only have SIMD8 forms due to<br>
 maximum message length limitations. SIMD16 pixel shaders must send two of<br>
 these messages to cover all of the pixels. Each message contains two colors<br>
 (4 channels each) for each pixel in the message payload."<br>
<br>
Bugzilla: <a href="https://bugs.freedesktop.org/show_bug.cgi?id=82831" target="_blank">https://bugs.freedesktop.org/show_bug.cgi?id=82831</a><br>
---<br>
I sent this patch for review some months ago, but it was bad timing because<br>
it was when Jason was doing a large rewrite of the visitor code handling<br>
FB writes, so the patch became immediately obsolete. This is the up-to-date<br>
version.<br>
<br>
If anyone wants to test this, I sent this patch to piglit with a test that<br>
can be used to check for correct SIMD16 implementation specifically:<br>
<a href="http://lists.freedesktop.org/archives/piglit/2015-March/015015.html" target="_blank">http://lists.freedesktop.org/archives/piglit/2015-March/015015.html</a><br>
<br>
 src/mesa/drivers/dri/i965/brw_eu.h             |  1 +<br>
 src/mesa/drivers/dri/i965/brw_eu_emit.c        |  3 +-<br>
 src/mesa/drivers/dri/i965/brw_fs.h             |  6 +-<br>
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 ++++-<br>
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 77 +++++++++++++++++++++-----<br>
 5 files changed, 83 insertions(+), 19 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h<br>
index 736c54b..d9ad5bd 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu.h<br>
@@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,<br>
                   unsigned msg_length,<br>
                   unsigned response_length,<br>
                   bool eot,<br>
+                  bool last_render_target,<br>
                   bool header_present);<br>
<br>
 void brw_SAMPLE(struct brw_compile *p,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
index 1d6fd67..74cf138 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c<br>
@@ -2292,6 +2292,7 @@ void brw_fb_WRITE(struct brw_compile *p,<br>
                   unsigned msg_length,<br>
                   unsigned response_length,<br>
                   bool eot,<br>
+                  bool last_render_target,<br>
                   bool header_present)<br>
 {<br>
    struct brw_context *brw = p->brw;<br>
@@ -2333,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p,<br>
                            msg_type,<br>
                            msg_length,<br>
                            header_present,<br>
-                           eot, /* last render target write */<br>
+                           last_render_target,<br>
                            response_length,<br>
                            eot,<br>
                            0 /* send_commit_msg */);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index 70098d8..5a4f66c 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -369,10 +369,12 @@ public:<br>
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,<br>
                                      const fs_reg &result);<br>
<br>
-   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);<br>
+   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,<br>
+                           bool use_2nd_half);<br>
    void emit_alpha_test();<br>
    fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,<br>
-                                 fs_reg src0_alpha, unsigned components);<br>
+                                 fs_reg src0_alpha, unsigned components,<br>
+                                 bool use_2nd_half = false);<br>
    void emit_fb_writes();<br>
    void emit_urb_writes();<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
index cbe6191..db70df5 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp<br>
@@ -217,9 +217,12 @@ fs_generator::fire_fb_write(fs_inst *inst,<br>
<br>
    if (inst->opcode == FS_OPCODE_REP_FB_WRITE)<br>
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;<br>
-   else if (prog_data->dual_src_blend)<br>
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;<br>
-   else if (dispatch_width == 16)<br>
+   else if (prog_data->dual_src_blend) {<br>
+      if (dispatch_width == 8 || !inst->eot)<br>
+         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;<br>
+      else<br>
+         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;<br>
+   } else if (dispatch_width == 16)<br>
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;<br>
    else<br>
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;<br>
@@ -227,6 +230,10 @@ fs_generator::fire_fb_write(fs_inst *inst,<br>
    uint32_t surf_index =<br>
       prog_data->binding_table.render_target_start + inst->target;<br>
<br>
+   bool last_render_target = inst->eot ||<br>
+                             (prog_data->dual_src_blend && dispatch_width == 16);<br>
+<br>
+<br>
    brw_fb_WRITE(p,<br>
                 dispatch_width,<br>
                 payload,<br>
@@ -236,6 +243,7 @@ fs_generator::fire_fb_write(fs_inst *inst,<br>
                 nr,<br>
                 0,<br>
                 inst->eot,<br>
+                last_render_target,<br>
                 inst->header_present);<br>
<br>
    brw_mark_surface_used(&prog_data->base, surf_index);<br>
@@ -373,6 +381,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)<br>
                 inst->mlen,<br>
                 0,<br>
                 true,<br>
+                true,<br>
                 inst->header_present);<br>
 }<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
index 57c4d66..d3b8345 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
@@ -3366,7 +3366,8 @@ fs_visitor::emit_interpolation_setup_gen6()<br>
 }<br>
<br>
 int<br>
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)<br>
+fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,<br>
+                                bool use_2nd_half)<br>
 {<br>
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;<br>
    fs_inst *inst;<br>
@@ -3384,7 +3385,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)<br>
       colors_enabled = (1 << components) - 1;<br>
    }<br>
<br>
-   if (dispatch_width == 8 || brw->gen >= 6) {<br>
+   if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) {<br>
       /* SIMD8 write looks like:<br>
        * m + 0: r0<br>
        * m + 1: r1<br>
@@ -3415,6 +3416,33 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)<br>
          len++;<br>
       }<br>
       return len;<br>
+   } else if (brw->gen >= 6 && do_dual_src) {<br>
+      /* SIMD16 dual source blending for gen6+.<br>
+       *<br>
+       * From the SNB PRM, volume 4, part 1, page 193:<br>
+       *<br>
+       * "The dual source render target messages only have SIMD8 forms due to<br>
+       *  maximum message length limitations. SIMD16 pixel shaders must send two<br>
+       *  of these messages to cover all of the pixels. Each message contains<br>
+       *  two colors (4 channels each) for each pixel in the message payload."<br>
+       *<br>
+       * So in SIMD16 dual source blending we will send 2 SIMD8 messages,<br>
+       * each one will call this function twice (one for each color involved),<br>
+       * so in each pass we only write 4 registers. Notice that the second<br>
+       * SIMD8 message needs to read color data from the 2nd half of the color<br>
+       * registers, so it needs to call this with use_2nd_half = true.<br>
+       */<br>
+      for (unsigned i = 0; i < 4; ++i) {<br>
+         if (colors_enabled & (1 << i)) {<br>
+            dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);<br>
+            inst = emit(MOV(dst[i], half(offset(color, i),<br>
+                                         use_2nd_half ? 1 : 0)));<br>
+            inst->saturate = key->clamp_fragment_color;<br>
+            if (use_2nd_half)<br>
+               inst->force_sechalf = true;<br>
+         }<br>
+      }<br>
+      return 4;<br>
    } else {<br>
       /* pre-gen6 SIMD16 single source DP write looks like:<br>
        * m + 0: r0<br>
@@ -3498,7 +3526,8 @@ fs_visitor::emit_alpha_test()<br>
<br>
 fs_inst *<br>
 fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,<br>
-                                 fs_reg src0_alpha, unsigned components)<br>
+                                 fs_reg src0_alpha, unsigned components,<br>
+                                 bool use_2nd_half)<br>
 {<br>
    assert(stage == MESA_SHADER_FRAGMENT);<br>
    brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;<br>
@@ -3558,7 +3587,8 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,<br>
        * alpha out the pipeline to our null renderbuffer to support<br>
        * alpha-testing, alpha-to-coverage, and so on.<br>
        */<br>
-      length += setup_color_payload(sources + length, this->outputs[0], 0);<br>
+      length += setup_color_payload(sources + length, this->outputs[0], 0,<br>
+                                    false);<br>
    } else if (color1.file == BAD_FILE) {<br>
       if (src0_alpha.file != BAD_FILE) {<br>
          sources[length] = fs_reg(GRF, alloc.allocate(reg_size),<br>
@@ -3568,10 +3598,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,<br>
          length++;<br>
       }<br>
<br>
-      length += setup_color_payload(sources + length, color0, components);<br>
+      length += setup_color_payload(sources + length, color0, components,<br>
+                                    false);<br>
    } else {<br>
-      length += setup_color_payload(sources + length, color0, components);<br>
-      length += setup_color_payload(sources + length, color1, components);<br>
+      length += setup_color_payload(sources + length, color0, components,<br>
+                                    use_2nd_half);<br>
+      length += setup_color_payload(sources + length, color1, components,<br>
+                                    use_2nd_half);<br>
    }<br>
<br>
    if (source_depth_to_render_target) {<br>
@@ -3640,12 +3673,6 @@ fs_visitor::emit_fb_writes()<br>
    brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;<br>
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;<br>
<br>
-   if (do_dual_src) {<br>
-      no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");<br>
-      if (dispatch_width == 16)<br>
-         do_dual_src = false;<br>
-   }<br>
-<br>
    fs_inst *inst;<br>
    if (do_dual_src) {<br>
       if (INTEL_DEBUG & DEBUG_SHADER_TIME)<br>
@@ -3656,6 +3683,30 @@ fs_visitor::emit_fb_writes()<br>
       inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,<br>
                                   reg_undef, 4);<br>
       inst->target = 0;<br>
+<br>
+      /* SIMD16 dual source blending requires to send two SIMD8 dual source<br>
+       * messages, where each message contains color data for 8 pixels. Color<br>
+       * data for the first group of pixels is stored in the "lower" half of<br>
+       * the color registers, so in SIMD16, the previous message did:<br>
+       * m + 0: r0<br>
+       * m + 1: g0<br>
+       * m + 2: b0<br>
+       * m + 3: a0<br>
+       *<br>
+       * Here goes the second message, which packs color data for the<br>
+       * remaining 8 pixels. Color data for these pixels is stored in the<br>
+       * "upper" half of the color registers, so we need to do:<br>
+       * m + 0: r1<br>
+       * m + 1: g1<br>
+       * m + 2: b1<br>
+       * m + 3: a1<br>
+       */<br>
+      if (dispatch_width == 16) {<br>
+         inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,<br>
+                                     reg_undef, 4, true);<br>
+         inst->target = 0;<br>
+      }<br>
+<br>
       prog_data->dual_src_blend = true;<br>
    } else if (key->nr_color_regions > 0) {<br>
       for (int target = 0; target < key->nr_color_regions; target++) {<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.9.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>