[Mesa-dev] [PATCH v2] i965/fs: Implement SIMD16 dual source blending.

Iago Toral Quiroga itoral at igalia.com
Thu Sep 18 00:53:53 PDT 2014


From the SNB PRM, volume 4, part 1, page 193:
"The dual source render target messages only have SIMD8 forms due to
maximum message length limitations. SIMD16 pixel shaders must send two of
these messages to cover all of the pixels. Each message contains two colors
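(4 channels each) for each pixel in the message payload."

Implement this by emitting two SIMD8 dual source FB writes in SIMD16 mode:
the first covers subspans 0-1 and the second, which carries EOT, covers
subspans 2-3. brw_fb_WRITE() gains a last_render_target parameter so that
the "last render target" bit is no longer tied to EOT.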

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831
---
 src/mesa/drivers/dri/i965/brw_eu.h             |  1 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c        |  3 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 ++++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 79 ++++++++++++++++++++++----
 4 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index e6c26e3..5908ba5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 		   unsigned msg_length,
 		   unsigned response_length,
 		   bool eot,
+		   bool last_render_target,
 		   bool header_present);
 
 void brw_SAMPLE(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 39f94e9..ffdbe6d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p,
                   unsigned msg_length,
                   unsigned response_length,
                   bool eot,
+                  bool last_render_target,
                   bool header_present)
 {
    struct brw_context *brw = p->brw;
@@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 			    msg_type,
 			    msg_length,
 			    header_present,
-			    eot, /* last render target write */
+			    last_render_target,
 			    response_length,
 			    eot,
 			    0 /* send_commit_msg */);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1bc10f5..a4b84aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst *inst,
 
    if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
-   else if (prog_data->dual_src_blend)
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
-   else if (dispatch_width == 16)
+   else if (prog_data->dual_src_blend) {
+      if (dispatch_width == 8 || !inst->eot)
+         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
+      else
+         msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
+   } else if (dispatch_width == 16)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
    else
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
@@ -131,6 +134,9 @@ fs_generator::fire_fb_write(fs_inst *inst,
    uint32_t surf_index =
       prog_data->binding_table.render_target_start + inst->target;
 
+   bool last_render_target = inst->eot ||
+                             (prog_data->dual_src_blend && dispatch_width == 16);
+
    brw_fb_WRITE(p,
                 dispatch_width,
                 base_reg,
@@ -140,6 +146,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
                 nr,
                 0,
                 inst->eot,
+                last_render_target,
                 inst->header_present);
 
    brw_mark_surface_used(&prog_data->base, surf_index);
@@ -254,6 +261,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
                 inst->mlen,
                 0,
                 true,
+                true,
                 inst->header_present);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2d5318a..99d04c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3064,12 +3064,6 @@ fs_visitor::emit_fb_writes()
    int reg_width = dispatch_width / 8;
    bool src0_alpha_to_render_target = false;
 
-   if (do_dual_src) {
-      no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
-      if (dispatch_width == 16)
-         do_dual_src = false;
-   }
-
    /* From the Sandy Bridge PRM, volume 4, page 198:
     *
     *     "Dispatched Pixel Enables. One bit per pixel indicating
@@ -3109,11 +3103,22 @@ fs_visitor::emit_fb_writes()
       nr += 1;
    }
 
-   /* Reserve space for color. It'll be filled in per MRT below. */
+   /* Reserve space for color. It'll be filled in per MRT below.
+    *
+    * From the SNB PRM, volume 4, part 1, page 193:
+    * "The dual source render target messages only have SIMD8 forms due to
+    * maximum message length limitations. SIMD16 pixel shaders must send two of
+    * these messages to cover all of the pixels. Each message contains two
+    * colors (4 channels each) for each pixel in the message payload."
+    *
+    * So color data in dual source mode, whether this is a SIMD8 or SIMD16
+    * program, always requires 8 MRF registers.
+    */
    int color_mrf = nr;
-   nr += 4 * reg_width;
    if (do_dual_src)
-      nr += 4;
+      nr += 8;
+   else
+      nr += 4 * reg_width;
    if (src0_alpha_to_render_target)
       nr += reg_width;
 
@@ -3148,6 +3153,8 @@ fs_visitor::emit_fb_writes()
    if (do_dual_src) {
       fs_reg src0 = this->outputs[0];
       fs_reg src1 = this->dual_src_output;
+      int src0_offset = src0.reg_offset;
+      int src1_offset = src1.reg_offset;
 
       this->current_annotation = ralloc_asprintf(this->mem_ctx,
 						 "FB write src0");
@@ -3173,13 +3180,65 @@ fs_visitor::emit_fb_writes()
       inst->target = 0;
       inst->base_mrf = base_mrf;
       inst->mlen = nr - base_mrf;
-      inst->eot = true;
+      inst->eot = dispatch_width == 8;
       inst->header_present = header_present;
       if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) {
          inst->predicate = BRW_PREDICATE_NORMAL;
          inst->flag_subreg = 1;
       }
 
+      if (dispatch_width == 16) {
+         /* SIMD16 dual source blending requires sending two SIMD8 dual source
+          * messages, where each message contains color data for 8 pixels. Color
+          * data for the first group of pixels is stored in the "lower" half of
+          * the color registers, so in SIMD16, the previous message did:
+          * m + 0: r0
+          * m + 1: g0
+          * m + 2: b0
+          * m + 3: a0
+          *
+          * Here goes the second message, which packs color data for the
+          * remaining 8 pixels. Color data for these pixels is stored in the
+          * "upper" half of the color registers, so we need to do:
+          * m + 0: r1
+          * m + 1: g1
+          * m + 2: b1
+          * m + 3: a1
+          */
+         this->current_annotation = ralloc_asprintf(this->mem_ctx,
+                                                    "FB write src0 (2)");
+         src0.reg_offset = src0_offset;
+         for (int i = 0; i < 4; i++) {
+            fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type),
+                                     half(src0, 1)));
+            inst->force_sechalf = true;
+            src0.reg_offset++;
+            inst->saturate = key->clamp_fragment_color;
+         }
+
+         this->current_annotation = ralloc_asprintf(this->mem_ctx,
+                                                    "FB write src1 (2)");
+         src1.reg_offset = src1_offset;
+         for (int i = 0; i < 4; i++) {
+            fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
+                                            half(src1, 1)));
+            inst->force_sechalf = true;
+            src1.reg_offset++;
+            inst->saturate = key->clamp_fragment_color;
+         }
+
+         fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
+         inst->target = 0;
+         inst->base_mrf = base_mrf;
+         inst->mlen = nr - base_mrf;
+         inst->eot = true;
+         inst->header_present = header_present;
+         if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) {
+            inst->predicate = BRW_PREDICATE_NORMAL;
+            inst->flag_subreg = 1;
+         }
+      }
+
       prog_data->dual_src_blend = true;
       this->current_annotation = NULL;
       return;
-- 
1.9.1



More information about the mesa-dev mailing list