[Mesa-dev] [PATCH v2 40/41] i965/fs: Use the GRF for FB writes on gen >= 7

Jason Ekstrand jason at jlekstrand.net
Fri Sep 26 12:24:58 PDT 2014


On gen 7, the MRF was removed and we gained the ability to do send
instructions directly from the GRF.  This commit enables that functionality
for FB writes.

v2: Make handling of components more sane.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |   4 +
 src/mesa/drivers/dri/i965/brw_fs.h           |   1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 167 +++++++++++++++++++++------
 src/mesa/drivers/dri/i965/brw_shader.cpp     |   1 +
 4 files changed, 136 insertions(+), 37 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b43032b..143b590 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -514,6 +514,8 @@ fs_inst::is_send_from_grf() const
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == GRF;
+   case FS_OPCODE_FB_WRITE:
+      return src[0].file == GRF;
    default:
       if (is_tex())
          return src[0].file == GRF;
@@ -917,6 +919,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
 {
    if (is_tex() && arg == 0 && src[0].file == GRF) {
       return mlen;
+   } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
+      return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
       return mlen;
    } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7500e8e..a91bf9f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -521,6 +521,7 @@ public:
                     fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
 
    void emit_color_write(fs_reg color, int index, int first_color_mrf);
+   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
    void emit_alpha_test();
    fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8e38315..e72fb62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3005,6 +3005,82 @@ fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf)
    }
 }
 
+int
+fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
+{
+   fs_inst *inst;
+
+   if (color.file == BAD_FILE) {
+      return 4 * (dispatch_width / 8);
+   }
+
+   uint8_t colors_enabled;
+   if (components == 0) {
+      /* We want to write one component to the alpha channel */
+      colors_enabled = 0x8;
+   } else {
+      /* Enable the first components-many channels */
+      colors_enabled = (1 << components) - 1;
+   }
+
+   if (dispatch_width == 8 || brw->gen >= 6) {
+      /* SIMD8 write looks like:
+       * m + 0: r0
+       * m + 1: r1
+       * m + 2: g0
+       * m + 3: g1
+       *
+       * gen6 SIMD16 DP write looks like:
+       * m + 0: r0
+       * m + 1: r1
+       * m + 2: g0
+       * m + 3: g1
+       * m + 4: b0
+       * m + 5: b1
+       * m + 6: a0
+       * m + 7: a1
+       */
+      int len = 0;
+      for (unsigned i = 0; i < 4; ++i) {
+         if (colors_enabled & (1 << i)) {
+            dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
+                              color.type, color.width);
+            inst = emit(MOV(dst[len], offset(color, i)));
+            inst->saturate = key->clamp_fragment_color;
+         } else if (color.width == 16) {
+            /* We need two BAD_FILE slots for a 16-wide color */
+            len++;
+         }
+         len++;
+      }
+      return len;
+   } else {
+      /* pre-gen6 SIMD16 single source DP write looks like:
+       * m + 0: r0
+       * m + 1: g0
+       * m + 2: b0
+       * m + 3: a0
+       * m + 4: r1
+       * m + 5: g1
+       * m + 6: b1
+       * m + 7: a1
+       */
+      for (unsigned i = 0; i < 4; ++i) {
+         if (colors_enabled & (1 << i)) {
+            dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+            inst = emit(MOV(dst[i], half(offset(color, i), 0)));
+            inst->saturate = key->clamp_fragment_color;
+
+            dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+            inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
+            inst->saturate = key->clamp_fragment_color;
+            inst->force_sechalf = true;
+         }
+      }
+      return 8;
+   }
+}
+
 static enum brw_conditional_mod
 cond_for_alpha_func(GLenum func)
 {
@@ -3063,12 +3139,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 {
    this->current_annotation = "FB write header";
    bool header_present = true;
+   int reg_size = dispatch_width / 8;
+
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
     */
-   int base_mrf = 1;
-   int nr = base_mrf;
-   int reg_width = dispatch_width / 8;
+   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
+   int length = 0;
 
    /* From the Sandy Bridge PRM, volume 4, page 198:
     *
@@ -3085,12 +3162,14 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
    }
 
    if (header_present)
-      /* m2, m3 header */
-      nr += 2;
+      /* Allocate 2 registers for a header */
+      length += 2;
 
    if (payload.aa_dest_stencil_reg) {
-      emit(MOV(fs_reg(MRF, nr++),
+      sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
+      emit(MOV(sources[length],
                fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
+      length++;
    }
 
    prog_data->uses_omask =
@@ -3098,9 +3177,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
    if (prog_data->uses_omask) {
       this->current_annotation = "FB write oMask";
       assert(this->sample_mask.file != BAD_FILE);
-      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
-      emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask);
-      nr += 1;
+      /* Hand over gl_SampleMask. Only lower 16 bits are relevant.  Since
+       * it's unsigned single words, one vgrf is always 16-wide.
+       */
+      sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
+                               BRW_REGISTER_TYPE_UW, 16);
+      emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
+      length++;
    }
 
    if (color0.file == BAD_FILE) {
@@ -3108,28 +3191,20 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
        */
-      emit_color_write(this->outputs[0], 3, nr);
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, this->outputs[0], 0);
    } else if (color1.file == BAD_FILE) {
       if (src0_alpha.file != BAD_FILE) {
-         fs_inst *inst;
-         inst = emit(MOV(fs_reg(MRF, nr, src0_alpha.type), src0_alpha));
+         sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
+                                  src0_alpha.type, src0_alpha.width);
+         fs_inst *inst = emit(MOV(sources[length], src0_alpha));
          inst->saturate = key->clamp_fragment_color;
-         nr += reg_width;
+         length++;
       }
 
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color0, i, nr);
-
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, color0, components);
    } else {
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color0, i, nr);
-      nr += 4 * reg_width;
-
-      for (unsigned i = 0; i < components; i++)
-         emit_color_write(color1, i, nr);
-      nr += 4 * reg_width;
+      length += setup_color_payload(sources + length, color0, components);
+      length += setup_color_payload(sources + length, color1, components);
    }
 
    if (source_depth_to_render_target) {
@@ -3142,33 +3217,51 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 	 no16("Missing support for simd16 depth writes on gen6\n");
       }
 
+      sources[length] = fs_reg(this, glsl_type::float_type);
       if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 	 /* Hand over gl_FragDepth. */
 	 assert(this->frag_depth.file != BAD_FILE);
-	 emit(MOV(fs_reg(MRF, nr), this->frag_depth));
+	 emit(MOV(sources[length], this->frag_depth));
       } else {
 	 /* Pass through the payload depth. */
-	 emit(MOV(fs_reg(MRF, nr),
+	 emit(MOV(sources[length],
                   fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
       }
-      nr += reg_width;
+      length++;
    }
 
    if (payload.dest_depth_reg) {
-      emit(MOV(fs_reg(MRF, nr),
+      sources[length] = fs_reg(this, glsl_type::float_type);
+      emit(MOV(sources[length],
                fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
-      nr += reg_width;
+      length++;
    }
 
-   fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
-   inst->base_mrf = base_mrf;
-   inst->mlen = nr - base_mrf;
-   inst->header_present = header_present;
+   fs_inst *load;
+   fs_inst *write;
+   if (brw->gen >= 7) {
+      /* Send from the GRF */
+      fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+      load = emit(LOAD_PAYLOAD(payload, sources, length));
+      payload.reg = virtual_grf_alloc(load->regs_written);
+      load->dst = payload;
+      write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
+      write->base_mrf = -1;
+   } else {
+      /* Send from the MRF */
+      load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+                               sources, length));
+      write = emit(FS_OPCODE_FB_WRITE);
+      write->base_mrf = 1;
+   }
+
+   write->mlen = load->regs_written;
+   write->header_present = header_present;
    if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) {
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->flag_subreg = 1;
+      write->predicate = BRW_PREDICATE_NORMAL;
+      write->flag_subreg = 1;
    }
-   return inst;
+   return write;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0a33063..55e2481 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -738,6 +738,7 @@ backend_instruction::has_side_effects() const
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case FS_OPCODE_FB_WRITE:
       return true;
    default:
       return false;
-- 
2.1.0



More information about the mesa-dev mailing list