[Mesa-dev] [PATCH 5/8] i965: Implement FS backend for ARB_sample_shading

Anuj Phogat anuj.phogat at gmail.com
Mon Oct 14 19:12:53 CEST 2013


Implement the FS backend for new builtins added by the extension:
in vec2 gl_SamplePosition
in int gl_SampleID
in int gl_NumSamples
out int gl_SampleMask[]

Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         | 109 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h           |   4 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  23 ++++++
 src/mesa/drivers/dri/i965/brw_wm.h           |   1 +
 4 files changed, 137 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e5d6e4b..e4f7745 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1115,6 +1115,109 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
    return reg;
 }
 
+void
+fs_visitor::compute_sample_position(fs_reg dst, fs_reg int_sample_pos)
+{
+   int num_samples = ctx->DrawBuffer->Visual.samples;
+   assert(num_samples >= 0 && num_samples <= 8);
+
+   /* From arb_sample_shading specification:
+    * "When rendering to a non-multisample buffer, or if multisample
+    *  rasterization is disabled, gl_SamplePosition will always be
+    *  (0.5, 0.5).
+    */
+   if (!ctx->Multisample.Enabled || num_samples == 0) {
+      emit(BRW_OPCODE_MOV, dst, fs_reg(0.5f));
+   }
+   else {
+      /* For num_samples = {4, 8} */
+      emit(BRW_OPCODE_MOV, dst, int_sample_pos);
+      emit(BRW_OPCODE_MUL, dst, dst, fs_reg(1 / 16.0f));
+   }
+}
+
+fs_reg *
+fs_visitor::emit_samplepos_interpolation(ir_variable *ir)
+{
+   assert(brw->gen >= 6);
+
+   this->current_annotation = "compute sample position";
+   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
+   fs_reg pos = *reg;
+   fs_reg int_sample_x = fs_reg(this, glsl_type::int_type);
+   fs_reg int_sample_y = fs_reg(this, glsl_type::int_type);
+
+   /* WM will be run in MSDISPMODE_PERSAMPLE. So, only SIMD8 mode will be
+    * enabled. The X, Y sample positions come in as bytes in  thread payload.
+    * Sample IDs and sample positions remain same for all four slots in a
+    * subspan. So, read the positions using vstride=2, width=4, hstride=0.
+    */
+   emit(BRW_OPCODE_AND, int_sample_x,
+        fs_reg(stride(retype(brw_vec1_grf(c->sample_pos_reg, 0),
+                             BRW_REGISTER_TYPE_D), 2, 4, 0)),
+        fs_reg(brw_imm_d(0xff)));
+
+   /* Compute gl_SamplePosition.x */
+   compute_sample_position(pos, int_sample_x);
+   pos.reg_offset++;
+
+   emit(BRW_OPCODE_SHR, int_sample_y,
+        fs_reg(stride(retype(brw_vec1_grf(c->sample_pos_reg, 0),
+                             BRW_REGISTER_TYPE_D), 2, 4, 0)),
+        fs_reg(8));
+   emit(BRW_OPCODE_AND, int_sample_y, int_sample_y, fs_reg(brw_imm_d(0xff)));
+
+   /* Compute gl_SamplePosition.y */
+   compute_sample_position(pos, int_sample_y);
+   return reg;
+}
+
+fs_reg *
+fs_visitor::emit_sampleid_interpolation(ir_variable *ir)
+{
+   assert(brw->gen >= 6);
+   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+
+   this->current_annotation = "compute sample id";
+   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
+
+   if (multisampled_fbo && ctx->Multisample.Enabled) {
+      fs_reg t1 = fs_reg(this, glsl_type::int_type);
+      fs_reg t2 = fs_reg(this, glsl_type::int_type);
+      t2.type = BRW_REGISTER_TYPE_UW;
+
+      /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples = 8.
+       * Therefore, subspan 0 will represent sample N (where N is 0, 2, 4
+       * or 6), subspan 1 will represent sample 1, 3, 5 or 7.  We can find
+       * the value of N by looking at R0.0 bits 7:6 ("Starting Sample Pair
+       * Index (SSPI)") and multiplying by two (since samples are always
+       * delivered in pairs). That is, we compute 2*((R0.0 & 0xc0) >> 6)
+       * == (R0.0 & 0xc0) >> 5.
+       *
+       * Then we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1),
+       * which we compute by populating a temporary variable with the
+       * sequence (0, 1), and then reading from it using vstride=1,
+       * width=4, hstride=0.
+       * Same holds true for num_samples = 4.
+       */
+      emit(BRW_OPCODE_AND, t1,
+           fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
+           fs_reg(brw_imm_d(0xc0)));
+      emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
+      emit(BRW_OPCODE_MOV, t2, fs_reg(brw_imm_v(0x11110000)));
+      emit(BRW_OPCODE_ADD, *reg, t1, t2);
+   }
+   else {
+      /* As per GL_ARB_sample_shading specification:
+       * "When rendering to a non-multisample buffer, or if multisample
+       *  rasterization is disabled, gl_SampleID will always be zero."
+       */
+      emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+   }
+
+   return reg;
+}
+
 fs_reg
 fs_visitor::fix_math_operand(fs_reg src)
 {
@@ -2966,7 +3069,13 @@ fs_visitor::setup_payload_gen6()
          c->nr_payload_regs++;
       }
    }
+
    /* R31: MSAA position offsets. */
+   if (fp->Base.InputsRead & VARYING_BIT_SAMPLE_POS) {
+      c->sample_pos_reg = c->nr_payload_regs;
+      c->nr_payload_regs++;
+   }
+
    /* R32-: bary for 32-pixel. */
    /* R58-59: interp W for 32-pixel. */
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c78f9ae..812e9c7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -332,9 +332,12 @@ public:
                          glsl_interp_qualifier interpolation_mode,
                          bool is_centroid);
    fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
+   fs_reg *emit_samplepos_interpolation(ir_variable *ir);
+   fs_reg *emit_sampleid_interpolation(ir_variable *ir);
    fs_reg *emit_general_interpolation(ir_variable *ir);
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
+   void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
    fs_reg rescale_texcoord(ir_texture *ir, fs_reg coordinate,
                            bool is_rect, int sampler, int texunit);
    fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
@@ -432,6 +435,7 @@ public:
 
    struct hash_table *variable_ht;
    fs_reg frag_depth;
+   fs_reg sample_mask;
    fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
    unsigned output_components[BRW_MAX_DRAW_BUFFERS];
    fs_reg dual_src_output;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index e659203..75ced1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -61,9 +61,14 @@ fs_visitor::visit(ir_variable *ir)
 	 reg = emit_fragcoord_interpolation(ir);
       } else if (!strcmp(ir->name, "gl_FrontFacing")) {
 	 reg = emit_frontfacing_interpolation(ir);
+      } else if (!strcmp(ir->name, "gl_SamplePosition")) {
+	 reg = emit_samplepos_interpolation(ir);
+      } else if (!strcmp(ir->name, "gl_SampleID")) {
+	 reg = emit_sampleid_interpolation(ir);
       } else {
 	 reg = emit_general_interpolation(ir);
       }
+
       assert(reg);
       hash_table_insert(this->variable_ht, reg, ir);
       return;
@@ -82,6 +87,8 @@ fs_visitor::visit(ir_variable *ir)
 	 }
       } else if (ir->location == FRAG_RESULT_DEPTH) {
 	 this->frag_depth = *reg;
+      } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) {
+         this->sample_mask = *reg;
       } else {
 	 /* gl_FragData or a user-defined FS output */
 	 assert(ir->location >= FRAG_RESULT_DATA0 &&
@@ -2502,6 +2509,22 @@ fs_visitor::emit_fb_writes()
       pop_force_uncompressed();
    }
 
+   if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
+      this->current_annotation = "FB write oMask";
+      assert(this->sample_mask.file != BAD_FILE);
+      fs_reg reg = fs_reg(this, glsl_type::int_type);
+
+      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
+      emit(AND(this->sample_mask, this->sample_mask, fs_reg(255)));
+      emit(SHL(reg, this->sample_mask, fs_reg(16)));
+
+      this->sample_mask.type = BRW_REGISTER_TYPE_UW;
+      reg.type = BRW_REGISTER_TYPE_UW;
+      emit(OR(fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask, reg));
+
+      nr += 1;
+   }
+
    /* Reserve space for color. It'll be filled in per MRT below. */
    int color_mrf = nr;
    nr += 4 * reg_width;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index aa786de..fda74ab 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -83,6 +83,7 @@ struct brw_wm_compile {
    uint8_t source_w_reg;
    uint8_t aa_dest_stencil_reg;
    uint8_t dest_depth_reg;
+   uint8_t sample_pos_reg;
    uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
    uint8_t nr_payload_regs;
    GLuint source_depth_to_render_target:1;
-- 
1.8.1.4



More information about the mesa-dev mailing list