[Mesa-dev] [PATCH V2 09/12] i965: Add FS backend for builtin gl_SampleMask[]

Anuj Phogat anuj.phogat at gmail.com
Wed Oct 30 03:22:48 CET 2013


On Mon, Oct 28, 2013 at 5:10 PM, Paul Berry <stereotype441 at gmail.com> wrote:

> On 25 October 2013 16:45, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>
>> V2:
>>    - Update comments
>>    - Use fs_reg(0xffff) in AND instruction to get the 16 bit
>>      sample_mask.
>>    - Add a special backend instructions to compute sample_mask.
>>    - Add a new variable uses_omask in brw_wm_prog_data.
>>
>> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h        |  1 +
>>  src/mesa/drivers/dri/i965/brw_defines.h        |  1 +
>>  src/mesa/drivers/dri/i965/brw_fs.h             |  5 +++++
>>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 23
>> +++++++++++++++++++++++
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 15 +++++++++++++++
>>  5 files changed, 45 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index d16f257..d623368 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -381,6 +381,7 @@ struct brw_wm_prog_data {
>>     GLuint nr_pull_params;
>>     bool dual_src_blend;
>>     bool uses_pos_offset;
>> +   bool uses_omask;
>>     uint32_t prog_offset_16;
>>
>>     /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
>> b/src/mesa/drivers/dri/i965/brw_defines.h
>> index f3c994b..f9556a5 100644
>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> @@ -788,6 +788,7 @@ enum opcode {
>>     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
>>     FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
>>     FS_OPCODE_DISCARD_JUMP,
>> +   FS_OPCODE_SET_OMASK,
>>     FS_OPCODE_SET_SAMPLE_ID,
>>     FS_OPCODE_SET_SIMD4X2_OFFSET,
>>     FS_OPCODE_PACK_HALF_2x16_SPLIT,
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
>> b/src/mesa/drivers/dri/i965/brw_fs.h
>> index 8a1a414..c9bcc4e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>> @@ -436,6 +436,7 @@ public:
>>
>>     struct hash_table *variable_ht;
>>     fs_reg frag_depth;
>> +   fs_reg sample_mask;
>>     fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
>>     unsigned output_components[BRW_MAX_DRAW_BUFFERS];
>>     fs_reg dual_src_output;
>> @@ -540,6 +541,10 @@ private:
>>                                                   struct brw_reg offset);
>>     void generate_mov_dispatch_to_flags(fs_inst *inst);
>>
>> +   void generate_set_omask(fs_inst *inst,
>> +                           struct brw_reg dst,
>> +                           struct brw_reg sample_mask);
>> +
>>     void generate_set_sample_id(fs_inst *inst,
>>                                 struct brw_reg dst,
>>                                 struct brw_reg src0,
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> index 4f15ed7..fc8e0bd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> @@ -1024,6 +1024,25 @@ fs_generator::generate_set_simd4x2_offset(fs_inst
>> *inst,
>>     brw_pop_insn_state(p);
>>  }
>>
>> +/* Sets vstride=16, width=8, hstride=2 of register mask before
>> + * moving to register dst.
>> + */
>> +void
>> +fs_generator::generate_set_omask(fs_inst *inst,
>> +                                 struct brw_reg dst,
>> +                                 struct brw_reg mask)
>> +{
>> +   assert(dst.type == BRW_REGISTER_TYPE_UW);
>> +   if (dispatch_width == 16)
>> +      dst = vec16(dst);
>> +   brw_push_insn_state(p);
>> +   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
>> +   brw_set_mask_control(p, BRW_MASK_DISABLE);
>> +   brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0),
>> +                                 dst.type), 16, 8, 2));
>> +   brw_pop_insn_state(p);
>> +}
>> +
>>  /* Sets vstride=1, width=4, hstride=0 of register src1 during
>>   * the ADD instruction.
>>   */
>> @@ -1576,6 +1595,10 @@ fs_generator::generate_code(exec_list
>> *instructions)
>>           generate_set_simd4x2_offset(inst, dst, src[0]);
>>           break;
>>
>> +      case FS_OPCODE_SET_OMASK:
>> +         generate_set_omask(inst, dst, src[0]);
>> +         break;
>> +
>>        case FS_OPCODE_SET_SAMPLE_ID:
>>           generate_set_sample_id(inst, dst, src[0], src[1]);
>>           break;
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 7a6a0b5..b9eb5b8 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -82,6 +82,8 @@ fs_visitor::visit(ir_variable *ir)
>>          }
>>        } else if (ir->location == FRAG_RESULT_DEPTH) {
>>          this->frag_depth = *reg;
>> +      } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) {
>> +         this->sample_mask = *reg;
>>        } else {
>>          /* gl_FragData or a user-defined FS output */
>>          assert(ir->location >= FRAG_RESULT_DATA0 &&
>> @@ -2510,6 +2512,19 @@ fs_visitor::emit_fb_writes()
>>        pop_force_uncompressed();
>>     }
>>
>> +   c->prog_data.uses_omask =
>> +      fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
>> +   if(c->prog_data.uses_omask) {
>> +      this->current_annotation = "FB write oMask";
>> +      assert(this->sample_mask.file != BAD_FILE);
>> +      fs_reg reg = fs_reg(this, glsl_type::int_type);
>> +
>> +      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
>> +      emit(AND(reg, this->sample_mask, fs_reg(0xffff)));
>>
>
> I think we can drop this AND instruction, since FS_OPCODE_SET_OMASK wll
> treat reg as UW, and it will use a stride that causes it to skip alternate
> words.  So the upper 16 bits of sample mask are ignored anyway.
>
Dropping the AND instruction works fine if we're doing something like this
in FS:
out  vec4 out_color;
main()
{
  gl_SampleMask[0] = 0x1234;
  out_color = vec4(0.0, 1.0, 0.0, 1.0);
}
Relevant instructions generated for sample mask are:
mov(16)         g2<1>D          0x1234
mov(16)         g113<1>UW       g2<16,8,2>UW

But if we do this in FS:
out vec4 out_color;
uniform int mask;
main()
{
  gl_SampleMask[0] = mask;
  out_color = vec4(0.0, 1.0, 0.0, 1.0);
}

Relevant instructions generated for sample mask are:
 mov(16) g113<1>UW       g2<16,8,2>UW

It doesn't work because uniform values are passed in push constants. So,
this case require a different stride of g2<0,1,0>.
If we use AND instruction both of above cases can use same stride of
g2<16,8,2>. Using MOV in place of AND doesn't help either because the MOV
instruction gets optimized away. We may need a new backend instruction to
make the MOV work. But this doesn't seem worth avoiding an AND instruction.
Please correct me if I'm missing something here.

With that changed, this patch is:
>
> Reviewed-by: Paul Berry <stereotype441 at gmail.com>
>
>
>> +      emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW),
>> reg);
>> +      nr += 1;
>> +   }
>> +
>>     /* Reserve space for color. It'll be filled in per MRT below. */
>>     int color_mrf = nr;
>>     nr += 4 * reg_width;
>> --
>> 1.8.1.4
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131029/be437102/attachment-0001.html>


More information about the mesa-dev mailing list