[Mesa-dev] [PATCH V2 09/12] i965: Add FS backend for builtin gl_SampleMask[]

Anuj Phogat anuj.phogat at gmail.com
Wed Oct 30 19:23:53 CET 2013


On Tue, Oct 29, 2013 at 10:04 PM, Paul Berry <stereotype441 at gmail.com> wrote:
> On 29 October 2013 19:22, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>>
>>
>>
>>
>> On Mon, Oct 28, 2013 at 5:10 PM, Paul Berry <stereotype441 at gmail.com>
>> wrote:
>>>
>>> On 25 October 2013 16:45, Anuj Phogat <anuj.phogat at gmail.com> wrote:
>>>>
>>>> V2:
>>>>    - Update comments
>>>>    - Use fs_reg(0xffff) in AND instruction to get the 16 bit
>>>>      sample_mask.
>>>>    - Add a special backend instructions to compute sample_mask.
>>>>    - Add a new variable uses_omask in brw_wm_prog_data.
>>>>
>>>> Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
>>>> ---
>>>>  src/mesa/drivers/dri/i965/brw_context.h        |  1 +
>>>>  src/mesa/drivers/dri/i965/brw_defines.h        |  1 +
>>>>  src/mesa/drivers/dri/i965/brw_fs.h             |  5 +++++
>>>>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 23
>>>> +++++++++++++++++++++++
>>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 15 +++++++++++++++
>>>>  5 files changed, 45 insertions(+)
>>>>
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
>>>> b/src/mesa/drivers/dri/i965/brw_context.h
>>>> index d16f257..d623368 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>>>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>>>> @@ -381,6 +381,7 @@ struct brw_wm_prog_data {
>>>>     GLuint nr_pull_params;
>>>>     bool dual_src_blend;
>>>>     bool uses_pos_offset;
>>>> +   bool uses_omask;
>>>>     uint32_t prog_offset_16;
>>>>
>>>>     /**
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
>>>> b/src/mesa/drivers/dri/i965/brw_defines.h
>>>> index f3c994b..f9556a5 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>>>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>>>> @@ -788,6 +788,7 @@ enum opcode {
>>>>     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
>>>>     FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
>>>>     FS_OPCODE_DISCARD_JUMP,
>>>> +   FS_OPCODE_SET_OMASK,
>>>>     FS_OPCODE_SET_SAMPLE_ID,
>>>>     FS_OPCODE_SET_SIMD4X2_OFFSET,
>>>>     FS_OPCODE_PACK_HALF_2x16_SPLIT,
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
>>>> b/src/mesa/drivers/dri/i965/brw_fs.h
>>>> index 8a1a414..c9bcc4e 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>>>> @@ -436,6 +436,7 @@ public:
>>>>
>>>>     struct hash_table *variable_ht;
>>>>     fs_reg frag_depth;
>>>> +   fs_reg sample_mask;
>>>>     fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
>>>>     unsigned output_components[BRW_MAX_DRAW_BUFFERS];
>>>>     fs_reg dual_src_output;
>>>> @@ -540,6 +541,10 @@ private:
>>>>                                                   struct brw_reg
>>>> offset);
>>>>     void generate_mov_dispatch_to_flags(fs_inst *inst);
>>>>
>>>> +   void generate_set_omask(fs_inst *inst,
>>>> +                           struct brw_reg dst,
>>>> +                           struct brw_reg sample_mask);
>>>> +
>>>>     void generate_set_sample_id(fs_inst *inst,
>>>>                                 struct brw_reg dst,
>>>>                                 struct brw_reg src0,
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>>> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>>> index 4f15ed7..fc8e0bd 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>>> @@ -1024,6 +1024,25 @@ fs_generator::generate_set_simd4x2_offset(fs_inst
>>>> *inst,
>>>>     brw_pop_insn_state(p);
>>>>  }
>>>>
>>>> +/* Sets vstride=16, width=8, hstride=2 of register mask before
>>>> + * moving to register dst.
>>>> + */
>>>> +void
>>>> +fs_generator::generate_set_omask(fs_inst *inst,
>>>> +                                 struct brw_reg dst,
>>>> +                                 struct brw_reg mask)
>>>> +{
>>>> +   assert(dst.type == BRW_REGISTER_TYPE_UW);
>>>> +   if (dispatch_width == 16)
>>>> +      dst = vec16(dst);
>>>> +   brw_push_insn_state(p);
>>>> +   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
>>>> +   brw_set_mask_control(p, BRW_MASK_DISABLE);
>>>> +   brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0),
>>>> +                                 dst.type), 16, 8, 2));
>>>> +   brw_pop_insn_state(p);
>>>> +}
>>>> +
>>>>  /* Sets vstride=1, width=4, hstride=0 of register src1 during
>>>>   * the ADD instruction.
>>>>   */
>>>> @@ -1576,6 +1595,10 @@ fs_generator::generate_code(exec_list
>>>> *instructions)
>>>>           generate_set_simd4x2_offset(inst, dst, src[0]);
>>>>           break;
>>>>
>>>> +      case FS_OPCODE_SET_OMASK:
>>>> +         generate_set_omask(inst, dst, src[0]);
>>>> +         break;
>>>> +
>>>>        case FS_OPCODE_SET_SAMPLE_ID:
>>>>           generate_set_sample_id(inst, dst, src[0], src[1]);
>>>>           break;
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>>> index 7a6a0b5..b9eb5b8 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>>>> @@ -82,6 +82,8 @@ fs_visitor::visit(ir_variable *ir)
>>>>          }
>>>>        } else if (ir->location == FRAG_RESULT_DEPTH) {
>>>>          this->frag_depth = *reg;
>>>> +      } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) {
>>>> +         this->sample_mask = *reg;
>>>>        } else {
>>>>          /* gl_FragData or a user-defined FS output */
>>>>          assert(ir->location >= FRAG_RESULT_DATA0 &&
>>>> @@ -2510,6 +2512,19 @@ fs_visitor::emit_fb_writes()
>>>>        pop_force_uncompressed();
>>>>     }
>>>>
>>>> +   c->prog_data.uses_omask =
>>>> +      fp->Base.OutputsWritten &
>>>> BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
>>>> +   if(c->prog_data.uses_omask) {
>>>> +      this->current_annotation = "FB write oMask";
>>>> +      assert(this->sample_mask.file != BAD_FILE);
>>>> +      fs_reg reg = fs_reg(this, glsl_type::int_type);
>>>> +
>>>> +      /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
>>>> +      emit(AND(reg, this->sample_mask, fs_reg(0xffff)));
>>>
>>>
>>> I think we can drop this AND instruction, since FS_OPCODE_SET_OMASK wll
>>> treat reg as UW, and it will use a stride that causes it to skip alternate
>>> words.  So the upper 16 bits of sample mask are ignored anyway.
>>
>> Dropping the AND instruction works fine if we're doing something like this
>> in FS:
>> out  vec4 out_color;
>> main()
>> {
>>   gl_SampleMask[0] = 0x1234;
>>   out_color = vec4(0.0, 1.0, 0.0, 1.0);
>> }
>> Relevant instructions generated for sample mask are:
>> mov(16)         g2<1>D          0x1234
>> mov(16)         g113<1>UW       g2<16,8,2>UW
>>
>> But if we do this in FS:
>> out vec4 out_color;
>> uniform int mask;
>> main()
>> {
>>   gl_SampleMask[0] = mask;
>>   out_color = vec4(0.0, 1.0, 0.0, 1.0);
>> }
>>
>> Relevant instructions generated for sample mask are:
>>  mov(16) g113<1>UW       g2<16,8,2>UW
>>
>> It doesn't work because uniform values are passed in push constants. So,
>> this case require a different stride of g2<0,1,0>.
>> If we use AND instruction both of above cases can use same stride of
>> g2<16,8,2>. Using MOV in place of AND doesn't help either because the MOV
>> instruction gets optimized away. We may need a new backend instruction to
>> make the MOV work. But this doesn't seem worth avoiding an AND instruction.
>> Please correct me if I'm missing something here.
>
>
> Oh, wow.  I didn't think about that case.  Good catch.
>
> I have one last idea, and then I'll drop the subject.  What if, instead of
> making a new backend instruction to make the MOV work, we just modify
> generate_set_omask() so that it examines the stride of the incoming mask
> register.  If it's <8;8,1> (an ordinary SIMD8 or SIMD16 register), it
> modifies it to <16;8,2>.  If it's <0;1,0> (a uniform), it leaves it as
> <0;1,0>.  Would that work?
Yes, It works :).

> Assuming that it works, I have a preference for doing it that way, partly
> because it saves an instruction, and partly because it seems a little
> fragile to me to use an AND instruction to work around the fs_generator not
> handling a uniform properly.
I'm glad to get rid of extra AND instruction. Thanks Paul.

> But I don't want to get in the way of progress.  So either way, consider the
> patch:
>
> Reviewed-by: Paul Berry <stereotype441 at gmail.com>
>
> Thanks for being willing to follow up on all these small details, Anuj.


More information about the mesa-dev mailing list