[Mesa-dev] [PATCH 16/24] i965: Use LZD to implement nir_op_ifind_msb on Gen < 7

Ian Romanick idr at freedesktop.org
Wed Jul 6 04:07:23 UTC 2016


On 06/30/2016 04:55 PM, Francisco Jerez wrote:
> Ian Romanick <idr at freedesktop.org> writes:
> 
>> From: Ian Romanick <ian.d.romanick at intel.com>
>>
>> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 50 ++++++++++++++++++++++------
>>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 52 +++++++++++++++++++++++-------
>>  2 files changed, 81 insertions(+), 21 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index f15bf3e..f8db28a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -623,8 +623,32 @@ fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder &bld,
>>                                          bool is_signed)
>>  {
>>     fs_inst *inst;
>> +   fs_reg temp = src;
>>  
>> -   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
>> +   if (is_signed) {
>> +      /* LZD of an absolute value source almost always does the right
>> +       * thing.  There are two problem values:
>> +       *
> 
> This comment seems somewhat misleading, there are many more problem
> values if you attempt to just take the LZD of the absolute value of the
> argument: Take the negative of any power of two e.g. -8 represented as
> 0xfffffff8.  findMSB(-8) is supposed to give you 2 as result, but '31
> - lzd(abs(-8)) == 31 - 28 == 3'.

Right... I did a bunch of experimentation with LZD and various
modifiers.  I'm trying to remember whether it did the literal
interpretation of LZD(abs(x)) or did something "smarter."  I'll update
the comment (here and below) to mention at least potential problems with
negative powers of two.

I should have updated versions of these 3 LZD patches pretty soon.

>> +       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
>> +       *   0.  However, findMSB(int(0x80000000)) == 30.
>> +       *
>> +       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
>> +       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
>> +       *
>> +       *    For a value of zero or negative one, -1 will be returned.
>> +       *
>> +       * For all negative number cases, including 0x80000000 and
>> +       * 0xffffffff, the correct value is obtained from LZD if instead of
>> +       * negating the (already negative) value the logical-not is used.  A
>> +       * conditional logical-not can be achieved in two instructions.
>> +       */
>> +      temp = vgrf(glsl_type::int_type);
>> +
>> +      bld.ASR(temp, src, brw_imm_d(31));
>> +      bld.XOR(temp, temp, src);
>> +   }
>> +
>> +   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), temp);
>>  
>>     /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
>>      * from the LSB side. Subtract the result from 31 to convert the MSB
>> @@ -1339,17 +1363,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>>  
>>     case nir_op_ifind_msb: {
>>        assert(nir_dest_bit_size(instr->dest.dest) < 64);
>> -      bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
>>  
>> -      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
>> -       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
>> -       * subtract the result from 31 to convert the MSB count into an LSB count.
>> -       */
>> -      bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
>> +      if (devinfo->gen < 7) {
>> +         nir_emit_find_msb_using_lzd(bld, result, op[0], true);
>> +      } else {
>> +         bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
>>  
>> -      inst = bld.ADD(result, result, brw_imm_d(31));
>> -      inst->predicate = BRW_PREDICATE_NORMAL;
>> -      inst->src[0].negate = true;
>> +         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
>> +          * count from the LSB side. If FBH didn't return an error
>> +          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
>> +          * count into an LSB count.
>> +          */
>> +         bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
>> +
>> +         inst = bld.ADD(result, result, brw_imm_d(31));
>> +         inst->predicate = BRW_PREDICATE_NORMAL;
>> +         inst->src[0].negate = true;
>> +      }
>>        break;
>>     }
>>  
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> index cd88fb6..2fc2cf2 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> @@ -999,8 +999,32 @@ vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg &dst,
>>                                            bool is_signed)
>>  {
>>     vec4_instruction *inst;
>> +   src_reg temp = src;
>>  
>> -   emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src);
>> +   if (is_signed) {
>> +      /* LZD of an absolute value source almost always does the right
>> +       * thing.  There are two problem values:
>> +       *
>> +       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
>> +       *   0.  However, findMSB(int(0x80000000)) == 30.
>> +       *
>> +       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
>> +       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
>> +       *
>> +       *    For a value of zero or negative one, -1 will be returned.
>> +       *
>> +       * For all negative number cases, including 0x80000000 and
>> +       * 0xffffffff, the correct value is obtained from LZD if instead of
>> +       * negating the (already negative) value the logical-not is used.  A
>> +       * conditional logical-not can be achieved in two instructions.
>> +       */
>> +      temp = src_reg(this, glsl_type::ivec4_type);
>> +
>> +      emit(BRW_OPCODE_ASR, dst_reg(temp), src, brw_imm_d(31));
>> +      emit(BRW_OPCODE_XOR, dst_reg(temp), temp, src);
>> +   }
>> +
>> +   emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), temp);
>>  
>>     /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
>>      * from the LSB side. Subtract the result from 31 to convert the MSB count
>> @@ -1484,18 +1508,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>>        break;
>>  
>>     case nir_op_ifind_msb: {
>> -      emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
>> -
>> -      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
>> -       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
>> -       * subtract the result from 31 to convert the MSB count into an LSB count.
>> -       */
>>        src_reg src(dst);
>> -      emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
>>  
>> -      inst = emit(ADD(dst, src, brw_imm_d(31)));
>> -      inst->predicate = BRW_PREDICATE_NORMAL;
>> -      inst->src[0].negate = true;
>> +      if (devinfo->gen < 7) {
>> +         nir_emit_find_msb_using_lzd(dst, op[0], true);
>> +      } else {
>> +         emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
>> +
>> +         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
>> +          * count from the LSB side. If FBH didn't return an error
>> +          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
>> +          * count into an LSB count.
>> +          */
>> +         emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
>> +
>> +         inst = emit(ADD(dst, src, brw_imm_d(31)));
>> +         inst->predicate = BRW_PREDICATE_NORMAL;
>> +         inst->src[0].negate = true;
>> +      }
>>        break;
>>     }
>>  
>> -- 
>> 2.5.5
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160705/ec80c6e6/attachment-0001.sig>


More information about the mesa-dev mailing list