[Mesa-dev] [PATCH 16/24] i965: Use LZD to implement nir_op_ifind_msb on Gen < 7

Francisco Jerez currojerez at riseup.net
Thu Jun 30 23:55:38 UTC 2016


Ian Romanick <idr at freedesktop.org> writes:

> From: Ian Romanick <ian.d.romanick at intel.com>
>
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 50 ++++++++++++++++++++++------
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 52 +++++++++++++++++++++++-------
>  2 files changed, 81 insertions(+), 21 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index f15bf3e..f8db28a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -623,8 +623,32 @@ fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder &bld,
>                                          bool is_signed)
>  {
>     fs_inst *inst;
> +   fs_reg temp = src;
>  
> -   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
> +   if (is_signed) {
> +      /* LZD of an absolute value source almost always does the right
> +       * thing.  There are two problem values:
> +       *

This comment seems somewhat misleading: there are many more problem
values if you attempt to just take the LZD of the absolute value of the
argument.  Take the negative of any power of two, e.g. -8, which is
represented as 0xfffffff8.  findMSB(-8) is supposed to give you 2 as the
result, but '31 - lzd(abs(-8)) == 31 - 28 == 3'.

> +       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
> +       *   0.  However, findMSB(int(0x80000000)) == 30.
> +       *
> +       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
> +       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
> +       *
> +       *    For a value of zero or negative one, -1 will be returned.
> +       *
> +       * For all negative number cases, including 0x80000000 and
> +       * 0xffffffff, the correct value is obtained from LZD if instead of
> +       * negating the (already negative) value the logical-not is used.  A
> +       * conditional logical-not can be achieved in two instructions.
> +       */
> +      temp = vgrf(glsl_type::int_type);
> +
> +      bld.ASR(temp, src, brw_imm_d(31));
> +      bld.XOR(temp, temp, src);
> +   }
> +
> +   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), temp);
>  
>     /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
>      * from the LSB side. Subtract the result from 31 to convert the MSB
> @@ -1339,17 +1363,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>  
>     case nir_op_ifind_msb: {
>        assert(nir_dest_bit_size(instr->dest.dest) < 64);
> -      bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
>  
> -      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
> -       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
> -       * subtract the result from 31 to convert the MSB count into an LSB count.
> -       */
> -      bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
> +      if (devinfo->gen < 7) {
> +         nir_emit_find_msb_using_lzd(bld, result, op[0], true);
> +      } else {
> +         bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
>  
> -      inst = bld.ADD(result, result, brw_imm_d(31));
> -      inst->predicate = BRW_PREDICATE_NORMAL;
> -      inst->src[0].negate = true;
> +         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
> +          * count from the LSB side. If FBH didn't return an error
> +          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
> +          * count into an LSB count.
> +          */
> +         bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
> +
> +         inst = bld.ADD(result, result, brw_imm_d(31));
> +         inst->predicate = BRW_PREDICATE_NORMAL;
> +         inst->src[0].negate = true;
> +      }
>        break;
>     }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index cd88fb6..2fc2cf2 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -999,8 +999,32 @@ vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg &dst,
>                                            bool is_signed)
>  {
>     vec4_instruction *inst;
> +   src_reg temp = src;
>  
> -   emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src);
> +   if (is_signed) {
> +      /* LZD of an absolute value source almost always does the right
> +       * thing.  There are two problem values:
> +       *
> +       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
> +       *   0.  However, findMSB(int(0x80000000)) == 30.
> +       *
> +       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
> +       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
> +       *
> +       *    For a value of zero or negative one, -1 will be returned.
> +       *
> +       * For all negative number cases, including 0x80000000 and
> +       * 0xffffffff, the correct value is obtained from LZD if instead of
> +       * negating the (already negative) value the logical-not is used.  A
> +       * conditional logical-not can be achieved in two instructions.
> +       */
> +      temp = src_reg(this, glsl_type::ivec4_type);
> +
> +      emit(BRW_OPCODE_ASR, dst_reg(temp), src, brw_imm_d(31));
> +      emit(BRW_OPCODE_XOR, dst_reg(temp), temp, src);
> +   }
> +
> +   emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), temp);
>  
>     /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
>      * from the LSB side. Subtract the result from 31 to convert the MSB count
> @@ -1484,18 +1508,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>        break;
>  
>     case nir_op_ifind_msb: {
> -      emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
> -
> -      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
> -       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
> -       * subtract the result from 31 to convert the MSB count into an LSB count.
> -       */
>        src_reg src(dst);
> -      emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
>  
> -      inst = emit(ADD(dst, src, brw_imm_d(31)));
> -      inst->predicate = BRW_PREDICATE_NORMAL;
> -      inst->src[0].negate = true;
> +      if (devinfo->gen < 7) {
> +         nir_emit_find_msb_using_lzd(dst, op[0], true);
> +      } else {
> +         emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
> +
> +         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
> +          * count from the LSB side. If FBH didn't return an error
> +          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
> +          * count into an LSB count.
> +          */
> +         emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
> +
> +         inst = emit(ADD(dst, src, brw_imm_d(31)));
> +         inst->predicate = BRW_PREDICATE_NORMAL;
> +         inst->src[0].negate = true;
> +      }
>        break;
>     }
>  
> -- 
> 2.5.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160630/a69086af/attachment.sig>


More information about the mesa-dev mailing list