[Mesa-dev] [PATCH v2 16/24] i965: Use LZD to implement nir_op_ifind_msb on Gen < 7
Ian Romanick
idr at freedesktop.org
Thu Jul 7 17:16:08 UTC 2016
From: Ian Romanick <ian.d.romanick at intel.com>
v2: Retype LZD source as UD to avoid potential problems with 0x80000000.
Suggested by Matt. Also update comment about problem values with
LZD(abs(x)). Suggested by Curro.
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 ++++++++++++++++++++++------
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 57 ++++++++++++++++++++++++------
2 files changed, 90 insertions(+), 21 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 65f6406..93d5e9d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -623,8 +623,36 @@ emit_find_msb_using_lzd(const fs_builder &bld,
bool is_signed)
{
fs_inst *inst;
+ fs_reg temp = src;
- bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
+ if (is_signed) {
+ /* LZD of an absolute value source almost always does the right
+ * thing. There are three problem cases:
+ *
+ * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
+ * 0. However, findMSB(int(0x80000000)) == 30.
+ *
+ * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
+ * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+ *
+ * For a value of zero or negative one, -1 will be returned.
+ *
+ * * Negative powers of two. LZD(abs(-(1<<x))) returns 31-x, which
+ * converts to x, but findMSB(-(1<<x)) should return x-1.
+ *
+ * For all negative number cases, including 0x80000000 and
+ * 0xffffffff, the correct value is obtained from LZD if instead of
+ * negating the (already negative) value the logical-not is used. A
+ * conditional logical-not can be achieved in two instructions.
+ */
+ temp = bld.vgrf(BRW_REGISTER_TYPE_D);
+
+ bld.ASR(temp, src, brw_imm_d(31));
+ bld.XOR(temp, temp, src);
+ }
+
+ bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
+ retype(temp, BRW_REGISTER_TYPE_UD));
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. Subtract the result from 31 to convert the MSB
@@ -1339,17 +1367,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
- bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
- bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+ if (devinfo->gen < 7) {
+ emit_find_msb_using_lzd(bld, result, op[0], true);
+ } else {
+ bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
- inst = bld.ADD(result, result, brw_imm_d(31));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->src[0].negate = true;
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the
+ * count from the LSB side. If FBH didn't return an error
+ * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
+ * count into an LSB count.
+ */
+ bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+
+ inst = bld.ADD(result, result, brw_imm_d(31));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
+ }
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 352d88a..85fa775 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1000,8 +1000,36 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
bool is_signed)
{
vec4_instruction *inst;
+ src_reg temp = src;
- bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src);
+ if (is_signed) {
+ /* LZD of an absolute value source almost always does the right
+ * thing. There are three problem cases:
+ *
+ * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
+ * 0. However, findMSB(int(0x80000000)) == 30.
+ *
+ * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
+ * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+ *
+ * For a value of zero or negative one, -1 will be returned.
+ *
+ * * Negative powers of two. LZD(abs(-(1<<x))) returns 31-x, which
+ * converts to x, but findMSB(-(1<<x)) should return x-1.
+ *
+ * For all negative number cases, including 0x80000000 and
+ * 0xffffffff, the correct value is obtained from LZD if instead of
+ * negating the (already negative) value the logical-not is used. A
+ * conditional logical-not can be achieved in two instructions.
+ */
+ temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
+
+ bld.ASR(dst_reg(temp), src, brw_imm_d(31));
+ bld.XOR(dst_reg(temp), temp, src);
+ }
+
+ bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(temp, BRW_REGISTER_TYPE_UD));
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. Subtract the result from 31 to convert the MSB count
@@ -1485,18 +1513,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_ifind_msb: {
- emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
-
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
+ vec4_builder bld = vec4_builder(this).at_end();
src_reg src(dst);
- emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
- inst = emit(ADD(dst, src, brw_imm_d(31)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->src[0].negate = true;
+ if (devinfo->gen < 7) {
+ emit_find_msb_using_lzd(bld, dst, op[0], true);
+ } else {
+ emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
+
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the
+ * count from the LSB side. If FBH didn't return an error
+ * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
+ * count into an LSB count.
+ */
+ bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+
+ inst = bld.ADD(dst, src, brw_imm_d(31));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
+ }
break;
}
--
2.5.5
More information about the mesa-dev
mailing list