[Mesa-dev] [PATCH v2 15/24] i965: Use LZD to implement nir_op_ufind_msb
Ian Romanick
idr at freedesktop.org
Thu Jul 7 17:15:15 UTC 2016
From: Ian Romanick <ian.d.romanick at intel.com>
This uses one fewer instruction than the FBH-based path that nir_op_ufind_msb previously shared with nir_op_ifind_msb.
v2: Move emit_find_msb_using_lzd out of the visitor classes. Suggested
by Curro.
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 26 +++++++++++++++++++++++-
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 +++
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 23 +++++++++++++++++++++
4 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index d25d26a..bda4a26 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1761,6 +1761,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
+ case BRW_OPCODE_LZD:
+ brw_LZD(p, dst, src[0]);
+ break;
case BRW_OPCODE_CBIT:
assert(devinfo->gen >= 7);
/* CBIT only supports UD type for dst. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 04ed42e..65f6406 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -616,6 +616,25 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
return true;
}
+static void
+emit_find_msb_using_lzd(const fs_builder &bld,
+ const fs_reg &result,
+ const fs_reg &src,
+ bool is_signed)
+{
+ fs_inst *inst;
+
+ bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
+
+ /* LZD counts leading zeros from the MSB side; findMSB() wants the bit index
+ * from the LSB side, i.e. 31 - LZD(src), emitted as an ADD of 31 with the
+ * negate modifier on src[0]. With no bits set, LZD returns 32 and 31-32 = -1,
+ * which is what findMSB() must return. NOTE: is_signed is unused here.
+ */
+ inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
+ inst->src[0].negate = true;
+}
+
void
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
{
@@ -1312,7 +1331,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
bld.CBIT(result, op[0]);
break;
- case nir_op_ufind_msb:
+ case nir_op_ufind_msb: {
+ assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ emit_find_msb_using_lzd(bld, result, op[0], false);
+ break;
+ }
+
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index bb0254e..193e748 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1637,6 +1637,9 @@ generate_code(struct brw_codegen *p,
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
+ case BRW_OPCODE_LZD:
+ brw_LZD(p, dst, src[0]);
+ break;
case BRW_OPCODE_CBIT:
assert(devinfo->gen >= 7);
/* CBIT only supports UD type for dst. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f3b4528..352d88a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -993,6 +993,26 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
return true;
}
+static void
+emit_find_msb_using_lzd(const vec4_builder &bld,
+ const dst_reg &dst,
+ const src_reg &src,
+ bool is_signed)
+{
+ vec4_instruction *inst;
+
+ bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src);
+
+ /* LZD counts leading zeros from the MSB side; findMSB() wants the bit index
+ * from the LSB side, i.e. 31 - LZD(src), emitted as an ADD of 31 with the
+ * negate modifier on src[0]. With no bits set, LZD returns 32 and 31-32 = -1,
+ * which is what findMSB() must return. NOTE: is_signed is unused here.
+ */
+ inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
+ brw_imm_d(31));
+ inst->src[0].negate = true;
+}
+
void
vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
{
@@ -1461,6 +1481,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_ufind_msb:
+ emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
+ break;
+
case nir_op_ifind_msb: {
emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
--
2.5.5
More information about the mesa-dev
mailing list