[Mesa-dev] [PATCH 15/24] i965: Use LZD to implement nir_op_ufind_msb

Ian Romanick idr at freedesktop.org
Wed Jun 29 21:04:22 UTC 2016


From: Ian Romanick <ian.d.romanick at intel.com>

This uses one fewer instruction than the FBH-based path that
nir_op_ufind_msb previously shared with nir_op_ifind_msb.

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.h               |  4 ++++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  3 +++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp         | 26 +++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_vec4.h             |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp       | 22 ++++++++++++++++++++
 6 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4237197..22ce092 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -237,6 +237,10 @@ public:
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
                       nir_jump_instr *instr);
+   void nir_emit_find_msb_using_lzd(const brw::fs_builder &bld,
+                                    const fs_reg &result,
+                                    const fs_reg &src,
+                                    bool is_signed);
    fs_reg get_nir_src(const nir_src &src);
    fs_reg get_nir_src_imm(const nir_src &src);
    fs_reg get_nir_dest(const nir_dest &dest);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index d25d26a..bda4a26 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1761,6 +1761,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          /* FBL only supports UD type for dst. */
          brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
          break;
+      case BRW_OPCODE_LZD:
+         brw_LZD(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CBIT:
          assert(devinfo->gen >= 7);
          /* CBIT only supports UD type for dst. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index b3f5dfd..f15bf3e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -617,6 +617,25 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
 }
 
 void
+fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder &bld,
+                                        const fs_reg &result,
+                                        const fs_reg &src,
+                                        bool is_signed)
+{
+   fs_inst *inst;
+
+   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
+
+   /* LZD counts from the MSB side; GLSL's findMSB() wants the bit index from
+    * the LSB side.  The ADD below has src[0] negated, so it computes 31 - LZD.
+    * With no bits set, LZD returns 32 and 31-32 = -1, which is exactly what
+    * findMSB() is supposed to return.  NOTE(review): is_signed is unused here.
+    */
+   inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
+   inst->src[0].negate = true;
+}
+
+void
 fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
 {
    struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
@@ -1312,7 +1331,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       bld.CBIT(result, op[0]);
       break;
 
-   case nir_op_ufind_msb:
+   case nir_op_ufind_msb: {
+      assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      nir_emit_find_msb_using_lzd(bld, result, op[0], false);
+      break;
+   }
+
    case nir_op_ifind_msb: {
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 76dea04..4be6833 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -326,6 +326,10 @@ public:
    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 
+   void nir_emit_find_msb_using_lzd(const dst_reg &dst,
+                                    const src_reg &src,
+                                    bool is_signed);
+
    dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
    dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
    dst_reg get_nir_dest(const nir_dest &dest);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index bb0254e..193e748 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1637,6 +1637,9 @@ generate_code(struct brw_codegen *p,
          /* FBL only supports UD type for dst. */
          brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
          break;
+      case BRW_OPCODE_LZD:
+         brw_LZD(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CBIT:
          assert(devinfo->gen >= 7);
          /* CBIT only supports UD type for dst. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index f3b4528..cd88fb6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -994,6 +994,25 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
 }
 
 void
+vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg &dst,
+                                          const src_reg &src,
+                                          bool is_signed)
+{
+   vec4_instruction *inst;
+
+   emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src);
+
+   /* LZD counts from the MSB side; GLSL's findMSB() wants the bit index from
+    * the LSB side.  The ADD below has src[0] negated, so it computes 31 - LZD.
+    * With no bits set, LZD returns 32 and 31-32 = -1, which is exactly what
+    * findMSB() is supposed to return.  NOTE(review): is_signed is unused here.
+    */
+   inst = emit(ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
+                   brw_imm_d(31)));
+   inst->src[0].negate = true;
+}
+
+void
 vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 {
    vec4_instruction *inst;
@@ -1461,6 +1480,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_ufind_msb:
+      nir_emit_find_msb_using_lzd(dst, op[0], false);
+      break;
+
    case nir_op_ifind_msb: {
       emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
 
-- 
2.5.5



More information about the mesa-dev mailing list