[Mesa-dev] [PATCH 08/16] glsl: Add BFE-to-bitops lowering pass.

Matt Turner mattst88 at gmail.com
Mon Apr 22 17:08:24 PDT 2013


---
 src/glsl/ir_optimization.h      |    1 +
 src/glsl/lower_instructions.cpp |   82 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 445dc49..c5405e5 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -39,6 +39,7 @@
 #define LRP_TO_ARITH       0x80
 #define BITFIELD_INSERT_TO_BFM_BFI 0x100
 #define BITFIELD_INSERT_TO_BFM_BITOPS 0x200
+#define BFE_TO_BITOPS      0x400
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 1c1cad8..d49c419 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -40,6 +40,7 @@
  * - LRP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - BITFIELD_INSERT_TO_BFM_BITOPS
+ * - BFE_TO_BITOPS
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -106,6 +107,10 @@
  *
  * It's difficult to use vector three-source instructions in i965's vertex
  * shader, so don't emit ir_triop_bfi, but rather bit operations.
+ *
+ * BFE_TO_BITOPS:
+ * --------------
+ * Breaks ir_triop_bitfield_extract into shifts, special-casing bits == 0.
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -138,6 +143,7 @@ private:
    void lrp_to_arith(ir_expression *);
    void bitfield_insert_to_bfm_bfi(ir_expression *);
    void bitfield_insert_to_bfm_bitops(ir_expression *ir);
+   void bfe_to_bitops(ir_expression *);
 };
 
 /**
@@ -394,6 +400,77 @@ void lower_instructions_visitor::bitfield_insert_to_bfm_bitops(ir_expression *ir
    ir->operands[3] = NULL;
 }
 
+void
+lower_instructions_visitor::bfe_to_bitops(ir_expression *ir)
+{
+   /* Lowers ir_triop_bitfield_extract in place. Translates
+    *    (bfe value offset bits)
+    * into
+    *    (asr (shl value (- (- 32 bits) offset)) (- 32 bits))
+    *
+    * which more simply is
+    *    value <<= 32 - bits - offset;
+    *    value >>= 32 - bits; // >>= is ASR.
+    *
+    * but some hardware (like i965) can only shift by 0-31, so the corner case
+    * of bits == 0 leads to shifting right by 0 instead of 32. Instead, do
+    *    if (bits == 0)
+    *       result = 0;
+    *    else {
+    *       value <<= 32 - bits - offset;
+    *       value >>= 32 - bits; // >>= is ASR.
+    *       result = value;
+    *    }
+    */
+
+   /* TODO:
+    *    - Allow hardware that can shift by 32 to avoid the branch.
+    *    - Allow skipping the lowering pass if type of <value> is scalar.
+    *    - Emit ((value >> offset) & ((1 << bits) - 1)) for unsigned <value>s.
+    */
+   /* Copy each of the three operands into its own temporary variable. */
+   ir_variable *value = new(ir) ir_variable(ir->operands[0]->type, "bfe_value",
+                                            ir_var_temporary);
+   this->base_ir->insert_before(value);
+   this->base_ir->insert_before(assign(value, ir->operands[0]));
+
+   ir_variable *offset = new(ir) ir_variable(ir->operands[1]->type, "bfe_offset",
+                                             ir_var_temporary);
+   this->base_ir->insert_before(offset);
+   this->base_ir->insert_before(assign(offset, ir->operands[1]));
+
+   ir_variable *bits = new(ir) ir_variable(ir->operands[2]->type, "bfe_bits",
+                                           ir_var_temporary);
+   this->base_ir->insert_before(bits);
+   this->base_ir->insert_before(assign(bits, ir->operands[2]));
+   /* width_minus_bits = 32 - bits; both shift counts below are built on it. */
+   ir_constant *immed_32 = new(ir) ir_constant(32);
+   ir_variable *width_minus_bits = new(ir) ir_variable(glsl_type::int_type,
+                                                       "width_minus_bits",
+                                                       ir_var_temporary);
+   this->base_ir->insert_before(width_minus_bits);
+   this->base_ir->insert_before(assign(width_minus_bits, sub(immed_32, bits)));
+   /* NOTE(review): confirm ir_constant(0) matches ir->type for uint/vector. */
+   ir_variable *result = new(ir) ir_variable(ir->type, "bfe_result",
+                                             ir_var_temporary);
+   this->base_ir->insert_before(result);
+   /* result = 0 when bits == 0, otherwise the shl/shr pair described above. */
+   ir_if *bits_zero_if = if_tree(equal(bits, new(ir) ir_constant(0)),
+                                 assign(result, new(ir) ir_constant(0)),
+                                 assign(result,
+                                        rshift(lshift(value, swizzle_xxxx(sub(width_minus_bits, offset))),
+                                               swizzle_xxxx(width_minus_bits))));
+   this->base_ir->insert_before(bits_zero_if);
+
+   /* XXX: Rewrites ir as (add result 0); there should be a better way. */
+   ir->operation = ir_binop_add;
+   ir->operands[0] = new(ir) ir_dereference_variable(result);
+   ir->operands[1] = new(ir) ir_constant(0);
+   ir->operands[2] = NULL;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -442,6 +519,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          bitfield_insert_to_bfm_bitops(ir);
       break;
 
+   case ir_triop_bitfield_extract:
+      if (lowering(BFE_TO_BITOPS))
+         bfe_to_bitops(ir);
+      break;
+
    default:
       return visit_continue;
    }
-- 
1.7.8.6



More information about the mesa-dev mailing list