[Mesa-dev] [PATCH 06/16] glsl: Add a pass to lower bitfield-insert into bfm+bfi.

Mon Apr 22 17:08:22 PDT 2013

i965/Gen7+ and Radeon/Evergreen+ have bfm/bfi instructions to implement
bitfieldInsert() from ARB_gpu_shader5.
---
 src/glsl/ir.cpp                 |    2 +
 src/glsl/ir.h                   |   18 ++++++++++++++++
 src/glsl/ir_optimization.h      |    1 +
 src/glsl/ir_validate.cpp        |   12 +++++++++++
 src/glsl/lower_instructions.cpp |   42 +++++++++++++++++++++++++++++++++++++++
 src/mesa/program/ir_to_mesa.cpp |    2 +
 6 files changed, 77 insertions(+), 0 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 2c989c9..2c54525 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -508,8 +508,10 @@ static const char *const operator_strs[] = {
    "max",
    "pow",
    "packHalf2x16_split",
+   "bfm",
    "ubo_load",
    "lrp",
+   "bfi",
    "bitfield_extract",
    "bitfield_insert",
    "vector",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 277b815..f23dc19 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1118,6 +1118,15 @@ enum ir_expression_operation {
    /*@}*/
 
    /**
+    * \name First half of a lowered bitfieldInsert() operation.
+    *
+    * \see lower_instructions::bitfield_insert_to_bfm_bfi
+    */
+   /*@{*/
+   ir_binop_bfm,
+   /*@}*/
+
+   /**
     * Load a value the size of a given GLSL type from a uniform block.
     *
     * operand0 is the ir_constant uniform block index in the linked shader.
@@ -1132,6 +1141,15 @@ enum ir_expression_operation {
 
    ir_triop_lrp,
 
+   /**
+    * \name Second half of a lowered bitfieldInsert() operation.
+    *
+    * \see lower_instructions::bitfield_insert_to_bfm_bfi
+    */
+   /*@{*/
+   ir_triop_bfi,
+   /*@}*/
+
    ir_triop_bitfield_extract,
 
    /**
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index a8885d7..49b1475 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -37,6 +37,7 @@
 #define MOD_TO_FRACT       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
 #define LRP_TO_ARITH       0x80
+#define BITFIELD_INSERT_TO_BFM_BFI 0x100
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 4a8df69..26f09c7 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -474,6 +474,12 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[1]->type == glsl_type::float_type);
       break;
 
+   case ir_binop_bfm:
+      assert(ir->type->is_integer());
+      assert(ir->operands[0]->type->is_integer());
+      assert(ir->operands[1]->type->is_integer());
+      break;
+
    case ir_binop_ubo_load:
       assert(ir->operands[0]->as_constant());
       assert(ir->operands[0]->type == glsl_type::uint_type);
@@ -487,6 +493,12 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
       break;
 
+   case ir_triop_bfi:
+      assert(ir->operands[0]->type->is_integer());
+      assert(ir->operands[1]->type == ir->operands[2]->type);
+      assert(ir->operands[1]->type == ir->type);
+      break;
+
    case ir_triop_bitfield_extract:
       assert(ir->operands[0]->type == ir->type);
       assert(ir->operands[1]->type == glsl_type::int_type);
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 1ce7b7c..ff9715d 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -38,6 +38,7 @@
  * - LOG_TO_LOG2
  * - MOD_TO_FRACT
  * - LRP_TO_ARITH
+ * - BITFIELD_INSERT_TO_BFM_BFI
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -84,6 +85,15 @@
  * LRP_TO_ARITH:
  * -------------
  * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
+ *
+ * BITFIELD_INSERT_TO_BFM_BFI:
+ * ---------------------------
+ * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and
+ * ir_triop_bfi (bitfield insert).
+ *
+ * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader5
+ * with a pair of instructions.
+ *
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -114,6 +124,7 @@ private:
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
    void lrp_to_arith(ir_expression *);
+   void bitfield_insert_to_bfm_bfi(ir_expression *);
 };
 
 /**
@@ -298,6 +309,32 @@ lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
+{
+   /* Rewrites the expression in place, translating
+    *    ir_quadop_bitfield_insert base insert offset bits
+    * into
+    *    ir_triop_bfi (ir_binop_bfm bits offset) insert base
+    */
+
+   /* Stash base (op0) in a temporary; operands[0] is overwritten below. */
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "bfi_base",
+                                           ir_var_temporary);
+   this->base_ir->insert_before(temp);
+   this->base_ir->insert_before(assign(temp, ir->operands[0]));
+
+   ir->operation = ir_triop_bfi;
+   ir->operands[0] = new(ir) ir_expression(ir_binop_bfm, ir->type,
+                                           swizzle_xxxx(ir->operands[3]),
+                                           swizzle_xxxx(ir->operands[2]));
+   /* ir->operands[1] is untouched: it is still the value to insert. */
+   ir->operands[2] = new(ir) ir_dereference_variable(temp);
+   ir->operands[3] = NULL;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -339,6 +376,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
 	 lrp_to_arith(ir);
       break;
 
+   case ir_quadop_bitfield_insert:
+      if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
+         bitfield_insert_to_bfm_bfi(ir);
+      break;
+
    default:
       return visit_continue;
    }
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index c6f6bf4..0848462 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1489,6 +1489,8 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
       break;
 
+   case ir_binop_bfm:
+   case ir_triop_bfi:
    case ir_triop_bitfield_extract:
    case ir_quadop_bitfield_insert:
       assert(!"not supported");
-- 
1.7.8.6