[Mesa-dev] [PATCH 12/14] i965/fs: Add support for bit instructions.

Matt Turner mattst88 at gmail.com
Thu May 2 13:43:58 PDT 2013


Don't bother scalarizing ir_binop_bfm, since its results are
identical for all channels.

v2: Subtract the result of FBH from 31 (unless FBH returned an error) to
    convert MSB counts to LSB counts.
v3: Use op0->clone() in ir_triop_bfi to prevent the same (var_ref
    channel_expressions) node from appearing multiple times in the IR.

Reviewed-by: Chris Forbes <chrisf at ijw.co.nz> [v2]
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  7 ++++
 src/mesa/drivers/dri/i965/brw_fs.h                 |  7 ++++
 .../dri/i965/brw_fs_channel_expressions.cpp        | 37 +++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 48 ++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 43 +++++++++++++++++++
 5 files changed, 142 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 66e78d0..cac898d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -173,6 +173,13 @@ ALU2(SHL)
 ALU2(SHR)
 ALU2(ASR)
 ALU3(LRP)
+ALU1(BFREV)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(CBIT)
 
 /** Gen4 predicated IF. */
 fs_inst *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 86a9ec5..fe7eddc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -288,6 +288,13 @@ public:
                 uint32_t condition);
    fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
    fs_inst *DEP_RESOLVE_MOV(int grf);
+   fs_inst *BFREV(fs_reg dst, fs_reg value);
+   fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value);
+   fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset);
+   fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base);
+   fs_inst *FBH(fs_reg dst, fs_reg value);
+   fs_inst *FBL(fs_reg dst, fs_reg value);
+   fs_inst *CBIT(fs_reg dst, fs_reg value);
 
    int type_size(const struct glsl_type *type);
    fs_inst *get_instruction_generating_reg(fs_inst *start,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 30d8d9b..0f3d4ab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -216,6 +216,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    case ir_unop_cos_reduced:
    case ir_unop_dFdx:
    case ir_unop_dFdy:
+   case ir_unop_bitfield_reverse:
+   case ir_unop_bit_count:
+   case ir_unop_find_msb:
+   case ir_unop_find_lsb:
       for (i = 0; i < vector_elements; i++) {
 	 ir_rvalue *op0 = get_element(op_var[0], i);
 
@@ -338,11 +342,26 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       assert(!"noise should have been broken down to function call");
       break;
 
+   case ir_binop_bfm: {
+      /* Does not need to be scalarized, since its result will be identical
+       * for all channels.
+       */
+      ir_rvalue *op0 = get_element(op_var[0], 0);
+      ir_rvalue *op1 = get_element(op_var[1], 0);
+
+      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
+                                               element_type,
+                                               op0,
+                                               op1));
+      break;
+   }
+
    case ir_binop_ubo_load:
       assert(!"not yet supported");
       break;
 
    case ir_triop_lrp:
+   case ir_triop_bitfield_extract:
       for (i = 0; i < vector_elements; i++) {
 	 ir_rvalue *op0 = get_element(op_var[0], i);
 	 ir_rvalue *op1 = get_element(op_var[1], i);
@@ -356,6 +375,23 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       }
       break;
 
+   case ir_triop_bfi: {
+      /* Only a single BFM is needed for multiple BFIs. */
+      ir_rvalue *op0 = get_element(op_var[0], 0);
+
+      for (i = 0; i < vector_elements; i++) {
+         ir_rvalue *op1 = get_element(op_var[1], i);
+         ir_rvalue *op2 = get_element(op_var[2], i);
+
+         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+                                                  element_type,
+                                                  op0->clone(mem_ctx, NULL),
+                                                  op1,
+                                                  op2));
+      }
+      break;
+   }
+
    case ir_unop_pack_snorm_2x16:
    case ir_unop_pack_snorm_4x8:
    case ir_unop_pack_unorm_2x16:
@@ -366,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_unorm_4x8:
    case ir_unop_unpack_half_2x16:
+   case ir_quadop_bitfield_insert:
    case ir_quadop_vector:
       assert(!"should have been lowered");
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0f6b715..b7c85ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -1209,6 +1209,54 @@ fs_generator::generate_code(exec_list *instructions)
       case BRW_OPCODE_SEL:
 	 brw_SEL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_BFREV:
+         /* BFREV only supports UD type for src and dst. */
+         brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
+                      retype(src[0], BRW_REGISTER_TYPE_UD));
+         break;
+      case BRW_OPCODE_FBH:
+         /* FBH only supports UD type for dst. */
+         brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+         break;
+      case BRW_OPCODE_FBL:
+         /* FBL only supports UD type for dst. */
+         brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+         break;
+      case BRW_OPCODE_CBIT:
+         /* CBIT only supports UD type for dst. */
+         brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+         break;
+
+      case BRW_OPCODE_BFE:
+         brw_set_access_mode(p, BRW_ALIGN_16);
+         if (dispatch_width == 16) {
+            brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+            brw_BFE(p, dst, src[0], src[1], src[2]);
+            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+            brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+         } else {
+            brw_BFE(p, dst, src[0], src[1], src[2]);
+         }
+         brw_set_access_mode(p, BRW_ALIGN_1);
+         break;
+
+      case BRW_OPCODE_BFI1:
+         brw_BFI1(p, dst, src[0], src[1]);
+         break;
+      case BRW_OPCODE_BFI2:
+         brw_set_access_mode(p, BRW_ALIGN_16);
+         if (dispatch_width == 16) {
+            brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+            brw_BFI2(p, dst, src[0], src[1], src[2]);
+            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+            brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+         } else {
+            brw_BFI2(p, dst, src[0], src[1], src[2]);
+         }
+         brw_set_access_mode(p, BRW_ALIGN_1);
+         break;
 
       case BRW_OPCODE_IF:
 	 if (inst->src[0].file != BAD_FILE) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f1539d5..417e8a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -584,6 +584,49 @@ fs_visitor::visit(ir_expression *ir)
       emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
 
+   case ir_unop_bitfield_reverse:
+      emit(BFREV(this->result, op[0]));
+      break;
+   case ir_unop_bit_count:
+      emit(CBIT(this->result, op[0]));
+      break;
+   case ir_unop_find_msb:
+      temp = fs_reg(this, glsl_type::uint_type);
+      emit(FBH(temp, op[0]));
+
+      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+       * subtract the result from 31 to convert the MSB count into an LSB count.
+       */
+
+      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+      emit(MOV(this->result, temp));
+      emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
+
+      temp.negate = true;
+      inst = emit(ADD(this->result, temp, fs_reg(31)));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   case ir_unop_find_lsb:
+      emit(FBL(this->result, op[0]));
+      break;
+   case ir_triop_bitfield_extract:
+      /* Note that the instruction's argument order is reversed from GLSL
+       * and the IR.
+       */
+      emit(BFE(this->result, op[2], op[1], op[0]));
+      break;
+   case ir_binop_bfm:
+      emit(BFI1(this->result, op[0], op[1]));
+      break;
+   case ir_triop_bfi:
+      emit(BFI2(this->result, op[0], op[1], op[2]));
+      break;
+   case ir_quadop_bitfield_insert:
+      assert(!"not reached: should be handled by "
+              "lower_instructions::bitfield_insert_to_bfm_bfi");
+      break;
+
    case ir_unop_bit_not:
       emit(NOT(this->result, op[0]));
       break;
-- 
1.8.1.5



More information about the mesa-dev mailing list