[Mesa-dev] [PATCH 12/14] i965/fs: Add support for bit instructions.
Matt Turner
mattst88 at gmail.com
Thu May 2 13:43:58 PDT 2013
Don't bother scalarizing ir_binop_bfm, since its results are
identical for all channels.
v2: Subtract result of FBH from 31 (unless FBH returned its error value,
0xFFFFFFFF) to convert MSB counts to LSB counts.
v3: Use op0->clone() in ir_triop_bfi to prevent (var_ref
channel_expressions) from appearing multiple times in the IR.
Reviewed-by: Chris Forbes <chrisf at ijw.co.nz> [v2]
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 7 ++++
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++
.../dri/i965/brw_fs_channel_expressions.cpp | 37 +++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 48 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 43 +++++++++++++++++++
5 files changed, 142 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 66e78d0..cac898d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -173,6 +173,13 @@ ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
+ALU1(BFREV)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(CBIT)
/** Gen4 predicated IF. */
fs_inst *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 86a9ec5..fe7eddc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -288,6 +288,13 @@ public:
uint32_t condition);
fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
fs_inst *DEP_RESOLVE_MOV(int grf);
+ fs_inst *BFREV(fs_reg dst, fs_reg value);
+ fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value);
+ fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset);
+ fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base);
+ fs_inst *FBH(fs_reg dst, fs_reg value);
+ fs_inst *FBL(fs_reg dst, fs_reg value);
+ fs_inst *CBIT(fs_reg dst, fs_reg value);
int type_size(const struct glsl_type *type);
fs_inst *get_instruction_generating_reg(fs_inst *start,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 30d8d9b..0f3d4ab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -216,6 +216,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
+ case ir_unop_bitfield_reverse:
+ case ir_unop_bit_count:
+ case ir_unop_find_msb:
+ case ir_unop_find_lsb:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
@@ -338,11 +342,26 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
assert(!"noise should have been broken down to function call");
break;
+ case ir_binop_bfm: {
+ /* Does not need to be scalarized, since its result will be identical
+ * for all channels.
+ */
+ ir_rvalue *op0 = get_element(op_var[0], 0);
+ ir_rvalue *op1 = get_element(op_var[1], 0);
+
+ assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0,
+ op1));
+ break;
+ }
+
case ir_binop_ubo_load:
assert(!"not yet supported");
break;
case ir_triop_lrp:
+ case ir_triop_bitfield_extract:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
@@ -356,6 +375,23 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
}
break;
+ case ir_triop_bfi: {
+ /* Only a single BFM is needed for multiple BFIs. */
+ ir_rvalue *op0 = get_element(op_var[0], 0);
+
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op1 = get_element(op_var[1], i);
+ ir_rvalue *op2 = get_element(op_var[2], i);
+
+ assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0->clone(mem_ctx, NULL),
+ op1,
+ op2));
+ }
+ break;
+ }
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
@@ -366,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
+ case ir_quadop_bitfield_insert:
case ir_quadop_vector:
assert(!"should have been lowered");
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0f6b715..b7c85ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -1209,6 +1209,54 @@ fs_generator::generate_code(exec_list *instructions)
case BRW_OPCODE_SEL:
brw_SEL(p, dst, src[0], src[1]);
break;
+ case BRW_OPCODE_BFREV:
+ /* BFREV only supports UD type for src and dst. */
+ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(src[0], BRW_REGISTER_TYPE_UD));
+ break;
+ case BRW_OPCODE_FBH:
+ /* FBH only supports UD type for dst. */
+ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+ case BRW_OPCODE_FBL:
+ /* FBL only supports UD type for dst. */
+ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+ case BRW_OPCODE_CBIT:
+ /* CBIT only supports UD type for dst. */
+ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+
+ case BRW_OPCODE_BFE:
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ if (dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_BFE(p, dst, src[0], src[1], src[2]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ } else {
+ brw_BFE(p, dst, src[0], src[1], src[2]);
+ }
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ break;
+
+ case BRW_OPCODE_BFI1:
+ brw_BFI1(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_BFI2:
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ if (dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_BFI2(p, dst, src[0], src[1], src[2]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ } else {
+ brw_BFI2(p, dst, src[0], src[1], src[2]);
+ }
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ break;
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f1539d5..417e8a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -584,6 +584,49 @@ fs_visitor::visit(ir_expression *ir)
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
+ case ir_unop_bitfield_reverse:
+ emit(BFREV(this->result, op[0]));
+ break;
+ case ir_unop_bit_count:
+ emit(CBIT(this->result, op[0]));
+ break;
+ case ir_unop_find_msb:
+ temp = fs_reg(this, glsl_type::uint_type);
+ emit(FBH(temp, op[0]));
+
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB count.
+ */
+
+ /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+ emit(MOV(this->result, temp));
+ emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
+
+ temp.negate = true;
+ inst = emit(ADD(this->result, temp, fs_reg(31)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ case ir_unop_find_lsb:
+ emit(FBL(this->result, op[0]));
+ break;
+ case ir_triop_bitfield_extract:
+ /* Note that the instruction's argument order is reversed from GLSL
+ * and the IR.
+ */
+ emit(BFE(this->result, op[2], op[1], op[0]));
+ break;
+ case ir_binop_bfm:
+ emit(BFI1(this->result, op[0], op[1]));
+ break;
+ case ir_triop_bfi:
+ emit(BFI2(this->result, op[0], op[1], op[2]));
+ break;
+ case ir_quadop_bitfield_insert:
+ assert(!"not reached: should be handled by "
+ "lower_instructions::bitfield_insert_to_bfm_bfi");
+ break;
+
case ir_unop_bit_not:
emit(NOT(this->result, op[0]));
break;
--
1.8.1.5
More information about the mesa-dev
mailing list