[Mesa-dev] [PATCH 6/9] i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
Matt Turner
mattst88 at gmail.com
Tue Feb 19 17:03:11 PST 2013
From: Kenneth Graunke <kenneth at whitecape.org>
v2 [mattst88]:
- Add BRW_OPCODE_LRP to list of CSE-able expressions.
- Fix op_var[] array size.
- Rename arguments to emit_lrp to (x, y, a) to clear confusion.
- Add LRP function to brw_fs.cpp/.h.
- Corrected comment about LRP instruction arguments in emit_lrp.
Reviewed-by: Matt Turner <mattst88 at gmail.com>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++++
src/mesa/drivers/dri/i965/brw_fs.h | 2 +
.../dri/i965/brw_fs_channel_expressions.cpp | 16 ++++++++-
src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 +
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 15 +++++++--
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 35 ++++++++++++++++++--
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +-
7 files changed, 71 insertions(+), 8 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c1ccd92..bdb6616 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
}
+#define ALU3(op) \
+ fs_inst * \
+ fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
+ { \
+ return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
+ }
+
ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
@@ -161,6 +168,7 @@ ALU2(XOR)
ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
+ALU3(LRP)
/** Gen4 predicated IF. */
fs_inst *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d5ebd51..9c1b359 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -285,6 +285,7 @@ public:
fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
uint32_t condition);
+ fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
fs_inst *DEP_RESOLVE_MOV(int grf);
int type_size(const struct glsl_type *type);
@@ -360,6 +361,7 @@ public:
fs_reg fix_math_operand(fs_reg src);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+ void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a);
void emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index ea06225..30d8d9b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -135,7 +135,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
ir_expression *expr = ir->rhs->as_expression();
bool found_vector = false;
unsigned int i, vector_elements = 1;
- ir_variable *op_var[2];
+ ir_variable *op_var[3];
if (!expr)
return visit_continue;
@@ -342,6 +342,20 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
assert(!"not yet supported");
break;
+ case ir_triop_lrp:
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op0 = get_element(op_var[0], i);
+ ir_rvalue *op1 = get_element(op_var[1], i);
+ ir_rvalue *op2 = get_element(op_var[2], i);
+
+ assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0,
+ op1,
+ op2));
+ }
+ break;
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 70c143a..0b74d2e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst)
case BRW_OPCODE_LINE:
case BRW_OPCODE_PLN:
case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP:
case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
return true;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 3d1f3b3..38d6332 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -1082,18 +1082,27 @@ fs_generator::generate_code(exec_list *instructions)
break;
case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP: {
+ struct brw_instruction *(*brw_inst)(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2);
+ brw_inst = (inst->opcode == BRW_OPCODE_MAD) ?
+ brw_MAD : brw_LRP;
brw_set_access_mode(p, BRW_ALIGN_16);
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MAD(p, dst, src[0], src[1], src[2]);
+ brw_inst(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
+ brw_inst(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else {
- brw_MAD(p, dst, src[0], src[1], src[2]);
+ brw_inst(p, dst, src[0], src[1], src[2]);
}
brw_set_access_mode(p, BRW_ALIGN_1);
break;
+ }
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d4f6fc9..7c03f6b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -199,6 +199,30 @@ fs_visitor::visit(ir_dereference_array *ir)
}
void
+fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
+{
+ if (intel->gen < 6 || x.file == IMM || y.file == IMM || a.file == IMM) {
+ /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
+ fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
+ fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);
+ fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
+
+ emit(MUL(y_times_a, y, a));
+
+ a.negate = !a.negate;
+ emit(ADD(one_minus_a, fs_reg(1.0f), a));
+ emit(MUL(x_times_one_minus_a, x, one_minus_a));
+
+ emit(ADD(dst, x_times_one_minus_a, y_times_a));
+ } else {
+ /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+ * we need to reorder the operands.
+ */
+ emit(LRP(dst, a, y, x));
+ }
+}
+
+void
fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1)
{
@@ -291,10 +315,10 @@ void
fs_visitor::visit(ir_expression *ir)
{
unsigned int operand;
- fs_reg op[2], temp;
+ fs_reg op[3], temp;
fs_inst *inst;
- assert(ir->get_num_operands() <= 2);
+ assert(ir->get_num_operands() <= 3);
if (try_emit_saturate(ir))
return;
@@ -586,7 +610,7 @@ fs_visitor::visit(ir_expression *ir)
case ir_binop_pack_half_2x16_split:
emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
break;
- case ir_binop_ubo_load:
+ case ir_binop_ubo_load: {
/* This IR node takes a constant uniform block and a constant or
* variable byte offset within the block and loads a vector from that.
*/
@@ -662,6 +686,11 @@ fs_visitor::visit(ir_expression *ir)
result.reg_offset = 0;
break;
}
+
+ case ir_triop_lrp:
+ emit_lrp(this->result, op[0], op[1], op[2]);
+ break;
+ }
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9ab18cc..6965d72 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -156,7 +156,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
- LRP_TO_ARITH);
+ (stage == MESA_SHADER_FRAGMENT ? 0 : LRP_TO_ARITH));
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened.
--
1.7.8.6
More information about the mesa-dev
mailing list