[Mesa-dev] [PATCH 2/8] i965/fs: Split generate_math into gen4/gen6 and 1/2 operand variants.
Kenneth Graunke
kenneth at whitecape.org
Wed Sep 7 07:39:11 PDT 2011
This mirrors the structure Eric used in the new VS backend, and seems
simpler. In particular, the math1/math2 split will avoid having to
figure out how many operands there are, as this is already known by the
caller.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +-
src/mesa/drivers/dri/i965/brw_fs.h | 15 +++-
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 142 ++++++++++++++++++-----------
3 files changed, 104 insertions(+), 56 deletions(-)
Tested on Sandy Bridge and Ironlake.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7f5194b..83737bc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -597,8 +597,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
inst = emit(opcode, dst, src0, src1);
} else {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
- inst = emit(opcode, dst, src0, reg_null_f);
+ inst = emit(opcode, dst, src0, src1);
inst->base_mrf = base_mrf;
inst->mlen = 2 * c->dispatch_width / 8;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f3d8fbf..6f0c802 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -486,7 +486,20 @@ public:
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
+ void generate_math1_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math1_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index ba0d2a2..dc6a211 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -143,73 +143,99 @@ fs_visitor::generate_linterp(fs_inst *inst,
}
void
-fs_visitor::generate_math(fs_inst *inst,
- struct brw_reg dst, struct brw_reg *src)
+fs_visitor::generate_math1_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0)
{
int op = brw_math_function(inst->opcode);
- if (intel->gen >= 6) {
- assert(inst->mlen == 0);
-
- if (inst->opcode == SHADER_OPCODE_POW) {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math2(p, dst, op, src[0], src[1]);
+ assert(inst->mlen == 0);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- } else {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- }
- } else /* gen <= 5 */{
- assert(inst->mlen >= 1);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, src0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
- inst->base_mrf, src[0],
+ 0, sechalf(src0),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+}
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf + 1, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+void
+fs_visitor::generate_math2_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ int op = brw_math_function(inst->opcode);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
+ assert(inst->mlen == 0);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math2(p, dst, op, src0, src1);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
void
+fs_visitor::generate_math1_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0)
+{
+ int op = brw_math_function(inst->opcode);
+
+ assert(inst->mlen >= 1);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf, src0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src0),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+}
+
+void
+fs_visitor::generate_math2_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+ generate_math1_gen4(inst, dst, src0);
+}
+
+void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
int msg_type = -1;
@@ -762,10 +788,20 @@ fs_visitor::generate_code()
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_POW:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- generate_math(inst, dst, src);
+ if (intel->gen >= 6) {
+ generate_math1_gen6(inst, dst, src[0]);
+ } else {
+ generate_math1_gen4(inst, dst, src[0]);
+ }
+ break;
+ case SHADER_OPCODE_POW:
+ if (intel->gen >= 6) {
+ generate_math2_gen6(inst, dst, src[0], src[1]);
+ } else {
+ generate_math2_gen4(inst, dst, src[0], src[1]);
+ }
break;
case FS_OPCODE_PIXEL_X:
generate_pixel_xy(dst, true);
--
1.7.6.1
More information about the mesa-dev mailing list