[Mesa-dev] [PATCH] i965: Enable faster workaround-free math on Ivybridge.
Paul Berry
stereotype441 at gmail.com
Thu Oct 20 13:56:24 PDT 2011
On 18 October 2011 12:24, Kenneth Graunke <kenneth at whitecape.org> wrote:
> According to the documentation, Ivybridge's math instruction works in
> SIMD16 mode for the fragment shader, and no longer forbids align16 mode
> for the vertex shader.
>
> The documentation claims that SIMD16 mode isn't supported for INT DIV,
> but empirical evidence shows that it works fine. Presumably the note
> is trying to warn us that the variant that returns both quotient and
> remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1
> would be sechalf(dst), trashing half your results. Since we don't use
> that variant, we don't care and can just enable SIMD16 everywhere.
>
> The documentation also still claims that source modifiers and
> conditional modifiers aren't supported, but empirical evidence and
> study of the simulator both show that they work just fine.
>
> Goodbye workarounds. Math just works now.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_eu_emit.c | 29
> +++++++++++++++---------
> src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++-
> src/mesa/drivers/dri/i965/brw_fs.h | 7 ++++++
> src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 31
> ++++++++++++++++++++++++++-
> src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++
> src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 19 ++++++++++++++-
> 6 files changed, 80 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 5caebfc..0841478 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p,
> assert(src.file == BRW_GENERAL_REGISTER_FILE);
>
> assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
> - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> + if (intel->gen == 6)
> + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
>
> - /* Source modifiers are ignored for extended math instructions. */
> - assert(!src.negate);
> - assert(!src.abs);
> + /* Source modifiers are ignored for extended math instructions on
> Gen6. */
> + if (intel->gen == 6) {
> + assert(!src.negate);
> + assert(!src.abs);
> + }
>
> if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
> function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
> @@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p,
> assert(src1.file == BRW_GENERAL_REGISTER_FILE);
>
> assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
> - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
> - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
> + if (intel->gen == 6) {
> + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
> + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
> + }
>
> if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
> function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
> @@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p,
> assert(src1.type == BRW_REGISTER_TYPE_F);
> }
>
> - /* Source modifiers are ignored for extended math instructions. */
> - assert(!src0.negate);
> - assert(!src0.abs);
> - assert(!src1.negate);
> - assert(!src1.abs);
> + /* Source modifiers are ignored for extended math instructions on Gen6.
> */
> + if (intel->gen == 6) {
> + assert(!src0.negate);
> + assert(!src0.abs);
> + assert(!src1.negate);
> + assert(!src1.abs);
> + }
>
> /* Math is the same ISA format as other opcodes, except that
> CondModifier
> * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index f731662..6c417d4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst,
> fs_reg src)
> * The hardware ignores source modifiers (negate and abs) on math
> * instructions, so we also move to a temp to set those up.
>
Probably should change this comment to clarify that this is only the case
for Gen6.
> */
> - if (intel->gen >= 6 && (src.file == UNIFORM ||
> + if (intel->gen == 6 && (src.file == UNIFORM ||
> src.abs ||
> src.negate)) {
> fs_reg expanded = fs_reg(this, glsl_type::float_type);
> @@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst,
> fs_reg src0, fs_reg src1)
> return NULL;
> }
>
> - if (intel->gen >= 6) {
> + if (intel->gen >= 7) {
> + inst = emit(opcode, dst, src0, src1);
> + } else if (intel->gen == 6) {
> /* Can't do hstride == 0 args to gen6 math, so expand it out.
> *
> * The hardware ignores source modifiers (negate and abs) on math
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 4035186..85d3cad 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -490,6 +490,13 @@ public:
> void generate_linterp(fs_inst *inst, struct brw_reg dst,
> struct brw_reg *src);
> void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg
> src);
> + void generate_math1_gen7(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src);
> + void generate_math2_gen7(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1);
> void generate_math1_gen6(fs_inst *inst,
> struct brw_reg dst,
> struct brw_reg src);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 4c158fe..9697762 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst,
> }
>
> void
> +fs_visitor::generate_math1_gen7(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src0)
> +{
> + assert(inst->mlen == 0);
> + brw_math(p, dst,
> + brw_math_function(inst->opcode),
> + inst->saturate ? BRW_MATH_SATURATE_SATURATE
> + : BRW_MATH_SATURATE_NONE,
> + 0, src0,
> + BRW_MATH_DATA_VECTOR,
> + BRW_MATH_PRECISION_FULL);
> +}
> +
> +void
> +fs_visitor::generate_math2_gen7(fs_inst *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1)
> +{
> + assert(inst->mlen == 0);
> + brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
> +}
> +
> +void
> fs_visitor::generate_math1_gen6(fs_inst *inst,
> struct brw_reg dst,
> struct brw_reg src0)
> @@ -788,7 +813,9 @@ fs_visitor::generate_code()
> case SHADER_OPCODE_LOG2:
> case SHADER_OPCODE_SIN:
> case SHADER_OPCODE_COS:
> - if (intel->gen >= 6) {
> + if (intel->gen >= 7) {
> + generate_math1_gen7(inst, dst, src[0]);
> + } else if (intel->gen == 6) {
> generate_math1_gen6(inst, dst, src[0]);
> } else {
> generate_math_gen4(inst, dst, src[0]);
> @@ -798,6 +825,8 @@ fs_visitor::generate_code()
> case SHADER_OPCODE_INT_REMAINDER:
> case SHADER_OPCODE_POW:
> if (intel->gen >= 6) {
> + generate_math2_gen7(inst, dst, src[0], src[1]);
> + } else if (intel->gen == 6) {
> generate_math2_gen6(inst, dst, src[0], src[1]);
> } else {
> generate_math_gen4(inst, dst, src[0]);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index eae841c..4d8bb2d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -532,6 +532,10 @@ public:
> struct brw_reg dst,
> struct brw_reg src0,
> struct brw_reg src1);
> + void generate_math2_gen7(vec4_instruction *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1);
>
> void generate_urb_write(vec4_instruction *inst);
> void generate_oword_dual_block_offsets(struct brw_reg m1,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index ccbad25..f66091d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction
> *inst,
> }
>
> void
> +vec4_visitor::generate_math2_gen7(vec4_instruction *inst,
> + struct brw_reg dst,
> + struct brw_reg src0,
> + struct brw_reg src1)
> +{
> + brw_math2(p,
> + dst,
> + brw_math_function(inst->opcode),
> + src0, src1);
> +}
> +
> +void
> vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
> struct brw_reg dst,
> struct brw_reg src0,
> @@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction
> *instruction,
> case SHADER_OPCODE_LOG2:
> case SHADER_OPCODE_SIN:
> case SHADER_OPCODE_COS:
> - if (intel->gen >= 6) {
> + if (intel->gen == 6) {
> generate_math1_gen6(inst, dst, src[0]);
> } else {
> + /* Also works for Gen7. */
> generate_math1_gen4(inst, dst, src[0]);
> }
> break;
> @@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction
> *instruction,
> case SHADER_OPCODE_POW:
> case SHADER_OPCODE_INT_QUOTIENT:
> case SHADER_OPCODE_INT_REMAINDER:
> - if (intel->gen >= 6) {
> + if (intel->gen >= 7) {
> + generate_math2_gen7(inst, dst, src[0], src[1]);
> + } else if (intel->gen == 6) {
> generate_math2_gen6(inst, dst, src[0], src[1]);
> } else {
> generate_math2_gen4(inst, dst, src[0], src[1]);
> --
> 1.7.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
Other than the above comment, this patch is:
Reviewed-by: Paul Berry <stereotype441 at gmail.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20111020/3700b765/attachment.htm>
More information about the mesa-dev
mailing list