[Mesa-dev] [PATCH] i965: Enable faster workaround-free math on Ivybridge.

Paul Berry stereotype441 at gmail.com
Thu Oct 20 13:56:24 PDT 2011


On 18 October 2011 12:24, Kenneth Graunke <kenneth at whitecape.org> wrote:

> According to the documentation, Ivybridge's math instruction works in
> SIMD16 mode for the fragment shader, and no longer forbids align16 mode
> for the vertex shader.
>
> The documentation claims that SIMD16 mode isn't supported for INT DIV,
> but empirical evidence shows that it works fine.  Presumably the note
> is trying to warn us that the variant that returns both quotient and
> remainder in (dst, dst + 1) doesn't work in SIMD16 mode since dst + 1
> would be sechalf(dst), trashing half your results.  Since we don't use
> that variant, we don't care and can just enable SIMD16 everywhere.
>
> The documentation also still claims that source modifiers and
> conditional modifiers aren't supported, but empirical evidence and
> study of the simulator both show that they work just fine.
>
> Goodbye workarounds.  Math just works now.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_eu_emit.c     |   29
> +++++++++++++++---------
>  src/mesa/drivers/dri/i965/brw_fs.cpp        |    6 +++-
>  src/mesa/drivers/dri/i965/brw_fs.h          |    7 ++++++
>  src/mesa/drivers/dri/i965/brw_fs_emit.cpp   |   31
> ++++++++++++++++++++++++++-
>  src/mesa/drivers/dri/i965/brw_vec4.h        |    4 +++
>  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |   19 ++++++++++++++-
>  6 files changed, 80 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 5caebfc..0841478 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -1496,11 +1496,14 @@ void brw_math( struct brw_compile *p,
>       assert(src.file == BRW_GENERAL_REGISTER_FILE);
>
>       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
> -      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
> +      if (intel->gen == 6)
> +        assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
>
> -      /* Source modifiers are ignored for extended math instructions. */
> -      assert(!src.negate);
> -      assert(!src.abs);
> +      /* Source modifiers are ignored for extended math instructions on
> Gen6. */
> +      if (intel->gen == 6) {
> +        assert(!src.negate);
> +        assert(!src.abs);
> +      }
>
>       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
>          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
> @@ -1560,8 +1563,10 @@ void brw_math2(struct brw_compile *p,
>    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
>
>    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
> -   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
> -   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
> +   if (intel->gen == 6) {
> +      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
> +      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
> +   }
>
>    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
>        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
> @@ -1573,11 +1578,13 @@ void brw_math2(struct brw_compile *p,
>       assert(src1.type == BRW_REGISTER_TYPE_F);
>    }
>
> -   /* Source modifiers are ignored for extended math instructions. */
> -   assert(!src0.negate);
> -   assert(!src0.abs);
> -   assert(!src1.negate);
> -   assert(!src1.abs);
> +   /* Source modifiers are ignored for extended math instructions on Gen6.
> */
> +   if (intel->gen == 6) {
> +      assert(!src0.negate);
> +      assert(!src0.abs);
> +      assert(!src1.negate);
> +      assert(!src1.abs);
> +   }
>
>    /* Math is the same ISA format as other opcodes, except that
> CondModifier
>     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index f731662..6c417d4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -554,7 +554,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst,
> fs_reg src)
>     * The hardware ignores source modifiers (negate and abs) on math
>     * instructions, so we also move to a temp to set those up.
>

Probably should change this comment to clarify that this is only the case
for Gen6.


>     */
> -   if (intel->gen >= 6 && (src.file == UNIFORM ||
> +   if (intel->gen == 6 && (src.file == UNIFORM ||
>                           src.abs ||
>                           src.negate)) {
>       fs_reg expanded = fs_reg(this, glsl_type::float_type);
> @@ -588,7 +588,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst,
> fs_reg src0, fs_reg src1)
>       return NULL;
>    }
>
> -   if (intel->gen >= 6) {
> +   if (intel->gen >= 7) {
> +      inst = emit(opcode, dst, src0, src1);
> +   } else if (intel->gen == 6) {
>       /* Can't do hstride == 0 args to gen6 math, so expand it out.
>        *
>        * The hardware ignores source modifiers (negate and abs) on math
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 4035186..85d3cad 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -490,6 +490,13 @@ public:
>    void generate_linterp(fs_inst *inst, struct brw_reg dst,
>                         struct brw_reg *src);
>    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg
> src);
> +   void generate_math1_gen7(fs_inst *inst,
> +                           struct brw_reg dst,
> +                           struct brw_reg src);
> +   void generate_math2_gen7(fs_inst *inst,
> +                           struct brw_reg dst,
> +                           struct brw_reg src0,
> +                           struct brw_reg src1);
>    void generate_math1_gen6(fs_inst *inst,
>                            struct brw_reg dst,
>                            struct brw_reg src);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 4c158fe..9697762 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -143,6 +143,31 @@ fs_visitor::generate_linterp(fs_inst *inst,
>  }
>
>  void
> +fs_visitor::generate_math1_gen7(fs_inst *inst,
> +                               struct brw_reg dst,
> +                               struct brw_reg src0)
> +{
> +   assert(inst->mlen == 0);
> +   brw_math(p, dst,
> +           brw_math_function(inst->opcode),
> +           inst->saturate ? BRW_MATH_SATURATE_SATURATE
> +                          : BRW_MATH_SATURATE_NONE,
> +           0, src0,
> +           BRW_MATH_DATA_VECTOR,
> +           BRW_MATH_PRECISION_FULL);
> +}
> +
> +void
> +fs_visitor::generate_math2_gen7(fs_inst *inst,
> +                               struct brw_reg dst,
> +                               struct brw_reg src0,
> +                               struct brw_reg src1)
> +{
> +   assert(inst->mlen == 0);
> +   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
> +}
> +
> +void
>  fs_visitor::generate_math1_gen6(fs_inst *inst,
>                                struct brw_reg dst,
>                                struct brw_reg src0)
> @@ -788,7 +813,9 @@ fs_visitor::generate_code()
>       case SHADER_OPCODE_LOG2:
>       case SHADER_OPCODE_SIN:
>       case SHADER_OPCODE_COS:
> -        if (intel->gen >= 6) {
> +        if (intel->gen >= 7) {
> +           generate_math1_gen7(inst, dst, src[0]);
> +        } else if (intel->gen == 6) {
>            generate_math1_gen6(inst, dst, src[0]);
>         } else {
>            generate_math_gen4(inst, dst, src[0]);
> @@ -798,6 +825,8 @@ fs_visitor::generate_code()
>       case SHADER_OPCODE_INT_REMAINDER:
>       case SHADER_OPCODE_POW:
>         if (intel->gen >= 6) {
> +           generate_math2_gen7(inst, dst, src[0], src[1]);
> +        } else if (intel->gen == 6) {
>            generate_math2_gen6(inst, dst, src[0], src[1]);
>         } else {
>            generate_math_gen4(inst, dst, src[0]);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index eae841c..4d8bb2d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -532,6 +532,10 @@ public:
>                            struct brw_reg dst,
>                            struct brw_reg src0,
>                            struct brw_reg src1);
> +   void generate_math2_gen7(vec4_instruction *inst,
> +                           struct brw_reg dst,
> +                           struct brw_reg src0,
> +                           struct brw_reg src1);
>
>    void generate_urb_write(vec4_instruction *inst);
>    void generate_oword_dual_block_offsets(struct brw_reg m1,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index ccbad25..f66091d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction
> *inst,
>  }
>
>  void
> +vec4_visitor::generate_math2_gen7(vec4_instruction *inst,
> +                                 struct brw_reg dst,
> +                                 struct brw_reg src0,
> +                                 struct brw_reg src1)
> +{
> +   brw_math2(p,
> +            dst,
> +            brw_math_function(inst->opcode),
> +            src0, src1);
> +}
> +
> +void
>  vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
>                                  struct brw_reg dst,
>                                  struct brw_reg src0,
> @@ -549,9 +561,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction
> *instruction,
>    case SHADER_OPCODE_LOG2:
>    case SHADER_OPCODE_SIN:
>    case SHADER_OPCODE_COS:
> -      if (intel->gen >= 6) {
> +      if (intel->gen == 6) {
>         generate_math1_gen6(inst, dst, src[0]);
>       } else {
> +        /* Also works for Gen7. */
>         generate_math1_gen4(inst, dst, src[0]);
>       }
>       break;
> @@ -559,7 +572,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction
> *instruction,
>    case SHADER_OPCODE_POW:
>    case SHADER_OPCODE_INT_QUOTIENT:
>    case SHADER_OPCODE_INT_REMAINDER:
> -      if (intel->gen >= 6) {
> +      if (intel->gen >= 7) {
> +        generate_math2_gen7(inst, dst, src[0], src[1]);
> +      } else if (intel->gen == 6) {
>         generate_math2_gen6(inst, dst, src[0], src[1]);
>       } else {
>         generate_math2_gen4(inst, dst, src[0], src[1]);
> --
> 1.7.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

Other than the above comment, this patch is:

Reviewed-by: Paul Berry <stereotype441 at gmail.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20111020/3700b765/attachment.htm>


More information about the mesa-dev mailing list