[Mesa-dev] [PATCH 1/2] gallium: add TGSI_OPCODE_FMA

Mon Mar 2 07:55:06 PST 2015

Am 02.03.2015 um 12:52 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> Needed by ARB_gpu_shader5.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_limits.h    |  1 +
>  src/gallium/auxiliary/tgsi/tgsi_exec.h           |  1 +
>  src/gallium/auxiliary/tgsi/tgsi_info.c           |  2 +-
>  src/gallium/auxiliary/tgsi/tgsi_util.c           |  1 +
>  src/gallium/docs/source/screen.rst               |  1 +
>  src/gallium/docs/source/tgsi.rst                 | 23 +++++++++++++++++++++++
>  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
>  src/gallium/drivers/i915/i915_screen.c           |  1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  2 ++
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
>  src/gallium/drivers/r300/r300_screen.c           |  2 ++
>  src/gallium/drivers/r600/r600_pipe.c             |  1 +
>  src/gallium/drivers/r600/r600_shader.c           |  6 +++---
>  src/gallium/drivers/radeonsi/si_pipe.c           |  1 +
>  src/gallium/drivers/svga/svga_screen.c           |  2 ++
>  src/gallium/drivers/vc4/vc4_screen.c             |  1 +
>  src/gallium/include/pipe/p_defines.h             |  1 +
>  src/gallium/include/pipe/p_shader_tokens.h       |  2 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp       | 12 ++++++++----
>  20 files changed, 54 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> index 2962360..c5c51c1 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
> @@ -129,6 +129,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
>     case PIPE_SHADER_CAP_DOUBLES:
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     }
>     /* if we get here, we missed a shader cap above (and should have seen
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index 609c81b..0e59b88 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -459,6 +459,7 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
>        return 1;
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     }
>     /* if we get here, we missed a shader cap above (and should have seen
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 4d838fd..e6e0a60 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -56,7 +56,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>     { 1, 3, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
>     { 1, 2, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
>     { 1, 3, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> -   { 0, 0, 0, 0, 0, 0, NONE, "", 19 },      /* removed */
> +   { 1, 3, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
>     { 1, 1, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
>     { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
>     { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
> index d572ff0..e5b8427 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_util.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
> @@ -193,6 +193,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
>     case TGSI_OPCODE_MAD:
>     case TGSI_OPCODE_SUB:
>     case TGSI_OPCODE_LRP:
> +   case TGSI_OPCODE_FMA:
>     case TGSI_OPCODE_FRC:
>     case TGSI_OPCODE_CEIL:
>     case TGSI_OPCODE_CLAMP:
> diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
> index e0fd1a2..dd7a012 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -336,6 +336,7 @@ to be 0.
>    is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
>  * ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
>    DLDEXP are supported.
> +* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether TGSI_OPCODE_FMA is supported.
>  
>  
>  .. _pipe_compute_cap:
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index b0a975a..6871676 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -272,6 +272,29 @@ This instruction replicates its result.
>    dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
>  
>  
> +.. opcode:: FMA - Fused Multiply-Add
> +
> +The results may not be identical to evaluating the expression (a*b)+c,
> +because the computation may be performed in a single operation with
> +intermediate precision different from that used to compute a non-FMA
> +expression.
> +
> +The results of FMA are guaranteed to be invariant given fixed inputs
> +<src0>, <src1>, and <src2>. That means the implementation is not allowed
> +to expand the opcode to MUL+ADD and apply algebraic optimizations affecting
> +the floating-point results.
I think these paragraphs are slightly confusing, especially "because
the computation may be performed in a single operation with intermediate
precision different from that used to compute a non-FMA expression".
It would be clearer to say something along the lines that (in contrast
to MAD) no intermediate rounding happens. Otherwise this sounds
as if some sort of intermediate rounding would be allowed, as
long as the intermediate precision is higher than what you'd get from
a separate mul+add, which I don't think is what you wanted.
(FWIW I don't think we ever really clarified MAD wrt intermediate rounding.
I particularly like the OpenCL convention that FMA = no intermediate
rounding, MUL + ADD = rounding, MAD = do whatever is fastest (because
optimizing backends can fuse MUL+ADD back into a MAD themselves if the hw
can do that with intermediate rounding), but traditionally of course MAD
always did intermediate rounding.)

Otherwise this looks good to me (though I'd think we'd need a DFMA too,
sooner rather than later).

Roland

> +
> +.. math::
> +
> +  dst.x = src0.x \times src1.x + src2.x
> +
> +  dst.y = src0.y \times src1.y + src2.y
> +
> +  dst.z = src0.z \times src1.z + src2.z
> +
> +  dst.w = src0.w \times src1.w + src2.w
> +
> +
>  .. opcode:: DP2A - 2-component Dot Product And Add
>  
>  .. math::
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 2973458..d617465 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -362,6 +362,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>  	case PIPE_SHADER_CAP_DOUBLES:
>  	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>  	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>  		return 0;
>  	case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
>  		return 1;
> diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
> index dc76464..50847e2 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -158,6 +158,7 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> index 0fca9e0..eeb7148 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> @@ -250,6 +250,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("unknown vertex shader param %d\n", param);
> @@ -289,6 +290,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        default:
>           debug_printf("unknown fragment shader param %d\n", param);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> index ed07ba4..829dfbc 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> @@ -289,6 +289,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>     case PIPE_SHADER_CAP_DOUBLES:
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     default:
>        NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 686d892..04c34f5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -295,6 +295,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>     case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        return 1;
>     case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>        return 0;
>     case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
>        return 16; /* would be 32 in linked (OpenGL-style) mode */
> diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
> index fca8001..752d7e5 100644
> --- a/src/gallium/drivers/r300/r300_screen.c
> +++ b/src/gallium/drivers/r300/r300_screen.c
> @@ -287,6 +287,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>              return 0;
>          case PIPE_SHADER_CAP_PREFERRED_IR:
>              return PIPE_SHADER_IR_TGSI;
> @@ -341,6 +342,7 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>              return 0;
>          case PIPE_SHADER_CAP_PREFERRED_IR:
>              return PIPE_SHADER_IR_TGSI;
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index c8a0e9c..2670e53 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -493,6 +493,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
>  		return 0;
>  	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>  	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>  		return 0;
>  	}
>  	return 0;
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 2ee59c8..54540c3 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -7295,7 +7295,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
>  	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
>  	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
>  	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
> -	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
> +	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
>  	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -7494,7 +7494,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
>  	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
>  	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
>  	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
> -	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
> +	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
>  	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -7693,7 +7693,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
>  	{TGSI_OPCODE_MAD,	1, ALU_OP3_MULADD, tgsi_op3},
>  	{TGSI_OPCODE_SUB,	0, ALU_OP2_ADD, tgsi_op2},
>  	{TGSI_OPCODE_LRP,	0, ALU_OP0_NOP, tgsi_lrp},
> -	{19,			0, ALU_OP0_NOP, tgsi_unsupported},
> +	{TGSI_OPCODE_FMA,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{TGSI_OPCODE_SQRT,	0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
>  	{TGSI_OPCODE_DP2A,	0, ALU_OP0_NOP, tgsi_unsupported},
>  	{22,			0, ALU_OP0_NOP, tgsi_unsupported},
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index f1a5388..0aacab1 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -425,6 +425,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>  	case PIPE_SHADER_CAP_DOUBLES:
>  	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>  	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>  		return 0;
>  	}
>  	return 0;
> diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
> index bac0dbc..7b01d35 100644
> --- a/src/gallium/drivers/svga/svga_screen.c
> +++ b/src/gallium/drivers/svga/svga_screen.c
> @@ -375,6 +375,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        }
>        /* If we get here, we failed to handle a cap above */
> @@ -431,6 +432,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
>        case PIPE_SHADER_CAP_DOUBLES:
>        case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>           return 0;
>        }
>        /* If we get here, we failed to handle a cap above */
> diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
> index 7c62847..0be8ec2 100644
> --- a/src/gallium/drivers/vc4/vc4_screen.c
> +++ b/src/gallium/drivers/vc4/vc4_screen.c
> @@ -319,6 +319,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
>          case PIPE_SHADER_CAP_DOUBLES:
>          case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>          case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> +        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>                  return 0;
>          case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
>          case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
> diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
> index a8ffe9c..67f48e4 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -644,6 +644,7 @@ enum pipe_shader_cap
>     PIPE_SHADER_CAP_DOUBLES,
>     PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
>     PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
> +   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
>  };
>  
>  /**
> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
> index 95ac590..a64686a 100644
> --- a/src/gallium/include/pipe/p_shader_tokens.h
> +++ b/src/gallium/include/pipe/p_shader_tokens.h
> @@ -306,7 +306,7 @@ struct tgsi_property_data {
>  #define TGSI_OPCODE_MAD                 16
>  #define TGSI_OPCODE_SUB                 17
>  #define TGSI_OPCODE_LRP                 18
> -                                /* gap */
> +#define TGSI_OPCODE_FMA                 19
>  #define TGSI_OPCODE_SQRT                20
>  #define TGSI_OPCODE_DP2A                21
>                                  /* gap */
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index b305507..dbd2070 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -332,6 +332,7 @@ public:
>     int glsl_version;
>     bool native_integers;
>     bool have_sqrt;
> +   bool have_fma;
>  
>     variable_storage *find_variable_storage(ir_variable *var);
>  
> @@ -2222,10 +2223,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
>        break;
>     case ir_triop_fma:
> -      /* NOTE: Perhaps there should be a special opcode that enforces fused
> -       * mul-add. Just use MAD for now.
> -       */
> -      emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
> +      /* In theory, MAD is incorrect here. */
> +      if (have_fma)
> +         emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
> +      else
> +         emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
>        break;
>     case ir_unop_interpolate_at_centroid:
>        emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
> @@ -5564,6 +5566,8 @@ get_mesa_program(struct gl_context *ctx,
>  
>     v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
>                                              PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
> +   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
> +                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
>  
>     _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
>     _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
>