[Mesa-dev] [PATCH 4/8] gallium: remove TGSI opcode DP2A

Sun Aug 20 00:49:10 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

use DP3 instead.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 29 ------------
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c    |  3 --
 src/gallium/auxiliary/nir/tgsi_to_nir.c            | 14 ------
 src/gallium/auxiliary/tgsi/tgsi_exec.c             | 29 ------------
 src/gallium/auxiliary/tgsi/tgsi_info.c             |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_lowering.c         | 30 +------------
 src/gallium/auxiliary/tgsi/tgsi_lowering.h         |  1 -
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |  1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c             |  4 --
 src/gallium/docs/source/tgsi.rst                   | 13 ------
 src/gallium/drivers/etnaviv/etnaviv_compiler.c     |  1 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |  1 -
 src/gallium/drivers/r600/r600_shader.c             |  6 +--
 src/gallium/drivers/svga/svga_tgsi_insn.c          |  1 -
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c        | 51 ----------------------
 src/gallium/include/pipe/p_shader_tokens.h         |  2 +-
 src/mesa/state_tracker/st_atifs_to_tgsi.c          |  1 -
 17 files changed, 6 insertions(+), 183 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index dc6568a..0319b88 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -152,48 +152,20 @@ dp2_emit(
                                     emit_data->args[3] /* src1.y */);
    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
                                                     TGSI_OPCODE_ADD, tmp0, tmp1);
 }
 
 static struct lp_build_tgsi_action dp2_action = {
    dp2_fetch_args,	 /* fetch_args */
    dp2_emit	 /* emit */
 };
 
-/* TGSI_OPCODE_DP2A */
-static void
-dp2a_fetch_args(
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   dp_fetch_args(bld_base, emit_data, 2);
-   emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
-                                            2, TGSI_CHAN_X);
-}
-
-static void
-dp2a_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   LLVMValueRef tmp;
-   tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
-   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
-                                    emit_data->args[5], tmp);
-}
-
-static struct lp_build_tgsi_action dp2a_action = {
-   dp2a_fetch_args,	 /* fetch_args */
-   dp2a_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_DP3 */
 static void
 dp3_fetch_args(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
    dp_fetch_args(bld_base, emit_data, 3);
 }
 
 static void
@@ -1279,21 +1251,20 @@ u642d_emit(
                       emit_data->args[0],
                       bld_base->dbl_bld.vec_type, "");
 }
 
 void
 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
 {
    bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
    bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
    bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
-   bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
    bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
    bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
    bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
    bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
    bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
    bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
    bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
    bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 92ecb43..3cc5079 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -547,23 +547,20 @@ lp_emit_instruction_aos(
 
    case TGSI_OPCODE_LRP:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      return FALSE;
-
    case TGSI_OPCODE_FRC:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
       break;
 
    case TGSI_OPCODE_FLR:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       dst0 = lp_build_floor(&bld->bld_base.base, src0);
       break;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 46238a1..3daa896 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -994,29 +994,20 @@ ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
                                           ttn_swizzle(b, src[0], Y, Z, X, X),
                                           ttn_swizzle(b, src[1], Z, X, Y, X)),
                                  nir_fmul(b,
                                           ttn_swizzle(b, src[1], Y, Z, X, X),
                                           ttn_swizzle(b, src[0], Z, X, Y, X))),
                         TGSI_WRITEMASK_XYZ);
    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
 }
 
 static void
-ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest(b, dest,
-                 ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]),
-                                         src[2]),
-                             X));
-}
-
-static void
 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
 }
 
 static void
 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
 }
@@ -1529,21 +1520,20 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_DP3] = 0,
    [TGSI_OPCODE_DP4] = 0,
    [TGSI_OPCODE_DST] = 0,
    [TGSI_OPCODE_MIN] = nir_op_fmin,
    [TGSI_OPCODE_MAX] = nir_op_fmax,
    [TGSI_OPCODE_SLT] = nir_op_slt,
    [TGSI_OPCODE_SGE] = nir_op_sge,
    [TGSI_OPCODE_MAD] = nir_op_ffma,
    [TGSI_OPCODE_LRP] = 0,
    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
-   [TGSI_OPCODE_DP2A] = 0,
    [TGSI_OPCODE_FRC] = nir_op_ffract,
    [TGSI_OPCODE_FLR] = nir_op_ffloor,
    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
    [TGSI_OPCODE_EX2] = nir_op_fexp2,
    [TGSI_OPCODE_LG2] = nir_op_flog2,
    [TGSI_OPCODE_POW] = nir_op_fpow,
    [TGSI_OPCODE_XPD] = 0,
    [TGSI_OPCODE_DPH] = 0,
    [TGSI_OPCODE_COS] = nir_op_fcos,
    [TGSI_OPCODE_DDX] = nir_op_fddx,
@@ -1766,24 +1756,20 @@ ttn_emit_instruction(struct ttn_compile *c)
       break;
 
    case TGSI_OPCODE_DP3:
       ttn_dp3(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_DP4:
       ttn_dp4(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      ttn_dp2a(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_DPH:
       ttn_dph(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_UMAD:
       ttn_umad(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_LRP:
       ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index cc3e232..34a4af6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3178,45 +3178,20 @@ exec_dp4(struct tgsi_exec_machine *mach,
    }
 
    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
       }
    }
 }
 
 static void
-exec_dp2a(struct tgsi_exec_machine *mach,
-          const struct tgsi_full_instruction *inst)
-{
-   unsigned int chan;
-   union tgsi_exec_channel arg[3];
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   micro_mul(&arg[2], &arg[0], &arg[1]);
-
-   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
-
-   fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   micro_add(&arg[0], &arg[0], &arg[1]);
-
-   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
-         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-}
-
-static void
 exec_dph(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
    unsigned int chan;
    union tgsi_exec_channel arg[3];
 
    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
    micro_mul(&arg[2], &arg[0], &arg[1]);
 
@@ -5176,24 +5151,20 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_LRP:
       exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_SQRT:
       exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
-   case TGSI_OPCODE_DP2A:
-      exec_dp2a(mach, inst);
-      break;
-
    case TGSI_OPCODE_FRC:
       exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_FLR:
       exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_ROUND:
       exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 8450cd7..c31705a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -51,21 +51,21 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
-   { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
+   { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
    { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index c26c13b..3013a41 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -913,59 +913,52 @@ transform_log(struct tgsi_transform_context *tctx,
  *
  * DP3 - 3-component Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
  *
  * DPH - Homogeneous Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
  *
  * DP2 - 2-component Dot Product
  *   dst = src0.x \times src1.x + src0.y \times src1.y
  *
- * DP2A - 2-component Dot Product And Add
- *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
- *
  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
  * operations, which is what you'd prefer for a ISA that is natively
  * scalar.  Probably a native vector ISA would at least already have
  * DP4/DP3 instructions, but perhaps there is room for an alternative
- * translation for DPH/DP2/DP2A using vector instructions.
+ * translation for DPH/DP2 using vector instructions.
  *
  * ; needs: 1 tmp
  * MUL tmpA.x, src0.x, src1.x
  * MAD tmpA.x, src0.y, src1.y, tmpA.x
  * if (DPH || DP3 || DP4) {
  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
  *   if (DPH) {
  *     ADD tmpA.x, src1.w, tmpA.x
  *   } else if (DP4) {
  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
  *   }
- * } else if (DP2A) {
- *   ADD tmpA.x, src2.x, tmpA.x
  * }
  * ; fixup last instruction to replicate into dst
  */
 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
-#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
 #define DOTP_TMP  1
 static void
 transform_dotp(struct tgsi_transform_context *tctx,
                struct tgsi_full_instruction *inst)
 {
    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
    struct tgsi_full_src_register *src0 = &inst->Src[0];
    struct tgsi_full_src_register *src1 = &inst->Src[1];
-   struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
    struct tgsi_full_instruction new_inst;
    unsigned opcode = inst->Instruction.Opcode;
 
    /* NOTE: any potential last instruction must replicate src on all
     * components (since it could be re-written to write to final dst)
     */
 
    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
       /* MUL tmpA.x, src0.x, src1.x */
       new_inst = tgsi_default_full_instruction();
@@ -1019,31 +1012,20 @@ transform_dotp(struct tgsi_transform_context *tctx,
             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
             new_inst = tgsi_default_full_instruction();
             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
             new_inst.Instruction.NumDstRegs = 1;
             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
             new_inst.Instruction.NumSrcRegs = 3;
             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
          }
-      } else if (opcode == TGSI_OPCODE_DP2A) {
-         tctx->emit_instruction(tctx, &new_inst);
-
-         /* ADD tmpA.x, src2.x, tmpA.x */
-         new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-         new_inst.Instruction.NumDstRegs = 1;
-         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-         new_inst.Instruction.NumSrcRegs = 2;
-         reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
-         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
       }
 
       /* fixup last instruction to write to dst: */
       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 
       tctx->emit_instruction(tctx, &new_inst);
    }
 }
 
 /* FLR - floor, CEIL - ceil
@@ -1555,25 +1537,20 @@ transform_instr(struct tgsi_transform_context *tctx,
    case TGSI_OPCODE_DPH:
       if (!ctx->config->lower_DPH)
          goto skip;
       transform_dotp(tctx, inst);
       break;
    case TGSI_OPCODE_DP2:
       if (!ctx->config->lower_DP2)
          goto skip;
       transform_dotp(tctx, inst);
       break;
-   case TGSI_OPCODE_DP2A:
-      if (!ctx->config->lower_DP2A)
-         goto skip;
-      transform_dotp(tctx, inst);
-      break;
    case TGSI_OPCODE_FLR:
       if (!ctx->config->lower_FLR)
          goto skip;
       transform_flr_ceil(tctx, inst);
       break;
    case TGSI_OPCODE_CEIL:
       if (!ctx->config->lower_CEIL)
          goto skip;
       transform_flr_ceil(tctx, inst);
       break;
@@ -1650,21 +1627,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
          OPCS(LRP) ||
          OPCS(FRC) ||
          OPCS(POW) ||
          OPCS(LIT) ||
          OPCS(EXP) ||
          OPCS(LOG) ||
          OPCS(DP4) ||
          OPCS(DP3) ||
          OPCS(DPH) ||
          OPCS(DP2) ||
-         OPCS(DP2A) ||
          OPCS(FLR) ||
          OPCS(CEIL) ||
          OPCS(TRUNC) ||
          OPCS(TXP) ||
          ctx.two_side_colors ||
          ctx.saturate))
       return NULL;
 
 #if 0  /* debug */
    _debug_printf("BEFORE:");
@@ -1718,24 +1694,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
       numtmp = MAX2(numtmp, DOTP_TMP);
    }
    if (OPCS(DPH)) {
       newlen += DPH_GROW * OPCS(DPH);
       numtmp = MAX2(numtmp, DOTP_TMP);
    }
    if (OPCS(DP2)) {
       newlen += DP2_GROW * OPCS(DP2);
       numtmp = MAX2(numtmp, DOTP_TMP);
    }
-   if (OPCS(DP2A)) {
-      newlen += DP2A_GROW * OPCS(DP2A);
-      numtmp = MAX2(numtmp, DOTP_TMP);
-   }
    if (OPCS(FLR)) {
       newlen += FLR_GROW * OPCS(FLR);
       numtmp = MAX2(numtmp, FLR_TMP);
    }
    if (OPCS(CEIL)) {
       newlen += CEIL_GROW * OPCS(CEIL);
       numtmp = MAX2(numtmp, CEIL_TMP);
    }
    if (OPCS(TRUNC)) {
       newlen += TRUNC_GROW * OPCS(TRUNC);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 20e4f84..85e4b8e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -60,21 +60,20 @@ struct tgsi_lowering_config
    unsigned lower_LRP:1;
    unsigned lower_FRC:1;
    unsigned lower_POW:1;
    unsigned lower_LIT:1;
    unsigned lower_EXP:1;
    unsigned lower_LOG:1;
    unsigned lower_DP4:1;
    unsigned lower_DP3:1;
    unsigned lower_DPH:1;
    unsigned lower_DP2:1;
-   unsigned lower_DP2A:1;
    unsigned lower_FLR:1;
    unsigned lower_CEIL:1;
    unsigned lower_TRUNC:1;
 
    /* bitmask of (1 << TGSI_TEXTURE_type): */
    unsigned lower_TXP;
 
    /* To emulate certain texture wrap modes, this can be used
     * to saturate the specified tex coord to [0.0, 1.0].  The
     * bits are according to sampler #, ie. if, for example:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index f244db6..e4cbdae 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -49,21 +49,20 @@ OP12(DP3)
 OP12(DP4)
 OP12(DST)
 OP12(MIN)
 OP12(MAX)
 OP12(SLT)
 OP12(SGE)
 OP13(MAD)
 OP12_TEX(TEX_LZ)
 OP13(LRP)
 OP11(SQRT)
-OP13(DP2A)
 OP11(FRC)
 OP12_TEX(TXF_LZ)
 OP11(FLR)
 OP11(ROUND)
 OP11(EX2)
 OP11(LG2)
 OP12(POW)
 OP12(XPD)
 OP12(DPH)
 OP11(COS)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 932545c..fc61351 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -255,24 +255,20 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
 
    case TGSI_OPCODE_SCS:
       read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
       break;
 
    case TGSI_OPCODE_EXP:
    case TGSI_OPCODE_LOG:
       read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
       break;
 
-   case TGSI_OPCODE_DP2A:
-      read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
-      break;
-
    case TGSI_OPCODE_DP2:
       read_mask = TGSI_WRITEMASK_XY;
       break;
 
    case TGSI_OPCODE_DP3:
       read_mask = TGSI_WRITEMASK_XYZ;
       break;
 
    case TGSI_OPCODE_DP4:
       read_mask = TGSI_WRITEMASK_XYZW;
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 20749a1..6602162 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -278,33 +278,20 @@ Perform a * b + c with no intermediate rounding step.
 
   dst.x = src0.x \times src1.x + src2.x
 
   dst.y = src0.y \times src1.y + src2.y
 
   dst.z = src0.z \times src1.z + src2.z
 
   dst.w = src0.w \times src1.w + src2.w
 
 
-.. opcode:: DP2A - 2-component Dot Product And Add
-
-.. math::
-
-  dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
-  dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
-  dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
-  dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
-
 .. opcode:: FRC - Fraction
 
 .. math::
 
   dst.x = src.x - \lfloor src.x\rfloor
 
   dst.y = src.y - \lfloor src.y\rfloor
 
   dst.z = src.z - \lfloor src.z\rfloor
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index f65a168..88f204c 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -2337,21 +2337,20 @@ etna_compile_shader(struct etna_shader_variant *v)
    const struct etna_specs *specs = v->shader->specs;
 
    struct tgsi_lowering_config lconfig = {
       .lower_SCS = specs->has_sin_cos_sqrt,
       .lower_FLR = !specs->has_sign_floor_ceil,
       .lower_CEIL = !specs->has_sign_floor_ceil,
       .lower_POW = true,
       .lower_EXP = true,
       .lower_LOG = true,
       .lower_DP2 = true,
-      .lower_DP2A = true,
       .lower_TRUNC = true,
       .lower_XPD = true
    };
 
    c = CALLOC_STRUCT(etna_compile);
    if (!c)
       return false;
 
    memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
 
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index a458330..cc0ac48 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -44,21 +44,20 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_ADD: return RC_OPCODE_ADD;
         case TGSI_OPCODE_DP3: return RC_OPCODE_DP3;
         case TGSI_OPCODE_DP4: return RC_OPCODE_DP4;
         case TGSI_OPCODE_DST: return RC_OPCODE_DST;
         case TGSI_OPCODE_MIN: return RC_OPCODE_MIN;
         case TGSI_OPCODE_MAX: return RC_OPCODE_MAX;
         case TGSI_OPCODE_SLT: return RC_OPCODE_SLT;
         case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
         case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
         case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
-     /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
         case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
         case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
         case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
         case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
         case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
         case TGSI_OPCODE_POW: return RC_OPCODE_POW;
         case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
         case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
         case TGSI_OPCODE_COS: return RC_OPCODE_COS;
         case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index b49ecba..d3728fb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9082,21 +9082,21 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
 	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
 	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
 	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
 	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
-	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
 	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
@@ -9280,21 +9280,21 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
 	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
 	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
 	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
 	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
 	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
-	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
 	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
@@ -9503,21 +9503,21 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
 	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
 	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
 	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
 	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
 	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD_IEEE, tgsi_op3},
 	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
 	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
 	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
-	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
 	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index fc3ec5e..dd29f74 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -37,21 +37,20 @@
 
 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
 
 
 static unsigned
 translate_opcode(uint opcode)
 {
    switch (opcode) {
    case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
-   case TGSI_OPCODE_DP2A:       return SVGA3DOP_DP2ADD;
    case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
    case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
    case TGSI_OPCODE_FRC:        return SVGA3DOP_FRC;
    case TGSI_OPCODE_MAD:        return SVGA3DOP_MAD;
    case TGSI_OPCODE_MAX:        return SVGA3DOP_MAX;
    case TGSI_OPCODE_MIN:        return SVGA3DOP_MIN;
    case TGSI_OPCODE_MOV:        return SVGA3DOP_MOV;
    case TGSI_OPCODE_MUL:        return SVGA3DOP_MUL;
    case TGSI_OPCODE_NOP:        return SVGA3DOP_NOP;
    default:
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index d7ec48e..56afd49 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -3571,69 +3571,20 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
                         &tmp_src, &inst->Src[1], &inst->Src[2],
                         inst->Instruction.Saturate);
 
    free_temp_indexes(emit);
 
    return TRUE;
 }
 
 
 /**
- * Emit code for TGSI_OPCODE_DP2A instruction.
- */
-static boolean
-emit_dp2a(struct svga_shader_emitter_v10 *emit,
-          const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
-    * Translate into
-    *   MAD tmp.x, s0.y, s1.y, s2.x
-    *   MAD tmp.x, s0.x, s1.x, tmp.x
-    *   MOV dst.xyzw, tmp.xxxx
-    */
-   unsigned tmp = get_temp_index(emit);
-   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
-   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
-   struct tgsi_full_src_register tmp_src_xxxx =
-      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
-   struct tgsi_full_dst_register tmp_dst_x =
-      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
-
-   struct tgsi_full_src_register src0_xxxx =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register src0_yyyy =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register src1_xxxx =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register src1_yyyy =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register src2_xxxx =
-      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
-
-   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
-                        &src1_yyyy, &src2_xxxx, FALSE);
-   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
-                        &src1_xxxx, &tmp_src_xxxx, FALSE);
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
-                        &tmp_src_xxxx, inst->Instruction.Saturate);
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
-/**
  * Emit code for TGSI_OPCODE_DPH instruction.
  */
 static boolean
 emit_dph(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
 {
    /*
     * DP3 tmp, s0, s1
     * ADD dst, tmp, s1.wwww
     */
@@ -5754,22 +5705,20 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
       return emit_arl_uarl(emit, inst);
    case TGSI_OPCODE_BGNSUB:
       /* no-op */
       return TRUE;
    case TGSI_OPCODE_CAL:
       return emit_cal(emit, inst);
    case TGSI_OPCODE_CMP:
       return emit_cmp(emit, inst);
    case TGSI_OPCODE_COS:
       return emit_sincos(emit, inst);
-   case TGSI_OPCODE_DP2A:
-      return emit_dp2a(emit, inst);
    case TGSI_OPCODE_DPH:
       return emit_dph(emit, inst);
    case TGSI_OPCODE_DST:
       return emit_dst(emit, inst);
    case TGSI_OPCODE_EX2:
       return emit_ex2(emit, inst);
    case TGSI_OPCODE_EXP:
       return emit_exp(emit, inst);
    case TGSI_OPCODE_IF:
       return emit_if(emit, inst);
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 9fd8419..5c4af89 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -351,21 +351,21 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_DST                 11
 #define TGSI_OPCODE_MIN                 12
 #define TGSI_OPCODE_MAX                 13
 #define TGSI_OPCODE_SLT                 14
 #define TGSI_OPCODE_SGE                 15
 #define TGSI_OPCODE_MAD                 16
 #define TGSI_OPCODE_TEX_LZ              17
 #define TGSI_OPCODE_LRP                 18
 #define TGSI_OPCODE_FMA                 19
 #define TGSI_OPCODE_SQRT                20
-#define TGSI_OPCODE_DP2A                21
+/* gap */
 #define TGSI_OPCODE_F2U64               22
 #define TGSI_OPCODE_F2I64               23
 #define TGSI_OPCODE_FRC                 24
 #define TGSI_OPCODE_TXF_LZ              25
 #define TGSI_OPCODE_FLR                 26
 #define TGSI_OPCODE_ROUND               27
 #define TGSI_OPCODE_EX2                 28
 #define TGSI_OPCODE_LG2                 29
 #define TGSI_OPCODE_POW                 30
 #define TGSI_OPCODE_XPD                 31
diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c
index 13e013c..2a171b5 100644
--- a/src/mesa/state_tracker/st_atifs_to_tgsi.c
+++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c
@@ -221,21 +221,20 @@ emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
       src[0] = ureg_src(tmp[0]);
       src[1] = args[0];
       src[2] = args[1];
       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0);
    } else if (!strcmp(desc->name, "CND0")) {
       src[0] = args[2];
       src[1] = args[1];
       src[2] = args[0];
       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0);
    } else if (!strcmp(desc->name, "DOT2_ADD")) {
-      /* note: DP2A is not implemented in most pipe drivers */
       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
       src[0] = args[0];
       src[1] = args[1];
       ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2, 0);
       src[0] = ureg_src(tmp[0]);
       src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
       ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2, 0);
    }
 }
 
-- 
2.7.4