Mesa (master): gallium: remove TGSI opcode SCS

Tue Aug 22 14:43:33 UTC 2017

Module: Mesa
Branch: master
Commit: 497506ad93d737db0c75f512626df2ae82c27560
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=497506ad93d737db0c75f512626df2ae82c27560

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Aug 19 20:25:08 2017 +0200

gallium: remove TGSI opcode SCS

Use separate COS and SIN instructions instead.

Reviewed-by: Roland Scheidegger <sroland at vmware.com>
Acked-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |  26 -----
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c    |   3 -
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  22 ----
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |  31 -----
 src/gallium/auxiliary/tgsi/tgsi_info.c             |   2 +-
 src/gallium/auxiliary/tgsi/tgsi_lowering.c         |  75 ------------
 src/gallium/auxiliary/tgsi/tgsi_lowering.h         |   1 -
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |   1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c             |   4 -
 src/gallium/docs/source/tgsi.rst                   |  13 ---
 src/gallium/drivers/etnaviv/etnaviv_compiler.c     |  24 +---
 src/gallium/drivers/i915/i915_fpc_optimize.c       |   1 -
 src/gallium/drivers/i915/i915_fpc_translate.c      |  64 -----------
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  14 ---
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c   |  17 ---
 src/gallium/drivers/nouveau/nv30/nvfx_shader.h     |   1 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |   1 -
 src/gallium/drivers/r600/r600_shader.c             | 127 +--------------------
 src/gallium/drivers/svga/svga_tgsi_insn.c          |  26 -----
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c        |  38 ------
 src/gallium/include/pipe/p_shader_tokens.h         |   2 +-
 src/gallium/state_trackers/nine/nine_shader.c      |  12 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp         |   5 -
 src/mesa/state_tracker/st_mesa_to_tgsi.c           |  11 +-
 24 files changed, 18 insertions(+), 503 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index ea65daffd2..ce2b927db8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -686,31 +686,6 @@ const struct lp_build_tgsi_action sqrt_action = {
    sqrt_emit	 /* emit */
 };
 
-/* TGSI_OPCODE_SCS */
-static void
-scs_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   /* dst.x */
-   emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
-                                           TGSI_OPCODE_COS, emit_data->args[0]);
-   /* dst.y */
-   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
-                                           TGSI_OPCODE_SIN, emit_data->args[0]);
-   /* dst.z */
-   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
-
-   /* dst.w */
-   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action scs_action = {
-   scalar_unary_fetch_args,	 /* fetch_args */
-   scs_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_F2U */
 static void
 f2u_emit(
@@ -1195,7 +1170,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
    bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
    bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
-   bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
    bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
 
    bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 567ed68927..b76c065e32 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -719,9 +719,6 @@ lp_emit_instruction_aos(
       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
       break;
 
-   case TGSI_OPCODE_SCS:
-      return FALSE;
-
    case TGSI_OPCODE_TXB:
       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
       break;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 88f56ac5d6..733eca0764 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -956,23 +956,6 @@ ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
    }
 }
 
-/* SCS - Sine Cosine
- *   dst.x = \cos{src.x}
- *   dst.y = \sin{src.x}
- *   dst.z = 0.0
- *   dst.w = 1.0
- */
-static void
-ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)),
-                        TGSI_WRITEMASK_X);
-   ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)),
-                        TGSI_WRITEMASK_Y);
-   ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z);
-   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
 static void
 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
@@ -1539,7 +1522,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
 
    [TGSI_OPCODE_SSG] = nir_op_fsign,
    [TGSI_OPCODE_CMP] = 0,
-   [TGSI_OPCODE_SCS] = 0,
    [TGSI_OPCODE_TXB] = 0,
    [TGSI_OPCODE_DIV] = nir_op_fdiv,
    [TGSI_OPCODE_DP2] = 0,
@@ -1756,10 +1738,6 @@ ttn_emit_instruction(struct ttn_compile *c)
       ttn_ucmp(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_SCS:
-      ttn_scs(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_SGT:
       ttn_sgt(b, op_trans[tgsi_op], dest, src);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 3be5bd5dd5..c58ea6ad09 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3286,33 +3286,6 @@ exec_ucmp(struct tgsi_exec_machine *mach,
 }
 
 static void
-exec_scs(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
-      union tgsi_exec_channel arg;
-      union tgsi_exec_channel result;
-
-      fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
-      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-         micro_cos(&result, &arg);
-         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-      }
-      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-         micro_sin(&result, &arg);
-         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-      store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
-   }
-}
-
-static void
 exec_dst(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
@@ -5334,10 +5307,6 @@ exec_instruction(
       exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
-   case TGSI_OPCODE_SCS:
-      exec_scs(mach, inst);
-      break;
-
    case TGSI_OPCODE_DIV:
       exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 0368c457c4..0a82dbb14c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -104,7 +104,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
-   { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
+   { 1, 1, 0, 0, 0, 0, 0, CHAN, "", 67 }, /* removed */
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
    { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index fa9d579f77..bfc3a6bfe3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -258,71 +258,6 @@ transform_dst(struct tgsi_transform_context *tctx,
    }
 }
 
-/* SCS - Sine Cosine
- *   dst.x = \cos{src.x}
- *   dst.y = \sin{src.x}
- *   dst.z = 0.0
- *   dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{0.0, 1.0}
- * if (dst.x aliases src.x) {
- *   MOV tmpA.x, src.x
- *   src = tmpA
- * }
- * COS dst.x, src.x
- * SIN dst.y, src.x
- * MOV dst.zw, imm{0.0, 1.0}
- */
-#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
-#define SCS_TMP  1
-static void
-transform_scs(struct tgsi_transform_context *tctx,
-              struct tgsi_full_instruction *inst)
-{
-   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
-   struct tgsi_full_dst_register *dst = &inst->Dst[0];
-   struct tgsi_full_src_register *src = &inst->Src[0];
-   struct tgsi_full_instruction new_inst;
-
-   if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
-      create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
-      src = &ctx->tmp[A].src;
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
-      /* COS dst.x, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
-      /* SIN dst.y, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
-      /* MOV dst.zw, imm{0.0, 1.0} */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-}
-
 /* LRP - Linear Interpolate
  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
@@ -1407,11 +1342,6 @@ transform_instr(struct tgsi_transform_context *tctx,
          goto skip;
       transform_dst(tctx, inst);
       break;
-   case TGSI_OPCODE_SCS:
-      if (!ctx->config->lower_SCS)
-         goto skip;
-      transform_scs(tctx, inst);
-      break;
    case TGSI_OPCODE_LRP:
       if (!ctx->config->lower_LRP)
          goto skip;
@@ -1535,7 +1465,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
    /* if there are no instructions to lower, then we are done: */
    if (!(OPCS(DST) ||
-         OPCS(SCS) ||
          OPCS(LRP) ||
          OPCS(FRC) ||
          OPCS(POW) ||
@@ -1564,10 +1493,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
       newlen += DST_GROW * OPCS(DST);
       numtmp = MAX2(numtmp, DST_TMP);
    }
-   if (OPCS(SCS)) {
-      newlen += SCS_GROW * OPCS(SCS);
-      numtmp = MAX2(numtmp, SCS_TMP);
-   }
    if (OPCS(LRP)) {
       newlen += LRP_GROW * OPCS(LRP);
       numtmp = MAX2(numtmp, LRP_TMP);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 709a63a601..fd4c38f016 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -55,7 +55,6 @@ struct tgsi_lowering_config
     * enable lowering of TGSI_OPCODE_<opc>
     */
    unsigned lower_DST:1;
-   unsigned lower_SCS:1;
    unsigned lower_LRP:1;
    unsigned lower_FRC:1;
    unsigned lower_POW:1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 826bcaf5a0..9a13fa6684 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -90,7 +90,6 @@ OP00_LBL(CAL)
 OP00(RET)
 OP11(SSG)
 OP13(CMP)
-OP11(SCS)
 OP12_TEX(TXB)
 OP12(DIV)
 OP12(DP2)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 6756bca23d..327e42f740 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -253,10 +253,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
       read_mask = TGSI_WRITEMASK_X;
       break;
 
-   case TGSI_OPCODE_SCS:
-      read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
-      break;
-
    case TGSI_OPCODE_EXP:
    case TGSI_OPCODE_LOG:
       read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 08166c394b..b148c3c939 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -651,19 +651,6 @@ This instruction replicates its result.
   Unconditional discard.  Allowed in fragment shaders only.
 
 
-.. opcode:: SCS - Sine Cosine
-
-.. math::
-
-  dst.x = \cos{src.x}
-
-  dst.y = \sin{src.x}
-
-  dst.z = 0
-
-  dst.w = 1
-
-
 .. opcode:: TXB - Texture Lookup With Bias
 
   for cube map array textures and shadow cube maps, the bias value
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index 4f09f71cc1..c4ca80fe19 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -1474,9 +1474,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
        * - Output an x and y component, which need to be multiplied to
        *   get the result
        */
-      /* TGSI lowering should deal with SCS */
-      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
       emit_inst(c, &(struct etna_inst) {
          .opcode = INST_OPCODE_MUL,
@@ -1503,9 +1500,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
       });
 
    } else if (c->specs->has_sin_cos_sqrt) {
-      /* TGSI lowering should deal with SCS */
-      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
       /* add divide by PI/2, using a temp register. GC2000
        * fails with src==dst for the trig instruction. */
@@ -1540,8 +1534,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
        *  DP3 t.x___, t.xyww, C, void         (for scs)
        *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
        *  MAD dst, t.ywyw, .2225, t.xzxz
-       *
-       * TODO: we don't set dst.zw correctly for SCS.
        */
       struct etna_inst *p, ins[9] = { };
       struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
@@ -1597,19 +1589,7 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
       ins[4].src[0] = swizzle(t0s, dp3_swiz);
       ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
 
-      if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
-         ins[5] = ins[3];
-         ins[6] = ins[4];
-         ins[4].dst.comps = INST_COMPS_X;
-         ins[6].dst.comps = INST_COMPS_Z;
-         ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
-         ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
-         ins[5].src[1] = absolute(ins[5].src[0]);
-         p = &ins[7];
-      } else {
-         p = &ins[5];
-      }
-
+      p = &ins[5];
       p->opcode = INST_OPCODE_MAD;
       p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
       p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
@@ -1809,7 +1789,6 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
 
    INSTR(SIN, trans_trig),
    INSTR(COS, trans_trig),
-   INSTR(SCS, trans_trig),
 
    INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
    INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
@@ -2309,7 +2288,6 @@ etna_compile_shader(struct etna_shader_variant *v)
    const struct etna_specs *specs = v->shader->specs;
 
    struct tgsi_lowering_config lconfig = {
-      .lower_SCS = specs->has_sin_cos_sqrt,
       .lower_FLR = !specs->has_sign_floor_ceil,
       .lower_CEIL = !specs->has_sign_floor_ceil,
       .lower_POW = true,
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index fb97e9ea86..76ef20d84c 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -105,7 +105,6 @@ static const struct {
    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
-   [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 2faab338b7..a9601e82ca 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -803,70 +803,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
-   case TGSI_OPCODE_SCS:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      tmp = i915_get_utemp(p);
-
-      /* 
-       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-       * scs.x = DP4 t1, scs_sin_constants
-       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-       * scs.y = DP4 t1, scs_cos_constants
-       */
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_XY, 0,
-                      swizzle(src0, X, X, ONE, ONE),
-                      swizzle(src0, X, ONE, ONE, ONE), 0);
-
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_ALL, 0,
-                      swizzle(tmp, X, Y, X, Y),
-                      swizzle(tmp, X, X, ONE, ONE), 0);
-
-      writemask = inst->Dst[0].Register.WriteMask;
-
-      if (writemask & TGSI_WRITEMASK_Y) {
-         uint tmp1;
-
-         if (writemask & TGSI_WRITEMASK_X)
-            tmp1 = i915_get_utemp(p);
-         else
-            tmp1 = tmp;
-
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp1, A0_DEST_CHANNEL_ALL, 0,
-                         swizzle(tmp, X, Y, Y, W),
-                         swizzle(tmp, X, Z, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_Y, 0,
-                         swizzle(tmp1, W, Z, Y, X),
-                         i915_emit_const4fv(p, scs_sin_constants), 0);
-      }
-
-      if (writemask & TGSI_WRITEMASK_X) {
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_XYZ, 0,
-                         swizzle(tmp, X, X, Z, ONE),
-                         swizzle(tmp, Z, ONE, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_X, 0,
-                         swizzle(tmp, ONE, Z, Y, X),
-                         i915_emit_const4fv(p, scs_cos_constants), 0);
-      }
-      break;
-
    case TGSI_OPCODE_SEQ:
       /* if we're both >= and <= then we're == */
       src0 = src_vector(p, &inst->Src[0], fs);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index a862f985fd..27806057c5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -288,7 +288,6 @@ unsigned int Instruction::srcMask(unsigned int s) const
    case TGSI_OPCODE_POW:
    case TGSI_OPCODE_RCP:
    case TGSI_OPCODE_RSQ:
-   case TGSI_OPCODE_SCS:
       return 0x1;
    case TGSI_OPCODE_IF:
    case TGSI_OPCODE_UIF:
@@ -3258,19 +3257,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          mkOp1(op, TYPE_F32, dst0[3], val0);
       }
       break;
-   case TGSI_OPCODE_SCS:
-      if (mask & 3) {
-         val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
-         if (dst0[0])
-            mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
-         if (dst0[1])
-            mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
-      }
-      if (dst0[2])
-         loadImm(dst0[2], 0.0f);
-      if (dst0[3])
-         loadImm(dst0[3], 1.0f);
-      break;
    case TGSI_OPCODE_EXP:
       src0 = fetchSrc(0, 0);
       val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index 7d006fb2dd..86e3599325 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -689,23 +689,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
          nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
       }
       break;
-   case TGSI_OPCODE_SCS:
-      /* avoid overwriting the source */
-      if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
-      {
-         if (mask & NVFX_FP_MASK_X)
-            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
-         if (mask & NVFX_FP_MASK_Y)
-            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
-      }
-      else
-      {
-         if (mask & NVFX_FP_MASK_Y)
-            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
-         if (mask & NVFX_FP_MASK_X)
-            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
-      }
-      break;
    case TGSI_OPCODE_SEQ:
       nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
       break;
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index f196c4fc17..907ca17b0a 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -163,7 +163,6 @@
  *   SUB - ADD, negate second source
  *   RSQ - LG2 + EX2
  *   POW - LG2 + MUL + EX2
- *   SCS - COS + SIN
  *
  * NV40 Looping
  *   Loops appear to be fairly expensive on NV40 at least, the proprietary
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 9b7f108708..3030698989 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -82,7 +82,6 @@ static unsigned translate_opcode(unsigned opcode)
      /* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
         case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
         case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
-        case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
         case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
      /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
         case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e06fb52481..c9c922fc02 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4602,127 +4602,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static int tgsi_scs(struct r600_shader_ctx *ctx)
-{
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bytecode_alu alu;
-	int i, r;
-
-	/* We'll only need the trig stuff if we are going to write to the
-	 * X or Y components of the destination vector.
-	 */
-	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
-		r = tgsi_setup_trig(ctx);
-		if (r)
-			return r;
-	}
-
-	/* dst.x = COS */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-		if (ctx->bc->chip_class == CAYMAN) {
-			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-				alu.op = ALU_OP1_COS;
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-
-				if (i == 0)
-					alu.dst.write = 1;
-				else
-					alu.dst.write = 0;
-				alu.src[0].sel = ctx->temp_reg;
-				alu.src[0].chan = 0;
-				if (i == 2)
-					alu.last = 1;
-				r = r600_bytecode_add_alu(ctx->bc, &alu);
-				if (r)
-					return r;
-			}
-		} else {
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ALU_OP1_COS;
-			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
-
-			alu.src[0].sel = ctx->temp_reg;
-			alu.src[0].chan = 0;
-			alu.last = 1;
-			r = r600_bytecode_add_alu(ctx->bc, &alu);
-			if (r)
-				return r;
-		}
-	}
-
-	/* dst.y = SIN */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-		if (ctx->bc->chip_class == CAYMAN) {
-			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-				alu.op = ALU_OP1_SIN;
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-				if (i == 1)
-					alu.dst.write = 1;
-				else
-					alu.dst.write = 0;
-				alu.src[0].sel = ctx->temp_reg;
-				alu.src[0].chan = 0;
-				if (i == 2)
-					alu.last = 1;
-				r = r600_bytecode_add_alu(ctx->bc, &alu);
-				if (r)
-					return r;
-			}
-		} else {
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ALU_OP1_SIN;
-			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
-
-			alu.src[0].sel = ctx->temp_reg;
-			alu.src[0].chan = 0;
-			alu.last = 1;
-			r = r600_bytecode_add_alu(ctx->bc, &alu);
-			if (r)
-				return r;
-		}
-	}
-
-	/* dst.z = 0.0; */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
-		alu.op = ALU_OP1_MOV;
-
-		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-
-		alu.src[0].sel = V_SQ_ALU_SRC_0;
-		alu.src[0].chan = 0;
-
-		alu.last = 1;
-
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-
-	/* dst.w = 1.0; */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
-		alu.op = ALU_OP1_MOV;
-
-		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
-
-		alu.src[0].sel = V_SQ_ALU_SRC_1;
-		alu.src[0].chan = 0;
-
-		alu.last = 1;
-
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
 static int tgsi_kill(struct r600_shader_ctx *ctx)
 {
 	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -9017,7 +8896,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9215,7 +9094,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9438,7 +9317,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index a325a567c1..be99ad511b 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -1250,29 +1250,6 @@ do_emit_sincos(struct svga_shader_emitter *emit,
 
 
 /**
- * Translate/emit a TGSI SIN, COS or CSC instruction.
- */
-static boolean
-emit_sincos(struct svga_shader_emitter *emit,
-            const struct tgsi_full_instruction *insn)
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
-   SVGA3dShaderDestToken temp = get_temp( emit );
-
-   /* SCS TMP SRC */
-   if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
-      return FALSE;
-
-   /* MOV DST TMP */
-   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
-      return FALSE;
-
-   return TRUE;
-}
-
-
-/**
  * Translate TGSI SIN instruction into:
  * SCS TMP SRC
  * MOV DST TMP.yyyy
@@ -2840,9 +2817,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
    case TGSI_OPCODE_SIN:
       return emit_sin( emit, insn );
 
-   case TGSI_OPCODE_SCS:
-      return emit_sincos( emit, insn );
-
    case TGSI_OPCODE_END:
       /* TGSI always finishes the main func with an END */
       return emit_end( emit );
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index d8013570ef..a864616c05 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -4279,42 +4279,6 @@ emit_rsq(struct svga_shader_emitter_v10 *emit,
 
 
 /**
- * Emit code for TGSI_OPCODE_SCS instruction.
- */
-static boolean
-emit_scs(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = cos(src.x)
-    * dst.y = sin(src.x)
-    * dst.z = 0.0
-    * dst.w = 1.0
-    */
-   struct tgsi_full_dst_register dst_x =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
-   struct tgsi_full_dst_register dst_y =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
-   struct tgsi_full_dst_register dst_zw =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
-
-   struct tgsi_full_src_register zero_one =
-      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
-
-   begin_emit_instruction(emit);
-   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
-   emit_dst_register(emit, &dst_y);
-   emit_dst_register(emit, &dst_x);
-   emit_src_register(emit, &inst->Src[0]);
-   end_emit_instruction(emit);
-
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
-                        &dst_zw, &zero_one, inst->Instruction.Saturate);
-
-   return TRUE;
-}
-
-
-/**
  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
  */
 static boolean
@@ -5593,8 +5557,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
       return emit_rsq(emit, inst);
    case TGSI_OPCODE_SAMPLE:
       return emit_sample(emit, inst);
-   case TGSI_OPCODE_SCS:
-      return emit_scs(emit, inst);
    case TGSI_OPCODE_SEQ:
       return emit_seq(emit, inst);
    case TGSI_OPCODE_SGE:
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 5cf68db90a..9648f5ae8a 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -404,7 +404,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_RET                 64
 #define TGSI_OPCODE_SSG                 65 /* SGN */
 #define TGSI_OPCODE_CMP                 66
-#define TGSI_OPCODE_SCS                 67
+/* gap */
 #define TGSI_OPCODE_TXB                 68
 #define TGSI_OPCODE_FBFETCH             69
 #define TGSI_OPCODE_DIV                 70
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 50750cb547..5b60dcbac8 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1784,13 +1784,17 @@ DECL_SPECIAL(LABEL)
 
 DECL_SPECIAL(SINCOS)
 {
+    struct ureg_program *ureg = tx->ureg;
     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
 
     assert(!(dst.WriteMask & 0xc));
 
-    dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
-    ureg_SCS(tx->ureg, dst, src);
+    /* z undefined, w untouched */
+    ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
+             ureg_scalar(src, TGSI_SWIZZLE_X));
+    ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
+             ureg_scalar(src, TGSI_SWIZZLE_X));
     return D3D_OK;
 }
 
@@ -2943,8 +2947,8 @@ struct sm1_op_info inst_table[] =
     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
 
-    _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
-    _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
+    _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
+    _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
 
     /* More flow control */
     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d12434101f..9f021962e4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6044,11 +6044,6 @@ compile_tgsi_instruction(struct st_translate *t,
                        tex_target, inst->image_format);
       break;
 
-   case TGSI_OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
-      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src, inst->precise);
-      break;
-
    default:
       ureg_insn(ureg,
                 inst->op,
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index d2ec23ee61..916f7fa2e8 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -491,8 +491,6 @@ translate_opcode( unsigned op )
       return TGSI_OPCODE_POW;
    case OPCODE_RCP:
       return TGSI_OPCODE_RCP;
-   case OPCODE_SCS:
-      return TGSI_OPCODE_SCS;
    case OPCODE_SGE:
       return TGSI_OPCODE_SGE;
    case OPCODE_SIN:
@@ -558,11 +556,10 @@ compile_instruction(
       return;
 
    case OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
-      ureg_insn( ureg, 
-                 translate_opcode( inst->Opcode ), 
-                 dst, num_dst, 
-                 src, num_src, 0 );
+      ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X),
+               ureg_scalar(src[0], TGSI_SWIZZLE_X));
+      ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y),
+               ureg_scalar(src[0], TGSI_SWIZZLE_X));
       break;
 
    case OPCODE_XPD: