[Mesa-dev] [PATCH 7/8] gallium: remove TGSI opcode SCS

Sun Aug 20 00:49:13 UTC 2017

From: Marek Olšák <marek.olsak at amd.com>

Use COS+SIN instead.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |  26 -----
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c    |   3 -
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  22 ----
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |  31 -----
 src/gallium/auxiliary/tgsi/tgsi_info.c             |   2 +-
 src/gallium/auxiliary/tgsi/tgsi_lowering.c         |  75 ------------
 src/gallium/auxiliary/tgsi/tgsi_lowering.h         |   1 -
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |   1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c             |   4 -
 src/gallium/docs/source/tgsi.rst                   |  13 ---
 src/gallium/drivers/etnaviv/etnaviv_compiler.c     |  24 +---
 src/gallium/drivers/i915/i915_fpc_optimize.c       |   1 -
 src/gallium/drivers/i915/i915_fpc_translate.c      |  64 -----------
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  14 ---
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c   |  17 ---
 src/gallium/drivers/nouveau/nv30/nvfx_shader.h     |   1 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |   1 -
 src/gallium/drivers/r600/r600_shader.c             | 127 +--------------------
 src/gallium/drivers/svga/svga_tgsi_insn.c          |  26 -----
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c        |  38 ------
 src/gallium/include/pipe/p_shader_tokens.h         |   2 +-
 src/gallium/state_trackers/nine/nine_shader.c      |   7 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp         |   5 -
 src/mesa/state_tracker/st_mesa_to_tgsi.c           |  11 +-
 24 files changed, 15 insertions(+), 501 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 52c9a86..4b8b7c8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -679,45 +679,20 @@ sqrt_emit(
    } else {
       emit_data->output[emit_data->chan] = bld_base->base.undef;
    }
 }
 
 const struct lp_build_tgsi_action sqrt_action = {
    scalar_unary_fetch_args,	 /* fetch_args */
    sqrt_emit	 /* emit */
 };
 
-/* TGSI_OPCODE_SCS */
-static void
-scs_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   /* dst.x */
-   emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
-                                           TGSI_OPCODE_COS, emit_data->args[0]);
-   /* dst.y */
-   emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
-                                           TGSI_OPCODE_SIN, emit_data->args[0]);
-   /* dst.z */
-   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
-
-   /* dst.w */
-   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action scs_action = {
-   scalar_unary_fetch_args,	 /* fetch_args */
-   scs_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_F2U */
 static void
 f2u_emit(
    const struct lp_build_tgsi_action * action,
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
    emit_data->output[emit_data->chan] =
       LLVMBuildFPToUI(bld_base->base.gallivm->builder,
                       emit_data->args[0],
@@ -1188,21 +1163,20 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
    bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
    bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
    bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
    bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
    bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
    bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
    bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
-   bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
    bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
 
    bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 567ed68..b76c065 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -712,23 +712,20 @@ lp_emit_instruction_aos(
       break;
 
    case TGSI_OPCODE_CMP:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
       break;
 
-   case TGSI_OPCODE_SCS:
-      return FALSE;
-
    case TGSI_OPCODE_TXB:
       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
       break;
 
    case TGSI_OPCODE_DIV:
       assert(0);
       return FALSE;
       break;
 
    case TGSI_OPCODE_DP2:
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 55deb29..62f8feb 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -949,37 +949,20 @@ ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
                            nir_bcsel(b,
                                      nir_fge(b,
                                              nir_imm_float(b, 0.0),
                                              ttn_channel(b, src[0], X)),
                                      nir_imm_float(b, 0.0),
                                      pow),
                            TGSI_WRITEMASK_Z);
    }
 }
 
-/* SCS - Sine Cosine
- *   dst.x = \cos{src.x}
- *   dst.y = \sin{src.x}
- *   dst.z = 0.0
- *   dst.w = 1.0
- */
-static void
-ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)),
-                        TGSI_WRITEMASK_X);
-   ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)),
-                        TGSI_WRITEMASK_Y);
-   ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z);
-   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
 static void
 ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
 }
 
 static void
 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
@@ -1532,21 +1515,20 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_UP4B] = 0, /* XXX */
    [TGSI_OPCODE_UP4UB] = 0, /* XXX */
    [TGSI_OPCODE_ARR] = 0,
 
    /* No function calls, yet. */
    [TGSI_OPCODE_CAL] = 0, /* XXX */
    [TGSI_OPCODE_RET] = 0, /* XXX */
 
    [TGSI_OPCODE_SSG] = nir_op_fsign,
    [TGSI_OPCODE_CMP] = 0,
-   [TGSI_OPCODE_SCS] = 0,
    [TGSI_OPCODE_TXB] = 0,
    [TGSI_OPCODE_DIV] = nir_op_fdiv,
    [TGSI_OPCODE_DP2] = 0,
    [TGSI_OPCODE_TXL] = 0,
 
    [TGSI_OPCODE_BRK] = 0,
    [TGSI_OPCODE_IF] = 0,
    [TGSI_OPCODE_UIF] = 0,
    [TGSI_OPCODE_ELSE] = 0,
    [TGSI_OPCODE_ENDIF] = 0,
@@ -1752,24 +1734,20 @@ ttn_emit_instruction(struct ttn_compile *c)
       break;
 
    case TGSI_OPCODE_CMP:
       ttn_cmp(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_UCMP:
       ttn_ucmp(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_SCS:
-      ttn_scs(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_SGT:
       ttn_sgt(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_SLE:
       ttn_sle(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_KILL_IF:
       ttn_kill_if(b, op_trans[tgsi_op], dest, src);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index bce158b..b7ec309 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3279,47 +3279,20 @@ exec_ucmp(struct tgsi_exec_machine *mach,
    }
    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
                     TGSI_EXEC_DATA_FLOAT);
       }
    }
 }
 
 static void
-exec_scs(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
-      union tgsi_exec_channel arg;
-      union tgsi_exec_channel result;
-
-      fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
-      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-         micro_cos(&result, &arg);
-         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-      }
-      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-         micro_sin(&result, &arg);
-         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-      }
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-      store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
-   }
-}
-
-static void
 exec_dst(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
    union tgsi_exec_channel r[2];
    union tgsi_exec_channel d[4];
 
    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
       fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
       micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
@@ -5327,24 +5300,20 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_SSG:
       exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_CMP:
       exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
-   case TGSI_OPCODE_SCS:
-      exec_scs(mach, inst);
-      break;
-
    case TGSI_OPCODE_DIV:
       exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DP2:
       exec_dp2(mach, inst);
       break;
 
    case TGSI_OPCODE_IF:
       /* push CondMask */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 17f56fd..6d36e40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -97,21 +97,21 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
    { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
    { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
    { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
    { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
    { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
    { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
    { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
-   { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
+   { 1, 1, 0, 0, 0, 0, 0, CHAN, "", 67 }, /* removed */
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
    { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
    { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
    { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
    { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
    { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
    { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC },
    { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index fa9d579..bfc3a6b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -251,85 +251,20 @@ transform_dst(struct tgsi_transform_context *tctx,
       new_inst = tgsi_default_full_instruction();
       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
       new_inst.Instruction.NumDstRegs = 1;
       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
       new_inst.Instruction.NumSrcRegs = 1;
       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
       tctx->emit_instruction(tctx, &new_inst);
    }
 }
 
-/* SCS - Sine Cosine
- *   dst.x = \cos{src.x}
- *   dst.y = \sin{src.x}
- *   dst.z = 0.0
- *   dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{0.0, 1.0}
- * if (dst.x aliases src.x) {
- *   MOV tmpA.x, src.x
- *   src = tmpA
- * }
- * COS dst.x, src.x
- * SIN dst.y, src.x
- * MOV dst.zw, imm{0.0, 1.0}
- */
-#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
-#define SCS_TMP  1
-static void
-transform_scs(struct tgsi_transform_context *tctx,
-              struct tgsi_full_instruction *inst)
-{
-   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
-   struct tgsi_full_dst_register *dst = &inst->Dst[0];
-   struct tgsi_full_src_register *src = &inst->Src[0];
-   struct tgsi_full_instruction new_inst;
-
-   if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
-      create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
-      src = &ctx->tmp[A].src;
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
-      /* COS dst.x, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
-      /* SIN dst.y, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
-      /* MOV dst.zw, imm{0.0, 1.0} */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-}
-
 /* LRP - Linear Interpolate
  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
  *
  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
  * can then become: src0 \times src1 - (src0 \times src2 - src2)
  *
  * ; needs: 1 tmp
@@ -1400,25 +1335,20 @@ transform_instr(struct tgsi_transform_context *tctx,
     */
    if (ctx->two_side_colors)
       rename_color_inputs(ctx, inst);
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_DST:
       if (!ctx->config->lower_DST)
          goto skip;
       transform_dst(tctx, inst);
       break;
-   case TGSI_OPCODE_SCS:
-      if (!ctx->config->lower_SCS)
-         goto skip;
-      transform_scs(tctx, inst);
-      break;
    case TGSI_OPCODE_LRP:
       if (!ctx->config->lower_LRP)
          goto skip;
       transform_lrp(tctx, inst);
       break;
    case TGSI_OPCODE_FRC:
       if (!ctx->config->lower_FRC)
          goto skip;
       transform_frc(tctx, inst);
       break;
@@ -1528,21 +1458,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
             ctx.face_idx = i;
       }
    }
 
    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
 
 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
    /* if there are no instructions to lower, then we are done: */
    if (!(OPCS(DST) ||
-         OPCS(SCS) ||
          OPCS(LRP) ||
          OPCS(FRC) ||
          OPCS(POW) ||
          OPCS(LIT) ||
          OPCS(EXP) ||
          OPCS(LOG) ||
          OPCS(DP4) ||
          OPCS(DP3) ||
          OPCS(DP2) ||
          OPCS(FLR) ||
@@ -1557,24 +1486,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
    _debug_printf("BEFORE:");
    tgsi_dump(tokens, 0);
 #endif
 
    numtmp = 0;
    newlen = tgsi_num_tokens(tokens);
    if (OPCS(DST)) {
       newlen += DST_GROW * OPCS(DST);
       numtmp = MAX2(numtmp, DST_TMP);
    }
-   if (OPCS(SCS)) {
-      newlen += SCS_GROW * OPCS(SCS);
-      numtmp = MAX2(numtmp, SCS_TMP);
-   }
    if (OPCS(LRP)) {
       newlen += LRP_GROW * OPCS(LRP);
       numtmp = MAX2(numtmp, LRP_TMP);
    }
    if (OPCS(FRC)) {
       newlen += FRC_GROW * OPCS(FRC);
       numtmp = MAX2(numtmp, FRC_TMP);
    }
    if (OPCS(POW)) {
       newlen += POW_GROW * OPCS(POW);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index 709a63a..fd4c38f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -48,21 +48,20 @@ struct tgsi_lowering_config
     *     same was as linking other outs/ins).
     */
    unsigned color_two_side:1;
 
    /* TODO support for alpha_to_one as well?? */
 
    /* Individual OPC lowerings, if lower_<opc> is TRUE then
     * enable lowering of TGSI_OPCODE_<opc>
     */
    unsigned lower_DST:1;
-   unsigned lower_SCS:1;
    unsigned lower_LRP:1;
    unsigned lower_FRC:1;
    unsigned lower_POW:1;
    unsigned lower_LIT:1;
    unsigned lower_EXP:1;
    unsigned lower_LOG:1;
    unsigned lower_DP4:1;
    unsigned lower_DP3:1;
    unsigned lower_DP2:1;
    unsigned lower_FLR:1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 111edf3..895e0b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -83,21 +83,20 @@ OP14_TEX(TXD)
 OP12_TEX(TXP)
 OP11(UP2H)
 OP11(UP2US)
 OP11(UP4B)
 OP11(UP4UB)
 OP11(ARR)
 OP00_LBL(CAL)
 OP00(RET)
 OP11(SSG)
 OP13(CMP)
-OP11(SCS)
 OP12_TEX(TXB)
 OP12(DIV)
 OP12(DP2)
 OP12_TEX(TXL)
 OP00(BRK)
 OP01_LBL(IF)
 OP01_LBL(UIF)
 OP00_LBL(ELSE)
 OP00(ENDIF)
 OP11(CEIL)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 6756bca..327e42f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -246,24 +246,20 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
    case TGSI_OPCODE_DDY_FINE:
       /* Channel-wise operations */
       read_mask = write_mask;
       break;
 
    case TGSI_OPCODE_EX2:
    case TGSI_OPCODE_LG2:
       read_mask = TGSI_WRITEMASK_X;
       break;
 
-   case TGSI_OPCODE_SCS:
-      read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
-      break;
-
    case TGSI_OPCODE_EXP:
    case TGSI_OPCODE_LOG:
       read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
       break;
 
    case TGSI_OPCODE_DP2:
       read_mask = TGSI_WRITEMASK_XY;
       break;
 
    case TGSI_OPCODE_DP3:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 3441907..8441316 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -644,33 +644,20 @@ This instruction replicates its result.
   if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
     discard
   endif
 
 
 .. opcode:: KILL - Discard
 
   Unconditional discard.  Allowed in fragment shaders only.
 
 
-.. opcode:: SCS - Sine Cosine
-
-.. math::
-
-  dst.x = \cos{src.x}
-
-  dst.y = \sin{src.x}
-
-  dst.z = 0
-
-  dst.w = 1
-
-
 .. opcode:: TXB - Texture Lookup With Bias
 
   for cube map array textures and shadow cube maps, the bias value
   cannot be passed in src0.w, and TXB2 must be used instead.
 
   if the target is a shadow texture, the reference value is always
   in src.z (this prevents shadow 3d and shadow 2d arrays from
   using this instruction, but this is not needed).
 
 .. math::
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index 4f09f71..c4ca80f 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -1467,23 +1467,20 @@ static void
 trans_trig(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
 {
    if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
       /* On newer chips alternative SIN/COS instructions are implemented,
        * which:
        * - Need their input scaled by 1/pi instead of 2/pi
        * - Output an x and y component, which need to be multiplied to
        *   get the result
        */
-      /* TGSI lowering should deal with SCS */
-      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
       struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
       emit_inst(c, &(struct etna_inst) {
          .opcode = INST_OPCODE_MUL,
          .sat = 0,
          .dst = etna_native_to_dst(temp, INST_COMPS_Z),
          .src[0] = src[0], /* any swizzling happens here */
          .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
       });
       emit_inst(c, &(struct etna_inst) {
          .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
@@ -1496,23 +1493,20 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
       });
       emit_inst(c, &(struct etna_inst) {
          .opcode = INST_OPCODE_MUL,
          .sat = inst->Instruction.Saturate,
          .dst = convert_dst(c, &inst->Dst[0]),
          .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
          .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
       });
 
    } else if (c->specs->has_sin_cos_sqrt) {
-      /* TGSI lowering should deal with SCS */
-      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
       struct etna_native_reg temp = etna_compile_get_inner_temp(c);
       /* add divide by PI/2, using a temp register. GC2000
        * fails with src==dst for the trig instruction. */
       emit_inst(c, &(struct etna_inst) {
          .opcode = INST_OPCODE_MUL,
          .sat = 0,
          .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                          INST_COMPS_Z | INST_COMPS_W),
          .src[0] = src[0], /* any swizzling happens here */
          .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
@@ -1533,22 +1527,20 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
        *  FRC t.x_z_, void, void, t.xwzw
        *  MAD t.x_z_, t.xwzw, 2, -1
        *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
        *  DP3 t.x_z_, t.zyww, C, void         (for sin)
        *  DP3 t.__z_, t.zyww, C, void         (for scs)
        *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
        *  DP3 t.x_z_, t.xyww, C, void         (for cos)
        *  DP3 t.x___, t.xyww, C, void         (for scs)
        *  MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
        *  MAD dst, t.ywyw, .2225, t.xzxz
-       *
-       * TODO: we don't set dst.zw correctly for SCS.
        */
       struct etna_inst *p, ins[9] = { };
       struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
       struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
       struct etna_inst_src sincos[3], in = src[0];
       sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
       sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
 
       /* A uniform source will cause the inner temp limit to
        * be exceeded.  Explicitly deal with that scenario.
@@ -1590,33 +1582,21 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
       ins[3].opcode = INST_OPCODE_MUL;
       ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
       ins[3].src[0] = swizzle(t0s, mul_swiz);
       ins[3].src[1] = absolute(ins[3].src[0]);
 
       ins[4].opcode = INST_OPCODE_DP3;
       ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
       ins[4].src[0] = swizzle(t0s, dp3_swiz);
       ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
 
-      if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
-         ins[5] = ins[3];
-         ins[6] = ins[4];
-         ins[4].dst.comps = INST_COMPS_X;
-         ins[6].dst.comps = INST_COMPS_Z;
-         ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
-         ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
-         ins[5].src[1] = absolute(ins[5].src[0]);
-         p = &ins[7];
-      } else {
-         p = &ins[5];
-      }
-
+      p = &ins[5];
       p->opcode = INST_OPCODE_MAD;
       p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
       p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
       p->src[1] = absolute(p->src[0]);
       p->src[2] = negate(p->src[0]);
 
       p++;
       p->opcode = INST_OPCODE_MAD;
       p->sat = inst->Instruction.Saturate;
       p->dst = convert_dst(c, &inst->Dst[0]),
@@ -1802,21 +1782,20 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
    INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
    INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
 
    INSTR(ARL, trans_arl),
    INSTR(LRP, trans_lrp),
    INSTR(LIT, trans_lit),
    INSTR(SSG, trans_ssg),
 
    INSTR(SIN, trans_trig),
    INSTR(COS, trans_trig),
-   INSTR(SCS, trans_trig),
 
    INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
    INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
    INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
    INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
    INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
    INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
 
    INSTR(TEX, trans_sampler),
    INSTR(TXB, trans_sampler),
@@ -2302,21 +2281,20 @@ etna_compile_shader(struct etna_shader_variant *v)
     */
    bool ret;
    struct etna_compile *c;
 
    if (unlikely(!v))
       return false;
 
    const struct etna_specs *specs = v->shader->specs;
 
    struct tgsi_lowering_config lconfig = {
-      .lower_SCS = specs->has_sin_cos_sqrt,
       .lower_FLR = !specs->has_sign_floor_ceil,
       .lower_CEIL = !specs->has_sign_floor_ceil,
       .lower_POW = true,
       .lower_EXP = true,
       .lower_LOG = true,
       .lower_DP2 = true,
       .lower_TRUNC = true,
    };
 
    c = CALLOC_STRUCT(etna_compile);
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index fb97e9e..76ef20d 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -98,21 +98,20 @@ static const struct {
    [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
    [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
    [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
    [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
-   [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 2faab33..a9601e8 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -796,84 +796,20 @@ i915_translate_instruction(struct i915_fp_compile *p,
    case TGSI_OPCODE_RSQ:
       src0 = src_vector(p, &inst->Src[0], fs);
 
       i915_emit_arith(p,
                       A0_RSQ,
                       get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
-   case TGSI_OPCODE_SCS:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      tmp = i915_get_utemp(p);
-
-      /* 
-       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-       * scs.x = DP4 t1, scs_sin_constants
-       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-       * scs.y = DP4 t1, scs_cos_constants
-       */
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_XY, 0,
-                      swizzle(src0, X, X, ONE, ONE),
-                      swizzle(src0, X, ONE, ONE, ONE), 0);
-
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_ALL, 0,
-                      swizzle(tmp, X, Y, X, Y),
-                      swizzle(tmp, X, X, ONE, ONE), 0);
-
-      writemask = inst->Dst[0].Register.WriteMask;
-
-      if (writemask & TGSI_WRITEMASK_Y) {
-         uint tmp1;
-
-         if (writemask & TGSI_WRITEMASK_X)
-            tmp1 = i915_get_utemp(p);
-         else
-            tmp1 = tmp;
-
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp1, A0_DEST_CHANNEL_ALL, 0,
-                         swizzle(tmp, X, Y, Y, W),
-                         swizzle(tmp, X, Z, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_Y, 0,
-                         swizzle(tmp1, W, Z, Y, X),
-                         i915_emit_const4fv(p, scs_sin_constants), 0);
-      }
-
-      if (writemask & TGSI_WRITEMASK_X) {
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_XYZ, 0,
-                         swizzle(tmp, X, X, Z, ONE),
-                         swizzle(tmp, Z, ONE, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_X, 0,
-                         swizzle(tmp, ONE, Z, Y, X),
-                         i915_emit_const4fv(p, scs_cos_constants), 0);
-      }
-      break;
-
    case TGSI_OPCODE_SEQ:
       /* if we're both >= and <= then we're == */
       src0 = src_vector(p, &inst->Src[0], fs);
       src1 = src_vector(p, &inst->Src[1], fs);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
                       A0_SGE,
                       tmp, A0_DEST_CHANNEL_ALL, 0,
                       src0,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index a862f98..2780605 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -281,21 +281,20 @@ unsigned int Instruction::srcMask(unsigned int s) const
       return 0xf;
    case TGSI_OPCODE_DST:
       return mask & (s ? 0xa : 0x6);
    case TGSI_OPCODE_EX2:
    case TGSI_OPCODE_EXP:
    case TGSI_OPCODE_LG2:
    case TGSI_OPCODE_LOG:
    case TGSI_OPCODE_POW:
    case TGSI_OPCODE_RCP:
    case TGSI_OPCODE_RSQ:
-   case TGSI_OPCODE_SCS:
       return 0x1;
    case TGSI_OPCODE_IF:
    case TGSI_OPCODE_UIF:
       return 0x1;
    case TGSI_OPCODE_LIT:
       return 0xb;
    case TGSI_OPCODE_TEX2:
    case TGSI_OPCODE_TXB2:
    case TGSI_OPCODE_TXL2:
       return (s == 0) ? 0xf : 0x3;
@@ -3251,33 +3250,20 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          mkOp1(op, TYPE_F32, val0, val0);
          for (c = 0; c < 3; ++c)
             if (dst0[c])
                mkMov(dst0[c], val0);
       }
       if (dst0[3]) {
          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
          mkOp1(op, TYPE_F32, dst0[3], val0);
       }
       break;
-   case TGSI_OPCODE_SCS:
-      if (mask & 3) {
-         val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
-         if (dst0[0])
-            mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
-         if (dst0[1])
-            mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
-      }
-      if (dst0[2])
-         loadImm(dst0[2], 0.0f);
-      if (dst0[3])
-         loadImm(dst0[3], 1.0f);
-      break;
    case TGSI_OPCODE_EXP:
       src0 = fetchSrc(0, 0);
       val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
       if (dst0[1])
          mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
       if (dst0[0])
          mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
       if (dst0[2])
          mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
       if (dst0[3])
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index 7d006fb..86e3599 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -682,37 +682,20 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
       if(!fpc->is_nv4x)
          nvfx_fp_emit(fpc, arith(sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none));
       else {
          tmp = nvfx_src(temp(fpc));
          insn = arith(0, LG2, tmp.reg, NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none);
          insn.scale = NVFX_FP_OP_DST_SCALE_INV_2X;
          nvfx_fp_emit(fpc, insn);
          nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
       }
       break;
-   case TGSI_OPCODE_SCS:
-      /* avoid overwriting the source */
-      if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
-      {
-         if (mask & NVFX_FP_MASK_X)
-            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
-         if (mask & NVFX_FP_MASK_Y)
-            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
-      }
-      else
-      {
-         if (mask & NVFX_FP_MASK_Y)
-            nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
-         if (mask & NVFX_FP_MASK_X)
-            nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
-      }
-      break;
    case TGSI_OPCODE_SEQ:
       nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
       break;
    case TGSI_OPCODE_SGE:
       nvfx_fp_emit(fpc, arith(sat, SGE, dst, mask, src[0], src[1], none));
       break;
    case TGSI_OPCODE_SGT:
       nvfx_fp_emit(fpc, arith(sat, SGT, dst, mask, src[0], src[1], none));
       break;
    case TGSI_OPCODE_SIN:
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index f196c4f..907ca17 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -156,21 +156,20 @@
  *
  * Conditional execution
  *   TODO
  *
  * Non-native instructions:
  *   LIT
  *   LRP - MAD+MAD
  *   SUB - ADD, negate second source
  *   RSQ - LG2 + EX2
  *   POW - LG2 + MUL + EX2
- *   SCS - COS + SIN
  *
  * NV40 Looping
  *   Loops appear to be fairly expensive on NV40 at least, the proprietary
  *   driver goes to a lot of effort to avoid using the native looping
  *   instructions.  If the total number of *executed* instructions between
  *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
  *   The maximum loop count is 255.
  *
  */
 
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index fa6c0b9..a21a714 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -75,21 +75,20 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_TXP: return RC_OPCODE_TXP;
      /* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */
      /* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */
      /* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */
      /* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */
         case TGSI_OPCODE_ARR: return RC_OPCODE_ARR;
      /* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
      /* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
         case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
         case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
-        case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
         case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
      /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
         case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
         case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
         case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
         case TGSI_OPCODE_IF: return RC_OPCODE_IF;
         case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
         case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
         case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
         case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 18d4bc4..45d256c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -4595,141 +4595,20 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		if (i == lasti)
 			alu.last = 1;
 		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 	return 0;
 }
 
-static int tgsi_scs(struct r600_shader_ctx *ctx)
-{
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bytecode_alu alu;
-	int i, r;
-
-	/* We'll only need the trig stuff if we are going to write to the
-	 * X or Y components of the destination vector.
-	 */
-	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
-		r = tgsi_setup_trig(ctx);
-		if (r)
-			return r;
-	}
-
-	/* dst.x = COS */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-		if (ctx->bc->chip_class == CAYMAN) {
-			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-				alu.op = ALU_OP1_COS;
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-
-				if (i == 0)
-					alu.dst.write = 1;
-				else
-					alu.dst.write = 0;
-				alu.src[0].sel = ctx->temp_reg;
-				alu.src[0].chan = 0;
-				if (i == 2)
-					alu.last = 1;
-				r = r600_bytecode_add_alu(ctx->bc, &alu);
-				if (r)
-					return r;
-			}
-		} else {
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ALU_OP1_COS;
-			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
-
-			alu.src[0].sel = ctx->temp_reg;
-			alu.src[0].chan = 0;
-			alu.last = 1;
-			r = r600_bytecode_add_alu(ctx->bc, &alu);
-			if (r)
-				return r;
-		}
-	}
-
-	/* dst.y = SIN */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-		if (ctx->bc->chip_class == CAYMAN) {
-			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-				alu.op = ALU_OP1_SIN;
-				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-				if (i == 1)
-					alu.dst.write = 1;
-				else
-					alu.dst.write = 0;
-				alu.src[0].sel = ctx->temp_reg;
-				alu.src[0].chan = 0;
-				if (i == 2)
-					alu.last = 1;
-				r = r600_bytecode_add_alu(ctx->bc, &alu);
-				if (r)
-					return r;
-			}
-		} else {
-			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-			alu.op = ALU_OP1_SIN;
-			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
-
-			alu.src[0].sel = ctx->temp_reg;
-			alu.src[0].chan = 0;
-			alu.last = 1;
-			r = r600_bytecode_add_alu(ctx->bc, &alu);
-			if (r)
-				return r;
-		}
-	}
-
-	/* dst.z = 0.0; */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
-		alu.op = ALU_OP1_MOV;
-
-		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-
-		alu.src[0].sel = V_SQ_ALU_SRC_0;
-		alu.src[0].chan = 0;
-
-		alu.last = 1;
-
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-
-	/* dst.w = 1.0; */
-	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
-		alu.op = ALU_OP1_MOV;
-
-		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
-
-		alu.src[0].sel = V_SQ_ALU_SRC_1;
-		alu.src[0].chan = 0;
-
-		alu.last = 1;
-
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
 static int tgsi_kill(struct r600_shader_ctx *ctx)
 {
 	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.op = ctx->inst_info->op;
 
@@ -9049,21 +8928,21 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_r600_arl},
 	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
 	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
 	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
 	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
@@ -9247,21 +9126,21 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
 	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
 	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
 	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
 	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
@@ -9470,21 +9349,21 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
 	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
 	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
-	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
+	[67]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
 	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4_IEEE, tgsi_dp},
 	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
 	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
 	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
 	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index a325a56..be99ad5 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -1243,43 +1243,20 @@ static boolean
 do_emit_sincos(struct svga_shader_emitter *emit,
                SVGA3dShaderDestToken dst,
                struct src_register src0)
 {
    src0 = scalar(src0, TGSI_SWIZZLE_X);
    return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
 }
 
 
 /**
- * Translate/emit a TGSI SIN, COS or CSC instruction.
- */
-static boolean
-emit_sincos(struct svga_shader_emitter *emit,
-            const struct tgsi_full_instruction *insn)
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
-   SVGA3dShaderDestToken temp = get_temp( emit );
-
-   /* SCS TMP SRC */
-   if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
-      return FALSE;
-
-   /* MOV DST TMP */
-   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
-      return FALSE;
-
-   return TRUE;
-}
-
-
-/**
  * Translate TGSI SIN instruction into:
  * SCS TMP SRC
  * MOV DST TMP.yyyy
  */
 static boolean
 emit_sin(struct svga_shader_emitter *emit,
          const struct tgsi_full_instruction *insn )
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 =
@@ -2833,23 +2810,20 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
 
    case TGSI_OPCODE_DP2:
       return emit_dp2( emit, insn );
 
    case TGSI_OPCODE_COS:
       return emit_cos( emit, insn );
 
    case TGSI_OPCODE_SIN:
       return emit_sin( emit, insn );
 
-   case TGSI_OPCODE_SCS:
-      return emit_sincos( emit, insn );
-
    case TGSI_OPCODE_END:
       /* TGSI always finishes the main func with an END */
       return emit_end( emit );
 
    case TGSI_OPCODE_KILL_IF:
       return emit_kill_if( emit, insn );
 
       /* Selection opcodes.  The underlying language is fairly
        * non-orthogonal about these.
        */
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 9d86f72..ac312b1 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -4274,56 +4274,20 @@ emit_rsq(struct svga_shader_emitter_v10 *emit,
                         &tmp_src_xxxx, inst->Instruction.Saturate);
 
    /* free tmp */
    free_temp_indexes(emit);
 
    return TRUE;
 }
 
 
 /**
- * Emit code for TGSI_OPCODE_SCS instruction.
- */
-static boolean
-emit_scs(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = cos(src.x)
-    * dst.y = sin(src.x)
-    * dst.z = 0.0
-    * dst.w = 1.0
-    */
-   struct tgsi_full_dst_register dst_x =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
-   struct tgsi_full_dst_register dst_y =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
-   struct tgsi_full_dst_register dst_zw =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
-
-   struct tgsi_full_src_register zero_one =
-      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
-
-   begin_emit_instruction(emit);
-   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
-   emit_dst_register(emit, &dst_y);
-   emit_dst_register(emit, &dst_x);
-   emit_src_register(emit, &inst->Src[0]);
-   end_emit_instruction(emit);
-
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
-                        &dst_zw, &zero_one, inst->Instruction.Saturate);
-
-   return TRUE;
-}
-
-
-/**
  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
  */
 static boolean
 emit_seq(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
 {
    /* dst = SEQ(s0, s1):
     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
     * Translates into:
     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
@@ -5588,22 +5552,20 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
    case TGSI_OPCODE_LRP:
       return emit_lrp(emit, inst);
    case TGSI_OPCODE_POW:
       return emit_pow(emit, inst);
    case TGSI_OPCODE_RCP:
       return emit_rcp(emit, inst);
    case TGSI_OPCODE_RSQ:
       return emit_rsq(emit, inst);
    case TGSI_OPCODE_SAMPLE:
       return emit_sample(emit, inst);
-   case TGSI_OPCODE_SCS:
-      return emit_scs(emit, inst);
    case TGSI_OPCODE_SEQ:
       return emit_seq(emit, inst);
    case TGSI_OPCODE_SGE:
       return emit_sge(emit, inst);
    case TGSI_OPCODE_SGT:
       return emit_sgt(emit, inst);
    case TGSI_OPCODE_SIN:
       return emit_sincos(emit, inst);
    case TGSI_OPCODE_SLE:
       return emit_sle(emit, inst);
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index c8f31be..ae36a1e 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -397,21 +397,21 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_UP4B                57
 #define TGSI_OPCODE_UP4UB               58
 #define TGSI_OPCODE_U642F               59
 #define TGSI_OPCODE_I642F               60
 #define TGSI_OPCODE_ARR                 61
 #define TGSI_OPCODE_I642D               62
 #define TGSI_OPCODE_CAL                 63
 #define TGSI_OPCODE_RET                 64
 #define TGSI_OPCODE_SSG                 65 /* SGN */
 #define TGSI_OPCODE_CMP                 66
-#define TGSI_OPCODE_SCS                 67
+/* gap */
 #define TGSI_OPCODE_TXB                 68
 #define TGSI_OPCODE_FBFETCH             69
 #define TGSI_OPCODE_DIV                 70
 #define TGSI_OPCODE_DP2                 71
 #define TGSI_OPCODE_TXL                 72
 #define TGSI_OPCODE_BRK                 73
 #define TGSI_OPCODE_IF                  74
 #define TGSI_OPCODE_UIF                 75
 #define TGSI_OPCODE_READ_INVOC          76
 #define TGSI_OPCODE_ELSE                77
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 60d56af..d36d497 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1782,22 +1782,25 @@ DECL_SPECIAL(LABEL)
     return D3D_OK;
 }
 
 DECL_SPECIAL(SINCOS)
 {
     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
 
     assert(!(dst.WriteMask & 0xc));
 
-    dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
-    ureg_SCS(tx->ureg, dst, src);
+    /* dst.x = cos(src.x), dst.y = sin(src.x); z undefined, w untouched */
+    ureg_COS(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
+             ureg_scalar(src, TGSI_SWIZZLE_X));
+    ureg_SIN(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
+             ureg_scalar(src, TGSI_SWIZZLE_X));
     return D3D_OK;
 }
 
 DECL_SPECIAL(SGN)
 {
     ureg_SSG(tx->ureg,
              tx_dst_param(tx, &tx->insn.dst[0]),
              tx_src_param(tx, &tx->insn.src[0]));
     return D3D_OK;
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f4f3092..c44805e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6072,25 +6072,20 @@ compile_tgsi_instruction(struct st_translate *t,
       }
       dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
       if (inst->resource.reladdr)
          dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
       assert(dst[0].File != TGSI_FILE_NULL);
       ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                        inst->buffer_access,
                        tex_target, inst->image_format);
       break;
 
-   case TGSI_OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
-      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src, inst->precise);
-      break;
-
    default:
       ureg_insn(ureg,
                 inst->op,
                 dst, num_dst,
                 src, num_src, inst->precise);
       break;
    }
 }
 
 /**
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index d2ec23e..916f7fa 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -484,22 +484,20 @@ translate_opcode( unsigned op )
    case OPCODE_MIN:
       return TGSI_OPCODE_MIN;
    case OPCODE_MOV:
       return TGSI_OPCODE_MOV;
    case OPCODE_MUL:
       return TGSI_OPCODE_MUL;
    case OPCODE_POW:
       return TGSI_OPCODE_POW;
    case OPCODE_RCP:
       return TGSI_OPCODE_RCP;
-   case OPCODE_SCS:
-      return TGSI_OPCODE_SCS;
    case OPCODE_SGE:
       return TGSI_OPCODE_SGE;
    case OPCODE_SIN:
       return TGSI_OPCODE_SIN;
    case OPCODE_SLT:
       return TGSI_OPCODE_SLT;
    case OPCODE_TEX:
       return TGSI_OPCODE_TEX;
    case OPCODE_TXB:
       return TGSI_OPCODE_TXB;
@@ -551,25 +549,24 @@ compile_instruction(
                      translate_opcode( inst->Opcode ),
                      dst, num_dst, 
                      st_translate_texture_target( inst->TexSrcTarget,
                                                inst->TexShadow ),
                      TGSI_RETURN_TYPE_FLOAT,
                      NULL, 0,
                      src, num_src );
       return;
 
    case OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
-      ureg_insn( ureg, 
-                 translate_opcode( inst->Opcode ), 
-                 dst, num_dst, 
-                 src, num_src, 0 );
+      ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X),
+               ureg_scalar(src[0], TGSI_SWIZZLE_X));
+      ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y),
+               ureg_scalar(src[0], TGSI_SWIZZLE_X));
       break;
 
    case OPCODE_XPD:
       ureg_MUL(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
                ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
                             TGSI_SWIZZLE_X, 0),
                ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
                             TGSI_SWIZZLE_Y, 0));
       ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
                ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
-- 
2.7.4