[Mesa-dev] [PATCH 6/8] gallium: remove TGSI opcode XPD

Marek Olšák maraeo at gmail.com
Sun Aug 20 00:49:12 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Use a MUL + MAD + MOV sequence instead, i.e. the same expansion the
removed tgsi_lowering pass emitted for XPD.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |  56 ----------
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c    |   3 -
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  20 ----
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |  49 ---------
 src/gallium/auxiliary/tgsi/tgsi_info.c             |   2 +-
 src/gallium/auxiliary/tgsi/tgsi_lowering.c         |  69 -------------
 src/gallium/auxiliary/tgsi/tgsi_lowering.h         |   1 -
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |   1 -
 src/gallium/docs/source/tgsi.rst                   |  14 +--
 src/gallium/drivers/etnaviv/etnaviv_compiler.c     |   1 -
 src/gallium/drivers/i915/i915_fpc_optimize.c       |   1 -
 src/gallium/drivers/i915/i915_fpc_translate.c      |  26 -----
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  27 -----
 src/gallium/drivers/nouveau/nv30/nv30_vertprog.h   |   1 -
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c   |   5 -
 src/gallium/drivers/nouveau/nv30/nvfx_shader.h     |   1 -
 src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c   |   5 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |   1 -
 src/gallium/drivers/r600/r600_shader.c             |  78 +-------------
 src/gallium/drivers/svga/svga_tgsi_insn.c          |  61 -----------
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c        | 113 ---------------------
 src/gallium/include/pipe/p_shader_tokens.h         |   2 +-
 src/gallium/state_trackers/nine/nine_shader.c      |  25 ++++-
 src/mesa/state_tracker/st_mesa_to_tgsi.c           |  18 ++--
 24 files changed, 41 insertions(+), 539 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index f6baca0..52c9a86 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -851,75 +851,20 @@ static void fmin_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
                                    LLVMBuildFCmp(builder, LLVMRealUGE,
                                    emit_data->args[0], emit_data->args[1], ""),
                                    emit_data->args[1], emit_data->args[0], "");
 }
 
-/* TGSI_OPCODE_XPD */
-
-static void
-xpd_fetch_args(
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   dp_fetch_args(bld_base, emit_data, 3);
-}
-
-/**
- * (a * b) - (c * d)
- */
-static LLVMValueRef
-xpd_helper(
-  struct lp_build_tgsi_context * bld_base,
-  LLVMValueRef a,
-  LLVMValueRef b,
-  LLVMValueRef c,
-  LLVMValueRef d)
-{
-   LLVMValueRef tmp0, tmp1;
-
-   tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
-   tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
-
-   return lp_build_sub(&bld_base->base, tmp0, tmp1);
-}
-
-static void
-xpd_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
-              emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
-              emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
-
-   emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
-              emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
-              emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
-
-   emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
-              emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
-              emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
-
-   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action xpd_action = {
-   xpd_fetch_args,	 /* fetch_args */
-   xpd_emit	 /* emit */
-};
-
 /* TGSI_OPCODE_D2F */
 static void
 d2f_emit(
    const struct lp_build_tgsi_action * action,
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
    emit_data->output[emit_data->chan] =
       LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
                       emit_data->args[0],
@@ -1245,21 +1190,20 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
    bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
    bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
    bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
    bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
    bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
    bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
    bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
    bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
-   bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
 
    bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
    bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 46abda0..567ed68 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -583,23 +583,20 @@ lp_emit_instruction_aos(
       break;
 
    case TGSI_OPCODE_POW:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
       break;
 
-   case TGSI_OPCODE_XPD:
-      return FALSE;
-
    case TGSI_OPCODE_COS:
       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_DDX:
       return FALSE;
 
    case TGSI_OPCODE_DDY:
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index ea1f064..55deb29 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -979,35 +979,20 @@ ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
    ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
 }
 
 static void
 ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
 }
 
 static void
-ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
-   ttn_move_dest_masked(b, dest,
-                        nir_fsub(b,
-                                 nir_fmul(b,
-                                          ttn_swizzle(b, src[0], Y, Z, X, X),
-                                          ttn_swizzle(b, src[1], Z, X, Y, X)),
-                                 nir_fmul(b,
-                                          ttn_swizzle(b, src[1], Y, Z, X, X),
-                                          ttn_swizzle(b, src[0], Z, X, Y, X))),
-                        TGSI_WRITEMASK_XYZ);
-   ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
-static void
 ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
 }
 
 static void
 ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
    ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
 }
@@ -1519,21 +1504,20 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
    [TGSI_OPCODE_SGE] = nir_op_sge,
    [TGSI_OPCODE_MAD] = nir_op_ffma,
    [TGSI_OPCODE_LRP] = 0,
    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
    [TGSI_OPCODE_FRC] = nir_op_ffract,
    [TGSI_OPCODE_FLR] = nir_op_ffloor,
    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
    [TGSI_OPCODE_EX2] = nir_op_fexp2,
    [TGSI_OPCODE_LG2] = nir_op_flog2,
    [TGSI_OPCODE_POW] = nir_op_fpow,
-   [TGSI_OPCODE_XPD] = 0,
    [TGSI_OPCODE_COS] = nir_op_fcos,
    [TGSI_OPCODE_DDX] = nir_op_fddx,
    [TGSI_OPCODE_DDY] = nir_op_fddy,
    [TGSI_OPCODE_KILL] = 0,
    [TGSI_OPCODE_PK2H] = 0, /* XXX */
    [TGSI_OPCODE_PK2US] = 0, /* XXX */
    [TGSI_OPCODE_PK4B] = 0, /* XXX */
    [TGSI_OPCODE_PK4UB] = 0, /* XXX */
    [TGSI_OPCODE_SEQ] = nir_op_seq,
    [TGSI_OPCODE_SGT] = 0,
@@ -1732,24 +1716,20 @@ ttn_emit_instruction(struct ttn_compile *c)
       break;
 
    case TGSI_OPCODE_DST:
       ttn_dst(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_LIT:
       ttn_lit(b, op_trans[tgsi_op], dest, src);
       break;
 
-   case TGSI_OPCODE_XPD:
-      ttn_xpd(b, op_trans[tgsi_op], dest, src);
-      break;
-
    case TGSI_OPCODE_DP2:
       ttn_dp2(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_DP3:
       ttn_dp3(b, op_trans[tgsi_op], dest, src);
       break;
 
    case TGSI_OPCODE_DP4:
       ttn_dp4(b, op_trans[tgsi_op], dest, src);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c9265fb..bce158b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3306,65 +3306,20 @@ exec_scs(struct tgsi_exec_machine *mach,
    }
    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
       store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
    }
    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
    }
 }
 
 static void
-exec_xpd(struct tgsi_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel r[6];
-   union tgsi_exec_channel d[3];
-
-   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-
-   micro_mul(&r[2], &r[0], &r[1]);
-
-   fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-
-   micro_mul(&r[5], &r[3], &r[4] );
-   micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
-
-   fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
-   micro_mul(&r[3], &r[3], &r[2]);
-
-   fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
-   micro_mul(&r[1], &r[1], &r[5]);
-   micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
-
-   micro_mul(&r[5], &r[5], &r[4]);
-   micro_mul(&r[0], &r[0], &r[2]);
-   micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
-
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-      store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
-   }
-}
-
-static void
 exec_dst(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
    union tgsi_exec_channel r[2];
    union tgsi_exec_channel d[4];
 
    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
       fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
       micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
@@ -5146,24 +5101,20 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_LG2:
       exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_POW:
       exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
-   case TGSI_OPCODE_XPD:
-      exec_xpd(mach, inst);
-      break;
-
    case TGSI_OPCODE_COS:
       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DDX:
       exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
    case TGSI_OPCODE_DDY:
       exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index b247794..17f56fd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -61,21 +61,21 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
    { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
    { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
-   { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "", 31 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
    { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
    { 1, 2, 0, 0, 0, 0, 0, REPL, "", 35 }, /* removed */
    { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
    { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
    { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
    { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
index f3b5ade..fa9d579 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -251,79 +251,20 @@ transform_dst(struct tgsi_transform_context *tctx,
       new_inst = tgsi_default_full_instruction();
       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
       new_inst.Instruction.NumDstRegs = 1;
       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
       new_inst.Instruction.NumSrcRegs = 1;
       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
       tctx->emit_instruction(tctx, &new_inst);
    }
 }
 
-/* XPD - Cross Product
- *   dst.x = src0.y \times src1.z - src1.y \times src0.z
- *   dst.y = src0.z \times src1.x - src1.z \times src0.x
- *   dst.z = src0.x \times src1.y - src1.x \times src0.y
- *   dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{1.0}
- * MUL tmpA.xyz, src1.yzx, src0.zxy
- * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
- * MOV dst.w, imm{1.0}
- */
-#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
-#define XPD_TMP  1
-static void
-transform_xpd(struct tgsi_transform_context *tctx,
-              struct tgsi_full_instruction *inst)
-{
-   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
-   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
-   struct tgsi_full_src_register *src0 = &inst->Src[0];
-   struct tgsi_full_src_register *src1 = &inst->Src[1];
-   struct tgsi_full_instruction new_inst;
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
-      /* MUL tmpA.xyz, src1.yzx, src0.zxy */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
-      reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
-      tctx->emit_instruction(tctx, &new_inst);
-
-      /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
-      new_inst.Instruction.NumSrcRegs = 3;
-      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
-      reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
-      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
-      new_inst.Src[2].Register.Negate = true;
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
-      /* MOV dst.w, imm{1.0} */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-}
-
 /* SCS - Sine Cosine
  *   dst.x = \cos{src.x}
  *   dst.y = \sin{src.x}
  *   dst.z = 0.0
  *   dst.w = 1.0
  *
  * ; needs: 1 tmp, imm{0.0, 1.0}
  * if (dst.x aliases src.x) {
  *   MOV tmpA.x, src.x
  *   src = tmpA
@@ -1459,25 +1400,20 @@ transform_instr(struct tgsi_transform_context *tctx,
     */
    if (ctx->two_side_colors)
       rename_color_inputs(ctx, inst);
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_DST:
       if (!ctx->config->lower_DST)
          goto skip;
       transform_dst(tctx, inst);
       break;
-   case TGSI_OPCODE_XPD:
-      if (!ctx->config->lower_XPD)
-         goto skip;
-      transform_xpd(tctx, inst);
-      break;
    case TGSI_OPCODE_SCS:
       if (!ctx->config->lower_SCS)
          goto skip;
       transform_scs(tctx, inst);
       break;
    case TGSI_OPCODE_LRP:
       if (!ctx->config->lower_LRP)
          goto skip;
       transform_lrp(tctx, inst);
       break;
@@ -1592,21 +1528,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
             ctx.face_idx = i;
       }
    }
 
    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
 
 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
    /* if there are no instructions to lower, then we are done: */
    if (!(OPCS(DST) ||
-         OPCS(XPD) ||
          OPCS(SCS) ||
          OPCS(LRP) ||
          OPCS(FRC) ||
          OPCS(POW) ||
          OPCS(LIT) ||
          OPCS(EXP) ||
          OPCS(LOG) ||
          OPCS(DP4) ||
          OPCS(DP3) ||
          OPCS(DP2) ||
@@ -1622,24 +1557,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
    _debug_printf("BEFORE:");
    tgsi_dump(tokens, 0);
 #endif
 
    numtmp = 0;
    newlen = tgsi_num_tokens(tokens);
    if (OPCS(DST)) {
       newlen += DST_GROW * OPCS(DST);
       numtmp = MAX2(numtmp, DST_TMP);
    }
-   if (OPCS(XPD)) {
-      newlen += XPD_GROW * OPCS(XPD);
-      numtmp = MAX2(numtmp, XPD_TMP);
-   }
    if (OPCS(SCS)) {
       newlen += SCS_GROW * OPCS(SCS);
       numtmp = MAX2(numtmp, SCS_TMP);
    }
    if (OPCS(LRP)) {
       newlen += LRP_GROW * OPCS(LRP);
       numtmp = MAX2(numtmp, LRP_TMP);
    }
    if (OPCS(FRC)) {
       newlen += FRC_GROW * OPCS(FRC);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
index f65d915..709a63a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
@@ -48,21 +48,20 @@ struct tgsi_lowering_config
     *     same was as linking other outs/ins).
     */
    unsigned color_two_side:1;
 
    /* TODO support for alpha_to_one as well?? */
 
    /* Individual OPC lowerings, if lower_<opc> is TRUE then
     * enable lowering of TGSI_OPCODE_<opc>
     */
    unsigned lower_DST:1;
-   unsigned lower_XPD:1;
    unsigned lower_SCS:1;
    unsigned lower_LRP:1;
    unsigned lower_FRC:1;
    unsigned lower_POW:1;
    unsigned lower_LIT:1;
    unsigned lower_EXP:1;
    unsigned lower_LOG:1;
    unsigned lower_DP4:1;
    unsigned lower_DP3:1;
    unsigned lower_DP2:1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 68b4af8..111edf3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -56,21 +56,20 @@ OP13(MAD)
 OP12_TEX(TEX_LZ)
 OP13(LRP)
 OP11(SQRT)
 OP11(FRC)
 OP12_TEX(TXF_LZ)
 OP11(FLR)
 OP11(ROUND)
 OP11(EX2)
 OP11(LG2)
 OP12(POW)
-OP12(XPD)
 OP11(COS)
 OP11(DDX)
 OP11(DDY)
 OP11(DDX_FINE)
 OP11(DDY_FINE)
 OP00(KILL)
 OP11(PK2H)
 OP11(PK2US)
 OP11(PK4B)
 OP11(PK4UB)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 54767a7..3441907 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -343,32 +343,20 @@ This instruction replicates its result.
 
 
 .. opcode:: POW - Power
 
 This instruction replicates its result.
 
 .. math::
 
   dst = src0.x^{src1.x}
 
-.. opcode:: XPD - Cross Product
-
-.. math::
-
-  dst.x = src0.y \times src1.z - src1.y \times src0.z
-
-  dst.y = src0.z \times src1.x - src1.z \times src0.x
-
-  dst.z = src0.x \times src1.y - src1.x \times src0.y
-
-  dst.w = 1
-
 
 .. opcode:: COS - Cosine
 
 This instruction replicates its result.
 
 .. math::
 
   dst = \cos{src.x}
 
 
@@ -3656,21 +3644,21 @@ is known all three should be at least 1. If it is unknown they should all be set
 to 0 or not set.
 
 MUL_ZERO_WINS
 """""""""""""
 
 The MUL TGSI operation (FP32 multiplication) will return 0 if either
 of the operands are equal to 0. That means that 0 * Inf = 0. This
 should be set the same way for an entire pipeline. Note that this
 applies not only to the literal MUL TGSI opcode, but all FP32
 multiplications implied by other operations, such as MAD, FMA, DP2,
-DP3, DP4, DST, LOG, LRP, XPD, and possibly others. If there is a
+DP3, DP4, DST, LOG, LRP, and possibly others. If there is a
 mismatch between shaders, then it is unspecified whether this behavior
 will be enabled.
 
 FS_POST_DEPTH_COVERAGE
 """"""""""""""""""""""
 
 When enabled, the input for TGSI_SEMANTIC_SAMPLEMASK will exclude samples
 that have failed the depth/stencil tests. This is only valid when
 FS_EARLY_DEPTH_STENCIL is also specified.
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index 3ccb737..4f09f71 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -2310,21 +2310,20 @@ etna_compile_shader(struct etna_shader_variant *v)
 
    struct tgsi_lowering_config lconfig = {
       .lower_SCS = specs->has_sin_cos_sqrt,
       .lower_FLR = !specs->has_sign_floor_ceil,
       .lower_CEIL = !specs->has_sign_floor_ceil,
       .lower_POW = true,
       .lower_EXP = true,
       .lower_LOG = true,
       .lower_DP2 = true,
       .lower_TRUNC = true,
-      .lower_XPD = true
    };
 
    c = CALLOC_STRUCT(etna_compile);
    if (!c)
       return false;
 
    memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
 
    const struct tgsi_token *tokens = v->shader->tokens;
 
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
index da06e16..fb97e9e 100644
--- a/src/gallium/drivers/i915/i915_fpc_optimize.c
+++ b/src/gallium/drivers/i915/i915_fpc_optimize.c
@@ -111,21 +111,20 @@ static const struct {
    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
    [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
    [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
-   [ TGSI_OPCODE_XPD     ] = { false,  false,                  0,  1,  2 },
 };
 
 static boolean op_has_dst(unsigned opcode)
 {
    return (op_table[opcode].num_dst > 0);
 }
 
 static int op_num_dst(unsigned opcode)
 {
    return op_table[opcode].num_dst;
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 22a42ee..2faab33 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -1020,46 +1020,20 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_TXB:
       emit_tex(p, inst, T0_TEXLDB, fs);
       break;
 
    case TGSI_OPCODE_TXP:
       emit_tex(p, inst, T0_TEXLDP, fs);
       break;
 
-   case TGSI_OPCODE_XPD:
-      /* Cross product:
-       *      result.x = src0.y * src1.z - src0.z * src1.y;
-       *      result.y = src0.z * src1.x - src0.x * src1.z;
-       *      result.z = src0.x * src1.y - src0.y * src1.x;
-       *      result.w = undef;
-       */
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-      tmp = i915_get_utemp(p);
-
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_ALL, 0,
-                      swizzle(src0, Z, X, Y, ONE),
-                      swizzle(src1, Y, Z, X, ONE), 0);
-
-      i915_emit_arith(p,
-                      A0_MAD,
-                      get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      swizzle(src0, Y, Z, X, ONE),
-                      swizzle(src1, Z, X, Y, ONE),
-                      negate(tmp, 1, 1, 1, 0));
-      break;
-
    default:
       i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
       p->error = 1;
       return;
    }
 
    i915_release_utemps(p);
 }
 
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index c06e74e..a862f98 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -339,28 +339,20 @@ unsigned int Instruction::srcMask(unsigned int s) const
       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
          mask |= 0x8;
          break;
       default:
          break;
       }
    }
       return mask;
    case TGSI_OPCODE_TXQ:
       return 1;
-   case TGSI_OPCODE_XPD:
-   {
-      unsigned int x = 0;
-      if (mask & 1) x |= 0x6;
-      if (mask & 2) x |= 0x5;
-      if (mask & 4) x |= 0x3;
-      return x;
-   }
    case TGSI_OPCODE_D2I:
    case TGSI_OPCODE_D2U:
    case TGSI_OPCODE_D2F:
    case TGSI_OPCODE_DSLT:
    case TGSI_OPCODE_DSGE:
    case TGSI_OPCODE_DSEQ:
    case TGSI_OPCODE_DSNE:
    case TGSI_OPCODE_U64SEQ:
    case TGSI_OPCODE_U64SNE:
    case TGSI_OPCODE_I64SLT:
@@ -3340,39 +3332,20 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          src1 = fetchSrc(1, c);
          src2 = fetchSrc(2, c);
          mkOp3(OP_MAD, TYPE_F32, dst0[c],
                mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
             ->dnz = info->io.mul_zero_wins;
       }
       break;
    case TGSI_OPCODE_LIT:
       handleLIT(dst0);
       break;
-   case TGSI_OPCODE_XPD:
-      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
-         if (c < 3) {
-            val0 = getSSA();
-            src0 = fetchSrc(1, (c + 1) % 3);
-            src1 = fetchSrc(0, (c + 2) % 3);
-            mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
-               ->dnz = info->io.mul_zero_wins;
-            mkOp1(OP_NEG, TYPE_F32, val0, val0);
-
-            src0 = fetchSrc(0, (c + 1) % 3);
-            src1 = fetchSrc(1, (c + 2) % 3);
-            mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
-               ->dnz = info->io.mul_zero_wins;
-         } else {
-            loadImm(dst0[c], 1.0f);
-         }
-      }
-      break;
    case TGSI_OPCODE_ISSG:
    case TGSI_OPCODE_SSG:
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
          src0 = fetchSrc(0, c);
          val0 = getScratch();
          val1 = getScratch();
          mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
          mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
          if (srcTy == TYPE_F32)
             mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
index 5556e0c..89d5b93 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
@@ -3,21 +3,20 @@
 
 /* Vertex programs instruction set
  *
  * 128bit opcodes, split into 4 32-bit ones for ease of use.
  *
  * Non-native instructions
  *   ABS - MOV + NV40_VP_INST0_DEST_ABS
  *   POW - EX2 + MUL + LG2
  *   SUB - ADD, second source negated
  *   SWZ - MOV
- *   XPD -
  *
  * Register access
  *   - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
  *   - Only one CONST can be accessed per-instruction (move extras into TEMPs)
  *
  * Relative Addressing
  *   According to the value returned for
  *   MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
  *
  *   there are only two address registers available.  The destination in the
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index a3ed5c6..7d006fb 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -767,25 +767,20 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
                 break;
         case TGSI_OPCODE_TXL:
                 if(fpc->is_nv4x)
                         nvfx_fp_emit(fpc, tex(sat, TXL_NV40, unit, dst, mask, src[0], none, none));
                 else /* unsupported on nv30, use TEX and hope they like it */
                         nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none));
                 break;
         case TGSI_OPCODE_TXP:
                 nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
                 break;
-   case TGSI_OPCODE_XPD:
-      tmp = nvfx_src(temp(fpc));
-      nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
-      nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
-      break;
 
    case TGSI_OPCODE_IF:
       // MOVRC0 R31 (TR0.xyzw), R<src>:
       // IF (NE.xxxx) ELSE <else> END <end>
       if(!fpc->is_nv4x)
          goto nv3x_cflow;
       nv40_fp_if(fpc, src[0]);
       break;
 
    case TGSI_OPCODE_ELSE:
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index e66d8af..f196c4f 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -157,21 +157,20 @@
  * Conditional execution
  *   TODO
  *
  * Non-native instructions:
  *   LIT
  *   LRP - MAD+MAD
  *   SUB - ADD, negate second source
  *   RSQ - LG2 + EX2
  *   POW - LG2 + MUL + EX2
  *   SCS - COS + SIN
- *   XPD
  *
  * NV40 Looping
  *   Loops appear to be fairly expensive on NV40 at least, the proprietary
  *   driver goes to a lot of effort to avoid using the native looping
  *   instructions.  If the total number of *executed* instructions between
  *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
  *   The maximum loop count is 255.
  *
  */
 
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index 8ba3d5a..83823a1 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -676,25 +676,20 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
       insn.cc_update = 1;
       nvfx_vp_emit(vpc, insn);
 
       nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
       nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none));
 
       insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none);
       insn.cc_test = NVFX_COND_LT;
       nvfx_vp_emit(vpc, insn);
       break;
-   case TGSI_OPCODE_XPD:
-      tmp = nvfx_src(temp(vpc));
-      nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
-      nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
-      break;
    case TGSI_OPCODE_IF:
       insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
       insn.cc_update = 1;
       nvfx_vp_emit(vpc, insn);
 
       reloc.location = vpc->vp->nr_insns;
       reloc.target = finst->Label.Label + 1;
       util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
 
       insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 78af124..fa6c0b9 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -50,21 +50,20 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_SLT: return RC_OPCODE_SLT;
         case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
         case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
         case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
         case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
         case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
         case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
         case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
         case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
         case TGSI_OPCODE_POW: return RC_OPCODE_POW;
-        case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
         case TGSI_OPCODE_COS: return RC_OPCODE_COS;
         case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
         case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
         case TGSI_OPCODE_KILL: return RC_OPCODE_KILP;
      /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */
      /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */
      /* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */
      /* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */
         case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ;
         case TGSI_OPCODE_SGT: return RC_OPCODE_SGT;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fd76c93..18d4bc4 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7837,92 +7837,20 @@ static int tgsi_ucmp(struct r600_shader_ctx *ctx)
 		alu.is_op3 = 1;
 		if (i == lasti)
 			alu.last = 1;
 		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 	return 0;
 }
 
-static int tgsi_xpd(struct r600_shader_ctx *ctx)
-{
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	static const unsigned int src0_swizzle[] = {2, 0, 1};
-	static const unsigned int src1_swizzle[] = {1, 2, 0};
-	struct r600_bytecode_alu alu;
-	uint32_t use_temp = 0;
-	int i, r;
-
-	if (inst->Dst[0].Register.WriteMask != 0xf)
-		use_temp = 1;
-
-	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP2_MUL;
-		if (i < 3) {
-			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
-			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
-		} else {
-			alu.src[0].sel = V_SQ_ALU_SRC_0;
-			alu.src[0].chan = i;
-			alu.src[1].sel = V_SQ_ALU_SRC_0;
-			alu.src[1].chan = i;
-		}
-
-		alu.dst.sel = ctx->temp_reg;
-		alu.dst.chan = i;
-		alu.dst.write = 1;
-
-		if (i == 3)
-			alu.last = 1;
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-
-	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_MULADD;
-
-		if (i < 3) {
-			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
-			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
-		} else {
-			alu.src[0].sel = V_SQ_ALU_SRC_0;
-			alu.src[0].chan = i;
-			alu.src[1].sel = V_SQ_ALU_SRC_0;
-			alu.src[1].chan = i;
-		}
-
-		alu.src[2].sel = ctx->temp_reg;
-		alu.src[2].neg = 1;
-		alu.src[2].chan = i;
-
-		if (use_temp)
-			alu.dst.sel = ctx->temp_reg;
-		else
-			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-		alu.dst.chan = i;
-		alu.dst.write = 1;
-		alu.is_op3 = 1;
-		if (i == 3)
-			alu.last = 1;
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
-	}
-	if (use_temp)
-		return tgsi_helper_copy(ctx, inst);
-	return 0;
-}
-
 static int tgsi_exp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bytecode_alu alu;
 	int r;
 	unsigned i;
 
 	/* result.x = 2^floor(src); */
 	if (inst->Dst[0].Register.WriteMask & 1) {
 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -9085,21 +9013,21 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
 	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
-	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
+	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
 	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
 	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9283,21 +9211,21 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
-	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
+	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
 	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
 	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
 	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9506,21 +9434,21 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[21]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
 	[25]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
 	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
 	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
 	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
 	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
-	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
+	[31]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[35]			= { ALU_OP0_NOP, tgsi_unsupported},
 	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
 	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
 	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
 	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
 	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 928330c..a325a56 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -2189,77 +2189,20 @@ emit_pow(struct svga_shader_emitter *emit,
       return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
                         dst, scalar(src(tmp), 0) );
    }
    else {
       return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
    }
 }
 
 
 /**
- * Translate/emit TGSI XPD (vector cross product) instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter *emit,
-         const struct tgsi_full_instruction *insn)
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   const struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
-   boolean need_dst_tmp = FALSE;
-
-   /* XPD can only output to a temporary */
-   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
-      need_dst_tmp = TRUE;
-
-   /* The dst reg must not be the same as src0 or src1*/
-   if (alias_src_dst(src0, dst) ||
-       alias_src_dst(src1, dst))
-      need_dst_tmp = TRUE;
-
-   if (need_dst_tmp) {
-      SVGA3dShaderDestToken tmp = get_temp( emit );
-
-      /* Obey DX9 restrictions on mask:
-       */
-      tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
-
-      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
-         return FALSE;
-
-      if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
-         return FALSE;
-   }
-   else {
-      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
-         return FALSE;
-   }
-
-   /* Need to emit 1.0 to dst.w?
-    */
-   if (dst.mask & TGSI_WRITEMASK_W) {
-      struct src_register one = get_one_immediate( emit );
-
-      if (!submit_op1(emit,
-                      inst_token( SVGA3DOP_MOV ),
-                      writemask(dst, TGSI_WRITEMASK_W),
-                      one))
-         return FALSE;
-   }
-
-   return TRUE;
-}
-
-
-/**
  * Emit a LRP (linear interpolation) instruction.
  */
 static boolean
 submit_lrp(struct svga_shader_emitter *emit,
            SVGA3dShaderDestToken dst,
            struct src_register src0,
            struct src_register src1,
            struct src_register src2)
 {
    SVGA3dShaderDestToken tmp;
@@ -2979,23 +2922,20 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
    case TGSI_OPCODE_ENDIF:
       return emit_endif( emit, insn );
 
    case TGSI_OPCODE_BGNLOOP:
       return emit_bgnloop( emit, insn );
    case TGSI_OPCODE_ENDLOOP:
       return emit_endloop( emit, insn );
    case TGSI_OPCODE_BRK:
       return emit_brk( emit, insn );
 
-   case TGSI_OPCODE_XPD:
-      return emit_xpd( emit, insn );
-
    case TGSI_OPCODE_KILL:
       return emit_kill( emit, insn );
 
    case TGSI_OPCODE_DST:
       return emit_dst_insn( emit, insn );
 
    case TGSI_OPCODE_LIT:
       return emit_lit( emit, insn );
 
    case TGSI_OPCODE_LRP:
@@ -3597,21 +3537,20 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
        emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
-       emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
        emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
       return TRUE;
 
    return FALSE;
 }
 
 
 /**
  * Do we need to create a looping constant?
  */
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index e57e78d..9d86f72 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -5203,131 +5203,20 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
    end_emit_instruction(emit);
 
    end_tex_swizzle(emit, &swz_info);
 
    free_temp_indexes(emit);
 
    return TRUE;
 }
 
 
-/*
- * Emit code for TGSI_OPCODE_XPD instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = src0.y * src1.z - src1.y * src0.z
-    * dst.y = src0.z * src1.x - src1.z * src0.x
-    * dst.z = src0.x * src1.y - src1.x * src0.y
-    * dst.w = 1
-    */
-   struct tgsi_full_src_register s0_xxxx =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register s0_yyyy =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register s0_zzzz =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
-
-   struct tgsi_full_src_register s1_xxxx =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register s1_yyyy =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register s1_zzzz =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
-
-   unsigned tmp1 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
-   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
-
-   unsigned tmp2 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
-   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
-   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
-
-   unsigned tmp3 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
-   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
-   struct tgsi_full_dst_register tmp3_dst_x =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
-   struct tgsi_full_dst_register tmp3_dst_y =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
-   struct tgsi_full_dst_register tmp3_dst_z =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
-   struct tgsi_full_dst_register tmp3_dst_w =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
-
-   /* Note: we put all the intermediate computations into tmp3 in case
-    * the XPD dest register is that same as one of the src regs (in which
-    * case we could clobber a src reg before we're done with it) .
-    *
-    * Note: we could get by with just one temp register instead of three
-    * since we're doing scalar operations and there's enough room in one
-    * temp for everything.
-    */
-
-   /* MUL tmp1, src0.y, src1.z */
-   /* MUL tmp2, src1.y, src0.z */
-   /* ADD tmp3.x, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
-                           &s0_yyyy, &s1_zzzz, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
-                           &s1_yyyy, &s0_zzzz, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MUL tmp1, src0.z, src1.x */
-   /* MUL tmp2, src1.z, src0.x */
-   /* ADD tmp3.y, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
-                           &s1_xxxx, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
-                           &s0_xxxx, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MUL tmp1, src0.x, src1.y */
-   /* MUL tmp2, src1.x, src0.y */
-   /* ADD tmp3.z, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
-                           &s1_yyyy, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
-                           &s0_yyyy, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MOV tmp3.w, 1.0 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-      struct tgsi_full_src_register one =
-         make_immediate_reg_float(emit, 1.0f);
-
-      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
-   }
-
-   /* MOV dst, tmp3 */
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
-                        inst->Instruction.Saturate);
-
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
 /**
  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
  */
 static boolean
 emit_txd(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
 {
    const uint unit = inst->Src[3].Register.Index;
    unsigned target = inst->Texture.Texture;
    int offsets[3];
@@ -5735,22 +5624,20 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
    case TGSI_OPCODE_TXL:
       return emit_txl_txb(emit, inst);
    case TGSI_OPCODE_TXD:
       return emit_txd(emit, inst);
    case TGSI_OPCODE_TXF:
       return emit_txf(emit, inst);
    case TGSI_OPCODE_TXQ:
       return emit_txq(emit, inst);
    case TGSI_OPCODE_UIF:
       return emit_if(emit, inst);
-   case TGSI_OPCODE_XPD:
-      return emit_xpd(emit, inst);
    case TGSI_OPCODE_UMUL_HI:
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_UDIV:
    case TGSI_OPCODE_IDIV:
       /* These cases use only the FIRST of two destination registers */
       return emit_simple_1dst(emit, inst, 2, 0);
    case TGSI_OPCODE_UMUL:
    case TGSI_OPCODE_UMOD:
    case TGSI_OPCODE_MOD:
       /* These cases use only the SECOND of two destination registers */
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index af0d025..c8f31be 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -361,21 +361,21 @@ struct tgsi_property_data {
 /* gap */
 #define TGSI_OPCODE_F2U64               22
 #define TGSI_OPCODE_F2I64               23
 #define TGSI_OPCODE_FRC                 24
 #define TGSI_OPCODE_TXF_LZ              25
 #define TGSI_OPCODE_FLR                 26
 #define TGSI_OPCODE_ROUND               27
 #define TGSI_OPCODE_EX2                 28
 #define TGSI_OPCODE_LG2                 29
 #define TGSI_OPCODE_POW                 30
-#define TGSI_OPCODE_XPD                 31
+/* gap */
 #define TGSI_OPCODE_U2I64               32
 #define TGSI_OPCODE_CLOCK               33
 #define TGSI_OPCODE_I2I64               34
 /* gap */
 #define TGSI_OPCODE_COS                 36
 #define TGSI_OPCODE_DDX                 37
 #define TGSI_OPCODE_DDY                 38
 #define TGSI_OPCODE_KILL                39 /* unconditional */
 #define TGSI_OPCODE_PK2H                40
 #define TGSI_OPCODE_PK2US               41
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index f405090..60d56af 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1581,20 +1581,43 @@ DECL_SPECIAL(SUB)
 DECL_SPECIAL(ABS)
 {
     struct ureg_program *ureg = tx->ureg;
     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
 
     ureg_MOV(ureg, dst, ureg_abs(src));
     return D3D_OK;
 }
 
+DECL_SPECIAL(XPD)
+{
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    struct ureg_dst tmp = tx_scratch(tx);
+    /* dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx, dst.w = 1. The MUL
+     * goes to a scratch temp, as dst may alias a source or be an output. */
+    ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
+             ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                          TGSI_SWIZZLE_X, 0),
+             ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, 0));
+    ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
+             ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, 0),
+             ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                                      TGSI_SWIZZLE_X, 0)), ureg_src(tmp));
+    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1));
+    return D3D_OK;
+}
+
 DECL_SPECIAL(M4x4)
 {
     return NineTranslateInstruction_Mkxn(tx, 4, 4);
 }
 
 DECL_SPECIAL(M4x3)
 {
     return NineTranslateInstruction_Mkxn(tx, 4, 3);
 }
 
@@ -2908,21 +2931,21 @@ struct sm1_op_info inst_table[] =
     _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
     _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
     _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
     _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
     _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
     _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
 
     _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
 
     _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
-    _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
+    _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
     _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
 
     _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
     _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
 
     /* More flow control */
     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
     _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 5f78026..d2ec23e 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -498,22 +498,20 @@ translate_opcode( unsigned op )
    case OPCODE_SIN:
       return TGSI_OPCODE_SIN;
    case OPCODE_SLT:
       return TGSI_OPCODE_SLT;
    case OPCODE_TEX:
       return TGSI_OPCODE_TEX;
    case OPCODE_TXB:
       return TGSI_OPCODE_TXB;
    case OPCODE_TXP:
       return TGSI_OPCODE_TXP;
-   case OPCODE_XPD:
-      return TGSI_OPCODE_XPD;
    case OPCODE_END:
       return TGSI_OPCODE_END;
    default:
       debug_assert( 0 );
       return TGSI_OPCODE_NOP;
    }
 }
 
 
 static void
@@ -561,25 +559,31 @@ compile_instruction(
 
    case OPCODE_SCS:
       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
       ureg_insn( ureg, 
                  translate_opcode( inst->Opcode ), 
                  dst, num_dst, 
                  src, num_src, 0 );
       break;
 
    case OPCODE_XPD:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
-      ureg_insn( ureg, 
-                 translate_opcode( inst->Opcode ), 
-                 dst, num_dst, 
-                 src, num_src, 0 );
+      ureg_MUL(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
+               ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                            TGSI_SWIZZLE_X, 0),
+               ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                            TGSI_SWIZZLE_Y, 0));
+      ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ),
+               ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
+                            TGSI_SWIZZLE_Y, 0),
+               ureg_negate(ureg_swizzle(src[1], TGSI_SWIZZLE_Y,
+                                        TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
+               ureg_src(dst[0]));
       break;
 
    case OPCODE_RSQ:
       ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
       break;
 
    case OPCODE_ABS:
       ureg_MOV(ureg, dst[0], ureg_abs(src[0]));
       break;
 
-- 
2.7.4



More information about the mesa-dev mailing list