[Mesa-dev] [PATCH 2/3] mesa: add TGSI_OPCODE_ROUNDEVEN

Mon Jul 2 15:59:59 PDT 2012

This instruction lets drivers implement GLSL's round() and roundEven()
differently if they need or want to.  For now, ROUNDEVEN just does whatever
ROUND did.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |    3 ++-
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |    1 +
 src/gallium/auxiliary/tgsi/tgsi_info.c             |    2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h       |    1 +
 src/gallium/auxiliary/tgsi/tgsi_util.c             |    1 +
 .../drivers/nv50/codegen/nv50_ir_from_tgsi.cpp     |    1 +
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |    1 +
 src/gallium/drivers/r600/r600_shader.c             |    6 +++---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        |    2 ++
 src/gallium/drivers/svga/svga_tgsi_insn.c          |    1 +
 src/gallium/include/pipe/p_shader_tokens.h         |    1 +
 11 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 17f288f..a307568 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1267,7 +1267,7 @@ recip_sqrt_emit_cpu(
                                                          emit_data->args[0]);
 }
 
-/* TGSI_OPCODE_ROUND (CPU Only) */
+/* TGSI_OPCODE_ROUND,ROUNDEVEN (CPU Only) */
 static void
 round_emit_cpu(
    const struct lp_build_tgsi_action * action,
@@ -1597,6 +1597,7 @@ lp_set_default_actions_cpu(
    bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_ROUNDEVEN].emit = round_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 5e23f5d..4c5e9d1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3550,6 +3550,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ROUNDEVEN:
       exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 8bf9aeb..d0e63a3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -60,7 +60,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 0, 0, 0, 0, 0, NONE, "", 20 },      /* removed */
    { 1, 3, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
    { 0, 0, 0, 0, 0, 0, NONE, "", 22 },      /* removed */
-   { 0, 0, 0, 0, 0, 0, NONE, "", 23 },      /* removed */
+   { 1, 1, 0, 0, 0, 0, COMP, "ROUNDEVEN", TGSI_OPCODE_ROUNDEVEN },
    { 1, 1, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
    { 1, 3, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
    { 1, 1, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 96b864f..a424795 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -65,6 +65,7 @@ OP11(FRC)
 OP13(CLAMP)
 OP11(FLR)
 OP11(ROUND)
+OP11(ROUNDEVEN)
 OP11(EX2)
 OP11(LG2)
 OP12(POW)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 36dc10d..cdbf1aa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -199,6 +199,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
    case TGSI_OPCODE_CLAMP:
    case TGSI_OPCODE_FLR:
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ROUNDEVEN:
    case TGSI_OPCODE_POW:
    case TGSI_OPCODE_ABS:
    case TGSI_OPCODE_COS:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 16f191d..0e6d21b 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -1978,6 +1978,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       }
       break;
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ROUNDEVEN:
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
          mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
          ->rnd = ROUND_NI;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 4cb08b5..83bd1da 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -58,6 +58,7 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
         case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
         case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
+        case TGSI_OPCODE_ROUNDEVEN: return RC_OPCODE_ROUND;
         case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
         case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
         case TGSI_OPCODE_POW: return RC_OPCODE_POW;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 4dd4243..7122781 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5215,7 +5215,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUNDEVEN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
@@ -5389,7 +5389,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUNDEVEN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
@@ -5563,7 +5563,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUNDEVEN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 9c2c449..c61baf0 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1070,6 +1070,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
 	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+	bld_base->op_actions[TGSI_OPCODE_ROUNDEVEN].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_ROUNDEVEN].intr_name = "llvm.AMDIL.round.nearest.";
 	bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
 	bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 64dfa55..75853aa 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -2559,6 +2559,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
        */
    case TGSI_OPCODE_CLAMP:
    case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_ROUNDEVEN:
    case TGSI_OPCODE_AND:
    case TGSI_OPCODE_OR:
    case TGSI_OPCODE_I2F:
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 6b58293..496d8f3 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -278,6 +278,7 @@ struct tgsi_property_data {
                                 /* gap */
 #define TGSI_OPCODE_DP2A                21
                                 /* gap */
+#define TGSI_OPCODE_ROUNDEVEN           23
 #define TGSI_OPCODE_FRC                 24
 #define TGSI_OPCODE_CLAMP               25
 #define TGSI_OPCODE_FLR                 26
-- 
1.7.3.4