[Spice-commits] 11 commits - target-arm/translate.c tcg/optimize.c tcg/tcg-op.h tcg/tcg.c

Wed Oct 17 22:11:41 PDT 2012

target-arm/translate.c |    2 
 tcg/optimize.c         |  471 ++++++++++++++++++++++++++++++++-----------------
 tcg/tcg-op.h           |   11 +
 tcg/tcg.c              |   53 +++++
 4 files changed, 378 insertions(+), 159 deletions(-)

New commits:
commit b6348f29d033d5a8a26f633d2ee94362595f32a4
Author: Peter Crosthwaite <peter.crosthwaite at petalogix.com>
Date:   Tue Oct 16 19:15:50 2012 +1000

    target-arm/translate: Fix RRX operands
    
    Instructions that both use the RRX second operand and update CS were
    incorrect, as the Carry flag was updated too early. An example of such an
    instruction would be:
    
    ands r12,r13,RRX
    
    Ands, because of the "s" flag will update the carry flag. But the RRX second
    operand rotates through the C flag which should happen before the update.
    Fixed the ordering of the two, the old carry is read by "r13,RRX" before being
    updated.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Reported-by: Vinesh Peringat <vineshp at xilinx.com>
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index c6840b7..daccb15 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -516,10 +516,10 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
             TCGv tmp = tcg_temp_new_i32();
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
commit 1414968a6aecd23cb037bc9e718d6f05ead2afaf
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:30 2012 -0700

    tcg: Optimize mulu2
    
    Like add2, do operand ordering, constant folding, and dead operand
    elimination.  The latter happens about 15% of all mulu2 during an
    x86_64 bios boot.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 05891ef..a06c8eb 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -543,6 +543,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             swap_commutative(args[0], &args[2], &args[4]);
             swap_commutative(args[1], &args[3], &args[5]);
             break;
+        case INDEX_op_mulu2_i32:
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
         case INDEX_op_brcond2_i32:
             if (swap_commutative2(&args[0], &args[2])) {
                 args[4] = tcg_swap_cond(args[4]);
@@ -831,6 +834,29 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_brcond2_i32:
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index a580cfe..8100a5a 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1027,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0eb407b..f0deea2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1338,6 +1338,25 @@ static void tcg_liveness_analysis(TCGContext *s)
             }
             goto do_not_remove;
 
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
commit 1305c451e67e3def030720013415103f5e0f5e11
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:29 2012 -0700

    tcg: Optimize half-dead add2/sub2
    
    When x86_64 guest is not in 64-bit mode, the high-part of the 64-bit
    add is dead.  When the host is 32-bit, we can simplify to 32-bit
    arithmetic.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 32cd0c6..0eb407b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1307,8 +1307,39 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This happens often for x86_64 guest when the
+               cpu mode is set to 32 bit.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                if (op == INDEX_op_add2_i32) {
+                    op = INDEX_op_add_i32;
+                } else {
+                    op = INDEX_op_sub_i32;
+                }
+                gen_opc_buf[op_index] = op;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1322,6 +1353,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     if (!dead_temps[arg])
                         goto do_not_remove;
                 }
+            do_remove:
                 tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
commit 212c328d615f6750659d39c6fd544d05be314fa1
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:28 2012 -0700

    tcg: Constant fold add2 and sub2
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index d9251e4..05891ef 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -796,6 +796,41 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_brcond2_i32:
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5518458..a580cfe 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
commit 6c4382f8f47d12eec24c7c5335141a9c78104016
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:27 2012 -0700

    tcg: Do constant folding on double-word comparisons
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 38027dc..d9251e4 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -398,6 +398,40 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     }
 }
 
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    TCGArg al = p1[0], ah = p1[1];
+    TCGArg bl = p2[0], bh = p2[1];
+
+    if (temps[bl].state == TCG_TEMP_CONST
+        && temps[bh].state == TCG_TEMP_CONST) {
+        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+        if (temps[al].state == TCG_TEMP_CONST
+            && temps[ah].state == TCG_TEMP_CONST) {
+            uint64_t a;
+            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        if (b == 0) {
+            switch (c) {
+            case TCG_COND_LTU:
+                return 0;
+            case TCG_COND_GEU:
+                return 1;
+            default:
+                break;
+            }
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
@@ -763,43 +797,60 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             goto do_default;
 
         case INDEX_op_brcond2_i32:
-            /* Simplify LT/GE comparisons vs zero to a single compare
-               vs the high word of the input.  */
-            if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == 0
-                && temps[args[3]].val == 0) {
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                 gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                 gen_args[0] = args[1];
                 gen_args[1] = args[3];
                 gen_args[2] = args[4];
                 gen_args[3] = args[5];
                 gen_args += 4;
-                args += 6;
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                break;
+            } else {
+                goto do_default;
             }
-            goto do_default;
+            args += 6;
+            break;
 
         case INDEX_op_setcond2_i32:
-            /* Simplify LT/GE comparisons vs zero to a single compare
-               vs the high word of the input.  */
-            if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[4]].state == TCG_TEMP_CONST
-                && temps[args[3]].val == 0
-                && temps[args[4]].val == 0) {
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
                 gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 gen_args[0] = args[0];
                 gen_args[1] = args[2];
                 gen_args[2] = args[4];
                 gen_args[3] = args[5];
                 gen_args += 4;
-                args += 6;
-                break;
+            } else {
+                goto do_default;
             }
-            goto do_default;
+            args += 6;
+            break;
 
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
commit 9519da7e39516c5cb5bd7146a849a5d8c0958105
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:26 2012 -0700

    tcg: Split out subroutines from do_constant_folding_cond
    
    We can re-use these for implementing double-word folding.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0a55352..38027dc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int32_t)x < (int32_t)y;
+    case TCG_COND_GE:
+        return (int32_t)x >= (int32_t)y;
+    case TCG_COND_LE:
+        return (int32_t)x <= (int32_t)y;
+    case TCG_COND_GT:
+        return (int32_t)x > (int32_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+        return x == y;
+    case TCG_COND_NE:
+        return x != y;
+    case TCG_COND_LT:
+        return (int64_t)x < (int64_t)y;
+    case TCG_COND_GE:
+        return (int64_t)x >= (int64_t)y;
+    case TCG_COND_LE:
+        return (int64_t)x <= (int64_t)y;
+    case TCG_COND_GT:
+        return (int64_t)x > (int64_t)y;
+    case TCG_COND_LTU:
+        return x < y;
+    case TCG_COND_GEU:
+        return x >= y;
+    case TCG_COND_LEU:
+        return x <= y;
+    case TCG_COND_GTU:
+        return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GT:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+    case TCG_COND_GTU:
+    case TCG_COND_NE:
+        return 0;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_EQ:
+        return 1;
+    default:
+        tcg_abort();
+    }
+}
+
 /* Return 2 if the condition can't be simplified, and the result
    of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
         switch (op_bits(op)) {
         case 32:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int32_t)temps[x].val < (int32_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int32_t)temps[x].val > (int32_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
         case 64:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int64_t)temps[x].val < (int64_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int64_t)temps[x].val > (int64_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
-        }
-    } else if (temps_are_copies(x, y)) {
-        switch (c) {
-        case TCG_COND_GT:
-        case TCG_COND_LTU:
-        case TCG_COND_LT:
-        case TCG_COND_GTU:
-        case TCG_COND_NE:
-            return 0;
-        case TCG_COND_GE:
-        case TCG_COND_GEU:
-        case TCG_COND_LE:
-        case TCG_COND_LEU:
-        case TCG_COND_EQ:
-            return 1;
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
         default:
-            break;
+            tcg_abort();
         }
+    } else if (temps_are_copies(x, y)) {
+        return do_constant_folding_cond_eq(c);
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
@@ -381,11 +396,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     } else {
         return 2;
     }
-
-    fprintf(stderr,
-            "Unrecognized bitness %d or condition %d in "
-            "do_constant_folding_cond.\n", op_bits(op), c);
-    tcg_abort();
 }
 
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
commit bc1473eff4d9a3b62d669b7232383f85eb82b376
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:25 2012 -0700

    tcg: Optimize double-word comparisons against zero
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0fd7db3..0a55352 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -752,6 +752,45 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             goto do_default;
 
+        case INDEX_op_brcond2_i32:
+            /* Simplify LT/GE comparisons vs zero to a single compare
+               vs the high word of the input.  */
+            if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                && temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0
+                && temps[args[3]].val == 0) {
+                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+                args += 6;
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_setcond2_i32:
+            /* Simplify LT/GE comparisons vs zero to a single compare
+               vs the high word of the input.  */
+            if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[3]].val == 0
+                && temps[args[4]].val == 0) {
+                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+                gen_args[0] = args[0];
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
commit 6e14e91b6681dac9614b7b09b561351813046193
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:24 2012 -0700

    tcg: Use common code when failing to optimize
    
    This saves a whole lot of repetitive code sequences.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9ecea70..0fd7db3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -645,6 +645,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             gen_args += 2;
             args += 2;
             break;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
@@ -657,14 +658,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
             }
-            gen_args += 2;
-            args += 2;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -688,15 +687,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args += 3;
+                args += 3;
+                break;
             }
-            args += 3;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -706,33 +701,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                       | ((temps[args[2]].val & tmp) << args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args += 5;
+                args += 5;
+                break;
             }
-            args += 5;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(brcond):
             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
             if (tmp != 2) {
@@ -744,17 +728,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 }
-            } else {
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(movcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
@@ -769,18 +747,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args[5] = args[5];
-                gen_args += 6;
+                args += 6;
+                break;
             }
-            args += 6;
-            break;
+            goto do_default;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -799,11 +770,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 i--;
             }
             break;
+
         default:
-            /* Default case: we do know nothing about operation so no
-               propagation is done.  We trash everything if the operation
-               is the end of a basic block, otherwise we only trash the
-               output args.  */
+        do_default:
+            /* Default case: we know nothing about operation (or were unable
+               to compute the operation result) so no propagation is done.
+               We trash everything if the operation is the end of a basic
+               block, otherwise we only trash the output args.  */
             if (def->flags & TCG_OPF_BB_END) {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
commit 0bfcb86538d0bcffa3ef5434810f91aa861431ce
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:23 2012 -0700

    tcg: Swap commutative double-word comparisons
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8d74186..9ecea70 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -405,6 +405,22 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
     return false;
 }
 
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    int sum = 0;
+    sum += temps[p1[0]].state == TCG_TEMP_CONST;
+    sum += temps[p1[1]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    if (sum > 0) {
+        TCGArg t;
+        t = p1[0], p1[0] = p2[0], p2[0] = t;
+        t = p1[1], p1[1] = p2[1], p2[1] = t;
+        return true;
+    }
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -483,6 +499,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             swap_commutative(args[0], &args[2], &args[4]);
             swap_commutative(args[1], &args[3], &args[5]);
             break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
         default:
             break;
         }
commit 1e484e61e2d9387c18a40e9e239b304003f5d183
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:22 2012 -0700

    tcg: Canonicalize add2 operand ordering
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index ff4ddb2..8d74186 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -478,6 +478,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (swap_commutative(args[0], &args[4], &args[3])) {
                 args[5] = tcg_invert_cond(args[5]);
             }
+            break;
+        case INDEX_op_add2_i32:
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
         default:
             break;
         }
commit 24c9ae4eba5eec59256d0d0ace4d868a19f87528
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 2 11:32:21 2012 -0700

    tcg: Split out swap_commutative as a subroutine
    
    Reduces code duplication and prefers
    
      movcond d, c1, c2, const, s
    to
      movcond d, c1, c2, s, const
    
    It also prefers
    
      add r, r, c
    over
      add r, c, r
    
    when both inputs are known constants.  This doesn't matter for true add, as
    we will fully constant fold that.  But it matters for a follow-on patch using
    this routine for add2 which may not be fully foldable.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0e..ff4ddb2 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -388,6 +388,23 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     tcg_abort();
 }
 
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    TCGArg a1 = *p1, a2 = *p2;
+    int sum = 0;
+    sum += temps[a1].state == TCG_TEMP_CONST;
+    sum -= temps[a2].state == TCG_TEMP_CONST;
+
+    /* Prefer the constant in second argument, and then the form
+       op a, a, b, which is better handled on non-RISC hosts. */
+    if (sum > 0 || (sum == 0 && dest == a2)) {
+        *p1 = a2;
+        *p2 = a1;
+        return true;
+    }
+    return false;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -397,7 +414,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
-    TCGCond cond;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +456,28 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            /* Prefer the constant in second argument, and then the form
-               op a, a, b, which is better handled on non-RISC hosts. */
-            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
-                && temps[args[2]].state != TCG_TEMP_CONST)) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-            }
+            swap_commutative(args[0], &args[1], &args[2]);
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state != TCG_TEMP_CONST) {
-                tmp = args[0];
-                args[0] = args[1];
-                args[1] = tmp;
+            if (swap_commutative(-1, &args[0], &args[1])) {
                 args[2] = tcg_swap_cond(args[2]);
             }
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
+            if (swap_commutative(args[0], &args[1], &args[2])) {
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
         CASE_OP_32_64(movcond):
-            cond = args[5];
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-                cond = tcg_swap_cond(cond);
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
             }
             /* For movcond, we canonicalize the "false" input reg to match
                the destination reg so that the tcg backend can implement
                a "move if true" operation.  */
-            if (args[0] == args[3]) {
-                tmp = args[3];
-                args[3] = args[4];
-                args[4] = tmp;
-                cond = tcg_invert_cond(cond);
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
             }
-            args[5] = cond;
         default:
             break;
         }