[Spice-commits] 113 commits - configure coroutine-sigaltstack.c coroutine-ucontext.c cpu-exec.c disas/i386.c disas/m68k.c hw/grlib_apbuart.c hw/usb include/exec include/qemu include/sysemu monitor.c po/de_DE.po po/it.po po/messages.po target-alpha/helper.h target-alpha/int_helper.c target-alpha/translate.c target-arm/helper.c target-arm/helper.h target-arm/op_helper.c target-arm/translate.c target-cris/translate.c target-i386/cc_helper.c target-i386/cc_helper_template.h target-i386/cpu.c target-i386/cpu.h target-i386/helper.c target-i386/helper.h target-i386/int_helper.c target-i386/shift_helper_template.h target-i386/translate.c target-lm32/translate.c target-microblaze/translate.c target-mips/dsp_helper.c target-mips/helper.h target-mips/op_helper.c target-mips/translate.c target-openrisc/translate.c target-ppc/cpu.h target-ppc/helper.h target-ppc/int_helper.c target-ppc/kvm.c target-ppc/machine.c target-ppc/translate.c target-ppc/translate_init.c target-s390x/helper.h target-s390x/int_helper.c target-s390x/translate.c target-sh4/translate.c target-sparc/cpu.c target-sparc/cpu.h target-sparc/helper.c target-sparc/helper.h target-sparc/ldst_helper.c target-sparc/translate.c target-unicore32/translate.c target-xtensa/translate.c tcg-runtime.c tcg/README tcg/arm tcg/hppa tcg/i386 tcg/ia64 tcg/mips tcg/optimize.c tcg/ppc tcg/ppc64 tcg/s390 tcg/sparc tcg/tcg-op.h tcg/tcg-opc.h tcg/tcg-runtime.h tcg/tcg.c tcg/tcg.h tcg/tci tests/tcg ui/gtk.c user-exec.c

Gerd Hoffmann kraxel at kemper.freedesktop.org
Wed Feb 27 05:12:26 PST 2013


 configure                               |   21 
 coroutine-sigaltstack.c                 |   26 
 coroutine-ucontext.c                    |   27 
 cpu-exec.c                              |    6 
 disas/i386.c                            |    9 
 disas/m68k.c                            |   11 
 hw/grlib_apbuart.c                      |   52 
 hw/usb/hcd-xhci.c                       |    4 
 include/exec/cpu-defs.h                 |    2 
 include/qemu/log.h                      |    8 
 include/sysemu/os-win32.h               |    8 
 monitor.c                               |    6 
 po/de_DE.po                             |    4 
 po/it.po                                |    4 
 po/messages.po                          |    4 
 target-alpha/helper.h                   |    1 
 target-alpha/int_helper.c               |    7 
 target-alpha/translate.c                |   20 
 target-arm/helper.c                     |    5 
 target-arm/helper.h                     |    5 
 target-arm/op_helper.c                  |   30 
 target-arm/translate.c                  |  157 +
 target-cris/translate.c                 |   46 
 target-i386/cc_helper.c                 |  260 +--
 target-i386/cc_helper_template.h        |  261 +--
 target-i386/cpu.c                       |   18 
 target-i386/cpu.h                       |   26 
 target-i386/helper.c                    |   13 
 target-i386/helper.h                    |   13 
 target-i386/int_helper.c                |   69 
 target-i386/shift_helper_template.h     |   12 
 target-i386/translate.c                 | 2703 ++++++++++++++++++--------------
 target-lm32/translate.c                 |    2 
 target-microblaze/translate.c           |    2 
 target-mips/dsp_helper.c                |    4 
 target-mips/helper.h                    |    2 
 target-mips/op_helper.c                 |   12 
 target-mips/translate.c                 |   48 
 target-openrisc/translate.c             |    2 
 target-ppc/cpu.h                        |   24 
 target-ppc/helper.h                     |    2 
 target-ppc/int_helper.c                 |   56 
 target-ppc/kvm.c                        |    4 
 target-ppc/machine.c                    |    8 
 target-ppc/translate.c                  |  676 +++-----
 target-ppc/translate_init.c             |    4 
 target-s390x/helper.h                   |    1 
 target-s390x/int_helper.c               |    8 
 target-s390x/translate.c                |    3 
 target-sh4/translate.c                  |   30 
 target-sparc/cpu.c                      |    6 
 target-sparc/cpu.h                      |    1 
 target-sparc/helper.c                   |   11 
 target-sparc/helper.h                   |    1 
 target-sparc/ldst_helper.c              |    6 
 target-sparc/translate.c                |   92 -
 target-unicore32/translate.c            |   83 
 target-xtensa/translate.c               |   49 
 tcg-runtime.c                           |   16 
 tcg/README                              |   30 
 tcg/arm/tcg-target.c                    |    4 
 tcg/arm/tcg-target.h                    |    1 
 tcg/hppa/tcg-target.h                   |    1 
 tcg/i386/tcg-target.c                   |   49 
 tcg/i386/tcg-target.h                   |    8 
 tcg/ia64/tcg-target.h                   |    8 
 tcg/mips/tcg-target.h                   |    1 
 tcg/optimize.c                          |    5 
 tcg/ppc/tcg-target.h                    |    1 
 tcg/ppc64/tcg-target.h                  |    8 
 tcg/s390/tcg-target.h                   |    8 
 tcg/sparc/tcg-target.c                  |    6 
 tcg/sparc/tcg-target.h                  |    8 
 tcg/tcg-op.h                            |  228 ++
 tcg/tcg-opc.h                           |   12 
 tcg/tcg-runtime.h                       |    2 
 tcg/tcg.c                               |   28 
 tcg/tcg.h                               |   10 
 tcg/tci/tcg-target.h                    |    9 
 tests/tcg/mips/mips32-dspr2/mulq_rs_w.c |    2 
 tests/tcg/mips/mips32-dspr2/mulq_s_ph.c |   15 
 tests/tcg/mips/mips32-dspr2/mulq_s_w.c  |    2 
 tests/tcg/test-i386.c                   |   10 
 ui/gtk.c                                |  104 +
 user-exec.c                             |    2 
 85 files changed, 3091 insertions(+), 2462 deletions(-)

New commits:
commit a345481baa2b2fb3d54f8c9ddb58dfcaf75786df
Author: Petar Jovanovic <petarj at mips.com>
Date:   Thu Feb 7 19:36:09 2013 +0100

    target-mips: fix for sign-issue in MULQ_W helper
    
    Correct sign-propagation before multiplication in MULQ_W helper.
    The change also fixes previously incorrect expected values in the
    tests for MULQ_RS.W and MULQ_S.W.
    
    Signed-off-by: Petar Jovanovic <petarj at mips.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
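
For context, a minimal C sketch (an editor's illustration, not part of the
patch) of why the operand type matters: with uint32_t operands the 64-bit
product is formed from zero-extended values, so a negative Q31 input such as
0x80001234 is treated as a large positive number.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t rs = 0x80001234;   /* a negative Q31 fixed-point value */

        /* old declaration (uint32_t): zero-extends into the product */
        uint32_t u = rs;
        int64_t wrong = (int64_t)u * (int64_t)u;

        /* new declaration (int32_t): sign-extends before multiplying */
        int32_t s = (int32_t)rs;
        int64_t right = (int64_t)s * (int64_t)s;

        printf("zero-extended: 0x%016llx\n", (unsigned long long)wrong);
        printf("sign-extended: 0x%016llx\n", (unsigned long long)right);
        return 0;
    }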

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index 6781da8..841f47b 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -2689,7 +2689,7 @@ MAQ_SA_W(maq_sa_w_phr, 0);
 target_ulong helper_##name(target_ulong rs, target_ulong rt,   \
                            CPUMIPSState *env)                  \
 {                                                              \
-    uint32_t rs_t, rt_t;                                       \
+    int32_t rs_t, rt_t;                                        \
     int32_t tempI;                                             \
     int64_t tempL;                                             \
                                                                \
diff --git a/tests/tcg/mips/mips32-dspr2/mulq_rs_w.c b/tests/tcg/mips/mips32-dspr2/mulq_rs_w.c
index 669405f..7ba633b 100644
--- a/tests/tcg/mips/mips32-dspr2/mulq_rs_w.c
+++ b/tests/tcg/mips/mips32-dspr2/mulq_rs_w.c
@@ -8,7 +8,7 @@ int main()
 
     rs = 0x80001234;
     rt = 0x80004321;
-    result = 0x80005555;
+    result = 0x7FFFAAAB;
 
     __asm
         ("mulq_rs.w %0, %1, %2\n\t"
diff --git a/tests/tcg/mips/mips32-dspr2/mulq_s_w.c b/tests/tcg/mips/mips32-dspr2/mulq_s_w.c
index df148b7..9c2be06 100644
--- a/tests/tcg/mips/mips32-dspr2/mulq_s_w.c
+++ b/tests/tcg/mips/mips32-dspr2/mulq_s_w.c
@@ -8,7 +8,7 @@ int main()
 
     rs = 0x80001234;
     rt = 0x80004321;
-    result = 0x80005555;
+    result = 0x7FFFAAAB;
 
     __asm
         ("mulq_s.w %0, %1, %2\n\t"
commit 9c19eb1e205b29018f6f61c5f43db6abbe7dc0e5
Author: Petar Jovanovic <petarj at mips.com>
Date:   Wed Feb 6 18:05:25 2013 +0100

    target-mips: fix for incorrect multiplication with MULQ_S.PH
    
    The change corrects a sign-related issue with MULQ_S.PH. It also extends
    the existing test with a case that triggers the issue.
    
    Signed-off-by: Petar Jovanovic <petarj at mips.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
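
A minimal sketch (editor's illustration, not QEMU code) of the Q15 issue: the
product has to be formed from sign-extended 16-bit operands before the
saturating left shift, otherwise a negative operand such as 0x8000 produces a
positive product.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t a = 0x8000, b = 0x0ffc;            /* -1.0 and ~0.1248 in Q15 */

        int32_t wrong = (uint32_t)a * (uint32_t)b;  /* 0x07FE0000, positive */
        int32_t right = (int16_t)a * (int16_t)b;    /* 0xF8020000, negative */

        assert(wrong > 0 && right < 0);
        return 0;
    }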

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index 96cb044..6781da8 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -652,7 +652,7 @@ static inline int32_t mipsdsp_sat16_mul_q15_q15(uint16_t a, uint16_t b,
         temp = 0x7FFF0000;
         set_DSPControl_overflow_flag(1, 21, env);
     } else {
-        temp = ((uint32_t)a * (uint32_t)b);
+        temp = (int16_t)a * (int16_t)b;
         temp = temp << 1;
     }
 
diff --git a/tests/tcg/mips/mips32-dspr2/mulq_s_ph.c b/tests/tcg/mips/mips32-dspr2/mulq_s_ph.c
index d0f7674..00e0155 100644
--- a/tests/tcg/mips/mips32-dspr2/mulq_s_ph.c
+++ b/tests/tcg/mips/mips32-dspr2/mulq_s_ph.c
@@ -6,6 +6,21 @@ int main()
     int rd, rs, rt, dsp;
     int result, resultdsp;
 
+    rs = 0x80000000;
+    rt = 0x0ffc0000;
+    result = 0xF0040000;
+    resultdsp = 0;
+
+    __asm
+        ("mulq_s.ph %0, %2, %3\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rd), "=r"(dsp)
+         : "r"(rs), "r"(rt)
+        );
+    dsp = (dsp >> 21) & 0x01;
+    assert(rd  == result);
+    assert(dsp == resultdsp);
+
     rs = 0x80001234;
     rt = 0x80004321;
     result = 0x7FFF098B;
commit d2123a079d983677ec8333940aa4bec803d98cde
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:26 2013 -0800

    target-xtensa: Use add2/sub2 for mac
    
    Cc: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
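
Behavioural sketch (editor's illustration; it assumes the documented TCG
semantics where add2 adds two double-word values held as 32-bit lo/hi pairs)
of the operation that now replaces the 64-bit temporaries in the MAC16
accumulate path; sub2 has the same shape with a borrow instead of a carry.

    #include <stdint.h>

    /* rh:rl = ah:al + bh:bl, with the carry from the low half propagated */
    void add2_i32(uint32_t *rl, uint32_t *rh,
                  uint32_t al, uint32_t ah,
                  uint32_t bl, uint32_t bh)
    {
        uint32_t lo = al + bl;
        uint32_t carry = lo < al;       /* unsigned overflow of the low add */
        *rl = lo;
        *rh = ah + bh + carry;
    }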

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index b41d12c..11e06a3 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2487,27 +2487,24 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
                             tcg_gen_sari_i32(cpu_SR[ACCHI], cpu_SR[ACCLO], 31);
                         }
                     } else {
-                        TCGv_i32 res = tcg_temp_new_i32();
-                        TCGv_i64 res64 = tcg_temp_new_i64();
-                        TCGv_i64 tmp = tcg_temp_new_i64();
-
-                        tcg_gen_mul_i32(res, m1, m2);
-                        tcg_gen_ext_i32_i64(res64, res);
-                        tcg_gen_concat_i32_i64(tmp,
-                                cpu_SR[ACCLO], cpu_SR[ACCHI]);
+                        TCGv_i32 lo = tcg_temp_new_i32();
+                        TCGv_i32 hi = tcg_temp_new_i32();
+
+                        tcg_gen_mul_i32(lo, m1, m2);
+                        tcg_gen_sari_i32(hi, lo, 31);
                         if (op == MAC16_MULA) {
-                            tcg_gen_add_i64(tmp, tmp, res64);
+                            tcg_gen_add2_i32(cpu_SR[ACCLO], cpu_SR[ACCHI],
+                                             cpu_SR[ACCLO], cpu_SR[ACCHI],
+                                             lo, hi);
                         } else {
-                            tcg_gen_sub_i64(tmp, tmp, res64);
+                            tcg_gen_sub2_i32(cpu_SR[ACCLO], cpu_SR[ACCHI],
+                                             cpu_SR[ACCLO], cpu_SR[ACCHI],
+                                             lo, hi);
                         }
-                        tcg_gen_trunc_i64_i32(cpu_SR[ACCLO], tmp);
-                        tcg_gen_shri_i64(tmp, tmp, 32);
-                        tcg_gen_trunc_i64_i32(cpu_SR[ACCHI], tmp);
                         tcg_gen_ext8s_i32(cpu_SR[ACCHI], cpu_SR[ACCHI]);
 
-                        tcg_temp_free(res);
-                        tcg_temp_free_i64(res64);
-                        tcg_temp_free_i64(tmp);
+                        tcg_temp_free_i32(lo);
+                        tcg_temp_free_i32(hi);
                     }
                     tcg_temp_free(m1);
                     tcg_temp_free(m2);
commit c9cda20bc55e549d31e791bfa55eabe3642b73a7
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:25 2013 -0800

    target-xtensa: Use mul*2 for mul*hi
    
    Cc: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
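
For reference, a sketch (editor's illustration, not QEMU code) of what the
mulu2/muls2 operations compute: both 32-bit halves of the 32x32->64 product in
one step, so MULUH/MULSH can take the high half directly.

    #include <stdint.h>

    void mulu2_i32(uint32_t *lo, uint32_t *hi, uint32_t a, uint32_t b)
    {
        uint64_t p = (uint64_t)a * b;
        *lo = (uint32_t)p;
        *hi = (uint32_t)(p >> 32);
    }

    void muls2_i32(uint32_t *lo, uint32_t *hi, int32_t a, int32_t b)
    {
        int64_t p = (int64_t)a * b;
        *lo = (uint32_t)p;
        *hi = (uint32_t)((uint64_t)p >> 32);
    }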

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 7029ac4..b41d12c 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1652,24 +1652,16 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
             case 11: /*MULSHi*/
                 HAS_OPTION(XTENSA_OPTION_32_BIT_IMUL_HIGH);
                 {
-                    TCGv_i64 r = tcg_temp_new_i64();
-                    TCGv_i64 s = tcg_temp_new_i64();
-                    TCGv_i64 t = tcg_temp_new_i64();
+                    TCGv lo = tcg_temp_new();
 
                     if (OP2 == 10) {
-                        tcg_gen_extu_i32_i64(s, cpu_R[RRR_S]);
-                        tcg_gen_extu_i32_i64(t, cpu_R[RRR_T]);
+                        tcg_gen_mulu2_i32(lo, cpu_R[RRR_R],
+                                          cpu_R[RRR_S], cpu_R[RRR_T]);
                     } else {
-                        tcg_gen_ext_i32_i64(s, cpu_R[RRR_S]);
-                        tcg_gen_ext_i32_i64(t, cpu_R[RRR_T]);
+                        tcg_gen_muls2_i32(lo, cpu_R[RRR_R],
+                                          cpu_R[RRR_S], cpu_R[RRR_T]);
                     }
-                    tcg_gen_mul_i64(r, s, t);
-                    tcg_gen_shri_i64(r, r, 32);
-                    tcg_gen_trunc_i64_i32(cpu_R[RRR_R], r);
-
-                    tcg_temp_free_i64(r);
-                    tcg_temp_free_i64(s);
-                    tcg_temp_free_i64(t);
+                    tcg_temp_free(lo);
                 }
                 break;
 
commit d1f8cd839136337b713a05aaf11a5f189e81532d
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:24 2013 -0800

    target-unicore32: Use mul*2 for do_mult
    
    Cc: Guan Xuetao <gxt at mprc.pku.edu.cn>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index f4498bc..d5039e2 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -267,37 +267,6 @@ static void gen_exception(int excp)
     dead_tmp(tmp);
 }
 
-/* FIXME: Most targets have native widening multiplication.
-   It would be good to use that instead of a full wide multiply.  */
-/* 32x32->64 multiply.  Marks inputs as dead.  */
-static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
-{
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(tmp1, a);
-    dead_tmp(a);
-    tcg_gen_extu_i32_i64(tmp2, b);
-    dead_tmp(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
-}
-
-static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
-{
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
-
-    tcg_gen_ext_i32_i64(tmp1, a);
-    dead_tmp(a);
-    tcg_gen_ext_i32_i64(tmp2, b);
-    dead_tmp(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
-}
-
 #define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUUniCore32State, CF))
 
 /* Set CF to the top bit of var.  */
@@ -1219,38 +1188,6 @@ static void disas_coproc_insn(CPUUniCore32State *env, DisasContext *s,
     }
 }
 
-
-/* Store a 64-bit value to a register pair.  Clobbers val.  */
-static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
-{
-    TCGv tmp;
-    tmp = new_tmp();
-    tcg_gen_trunc_i64_i32(tmp, val);
-    store_reg(s, rlow, tmp);
-    tmp = new_tmp();
-    tcg_gen_shri_i64(val, val, 32);
-    tcg_gen_trunc_i64_i32(tmp, val);
-    store_reg(s, rhigh, tmp);
-}
-
-/* load and add a 64-bit value from a register pair.  */
-static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
-{
-    TCGv_i64 tmp;
-    TCGv tmpl;
-    TCGv tmph;
-
-    /* Load 64-bit value rd:rn.  */
-    tmpl = load_reg(s, rlow);
-    tmph = load_reg(s, rhigh);
-    tmp = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
-    dead_tmp(tmpl);
-    dead_tmp(tmph);
-    tcg_gen_add_i64(val, val, tmp);
-    tcg_temp_free_i64(tmp);
-}
-
 /* data processing instructions */
 static void do_datap(CPUUniCore32State *env, DisasContext *s, uint32_t insn)
 {
@@ -1445,24 +1382,26 @@ static void do_datap(CPUUniCore32State *env, DisasContext *s, uint32_t insn)
 /* multiply */
 static void do_mult(CPUUniCore32State *env, DisasContext *s, uint32_t insn)
 {
-    TCGv tmp;
-    TCGv tmp2;
-    TCGv_i64 tmp64;
+    TCGv tmp, tmp2, tmp3, tmp4;
 
     if (UCOP_SET(27)) {
         /* 64 bit mul */
         tmp = load_reg(s, UCOP_REG_M);
         tmp2 = load_reg(s, UCOP_REG_N);
         if (UCOP_SET(26)) {
-            tmp64 = gen_muls_i64_i32(tmp, tmp2);
+            tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
         } else {
-            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
+            tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
         }
         if (UCOP_SET(25)) { /* mult accumulate */
-            gen_addq(s, tmp64, UCOP_REG_LO, UCOP_REG_HI);
-        }
-        gen_storeq_reg(s, UCOP_REG_LO, UCOP_REG_HI, tmp64);
-        tcg_temp_free_i64(tmp64);
+            tmp3 = load_reg(s, UCOP_REG_LO);
+            tmp4 = load_reg(s, UCOP_REG_HI);
+            tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, tmp3, tmp4);
+            dead_tmp(tmp3);
+            dead_tmp(tmp4);
+        }
+        store_reg(s, UCOP_REG_LO, tmp);
+        store_reg(s, UCOP_REG_HI, tmp2);
     } else {
         /* 32 bit mul */
         tmp = load_reg(s, UCOP_REG_M);
commit 1d3b708491b9d7dde573261fdee8ca0afc6980fd
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:23 2013 -0800

    target-sh4: Use mul*2 for dmul*
    
    Cc: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index c58d79a..d255066 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -833,36 +833,10 @@ static void _decode_opc(DisasContext * ctx)
         gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x300d:		/* dmuls.l Rm,Rn */
-	{
-	    TCGv_i64 tmp1 = tcg_temp_new_i64();
-	    TCGv_i64 tmp2 = tcg_temp_new_i64();
-
-	    tcg_gen_ext_i32_i64(tmp1, REG(B7_4));
-	    tcg_gen_ext_i32_i64(tmp2, REG(B11_8));
-	    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-	    tcg_gen_trunc_i64_i32(cpu_macl, tmp1);
-	    tcg_gen_shri_i64(tmp1, tmp1, 32);
-	    tcg_gen_trunc_i64_i32(cpu_mach, tmp1);
-
-	    tcg_temp_free_i64(tmp2);
-	    tcg_temp_free_i64(tmp1);
-	}
+        tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
 	return;
     case 0x3005:		/* dmulu.l Rm,Rn */
-	{
-	    TCGv_i64 tmp1 = tcg_temp_new_i64();
-	    TCGv_i64 tmp2 = tcg_temp_new_i64();
-
-	    tcg_gen_extu_i32_i64(tmp1, REG(B7_4));
-	    tcg_gen_extu_i32_i64(tmp2, REG(B11_8));
-	    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-	    tcg_gen_trunc_i64_i32(cpu_macl, tmp1);
-	    tcg_gen_shri_i64(tmp1, tmp1, 32);
-	    tcg_gen_trunc_i64_i32(cpu_mach, tmp1);
-
-	    tcg_temp_free_i64(tmp2);
-	    tcg_temp_free_i64(tmp1);
-	}
+        tcg_gen_mulu2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
 	return;
     case 0x600e:		/* exts.b Rm,Rn */
 	tcg_gen_ext8s_i32(REG(B11_8), REG(B7_4));
commit 528692a8a4fb6c545d818957e758d6ad70fa255c
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:22 2013 -0800

    target-sparc: Use mul*2 for multiply
    
    Cc: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index a5678b0..12276d5 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -642,39 +642,30 @@ static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 
 static inline void gen_op_multiply(TCGv dst, TCGv src1, TCGv src2, int sign_ext)
 {
-    TCGv_i32 r_src1, r_src2;
-    TCGv_i64 r_temp, r_temp2;
-
-    r_src1 = tcg_temp_new_i32();
-    r_src2 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(r_src1, src1);
-    tcg_gen_trunc_tl_i32(r_src2, src2);
-
-    r_temp = tcg_temp_new_i64();
-    r_temp2 = tcg_temp_new_i64();
-
+#if TARGET_LONG_BITS == 32
     if (sign_ext) {
-        tcg_gen_ext_i32_i64(r_temp, r_src2);
-        tcg_gen_ext_i32_i64(r_temp2, r_src1);
+        tcg_gen_muls2_tl(dst, cpu_y, src1, src2);
     } else {
-        tcg_gen_extu_i32_i64(r_temp, r_src2);
-        tcg_gen_extu_i32_i64(r_temp2, r_src1);
+        tcg_gen_mulu2_tl(dst, cpu_y, src1, src2);
     }
+#else
+    TCGv t0 = tcg_temp_new_i64();
+    TCGv t1 = tcg_temp_new_i64();
 
-    tcg_gen_mul_i64(r_temp2, r_temp, r_temp2);
-
-    tcg_gen_shri_i64(r_temp, r_temp2, 32);
-    tcg_gen_trunc_i64_tl(cpu_y, r_temp);
-    tcg_temp_free_i64(r_temp);
-    tcg_gen_andi_tl(cpu_y, cpu_y, 0xffffffff);
-
-    tcg_gen_trunc_i64_tl(dst, r_temp2);
+    if (sign_ext) {
+        tcg_gen_ext32s_i64(t0, src1);
+        tcg_gen_ext32s_i64(t1, src2);
+    } else {
+        tcg_gen_ext32u_i64(t0, src1);
+        tcg_gen_ext32u_i64(t1, src2);
+    }
 
-    tcg_temp_free_i64(r_temp2);
+    tcg_gen_mul_i64(dst, t0, t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
 
-    tcg_temp_free_i32(r_src1);
-    tcg_temp_free_i32(r_src2);
+    tcg_gen_shri_i64(cpu_y, dst, 32);
+#endif
 }
 
 static inline void gen_op_umul(TCGv dst, TCGv src1, TCGv src2)
commit 15fe216fc510c2a0ecf39536bbbc92ba75beb963
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:21 2013 -0800

    target-sparc: Use official add2/sub2 interfaces for addx/subx
    
    Cc: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 26c2056..a5678b0 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -448,19 +448,16 @@ static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1,
     case CC_OP_ADD:
     case CC_OP_TADD:
     case CC_OP_TADDTV:
-#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
-        {
-            /* For 32-bit hosts, we can re-use the host's hardware carry
-               generation by using an ADD2 opcode.  We discard the low
-               part of the output.  Ideally we'd combine this operation
-               with the add that generated the carry in the first place.  */
-            TCGv dst_low = tcg_temp_new();
-            tcg_gen_op6_i32(INDEX_op_add2_i32, dst_low, dst,
-                            cpu_cc_src, src1, cpu_cc_src2, src2);
-            tcg_temp_free(dst_low);
+        if (TARGET_LONG_BITS == 32) {
+            /* We can re-use the host's hardware carry generation by using
+               an ADD2 opcode.  We discard the low part of the output.
+               Ideally we'd combine this operation with the add that
+               generated the carry in the first place.  */
+            carry = tcg_temp_new();
+            tcg_gen_add2_tl(carry, dst, cpu_cc_src, src1, cpu_cc_src2, src2);
+            tcg_temp_free(carry);
             goto add_done;
         }
-#endif
         carry_32 = gen_add32_carry32();
         break;
 
@@ -492,9 +489,7 @@ static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1,
     tcg_temp_free(carry);
 #endif
 
-#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
  add_done:
-#endif
     if (update_cc) {
         tcg_gen_mov_tl(cpu_cc_src, src1);
         tcg_gen_mov_tl(cpu_cc_src2, src2);
@@ -554,19 +549,16 @@ static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
     case CC_OP_SUB:
     case CC_OP_TSUB:
     case CC_OP_TSUBTV:
-#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
-        {
-            /* For 32-bit hosts, we can re-use the host's hardware carry
-               generation by using a SUB2 opcode.  We discard the low
-               part of the output.  Ideally we'd combine this operation
-               with the add that generated the carry in the first place.  */
-            TCGv dst_low = tcg_temp_new();
-            tcg_gen_op6_i32(INDEX_op_sub2_i32, dst_low, dst,
-                            cpu_cc_src, src1, cpu_cc_src2, src2);
-            tcg_temp_free(dst_low);
+        if (TARGET_LONG_BITS == 32) {
+            /* We can re-use the host's hardware carry generation by using
+               a SUB2 opcode.  We discard the low part of the output.
+               Ideally we'd combine this operation with the add that
+               generated the carry in the first place.  */
+            carry = tcg_temp_new();
+            tcg_gen_sub2_tl(carry, dst, cpu_cc_src, src1, cpu_cc_src2, src2);
+            tcg_temp_free(carry);
             goto sub_done;
         }
-#endif
         carry_32 = gen_sub32_carry32();
         break;
 
@@ -592,9 +584,7 @@ static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
     tcg_temp_free(carry);
 #endif
 
-#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32
  sub_done:
-#endif
     if (update_cc) {
         tcg_gen_mov_tl(cpu_cc_src, src1);
         tcg_gen_mov_tl(cpu_cc_src2, src2);
commit e4a2c846248ff8e786e741bc4bc3103b24dfba74
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:20 2013 -0800

    target-ppc: Compute mullwo without branches
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
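
A short sketch (editor's illustration) of the branch-free overflow test the
patch below generates: the signed 32-bit product overflows iff the high word
of the 64-bit product differs from the sign-extension of the low word.

    #include <stdint.h>

    int mullw_overflows(int32_t a, int32_t b)
    {
        int64_t p    = (int64_t)a * b;
        int32_t lo   = (int32_t)p;
        int32_t hi   = (int32_t)(p >> 32);
        int32_t sign = lo < 0 ? -1 : 0;   /* sign-extension of the low word */

        /* mirrors the sari(31) + setcond(NE) pair in the generated code */
        return hi != sign;
    }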

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4311119..f886441 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1036,35 +1036,21 @@ static void gen_mullw(DisasContext *ctx)
 /* mullwo  mullwo. */
 static void gen_mullwo(DisasContext *ctx)
 {
-    int l1;
-    TCGv_i64 t0, t1;
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
 
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    l1 = gen_new_label();
-    /* Start with XER OV disabled, the most likely case */
-    tcg_gen_movi_tl(cpu_ov, 0);
-#if defined(TARGET_PPC64)
-    tcg_gen_ext32s_i64(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext32s_i64(t1, cpu_gpr[rB(ctx->opcode)]);
-#else
-    tcg_gen_ext_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]);
-#endif
-    tcg_gen_mul_i64(t0, t0, t1);
-#if defined(TARGET_PPC64)
-    tcg_gen_ext32s_i64(cpu_gpr[rD(ctx->opcode)], t0);
-    tcg_gen_brcond_i64(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], l1);
-#else
-    tcg_gen_trunc_i64_tl(cpu_gpr[rD(ctx->opcode)], t0);
-    tcg_gen_ext32s_i64(t1, t0);
-    tcg_gen_brcond_i64(TCG_COND_EQ, t0, t1, l1);
-#endif
-    tcg_gen_movi_tl(cpu_ov, 1);
-    tcg_gen_movi_tl(cpu_so, 1);
-    gen_set_label(l1);
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_muls2_i32(t0, t1, t0, t1);
+    tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+
+    tcg_gen_sari_i32(t0, t0, 31);
+    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_ov, t0);
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
commit ba4af3e422f7ba2de58fd752d6ca89922c259a74
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:19 2013 -0800

    target-ppc: Compute arithmetic shift carry without branches
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
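
A sketch (editor's illustration) of the branch-free XER.CA computation used
below for srawi/sradi: CA is set iff the source is negative and at least one
1 bit is shifted out.

    #include <stdint.h>

    int srawi_carry(int32_t src, int sh)      /* assumes 0 < sh < 32 */
    {
        uint32_t lost = (uint32_t)src & ((1u << sh) - 1);  /* bits shifted out */
        uint32_t sign = src < 0 ? 0xffffffffu : 0;         /* sari(..., 31)    */
        return (lost & sign) != 0;                         /* and + setcond    */
    }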

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 05f93f6..4311119 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1773,30 +1773,25 @@ static void gen_sraw(DisasContext *ctx)
 static void gen_srawi(DisasContext *ctx)
 {
     int sh = SH(ctx->opcode);
-    if (sh != 0) {
-        int l1, l2;
-        TCGv t0;
-        l1 = gen_new_label();
-        l2 = gen_new_label();
-        t0 = tcg_temp_local_new();
-        tcg_gen_ext32s_tl(t0, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l1);
-        tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1ULL << sh) - 1);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        tcg_gen_movi_tl(cpu_ca, 1);
-        tcg_gen_br(l2);
-        gen_set_label(l1);
+    TCGv dst = cpu_gpr[rA(ctx->opcode)];
+    TCGv src = cpu_gpr[rS(ctx->opcode)];
+    if (sh == 0) {
+        tcg_gen_mov_tl(dst, src);
         tcg_gen_movi_tl(cpu_ca, 0);
-        gen_set_label(l2);
-        tcg_gen_ext32s_tl(t0, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_sari_tl(cpu_gpr[rA(ctx->opcode)], t0, sh);
-        tcg_temp_free(t0);
     } else {
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_movi_tl(cpu_ca, 0);
+        TCGv t0;
+        tcg_gen_ext32s_tl(dst, src);
+        tcg_gen_andi_tl(cpu_ca, dst, (1ULL << sh) - 1);
+        t0 = tcg_temp_new();
+        tcg_gen_sari_tl(t0, dst, TARGET_LONG_BITS - 1);
+        tcg_gen_and_tl(cpu_ca, cpu_ca, t0);
+        tcg_temp_free(t0);
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_ca, cpu_ca, 0);
+        tcg_gen_sari_tl(dst, dst, sh);
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, dst);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
 
 /* srw & srw. */
@@ -1856,28 +1851,24 @@ static void gen_srad(DisasContext *ctx)
 static inline void gen_sradi(DisasContext *ctx, int n)
 {
     int sh = SH(ctx->opcode) + (n << 5);
-    if (sh != 0) {
-        int l1, l2;
-        TCGv t0;
-        l1 = gen_new_label();
-        l2 = gen_new_label();
-        t0 = tcg_temp_local_new();
-        tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rS(ctx->opcode)], 0, l1);
-        tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1ULL << sh) - 1);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        tcg_gen_movi_tl(cpu_ca, 1);
-        tcg_gen_br(l2);
-        gen_set_label(l1);
+    TCGv dst = cpu_gpr[rA(ctx->opcode)];
+    TCGv src = cpu_gpr[rS(ctx->opcode)];
+    if (sh == 0) {
+        tcg_gen_mov_tl(dst, src);
         tcg_gen_movi_tl(cpu_ca, 0);
-        gen_set_label(l2);
-        tcg_temp_free(t0);
-        tcg_gen_sari_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], sh);
     } else {
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_movi_tl(cpu_ca, 0);
+        TCGv t0;
+        tcg_gen_andi_tl(cpu_ca, src, (1ULL << sh) - 1);
+        t0 = tcg_temp_new();
+        tcg_gen_sari_tl(t0, src, TARGET_LONG_BITS - 1);
+        tcg_gen_and_tl(cpu_ca, cpu_ca, t0);
+        tcg_temp_free(t0);
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_ca, cpu_ca, 0);
+        tcg_gen_sari_tl(dst, src, sh);
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, dst);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
 
 static void gen_sradi0(DisasContext *ctx)
commit fd3f0081e5d873b26b9988b48f7118a9914bbd64
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:18 2013 -0800

    target-ppc: Implement neg in terms of subf
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index abc75e2..05f93f6 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1121,50 +1121,6 @@ static void gen_mulldo(DisasContext *ctx)
 }
 #endif
 
-/* neg neg. nego nego. */
-static inline void gen_op_arith_neg(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                    int ov_check)
-{
-    int l1 = gen_new_label();
-    int l2 = gen_new_label();
-    TCGv t0 = tcg_temp_local_new();
-#if defined(TARGET_PPC64)
-    if (ctx->sf_mode) {
-        tcg_gen_mov_tl(t0, arg1);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, INT64_MIN, l1);
-    } else
-#endif
-    {
-        tcg_gen_ext32s_tl(t0, arg1);
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, INT32_MIN, l1);
-    }
-    tcg_gen_neg_tl(ret, arg1);
-    if (ov_check) {
-        tcg_gen_movi_tl(cpu_ov, 0);
-    }
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_mov_tl(ret, t0);
-    if (ov_check) {
-        tcg_gen_movi_tl(cpu_ov, 1);
-        tcg_gen_movi_tl(cpu_so, 1);
-    }
-    gen_set_label(l2);
-    tcg_temp_free(t0);
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, ret);
-}
-
-static void gen_neg(DisasContext *ctx)
-{
-    gen_op_arith_neg(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0);
-}
-
-static void gen_nego(DisasContext *ctx)
-{
-    gen_op_arith_neg(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 1);
-}
-
 /* Common subf function */
 static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, bool add_ca, bool compute_ca,
@@ -1254,6 +1210,25 @@ static void gen_subfic(DisasContext *ctx)
     tcg_temp_free(c);
 }
 
+/* neg neg. nego nego. */
+static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov)
+{
+    TCGv zero = tcg_const_tl(0);
+    gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                      zero, 0, 0, compute_ov, Rc(ctx->opcode));
+    tcg_temp_free(zero);
+}
+
+static void gen_neg(DisasContext *ctx)
+{
+    gen_op_arith_neg(ctx, 0);
+}
+
+static void gen_nego(DisasContext *ctx)
+{
+    gen_op_arith_neg(ctx, 1);
+}
+
 /***                            Integer logical                            ***/
 #define GEN_LOGICAL2(name, tcg_op, opc, type)                                 \
 static void glue(gen_, name)(DisasContext *ctx)                                       \
commit b5a73f8d8a57e940f9bbeb399a9e47897522ee9a
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:17 2013 -0800

    target-ppc: Use add2 for carry generation
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
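
A sketch (editor's illustration) of the carry trick: an add2 with zero high
halves produces the sum and the carry-out in one operation, which is how
cpu_ca is now generated. For adde-style instructions the patch chains two such
steps, first folding in the incoming CA.

    #include <stdint.h>

    /* add2(sum, ca, a, 0, b, 0), sketched for a 32-bit target_ulong */
    void add_with_carry_out(uint32_t *sum, uint32_t *ca, uint32_t a, uint32_t b)
    {
        uint32_t lo = a + b;
        *sum = lo;
        *ca  = lo < a;          /* carry out of the low (only) word */
    }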

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index fce261b..abc75e2 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -765,73 +765,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
     tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 }
 
-static inline void gen_op_arith_compute_ca(DisasContext *ctx, TCGv arg1,
-                                           TCGv arg2, int sub)
-{
-    TCGv t0 = tcg_temp_new(), t1 = arg1, t2 = arg2;
-
-#if defined(TARGET_PPC64)
-    if (!(ctx->sf_mode)) {
-        t1 = t0;
-        tcg_gen_ext32u_tl(t1, arg1);
-        t2 = tcg_temp_new();
-        tcg_gen_ext32u_tl(t2, arg2);
-    }
-#endif
-
-    tcg_gen_setcond_tl(sub ? TCG_COND_LEU : TCG_COND_LTU, t0, t1, t2);
-    tcg_gen_or_tl(cpu_ca, cpu_ca, t0);
-
-    tcg_temp_free(t0);
-#if defined(TARGET_PPC64)
-    if (!(ctx->sf_mode)) {
-        tcg_temp_free(t2);
-    }
-#endif
-}
-
 /* Common add function */
 static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                    TCGv arg2, int add_ca, int compute_ca,
-                                    int compute_ov)
+                                    TCGv arg2, bool add_ca, bool compute_ca,
+                                    bool compute_ov, bool compute_rc0)
 {
-    TCGv t0, t1;
+    TCGv t0 = ret;
 
-    if ((!compute_ca && !compute_ov) ||
-        (!TCGV_EQUAL(ret,arg1) && !TCGV_EQUAL(ret, arg2)))  {
-        t0 = ret;
-    } else {
+    if (((compute_ca && add_ca) || compute_ov)
+        && (TCGV_EQUAL(ret, arg1) || TCGV_EQUAL(ret, arg2)))  {
         t0 = tcg_temp_new();
     }
 
-    if (add_ca) {
-        t1 = tcg_temp_local_new();
-        tcg_gen_mov_tl(t1, cpu_ca);
-    } else {
-        TCGV_UNUSED(t1);
-    }
-
     if (compute_ca) {
-        /* Start with XER CA disabled, the most likely case */
-        tcg_gen_movi_tl(cpu_ca, 0);
+        TCGv zero = tcg_const_tl(0);
+        if (add_ca) {
+            tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, cpu_ca, zero);
+            tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, arg2, zero);
+        } else {
+            tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
+        }
+        tcg_temp_free(zero);
+    } else {
+        tcg_gen_add_tl(t0, arg1, arg2);
+        if (add_ca) {
+            tcg_gen_add_tl(t0, t0, cpu_ca);
+        }
     }
 
-    tcg_gen_add_tl(t0, arg1, arg2);
-
-    if (compute_ca) {
-        gen_op_arith_compute_ca(ctx, t0, arg1, 0);
-    }
-    if (add_ca) {
-        tcg_gen_add_tl(t0, t0, t1);
-        gen_op_arith_compute_ca(ctx, t0, t1, 0);
-        tcg_temp_free(t1);
-    }
     if (compute_ov) {
         gen_op_arith_compute_ov(ctx, t0, arg1, arg2, 0);
     }
-
-    if (unlikely(Rc(ctx->opcode) != 0))
+    if (unlikely(compute_rc0)) {
         gen_set_Rc0(ctx, t0);
+    }
 
     if (!TCGV_EQUAL(t0, ret)) {
         tcg_gen_mov_tl(ret, t0);
@@ -840,21 +807,21 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
 }
 /* Add functions with two operands */
 #define GEN_INT_ARITH_ADD(name, opc3, add_ca, compute_ca, compute_ov)         \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
+static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)],                           \
                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],      \
-                     add_ca, compute_ca, compute_ov);                         \
+                     add_ca, compute_ca, compute_ov, Rc(ctx->opcode));        \
 }
 /* Add functions with one operand and one immediate */
 #define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val,                        \
                                 add_ca, compute_ca, compute_ov)               \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
+static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
-    TCGv t0 = tcg_const_local_tl(const_val);                                  \
+    TCGv t0 = tcg_const_tl(const_val);                                        \
     gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)],                           \
                      cpu_gpr[rA(ctx->opcode)], t0,                            \
-                     add_ca, compute_ca, compute_ov);                         \
+                     add_ca, compute_ca, compute_ov, Rc(ctx->opcode));        \
     tcg_temp_free(t0);                                                        \
 }
 
@@ -882,40 +849,27 @@ static void gen_addi(DisasContext *ctx)
         /* li case */
         tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], simm);
     } else {
-        tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], simm);
+        tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)],
+                        cpu_gpr[rA(ctx->opcode)], simm);
     }
 }
 /* addic  addic.*/
-static inline void gen_op_addic(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                int compute_Rc0)
+static inline void gen_op_addic(DisasContext *ctx, bool compute_rc0)
 {
-    target_long simm = SIMM(ctx->opcode);
-
-    /* Start with XER CA disabled, the most likely case */
-    tcg_gen_movi_tl(cpu_ca, 0);
-
-    if (likely(simm != 0)) {
-        TCGv t0 = tcg_temp_local_new();
-        tcg_gen_addi_tl(t0, arg1, simm);
-        gen_op_arith_compute_ca(ctx, t0, arg1, 0);
-        tcg_gen_mov_tl(ret, t0);
-        tcg_temp_free(t0);
-    } else {
-        tcg_gen_mov_tl(ret, arg1);
-    }
-    if (compute_Rc0) {
-        gen_set_Rc0(ctx, ret);
-    }
+    TCGv c = tcg_const_tl(SIMM(ctx->opcode));
+    gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                     c, 0, 1, 0, compute_rc0);
+    tcg_temp_free(c);
 }
 
 static void gen_addic(DisasContext *ctx)
 {
-    gen_op_addic(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0);
+    gen_op_addic(ctx, 0);
 }
 
 static void gen_addic_(DisasContext *ctx)
 {
-    gen_op_addic(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 1);
+    gen_op_addic(ctx, 1);
 }
 
 /* addis */
@@ -927,7 +881,8 @@ static void gen_addis(DisasContext *ctx)
         /* lis case */
         tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], simm << 16);
     } else {
-        tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], simm << 16);
+        tcg_gen_addi_tl(cpu_gpr[rD(ctx->opcode)],
+                        cpu_gpr[rA(ctx->opcode)], simm << 16);
     }
 }
 
@@ -1212,49 +1167,43 @@ static void gen_nego(DisasContext *ctx)
 
 /* Common subf function */
 static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                     TCGv arg2, int add_ca, int compute_ca,
-                                     int compute_ov)
+                                     TCGv arg2, bool add_ca, bool compute_ca,
+                                     bool compute_ov, bool compute_rc0)
 {
-    TCGv t0, t1;
-
-    if ((!compute_ca && !compute_ov) ||
-        (!TCGV_EQUAL(ret, arg1) && !TCGV_EQUAL(ret, arg2)))  {
-        t0 = ret;
-    } else {
-        t0 = tcg_temp_local_new();
-    }
-
-    if (add_ca) {
-        t1 = tcg_temp_local_new();
-        tcg_gen_mov_tl(t1, cpu_ca);
-    } else {
-        TCGV_UNUSED(t1);
-    }
+    TCGv t0 = ret;
 
-    if (compute_ca) {
-        /* Start with XER CA disabled, the most likely case */
-        tcg_gen_movi_tl(cpu_ca, 0);
+    if (((add_ca && compute_ca) || compute_ov)
+        && (TCGV_EQUAL(ret, arg1) || TCGV_EQUAL(ret, arg2)))  {
+        t0 = tcg_temp_new();
     }
 
     if (add_ca) {
-        tcg_gen_not_tl(t0, arg1);
-        tcg_gen_add_tl(t0, t0, arg2);
-        gen_op_arith_compute_ca(ctx, t0, arg2, 0);
-        tcg_gen_add_tl(t0, t0, t1);
-        gen_op_arith_compute_ca(ctx, t0, t1, 0);
-        tcg_temp_free(t1);
+        /* dest = ~arg1 + arg2 + ca = arg2 - arg1 + ca - 1.  */
+        if (compute_ca) {
+            TCGv zero;
+            tcg_gen_subi_tl(cpu_ca, cpu_ca, 1);
+            zero = tcg_const_tl(0);
+            tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero);
+            tcg_gen_sub2_tl(t0, cpu_ca, t0, cpu_ca, arg1, zero);
+            tcg_temp_free(zero);
+        } else {
+            tcg_gen_sub_tl(t0, arg2, arg1);
+            tcg_gen_add_tl(t0, t0, cpu_ca);
+            tcg_gen_subi_tl(t0, t0, 1);
+        }
     } else {
-        tcg_gen_sub_tl(t0, arg2, arg1);
         if (compute_ca) {
-            gen_op_arith_compute_ca(ctx, t0, arg2, 1);
+            tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1);
         }
+        tcg_gen_sub_tl(t0, arg2, arg1);
     }
+
     if (compute_ov) {
         gen_op_arith_compute_ov(ctx, t0, arg1, arg2, 1);
     }
-
-    if (unlikely(Rc(ctx->opcode) != 0))
+    if (unlikely(compute_rc0)) {
         gen_set_Rc0(ctx, t0);
+    }
 
     if (!TCGV_EQUAL(t0, ret)) {
         tcg_gen_mov_tl(ret, t0);
@@ -1263,21 +1212,21 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
 }
 /* Sub functions with Two operands functions */
 #define GEN_INT_ARITH_SUBF(name, opc3, add_ca, compute_ca, compute_ov)        \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
+static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)],                          \
                       cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],     \
-                      add_ca, compute_ca, compute_ov);                        \
+                      add_ca, compute_ca, compute_ov, Rc(ctx->opcode));       \
 }
 /* Sub functions with one operand and one immediate */
 #define GEN_INT_ARITH_SUBF_CONST(name, opc3, const_val,                       \
                                 add_ca, compute_ca, compute_ov)               \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
+static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
-    TCGv t0 = tcg_const_local_tl(const_val);                                  \
+    TCGv t0 = tcg_const_tl(const_val);                                        \
     gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)],                          \
                       cpu_gpr[rA(ctx->opcode)], t0,                           \
-                      add_ca, compute_ca, compute_ov);                        \
+                      add_ca, compute_ca, compute_ov, Rc(ctx->opcode));       \
     tcg_temp_free(t0);                                                        \
 }
 /* subf  subf.  subfo  subfo. */
@@ -1299,15 +1248,10 @@ GEN_INT_ARITH_SUBF_CONST(subfzeo, 0x16, 0, 1, 1, 1)
 /* subfic */
 static void gen_subfic(DisasContext *ctx)
 {
-    /* Start with XER CA disabled, the most likely case */
-    tcg_gen_movi_tl(cpu_ca, 0);
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_const_local_tl(SIMM(ctx->opcode));
-    tcg_gen_sub_tl(t0, t1, cpu_gpr[rA(ctx->opcode)]);
-    gen_op_arith_compute_ca(ctx, t0, t1, 1);
-    tcg_temp_free(t1);
-    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0);
-    tcg_temp_free(t0);
+    TCGv c = tcg_const_tl(SIMM(ctx->opcode));
+    gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                      c, 0, 1, 0, 0);
+    tcg_temp_free(c);
 }
 
 /***                            Integer logical                            ***/
commit 146de60dcade65a401c6665ae4b51c2b15dfaa55
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:16 2013 -0800

    target-ppc: Compute addition carry with setcond
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
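
For reference, a sketch (editor's illustration) of the setcond tests that
replace the brcond/label sequences: for an add, carry-out holds iff the sum is
(unsigned) below an addend; for a subf-style subtract, CA (no borrow) holds
iff the result is not above the minuend.

    #include <stdint.h>

    int add_carry(uint32_t a, uint32_t b)
    {
        uint32_t sum = a + b;
        return sum < a;          /* TCG_COND_LTU on (sum, addend) */
    }

    int subf_carry(uint32_t arg1, uint32_t arg2)   /* result = arg2 - arg1 */
    {
        uint32_t res = arg2 - arg1;
        return res <= arg2;      /* TCG_COND_LEU on (result, minuend) */
    }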

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 116cf12..fce261b 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -768,36 +768,26 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
 static inline void gen_op_arith_compute_ca(DisasContext *ctx, TCGv arg1,
                                            TCGv arg2, int sub)
 {
-    int l1 = gen_new_label();
+    TCGv t0 = tcg_temp_new(), t1 = arg1, t2 = arg2;
 
 #if defined(TARGET_PPC64)
     if (!(ctx->sf_mode)) {
-        TCGv t0, t1;
-        t0 = tcg_temp_new();
-        t1 = tcg_temp_new();
-
-        tcg_gen_ext32u_tl(t0, arg1);
-        tcg_gen_ext32u_tl(t1, arg2);
-        if (sub) {
-            tcg_gen_brcond_tl(TCG_COND_GTU, t0, t1, l1);
-        } else {
-            tcg_gen_brcond_tl(TCG_COND_GEU, t0, t1, l1);
-        }
-        tcg_gen_movi_tl(cpu_ca, 1);
-        gen_set_label(l1);
-        tcg_temp_free(t0);
-        tcg_temp_free(t1);
-    } else
+        t1 = t0;
+        tcg_gen_ext32u_tl(t1, arg1);
+        t2 = tcg_temp_new();
+        tcg_gen_ext32u_tl(t2, arg2);
+    }
 #endif
-    {
-        if (sub) {
-            tcg_gen_brcond_tl(TCG_COND_GTU, arg1, arg2, l1);
-        } else {
-            tcg_gen_brcond_tl(TCG_COND_GEU, arg1, arg2, l1);
-        }
-        tcg_gen_movi_tl(cpu_ca, 1);
-        gen_set_label(l1);
+
+    tcg_gen_setcond_tl(sub ? TCG_COND_LEU : TCG_COND_LTU, t0, t1, t2);
+    tcg_gen_or_tl(cpu_ca, cpu_ca, t0);
+
+    tcg_temp_free(t0);
+#if defined(TARGET_PPC64)
+    if (!(ctx->sf_mode)) {
+        tcg_temp_free(t2);
     }
+#endif
 }
 
 /* Common add function */
@@ -811,7 +801,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
         (!TCGV_EQUAL(ret,arg1) && !TCGV_EQUAL(ret, arg2)))  {
         t0 = ret;
     } else {
-        t0 = tcg_temp_local_new();
+        t0 = tcg_temp_new();
     }
 
     if (add_ca) {
commit ffe30937c89dd67a53bf3f35b962701cd9d8f70e
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:15 2013 -0800

    target-ppc: Compute addition overflow without branches
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
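
A sketch (editor's illustration) of the sign-bit test that replaces the
branches for the addition case: overflow occurred iff the operands have the
same sign and the result's sign differs, i.e. the xor/andc combination
generated below.

    #include <stdint.h>

    int add_overflows(int32_t a, int32_t b)
    {
        int32_t  res = (int32_t)((uint32_t)a + (uint32_t)b);
        uint32_t ov  = (uint32_t)(res ^ a) & ~(uint32_t)(a ^ b);
        return (int)(ov >> 31);   /* the sign bit, as shifted into cpu_ov */
    }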

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7aab6ae..116cf12 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -746,35 +746,23 @@ static void gen_isel(DisasContext *ctx)
 static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
                                            TCGv arg1, TCGv arg2, int sub)
 {
-    int l1;
-    TCGv t0;
+    TCGv t0 = tcg_temp_new();
 
-    l1 = gen_new_label();
-    /* Start with XER OV disabled, the most likely case */
-    tcg_gen_movi_tl(cpu_ov, 0);
-    t0 = tcg_temp_local_new();
-    tcg_gen_xor_tl(t0, arg0, arg1);
-#if defined(TARGET_PPC64)
-    if (!ctx->sf_mode)
-        tcg_gen_ext32s_tl(t0, t0);
-#endif
-    if (sub)
-        tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l1);
-    else
-        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l1);
+    tcg_gen_xor_tl(cpu_ov, arg0, arg1);
     tcg_gen_xor_tl(t0, arg1, arg2);
+    if (sub) {
+        tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
+    } else {
+        tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
+    }
+    tcg_temp_free(t0);
 #if defined(TARGET_PPC64)
-    if (!ctx->sf_mode)
-        tcg_gen_ext32s_tl(t0, t0);
+    if (!ctx->sf_mode) {
+        tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
+    }
 #endif
-    if (sub)
-        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l1);
-    else
-        tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l1);
-    tcg_gen_movi_tl(cpu_ov, 1);
-    tcg_gen_movi_tl(cpu_so, 1);
-    gen_set_label(l1);
-    tcg_temp_free(t0);
+    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 }
 
 static inline void gen_op_arith_compute_ca(DisasContext *ctx, TCGv arg1,
@@ -837,10 +825,6 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
         /* Start with XER CA disabled, the most likely case */
         tcg_gen_movi_tl(cpu_ca, 0);
     }
-    if (compute_ov) {
-        /* Start with XER OV disabled, the most likely case */
-        tcg_gen_movi_tl(cpu_ov, 0);
-    }
 
     tcg_gen_add_tl(t0, arg1, arg2);
 
@@ -1261,10 +1245,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
         /* Start with XER CA disabled, the most likely case */
         tcg_gen_movi_tl(cpu_ca, 0);
     }
-    if (compute_ov) {
-        /* Start with XER OV disabled, the most likely case */
-        tcg_gen_movi_tl(cpu_ov, 0);
-    }
 
     if (add_ca) {
         tcg_gen_not_tl(t0, arg1);
commit 2fdcb629071cb6206028bc7d6b69f3585fc365ec
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:14 2013 -0800

    target-ppc: Use setcond in gen_op_cmp
    
    This means that callers need not copy data into local tmps.
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
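
A sketch (editor's illustration; it assumes the usual PPC CR field layout with
LT, GT, EQ, SO at bit positions 3, 2, 1, 0) of how a CR field is now assembled
from setcond results instead of branches; the signed path corresponds to
s != 0.

    #include <stdint.h>

    uint32_t crf_from_cmp(int64_t a, int64_t b, uint32_t so)
    {
        uint32_t crf = so & 1;              /* SO copied in first */
        crf |= (uint32_t)(a < b)  << 3;     /* CRF_LT */
        crf |= (uint32_t)(a > b)  << 2;     /* CRF_GT */
        crf |= (uint32_t)(a == b) << 1;     /* CRF_EQ */
        return crf;
    }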

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0ac072c..7aab6ae 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -596,33 +596,33 @@ static opc_handler_t invalid_handler = {
 
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
 {
-    int l1, l2, l3;
+    TCGv t0 = tcg_temp_new();
+    TCGv_i32 t1 = tcg_temp_new_i32();
 
     tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    l3 = gen_new_label();
-    if (s) {
-        tcg_gen_brcond_tl(TCG_COND_LT, arg0, arg1, l1);
-        tcg_gen_brcond_tl(TCG_COND_GT, arg0, arg1, l2);
-    } else {
-        tcg_gen_brcond_tl(TCG_COND_LTU, arg0, arg1, l1);
-        tcg_gen_brcond_tl(TCG_COND_GTU, arg0, arg1, l2);
-    }
-    tcg_gen_ori_i32(cpu_crf[crf], cpu_crf[crf], 1 << CRF_EQ);
-    tcg_gen_br(l3);
-    gen_set_label(l1);
-    tcg_gen_ori_i32(cpu_crf[crf], cpu_crf[crf], 1 << CRF_LT);
-    tcg_gen_br(l3);
-    gen_set_label(l2);
-    tcg_gen_ori_i32(cpu_crf[crf], cpu_crf[crf], 1 << CRF_GT);
-    gen_set_label(l3);
+    tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
+    tcg_gen_trunc_tl_i32(t1, t0);
+    tcg_gen_shli_i32(t1, t1, CRF_LT);
+    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+
+    tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
+    tcg_gen_trunc_tl_i32(t1, t0);
+    tcg_gen_shli_i32(t1, t1, CRF_GT);
+    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+
+    tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
+    tcg_gen_trunc_tl_i32(t1, t0);
+    tcg_gen_shli_i32(t1, t1, CRF_EQ);
+    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+
+    tcg_temp_free(t0);
+    tcg_temp_free_i32(t1);
 }
 
 static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
 {
-    TCGv t0 = tcg_const_local_tl(arg1);
+    TCGv t0 = tcg_const_tl(arg1);
     gen_op_cmp(arg0, t0, s, crf);
     tcg_temp_free(t0);
 }
@@ -631,8 +631,8 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
 static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
 {
     TCGv t0, t1;
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
     if (s) {
         tcg_gen_ext32s_tl(t0, arg0);
         tcg_gen_ext32s_tl(t1, arg1);
@@ -647,7 +647,7 @@ static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
 
 static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf)
 {
-    TCGv t0 = tcg_const_local_tl(arg1);
+    TCGv t0 = tcg_const_tl(arg1);
     gen_op_cmp32(arg0, t0, s, crf);
     tcg_temp_free(t0);
 }
commit da91a00f191fc70ea7d81d7476ef933c562e6fcd
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:13 2013 -0800

    target-ppc: Split out SO, OV, CA fields from XER
    
    In preparation for more efficient setting of these fields.
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 8c081db..20f4565 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -941,8 +941,11 @@ struct CPUPPCState {
     /* CFAR */
     target_ulong cfar;
 #endif
-    /* XER */
+    /* XER (with SO, OV, CA split out) */
     target_ulong xer;
+    target_ulong so;
+    target_ulong ov;
+    target_ulong ca;
     /* Reservation address */
     target_ulong reserve_addr;
     /* Reservation value */
@@ -1268,9 +1271,9 @@ static inline void cpu_clone_regs(CPUPPCState *env, target_ulong newsp)
 #define XER_CA  29
 #define XER_CMP  8
 #define XER_BC   0
-#define xer_so  ((env->xer >> XER_SO)  &    1)
-#define xer_ov  ((env->xer >> XER_OV)  &    1)
-#define xer_ca  ((env->xer >> XER_CA)  &    1)
+#define xer_so  (env->so)
+#define xer_ov  (env->ov)
+#define xer_ca  (env->ca)
 #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
 #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
 
@@ -2087,6 +2090,19 @@ enum {
 
 /*****************************************************************************/
 
+static inline target_ulong cpu_read_xer(CPUPPCState *env)
+{
+    return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
+}
+
+static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
+{
+    env->so = (xer >> XER_SO) & 1;
+    env->ov = (xer >> XER_OV) & 1;
+    env->ca = (xer >> XER_CA) & 1;
+    env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
+}
+
 static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 8653151..54eca9b 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -33,9 +33,9 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     muls64(&tl, (uint64_t *)&th, arg1, arg2);
     /* If th != 0 && th != -1, then we had an overflow */
     if (likely((uint64_t)(th + 1) <= 1)) {
-        env->xer &= ~(1 << XER_OV);
+        env->ov = 0;
     } else {
-        env->xer |= (1 << XER_OV) | (1 << XER_SO);
+        env->so = env->ov = 1;
     }
     return (int64_t)tl;
 }
@@ -64,21 +64,17 @@ target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
             shift &= 0x1f;
             ret = (int32_t)value >> shift;
             if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
-                env->xer &= ~(1 << XER_CA);
+                env->ca = 0;
             } else {
-                env->xer |= (1 << XER_CA);
+                env->ca = 1;
             }
         } else {
             ret = (int32_t)value;
-            env->xer &= ~(1 << XER_CA);
+            env->ca = 0;
         }
     } else {
         ret = (int32_t)value >> 31;
-        if (ret) {
-            env->xer |= (1 << XER_CA);
-        } else {
-            env->xer &= ~(1 << XER_CA);
-        }
+        env->ca = (ret != 0);
     }
     return (target_long)ret;
 }
@@ -94,21 +90,17 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value,
             shift &= 0x3f;
             ret = (int64_t)value >> shift;
             if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
-                env->xer &= ~(1 << XER_CA);
+                env->ca = 0;
             } else {
-                env->xer |= (1 << XER_CA);
+                env->ca = 1;
             }
         } else {
             ret = (int64_t)value;
-            env->xer &= ~(1 << XER_CA);
+            env->ca = 0;
         }
     } else {
         ret = (int64_t)value >> 63;
-        if (ret) {
-            env->xer |= (1 << XER_CA);
-        } else {
-            env->xer &= ~(1 << XER_CA);
-        }
+        env->ca = (ret != 0);
     }
     return ret;
 }
@@ -188,16 +180,16 @@ target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
 
     if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
         (int32_t)arg2 == 0) {
-        env->xer |= (1 << XER_OV) | (1 << XER_SO);
+        env->so = env->ov = 1;
         env->spr[SPR_MQ] = 0;
         return INT32_MIN;
     } else {
         env->spr[SPR_MQ] = tmp % arg2;
         tmp /= (int32_t)arg2;
         if ((int32_t)tmp != tmp) {
-            env->xer |= (1 << XER_OV) | (1 << XER_SO);
+            env->so = env->ov = 1;
         } else {
-            env->xer &= ~(1 << XER_OV);
+            env->ov = 0;
         }
         return tmp;
     }
@@ -221,11 +213,11 @@ target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
 {
     if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
         (int32_t)arg2 == 0) {
-        env->xer |= (1 << XER_OV) | (1 << XER_SO);
+        env->so = env->ov = 1;
         env->spr[SPR_MQ] = 0;
         return INT32_MIN;
     } else {
-        env->xer &= ~(1 << XER_OV);
+        env->ov = 0;
         env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
         return (int32_t)arg1 / (int32_t)arg2;
     }
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 2c64c63..8e64416 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -464,7 +464,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 
     regs.ctr = env->ctr;
     regs.lr  = env->lr;
-    regs.xer = env->xer;
+    regs.xer = cpu_read_xer(env);
     regs.msr = env->msr;
     regs.pc = env->nip;
 
@@ -566,7 +566,7 @@ int kvm_arch_get_registers(CPUState *cs)
 
     env->ctr = regs.ctr;
     env->lr = regs.lr;
-    env->xer = regs.xer;
+    cpu_write_xer(env, regs.xer);
     env->msr = regs.msr;
     env->nip = regs.pc;
 
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index e014c0c..708a840 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -7,6 +7,7 @@ void cpu_save(QEMUFile *f, void *opaque)
     CPUPPCState *env = (CPUPPCState *)opaque;
     unsigned int i, j;
     uint32_t fpscr;
+    target_ulong xer;
 
     for (i = 0; i < 32; i++)
         qemu_put_betls(f, &env->gpr[i]);
@@ -18,7 +19,8 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_betls(f, &env->ctr);
     for (i = 0; i < 8; i++)
         qemu_put_be32s(f, &env->crf[i]);
-    qemu_put_betls(f, &env->xer);
+    xer = cpu_read_xer(env);
+    qemu_put_betls(f, &xer);
     qemu_put_betls(f, &env->reserve_addr);
     qemu_put_betls(f, &env->msr);
     for (i = 0; i < 4; i++)
@@ -93,6 +95,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     unsigned int i, j;
     target_ulong sdr1;
     uint32_t fpscr;
+    target_ulong xer;
 
     for (i = 0; i < 32; i++)
         qemu_get_betls(f, &env->gpr[i]);
@@ -104,7 +107,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     qemu_get_betls(f, &env->ctr);
     for (i = 0; i < 8; i++)
         qemu_get_be32s(f, &env->crf[i]);
-    qemu_get_betls(f, &env->xer);
+    qemu_get_betls(f, &xer);
+    cpu_write_xer(env, xer);
     qemu_get_betls(f, &env->reserve_addr);
     qemu_get_betls(f, &env->msr);
     for (i = 0; i < 4; i++)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2673a89..0ac072c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -66,7 +66,7 @@ static TCGv cpu_lr;
 #if defined(TARGET_PPC64)
 static TCGv cpu_cfar;
 #endif
-static TCGv cpu_xer;
+static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca;
 static TCGv cpu_reserve;
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
@@ -158,6 +158,12 @@ void ppc_translate_init(void)
 
     cpu_xer = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUPPCState, xer), "xer");
+    cpu_so = tcg_global_mem_new(TCG_AREG0,
+                                offsetof(CPUPPCState, so), "SO");
+    cpu_ov = tcg_global_mem_new(TCG_AREG0,
+                                offsetof(CPUPPCState, ov), "OV");
+    cpu_ca = tcg_global_mem_new(TCG_AREG0,
+                                offsetof(CPUPPCState, ca), "CA");
 
     cpu_reserve = tcg_global_mem_new(TCG_AREG0,
                                      offsetof(CPUPPCState, reserve_addr),
@@ -592,9 +598,7 @@ static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
 {
     int l1, l2, l3;
 
-    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_xer);
-    tcg_gen_shri_i32(cpu_crf[crf], cpu_crf[crf], XER_SO);
-    tcg_gen_andi_i32(cpu_crf[crf], cpu_crf[crf], 1);
+    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
 
     l1 = gen_new_label();
     l2 = gen_new_label();
@@ -747,7 +751,7 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
 
     l1 = gen_new_label();
     /* Start with XER OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
     t0 = tcg_temp_local_new();
     tcg_gen_xor_tl(t0, arg0, arg1);
 #if defined(TARGET_PPC64)
@@ -767,7 +771,8 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l1);
     else
         tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l1);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+    tcg_gen_movi_tl(cpu_ov, 1);
+    tcg_gen_movi_tl(cpu_so, 1);
     gen_set_label(l1);
     tcg_temp_free(t0);
 }
@@ -790,7 +795,7 @@ static inline void gen_op_arith_compute_ca(DisasContext *ctx, TCGv arg1,
         } else {
             tcg_gen_brcond_tl(TCG_COND_GEU, t0, t1, l1);
         }
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, 1 << XER_CA);
+        tcg_gen_movi_tl(cpu_ca, 1);
         gen_set_label(l1);
         tcg_temp_free(t0);
         tcg_temp_free(t1);
@@ -802,7 +807,7 @@ static inline void gen_op_arith_compute_ca(DisasContext *ctx, TCGv arg1,
         } else {
             tcg_gen_brcond_tl(TCG_COND_GEU, arg1, arg2, l1);
         }
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, 1 << XER_CA);
+        tcg_gen_movi_tl(cpu_ca, 1);
         gen_set_label(l1);
     }
 }
@@ -823,21 +828,18 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
 
     if (add_ca) {
         t1 = tcg_temp_local_new();
-        tcg_gen_andi_tl(t1, cpu_xer, (1 << XER_CA));
-        tcg_gen_shri_tl(t1, t1, XER_CA);
+        tcg_gen_mov_tl(t1, cpu_ca);
     } else {
         TCGV_UNUSED(t1);
     }
 
-    if (compute_ca && compute_ov) {
-        /* Start with XER CA and OV disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~((1 << XER_CA) | (1 << XER_OV)));
-    } else if (compute_ca) {
+    if (compute_ca) {
         /* Start with XER CA disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
-    } else if (compute_ov) {
+        tcg_gen_movi_tl(cpu_ca, 0);
+    }
+    if (compute_ov) {
         /* Start with XER OV disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+        tcg_gen_movi_tl(cpu_ov, 0);
     }
 
     tcg_gen_add_tl(t0, arg1, arg2);
@@ -915,8 +917,8 @@ static inline void gen_op_addic(DisasContext *ctx, TCGv ret, TCGv arg1,
 {
     target_long simm = SIMM(ctx->opcode);
 
-    /* Start with XER CA and OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+    /* Start with XER CA disabled, the most likely case */
+    tcg_gen_movi_tl(cpu_ca, 0);
 
     if (likely(simm != 0)) {
         TCGv t0 = tcg_temp_local_new();
@@ -976,7 +978,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_divu_i32(t0, t0, t1);
     }
     if (compute_ov) {
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+        tcg_gen_movi_tl(cpu_ov, 0);
     }
     tcg_gen_br(l2);
     gen_set_label(l1);
@@ -986,7 +988,8 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_movi_i32(t0, 0);
     }
     if (compute_ov) {
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+        tcg_gen_movi_tl(cpu_ov, 1);
+        tcg_gen_movi_tl(cpu_so, 1);
     }
     gen_set_label(l2);
     tcg_gen_extu_i32_tl(ret, t0);
@@ -1027,7 +1030,7 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_divu_i64(ret, arg1, arg2);
     }
     if (compute_ov) {
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+        tcg_gen_movi_tl(cpu_ov, 0);
     }
     tcg_gen_br(l2);
     gen_set_label(l1);
@@ -1037,7 +1040,8 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_movi_i64(ret, 0);
     }
     if (compute_ov) {
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+        tcg_gen_movi_tl(cpu_ov, 1);
+        tcg_gen_movi_tl(cpu_so, 1);
     }
     gen_set_label(l2);
     if (unlikely(Rc(ctx->opcode) != 0))
@@ -1110,7 +1114,7 @@ static void gen_mullwo(DisasContext *ctx)
     t1 = tcg_temp_new_i64();
     l1 = gen_new_label();
     /* Start with XER OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
 #if defined(TARGET_PPC64)
     tcg_gen_ext32s_i64(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_ext32s_i64(t1, cpu_gpr[rB(ctx->opcode)]);
@@ -1127,7 +1131,8 @@ static void gen_mullwo(DisasContext *ctx)
     tcg_gen_ext32s_i64(t1, t0);
     tcg_gen_brcond_i64(TCG_COND_EQ, t0, t1, l1);
 #endif
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+    tcg_gen_movi_tl(cpu_ov, 1);
+    tcg_gen_movi_tl(cpu_so, 1);
     gen_set_label(l1);
     tcg_temp_free_i64(t0);
     tcg_temp_free_i64(t1);
@@ -1206,13 +1211,14 @@ static inline void gen_op_arith_neg(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
     tcg_gen_neg_tl(ret, arg1);
     if (ov_check) {
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+        tcg_gen_movi_tl(cpu_ov, 0);
     }
     tcg_gen_br(l2);
     gen_set_label(l1);
     tcg_gen_mov_tl(ret, t0);
     if (ov_check) {
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+        tcg_gen_movi_tl(cpu_ov, 1);
+        tcg_gen_movi_tl(cpu_so, 1);
     }
     gen_set_label(l2);
     tcg_temp_free(t0);
@@ -1246,21 +1252,18 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
 
     if (add_ca) {
         t1 = tcg_temp_local_new();
-        tcg_gen_andi_tl(t1, cpu_xer, (1 << XER_CA));
-        tcg_gen_shri_tl(t1, t1, XER_CA);
+        tcg_gen_mov_tl(t1, cpu_ca);
     } else {
         TCGV_UNUSED(t1);
     }
 
-    if (compute_ca && compute_ov) {
-        /* Start with XER CA and OV disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~((1 << XER_CA) | (1 << XER_OV)));
-    } else if (compute_ca) {
+    if (compute_ca) {
         /* Start with XER CA disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
-    } else if (compute_ov) {
+        tcg_gen_movi_tl(cpu_ca, 0);
+    }
+    if (compute_ov) {
         /* Start with XER OV disabled, the most likely case */
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+        tcg_gen_movi_tl(cpu_ov, 0);
     }
 
     if (add_ca) {
@@ -1326,8 +1329,8 @@ GEN_INT_ARITH_SUBF_CONST(subfzeo, 0x16, 0, 1, 1, 1)
 /* subfic */
 static void gen_subfic(DisasContext *ctx)
 {
-    /* Start with XER CA and OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+    /* Start with XER CA disabled, the most likely case */
+    tcg_gen_movi_tl(cpu_ca, 0);
     TCGv t0 = tcg_temp_local_new();
     TCGv t1 = tcg_const_local_tl(SIMM(ctx->opcode));
     tcg_gen_sub_tl(t0, t1, cpu_gpr[rA(ctx->opcode)]);
@@ -1891,17 +1894,17 @@ static void gen_srawi(DisasContext *ctx)
         tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l1);
         tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1ULL << sh) - 1);
         tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, 1 << XER_CA);
+        tcg_gen_movi_tl(cpu_ca, 1);
         tcg_gen_br(l2);
         gen_set_label(l1);
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+        tcg_gen_movi_tl(cpu_ca, 0);
         gen_set_label(l2);
         tcg_gen_ext32s_tl(t0, cpu_gpr[rS(ctx->opcode)]);
         tcg_gen_sari_tl(cpu_gpr[rA(ctx->opcode)], t0, sh);
         tcg_temp_free(t0);
     } else {
         tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+        tcg_gen_movi_tl(cpu_ca, 0);
     }
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
@@ -1973,16 +1976,16 @@ static inline void gen_sradi(DisasContext *ctx, int n)
         tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rS(ctx->opcode)], 0, l1);
         tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1ULL << sh) - 1);
         tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        tcg_gen_ori_tl(cpu_xer, cpu_xer, 1 << XER_CA);
+        tcg_gen_movi_tl(cpu_ca, 1);
         tcg_gen_br(l2);
         gen_set_label(l1);
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+        tcg_gen_movi_tl(cpu_ca, 0);
         gen_set_label(l2);
         tcg_temp_free(t0);
         tcg_gen_sari_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], sh);
     } else {
         tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+        tcg_gen_movi_tl(cpu_ca, 0);
     }
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
@@ -3170,9 +3173,7 @@ static void gen_stwcx_(DisasContext *ctx)
     {
         int l1;
 
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_xer);
-        tcg_gen_shri_i32(cpu_crf[0], cpu_crf[0], XER_SO);
-        tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 1);
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
         l1 = gen_new_label();
         tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_reserve, l1);
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
@@ -3213,9 +3214,7 @@ static void gen_stdcx_(DisasContext *ctx)
 #else
     {
         int l1;
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_xer);
-        tcg_gen_shri_i32(cpu_crf[0], cpu_crf[0], XER_SO);
-        tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 1);
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
         l1 = gen_new_label();
         tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_reserve, l1);
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
@@ -3791,12 +3790,55 @@ static void gen_tdi(DisasContext *ctx)
 
 /***                          Processor control                            ***/
 
+static void gen_read_xer(TCGv dst)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    tcg_gen_mov_tl(dst, cpu_xer);
+    tcg_gen_shli_tl(t0, cpu_so, XER_SO);
+    tcg_gen_shli_tl(t1, cpu_ov, XER_OV);
+    tcg_gen_shli_tl(t2, cpu_ca, XER_CA);
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_or_tl(dst, dst, t2);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+static void gen_write_xer(TCGv src)
+{
+    tcg_gen_andi_tl(cpu_xer, src,
+                    ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)));
+    tcg_gen_shri_tl(cpu_so, src, XER_SO);
+    tcg_gen_shri_tl(cpu_ov, src, XER_OV);
+    tcg_gen_shri_tl(cpu_ca, src, XER_CA);
+    tcg_gen_andi_tl(cpu_so, cpu_so, 1);
+    tcg_gen_andi_tl(cpu_ov, cpu_ov, 1);
+    tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+}
+
 /* mcrxr */
 static void gen_mcrxr(DisasContext *ctx)
 {
-    tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], cpu_xer);
-    tcg_gen_shri_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], XER_CA);
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_SO | 1 << XER_OV | 1 << XER_CA));
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
+
+    tcg_gen_trunc_tl_i32(t0, cpu_so);
+    tcg_gen_trunc_tl_i32(t1, cpu_ov);
+    tcg_gen_trunc_tl_i32(dst, cpu_ca);
+    tcg_gen_shli_i32(t0, t0, 2);
+    tcg_gen_shli_i32(t1, t1, 1);
+    tcg_gen_or_i32(dst, dst, t0);
+    tcg_gen_or_i32(dst, dst, t1);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+
+    tcg_gen_movi_tl(cpu_so, 0);
+    tcg_gen_movi_tl(cpu_ov, 0);
+    tcg_gen_movi_tl(cpu_ca, 0);
 }
 
 /* mfcr mfocrf */
@@ -4526,10 +4568,11 @@ static void gen_abso(DisasContext *ctx)
     int l2 = gen_new_label();
     int l3 = gen_new_label();
     /* Start with XER OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
     tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rA(ctx->opcode)], 0, l2);
     tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rA(ctx->opcode)], 0x80000000, l1);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+    tcg_gen_movi_tl(cpu_ov, 1);
+    tcg_gen_movi_tl(cpu_so, 1);
     tcg_gen_br(l2);
     gen_set_label(l1);
     tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
@@ -4610,7 +4653,7 @@ static void gen_dozo(DisasContext *ctx)
     TCGv t1 = tcg_temp_new();
     TCGv t2 = tcg_temp_new();
     /* Start with XER OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
     tcg_gen_brcond_tl(TCG_COND_GE, cpu_gpr[rB(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], l1);
     tcg_gen_sub_tl(t0, cpu_gpr[rB(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_xor_tl(t1, cpu_gpr[rB(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
@@ -4618,7 +4661,8 @@ static void gen_dozo(DisasContext *ctx)
     tcg_gen_andc_tl(t1, t1, t2);
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0);
     tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l2);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+    tcg_gen_movi_tl(cpu_ov, 1);
+    tcg_gen_movi_tl(cpu_so, 1);
     tcg_gen_br(l2);
     gen_set_label(l1);
     tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], 0);
@@ -4736,7 +4780,7 @@ static void gen_mulo(DisasContext *ctx)
     TCGv_i64 t1 = tcg_temp_new_i64();
     TCGv t2 = tcg_temp_new();
     /* Start with XER OV disabled, the most likely case */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
     tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_mul_i64(t0, t0, t1);
@@ -4746,7 +4790,8 @@ static void gen_mulo(DisasContext *ctx)
     tcg_gen_trunc_i64_tl(cpu_gpr[rD(ctx->opcode)], t1);
     tcg_gen_ext32s_i64(t1, t0);
     tcg_gen_brcond_i64(TCG_COND_EQ, t0, t1, l1);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+    tcg_gen_movi_tl(cpu_ov, 1);
+    tcg_gen_movi_tl(cpu_so, 1);
     gen_set_label(l1);
     tcg_temp_free_i64(t0);
     tcg_temp_free_i64(t1);
@@ -4782,7 +4827,7 @@ static void gen_nabso(DisasContext *ctx)
     tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
     gen_set_label(l2);
     /* nabs never overflows */
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+    tcg_gen_movi_tl(cpu_ov, 0);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
@@ -4959,10 +5004,10 @@ static void gen_sraiq(DisasContext *ctx)
     tcg_gen_shli_tl(t1, cpu_gpr[rS(ctx->opcode)], 32 - sh);
     tcg_gen_or_tl(t0, t0, t1);
     gen_store_spr(SPR_MQ, t0);
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+    tcg_gen_movi_tl(cpu_ca, 0);
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
     tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rS(ctx->opcode)], 0, l1);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_CA));
+    tcg_gen_movi_tl(cpu_ca, 1);
     gen_set_label(l1);
     tcg_gen_sari_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], sh);
     tcg_temp_free(t0);
@@ -4993,10 +5038,10 @@ static void gen_sraq(DisasContext *ctx)
     gen_set_label(l1);
     tcg_temp_free(t0);
     tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t1);
-    tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_CA));
+    tcg_gen_movi_tl(cpu_ca, 0);
     tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l2);
     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l2);
-    tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_CA));
+    tcg_gen_movi_tl(cpu_ca, 1);
     gen_set_label(l2);
     tcg_temp_free(t1);
     tcg_temp_free(t2);
@@ -5565,7 +5610,7 @@ static inline void gen_405_mulladd_insn(DisasContext *ctx, int opc2, int opc3,
 
             if (opc3 & 0x10) {
                 /* Start with XER OV disabled, the most likely case */
-                tcg_gen_andi_tl(cpu_xer, cpu_xer, ~(1 << XER_OV));
+                tcg_gen_movi_tl(cpu_ov, 0);
             }
             if (opc3 & 0x01) {
                 /* Signed */
@@ -5588,7 +5633,8 @@ static inline void gen_405_mulladd_insn(DisasContext *ctx, int opc2, int opc3,
             }
             if (opc3 & 0x10) {
                 /* Check overflow */
-                tcg_gen_ori_tl(cpu_xer, cpu_xer, (1 << XER_OV) | (1 << XER_SO));
+                tcg_gen_movi_tl(cpu_ov, 1);
+                tcg_gen_movi_tl(cpu_so, 1);
             }
             gen_set_label(l1);
             tcg_gen_mov_tl(cpu_gpr[rt], t0);
@@ -5976,9 +6022,7 @@ static void gen_tlbsx_40x(DisasContext *ctx)
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
         int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_xer);
-        tcg_gen_shri_i32(cpu_crf[0], cpu_crf[0], XER_SO);
-        tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 1);
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
         gen_set_label(l1);
@@ -6059,9 +6103,7 @@ static void gen_tlbsx_440(DisasContext *ctx)
     tcg_temp_free(t0);
     if (Rc(ctx->opcode)) {
         int l1 = gen_new_label();
-        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_xer);
-        tcg_gen_shri_i32(cpu_crf[0], cpu_crf[0], XER_SO);
-        tcg_gen_andi_i32(cpu_crf[0], cpu_crf[0], 1);
+        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
         tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
         gen_set_label(l1);
@@ -9410,7 +9452,7 @@ void cpu_dump_state (CPUPPCState *env, FILE *f, fprintf_function cpu_fprintf,
 
     cpu_fprintf(f, "NIP " TARGET_FMT_lx "   LR " TARGET_FMT_lx " CTR "
                 TARGET_FMT_lx " XER " TARGET_FMT_lx "\n",
-                env->nip, env->lr, env->ctr, env->xer);
+                env->nip, env->lr, env->ctr, cpu_read_xer(env));
     cpu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
                 TARGET_FMT_lx " idx %d\n", env->msr, env->spr[SPR_HID0],
                 env->hflags, env->mmu_idx);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 5df2057..f5fc9b1 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -118,12 +118,12 @@ static void spr_write_clear (void *opaque, int sprn, int gprn)
 /* XER */
 static void spr_read_xer (void *opaque, int gprn, int sprn)
 {
-    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_xer);
+    gen_read_xer(cpu_gpr[gprn]);
 }
 
 static void spr_write_xer (void *opaque, int sprn, int gprn)
 {
-    tcg_gen_mov_tl(cpu_xer, cpu_gpr[gprn]);
+    gen_write_xer(cpu_gpr[gprn]);
 }
 
 /* LR */
commit 23ad1d5d3c00cd07ab7aedc128565c6029802c30
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:12 2013 -0800

    target-ppc: Use mul*2 in mulh* insns
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

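For reference, tcg_gen_muls2_i32() produces both halves of the signed 32x32->64 product in one operation, so mulhw below reduces to taking the high half; a C model (editorial illustration, not part of the patch):

    static uint32_t mulhw_model(int32_t ra, int32_t rb)
    {
        int64_t prod = (int64_t)ra * rb;             /* muls2_i32: lo/hi pair */
        return (uint32_t)((uint64_t)prod >> 32);     /* hi half -> rD         */
    }
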
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 18e0394..fcf372a 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -30,8 +30,6 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_2(mulhd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(mulhdu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(mulldo, i64, env, i64, i64)
 #endif
 
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 783079d..8653151 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -25,24 +25,6 @@
 /* Fixed point operations helpers */
 #if defined(TARGET_PPC64)
 
-/* multiply high word */
-uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
-{
-    uint64_t tl, th;
-
-    muls64(&tl, &th, arg1, arg2);
-    return th;
-}
-
-/* multiply high word unsigned */
-uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
-{
-    uint64_t tl, th;
-
-    mulu64(&tl, &th, arg1, arg2);
-    return th;
-}
-
 uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
 {
     int64_t th;
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2ac5794..2673a89 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1061,24 +1061,15 @@ GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
 /* mulhw  mulhw. */
 static void gen_mulhw(DisasContext *ctx)
 {
-    TCGv_i64 t0, t1;
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
 
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-#if defined(TARGET_PPC64)
-    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(t0, t0, t1);
-    tcg_gen_shri_i64(cpu_gpr[rD(ctx->opcode)], t0, 32);
-#else
-    tcg_gen_ext_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(t0, t0, t1);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_tl(cpu_gpr[rD(ctx->opcode)], t0);
-#endif
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_muls2_i32(t0, t1, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
@@ -1086,24 +1077,15 @@ static void gen_mulhw(DisasContext *ctx)
 /* mulhwu  mulhwu.  */
 static void gen_mulhwu(DisasContext *ctx)
 {
-    TCGv_i64 t0, t1;
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
 
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-#if defined(TARGET_PPC64)
-    tcg_gen_ext32u_i64(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext32u_i64(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(t0, t0, t1);
-    tcg_gen_shri_i64(cpu_gpr[rD(ctx->opcode)], t0, 32);
-#else
-    tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(t0, t0, t1);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_tl(cpu_gpr[rD(ctx->opcode)], t0);
-#endif
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mulu2_i32(t0, t1, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
@@ -1159,19 +1141,31 @@ static void gen_mulli(DisasContext *ctx)
     tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                     SIMM(ctx->opcode));
 }
+
 #if defined(TARGET_PPC64)
-#define GEN_INT_ARITH_MUL_HELPER(name, opc3)                                  \
-static void glue(gen_, name)(DisasContext *ctx)                                       \
-{                                                                             \
-    gen_helper_##name (cpu_gpr[rD(ctx->opcode)],                              \
-                       cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);   \
-    if (unlikely(Rc(ctx->opcode) != 0))                                       \
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);                           \
-}
 /* mulhd  mulhd. */
-GEN_INT_ARITH_MUL_HELPER(mulhdu, 0x00);
+static void gen_mulhd(DisasContext *ctx)
+{
+    TCGv lo = tcg_temp_new();
+    tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)],
+                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    tcg_temp_free(lo);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
+    }
+}
+
 /* mulhdu  mulhdu. */
-GEN_INT_ARITH_MUL_HELPER(mulhd, 0x02);
+static void gen_mulhdu(DisasContext *ctx)
+{
+    TCGv lo = tcg_temp_new();
+    tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)],
+                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    tcg_temp_free(lo);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
+    }
+}
 
 /* mulld  mulld. */
 static void gen_mulld(DisasContext *ctx)
commit bf45f97133b7f81d27711971a9e28d60528d90c8
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:11 2013 -0800

    target-cris: Use mul*2 in mul* insns
    
    Cc: Edgar E. Iglesias <edgar.iglesias at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-cris/translate.c b/target-cris/translate.c
index 2cf01a5..14c167f 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -340,46 +340,6 @@ static void t_gen_asr(TCGv d, TCGv a, TCGv b)
     tcg_temp_free(t_31);
 }
 
-/* 64-bit signed mul, lower result in d and upper in d2.  */
-static void t_gen_muls(TCGv d, TCGv d2, TCGv a, TCGv b)
-{
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-
-    tcg_gen_ext_i32_i64(t0, a);
-    tcg_gen_ext_i32_i64(t1, b);
-    tcg_gen_mul_i64(t0, t0, t1);
-
-    tcg_gen_trunc_i64_i32(d, t0);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_i32(d2, t0);
-
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-}
-
-/* 64-bit unsigned muls, lower result in d and upper in d2.  */
-static void t_gen_mulu(TCGv d, TCGv d2, TCGv a, TCGv b)
-{
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(t0, a);
-    tcg_gen_extu_i32_i64(t1, b);
-    tcg_gen_mul_i64(t0, t0, t1);
-
-    tcg_gen_trunc_i64_i32(d, t0);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_i32(d2, t0);
-
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-}
-
 static void t_gen_cris_dstep(TCGv d, TCGv a, TCGv b)
 {
     int l1;
@@ -832,10 +792,10 @@ static void cris_alu_op_exec(DisasContext *dc, int op,
         gen_helper_lz(dst, b);
         break;
     case CC_OP_MULS:
-        t_gen_muls(dst, cpu_PR[PR_MOF], a, b);
+        tcg_gen_muls2_tl(dst, cpu_PR[PR_MOF], a, b);
         break;
     case CC_OP_MULU:
-        t_gen_mulu(dst, cpu_PR[PR_MOF], a, b);
+        tcg_gen_mulu2_tl(dst, cpu_PR[PR_MOF], a, b);
         break;
     case CC_OP_DSTEP:
         t_gen_cris_dstep(dst, a, b);
commit ce1dd5d1bbb0a3769566cb6967714c8c8c97a815
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:10 2013 -0800

    target-mips: Use mul[us]2 in [D]MULT[U] insns
    
    Cc: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

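A C model of the MULT case below (editorial illustration, not part of the patch): the point of the ext_i32_tl pair is that on MIPS64 both halves of the product are kept sign-extended in the 64-bit LO/HI registers.

    static void mult_model(int32_t rs, int32_t rt, int64_t *lo, int64_t *hi)
    {
        int64_t prod = (int64_t)rs * rt;    /* muls2_i32: lo/hi pair   */
        *lo = (int32_t)prod;                /* ext_i32_tl: sign-extend */
        *hi = (int32_t)(prod >> 32);        /* ext_i32_tl: sign-extend */
    }
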
diff --git a/target-mips/helper.h b/target-mips/helper.h
index cd48738..ed75e2c 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -24,8 +24,6 @@ DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 #ifdef TARGET_MIPS64
 DEF_HELPER_FLAGS_1(dclo, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(dclz, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_3(dmult, void, env, tl, tl)
-DEF_HELPER_3(dmultu, void, env, tl, tl)
 #endif
 
 DEF_HELPER_3(muls, tl, env, tl, tl)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 526f84f..45cbb2f 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -267,18 +267,6 @@ target_ulong helper_mulshiu(CPUMIPSState *env, target_ulong arg1,
                        (uint64_t)(uint32_t)arg2);
 }
 
-#ifdef TARGET_MIPS64
-void helper_dmult(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
-{
-    muls64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
-}
-
-void helper_dmultu(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
-{
-    mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
-}
-#endif
-
 #ifndef CONFIG_USER_ONLY
 
 static inline hwaddr do_translate_address(CPUMIPSState *env,
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 4ee9615..f10a533 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -2715,47 +2715,39 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MULT:
         {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGv_i64 t3 = tcg_temp_new_i64();
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
             acc = ((ctx->opcode) >> 11) & 0x03;
             if (acc != 0) {
                 check_dsp(ctx);
             }
 
-            tcg_gen_ext_tl_i64(t2, t0);
-            tcg_gen_ext_tl_i64(t3, t1);
-            tcg_gen_mul_i64(t2, t2, t3);
-            tcg_temp_free_i64(t3);
-            tcg_gen_trunc_i64_tl(t0, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_trunc_i64_tl(t1, t2);
-            tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
-            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_muls2_i32(t2, t3, t2, t3);
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
         }
         opn = "mult";
         break;
     case OPC_MULTU:
         {
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGv_i64 t3 = tcg_temp_new_i64();
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
             acc = ((ctx->opcode) >> 11) & 0x03;
             if (acc != 0) {
                 check_dsp(ctx);
             }
 
-            tcg_gen_ext32u_tl(t0, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(t2, t0);
-            tcg_gen_extu_tl_i64(t3, t1);
-            tcg_gen_mul_i64(t2, t2, t3);
-            tcg_temp_free_i64(t3);
-            tcg_gen_trunc_i64_tl(t0, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_trunc_i64_tl(t1, t2);
-            tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
-            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_mulu2_i32(t2, t3, t2, t3);
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
         }
         opn = "multu";
         break;
@@ -2791,11 +2783,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         opn = "ddivu";
         break;
     case OPC_DMULT:
-        gen_helper_dmult(cpu_env, t0, t1);
+        tcg_gen_muls2_i64(cpu_LO[0], cpu_HI[0], t0, t1);
         opn = "dmult";
         break;
     case OPC_DMULTU:
-        gen_helper_dmultu(cpu_env, t0, t1);
+        tcg_gen_mulu2_i64(cpu_LO[0], cpu_HI[0], t0, t1);
         opn = "dmultu";
         break;
 #endif
commit 2de68a4900ef6eb67380b0c128abfe1976bc66e8
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:09 2013 -0800

    target-arm: Implement sbc_cc inline
    
    Use sub2 if available, otherwise use 64-bit arithmetic.
    
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

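For reference, the flag semantics being moved inline are those of the helper removed below; an equivalent C model (editorial illustration, not part of the patch), written in the carry-out form that the TCG sequence uses:

    /* res = a + ~b + CF  (= a - b - !CF); C = carry out, V = signed overflow. */
    static uint32_t sbc_flags_model(CPUARMState *env, uint32_t a, uint32_t b)
    {
        uint64_t sum = (uint64_t)a + (uint64_t)(uint32_t)~b + (env->CF & 1);
        uint32_t res = (uint32_t)sum;
        env->CF = (uint32_t)(sum >> 32);   /* 1 means "no borrow"      */
        env->VF = (a ^ b) & (a ^ res);     /* overflow in the sign bit */
        env->NF = env->ZF = res;
        return res;
    }
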
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 507bb9c..63ae13a 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -140,8 +140,6 @@ DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
 DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
-
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
 DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 49fc036..a522313 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -315,21 +315,6 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    if (!env->CF) {
-        result = a - b - 1;
-        env->CF = a > b;
-    } else {
-        result = a - b;
-        env->CF = a >= b;
-    }
-    env->VF = (a ^ b) & (a ^ result);
-    env->NF = env->ZF = result;
-    return result;
-}
-
 /* Similarly for variable shift instructions.  */
 
 uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 493448a..9993aea 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -464,6 +464,35 @@ static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_mov_i32(dest, cpu_NF);
 }
 
+/* dest = T0 + ~T1 + CF = T0 - T1 + CF - 1.  Compute C, N, V and Z flags */
+static void gen_sbc_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp = tcg_temp_new_i32();
+    tcg_gen_subi_i32(cpu_CF, cpu_CF, 1);
+    if (TCG_TARGET_HAS_add2_i32) {
+        tcg_gen_movi_i32(tmp, 0);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
+        tcg_gen_sub2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
+    } else {
+        TCGv_i64 q0 = tcg_temp_new_i64();
+        TCGv_i64 q1 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(q0, t0);
+        tcg_gen_extu_i32_i64(q1, t1);
+        tcg_gen_sub_i64(q0, q0, q1);
+        tcg_gen_extu_i32_i64(q1, cpu_CF);
+        tcg_gen_add_i64(q0, q0, q1);
+        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
+        tcg_temp_free_i64(q0);
+        tcg_temp_free_i64(q1);
+    }
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
 #define GEN_SHIFT(name)                                               \
 static void gen_##name(TCGv dest, TCGv t0, TCGv t1)                   \
 {                                                                     \
@@ -7109,7 +7138,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x06:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sbc_CC(tmp, tmp, tmp2);
             } else {
                 gen_sub_carry(tmp, tmp, tmp2);
             }
@@ -7117,7 +7146,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x07:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
+                gen_sbc_CC(tmp, tmp2, tmp);
             } else {
                 gen_sub_carry(tmp, tmp2, tmp);
             }
@@ -7947,10 +7976,11 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
             gen_adc(t0, t1);
         break;
     case 11: /* sbc */
-        if (conds)
-            gen_helper_sbc_cc(t0, cpu_env, t0, t1);
-        else
+        if (conds) {
+            gen_sbc_CC(t0, t0, t1);
+        } else {
             gen_sub_carry(t0, t0, t1);
+        }
         break;
     case 13: /* sub */
         if (conds)
@@ -9267,10 +9297,11 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             }
             break;
         case 0x6: /* sbc */
-            if (s->condexec_mask)
+            if (s->condexec_mask) {
                 gen_sub_carry(tmp, tmp, tmp2);
-            else
-                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
+            } else {
+                gen_sbc_CC(tmp, tmp, tmp2);
+            }
             break;
         case 0x7: /* ror */
             if (s->condexec_mask) {
commit 49b4c31efcce45ab714f286f14fa5d5173f9069d
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:08 2013 -0800

    target-arm: Implement adc_cc inline
    
    Use add2 if available, otherwise use 64-bit arithmetic.
    
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

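The add2 path below computes N and C in two steps; as a C model (editorial illustration, not part of the patch; V and Z are derived afterwards exactly as in the code):

    static void adc_nc_model(CPUARMState *env, uint32_t t0, uint32_t t1)
    {
        uint64_t s = (uint64_t)t0 + (env->CF & 1);   /* add2 #1: CF:NF = t0 + CF */
        s += t1;                                     /* add2 #2: CF:NF += t1     */
        env->NF = (uint32_t)s;                       /* result (also feeds Z)    */
        env->CF = (uint32_t)(s >> 32);               /* carry out, 0 or 1        */
    }
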
diff --git a/target-arm/helper.h b/target-arm/helper.h
index bca5a5b..507bb9c 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -140,7 +140,6 @@ DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
 DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_3(adc_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 99610d7..49fc036 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -315,21 +315,6 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    if (!env->CF) {
-        result = a + b;
-        env->CF = result < a;
-    } else {
-        result = a + b + 1;
-        env->CF = result <= a;
-    }
-    env->VF = (a ^ b ^ -1) & (a ^ result);
-    env->NF = env->ZF = result;
-    return result;
-}
-
 uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index ca6f0af..493448a 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -421,6 +421,34 @@ static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_mov_i32(dest, cpu_NF);
 }
 
+/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
+static void gen_adc_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp = tcg_temp_new_i32();
+    if (TCG_TARGET_HAS_add2_i32) {
+        tcg_gen_movi_i32(tmp, 0);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
+        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
+    } else {
+        TCGv_i64 q0 = tcg_temp_new_i64();
+        TCGv_i64 q1 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(q0, t0);
+        tcg_gen_extu_i32_i64(q1, t1);
+        tcg_gen_add_i64(q0, q0, q1);
+        tcg_gen_extu_i32_i64(q1, cpu_CF);
+        tcg_gen_add_i64(q0, q0, q1);
+        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
+        tcg_temp_free_i64(q0);
+        tcg_temp_free_i64(q1);
+    }
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
 /* dest = T0 - T1. Compute C, N, V and Z flags */
 static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
 {
@@ -7073,7 +7101,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x05:
             if (set_cc) {
-                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
+                gen_adc_CC(tmp, tmp, tmp2);
             } else {
                 gen_add_carry(tmp, tmp, tmp2);
             }
@@ -7914,7 +7942,7 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
         break;
     case 10: /* adc */
         if (conds)
-            gen_helper_adc_cc(t0, cpu_env, t0, t1);
+            gen_adc_CC(t0, t0, t1);
         else
             gen_adc(t0, t1);
         break;
@@ -9232,10 +9260,11 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             }
             break;
         case 0x5: /* adc */
-            if (s->condexec_mask)
+            if (s->condexec_mask) {
                 gen_adc(tmp, tmp2);
-            else
-                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
+            } else {
+                gen_adc_CC(tmp, tmp, tmp2);
+            }
             break;
         case 0x6: /* sbc */
             if (s->condexec_mask)
commit e3482cb8063575f9fe0f39b701a4b6dc5a55c9cd
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:07 2013 -0800

    target-arm: Use add2 in gen_add_CC
    
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

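This is the same idea as the adc sketch above with the carry-in dropped: one add2 with zeroed high words yields both the 32-bit sum and its carry, replacing the separate setcond (editorial illustration, not part of the patch):

    static void add_cc_model(uint32_t t0, uint32_t t1, uint32_t *nf, uint32_t *cf)
    {
        uint64_t sum = (uint64_t)t0 + t1;   /* add2 with zero high inputs */
        *nf = (uint32_t)sum;                /* low half  -> cpu_NF        */
        *cf = (uint32_t)(sum >> 32);        /* high half -> cpu_CF        */
    }
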
diff --git a/target-arm/translate.c b/target-arm/translate.c
index efe76d0..ca6f0af 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -410,12 +410,11 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 /* dest = T0 + T1. Compute C, N, V and Z flags */
 static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
-    tcg_gen_add_i32(cpu_NF, t0, t1);
+    TCGv tmp = tcg_temp_new_i32();
+    tcg_gen_movi_i32(tmp, 0);
+    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
-    tcg_gen_setcond_i32(TCG_COND_LTU, cpu_CF, cpu_NF, t0);
     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
-    tmp = tcg_temp_new_i32();
     tcg_gen_xor_i32(tmp, t0, t1);
     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
     tcg_temp_free_i32(tmp);
commit c9f10124a2704b6bab21b31e79735b18d414a654
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:06 2013 -0800

    target-arm: Use mul[us]2 and add2 in umlal et al
    
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

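The pattern being open-coded is a 32x32->64 multiply-accumulate into a register pair; a C model of the UMLAL case (editorial illustration, not part of the patch; names are made up):

    static void umlal_model(uint32_t rs, uint32_t rm, uint32_t *rdlo, uint32_t *rdhi)
    {
        uint64_t acc = ((uint64_t)*rdhi << 32) | *rdlo;   /* existing RdHi:RdLo */
        acc += (uint64_t)rs * rm;                         /* mulu2 ... add2     */
        *rdlo = (uint32_t)acc;
        *rdhi = (uint32_t)(acc >> 32);
    }
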
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e63da57..e97e1a5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2893,11 +2893,6 @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
     return (a & mask) | (b & ~mask);
 }
 
-uint32_t HELPER(logicq_cc)(uint64_t val)
-{
-    return (val >> 32) | (val != 0);
-}
-
 /* VFP support.  We follow the convention used for VFP instructions:
    Single precision routines have a "s" suffix, double precision a
    "d" suffix.  */
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 8544f82..bca5a5b 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -46,8 +46,6 @@ DEF_HELPER_3(usat16, i32, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 
-DEF_HELPER_1(logicq_cc, i32, i64)
-
 DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
                    i32, i32, i32, i32)
 DEF_HELPER_2(exception, void, env, i32)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 129f674..efe76d0 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6433,13 +6433,11 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
     tcg_temp_free_i64(tmp);
 }
 
-/* Set N and Z flags from a 64-bit value.  */
-static void gen_logicq_cc(TCGv_i64 val)
+/* Set N and Z flags from hi|lo.  */
+static void gen_logicq_cc(TCGv lo, TCGv hi)
 {
-    TCGv tmp = tcg_temp_new_i32();
-    gen_helper_logicq_cc(tmp, val);
-    gen_logic_CC(tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(cpu_NF, hi);
+    tcg_gen_or_i32(cpu_ZF, lo, hi);
 }
 
 /* Load/Store exclusive instructions are implemented by remembering
@@ -7219,18 +7217,22 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
                         if (insn & (1 << 22)) {
-                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                            tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
                         } else {
-                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
+                            tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
                         }
                         if (insn & (1 << 21)) { /* mult accumulate */
-                            gen_addq(s, tmp64, rn, rd);
+                            TCGv al = load_reg(s, rn);
+                            TCGv ah = load_reg(s, rd);
+                            tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
+                            tcg_temp_free(al);
+                            tcg_temp_free(ah);
                         }
                         if (insn & (1 << 20)) {
-                            gen_logicq_cc(tmp64);
+                            gen_logicq_cc(tmp, tmp2);
                         }
-                        gen_storeq_reg(s, rn, rd, tmp64);
-                        tcg_temp_free_i64(tmp64);
+                        store_reg(s, rn, tmp);
+                        store_reg(s, rd, tmp2);
                         break;
                     default:
                         goto illegal_op;
commit 831d7fe800774db0d7142fdf2a8f8758c8bf9c92
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:05 2013 -0800

    target-arm: Use mul[us]2 in gen_mul[us]_i64_i32
    
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index a8893f7..129f674 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -305,35 +305,41 @@ static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b)
     return a;
 }
 
-/* FIXME: Most targets have native widening multiplication.
-   It would be good to use that instead of a full wide multiply.  */
 /* 32x32->64 multiply.  Marks inputs as dead.  */
 static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
 {
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
+    TCGv lo = tcg_temp_new_i32();
+    TCGv hi = tcg_temp_new_i32();
+    TCGv_i64 ret;
 
-    tcg_gen_extu_i32_i64(tmp1, a);
+    tcg_gen_mulu2_i32(lo, hi, a, b);
     tcg_temp_free_i32(a);
-    tcg_gen_extu_i32_i64(tmp2, b);
     tcg_temp_free_i32(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
+
+    ret = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(ret, lo, hi);
+    tcg_temp_free(lo);
+    tcg_temp_free(hi);
+
+    return ret;
 }
 
 static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
 {
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
+    TCGv lo = tcg_temp_new_i32();
+    TCGv hi = tcg_temp_new_i32();
+    TCGv_i64 ret;
 
-    tcg_gen_ext_i32_i64(tmp1, a);
+    tcg_gen_muls2_i32(lo, hi, a, b);
     tcg_temp_free_i32(a);
-    tcg_gen_ext_i32_i64(tmp2, b);
     tcg_temp_free_i32(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
+
+    ret = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(ret, lo, hi);
+    tcg_temp_free(lo);
+    tcg_temp_free(hi);
+
+    return ret;
 }
 
 /* Swap low and high halfwords.  */
commit dc46d1c68aa107b8e3c95f66e87cd9d02e6452a9
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:04 2013 -0800

    target-s390x: Use mulu2 for mlgr insn
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-s390x/helper.h b/target-s390x/helper.h
index dd90d93..0d80aa0 100644
--- a/target-s390x/helper.h
+++ b/target-s390x/helper.h
@@ -8,7 +8,6 @@ DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
 DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
-DEF_HELPER_FLAGS_3(mul128, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, s64)
 DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
diff --git a/target-s390x/int_helper.c b/target-s390x/int_helper.c
index 6858301..af16b21 100644
--- a/target-s390x/int_helper.c
+++ b/target-s390x/int_helper.c
@@ -29,14 +29,6 @@
 #define HELPER_LOG(x...)
 #endif
 
-/* 64/64 -> 128 unsigned multiplication */
-uint64_t HELPER(mul128)(CPUS390XState *env, uint64_t v1, uint64_t v2)
-{
-    uint64_t reth;
-    mulu64(&env->retxl, &reth, v1, v2);
-    return reth;
-}
-
 /* 64/32 -> 32 signed division */
 int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
 {
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index a57296c..bdf69a3 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -2566,8 +2566,7 @@ static ExitStatus op_mul(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_mul128(DisasContext *s, DisasOps *o)
 {
-    gen_helper_mul128(o->out, cpu_env, o->in1, o->in2);
-    return_low128(o->out2);
+    tcg_gen_mulu2_i64(o->out2, o->out, o->in1, o->in2);
     return NO_EXIT;
 }
 
commit 962415fcd5f8223a6fbc6f7bb8c5fdf2500f2f84
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:03 2013 -0800

    target-alpha: Use mulu2 for umulh insn
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index eac3041..3321fde 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -9,7 +9,6 @@ DEF_HELPER_FLAGS_3(subqv, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(sublv, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mullv, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mulqv, TCG_CALL_NO_WG, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
 DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_NO_RWG_SE, i64, i64)
diff --git a/target-alpha/int_helper.c b/target-alpha/int_helper.c
index c9b42b6..51ccd41 100644
--- a/target-alpha/int_helper.c
+++ b/target-alpha/int_helper.c
@@ -22,13 +22,6 @@
 #include "qemu/host-utils.h"
 
 
-uint64_t helper_umulh(uint64_t op1, uint64_t op2)
-{
-    uint64_t tl, th;
-    mulu64(&tl, &th, op1, op2);
-    return th;
-}
-
 uint64_t helper_ctpop(uint64_t arg)
 {
     return ctpop64(arg);
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index f687b95..f8f7695 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -1390,7 +1390,6 @@ static inline void glue(gen_, name)(int ra, int rb, int rc, int islit,\
         tcg_temp_free(tmp1);                                          \
     }                                                                 \
 }
-ARITH3(umulh)
 ARITH3(cmpbge)
 ARITH3(minub8)
 ARITH3(minsb8)
@@ -2426,7 +2425,24 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x30:
             /* UMULH */
-            gen_umulh(ra, rb, rc, islit, lit);
+            {
+                TCGv low;
+                if (unlikely(rc == 31)) {
+                    break;
+                }
+                if (ra == 31) {
+                    tcg_gen_movi_i64(cpu_ir[rc], 0);
+                    break;
+                }
+                low = tcg_temp_new();
+                if (islit) {
+                    tcg_gen_movi_tl(low, lit);
+                    tcg_gen_mulu2_i64(low, cpu_ir[rc], cpu_ir[ra], low);
+                } else {
+                    tcg_gen_mulu2_i64(low, cpu_ir[rc], cpu_ir[ra], cpu_ir[rb]);
+                }
+                tcg_temp_free(low);
+            }
             break;
         case 0x40:
             /* MULL/V */
commit f1fae40c61fd4558c7d10992c98b4bb47f99e0ed
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:02 2013 -0800

    tcg: Apply life analysis to 64-bit multiword arithmetic ops
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
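
    (Illustrative sketch, not part of this commit.)  Reducing a 2-output
    op to its single-word form when the high output is dead is safe
    because the low word of a double-word add/sub/mul equals the plain
    single-word op: uint64_t arithmetic already reduces mod 2^64.  A tiny
    standalone check, assuming __int128 support:

        #include <assert.h>
        #include <stdint.h>

        int main(void)
        {
            uint64_t a = 0xdeadbeefcafebabeULL, b = 0x123456789abcdef0ULL;
            /* Low word of the wide result matches the plain 64-bit op. */
            assert((uint64_t)((unsigned __int128)a * b) == a * b);
            assert((uint64_t)((unsigned __int128)a + b) == a + b);
            return 0;
        }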

diff --git a/tcg/tcg.c b/tcg/tcg.c
index c8a843e..1d8265e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1217,7 +1217,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
 static void tcg_liveness_analysis(TCGContext *s)
 {
     int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
-    TCGOpcode op;
+    TCGOpcode op, op_new;
     TCGArg *args;
     const TCGOpDef *def;
     uint8_t *dead_temps, *mem_temps;
@@ -1324,7 +1324,17 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
 
         case INDEX_op_add2_i32:
+            op_new = INDEX_op_add_i32;
+            goto do_addsub2;
         case INDEX_op_sub2_i32:
+            op_new = INDEX_op_sub_i32;
+            goto do_addsub2;
+        case INDEX_op_add2_i64:
+            op_new = INDEX_op_add_i64;
+            goto do_addsub2;
+        case INDEX_op_sub2_i64:
+            op_new = INDEX_op_sub_i64;
+        do_addsub2:
             args -= 6;
             nb_iargs = 4;
             nb_oargs = 2;
@@ -1337,12 +1347,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     goto do_remove;
                 }
                 /* Create the single operation plus nop.  */
-                if (op == INDEX_op_add2_i32) {
-                    op = INDEX_op_add_i32;
-                } else {
-                    op = INDEX_op_sub_i32;
-                }
-                s->gen_opc_buf[op_index] = op;
+                s->gen_opc_buf[op_index] = op = op_new;
                 args[1] = args[2];
                 args[2] = args[4];
                 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
@@ -1354,6 +1359,13 @@ static void tcg_liveness_analysis(TCGContext *s)
             goto do_not_remove;
 
         case INDEX_op_mulu2_i32:
+        case INDEX_op_muls2_i32:
+            op_new = INDEX_op_mul_i32;
+            goto do_mul2;
+        case INDEX_op_mulu2_i64:
+        case INDEX_op_muls2_i64:
+            op_new = INDEX_op_mul_i64;
+        do_mul2:
             args -= 4;
             nb_iargs = 2;
             nb_oargs = 2;
@@ -1362,7 +1374,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                     goto do_remove;
                 }
-                s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                s->gen_opc_buf[op_index] = op = op_new;
                 args[1] = args[2];
                 args[2] = args[3];
                 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
commit f402f38f439f17d4361b28248f948a6170d30133
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:01 2013 -0800

    tcg: Implement muls2 with mulu2
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
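
    (Illustrative sketch, not part of this commit.)  The mulu2-based path
    relies on the usual identity: the low halves of signed and unsigned
    products are identical, and the signed high half is the unsigned high
    half minus each operand masked by the other operand's sign, modulo
    the word size.  A standalone check at 32 bits:

        #include <assert.h>
        #include <stdint.h>

        static uint32_t mulsh_from_muluh(int32_t a, int32_t b)
        {
            uint32_t ua = (uint32_t)a, ub = (uint32_t)b;
            uint32_t uh = (uint32_t)(((uint64_t)ua * ub) >> 32);
            uh -= (a < 0) ? ub : 0;   /* the sari/and/sub sequence in the patch */
            uh -= (b < 0) ? ua : 0;
            return uh;
        }

        int main(void)
        {
            int32_t a = -123456789, b = 987654321;
            int64_t p = (int64_t)a * b;
            assert(mulsh_from_muluh(a, b) == (uint32_t)((uint64_t)p >> 32));
            return 0;
        }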

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index dac3b4e..d70b2eb 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2396,6 +2396,26 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
         /* Allow the optimizer room to replace muls2 with two moves.  */
         tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        TCGv_i32 t2 = tcg_temp_new_i32();
+        TCGv_i32 t3 = tcg_temp_new_i32();
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, t0, t1, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+        /* Adjust for negative inputs.  */
+        tcg_gen_sari_i32(t2, arg1, 31);
+        tcg_gen_sari_i32(t3, arg2, 31);
+        tcg_gen_and_i32(t2, t2, arg2);
+        tcg_gen_and_i32(t3, t3, arg1);
+        tcg_gen_sub_i32(rh, t1, t2);
+        tcg_gen_sub_i32(rh, rh, t3);
+        tcg_gen_mov_i32(rl, t0);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i32(t2);
+        tcg_temp_free_i32(t3);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
@@ -2455,6 +2475,26 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
         /* Allow the optimizer room to replace mulu2 with two moves.  */
         tcg_gen_op0(INDEX_op_nop);
+    } else if (TCG_TARGET_HAS_mulu2_i64) {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        TCGv_i64 t2 = tcg_temp_new_i64();
+        TCGv_i64 t3 = tcg_temp_new_i64();
+        tcg_gen_op4_i64(INDEX_op_mulu2_i64, t0, t1, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+        /* Adjust for negative inputs.  */
+        tcg_gen_sari_i64(t2, arg1, 63);
+        tcg_gen_sari_i64(t3, arg2, 63);
+        tcg_gen_and_i64(t2, t2, arg2);
+        tcg_gen_and_i64(t3, t3, arg1);
+        tcg_gen_sub_i64(rh, t1, t2);
+        tcg_gen_sub_i64(rh, rh, t3);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+        tcg_temp_free_i64(t2);
+        tcg_temp_free_i64(t3);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
         int sizemask = 0;
commit 76f131332310d8317880ef73a2bc90725d857ed3
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:52:00 2013 -0800

    target-i386: Use add2 to implement the ADX extension
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
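
    (Illustrative sketch, not part of this commit.)  The carry-out that
    ADCX/ADOX need can be built from two word-sized adds, which is what
    the add2-based sequence in the patch does; the two partial carries
    can never both be set, so summing them gives the final carry.  One
    step in plain C:

        #include <stdint.h>

        static uint64_t adc64(uint64_t *acc, uint64_t src, uint64_t carry_in)
        {
            uint64_t t = src + carry_in;
            uint64_t c1 = t < src;      /* carry of src + carry_in */
            *acc += t;
            uint64_t c2 = *acc < t;     /* carry of acc + t */
            return c1 + c2;             /* at most one of them is 1 */
        }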

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 439d19e..605cd88 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4182,7 +4182,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                     goto illegal_op;
                 } else {
-                    TCGv carry_in, carry_out;
+                    TCGv carry_in, carry_out, zero;
                     int end_op;
 
                     ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
@@ -4242,18 +4242,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
-                        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
-                        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
-                                           cpu_T[0], cpu_regs[reg]);
-                        tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
-                        tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
-                                           cpu_regs[reg], cpu_T[0]);
-                        tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+                        zero = tcg_const_tl(0);
+                        tcg_gen_add2_tl(cpu_T[0], carry_out,
+                                        cpu_T[0], zero,
+                                        carry_in, zero);
+                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
+                                        cpu_regs[reg], carry_out,
+                                        cpu_T[0], zero);
+                        tcg_temp_free(zero);
                         break;
                     }
-                    /* We began with all flags computed to CC_SRC, and we
-                       have now placed the carry-out in CC_DST.  All that
-                       is left is to record the CC_OP.  */
                     set_cc_op(s, end_op);
                 }
                 break;
commit d693e14733509f9a2124b03a912058790411140a
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:58 2013 -0800

    tcg-arm: Implement muls2_i32
    
    We even had the encoding of smull already handy...
    
    Cc: Andrzej Zaborowski <balrogg at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
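
    (For illustration only, not from the patch.)  SMULL RdLo, RdHi, Rn, Rm
    computes exactly what muls2_i32 asks for: the full 64-bit signed
    product split into two 32-bit registers.  In plain C:

        #include <stdint.h>

        static void muls2_u32(uint32_t *rl, uint32_t *rh, int32_t a, int32_t b)
        {
            int64_t p = (int64_t)a * b;
            *rl = (uint32_t)p;
            *rh = (uint32_t)((uint64_t)p >> 32);
        }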

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index d9c33d8..94c6ca4 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1647,6 +1647,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_mulu2_i32:
         tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
         break;
+    case INDEX_op_muls2_i32:
+        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+        break;
     /* XXX: Perhaps args[2] & 0x1f is wrong */
     case INDEX_op_shl_i32:
         c = const_args[2] ?
@@ -1798,6 +1801,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_sub_i32, { "r", "r", "rI" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
+    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
     { INDEX_op_and_i32, { "r", "r", "rI" } },
     { INDEX_op_andc_i32, { "r", "r", "rI" } },
     { INDEX_op_or_i32, { "r", "r", "rI" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f9599bd..b6eed1f 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -75,7 +75,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      1
-#define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muls2_i32        1
 
 enum {
     TCG_AREG0 = TCG_REG_R6,
commit 624988a53b4db34ee2a2b96dc2bccdf52e133a0a
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:57 2013 -0800

    tcg-i386: Implement multiword arithmetic ops
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index f645529..9eec06c 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1922,31 +1922,34 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, 3);
         break;
 
-    case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
+    OP_32_64(mulu2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
         break;
-    case INDEX_op_add2_i32:
+    OP_32_64(muls2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
+        break;
+    OP_32_64(add2):
         if (const_args[4]) {
-            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
+            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
         } else {
-            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
         }
         if (const_args[5]) {
-            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
+            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
         } else {
-            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
         }
         break;
-    case INDEX_op_sub2_i32:
+    OP_32_64(sub2):
         if (const_args[4]) {
-            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
+            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
         } else {
-            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
         }
         if (const_args[5]) {
-            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
+            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
         } else {
-            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
         }
         break;
 
@@ -2080,6 +2083,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
 #endif
 
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
     { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
 
@@ -2134,6 +2138,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
+
+    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
+    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
+    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 2b08ef7..e3f6bb9 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -95,7 +95,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
 #define TCG_TARGET_HAS_mulu2_i32        1
-#define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muls2_i32        1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -118,10 +118,10 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_movcond_i64      1
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i64        0
-#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        1
+#define TCG_TARGET_HAS_muls2_i64        1
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
commit f6953a739972353f2cc5e3d5994127ca8c8236ce
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:56 2013 -0800

    tcg: Implement multiword addition helpers
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
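
    (Illustrative sketch, not part of this commit.)  When the target lacks
    add2_i64, the fallback computes the carry with a setcond, i.e. an
    unsigned "result < operand" test; sub2 mirrors it with a borrow
    computed as "al < bl".  In plain C:

        #include <stdint.h>

        static void add2_u64(uint64_t *rl, uint64_t *rh,
                             uint64_t al, uint64_t ah,
                             uint64_t bl, uint64_t bh)
        {
            uint64_t lo = al + bl;
            uint64_t carry = lo < al;   /* TCG_COND_LTU on the low sum */
            *rl = lo;
            *rh = ah + bh + carry;
        }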

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 97e0795..dac3b4e 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2332,6 +2332,44 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
 #endif
 }
 
+static inline void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                                    TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    if (TCG_TARGET_HAS_add2_i32) {
+        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace add2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_concat_i32_i64(t0, al, ah);
+        tcg_gen_concat_i32_i64(t1, bl, bh);
+        tcg_gen_add_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                                    TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    if (TCG_TARGET_HAS_sub2_i32) {
+        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace sub2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_concat_i32_i64(t0, al, ah);
+        tcg_gen_concat_i32_i64(t1, bl, bh);
+        tcg_gen_sub_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
 static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
                                      TCGv_i32 arg1, TCGv_i32 arg2)
 {
@@ -2370,6 +2408,46 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
     }
 }
 
+static inline void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                                    TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    if (TCG_TARGET_HAS_add2_i64) {
+        tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace add2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_add_i64(t0, al, bl);
+        tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
+        tcg_gen_add_i64(rh, ah, bh);
+        tcg_gen_add_i64(rh, rh, t1);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                                    TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    if (TCG_TARGET_HAS_sub2_i64) {
+        tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+        /* Allow the optimizer room to replace sub2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_sub_i64(t0, al, bl);
+        tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
+        tcg_gen_sub_i64(rh, ah, bh);
+        tcg_gen_sub_i64(rh, rh, t1);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
 static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
                                      TCGv_i64 arg1, TCGv_i64 arg2)
 {
@@ -2739,6 +2817,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_add2_tl tcg_gen_add2_i64
+#define tcg_gen_sub2_tl tcg_gen_sub2_i64
 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
 #define tcg_gen_muls2_tl tcg_gen_muls2_i64
 #else
@@ -2814,6 +2894,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_add2_tl tcg_gen_add2_i32
+#define tcg_gen_sub2_tl tcg_gen_sub2_i32
 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
 #define tcg_gen_muls2_tl tcg_gen_muls2_i32
 #endif
commit 696a8be6a077a5760bbf9822209999c908cdf0b1
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:55 2013 -0800

    tcg: Implement multiword multiply helpers
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
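
    (Illustrative sketch, not part of this commit.)  The 32-bit fallback
    is widen-multiply-split, which the new extr helper makes convenient;
    the 64-bit fallback keeps the plain multiply for the low half and
    calls the new muluh/mulsh runtime helpers for the high half.  The
    32-bit case in plain C:

        #include <stdint.h>

        static void mulu2_u32(uint32_t *rl, uint32_t *rh,
                              uint32_t a, uint32_t b)
        {
            uint64_t p = (uint64_t)a * b;
            *rl = (uint32_t)p;
            *rh = (uint32_t)(p >> 32);
        }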

diff --git a/tcg-runtime.c b/tcg-runtime.c
index abfc364..4b66e51 100644
--- a/tcg-runtime.c
+++ b/tcg-runtime.c
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include <stdint.h>
-
+#include "qemu/host-utils.h"
 #include "tcg/tcg-runtime.h"
 
 /* 32-bit helpers */
@@ -83,3 +83,17 @@ uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2)
 {
     return arg1 % arg2;
 }
+
+uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2)
+{
+    uint64_t l, h;
+    mulu64(&l, &h, arg1, arg2);
+    return h;
+}
+
+int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2)
+{
+    uint64_t l, h;
+    muls64(&l, &h, arg1, arg2);
+    return h;
+}
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 4ded249..97e0795 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2332,6 +2332,86 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
 #endif
 }
 
+static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
+                                     TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_mulu2_i32) {
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(t0, arg1);
+        tcg_gen_extu_i32_i64(t1, arg2);
+        tcg_gen_mul_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
+                                     TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i32) {
+        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace muls2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_ext_i32_i64(t0, arg1);
+        tcg_gen_ext_i32_i64(t1, arg2);
+        tcg_gen_mul_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
+static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
+                                     TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_HAS_mulu2_i64) {
+        tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace mulu2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and unsigned.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 0);
+        sizemask |= tcg_gen_sizemask(1, 1, 0);
+        sizemask |= tcg_gen_sizemask(2, 1, 0);
+        tcg_gen_mul_i64(t0, arg1, arg2);
+        tcg_gen_helper64(tcg_helper_muluh_i64, sizemask, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+    }
+}
+
+static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
+                                     TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i64) {
+        tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+        /* Allow the optimizer room to replace muls2 with two moves.  */
+        tcg_gen_op0(INDEX_op_nop);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        int sizemask = 0;
+        /* Return value and both arguments are 64-bit and signed.  */
+        sizemask |= tcg_gen_sizemask(0, 1, 1);
+        sizemask |= tcg_gen_sizemask(1, 1, 1);
+        sizemask |= tcg_gen_sizemask(2, 1, 1);
+        tcg_gen_mul_i64(t0, arg1, arg2);
+        tcg_gen_helper64(tcg_helper_mulsh_i64, sizemask, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, t0);
+        tcg_temp_free_i64(t0);
+    }
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2659,6 +2739,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
+#define tcg_gen_muls2_tl tcg_gen_muls2_i64
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2732,6 +2814,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
+#define tcg_gen_muls2_tl tcg_gen_muls2_i32
 #endif
 
 #if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index 5615b13..a1ebef9 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -12,7 +12,9 @@ int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_mulsh_i64(int64_t arg1, int64_t arg2);
 uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2);
 uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2);
+uint64_t tcg_helper_muluh_i64(uint64_t arg1, uint64_t arg2);
 
 #endif
commit 3c51a98507f9ff64fc2a3841c0e5b8a0c9e3c2b7
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:54 2013 -0800

    tcg: Implement a 64-bit to 32-bit extraction helper
    
    We're going to have use for this shortly in implementing other helpers.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
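
    (For illustration only, not from the patch.)  On a 64-bit host the new
    helper is a truncate plus a shift-and-truncate; on a 32-bit host the
    two halves already live in separate registers and only need moves.
    The 64-bit case in plain C:

        #include <stdint.h>

        static void extr_u64(uint32_t *lo, uint32_t *hi, uint64_t x)
        {
            *lo = (uint32_t)x;
            *hi = (uint32_t)(x >> 32);
        }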

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 91c9d80..4ded249 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2246,6 +2246,26 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
     tcg_gen_deposit_i64(dest, low, high, 32, 32);
 }
 
+static inline void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+    tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+#else
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    tcg_gen_trunc_i64_i32(lo, arg);
+    tcg_gen_shri_i64(t0, arg, 32);
+    tcg_gen_trunc_i64_i32(hi, t0);
+    tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+    tcg_gen_ext32u_i64(lo, arg);
+    tcg_gen_shri_i64(hi, arg, 32);
+}
+
 static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 c1, TCGv_i32 c2,
                                        TCGv_i32 v1, TCGv_i32 v2)
@@ -2625,6 +2645,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i64
 #define tcg_gen_eqv_tl tcg_gen_eqv_i64
 #define tcg_gen_nand_tl tcg_gen_nand_i64
@@ -2697,6 +2718,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_extr_tl_i64 tcg_gen_extr_i32_i64
 #define tcg_gen_andc_tl tcg_gen_andc_i32
 #define tcg_gen_eqv_tl tcg_gen_eqv_i32
 #define tcg_gen_nand_tl tcg_gen_nand_i32
commit 4d3203fd0b5d17e39f631f2534e7cbb37d04ce3f
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:53 2013 -0800

    tcg: Add signed multiword multiplication operations
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
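
    (Illustrative sketch, not part of this commit.)  A separate signed op
    is needed because only the low halves of signed and unsigned products
    agree; the high halves differ as soon as an operand is negative.  For
    example:

        #include <inttypes.h>
        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
            int32_t a = -1, b = 1;
            uint32_t uh = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
            int64_t  sp = (int64_t)a * b;
            uint32_t sh = (uint32_t)((uint64_t)sp >> 32);
            printf("mulu2 high = 0x%08" PRIx32 ", muls2 high = 0x%08" PRIx32 "\n",
                   uh, sh);
            /* prints: mulu2 high = 0x00000000, muls2 high = 0xffffffff */
            return 0;
        }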

diff --git a/tcg/README b/tcg/README
index 89f0cdd..934e7af 100644
--- a/tcg/README
+++ b/tcg/README
@@ -375,6 +375,10 @@ is returned in two single-word outputs.
 Similar to mul, except two unsigned inputs T1 and T2 yielding the full
 double-word product T0.  The latter is returned in two single-word outputs.
 
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
+
 ********* 64-bit target on 32-bit host support
 
 The following opcodes are internal to TCG.  Thus they are to be implemented by
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 7083f3a..f9599bd 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -75,6 +75,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_muls2_i32        0
 
 enum {
     TCG_AREG0 = TCG_REG_R6,
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index e2754fe..ebd53d9 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -98,6 +98,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_muls2_i32        0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub rd, 0, rs */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 4f00171..2b08ef7 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -95,6 +95,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
 #define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -120,6 +121,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 40f442e..e3d72ea 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -142,6 +142,8 @@ typedef enum {
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muls2_i64        0
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 78af664..0384bd3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -87,6 +87,7 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
+#define TCG_TARGET_HAS_muls2_i32        0
 
 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
 #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 027b3a5..bc6e5c1 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -559,6 +559,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             swap_commutative(args[1], &args[3], &args[5]);
             break;
         CASE_OP_32_64(mulu2):
+        CASE_OP_32_64(muls2):
             swap_commutative(args[0], &args[2], &args[3]);
             break;
         case INDEX_op_brcond2_i32:
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 0fdad04..17a6bb3 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -94,6 +94,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 86929c1..aa6a0f0 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -88,6 +88,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rot_i64          0
@@ -112,6 +113,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index ee31c37..40211e6 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -68,6 +68,7 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -93,6 +94,7 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
 #endif
 
 /* used for function call generation */
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index e440ad2..b5217be 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -105,6 +105,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
 #define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -130,6 +131,7 @@ typedef enum {
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index e93698e..4246e9c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -86,6 +86,7 @@ DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
 DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
 DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
 DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
+DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
 DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
 
@@ -161,6 +162,7 @@ DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
 DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
 DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
 DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
 
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 255cbdb..b195396 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -83,6 +83,7 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
 /* Turn some undef macros into true macros.  */
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 5986da2..1f17576 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -76,6 +76,7 @@
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_muls2_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_bswap16_i64      1
@@ -100,6 +101,7 @@
 #define TCG_TARGET_HAS_orc_i64          0
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_muls2_i64        0
 
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
commit d7156f7ce4581c874df4a27409e7d99873faa413
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:52 2013 -0800

    tcg: Add 64-bit multiword arithmetic operations
    
    Matching the 32-bit multiword arithmetic that we already have.
    
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/README b/tcg/README
index ec1ac79..89f0cdd 100644
--- a/tcg/README
+++ b/tcg/README
@@ -361,6 +361,20 @@ Write 8, 16, 32 or 64 bits to host memory.
 All these opcodes assume that the pointed host memory doesn't correspond
 to a global. In the latter case the behaviour is unpredictable.
 
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except two unsigned inputs T1 and T2 yielding the full
+double-word product T0.  The latter is returned in two single-word outputs.
+
 ********* 64-bit target on 32-bit host support
 
 The following opcodes are internal to TCG.  Thus they are to be implemented by
@@ -372,18 +386,6 @@ They are emitted as needed by inline functions within "tcg-op.h".
 Similar to brcond, except that the 64-bit values T0 and T1
 are formed from two 32-bit arguments.
 
-* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
-
-Similar to add/sub, except that the 64-bit inputs T1 and T2 are
-formed from two 32-bit arguments, and the 64-bit output T0
-is returned in two 32-bit outputs.
-
-* mulu2_i32 t0_low, t0_high, t1, t2
-
-Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
-the full 64-bit product T0.  The later is returned in two 32-bit outputs.
-
 * setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
 
 Similar to setcond, except that the 64-bit values T1 and T2 are
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 487dc23..4f00171 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -117,6 +117,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index b4ff7c3..40f442e 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -137,8 +137,11 @@ typedef enum {
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_sub2_i64         0
 #define TCG_TARGET_HAS_mulu2_i32        0
+#define TCG_TARGET_HAS_mulu2_i64        0
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 973d2d6..027b3a5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -554,11 +554,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args[5] = tcg_invert_cond(args[5]);
             }
             break;
-        case INDEX_op_add2_i32:
+        CASE_OP_32_64(add2):
             swap_commutative(args[0], &args[2], &args[4]);
             swap_commutative(args[1], &args[3], &args[5]);
             break;
-        case INDEX_op_mulu2_i32:
+        CASE_OP_32_64(mulu2):
             swap_commutative(args[0], &args[2], &args[3]);
             break;
         case INDEX_op_brcond2_i32:
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index ea976ad..86929c1 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -109,6 +109,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 7772c35..ee31c37 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -90,6 +90,9 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 #endif
 
 /* used for function call generation */
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 8446721..e440ad2 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -127,6 +127,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 1d9a9a2..e93698e 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -158,6 +158,10 @@ DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
 DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
 DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
 
+DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
+DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
 DEF(debug_insn_start, 0, 0, 2, 0)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index e5c7ce4..255cbdb 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -80,6 +80,9 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 /* Turn some undef macros into true macros.  */
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 3e235bd..5986da2 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -104,6 +104,9 @@
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
 /* Number of registers available.
commit 803d805bcef4ea7b7d6ef0b4929263e1160d6b3c
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:51 2013 -0800

    tcg-sparc: Always implement 32-bit multiword ops
    
    Cc: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 03db514..6d489fc 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1327,6 +1327,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                              args[3], const_args[3],
                              args[4], const_args[4]);
         break;
+#endif
+
     case INDEX_op_add2_i32:
         tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
                         args[4], const_args[4], args[5], const_args[5],
@@ -1342,7 +1344,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        ARITH_UMUL);
         tcg_out_rdy(s, args[1]);
         break;
-#endif
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -1511,10 +1512,11 @@ static const TCGTargetOpDef sparc_op_defs[] = {
 #if TCG_TARGET_REG_BITS == 32
     { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+#endif
+
     { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
-#endif
 
 #if TCG_TARGET_REG_BITS == 64
     { INDEX_op_mov_i64, { "r", "r" } },
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 6c62e45..8446721 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -102,6 +102,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -124,10 +127,6 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      1
-
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_mulu2_i32        0
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
commit bbc863bfecfb3e3a3e21ce569e25046e24c0487c
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:50 2013 -0800

    tcg-i386: Always implement 32-bit multiword ops
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 7aec304..f645529 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1922,13 +1922,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, 3);
         break;
 
-#if TCG_TARGET_REG_BITS == 32
-    case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args, 0);
-        break;
-    case INDEX_op_setcond2_i32:
-        tcg_out_setcond2(s, args, const_args);
-        break;
     case INDEX_op_mulu2_i32:
         tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
         break;
@@ -1956,6 +1949,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tgen_arithr(s, ARITH_SBB, args[1], args[5]);
         }
         break;
+
+#if TCG_TARGET_REG_BITS == 32
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2(s, args, const_args, 0);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
 #else /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_movi_i64:
         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
@@ -2078,10 +2079,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
 #endif
 
-#if TCG_TARGET_REG_BITS == 32
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
     { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
     { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
 #else
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 43ad2c4..487dc23 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -92,6 +92,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -114,10 +117,6 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_movcond_i64      1
-
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_mulu2_i32        0
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
commit e6a72734549bd05d06d19957518811c24a6cbee4
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 23:51:49 2013 -0800

    tcg: Make 32-bit multiword operations optional for 64-bit hosts
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index e63db9c..43ad2c4 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -114,6 +114,10 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_movcond_i64      1
+
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 7f3401e..b4ff7c3 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -136,6 +136,9 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 9b8e9a0..ea976ad 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -85,6 +85,9 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rot_i64          0
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index c87b413..7772c35 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -65,6 +65,9 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 256f973..6c62e45 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -124,6 +124,10 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      1
+
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 #endif
 
 #define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 9651063..1d9a9a2 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -83,10 +83,10 @@ DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
 
 DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
 
-DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
+DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
+DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
 DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
 
 DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 51c8176..e5c7ce4 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -57,8 +57,8 @@ typedef uint64_t TCGRegSet;
 #error unsupported
 #endif
 
-/* Turn some undef macros into false macros.  */
 #if TCG_TARGET_REG_BITS == 32
+/* Turn some undef macros into false macros.  */
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_div2_i64         0
 #define TCG_TARGET_HAS_rot_i64          0
@@ -80,6 +80,10 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_movcond_i64      0
+/* Turn some undef macros into true macros.  */
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#define TCG_TARGET_HAS_mulu2_i32        1
 #endif
 
 #ifndef TCG_TARGET_deposit_i32_valid
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index a832f5c..3e235bd 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -100,6 +100,10 @@
 #define TCG_TARGET_HAS_orc_i64          0
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_movcond_i64      0
+
+#define TCG_TARGET_HAS_add2_i32         0
+#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_mulu2_i32        0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
 /* Number of registers available.
commit f708e736d0dafc05f8b7e9e73d6440c930b94686
Merge: 6ab7e54 f437d0a
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sat Feb 23 17:21:41 2013 +0000

    Merge branch 'eflags3' of git://github.com/rth7680/qemu
    
    * 'eflags3' of git://github.com/rth7680/qemu: (61 commits)
      target-i386: Use movcond to implement shiftd.
      target-i386: Discard CC_OP computation in set_cc_op also
      target-i386: Use movcond to implement rotate flags.
      target-i386: Use movcond to implement shift flags.
      target-i386: Add CC_OP_CLR
      target-i386: Implement tzcnt and fix lzcnt
      target-i386: Use clz/ctz for bsf/bsr helpers
      target-i386: Implement ADX extension
      target-i386: Implement RORX
      target-i386: Implement SHLX, SARX, SHRX
      target-i386: Implement PDEP, PEXT
      target-i386: Implement MULX
      target-i386: Implement BZHI
      target-i386: Implement BLSR, BLSMSK, BLSI
      target-i386: Implement BEXTR
      target-i386: Implement ANDN
      target-i386: Implement MOVBE
      target-i386: Decode the VEX prefixes
      target-i386: Tidy prefix parsing
      target-i386: Use CC_SRC2 for ADC and SBB
      ...

commit 6ab7e5465a4d6188e29398fb43a30dbab1015b75
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Wed Feb 20 15:21:09 2013 +0000

    Replace all setjmp()/longjmp() with sigsetjmp()/siglongjmp()
    
    The setjmp() function doesn't specify whether signal masks are saved and
    restored; on Linux they are not, but on BSD (including MacOSX) they are.
    We want to have consistent behaviour across platforms, so we should
    always use "don't save/restore signal mask" (this is also generally
    going to be faster). This also works around a bug in MacOSX where the
    signal-restoration on longjmp() affects the signal mask for a completely
    different thread, not just the mask for the thread which did the longjmp.
    The most visible effect of this was that ctrl-C was ignored on MacOSX
    because the CPU thread did a longjmp which resulted in its signal mask
    being applied to every thread, so that all threads had SIGINT and SIGTERM
    blocked.
    
    The POSIX-sanctioned portable way to do a jump without affecting signal
    masks is to siglongjmp() to a sigjmp_buf which was created by calling
    sigsetjmp() with a zero savemask parameter, so change all uses of
    setjmp()/longjmp() accordingly. [Technically POSIX allows sigsetjmp(buf, 0)
    to save the signal mask; however the following siglongjmp() must not
    restore the signal mask, so the pair can be effectively considered as
    "sigjmp/longjmp which don't touch the mask".]
    
    For Windows we provide a trivial sigsetjmp/siglongjmp in terms of
    setjmp/longjmp -- this is OK because no user will ever pass a non-zero
    savemask.
    
    The setjmp() uses in tests/tcg/test-i386.c and tests/tcg/linux-test.c
    are left untouched because these are self-contained singlethreaded
    test programs intended to be run under QEMU's Linux emulation, so they
    have neither the portability nor the multithreading issues to deal with.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Tested-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

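As a quick illustration of the pattern described above, here is a minimal,
self-contained sketch (not code from the patch; the buffer name and error path
are invented for the example):

    #include <setjmp.h>
    #include <stdio.h>

    static sigjmp_buf bailout;              /* illustrative name */

    static void fail(void)
    {
        /* Jump back to the sigsetjmp() point.  The signal mask is left
         * untouched because the buffer was set up with a zero savemask. */
        siglongjmp(bailout, 1);
    }

    int main(void)
    {
        if (sigsetjmp(bailout, 0) == 0) {   /* savemask == 0: don't save the mask */
            fail();
        }
        puts("recovered without altering the signal mask");
        return 0;
    }

This is the sigsetjmp(buf, 0)/siglongjmp() pairing the commit converts the tree
to; the Win32 shim further down simply maps these names onto setjmp()/longjmp().
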
diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
index e37ebac..1fb41c9 100644
--- a/coroutine-sigaltstack.c
+++ b/coroutine-sigaltstack.c
@@ -45,7 +45,7 @@ static unsigned int pool_size;
 typedef struct {
     Coroutine base;
     void *stack;
-    jmp_buf env;
+    sigjmp_buf env;
 } CoroutineUContext;
 
 /**
@@ -59,7 +59,7 @@ typedef struct {
     CoroutineUContext leader;
 
     /** Information for the signal handler (trampoline) */
-    jmp_buf tr_reenter;
+    sigjmp_buf tr_reenter;
     volatile sig_atomic_t tr_called;
     void *tr_handler;
 } CoroutineThreadState;
@@ -115,8 +115,8 @@ static void __attribute__((constructor)) coroutine_init(void)
 static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
 {
     /* Initialize longjmp environment and switch back the caller */
-    if (!setjmp(self->env)) {
-        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
     }
 
     while (true) {
@@ -145,14 +145,14 @@ static void coroutine_trampoline(int signal)
     /*
      * Here we have to do a bit of a ping pong between the caller, given that
      * this is a signal handler and we have to do a return "soon". Then the
-     * caller can reestablish everything and do a longjmp here again.
+     * caller can reestablish everything and do a siglongjmp here again.
      */
-    if (!setjmp(coTS->tr_reenter)) {
+    if (!sigsetjmp(coTS->tr_reenter, 0)) {
         return;
     }
 
     /*
-     * Ok, the caller has longjmp'ed back to us, so now prepare
+     * Ok, the caller has siglongjmp'ed back to us, so now prepare
      * us for the real machine state switching. We have to jump
      * into another function here to get a new stack context for
      * the auto variables (which have to be auto-variables
@@ -179,7 +179,7 @@ static Coroutine *coroutine_new(void)
 
     /* The way to manipulate stack is with the sigaltstack function. We
      * prepare a stack, with it delivering a signal to ourselves and then
-     * put setjmp/longjmp where needed.
+     * put sigsetjmp/siglongjmp where needed.
      * This has been done keeping coroutine-ucontext as a model and with the
      * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
      * of the coroutines and see pth_mctx.c (from the pth project) for the
@@ -220,7 +220,7 @@ static Coroutine *coroutine_new(void)
 
     /*
      * Now transfer control onto the signal stack and set it up.
-     * It will return immediately via "return" after the setjmp()
+     * It will return immediately via "return" after the sigsetjmp()
      * was performed. Be careful here with race conditions.  The
      * signal can be delivered the first time sigsuspend() is
      * called.
@@ -261,8 +261,8 @@ static Coroutine *coroutine_new(void)
      * type-conversion warnings related to the `volatile' qualifier and
      * the fact that `jmp_buf' usually is an array type.
      */
-    if (!setjmp(old_env)) {
-        longjmp(coTS->tr_reenter, 1);
+    if (!sigsetjmp(old_env, 0)) {
+        siglongjmp(coTS->tr_reenter, 1);
     }
 
     /*
@@ -311,9 +311,9 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
     s->current = to_;
 
-    ret = setjmp(from->env);
+    ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
-        longjmp(to->env, action);
+        siglongjmp(to->env, action);
     }
     return ret;
 }
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
index a9c30e9..bd20e38 100644
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -46,7 +46,7 @@ static unsigned int pool_size;
 typedef struct {
     Coroutine base;
     void *stack;
-    jmp_buf env;
+    sigjmp_buf env;
 
 #ifdef CONFIG_VALGRIND_H
     unsigned int valgrind_stack_id;
@@ -130,8 +130,8 @@ static void coroutine_trampoline(int i0, int i1)
     co = &self->base;
 
     /* Initialize longjmp environment and switch back the caller */
-    if (!setjmp(self->env)) {
-        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
     }
 
     while (true) {
@@ -145,14 +145,15 @@ static Coroutine *coroutine_new(void)
     const size_t stack_size = 1 << 20;
     CoroutineUContext *co;
     ucontext_t old_uc, uc;
-    jmp_buf old_env;
+    sigjmp_buf old_env;
     union cc_arg arg = {0};
 
-    /* The ucontext functions preserve signal masks which incurs a system call
-     * overhead.  setjmp()/longjmp() does not preserve signal masks but only
-     * works on the current stack.  Since we need a way to create and switch to
-     * a new stack, use the ucontext functions for that but setjmp()/longjmp()
-     * for everything else.
+    /* The ucontext functions preserve signal masks which incurs a
+     * system call overhead.  sigsetjmp(buf, 0)/siglongjmp() does not
+     * preserve signal masks but only works on the current stack.
+     * Since we need a way to create and switch to a new stack, use
+     * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+     * everything else.
      */
 
     if (getcontext(&uc) == -1) {
@@ -178,8 +179,8 @@ static Coroutine *coroutine_new(void)
     makecontext(&uc, (void (*)(void))coroutine_trampoline,
                 2, arg.i[0], arg.i[1]);
 
-    /* swapcontext() in, longjmp() back out */
-    if (!setjmp(old_env)) {
+    /* swapcontext() in, siglongjmp() back out */
+    if (!sigsetjmp(old_env, 0)) {
         swapcontext(&old_uc, &uc);
     }
     return &co->base;
@@ -242,9 +243,9 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
     s->current = to_;
 
-    ret = setjmp(from->env);
+    ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
-        longjmp(to->env, action);
+        siglongjmp(to->env, action);
     }
     return ret;
 }
diff --git a/cpu-exec.c b/cpu-exec.c
index 9fcfe9e..afbe497 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -35,7 +35,7 @@ void cpu_loop_exit(CPUArchState *env)
     CPUState *cpu = ENV_GET_CPU(env);
 
     cpu->current_tb = NULL;
-    longjmp(env->jmp_env, 1);
+    siglongjmp(env->jmp_env, 1);
 }
 
 /* exit the current TB from a signal handler. The host registers are
@@ -47,7 +47,7 @@ void cpu_resume_from_signal(CPUArchState *env, void *puc)
     /* XXX: restore cpu registers saved in host registers */
 
     env->exception_index = -1;
-    longjmp(env->jmp_env, 1);
+    siglongjmp(env->jmp_env, 1);
 }
 #endif
 
@@ -234,7 +234,7 @@ int cpu_exec(CPUArchState *env)
 
     /* prepare setjmp context for exception handling */
     for(;;) {
-        if (setjmp(env->jmp_env) == 0) {
+        if (sigsetjmp(env->jmp_env, 0) == 0) {
             /* if an exception is pending, we execute it here */
             if (env->exception_index >= 0) {
                 if (env->exception_index >= EXCP_INTERRUPT) {
diff --git a/disas/i386.c b/disas/i386.c
index dbecf1f..73cc06f 100644
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -226,7 +226,7 @@ struct dis_private {
   bfd_byte the_buffer[MAX_MNEM_SIZE];
   bfd_vma insn_start;
   int orig_sizeflag;
-  jmp_buf bailout;
+  sigjmp_buf bailout;
 };
 
 enum address_mode
@@ -303,7 +303,7 @@ fetch_data2(struct disassemble_info *info, bfd_byte *addr)
 	 STATUS.  */
       if (priv->max_fetched == priv->the_buffer)
 	(*info->memory_error_func) (status, start, info);
-      longjmp (priv->bailout, 1);
+      siglongjmp(priv->bailout, 1);
     }
   else
     priv->max_fetched = addr;
@@ -3661,7 +3661,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
   start_codep = priv.the_buffer;
   codep = priv.the_buffer;
 
-  if (setjmp (priv.bailout) != 0)
+  if (sigsetjmp(priv.bailout, 0) != 0)
     {
       const char *name;
 
diff --git a/disas/m68k.c b/disas/m68k.c
index c950241..cc0db96 100644
--- a/disas/m68k.c
+++ b/disas/m68k.c
@@ -624,7 +624,7 @@ struct private
   bfd_byte *max_fetched;
   bfd_byte the_buffer[MAXLEN];
   bfd_vma insn_start;
-  jmp_buf bailout;
+  sigjmp_buf bailout;
 };
 
 /* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
@@ -644,7 +644,7 @@ fetch_data2(struct disassemble_info *info, bfd_byte *addr)
   if (status != 0)
     {
       (*info->memory_error_func) (status, start, info);
-      longjmp (priv->bailout, 1);
+      siglongjmp(priv->bailout, 1);
     }
   else
     priv->max_fetched = addr;
@@ -1912,9 +1912,10 @@ print_insn_m68k (bfd_vma memaddr, disassemble_info *info)
   priv.max_fetched = priv.the_buffer;
   priv.insn_start = memaddr;
 
-  if (setjmp (priv.bailout) != 0)
-    /* Error return.  */
-    return -1;
+  if (sigsetjmp(priv.bailout, 0) != 0) {
+      /* Error return.  */
+      return -1;
+  }
 
   switch (info->mach)
     {
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index ae64590..3dc9656 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -184,7 +184,7 @@ typedef struct CPUWatchpoint {
     struct GDBRegisterState *gdb_regs;                                  \
                                                                         \
     /* Core interrupt code */                                           \
-    jmp_buf jmp_env;                                                    \
+    sigjmp_buf jmp_env;                                                 \
     int exception_index;                                                \
                                                                         \
     CPUArchState *next_cpu; /* next CPU sharing TB cache */                 \
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index bf9edeb..71f5fa0 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -63,6 +63,14 @@
 # undef setjmp
 # define setjmp(env) _setjmp(env, NULL)
 #endif
+/* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify
+ * "longjmp and don't touch the signal masks". Since we know that the
+ * savemask parameter will always be zero we can safely define these
+ * in terms of setjmp/longjmp on Win32.
+ */
+#define sigjmp_buf jmp_buf
+#define sigsetjmp(env, savemask) setjmp(env)
+#define siglongjmp(env, val) longjmp(env, val)
 
 /* Declaration of ffs() is missing in MinGW's strings.h. */
 int ffs(int i);
diff --git a/monitor.c b/monitor.c
index 6a0f257..32a6e74 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2740,7 +2740,7 @@ static const mon_cmd_t qmp_cmds[] = {
 /*******************************************************************/
 
 static const char *pch;
-static jmp_buf expr_env;
+static sigjmp_buf expr_env;
 
 #define MD_TLONG 0
 #define MD_I32   1
@@ -3135,7 +3135,7 @@ static const MonitorDef monitor_defs[] = {
 static void expr_error(Monitor *mon, const char *msg)
 {
     monitor_printf(mon, "%s\n", msg);
-    longjmp(expr_env, 1);
+    siglongjmp(expr_env, 1);
 }
 
 /* return 0 if OK, -1 if not found */
@@ -3345,7 +3345,7 @@ static int64_t expr_sum(Monitor *mon)
 static int get_expr(Monitor *mon, int64_t *pval, const char **pp)
 {
     pch = *pp;
-    if (setjmp(expr_env)) {
+    if (sigsetjmp(expr_env, 0)) {
         *pp = pch;
         return -1;
     }
diff --git a/user-exec.c b/user-exec.c
index c71acbc..71bd6c5 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -70,7 +70,7 @@ void cpu_resume_from_signal(CPUArchState *env1, void *puc)
 #endif
     }
     env1->exception_index = -1;
-    longjmp(env1->jmp_env, 1);
+    siglongjmp(env1->jmp_env, 1);
 }
 
 /* 'pc' is the host PC at which the exception was raised. 'address' is
commit d1c36ba707637173b818652e51181370d51b6c58
Author: Ronald Hecht <ronald.hecht at gmx.de>
Date:   Tue Feb 19 12:45:07 2013 +0100

    SPARC LEON power-down support added
    
    Signed-off-by: Ronald Hecht <address at hidden>
    Signed-off-by: Fabien Chouteau <chouteau at adacore.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index cc1453e..50def61 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -586,7 +586,7 @@ static const sparc_def_t sparc_defs[] = {
         .mmu_trcr_mask = 0xffffffff,
         .nwindows = 8,
         .features = CPU_DEFAULT_FEATURES | CPU_FEATURE_TA0_SHUTDOWN |
-        CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL,
+        CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL | CPU_FEATURE_POWERDOWN,
     },
 #endif
 };
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 7389b03..a2f2cc8 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -270,6 +270,7 @@ typedef struct sparc_def_t {
 #define CPU_FEATURE_TA0_SHUTDOWN (1 << 14) /* Shutdown on "ta 0x0" */
 #define CPU_FEATURE_ASR17        (1 << 15)
 #define CPU_FEATURE_CACHE_CTRL   (1 << 16)
+#define CPU_FEATURE_POWERDOWN    (1 << 17)
 
 #ifndef TARGET_SPARC64
 #define CPU_DEFAULT_FEATURES (CPU_FEATURE_FLOAT | CPU_FEATURE_SWAP |  \
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 91ecfc7..58e7efe 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -225,3 +225,14 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
     cpu_restore_state(env, GETPC());
     helper_raise_exception(env, TT_TOVF);
 }
+
+#ifndef TARGET_SPARC64
+void helper_power_down(CPUSPARCState *env)
+{
+    env->halted = 1;
+    env->exception_index = EXCP_HLT;
+    env->pc = env->npc;
+    env->npc = env->pc + 4;
+    cpu_loop_exit(env);
+}
+#endif
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index cfcdab1..15f7328 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -4,6 +4,7 @@
 DEF_HELPER_1(rett, void, env)
 DEF_HELPER_2(wrpsr, void, env, tl)
 DEF_HELPER_1(rdpsr, tl, env)
+DEF_HELPER_1(power_down, void, env)
 #else
 DEF_HELPER_2(wrpil, void, env, tl)
 DEF_HELPER_2(wrpstate, void, env, tl)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ca75e1a..26c2056 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -3642,6 +3642,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                                    in the SPARCv8
                                                    manual, nop on the
                                                    microSPARC II */
+                                if ((rd == 0x13) && (dc->def->features &
+                                                     CPU_FEATURE_POWERDOWN)) {
+                                    /* LEON3 power-down */
+                                    gen_helper_power_down(cpu_env);
+                                }
                                 break;
 #else
                             case 0x2: /* V9 wrccr */
commit 7a0a9c2c64be242d5953d5ce6172976b05f6c14f
Author: Ronald Hecht <ronald.hecht at gmx.de>
Date:   Tue Feb 19 12:45:06 2013 +0100

    Added LEON MMU ASI mappings and corrected LEON3 MMU masks.
    
    This patch adds SPARC ASI mappings that are used by the LEON processor. It
    also corrects the MMU context register and context table pointer mask of
    the LEON3.
    
    Signed-off-by: Ronald Hecht <ronald.hecht at gmx.de>
    Signed-off-by: Fabien Chouteau <chouteau at adacore.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index ef52df6..cc1453e 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -580,8 +580,8 @@ static const sparc_def_t sparc_defs[] = {
         .fpu_version = 4 << 17, /* FPU version 4 (Meiko) */
         .mmu_version = 0xf3000000,
         .mmu_bm = 0x00000000,
-        .mmu_ctpr_mask = 0x007ffff0,
-        .mmu_cxr_mask = 0x0000003f,
+        .mmu_ctpr_mask = 0xfffffffc,
+        .mmu_cxr_mask = 0x000000ff,
         .mmu_sfsr_mask = 0xffffffff,
         .mmu_trcr_mask = 0xffffffff,
         .nwindows = 8,
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 7decd66..6d767fb 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -514,6 +514,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size,
 #endif
         break;
     case 3: /* MMU probe */
+    case 0x18: /* LEON3 MMU probe */
         {
             int mmulev;
 
@@ -528,6 +529,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size,
         }
         break;
     case 4: /* read MMU regs */
+    case 0x19: /* LEON3 read MMU regs */
         {
             int reg = (addr >> 8) & 0x1f;
 
@@ -603,6 +605,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size,
     case 0xf: /* D-cache data */
         break;
     case 0x20: /* MMU passthrough */
+    case 0x1c: /* LEON MMU passthrough */
         switch (size) {
         case 1:
             ret = ldub_phys(addr);
@@ -844,6 +847,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi,
 #endif
         break;
     case 3: /* MMU flush */
+    case 0x18: /* LEON3 MMU flush */
         {
             int mmulev;
 
@@ -868,6 +872,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi,
         }
         break;
     case 4: /* write MMU regs */
+    case 0x19: /* LEON3 write MMU regs */
         {
             int reg = (addr >> 8) & 0x1f;
             uint32_t oldreg;
@@ -996,6 +1001,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi,
         }
         break;
     case 0x20: /* MMU passthrough */
+    case 0x1c: /* LEON MMU passthrough */
         {
             switch (size) {
             case 1:
commit 99e448006d9267d71c2e3a629b6e5d29ed67bb30
Author: Ronald Hecht <ronald.hecht at gmx.de>
Date:   Tue Feb 19 17:22:11 2013 +0100

    grlib-apbuart: Add support of various flags
    
     - enable/disable Rx and Tx
     - Rx and Tx interrupt
     - Tx FIFO empty and Tx SHIFT empty
    
    Signed-off-by: Fabien Chouteau <chouteau at adacore.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/grlib_apbuart.c b/hw/grlib_apbuart.c
index 3a61788..ba1685a 100644
--- a/hw/grlib_apbuart.c
+++ b/hw/grlib_apbuart.c
@@ -75,7 +75,6 @@ typedef struct UART {
     CharDriverState *chr;
 
     /* registers */
-    uint32_t receive;
     uint32_t status;
     uint32_t control;
 
@@ -136,12 +135,14 @@ static void grlib_apbuart_receive(void *opaque, const uint8_t *buf, int size)
 {
     UART *uart = opaque;
 
-    uart_add_to_fifo(uart, buf, size);
+    if (uart->control & UART_RECEIVE_ENABLE) {
+        uart_add_to_fifo(uart, buf, size);
 
-    uart->status |= UART_DATA_READY;
+        uart->status |= UART_DATA_READY;
 
-    if (uart->control & UART_RECEIVE_INTERRUPT) {
-        qemu_irq_pulse(uart->irq);
+        if (uart->control & UART_RECEIVE_INTERRUPT) {
+            qemu_irq_pulse(uart->irq);
+        }
     }
 }
 
@@ -193,8 +194,15 @@ static void grlib_apbuart_write(void *opaque, hwaddr addr,
     switch (addr) {
     case DATA_OFFSET:
     case DATA_OFFSET + 3:       /* When only one byte write */
-        c = value & 0xFF;
-        qemu_chr_fe_write(uart->chr, &c, 1);
+        /* Transmit when character device available and transmitter enabled */
+        if ((uart->chr) && (uart->control & UART_TRANSMIT_ENABLE)) {
+            c = value & 0xFF;
+            qemu_chr_fe_write(uart->chr, &c, 1);
+            /* Generate interrupt */
+            if (uart->control & UART_TRANSMIT_INTERRUPT) {
+                qemu_irq_pulse(uart->irq);
+            }
+        }
         return;
 
     case STATUS_OFFSET:
@@ -242,6 +250,19 @@ static int grlib_apbuart_init(SysBusDevice *dev)
     return 0;
 }
 
+static void grlib_apbuart_reset(DeviceState *d)
+{
+    UART *uart = container_of(d, UART, busdev.qdev);
+
+    /* Transmitter FIFO and shift registers are always empty in QEMU */
+    uart->status =  UART_TRANSMIT_FIFO_EMPTY | UART_TRANSMIT_SHIFT_EMPTY;
+    /* Everything is off */
+    uart->control = 0;
+    /* Flush receive FIFO */
+    uart->len = 0;
+    uart->current = 0;
+}
+
 static Property grlib_apbuart_properties[] = {
     DEFINE_PROP_CHR("chrdev", UART, chr),
     DEFINE_PROP_END_OF_LIST(),
@@ -253,6 +274,7 @@ static void grlib_apbuart_class_init(ObjectClass *klass, void *data)
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = grlib_apbuart_init;
+    dc->reset = grlib_apbuart_reset;
     dc->props = grlib_apbuart_properties;
 }
 
commit 8eda222831d31e6562bf1ce50d22fa29e1b6d958
Author: Fabien Chouteau <chouteau at adacore.com>
Date:   Tue Feb 19 17:22:10 2013 +0100

    Typo: replace gptimer by apbuart
    
    Signed-off-by: Fabien Chouteau <chouteau at adacore.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/grlib_apbuart.c b/hw/grlib_apbuart.c
index 760bed0..3a61788 100644
--- a/hw/grlib_apbuart.c
+++ b/hw/grlib_apbuart.c
@@ -242,30 +242,30 @@ static int grlib_apbuart_init(SysBusDevice *dev)
     return 0;
 }
 
-static Property grlib_gptimer_properties[] = {
+static Property grlib_apbuart_properties[] = {
     DEFINE_PROP_CHR("chrdev", UART, chr),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static void grlib_gptimer_class_init(ObjectClass *klass, void *data)
+static void grlib_apbuart_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = grlib_apbuart_init;
-    dc->props = grlib_gptimer_properties;
+    dc->props = grlib_apbuart_properties;
 }
 
-static const TypeInfo grlib_gptimer_info = {
+static const TypeInfo grlib_apbuart_info = {
     .name          = "grlib,apbuart",
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(UART),
-    .class_init    = grlib_gptimer_class_init,
+    .class_init    = grlib_apbuart_class_init,
 };
 
-static void grlib_gptimer_register_types(void)
+static void grlib_apbuart_register_types(void)
 {
-    type_register_static(&grlib_gptimer_info);
+    type_register_static(&grlib_apbuart_info);
 }
 
-type_init(grlib_gptimer_register_types)
+type_init(grlib_apbuart_register_types)
commit af18078d8057203b1ed26ac5534d233aabb36886
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Sat Feb 2 17:17:54 2013 +0000

    disas/i386.c: Add explicit braces round empty for-loop body
    
    Add explicit braces round an empty for-loop body; this fits
    QEMU style and is easier to read than an inconspicuous semicolon
    at the end of the line. It also silences a clang warning:
    
    disas/i386.c:4723:49: warning: for loop has empty body [-Wempty-body]
              for (i = 0; tmp[i] == '0' && tmp[i + 1]; i++);
                                                           ^
    disas/i386.c:4723:49: note: put the semicolon on a separate line to silence this warning [-Wempty-body]
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/disas/i386.c b/disas/i386.c
index 3b006b1..dbecf1f 100644
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -4720,7 +4720,8 @@ print_operand_value (char *buf, size_t bufsize, int hex, bfd_vma disp)
 	  buf[0] = '0';
 	  buf[1] = 'x';
           snprintf_vma (tmp, sizeof(tmp), disp);
-	  for (i = 0; tmp[i] == '0' && tmp[i + 1]; i++);
+          for (i = 0; tmp[i] == '0' && tmp[i + 1]; i++) {
+          }
           pstrcpy (buf + 2, bufsize - 2, tmp + i);
 	}
       else
commit 632314c49ce20ee9c974f07544d9125fbbbfbe1b
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Feb 12 16:13:27 2013 +0000

    qemu-log: Remove qemu_log_try_set_file() and its users
    
    Remove the function qemu_log_try_set_file() and its users (which
    are all in TCG code generation functions for various targets).
    This function was added to abstract out code which was originally
    written as "if (!logfile) logfile = stderr;" in order that BUG:
    case code which did an unguarded "fprintf(logfile, ...)" would
    not crash if debug logging was not enabled. Since those direct
    uses of logfile have also been abstracted away into qemu_log()
    calls which check for a NULL logfile, there is no need for the
    target-* files to mess with the user's chosen logging settings.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Michael Walle <michael at walle.cc>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/include/qemu/log.h b/include/qemu/log.h
index 5a46555..4527003 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -126,14 +126,6 @@ static inline void qemu_log_set_file(FILE *f)
     qemu_logfile = f;
 }
 
-/* Set up a new log file, only if none is set */
-static inline void qemu_log_try_set_file(FILE *f)
-{
-    if (!qemu_logfile) {
-        qemu_logfile = f;
-    }
-}
-
 /* define log items */
 typedef struct QEMULogItem {
     int mask;
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 04a5379..2cf01a5 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3215,8 +3215,6 @@ gen_intermediate_code_internal(CPUCRISState *env, TranslationBlock *tb,
     int num_insns;
     int max_insns;
 
-    qemu_log_try_set_file(stderr);
-
     if (env->pregs[PR_VR] == 32) {
         dc->decoder = crisv32_decoder;
         dc->clear_locked_irq = 0;
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 6b87340..ccaf838 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -1012,8 +1012,6 @@ static void gen_intermediate_code_internal(CPULM32State *env,
     int num_insns;
     int max_insns;
 
-    qemu_log_try_set_file(stderr);
-
     pc_start = tb->pc;
     dc->env = env;
     dc->tb = tb;
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 12ea820..687b7d1 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1734,8 +1734,6 @@ gen_intermediate_code_internal(CPUMBState *env, TranslationBlock *tb,
     int num_insns;
     int max_insns;
 
-    qemu_log_try_set_file(stderr);
-
     pc_start = tb->pc;
     dc->env = env;
     dc->tb = tb;
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 1e1b30c..23e853e 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1670,8 +1670,6 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     int num_insns;
     int max_insns;
 
-    qemu_log_try_set_file(stderr);
-
     pc_start = tb->pc;
     dc->tb = tb;
 
commit 685cbd2f63a48bd111bd2c3c4a2228029595ba12
Author: Hervé Poussineau <hpoussin at reactos.org>
Date:   Thu Feb 21 22:58:08 2013 +0100

    xhci: fix bad print specifier
    
    This fixes the following compilation error:
    hw/usb/hcd-xhci.c:1156:17: error: format ‘%llx’ expects argument of type
    ‘long long unsigned int’, but argument 4 has type ‘unsigned int’
    
    Signed-off-by: Hervé Poussineau <hpoussin at reactos.org>
    Reviewed-by: Stefan Weil <sw at weilnetz.de>
    Acked-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 5796102..07afdee 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1152,8 +1152,8 @@ static XHCIStreamContext *xhci_find_stream(XHCIEPContext *epctx,
 
     if (sctx->sct == -1) {
         xhci_dma_read_u32s(epctx->xhci, sctx->pctx, ctx, sizeof(ctx));
-        fprintf(stderr, "%s: init sctx #%d @ %lx: %08x %08x\n", __func__,
-                streamid, sctx->pctx, ctx[0], ctx[1]);
+        fprintf(stderr, "%s: init sctx #%d @ " DMA_ADDR_FMT ": %08x %08x\n",
+                __func__, streamid, sctx->pctx, ctx[0], ctx[1]);
         sct = (ctx[0] >> 1) & 0x07;
         if (epctx->lsa && sct != 1) {
             *cc_error = CC_INVALID_STREAM_TYPE_ERROR;
commit 30e8f22b7bc6694b9abea43f45db6fd5be4df429
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Fri Feb 22 20:53:33 2013 +0100

    gtk: Rename File to Machine menu and add pause, reset and power down items
    
    This adds basic guest control commands to the "Machine" menu, a nice bit of
    added value for the GTK UI.
    
    We use "pause" as the term for stopping the machine here, so the related
    caption text is reworded as well.
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/ui/gtk.c b/ui/gtk.c
index 0384f26..dcce36d 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -91,8 +91,11 @@ typedef struct GtkDisplayState
 
     GtkAccelGroup *accel_group;
 
-    GtkWidget *file_menu_item;
-    GtkWidget *file_menu;
+    GtkWidget *machine_menu_item;
+    GtkWidget *machine_menu;
+    GtkWidget *pause_item;
+    GtkWidget *reset_item;
+    GtkWidget *powerdown_item;
     GtkWidget *quit_item;
 
     GtkWidget *view_menu_item;
@@ -128,6 +131,8 @@ typedef struct GtkDisplayState
     GdkCursor *null_cursor;
     Notifier mouse_mode_notifier;
     gboolean free_scale;
+
+    bool external_pause_update;
 } GtkDisplayState;
 
 static GtkDisplayState *global_state;
@@ -171,14 +176,19 @@ static void gd_update_caption(GtkDisplayState *s)
     const char *status = "";
     gchar *title;
     const char *grab = "";
+    bool is_paused = !runstate_is_running();
 
     if (gd_is_grab_active(s)) {
         grab = " - Press Ctrl+Alt+G to release grab";
     }
 
-    if (!runstate_is_running()) {
-        status = " [Stopped]";
+    if (is_paused) {
+        status = " [Paused]";
     }
+    s->external_pause_update = true;
+    gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->pause_item),
+                                   is_paused);
+    s->external_pause_update = false;
 
     if (qemu_name) {
         title = g_strdup_printf("QEMU (%s)%s%s", qemu_name, status, grab);
@@ -595,6 +605,30 @@ static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque)
 
 /** Window Menu Actions **/
 
+static void gd_menu_pause(GtkMenuItem *item, void *opaque)
+{
+    GtkDisplayState *s = opaque;
+
+    if (s->external_pause_update) {
+        return;
+    }
+    if (runstate_is_running()) {
+        qmp_stop(NULL);
+    } else {
+        qmp_cont(NULL);
+    }
+}
+
+static void gd_menu_reset(GtkMenuItem *item, void *opaque)
+{
+    qmp_system_reset(NULL);
+}
+
+static void gd_menu_powerdown(GtkMenuItem *item, void *opaque)
+{
+    qmp_system_powerdown(NULL);
+}
+
 static void gd_menu_quit(GtkMenuItem *item, void *opaque)
 {
     qmp_quit(NULL);
@@ -968,6 +1002,12 @@ static void gd_connect_signals(GtkDisplayState *s)
     g_signal_connect(s->drawing_area, "key-release-event",
                      G_CALLBACK(gd_key_event), s);
 
+    g_signal_connect(s->pause_item, "activate",
+                     G_CALLBACK(gd_menu_pause), s);
+    g_signal_connect(s->reset_item, "activate",
+                     G_CALLBACK(gd_menu_reset), s);
+    g_signal_connect(s->powerdown_item, "activate",
+                     G_CALLBACK(gd_menu_powerdown), s);
     g_signal_connect(s->quit_item, "activate",
                      G_CALLBACK(gd_menu_quit), s);
     g_signal_connect(s->full_screen_item, "activate",
@@ -1001,15 +1041,31 @@ static void gd_create_menus(GtkDisplayState *s)
     int i;
 
     accel_group = gtk_accel_group_new();
-    s->file_menu = gtk_menu_new();
-    gtk_menu_set_accel_group(GTK_MENU(s->file_menu), accel_group);
-    s->file_menu_item = gtk_menu_item_new_with_mnemonic(_("_File"));
+    s->machine_menu = gtk_menu_new();
+    gtk_menu_set_accel_group(GTK_MENU(s->machine_menu), accel_group);
+    s->machine_menu_item = gtk_menu_item_new_with_mnemonic(_("_Machine"));
+
+    s->pause_item = gtk_check_menu_item_new_with_mnemonic(_("_Pause"));
+    gtk_menu_append(GTK_MENU(s->machine_menu), s->pause_item);
+
+    separator = gtk_separator_menu_item_new();
+    gtk_menu_append(GTK_MENU(s->machine_menu), separator);
+
+    s->reset_item = gtk_image_menu_item_new_with_mnemonic(_("_Reset"));
+    gtk_menu_append(GTK_MENU(s->machine_menu), s->reset_item);
+
+    s->powerdown_item = gtk_image_menu_item_new_with_mnemonic(_("Power _Down"));
+    gtk_menu_append(GTK_MENU(s->machine_menu), s->powerdown_item);
+
+    separator = gtk_separator_menu_item_new();
+    gtk_menu_append(GTK_MENU(s->machine_menu), separator);
 
     s->quit_item = gtk_image_menu_item_new_from_stock(GTK_STOCK_QUIT, NULL);
     gtk_stock_lookup(GTK_STOCK_QUIT, &item);
     gtk_menu_item_set_accel_path(GTK_MENU_ITEM(s->quit_item),
-                                 "<QEMU>/File/Quit");
-    gtk_accel_map_add_entry("<QEMU>/File/Quit", item.keyval, item.modifier);
+                                 "<QEMU>/Machine/Quit");
+    gtk_accel_map_add_entry("<QEMU>/Machine/Quit", item.keyval, item.modifier);
+    gtk_menu_append(GTK_MENU(s->machine_menu), s->quit_item);
 
     s->view_menu = gtk_menu_new();
     gtk_menu_set_accel_group(GTK_MENU(s->view_menu), accel_group);
@@ -1085,9 +1141,9 @@ static void gd_create_menus(GtkDisplayState *s)
     gtk_window_add_accel_group(GTK_WINDOW(s->window), accel_group);
     s->accel_group = accel_group;
 
-    gtk_menu_append(GTK_MENU(s->file_menu), s->quit_item);
-    gtk_menu_item_set_submenu(GTK_MENU_ITEM(s->file_menu_item), s->file_menu);
-    gtk_menu_shell_append(GTK_MENU_SHELL(s->menu_bar), s->file_menu_item);
+    gtk_menu_item_set_submenu(GTK_MENU_ITEM(s->machine_menu_item),
+                              s->machine_menu);
+    gtk_menu_shell_append(GTK_MENU_SHELL(s->menu_bar), s->machine_menu_item);
 
     gtk_menu_item_set_submenu(GTK_MENU_ITEM(s->view_menu_item), s->view_menu);
     gtk_menu_shell_append(GTK_MENU_SHELL(s->menu_bar), s->view_menu_item);
commit 104092825ac3274d16ffc35b7ef9ac8c636e1c48
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Feb 22 20:33:34 2013 +0100

    ui/gtk: Use menu item from stock for full screen
    
    This reduces the required translations and gives a nicer menu
    with an icon.
    
    The full screen menu item is no longer a check menu item.
    A checked item is not visible in full screen mode,
    so it is not needed for this special menu item.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Message-id: 1361561614-11180-1-git-send-email-sw at weilnetz.de
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/po/de_DE.po b/po/de_DE.po
index cb74d7c..8755783 100644
--- a/po/de_DE.po
+++ b/po/de_DE.po
@@ -24,10 +24,6 @@ msgstr "_Datei"
 msgid "_View"
 msgstr "_Ansicht"
 
-#: ../ui/gtk.c:1002
-msgid "_Full Screen"
-msgstr "Voll_bild"
-
 #: ../ui/gtk.c:1029
 msgid "Zoom To _Fit"
 msgstr "Auf _Fenstergröße skalieren"
diff --git a/po/it.po b/po/it.po
index 2b23491..7d77fff 100644
--- a/po/it.po
+++ b/po/it.po
@@ -24,10 +24,6 @@ msgstr "_File"
 msgid "_View"
 msgstr "_Visualizza"
 
-#: ../ui/gtk.c:1002
-msgid "_Full Screen"
-msgstr "_Schermo intero"
-
 #: ../ui/gtk.c:1029
 msgid "Zoom To _Fit"
 msgstr "Adatta alla _finestra"
diff --git a/po/messages.po b/po/messages.po
index a90cd6f..191e81c 100644
--- a/po/messages.po
+++ b/po/messages.po
@@ -24,10 +24,6 @@ msgstr ""
 msgid "_View"
 msgstr ""
 
-#: ../ui/gtk.c:1002
-msgid "_Full Screen"
-msgstr ""
-
 #: ../ui/gtk.c:1029
 msgid "Zoom To _Fit"
 msgstr ""
diff --git a/ui/gtk.c b/ui/gtk.c
index bc8bdfd..0384f26 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -633,7 +633,7 @@ static void gd_menu_full_screen(GtkMenuItem *item, void *opaque)
 {
     GtkDisplayState *s = opaque;
 
-    if (gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(s->full_screen_item))) {
+    if (!s->full_screen) {
         gtk_notebook_set_show_tabs(GTK_NOTEBOOK(s->notebook), FALSE);
         gtk_widget_set_size_request(s->menu_bar, 0, 0);
         gtk_widget_set_size_request(s->drawing_area, -1, -1);
@@ -1015,7 +1015,8 @@ static void gd_create_menus(GtkDisplayState *s)
     gtk_menu_set_accel_group(GTK_MENU(s->view_menu), accel_group);
     s->view_menu_item = gtk_menu_item_new_with_mnemonic(_("_View"));
 
-    s->full_screen_item = gtk_check_menu_item_new_with_mnemonic(_("_Full Screen"));
+    s->full_screen_item =
+        gtk_image_menu_item_new_from_stock(GTK_STOCK_FULLSCREEN, NULL);
     gtk_menu_item_set_accel_path(GTK_MENU_ITEM(s->full_screen_item),
                                  "<QEMU>/View/Full Screen");
     gtk_accel_map_add_entry("<QEMU>/View/Full Screen", GDK_KEY_f, GDK_CONTROL_MASK | GDK_MOD1_MASK);
commit 28d2e5b27d538d94d2489d657b563c58b4d69bc4
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Feb 22 20:09:59 2013 +0100

    ui/gtk: Support versions of VTE before 0.26
    
    This is needed for current Debian stable (Squeeze).
    
    VTE versions before 0.26 did not support VtePty.
    
    Lower the version requirement and use alternate code which works for Debian.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Message-id: 1361560199-28906-1-git-send-email-sw at weilnetz.de
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/configure b/configure
index 544c3f6..dcaa67c 100755
--- a/configure
+++ b/configure
@@ -1645,7 +1645,7 @@ fi
 
 if test "$gtk" != "no"; then
     if $pkg_config --exists 'gtk+-2.0 >= 2.18.0' && \
-       $pkg_config --exists 'vte >= 0.26.0'; then
+       $pkg_config --exists 'vte >= 0.24.0'; then
 	gtk_cflags=`$pkg_config --cflags gtk+-2.0 2>/dev/null`
 	gtk_libs=`$pkg_config --libs gtk+-2.0 2>/dev/null`
 	vte_cflags=`$pkg_config --cflags vte 2>/dev/null`
diff --git a/ui/gtk.c b/ui/gtk.c
index 008a6f8..bc8bdfd 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -868,7 +868,9 @@ static GSList *gd_vc_init(GtkDisplayState *s, VirtualConsole *vc, int index, GSL
     const char *label;
     char buffer[32];
     char path[32];
+#if VTE_CHECK_VERSION(0, 26, 0)
     VtePty *pty;
+#endif
     GIOChannel *chan;
     GtkWidget *scrolled_window;
     GtkAdjustment *vadjustment;
@@ -901,9 +903,12 @@ static GSList *gd_vc_init(GtkDisplayState *s, VirtualConsole *vc, int index, GSL
     cfmakeraw(&tty);
     tcsetattr(slave_fd, TCSAFLUSH, &tty);
 
+#if VTE_CHECK_VERSION(0, 26, 0)
     pty = vte_pty_new_foreign(master_fd, NULL);
-
     vte_terminal_set_pty_object(VTE_TERMINAL(vc->terminal), pty);
+#else
+    vte_terminal_set_pty(VTE_TERMINAL(vc->terminal), master_fd);
+#endif
 
     vte_terminal_set_scrollback_lines(VTE_TERMINAL(vc->terminal), -1);
 
commit c95e3080a44946ac5739542b549f5a10ee4ec377
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Feb 22 21:08:51 2013 +0100

    Reenable -Wstrict-prototypes
    
    One part of this patch reverts commit 22bc9a46, which disabled the
    warning. The rest of it deals with the warning by adding a #pragma for
    newer gcc and by disabling -Werror for compilers that can't deal with
    the #pragma.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Message-id: 1361563731-13307-1-git-send-email-kwolf at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/configure b/configure
index 0eb25dd..544c3f6 100755
--- a/configure
+++ b/configure
@@ -284,7 +284,7 @@ sdl_config="${SDL_CONFIG-${cross_prefix}sdl-config}"
 # default flags for all hosts
 QEMU_CFLAGS="-fno-strict-aliasing $QEMU_CFLAGS"
 QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
-QEMU_CFLAGS="-Wredundant-decls $QEMU_CFLAGS"
+QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
 QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS"
 QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include"
 if test "$debug_info" = "yes"; then
@@ -3115,20 +3115,27 @@ if compile_prog "" "" ; then
 fi
 
 ########################################
-# check whether we can disable the -Wunused-but-set-variable
-# option with a pragma (this is needed to silence a warning in
-# some versions of the valgrind VALGRIND_STACK_DEREGISTER macro.)
-# This test has to be compiled with -Werror as otherwise an
-# unknown pragma is only a warning.
+# check whether we can disable warning option with a pragma (this is needed
+# to silence warnings in the headers of some versions of external libraries).
+# This test has to be compiled with -Werror as otherwise an unknown pragma is
+# only a warning.
+#
+# If we can't selectively disable warning in the code, disable -Werror so that
+# the build doesn't fail anyway.
+
 pragma_disable_unused_but_set=no
 cat > $TMPC << EOF
 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+
 int main(void) {
     return 0;
 }
 EOF
 if compile_prog "-Werror" "" ; then
     pragma_diagnostic_available=yes
+else
+    werror=no
 fi
 
 ########################################
diff --git a/ui/gtk.c b/ui/gtk.c
index 5f91de4..008a6f8 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -34,7 +34,18 @@
 #define GETTEXT_PACKAGE "qemu"
 #define LOCALEDIR "po"
 
+#include "qemu-common.h"
+
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an -Wstrict-prototypes warning in GTK headers */
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#endif
 #include <gtk/gtk.h>
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+#endif
+
+
 #include <gdk/gdkkeysyms.h>
 #include <glib/gi18n.h>
 #include <locale.h>
@@ -46,7 +57,6 @@
 #include <pty.h>
 #include <math.h>
 
-#include "qemu-common.h"
 #include "ui/console.h"
 #include "sysemu/sysemu.h"
 #include "qmp-commands.h"
commit f437d0a3c24e471a855da33a086fe529e09a06af
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 21:06:31 2013 -0800

    target-i386: Use movcond to implement shiftd.
    
    With this being all straight-line code, it can get deleted
    when the cc variables die.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

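For readers who have not met it before, the movcond op used throughout this
series is TCG's branch-free select; the sketch below only illustrates its
semantics (the variable names are invented, not taken from the patch):

    /* tcg_gen_movcond_tl(TCG_COND_NE, dest, c1, c2, vtrue, vfalse)
     * generates straight-line code equivalent to: */
    dest = (c1 != c2) ? vtrue : vfalse;

Because no branch is emitted, the whole computation can later be removed by
TCG's liveness pass if dest turns out to be dead, which is what the commit
message means by code that "can get deleted when the cc variables die".
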
diff --git a/target-i386/translate.c b/target-i386/translate.c
index b9a2692..439d19e 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -72,7 +72,6 @@ static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
-static TCGv cpu_tmp5;
 
 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
 
@@ -1577,12 +1576,55 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
+static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
+                            TCGv count, bool is_right)
+{
+    TCGv_i32 z32, s32, oldop;
+    TCGv z_tl;
+
+    /* Store the results into the CC variables.  If we know that the
+       variable must be dead, store unconditionally.  Otherwise we'll
+       need to not disrupt the current contents.  */
+    z_tl = tcg_const_tl(0);
+    if (cc_op_live[s->cc_op] & USES_CC_DST) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
+                           result, cpu_cc_dst);
+    } else {
+        tcg_gen_mov_tl(cpu_cc_dst, result);
+    }
+    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
+                           shm1, cpu_cc_src);
+    } else {
+        tcg_gen_mov_tl(cpu_cc_src, shm1);
+    }
+    tcg_temp_free(z_tl);
+
+    /* Get the two potential CC_OP values into temporaries.  */
+    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    if (s->cc_op == CC_OP_DYNAMIC) {
+        oldop = cpu_cc_op;
+    } else {
+        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
+        oldop = cpu_tmp3_i32;
+    }
+
+    /* Conditionally store the CC_OP value.  */
+    z32 = tcg_const_i32(0);
+    s32 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(s32, count);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_temp_free_i32(z32);
+    tcg_temp_free_i32(s32);
+
+    /* The CC_OP value is no longer predictable.  */
+    set_cc_op(s, CC_OP_DYNAMIC);
+}
+
 static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
                             int is_right, int is_arith)
 {
     target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
-    TCGv_i32 z32, s32, oldop;
-    TCGv z_tl;
 
     /* load */
     if (op1 == OR_TMP0) {
@@ -1616,43 +1658,7 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_T0(ot, op1);
     }
 
-    /* Store the results into the CC variables.  If we know that the
-       variable must be dead, store unconditionally.  Otherwise we'll
-       need to not disrupt the current contents.  */
-    z_tl = tcg_const_tl(0);
-    if (cc_op_live[s->cc_op] & USES_CC_DST) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, cpu_T[1], z_tl,
-                           cpu_T[0], cpu_cc_dst);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-    }
-    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, cpu_T[1], z_tl,
-                           cpu_tmp0, cpu_cc_src);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0);
-    }
-    tcg_temp_free(z_tl);
-
-    /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
-    if (s->cc_op == CC_OP_DYNAMIC) {
-        oldop = cpu_cc_op;
-    } else {
-        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
-        oldop = cpu_tmp3_i32;
-    }
-
-    /* Conditionally store the CC_OP value.  */
-    z32 = tcg_const_i32(0);
-    s32 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(s32, cpu_T[1]);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
-    tcg_temp_free_i32(z32);
-    tcg_temp_free_i32(s32);
-
-    /* The CC_OP value is no longer predictable.  */
-    set_cc_op(s, CC_OP_DYNAMIC);
+    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
@@ -1931,128 +1937,88 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 
 /* XXX: add faster immediate case */
 static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
-                             int is_right, TCGv count)
+                             bool is_right, TCGv count_in)
 {
-    int label1, label2, data_bits;
-    target_ulong mask;
-    TCGv t0, t1, t2, a0;
-
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-    t2 = tcg_temp_local_new();
-    a0 = tcg_temp_local_new();
-
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    target_ulong mask = (ot == OT_QUAD ? 63 : 31);
+    TCGv count;
 
     /* load */
     if (op1 == OR_TMP0) {
-        tcg_gen_mov_tl(a0, cpu_A0);
-        gen_op_ld_v(ot + s->mem_index, t0, a0);
+        gen_op_ld_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_v_reg(ot, t0, op1);
+        gen_op_mov_TN_reg(ot, 0, op1);
     }
 
-    tcg_gen_andi_tl(t2, count, mask);
-    tcg_gen_mov_tl(t1, cpu_T[1]);
+    count = tcg_temp_new();
+    tcg_gen_andi_tl(count, count_in, mask);
 
-    /* Must test zero case to avoid using undefined behaviour in TCG
-       shifts. */
-    label1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-    
-    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
-    if (ot == OT_WORD) {
-        /* Note: we implement the Intel behaviour for shift count > 16 */
+    switch (ot) {
+    case OT_WORD:
+        /* Note: we implement the Intel behaviour for shift count > 16.
+           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
+           portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_andi_tl(t0, t0, 0xffff);
-            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
-            tcg_gen_or_tl(t0, t0, cpu_tmp0);
-            tcg_gen_ext32u_tl(t0, t0);
-
-            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-            
-            /* only needed if count > 16, but a test would complicate */
-            tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
-            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
-
-            tcg_gen_shr_tl(t0, t0, t2);
-
-            tcg_gen_or_tl(t0, t0, cpu_tmp0);
+            tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
+            tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
+            tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
         } else {
-            /* XXX: not optimal */
-            tcg_gen_andi_tl(t0, t0, 0xffff);
-            tcg_gen_shli_tl(t1, t1, 16);
-            tcg_gen_or_tl(t1, t1, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            
-            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-            tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
-            tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
-            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
-
-            tcg_gen_shl_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
-            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
         }
-    } else {
-        data_bits = 8 << ot;
+        /* FALLTHRU */
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
+        tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(t0, t0);
-
-            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
+        } else {
+            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
+            tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+            tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
+        }
+        break;
+#endif
+    default:
+        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        if (is_right) {
+            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
 
-            tcg_gen_shr_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
-            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
-            
+            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
         } else {
-            if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(t1, t1);
-
-            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-            
-            tcg_gen_shl_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
-            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            if (ot == OT_WORD) {
+                /* Only needed if count > 16, for Intel behaviour.  */
+                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
+                tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
+                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+            }
+
+            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
         }
+        tcg_gen_movi_tl(cpu_tmp4, 0);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
+                           cpu_tmp4, cpu_T[1]);
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        break;
     }
-    tcg_gen_mov_tl(t1, cpu_tmp4);
 
-    gen_set_label(label1);
     /* store */
     if (op1 == OR_TMP0) {
-        gen_op_st_v(ot + s->mem_index, t0, a0);
-    } else {
-        gen_op_mov_reg_v(ot, op1, t0);
-    }
-    
-    /* Update eflags data because we cannot predict flags afterward.  */
-    gen_update_cc_op(s);
-    set_cc_op(s, CC_OP_DYNAMIC);
-
-    label2 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
-
-    tcg_gen_mov_tl(cpu_cc_src, t1);
-    tcg_gen_mov_tl(cpu_cc_dst, t0);
-    if (is_right) {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+        gen_op_st_T0_A0(ot + s->mem_index);
     } else {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+        gen_op_mov_reg_T0(ot, op1);
     }
-    gen_set_label(label2);
 
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(a0);
+    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
+    tcg_temp_free(count);
 }
 
 static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
@@ -8401,7 +8367,6 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     cpu_tmp4 = tcg_temp_new();
-    cpu_tmp5 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     cpu_cc_srcT = tcg_temp_local_new();
commit e2f515cf2f3795b9edb68eee42262e7c5f88fe98
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Feb 19 14:48:43 2013 -0800

    target-i386: Discard CC_OP computation in set_cc_op also
    
    The shift and rotate insns use movcond to set CC_OP, and thus
    achieve a conditional EFLAGS setting.  By discarding CC_OP in
    a later flags setting insn, we can discard that movcond.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

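A short note on the mechanism, offered as a reading of the patch rather than
anything new: TCG's liveness pass drops any op whose result is never read, so
explicitly discarding the stale CC_OP value (as the hunk below does) is what
lets the conditional movcond store from the shift/rotate paths be eliminated.

    /* Sketch of the effect: once the old value is declared unused ...   */
    tcg_gen_discard_i32(cpu_cc_op);
    /* ... the earlier movcond that wrote cpu_cc_op has no live consumer
     * and is removed when the ops are optimized.                        */
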
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6b109e8..b9a2692 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -239,10 +239,18 @@ static void set_cc_op(DisasContext *s, CCOp op)
         tcg_gen_discard_tl(cpu_cc_srcT);
     }
 
+    if (op == CC_OP_DYNAMIC) {
+        /* The DYNAMIC setting is translator only, and should never be
+           stored.  Thus we always consider it clean.  */
+        s->cc_op_dirty = false;
+    } else {
+        /* Discard any computed CC_OP value (see shifts).  */
+        if (s->cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_discard_i32(cpu_cc_op);
+        }
+        s->cc_op_dirty = true;
+    }
     s->cc_op = op;
-    /* The DYNAMIC setting is translator only, and should never be
-       stored.  Thus we always consider it clean.  */
-    s->cc_op_dirty = (op != CC_OP_DYNAMIC);
 }
 
 static void gen_update_cc_op(DisasContext *s)
commit 34d80a55ff8517fd37bcfea5063b9797e2bd9132
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 30 19:16:45 2013 -0800

    target-i386: Use movcond to implement rotate flags.
    
    With this being all straight-line code, it can get deleted
    when the cc variables die.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 74694d1..6b109e8 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1698,167 +1698,172 @@ static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
         tcg_gen_shri_tl(ret, arg1, -arg2);
 }
 
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, 
-                          int is_right)
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
 {
-    target_ulong mask;
-    int label1, label2, data_bits;
-    TCGv t0, t1, t2, a0;
-
-    /* XXX: inefficient, but we must use local temps */
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-    t2 = tcg_temp_local_new();
-    a0 = tcg_temp_local_new();
-
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    TCGv_i32 t0, t1;
 
     /* load */
     if (op1 == OR_TMP0) {
-        tcg_gen_mov_tl(a0, cpu_A0);
-        gen_op_ld_v(ot + s->mem_index, t0, a0);
+        gen_op_ld_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_v_reg(ot, t0, op1);
+        gen_op_mov_TN_reg(ot, 0, op1);
     }
 
-    tcg_gen_mov_tl(t1, cpu_T[1]);
-
-    tcg_gen_andi_tl(t1, t1, mask);
-
-    /* Must test zero case to avoid using undefined behaviour in TCG
-       shifts. */
-    label1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
-    
-    if (ot <= OT_WORD)
-        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
-    else
-        tcg_gen_mov_tl(cpu_tmp0, t1);
-    
-    gen_extu(ot, t0);
-    tcg_gen_mov_tl(t2, t0);
+    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
 
-    data_bits = 8 << ot;
-    /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
-       fix TCG definition) */
-    if (is_right) {
-        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
-        tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
-        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
-    } else {
-        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
-        tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
-        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+    switch (ot) {
+    case OT_BYTE:
+        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
+        tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+        tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
+        goto do_long;
+    case OT_WORD:
+        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
+        tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
+        goto do_long;
+    do_long:
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        if (is_right) {
+            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+        } else {
+            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+        }
+        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+        break;
+#endif
+    default:
+        if (is_right) {
+            tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        } else {
+            tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        }
+        break;
     }
-    tcg_gen_or_tl(t0, t0, cpu_tmp4);
 
-    gen_set_label(label1);
     /* store */
     if (op1 == OR_TMP0) {
-        gen_op_st_v(ot + s->mem_index, t0, a0);
+        gen_op_st_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_reg_v(ot, op1, t0);
+        gen_op_mov_reg_T0(ot, op1);
     }
-    
-    /* update eflags.  It is needed anyway most of the time, do it always.  */
-    gen_compute_eflags(s);
-    assert(s->cc_op == CC_OP_EFLAGS);
 
-    label2 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
+    /* We'll need the flags computed into CC_SRC.  */
+    gen_compute_eflags(s);
 
-    tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
-    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
-    tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
-    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
-    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+    /* The value that was "rotated out" is now present at the other end
+       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
+       since we've computed the flags into CC_SRC, these variables are
+       currently dead.  */
     if (is_right) {
-        tcg_gen_shri_tl(t0, t0, data_bits - 1);
+        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+        tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+    } else {
+        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+        tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
     }
-    tcg_gen_andi_tl(t0, t0, CC_C);
-    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
-    gen_set_label(label2);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(a0);
+    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
+
+    /* Now conditionally store the new CC_OP value.  If the shift count
+       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
+       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
+       exactly as we computed above.  */
+    t0 = tcg_const_i32(0);
+    t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
+    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
+                        cpu_tmp2_i32, cpu_tmp3_i32);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+
+    /* The CC_OP value is no longer predictable.  */ 
+    set_cc_op(s, CC_OP_DYNAMIC);
 }
 
 static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
                           int is_right)
 {
-    int mask;
-    int data_bits;
-    TCGv t0, t1, a0;
-
-    /* XXX: inefficient, but we must use local temps */
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-    a0 = tcg_temp_local_new();
-
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    int shift;
 
     /* load */
     if (op1 == OR_TMP0) {
-        tcg_gen_mov_tl(a0, cpu_A0);
-        gen_op_ld_v(ot + s->mem_index, t0, a0);
+        gen_op_ld_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_v_reg(ot, t0, op1);
+        gen_op_mov_TN_reg(ot, 0, op1);
     }
 
-    gen_extu(ot, t0);
-    tcg_gen_mov_tl(t1, t0);
-
     op2 &= mask;
-    data_bits = 8 << ot;
     if (op2 != 0) {
-        int shift = op2 & ((1 << (3 + ot)) - 1);
-        if (is_right) {
-            tcg_gen_shri_tl(cpu_tmp4, t0, shift);
-            tcg_gen_shli_tl(t0, t0, data_bits - shift);
-        }
-        else {
-            tcg_gen_shli_tl(cpu_tmp4, t0, shift);
-            tcg_gen_shri_tl(t0, t0, data_bits - shift);
+        switch (ot) {
+#ifdef TARGET_X86_64
+        case OT_LONG:
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+            if (is_right) {
+                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+            } else {
+                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+            }
+            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            break;
+#endif
+        default:
+            if (is_right) {
+                tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
+            } else {
+                tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
+            }
+            break;
+        case OT_BYTE:
+            mask = 7;
+            goto do_shifts;
+        case OT_WORD:
+            mask = 15;
+        do_shifts:
+            shift = op2 & mask;
+            if (is_right) {
+                shift = mask + 1 - shift;
+            }
+            gen_extu(ot, cpu_T[0]);
+            tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
+            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
+            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
         }
-        tcg_gen_or_tl(t0, t0, cpu_tmp4);
     }
 
     /* store */
     if (op1 == OR_TMP0) {
-        gen_op_st_v(ot + s->mem_index, t0, a0);
+        gen_op_st_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_reg_v(ot, op1, t0);
+        gen_op_mov_reg_T0(ot, op1);
     }
 
     if (op2 != 0) {
-        /* update eflags */
+        /* Compute the flags into CC_SRC.  */
         gen_compute_eflags(s);
-        assert(s->cc_op == CC_OP_EFLAGS);
 
-        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
-        tcg_gen_xor_tl(cpu_tmp0, t1, t0);
-        tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
-        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+        /* The value that was "rotated out" is now present at the other end
+           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
+           since we've computed the flags into CC_SRC, these variables are
+           currently dead.  */
         if (is_right) {
-            tcg_gen_shri_tl(t0, t0, data_bits - 1);
+            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+            tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+        } else {
+            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+            tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
         }
-        tcg_gen_andi_tl(t0, t0, CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
+        set_cc_op(s, CC_OP_ADCOX);
     }
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(a0);
 }
 
 /* XXX: add faster immediate = 1 case */
commit a41f62f592d9ecf97df4a12023760fe082b1ee68
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 30 17:52:59 2013 -0800

    target-i386: Use movcond to implement shift flags.
    
    With this being all straight-line code, it can get deleted
    when the cc variables die.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
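
Same straight-line idea as the rotate patch: the shifted result and the value
shifted by count-1 are computed unconditionally, and movcond commits them
(and the new CC_OP) only when the masked count is non-zero.  A rough C model
of the effect for 32-bit SHL (illustrative only; names are mine):

    #include <stdint.h>

    typedef struct { uint32_t cc_dst, cc_src; int cc_op; } cc_state;

    /* If the masked count is zero the architectural flags are untouched,
       so the old lazy-flags state is kept; otherwise CC_SRC holds the
       value shifted by count-1 so the helper can pull CF out of its top
       bit later.  The 'if' stands in for the generated movconds.  */
    static cc_state shl32_cc(cc_state old, int shl_op, uint32_t src,
                             unsigned count)
    {
        cc_state s = old;
        unsigned c = count & 31;
        if (c != 0) {
            s.cc_dst = src << c;          /* the result */
            s.cc_src = src << (c - 1);    /* one bit short of the result */
            s.cc_op = shl_op;             /* e.g. CC_OP_SHLL */
        }
        return s;
    }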

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 007d326..74694d1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1572,15 +1572,9 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
 static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
                             int is_right, int is_arith)
 {
-    target_ulong mask;
-    int shift_label;
-    TCGv t0, t1, t2;
-
-    if (ot == OT_QUAD) {
-        mask = 0x3f;
-    } else {
-        mask = 0x1f;
-    }
+    target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+    TCGv_i32 z32, s32, oldop;
+    TCGv z_tl;
 
     /* load */
     if (op1 == OR_TMP0) {
@@ -1589,25 +1583,22 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_TN_reg(ot, 0, op1);
     }
 
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-    t2 = tcg_temp_local_new();
-
-    tcg_gen_andi_tl(t2, cpu_T[1], mask);
+    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+    tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, cpu_T[0]);
-            tcg_gen_mov_tl(t0, cpu_T[0]);
-            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2);
+            tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         } else {
             gen_extu(ot, cpu_T[0]);
-            tcg_gen_mov_tl(t0, cpu_T[0]);
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2);
+            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         }
     } else {
-        tcg_gen_mov_tl(t0, cpu_T[0]);
-        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2);
+        tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
     }
 
     /* store */
@@ -1617,50 +1608,49 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_T0(ot, op1);
     }
 
-    /* Update eflags data because we cannot predict flags afterward.  */
-    gen_update_cc_op(s);
-    set_cc_op(s, CC_OP_DYNAMIC);
-
-    tcg_gen_mov_tl(t1, cpu_T[0]);
-
-    shift_label = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label);
-
-    tcg_gen_addi_tl(t2, t2, -1);
-    tcg_gen_mov_tl(cpu_cc_dst, t1);
-
-    if (is_right) {
-        if (is_arith) {
-            tcg_gen_sar_tl(cpu_cc_src, t0, t2);
-        } else {
-            tcg_gen_shr_tl(cpu_cc_src, t0, t2);
-        }
+    /* Store the results into the CC variables.  If we know that the
+       variable must be dead, store unconditionally.  Otherwise we'll
+       need to not disrupt the current contents.  */
+    z_tl = tcg_const_tl(0);
+    if (cc_op_live[s->cc_op] & USES_CC_DST) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, cpu_T[1], z_tl,
+                           cpu_T[0], cpu_cc_dst);
+    } else {
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    }
+    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, cpu_T[1], z_tl,
+                           cpu_tmp0, cpu_cc_src);
     } else {
-        tcg_gen_shl_tl(cpu_cc_src, t0, t2);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0);
     }
+    tcg_temp_free(z_tl);
 
-    if (is_right) {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+    /* Get the two potential CC_OP values into temporaries.  */
+    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    if (s->cc_op == CC_OP_DYNAMIC) {
+        oldop = cpu_cc_op;
     } else {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
+        oldop = cpu_tmp3_i32;
     }
 
-    gen_set_label(shift_label);
+    /* Conditionally store the CC_OP value.  */
+    z32 = tcg_const_i32(0);
+    s32 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(s32, cpu_T[1]);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_temp_free_i32(z32);
+    tcg_temp_free_i32(s32);
 
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
+    /* The CC_OP value is no longer predictable.  */
+    set_cc_op(s, CC_OP_DYNAMIC);
 }
 
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
                             int is_right, int is_arith)
 {
-    int mask;
-    
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
 
     /* load */
     if (op1 == OR_TMP0)
commit 436ff2d227588d42970c4f0ed1cdfcb87c872fba
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Jan 29 13:38:43 2013 -0800

    target-i386: Add CC_OP_CLR
    
    Special case xor with self.  We need not even store the known
    zero into cc_src.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
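
With CC_OP_CLR none of cc_dst/cc_src/cc_src2 needs to be written at all; the
lazy-flags helpers can simply materialize a constant.  A minimal sketch of
that mapping (CC_Z value as in target-i386/cpu.h; function names are mine):

    #define CC_Z 0x0040     /* EFLAGS.ZF */

    /* What helper_cc_compute_all/_c reduce to for CC_OP_CLR.  */
    static int compute_all_clr(void) { return CC_Z; }   /* only ZF set */
    static int compute_c_clr(void)   { return 0; }      /* CF clear */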

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 6cf57a7..9daa1a0 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -102,6 +102,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
 
     case CC_OP_EFLAGS:
         return src1;
+    case CC_OP_CLR:
+        return CC_Z;
 
     case CC_OP_MULB:
         return compute_all_mulb(dst, src1);
@@ -228,6 +230,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_LOGICW:
     case CC_OP_LOGICL:
     case CC_OP_LOGICQ:
+    case CC_OP_CLR:
         return 0;
 
     case CC_OP_EFLAGS:
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e0443d8..493dda8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -645,6 +645,8 @@ typedef enum {
     CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
     CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
 
+    CC_OP_CLR, /* Z set, all other flags clear.  */
+
     CC_OP_NB,
 } CCOp;
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 66c3624..82a731c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -117,6 +117,8 @@ static const char *cc_op_str[CC_OP_NB] = {
     "ADCX",
     "ADOX",
     "ADCOX",
+
+    "CLR",
 };
 
 static void
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 97c565f..007d326 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -213,6 +213,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
+    [CC_OP_CLR] = 0,
 };
 
 static void set_cc_op(DisasContext *s, CCOp op)
@@ -906,6 +907,11 @@ static void gen_compute_eflags(DisasContext *s)
     if (s->cc_op == CC_OP_EFLAGS) {
         return;
     }
+    if (s->cc_op == CC_OP_CLR) {
+        tcg_gen_movi_tl(cpu_cc_src, CC_Z);
+        set_cc_op(s, CC_OP_EFLAGS);
+        return;
+    }
 
     TCGV_UNUSED(zero);
     dst = cpu_cc_dst;
@@ -974,6 +980,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
                              .reg2 = t1, .mask = -1, .use_reg2 = true };
 
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
+    case CC_OP_CLR:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 
     case CC_OP_INCB ... CC_OP_INCQ:
@@ -1040,6 +1047,8 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_S };
+    case CC_OP_CLR:
+        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
@@ -1057,7 +1066,8 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                              .mask = -1, .no_setcond = true };
-
+    case CC_OP_CLR:
+        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
     default:
         gen_compute_eflags(s);
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
@@ -1078,6 +1088,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_Z };
+    case CC_OP_CLR:
+        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
@@ -4890,10 +4902,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
                     /* xor reg, reg optimisation */
+                    set_cc_op(s, CC_OP_CLR);
                     gen_op_movl_T0_0();
-                    set_cc_op(s, CC_OP_LOGICB + ot);
                     gen_op_mov_reg_T0(ot, reg);
-                    gen_op_update1_cc();
                     break;
                 } else {
                     opreg = rm;
commit 321c535105a182501b888f095f7ec4dbb5f3f6ae
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Jan 21 13:32:02 2013 -0800

    target-i386: Implement tzcnt and fix lzcnt
    
    We weren't computing flags for lzcnt at all.  At the same time,
    adjust the implementation of bsf/bsr to avoid the local branch,
    using movcond instead.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
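
A rough C sketch of the 32-bit semantics the translator now reproduces with
full-width clz/ctz helpers (illustrative only, using the GCC-style builtins;
it assumes a 64-bit target_ulong as in the TARGET_X86_64 build):

    #include <stdint.h>

    static unsigned lzcnt32(uint32_t x)
    {
        /* lzcnt of 0 is the operand size.  A 64-bit clz of the
           zero-extended value over-counts by 64 - 32, so subtract that
           bias, exactly as the generated code does.  */
        unsigned n = x ? (unsigned)__builtin_clzll(x) : 64;
        return n - (64 - 32);
    }

    static unsigned tzcnt32(uint32_t x)
    {
        /* Setting a bit just above the operand width makes ctz return 32
           for a zero input, the architectural result, with no branch.  */
        return (unsigned)__builtin_ctzll((uint64_t)x | (1ULL << 32));
    }

Both instructions set CF when the input is zero and ZF when the result is
zero, which is why the patch can reuse CC_OP_BMILGB + ot for their flags.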

diff --git a/target-i386/helper.h b/target-i386/helper.h
index e1ecdb8..26a0cc8 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -195,9 +195,8 @@ DEF_HELPER_3(frstor, void, env, tl, int)
 DEF_HELPER_3(fxsave, void, env, tl, int)
 DEF_HELPER_3(fxrstor, void, env, tl, int)
 
-DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 7bec4eb..3b56075 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -456,19 +456,14 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
 #endif
 
 /* bit operations */
-target_ulong helper_bsf(target_ulong t0)
+target_ulong helper_ctz(target_ulong t0)
 {
     return ctztl(t0);
 }
 
-target_ulong helper_lzcnt(target_ulong t0, int wordsize)
+target_ulong helper_clz(target_ulong t0)
 {
-    return clztl(t0) - (TARGET_LONG_BITS - wordsize);
-}
-
-target_ulong helper_bsr(target_ulong t0)
-{
-    return clztl(t0) ^ (TARGET_LONG_BITS - 1);
+    return clztl(t0);
 }
 
 target_ulong helper_pdep(target_ulong src, target_ulong mask)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 436658a..97c565f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -7157,46 +7157,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_movi_tl(cpu_cc_dst, 0);
         }
         break;
-    case 0x1bc: /* bsf */
-    case 0x1bd: /* bsr */
-        {
-            int label1;
-            TCGv t0;
-
-            ot = dflag + OT_WORD;
-            modrm = cpu_ldub_code(env, s->pc++);
-            reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0);
-            gen_extu(ot, cpu_T[0]);
-            t0 = tcg_temp_local_new();
-            tcg_gen_mov_tl(t0, cpu_T[0]);
-            if ((b & 1) && (prefixes & PREFIX_REPZ) &&
-                (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
-                switch(ot) {
-                case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
-                    tcg_const_i32(16)); break;
-                case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
-                    tcg_const_i32(32)); break;
-                case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
-                    tcg_const_i32(64)); break;
-                }
-                gen_op_mov_reg_T0(ot, reg);
+    case 0x1bc: /* bsf / tzcnt */
+    case 0x1bd: /* bsr / lzcnt */
+        ot = dflag + OT_WORD;
+        modrm = cpu_ldub_code(env, s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_extu(ot, cpu_T[0]);
+
+        /* Note that lzcnt and tzcnt are in different extensions.  */
+        if ((prefixes & PREFIX_REPZ)
+            && (b & 1
+                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
+                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+            int size = 8 << ot;
+            tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+            if (b & 1) {
+                /* For lzcnt, reduce the target_ulong result by the
+                   number of zeros that we expect to find at the top.  */
+                gen_helper_clz(cpu_T[0], cpu_T[0]);
+                tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
             } else {
-                label1 = gen_new_label();
-                tcg_gen_movi_tl(cpu_cc_dst, 0);
-                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
-                if (b & 1) {
-                    gen_helper_bsr(cpu_T[0], t0);
-                } else {
-                    gen_helper_bsf(cpu_T[0], t0);
-                }
-                gen_op_mov_reg_T0(ot, reg);
-                tcg_gen_movi_tl(cpu_cc_dst, 1);
-                gen_set_label(label1);
-                set_cc_op(s, CC_OP_LOGICB + ot);
+                /* For tzcnt, a zero input must return the operand size:
+                   force all bits outside the operand size to 1.  */
+                target_ulong mask = (target_ulong)-2 << (size - 1);
+                tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
+                gen_helper_ctz(cpu_T[0], cpu_T[0]);
+            }
+            /* For lzcnt/tzcnt, C and Z bits are defined and are
+               related to the result.  */
+            gen_op_update1_cc();
+            set_cc_op(s, CC_OP_BMILGB + ot);
+        } else {
+            /* For bsr/bsf, only the Z bit is defined and it is related
+               to the input and not the result.  */
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            set_cc_op(s, CC_OP_LOGICB + ot);
+            if (b & 1) {
+                /* For bsr, return the bit index of the first 1 bit,
+                   not the count of leading zeros.  */
+                gen_helper_clz(cpu_T[0], cpu_T[0]);
+                tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
+            } else {
+                gen_helper_ctz(cpu_T[0], cpu_T[0]);
             }
-            tcg_temp_free(t0);
+            /* ??? The manual says that the output is undefined when the
+               input is zero, but real hardware leaves it unchanged, and
+               real programs appear to depend on that.  */
+            tcg_gen_movi_tl(cpu_tmp0, 0);
+            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
+                               cpu_regs[reg], cpu_T[0]);
         }
+        gen_op_mov_reg_T0(ot, reg);
         break;
         /************************/
         /* bcd */
commit f1300734cbca515d30953b2c87e259fa378ea301
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Jan 21 11:52:26 2013 -0800

    target-i386: Use clz/ctz for bsf/bsr helpers
    
    And mark the helpers as NO_RWG_SE.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
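
The bsr mapping relies on a small identity: for non-zero x, clz(x) is at most
bits-1, and bits-1 is all ones, so subtracting from it and XOR-ing with it
are the same operation.  A sketch (illustrative only; names are mine):

    #include <stdint.h>

    static unsigned bsf64(uint64_t x) { return (unsigned)__builtin_ctzll(x); }

    static unsigned bsr64(uint64_t x)                /* both assume x != 0 */
    {
        unsigned n = (unsigned)__builtin_clzll(x);   /* 0 <= n <= 63 */
        return n ^ 63;                               /* same as 63 - n */
    }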

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 81e0fbd..e1ecdb8 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -195,9 +195,9 @@ DEF_HELPER_3(frstor, void, env, tl, int)
 DEF_HELPER_3(fxsave, void, env, tl, int)
 DEF_HELPER_3(fxrstor, void, env, tl, int)
 
-DEF_HELPER_1(bsf, tl, tl)
-DEF_HELPER_1(bsr, tl, tl)
-DEF_HELPER_2(lzcnt, tl, tl, int)
+DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int)
 DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 527af40..7bec4eb 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -447,53 +447,30 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
 }
 #endif
 
+#if TARGET_LONG_BITS == 32
+# define ctztl  ctz32
+# define clztl  clz32
+#else
+# define ctztl  ctz64
+# define clztl  clz64
+#endif
+
 /* bit operations */
 target_ulong helper_bsf(target_ulong t0)
 {
-    int count;
-    target_ulong res;
-
-    res = t0;
-    count = 0;
-    while ((res & 1) == 0) {
-        count++;
-        res >>= 1;
-    }
-    return count;
+    return ctztl(t0);
 }
 
 target_ulong helper_lzcnt(target_ulong t0, int wordsize)
 {
-    int count;
-    target_ulong res, mask;
-
-    if (wordsize > 0 && t0 == 0) {
-        return wordsize;
-    }
-    res = t0;
-    count = TARGET_LONG_BITS - 1;
-    mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
-    while ((res & mask) == 0) {
-        count--;
-        res <<= 1;
-    }
-    if (wordsize > 0) {
-        return wordsize - 1 - count;
-    }
-    return count;
+    return clztl(t0) - (TARGET_LONG_BITS - wordsize);
 }
 
 target_ulong helper_bsr(target_ulong t0)
 {
-    return helper_lzcnt(t0, 0);
+    return clztl(t0) ^ (TARGET_LONG_BITS - 1);
 }
 
-#if TARGET_LONG_BITS == 32
-# define ctztl  ctz32
-#else
-# define ctztl  ctz64
-#endif
-
 target_ulong helper_pdep(target_ulong src, target_ulong mask)
 {
     target_ulong dest = 0;
commit cd7f97cafdd80d6bd4950ccfdcd9acb7850184b2
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 18:17:33 2013 -0800

    target-i386: Implement ADX extension
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
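
adcx and adox are adds whose only architectural side effect is one carry bit:
adcx reads and writes CF alone, adox reads and writes OF alone, and every
other flag is preserved, hence the split CC_OP_ADCX/ADOX/ADCOX bookkeeping.
A 32-bit sketch of the data path (illustrative only), mirroring the
"do everything in 64-bit arithmetic" OT_LONG case:

    #include <stdint.h>

    static uint32_t adcx32(uint32_t dest, uint32_t src, unsigned *cf)
    {
        uint64_t sum = (uint64_t)dest + src + *cf;
        *cf = (unsigned)(sum >> 32);     /* carry out of bit 31 */
        return (uint32_t)sum;            /* adox is the same, using OF */
    }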

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 5ea6a0a..6cf57a7 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,6 +75,24 @@ const uint8_t parity_table[256] = {
 
 #endif
 
+static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
+{
+    return (src1 & ~CC_C) | (dst * CC_C);
+}
+
+static target_ulong compute_all_adox(target_ulong dst, target_ulong src1,
+                                     target_ulong src2)
+{
+    return (src1 & ~CC_O) | (src2 * CC_O);
+}
+
+static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
+                                      target_ulong src2)
+{
+    return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
+}
+
 target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
                                    target_ulong src2, int op)
 {
@@ -162,6 +180,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_all_bmilgl(dst, src1);
 
+    case CC_OP_ADCX:
+        return compute_all_adcx(dst, src1, src2);
+    case CC_OP_ADOX:
+        return compute_all_adox(dst, src1, src2);
+    case CC_OP_ADCOX:
+        return compute_all_adcox(dst, src1, src2);
+
 #ifdef TARGET_X86_64
     case CC_OP_MULQ:
         return compute_all_mulq(dst, src1);
@@ -210,6 +235,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_SARW:
     case CC_OP_SARL:
     case CC_OP_SARQ:
+    case CC_OP_ADOX:
         return src1 & 1;
 
     case CC_OP_INCB:
@@ -228,6 +254,10 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_MULQ:
         return src1 != 0;
 
+    case CC_OP_ADCX:
+    case CC_OP_ADCOX:
+        return dst;
+
     case CC_OP_ADDB:
         return compute_c_addb(dst, src1);
     case CC_OP_ADDW:
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0cb64ab..5582e5f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -407,11 +407,11 @@ typedef struct x86_def_t {
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
-          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2)
+          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
           /* missing:
           CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
           CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
-          CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */
+          CPUID_7_0_EBX_RDSEED */
 
 /* built-in CPU model definitions
  */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 960676b..e0443d8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -641,6 +641,10 @@ typedef enum {
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
+    CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
+    CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
+    CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
+
     CC_OP_NB,
 } CCOp;
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 74d600f..66c3624 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -113,6 +113,10 @@ static const char *cc_op_str[CC_OP_NB] = {
     "BMILGW",
     "BMILGL",
     "BMILGQ",
+
+    "ADCX",
+    "ADOX",
+    "ADCOX",
 };
 
 static void
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 68e30e6..436658a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -210,6 +210,9 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
+    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
 };
 
 static void set_cc_op(DisasContext *s, CCOp op)
@@ -994,6 +997,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 
+    case CC_OP_ADCX:
+    case CC_OP_ADCOX:
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
+                             .mask = -1, .no_setcond = true };
+
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
@@ -1027,6 +1035,9 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_S };
     default:
@@ -1041,9 +1052,17 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
 /* compute eflags.O to reg */
 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s);
-    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                         .mask = CC_O };
+    switch (s->cc_op) {
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
+                             .mask = -1, .no_setcond = true };
+
+    default:
+        gen_compute_eflags(s);
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = CC_O };
+    }
 }
 
 /* compute eflags.Z to reg */
@@ -1054,6 +1073,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                              .mask = CC_Z };
     default:
@@ -4174,6 +4196,87 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
                 break;
 
+            case 0x1f6: /* adcx Gy, Ey */
+            case 0x2f6: /* adox Gy, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
+                    goto illegal_op;
+                } else {
+                    TCGv carry_in, carry_out;
+                    int end_op;
+
+                    ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+                    /* Re-use the carry-out from a previous round.  */
+                    TCGV_UNUSED(carry_in);
+                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
+                    switch (s->cc_op) {
+                    case CC_OP_ADCX:
+                        if (b == 0x1f6) {
+                            carry_in = cpu_cc_dst;
+                            end_op = CC_OP_ADCX;
+                        } else {
+                            end_op = CC_OP_ADCOX;
+                        }
+                        break;
+                    case CC_OP_ADOX:
+                        if (b == 0x1f6) {
+                            end_op = CC_OP_ADCOX;
+                        } else {
+                            carry_in = cpu_cc_src2;
+                            end_op = CC_OP_ADOX;
+                        }
+                        break;
+                    case CC_OP_ADCOX:
+                        end_op = CC_OP_ADCOX;
+                        carry_in = carry_out;
+                        break;
+                    default:
+                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX);
+                        break;
+                    }
+                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
+                    if (TCGV_IS_UNUSED(carry_in)) {
+                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+                            gen_compute_eflags(s);
+                        }
+                        carry_in = cpu_tmp0;
+                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
+                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
+                        tcg_gen_andi_tl(carry_in, carry_in, 1);
+                    }
+
+                    switch (ot) {
+#ifdef TARGET_X86_64
+                    case OT_LONG:
+                        /* If we know TL is 64-bit, and we want a 32-bit
+                           result, just do everything in 64-bit arithmetic.  */
+                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
+                        tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
+                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
+                        tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
+                        break;
+#endif
+                    default:
+                        /* Otherwise compute the carry-out in two steps.  */
+                        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
+                                           cpu_T[0], cpu_regs[reg]);
+                        tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
+                        tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
+                                           cpu_regs[reg], cpu_T[0]);
+                        tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+                        break;
+                    }
+                    /* We began with all flags computed to CC_SRC, and we
+                       have now placed the carry-out in CC_DST.  All that
+                       is left is to record the CC_OP.  */
+                    set_cc_op(s, end_op);
+                }
+                break;
+
             case 0x1f7: /* shlx Gy, Ey, By */
             case 0x2f7: /* sarx Gy, Ey, By */
             case 0x3f7: /* shrx Gy, Ey, By */
commit e2c3c2c551bccd843135eab1ba202f8d2f86800b
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 16 14:55:09 2013 -0800

    target-i386: Implement RORX
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
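
RORX is a rotate-right by immediate that leaves EFLAGS completely alone,
which is why the new case emits the rotate and nothing else.  A one-line
sketch of the 32-bit form (illustrative only):

    #include <stdint.h>

    static uint32_t rorx32(uint32_t x, unsigned imm)
    {
        unsigned r = imm & 31;
        return r ? (x >> r) | (x << (32 - r)) : x;   /* flags untouched */
    }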

diff --git a/target-i386/translate.c b/target-i386/translate.c
index c1a2886..68e30e6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4433,6 +4433,38 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
+
+        case 0x33a:
+            /* Various integer extensions at 0f 3a f[0-f].  */
+            b = modrm | (b1 << 8);
+            modrm = cpu_ldub_code(env, s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+
+            switch (b) {
+            case 0x3f0: /* rorx Gy,Ey, Ib */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                b = cpu_ldub_code(env, s->pc++);
+                if (ot == OT_QUAD) {
+                    tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
+                } else {
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
+                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                }
+                gen_op_mov_reg_T0(ot, reg);
+                break;
+
+            default:
+                goto illegal_op;
+            }
+            break;
+
         default:
             goto illegal_op;
         }
commit 4a554890e479a43568de8b5354d9ca8583f5ec7f
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 18:12:13 2013 -0800

    target-i386: Implement SHLX, SARX, SHRX
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
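
These three take the shift count from a general register rather than CL, mask
it to the operand size, and modify no flags, which is why the new code has no
set_cc_op() at all.  A sketch of the 32-bit semantics (illustrative only; it
assumes the usual arithmetic behaviour of signed right shift in C):

    #include <stdint.h>

    static uint32_t shlx32(uint32_t x, uint32_t n) { return x << (n & 31); }
    static uint32_t shrx32(uint32_t x, uint32_t n) { return x >> (n & 31); }
    static int32_t  sarx32(int32_t  x, uint32_t n) { return x >> (n & 31); }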

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 51016fe..c1a2886 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4174,6 +4174,37 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
                 break;
 
+            case 0x1f7: /* shlx Gy, Ey, By */
+            case 0x2f7: /* sarx Gy, Ey, By */
+            case 0x3f7: /* shrx Gy, Ey, By */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                if (ot == OT_QUAD) {
+                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
+                } else {
+                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
+                }
+                if (b == 0x1f7) {
+                    tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                } else if (b == 0x2f7) {
+                    if (ot != OT_QUAD) {
+                        tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                    }
+                    tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                } else {
+                    if (ot != OT_QUAD) {
+                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                    }
+                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                }
+                gen_op_mov_reg_T0(ot, reg);
+                break;
+
             case 0x0f3:
             case 0x1f3:
             case 0x2f3:
commit 0592f74a75ab695efd48a151219667adc0fa7cc4
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 18:09:43 2013 -0800

    target-i386: Implement PDEP, PEXT
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
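
The new helpers are already plain C; a concrete pair of values may still
help.  With mask 0b10110 (bit positions 1, 2 and 4 selected), pdep scatters
the low three bits of the source into those positions and pext gathers them
back.  A self-contained check (illustrative only, same loops as the helpers):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t pdep64(uint64_t src, uint64_t mask)
    {
        uint64_t dest = 0;
        for (int i = 0; mask != 0; i++) {
            int o = __builtin_ctzll(mask);    /* next selected position */
            mask &= mask - 1;
            dest |= ((src >> i) & 1) << o;
        }
        return dest;
    }

    static uint64_t pext64(uint64_t src, uint64_t mask)
    {
        uint64_t dest = 0;
        for (int o = 0; mask != 0; o++) {
            int i = __builtin_ctzll(mask);
            mask &= mask - 1;
            dest |= ((src >> i) & 1) << o;
        }
        return dest;
    }

    int main(void)
    {
        assert(pdep64(0x5, 0x16) == 0x12);    /* 0b101   -> 0b10010 */
        assert(pext64(0x12, 0x16) == 0x5);    /* 0b10010 -> 0b101   */
        return 0;
    }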

diff --git a/target-i386/helper.h b/target-i386/helper.h
index d750754..81e0fbd 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -194,9 +194,12 @@ DEF_HELPER_3(fsave, void, env, tl, int)
 DEF_HELPER_3(frstor, void, env, tl, int)
 DEF_HELPER_3(fxsave, void, env, tl, int)
 DEF_HELPER_3(fxrstor, void, env, tl, int)
+
 DEF_HELPER_1(bsf, tl, tl)
 DEF_HELPER_1(bsr, tl, tl)
 DEF_HELPER_2(lzcnt, tl, tl, int)
+DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
 /* MMX/SSE */
 
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 4ec8cb7..527af40 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -488,6 +488,38 @@ target_ulong helper_bsr(target_ulong t0)
     return helper_lzcnt(t0, 0);
 }
 
+#if TARGET_LONG_BITS == 32
+# define ctztl  ctz32
+#else
+# define ctztl  ctz64
+#endif
+
+target_ulong helper_pdep(target_ulong src, target_ulong mask)
+{
+    target_ulong dest = 0;
+    int i, o;
+
+    for (i = 0; mask != 0; i++) {
+        o = ctztl(mask);
+        mask &= mask - 1;
+        dest |= ((src >> i) & 1) << o;
+    }
+    return dest;
+}
+
+target_ulong helper_pext(target_ulong src, target_ulong mask)
+{
+    target_ulong dest = 0;
+    int i, o;
+
+    for (o = 0; mask != 0; o++) {
+        i = ctztl(mask);
+        mask &= mask - 1;
+        dest |= ((src >> i) & 1) << o;
+    }
+    return dest;
+}
+
 #define SHIFT 0
 #include "shift_helper_template.h"
 #undef SHIFT
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 3017d63..51016fe 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4138,6 +4138,42 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
 
+            case 0x3f5: /* pdep Gy, By, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                /* Note that by zero-extending the mask operand, we
+                   automatically handle zero-extending the result.  */
+                if (s->dflag == 2) {
+                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                } else {
+                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                }
+                gen_helper_pdep(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                break;
+
+            case 0x2f5: /* pext Gy, By, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                /* Note that by zero-extending the mask operand, we
+                   automatically handle zero-extending the result.  */
+                if (s->dflag == 2) {
+                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                } else {
+                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                }
+                gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                break;
+
             case 0x0f3:
             case 0x1f3:
             case 0x2f3:
commit 5f1f4b177152286102475f9bffc359002a14d9c9
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 18:06:18 2013 -0800

    target-i386: Implement MULX
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
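
MULX multiplies rDX by the r/m operand, writes the low and high halves of the
product to two destinations and leaves the flags alone.  The 64-bit form
needs a 128-bit product (mulu64() in the new helper); the 32-bit form fits in
ordinary 64-bit arithmetic, as in this illustrative sketch:

    #include <stdint.h>

    static void mulx32(uint32_t edx, uint32_t src, uint32_t *lo, uint32_t *hi)
    {
        uint64_t prod = (uint64_t)edx * src;
        *lo = (uint32_t)prod;
        *hi = (uint32_t)(prod >> 32);    /* EFLAGS is not modified */
    }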

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 4c46ab1..d750754 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -19,6 +19,7 @@ DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
 DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl)
 DEF_HELPER_2(divq_EAX, void, env, tl)
 DEF_HELPER_2(idivq_EAX, void, env, tl)
+DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 #endif
 
 DEF_HELPER_2(aam, void, env, int)
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 84b812d..4ec8cb7 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -385,6 +385,13 @@ void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0)
     CC_SRC = r1;
 }
 
+target_ulong helper_umulh(target_ulong t0, target_ulong t1)
+{
+    uint64_t h, l;
+    mulu64(&l, &h, t0, t1);
+    return h;
+}
+
 void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0)
 {
     uint64_t r0, r1;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 2bb8d9f..3017d63 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4099,6 +4099,45 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
+            case 0x3f6: /* mulx By, Gy, rdx, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                switch (ot) {
+                    TCGv_i64 t0, t1;
+                default:
+                    t0 = tcg_temp_new_i64();
+                    t1 = tcg_temp_new_i64();
+#ifdef TARGET_X86_64
+                    tcg_gen_ext32u_i64(t0, cpu_T[0]);
+                    tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
+#else
+                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_extu_i32_i64(t1, cpu_regs[R_EDX]);
+#endif
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_tl(cpu_T[0], t0);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_tl(cpu_T[1], t0);
+                    tcg_temp_free_i64(t0);
+                    tcg_temp_free_i64(t1);
+                    gen_op_mov_reg_T0(OT_LONG, s->vex_v);
+                    gen_op_mov_reg_T1(OT_LONG, reg);
+                    break;
+#ifdef TARGET_X86_64
+                case OT_QUAD:
+                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
+                    tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
+                    gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                    break;
+#endif
+                }
+                break;
+
             case 0x0f3:
             case 0x1f3:
             case 0x2f3:
commit 02ea1e6b4fab803551bbea47eea29bc7709ba008
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 17:01:10 2013 -0800

    target-i386: Implement BZHI
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
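
For reference, the architectural definition of BZHI as a C sketch
(illustrative only): the low 8 bits of the second source give a bit index n,
bits at and above n are cleared, and CF reports an out-of-range index.

    #include <stdint.h>

    static uint32_t bzhi32(uint32_t src, uint32_t n_op, unsigned *cf)
    {
        unsigned n = n_op & 0xff;
        *cf = (n > 31);                  /* index past the operand size */
        if (n > 31) {
            return src;                  /* nothing is zeroed */
        }
        return src & ((1u << n) - 1);    /* keep bits 0 .. n-1 */
    }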

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 2322d5c..2bb8d9f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4072,6 +4072,33 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
 
+            case 0x0f5: /* bzhi Gy, Ey, By */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                {
+                    TCGv bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+                    /* Note that since we're using BMILG (in order to get O
+                       cleared) we need to store the inverse into C.  */
+                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
+                                       cpu_T[1], bound);
+                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1],
+                                       bound, bound, cpu_T[1]);
+                    tcg_temp_free(bound);
+                }
+                tcg_gen_movi_tl(cpu_A0, -1);
+                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
+                tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
+                gen_op_mov_reg_T0(ot, reg);
+                gen_op_update1_cc();
+                set_cc_op(s, CC_OP_BMILGB + ot);
+                break;
+
             case 0x0f3:
             case 0x1f3:
             case 0x2f3:
commit bc4b43dc2fe88712ad921c05fc1ab9ebc4cb6778
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:44:37 2013 -0800

    target-i386: Implement BLSR, BLSMSK, BLSI
    
    Do all of group 17 at one time for ease.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
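
For reference, the architectural group 17 operations are three one-line
tricks on the lowest set bit; a plain C sketch (illustrative only):

    #include <stdint.h>

    static uint64_t blsr64(uint64_t x)   { return x & (x - 1); } /* clear lowest set bit */
    static uint64_t blsmsk64(uint64_t x) { return x ^ (x - 1); } /* mask up to lowest set bit */
    static uint64_t blsi64(uint64_t x)   { return x & -x; }      /* isolate lowest set bit */

The flags are then derived lazily through CC_OP_BMILGB + ot.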

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 218a9b5..5ea6a0a 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -155,6 +155,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_SARL:
         return compute_all_sarl(dst, src1);
 
+    case CC_OP_BMILGB:
+        return compute_all_bmilgb(dst, src1);
+    case CC_OP_BMILGW:
+        return compute_all_bmilgw(dst, src1);
+    case CC_OP_BMILGL:
+        return compute_all_bmilgl(dst, src1);
+
 #ifdef TARGET_X86_64
     case CC_OP_MULQ:
         return compute_all_mulq(dst, src1);
@@ -176,6 +183,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return compute_all_shlq(dst, src1);
     case CC_OP_SARQ:
         return compute_all_sarq(dst, src1);
+    case CC_OP_BMILGQ:
+        return compute_all_bmilgq(dst, src1);
 #endif
     }
 }
@@ -254,6 +263,13 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_SHLL:
         return compute_c_shll(dst, src1);
 
+    case CC_OP_BMILGB:
+        return compute_c_bmilgb(dst, src1);
+    case CC_OP_BMILGW:
+        return compute_c_bmilgw(dst, src1);
+    case CC_OP_BMILGL:
+        return compute_c_bmilgl(dst, src1);
+
 #ifdef TARGET_X86_64
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
@@ -265,6 +281,8 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
         return compute_c_sbbq(dst, src1, src2);
     case CC_OP_SHLQ:
         return compute_c_shlq(dst, src1);
+    case CC_OP_BMILGQ:
+        return compute_c_bmilgq(dst, src1);
 #endif
     }
 }
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 87f47d2..607311f 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -217,6 +217,24 @@ static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1)
     return cf | pf | af | zf | sf | of;
 }
 
+static int glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    int cf, pf, af, zf, sf, of;
+
+    cf = (src1 == 0);
+    pf = 0; /* undefined */
+    af = 0; /* undefined */
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    return src1 == 0;
+}
+
 #undef DATA_BITS
 #undef SIGN_MASK
 #undef DATA_TYPE
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 1fa9dc8..960676b 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -636,6 +636,11 @@ typedef enum {
     CC_OP_SARL,
     CC_OP_SARQ,
 
+    CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
+    CC_OP_BMILGW,
+    CC_OP_BMILGL,
+    CC_OP_BMILGQ,
+
     CC_OP_NB,
 } CCOp;
 
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4bf9db7..74d600f 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -55,7 +55,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env)
 /***********************************************************/
 /* x86 debug */
 
-static const char *cc_op_str[] = {
+static const char *cc_op_str[CC_OP_NB] = {
     "DYNAMIC",
     "EFLAGS",
 
@@ -108,6 +108,11 @@ static const char *cc_op_str[] = {
     "SARW",
     "SARL",
     "SARQ",
+
+    "BMILGB",
+    "BMILGW",
+    "BMILGL",
+    "BMILGQ",
 };
 
 static void
diff --git a/target-i386/translate.c b/target-i386/translate.c
index d742fe3..2322d5c 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -209,6 +209,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
 };
 
 static void set_cc_op(DisasContext *s, CCOp op)
@@ -988,6 +989,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_NE,
                              .reg = cpu_cc_src, .mask = -1 };
 
+    case CC_OP_BMILGB ... CC_OP_BMILGQ:
+        size = s->cc_op - CC_OP_BMILGB;
+        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
@@ -4066,6 +4072,48 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
 
+            case 0x0f3:
+            case 0x1f3:
+            case 0x2f3:
+            case 0x3f3: /* Group 17 */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+                switch (reg & 7) {
+                case 1: /* blsr By,Ey */
+                    tcg_gen_neg_tl(cpu_T[1], cpu_T[0]);
+                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                    gen_op_mov_reg_T0(ot, s->vex_v);
+                    gen_op_update2_cc();
+                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    break;
+
+                case 2: /* blsmsk By,Ey */
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
+                    tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    break;
+
+                case 3: /* blsi By, Ey */
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
+                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    break;
+
+                default:
+                    goto illegal_op;
+                }
+                break;
+
             default:
                 goto illegal_op;
             }
commit c7ab7565bc6d52cc140230aa4d0533d13d89c8b1
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:21:33 2013 -0800

    target-i386: Implement BEXTR
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
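
BEXTR takes an 8-bit start and an 8-bit length from the second source and
extracts that bit field; the patch handles out-of-range start and length with
movconds rather than branches.  A C sketch of the semantics (illustrative
only):

    #include <stdint.h>

    static uint32_t bextr32(uint32_t src, uint32_t ctl)
    {
        unsigned start = ctl & 0xff;
        unsigned len = (ctl >> 8) & 0xff;

        if (start >= 32) {
            return 0;                     /* every bit is shifted out */
        }
        uint32_t field = src >> start;
        if (len >= 32) {
            return field;                 /* length capped at operand size */
        }
        return field & ((1u << len) - 1);
    }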

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 01ff131..d742fe3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4026,6 +4026,46 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
 
+            case 0x0f7: /* bextr Gy, Ey, By */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                {
+                    TCGv bound, zero;
+
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    /* Extract START, and shift the operand.
+                       Shifts larger than operand size get zeros.  */
+                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
+                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
+
+                    bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+                    zero = tcg_const_tl(0);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
+                                       cpu_T[0], zero);
+                    tcg_temp_free(zero);
+
+                    /* Extract the LEN into a mask.  Lengths larger than
+                       operand size get all ones.  */
+                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
+                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
+                                       cpu_A0, bound);
+                    tcg_temp_free(bound);
+                    tcg_gen_movi_tl(cpu_T[1], 1);
+                    tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0);
+                    tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
+                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+
+                    gen_op_mov_reg_T0(ot, reg);
+                    gen_op_update1_cc();
+                    set_cc_op(s, CC_OP_LOGICB + ot);
+                }
+                break;
+
             default:
                 goto illegal_op;
             }
commit 7073fbada733c8d10992f00772c9b9299d740e9b
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:17:10 2013 -0800

    target-i386: Implement ANDN
    
    As this is the first of the BMI insns to be implemented,
    this carries quite a bit more baggage than normal.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
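
Before the baggage, the operation itself is tiny: ANDN writes the AND of
the inverted first source with the second source.  A one-line reference
model (illustrative; the function name is made up here):

    #include <stdint.h>

    /* ANDN per the BMI1 definition: dest = ~src1 & src2.
       SF and ZF come from the result; CF and OF are cleared. */
    static uint64_t andn64(uint64_t src1, uint64_t src2)
    {
        return ~src1 & src2;
    }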

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0f19533..0cb64ab 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -406,12 +406,12 @@ typedef struct x86_def_t {
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
-#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
+          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2)
           /* missing:
-          CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_BMI1, CPUID_7_0_EBX_HLE,
-          CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_BMI2, CPUID_7_0_EBX_ERMS,
-          CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED,
-          CPUID_7_0_EBX_ADX */
+          CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
+          CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
+          CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */
 
 /* built-in CPU model definitions
  */
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5a91ff1..01ff131 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2955,8 +2955,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
 
-    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
-    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
+    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
+    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
 
     /* MMX ops and their SSE extensions */
     [0x60] = MMX_OP2(punpcklbw),
@@ -4011,6 +4012,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
 
+            case 0x0f2: /* andn Gy, By, Ey */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+                    || !(s->prefix & PREFIX_VEX)
+                    || s->vex_l != 0) {
+                    goto illegal_op;
+                }
+                ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
+                gen_op_mov_reg_T0(ot, reg);
+                gen_op_update1_cc();
+                set_cc_op(s, CC_OP_LOGICB + ot);
+                break;
+
             default:
                 goto illegal_op;
             }
commit 111994ee05b810d81dc6abea7fac5280e48dc198
Author: Richard Henderson <rth at twiddle.net>
Date:   Thu Jan 10 12:06:59 2013 -0800

    target-i386: Implement MOVBE
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
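
Operationally, MOVBE is a MOV whose memory operand is byte-swapped: a load
swaps after reading, a store swaps before writing, which is exactly the
load / bswap / store split in the translation below.  A 32-bit stand-alone
sketch (helper names invented for illustration):

    #include <stdint.h>

    static uint32_t bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00)
             | ((x << 8) & 0x00ff0000) | (x << 24);
    }

    /* movbe reg, [mem]: load, then reverse the byte order */
    static uint32_t movbe_load32(const uint32_t *mem)
    {
        return bswap32(*mem);
    }

    /* movbe [mem], reg: reverse the byte order, then store */
    static void movbe_store32(uint32_t *mem, uint32_t reg)
    {
        *mem = bswap32(reg);
    }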

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index dfcf86e..0f19533 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -389,10 +389,15 @@ typedef struct x86_def_t {
           CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
           CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \
-          CPUID_EXT_HYPERVISOR)
+          CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
           /* missing:
-          CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
-          CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
+          CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
+          CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
+          CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
+          CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_SSE41, CPUID_EXT_SSE42,
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
+          CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX,
+          CPUID_EXT_F16C, CPUID_EXT_RDRAND */
 #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
@@ -402,6 +407,11 @@ typedef struct x86_def_t {
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
+          /* missing:
+          CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_BMI1, CPUID_7_0_EBX_HLE,
+          CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_BMI2, CPUID_7_0_EBX_ERMS,
+          CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED,
+          CPUID_7_0_EBX_ADX */
 
 /* built-in CPU model definitions
  */
diff --git a/target-i386/translate.c b/target-i386/translate.c
index f824b99..5a91ff1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3837,11 +3837,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
+
         case 0x138:
-            if (s->prefix & PREFIX_REPNZ)
-                goto crc32;
         case 0x038:
             b = modrm;
+            if ((b & 0xf0) == 0xf0) {
+                goto do_0f_38_fx;
+            }
             modrm = cpu_ldub_code(env, s->pc++);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -3914,36 +3916,106 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 set_cc_op(s, CC_OP_EFLAGS);
             }
             break;
-        case 0x338: /* crc32 */
-        crc32:
-            b = modrm;
+
+        case 0x238:
+        case 0x338:
+        do_0f_38_fx:
+            /* Various integer extensions at 0f 38 f[0-f].  */
+            b = modrm | (b1 << 8);
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
 
-            if (b != 0xf0 && b != 0xf1)
-                goto illegal_op;
-            if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
-                goto illegal_op;
+            switch (b) {
+            case 0x3f0: /* crc32 Gd,Eb */
+            case 0x3f1: /* crc32 Gd,Ey */
+            do_crc32:
+                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
+                    goto illegal_op;
+                }
+                if ((b & 0xff) == 0xf0) {
+                    ot = OT_BYTE;
+                } else if (s->dflag != 2) {
+                    ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+                } else {
+                    ot = OT_QUAD;
+                }
 
-            if (b == 0xf0)
-                ot = OT_BYTE;
-            else if (b == 0xf1 && s->dflag != 2)
-                if (s->prefix & PREFIX_DATA)
-                    ot = OT_WORD;
-                else
-                    ot = OT_LONG;
-            else
-                ot = OT_QUAD;
+                gen_op_mov_TN_reg(OT_LONG, 0, reg);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+                                 cpu_T[0], tcg_const_i32(8 << ot));
 
-            gen_op_mov_TN_reg(OT_LONG, 0, reg);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-            gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
-                             cpu_T[0], tcg_const_i32(8 << ot));
+                ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+                gen_op_mov_reg_T0(ot, reg);
+                break;
 
-            ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            gen_op_mov_reg_T0(ot, reg);
+            case 0x1f0: /* crc32 or movbe */
+            case 0x1f1:
+                /* For these insns, the f3 prefix is supposed to have priority
+                   over the 66 prefix, but that's not what we implement above
+                   setting b1.  */
+                if (s->prefix & PREFIX_REPNZ) {
+                    goto do_crc32;
+                }
+                /* FALLTHRU */
+            case 0x0f0: /* movbe Gy,My */
+            case 0x0f1: /* movbe My,Gy */
+                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
+                    goto illegal_op;
+                }
+                if (s->dflag != 2) {
+                    ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+                } else {
+                    ot = OT_QUAD;
+                }
+
+                /* Load the data incoming to the bswap.  Note that the TCG
+                   implementation of bswap requires the input be zero
+                   extended.  In the case of the loads, we simply know that
+                   gen_op_ld_v via gen_ldst_modrm does that already.  */
+                if ((b & 1) == 0) {
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                } else {
+                    switch (ot) {
+                    case OT_WORD:
+                        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]);
+                        break;
+                    default:
+                        tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]);
+                        break;
+                    case OT_QUAD:
+                        tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]);
+                        break;
+                    }
+                }
+
+                switch (ot) {
+                case OT_WORD:
+                    tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]);
+                    break;
+                default:
+                    tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+                    break;
+#ifdef TARGET_X86_64
+                case OT_QUAD:
+                    tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]);
+                    break;
+#endif
+                }
+
+                if ((b & 1) == 0) {
+                    gen_op_mov_reg_T0(ot, reg);
+                } else {
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+                }
+                break;
+
+            default:
+                goto illegal_op;
+            }
             break;
+
         case 0x03a:
         case 0x13a:
             b = modrm;
commit 701ed211d62b2b0dba732d75997c4bbf37010c1e
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Jan 11 11:35:02 2013 -0800

    target-i386: Decode the VEX prefixes
    
    No actual required uses of these encodings yet.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
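
The bit layout being unpacked here: the 2-byte form is c5 followed by one
payload byte holding ~R, ~vvvv, L and pp; the 3-byte form is c4 followed by
~R ~X ~B m-mmmm and then W ~vvvv L pp (the register fields are stored
inverted).  A small stand-alone sketch of the 3-byte unpacking, mirroring
the shifts used in the patch (struct and function names are made up):

    #include <stdint.h>

    struct vex_fields {
        int r, x, b, w;   /* extension bits, already un-inverted */
        int vvvv;         /* second source register, un-inverted */
        int l;            /* vector length: 0 = 128-bit, 1 = 256-bit */
        int pp;           /* implied 66 / f3 / f2 prefix */
        int mmmmm;        /* implied 0f / 0f 38 / 0f 3a escape */
    };

    static struct vex_fields decode_vex3(uint8_t byte1, uint8_t byte2)
    {
        struct vex_fields v;

        v.r     = !(byte1 & 0x80);
        v.x     = !(byte1 & 0x40);
        v.b     = !(byte1 & 0x20);
        v.mmmmm = byte1 & 0x1f;
        v.w     = (byte2 >> 7) & 1;
        v.vvvv  = (~byte2 >> 3) & 0xf;
        v.l     = (byte2 >> 2) & 1;
        v.pp    = byte2 & 3;
        return v;
    }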

diff --git a/target-i386/translate.c b/target-i386/translate.c
index e5cda94..f824b99 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -37,6 +37,7 @@
 #define PREFIX_LOCK   0x04
 #define PREFIX_DATA   0x08
 #define PREFIX_ADR    0x10
+#define PREFIX_VEX    0x20
 
 #ifdef TARGET_X86_64
 #define CODE64(s) ((s)->code64)
@@ -98,6 +99,8 @@ typedef struct DisasContext {
     int code64; /* 64 bit code segment */
     int rex_x, rex_b;
 #endif
+    int vex_l;  /* vex vector length */
+    int vex_v;  /* vex vvvv register, without 1's complement.  */
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
@@ -4264,6 +4267,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     x86_64_hregs = 0;
 #endif
     s->rip_offset = 0; /* for relative ip address */
+    s->vex_l = 0;
+    s->vex_v = 0;
  next_byte:
     b = cpu_ldub_code(env, s->pc);
     s->pc++;
@@ -4315,6 +4320,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
 #endif
+    case 0xc5: /* 2-byte VEX */
+    case 0xc4: /* 3-byte VEX */
+        /* VEX prefixes cannot be used except in 32-bit mode.
+           Otherwise the instruction is LES or LDS.  */
+        if (s->code32 && !s->vm86) {
+            static const int pp_prefix[4] = {
+                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
+            };
+            int vex3, vex2 = cpu_ldub_code(env, s->pc);
+
+            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
+                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
+                   otherwise the instruction is LES or LDS.  */
+                break;
+            }
+            s->pc++;
+
+            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
+            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
+                            | PREFIX_LOCK | PREFIX_DATA)) {
+                goto illegal_op;
+            }
+#ifdef TARGET_X86_64
+            if (x86_64_hregs) {
+                goto illegal_op;
+            }
+#endif
+            rex_r = (~vex2 >> 4) & 8;
+            if (b == 0xc5) {
+                vex3 = vex2;
+                b = cpu_ldub_code(env, s->pc++);
+            } else {
+#ifdef TARGET_X86_64
+                s->rex_x = (~vex2 >> 3) & 8;
+                s->rex_b = (~vex2 >> 2) & 8;
+#endif
+                vex3 = cpu_ldub_code(env, s->pc++);
+                rex_w = (vex3 >> 7) & 1;
+                switch (vex2 & 0x1f) {
+                case 0x01: /* Implied 0f leading opcode bytes.  */
+                    b = cpu_ldub_code(env, s->pc++) | 0x100;
+                    break;
+                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
+                    b = 0x138;
+                    break;
+                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
+                    b = 0x13a;
+                    break;
+                default:   /* Reserved for future use.  */
+                    goto illegal_op;
+                }
+            }
+            s->vex_v = (~vex3 >> 3) & 0xf;
+            s->vex_l = (vex3 >> 2) & 1;
+            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
+        }
+        break;
     }
 
     /* Post-process prefixes.  */
@@ -5461,13 +5523,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0xc4: /* les Gv */
-        if (CODE64(s))
-            goto illegal_op;
+        /* In CODE64 this is VEX3; see above.  */
         op = R_ES;
         goto do_lxx;
     case 0xc5: /* lds Gv */
-        if (CODE64(s))
-            goto illegal_op;
+        /* In CODE64 this is VEX2; see above.  */
         op = R_DS;
         goto do_lxx;
     case 0x1b2: /* lss Gv */
commit 4a6fd938f5457ee161d2acbd9364608a2a68b7a1
Author: Richard Henderson <rth at twiddle.net>
Date:   Thu Jan 10 13:29:23 2013 -0800

    target-i386: Tidy prefix parsing
    
    Avoid duplicating switch statement between 32 and 64-bit modes.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index f667f93..e5cda94 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4267,44 +4267,44 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
  next_byte:
     b = cpu_ldub_code(env, s->pc);
     s->pc++;
-    /* check prefixes */
+    /* Collect prefixes.  */
+    switch (b) {
+    case 0xf3:
+        prefixes |= PREFIX_REPZ;
+        goto next_byte;
+    case 0xf2:
+        prefixes |= PREFIX_REPNZ;
+        goto next_byte;
+    case 0xf0:
+        prefixes |= PREFIX_LOCK;
+        goto next_byte;
+    case 0x2e:
+        s->override = R_CS;
+        goto next_byte;
+    case 0x36:
+        s->override = R_SS;
+        goto next_byte;
+    case 0x3e:
+        s->override = R_DS;
+        goto next_byte;
+    case 0x26:
+        s->override = R_ES;
+        goto next_byte;
+    case 0x64:
+        s->override = R_FS;
+        goto next_byte;
+    case 0x65:
+        s->override = R_GS;
+        goto next_byte;
+    case 0x66:
+        prefixes |= PREFIX_DATA;
+        goto next_byte;
+    case 0x67:
+        prefixes |= PREFIX_ADR;
+        goto next_byte;
 #ifdef TARGET_X86_64
-    if (CODE64(s)) {
-        switch (b) {
-        case 0xf3:
-            prefixes |= PREFIX_REPZ;
-            goto next_byte;
-        case 0xf2:
-            prefixes |= PREFIX_REPNZ;
-            goto next_byte;
-        case 0xf0:
-            prefixes |= PREFIX_LOCK;
-            goto next_byte;
-        case 0x2e:
-            s->override = R_CS;
-            goto next_byte;
-        case 0x36:
-            s->override = R_SS;
-            goto next_byte;
-        case 0x3e:
-            s->override = R_DS;
-            goto next_byte;
-        case 0x26:
-            s->override = R_ES;
-            goto next_byte;
-        case 0x64:
-            s->override = R_FS;
-            goto next_byte;
-        case 0x65:
-            s->override = R_GS;
-            goto next_byte;
-        case 0x66:
-            prefixes |= PREFIX_DATA;
-            goto next_byte;
-        case 0x67:
-            prefixes |= PREFIX_ADR;
-            goto next_byte;
-        case 0x40 ... 0x4f:
+    case 0x40 ... 0x4f:
+        if (CODE64(s)) {
             /* REX prefix */
             rex_w = (b >> 3) & 1;
             rex_r = (b & 0x4) << 1;
@@ -4313,58 +4313,28 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             x86_64_hregs = 1; /* select uniform byte register addressing */
             goto next_byte;
         }
+        break;
+#endif
+    }
+
+    /* Post-process prefixes.  */
+    if (prefixes & PREFIX_DATA) {
+        dflag ^= 1;
+    }
+    if (prefixes & PREFIX_ADR) {
+        aflag ^= 1;
+    }
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
         if (rex_w == 1) {
             /* 0x66 is ignored if rex.w is set */
             dflag = 2;
-        } else {
-            if (prefixes & PREFIX_DATA)
-                dflag ^= 1;
         }
-        if (!(prefixes & PREFIX_ADR))
+        if (!(prefixes & PREFIX_ADR)) {
             aflag = 2;
-    } else
-#endif
-    {
-        switch (b) {
-        case 0xf3:
-            prefixes |= PREFIX_REPZ;
-            goto next_byte;
-        case 0xf2:
-            prefixes |= PREFIX_REPNZ;
-            goto next_byte;
-        case 0xf0:
-            prefixes |= PREFIX_LOCK;
-            goto next_byte;
-        case 0x2e:
-            s->override = R_CS;
-            goto next_byte;
-        case 0x36:
-            s->override = R_SS;
-            goto next_byte;
-        case 0x3e:
-            s->override = R_DS;
-            goto next_byte;
-        case 0x26:
-            s->override = R_ES;
-            goto next_byte;
-        case 0x64:
-            s->override = R_FS;
-            goto next_byte;
-        case 0x65:
-            s->override = R_GS;
-            goto next_byte;
-        case 0x66:
-            prefixes |= PREFIX_DATA;
-            goto next_byte;
-        case 0x67:
-            prefixes |= PREFIX_ADR;
-            goto next_byte;
         }
-        if (prefixes & PREFIX_DATA)
-            dflag ^= 1;
-        if (prefixes & PREFIX_ADR)
-            aflag ^= 1;
     }
+#endif
 
     s->prefix = prefixes;
     s->aflag = aflag;
commit 988c3eb0d6f41ac13f4ec145c637f12c776de602
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:03:16 2013 -0800

    target-i386: Use CC_SRC2 for ADC and SBB
    
    Add another slot in ENV and store two of the three inputs.  This lets us
    do less work when carry-out is not needed, and avoids the unpredictable
    CC_OP after translating these insns.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
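
With the carry-in kept in CC_SRC2, carry-out can be recomputed from
(dst, src1, carry_in) alone, which is what the reworked compute_c_adc in
the diff does:

    dst = src1 + src2 + carry_in
    CF  = carry_in ? dst <= src1 : dst < src1

A 32-bit stand-alone check of that identity (illustration; the function
name is made up):

    #include <stdint.h>

    static int adc32_carry_out(uint32_t src1, uint32_t src2, int carry_in)
    {
        uint32_t dst = src1 + src2 + carry_in;   /* wraps mod 2^32 */
        return carry_in ? dst <= src1 : dst < src1;
    }

The SBB side is symmetric: the minuend is reconstructed as
dst + src2 + carry_in and compared against src2, as compute_c_sbb does.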

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index a5d8181..218a9b5 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,7 +75,8 @@ const uint8_t parity_table[256] = {
 
 #endif
 
-target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
+                                   target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
@@ -99,11 +100,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
         return compute_all_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_all_adcb(dst, src1);
+        return compute_all_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_all_adcw(dst, src1);
+        return compute_all_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_all_adcl(dst, src1);
+        return compute_all_adcl(dst, src1, src2);
 
     case CC_OP_SUBB:
         return compute_all_subb(dst, src1);
@@ -113,11 +114,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
         return compute_all_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_all_sbbb(dst, src1);
+        return compute_all_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_all_sbbw(dst, src1);
+        return compute_all_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_all_sbbl(dst, src1);
+        return compute_all_sbbl(dst, src1, src2);
 
     case CC_OP_LOGICB:
         return compute_all_logicb(dst, src1);
@@ -160,11 +161,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
     case CC_OP_ADDQ:
         return compute_all_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_all_adcq(dst, src1);
+        return compute_all_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
         return compute_all_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_all_sbbq(dst, src1);
+        return compute_all_sbbq(dst, src1, src2);
     case CC_OP_LOGICQ:
         return compute_all_logicq(dst, src1);
     case CC_OP_INCQ:
@@ -181,10 +182,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
 
 uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
 {
-    return helper_cc_compute_all(CC_DST, CC_SRC, op);
+    return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
 }
 
-target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
+                                 target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
@@ -225,11 +227,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
         return compute_c_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_c_adcb(dst, src1);
+        return compute_c_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_c_adcw(dst, src1);
+        return compute_c_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_c_adcl(dst, src1);
+        return compute_c_adcl(dst, src1, src2);
 
     case CC_OP_SUBB:
         return compute_c_subb(dst, src1);
@@ -239,11 +241,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
         return compute_c_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_c_sbbb(dst, src1);
+        return compute_c_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_c_sbbw(dst, src1);
+        return compute_c_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_c_sbbl(dst, src1);
+        return compute_c_sbbl(dst, src1, src2);
 
     case CC_OP_SHLB:
         return compute_c_shlb(dst, src1);
@@ -256,11 +258,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_c_adcq(dst, src1);
+        return compute_c_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
         return compute_c_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_c_sbbq(dst, src1);
+        return compute_c_sbbq(dst, src1, src2);
     case CC_OP_SHLQ:
         return compute_c_shlq(dst, src1);
 #endif
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 522b462..87f47d2 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -58,12 +58,13 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return dst < src1;
 }
 
-static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    DATA_TYPE src2 = dst - src1 - 1;
+    DATA_TYPE src2 = dst - src1 - src3;
 
-    cf = dst <= src1;
+    cf = (src3 ? dst <= src1 : dst < src1);
     pf = parity_table[(uint8_t)dst];
     af = (dst ^ src1 ^ src2) & 0x10;
     zf = (dst == 0) << 6;
@@ -72,9 +73,10 @@ static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                       DATA_TYPE src3)
 {
-    return dst <= src1;
+    return src3 ? dst <= src1 : dst < src1;
 }
 
 static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -98,12 +100,13 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
     return src1 < src2;
 }
 
-static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    DATA_TYPE src1 = dst + src2 + 1;
+    DATA_TYPE src1 = dst + src2 + src3;
 
-    cf = src1 <= src2;
+    cf = (src3 ? src1 <= src2 : src1 < src2);
     pf = parity_table[(uint8_t)dst];
     af = (dst ^ src1 ^ src2) & 0x10;
     zf = (dst == 0) << 6;
@@ -112,11 +115,12 @@ static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                       DATA_TYPE src3)
 {
-    DATA_TYPE src1 = dst + src2 + 1;
+    DATA_TYPE src1 = dst + src2 + src3;
 
-    return src1 <= src2;
+    return (src3 ? src1 <= src2 : src1 < src2);
 }
 
 static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 8c4c605..1fa9dc8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -725,8 +725,9 @@ typedef struct CPUX86State {
                         stored elsewhere */
 
     /* emulator internal eflags handling */
-    target_ulong cc_src;
     target_ulong cc_dst;
+    target_ulong cc_src;
+    target_ulong cc_src2;
     uint32_t cc_op;
     int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
     uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
@@ -1116,9 +1117,10 @@ static inline int cpu_mmu_index (CPUX86State *env)
 #define EIP (env->eip)
 #define DF  (env->df)
 
-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP  (env->cc_op)
+#define CC_DST  (env->cc_dst)
+#define CC_SRC  (env->cc_src)
+#define CC_SRC2 (env->cc_src2)
+#define CC_OP   (env->cc_op)
 
 /* n must be a constant to be efficient */
 static inline target_long lshift(target_long x, int n)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 901ff73..4c46ab1 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
 #include "exec/def-helper.h"
 
-DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
-DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 
 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5235aff..f667f93 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -61,7 +61,7 @@
 /* global register indexes */
 static TCGv_ptr cpu_env;
 static TCGv cpu_A0;
-static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 /* local temps */
@@ -188,18 +188,19 @@ enum {
 enum {
     USES_CC_DST  = 1,
     USES_CC_SRC  = 2,
-    USES_CC_SRCT = 4,
+    USES_CC_SRC2 = 4,
+    USES_CC_SRCT = 8,
 };
 
 /* Bit set if the global variable is live after setting CC_OP to X.  */
 static const uint8_t cc_op_live[CC_OP_NB] = {
-    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_EFLAGS] = USES_CC_SRC,
     [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
-    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
-    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
     [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
@@ -223,6 +224,9 @@ static void set_cc_op(DisasContext *s, CCOp op)
     if (dead & USES_CC_SRC) {
         tcg_gen_discard_tl(cpu_cc_src);
     }
+    if (dead & USES_CC_SRC2) {
+        tcg_gen_discard_tl(cpu_cc_src2);
+    }
     if (dead & USES_CC_SRCT) {
         tcg_gen_discard_tl(cpu_cc_srcT);
     }
@@ -867,6 +871,13 @@ static void gen_op_update2_cc(void)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
+static void gen_op_update3_cc(TCGv reg)
+{
+    tcg_gen_mov_tl(cpu_cc_src2, reg);
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
 static inline void gen_op_testl_T0_T1_cc(void)
 {
     tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
@@ -882,7 +893,7 @@ static void gen_op_update_neg_cc(void)
 /* compute all eflags to cc_src */
 static void gen_compute_eflags(DisasContext *s)
 {
-    TCGv zero, dst, src1;
+    TCGv zero, dst, src1, src2;
     int live, dead;
 
     if (s->cc_op == CC_OP_EFLAGS) {
@@ -892,10 +903,11 @@ static void gen_compute_eflags(DisasContext *s)
     TCGV_UNUSED(zero);
     dst = cpu_cc_dst;
     src1 = cpu_cc_src;
+    src2 = cpu_cc_src2;
 
     /* Take care to not read values that are not live.  */
     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
-    dead = live ^ (USES_CC_DST | USES_CC_SRC);
+    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
     if (dead) {
         zero = tcg_const_tl(0);
         if (dead & USES_CC_DST) {
@@ -904,10 +916,13 @@ static void gen_compute_eflags(DisasContext *s)
         if (dead & USES_CC_SRC) {
             src1 = zero;
         }
+        if (dead & USES_CC_SRC2) {
+            src2 = zero;
+        }
     }
 
     gen_update_cc_op(s);
-    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op);
+    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
     set_cc_op(s, CC_OP_EFLAGS);
 
     if (dead) {
@@ -951,30 +966,6 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                              .reg2 = t1, .mask = -1, .use_reg2 = true };
 
-    case CC_OP_SBBB ... CC_OP_SBBQ:
-        /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
-        size = s->cc_op - CC_OP_SBBB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-        if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) {
-            tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
-            t1 = cpu_tmp0;
-        }
-
-        tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
-        tcg_gen_addi_tl(reg, reg, 1);
-        gen_extu(size, reg);
-        t0 = reg;
-        goto adc_sbb;
-
-    case CC_OP_ADCB ... CC_OP_ADCQ:
-        /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
-        size = s->cc_op - CC_OP_ADCB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-    adc_sbb:
-        return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
-                             .reg2 = t1, .mask = -1, .use_reg2 = true };
-
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 
@@ -1004,7 +995,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
        /* The need to compute only C from CC_OP_DYNAMIC is important
           in efficiently implementing e.g. INC at the start of a TB.  */
        gen_update_cc_op(s);
-       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op);
+       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
+                               cpu_cc_src2, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                             .mask = -1, .no_setcond = true };
     }
@@ -1442,18 +1434,10 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
             gen_op_mov_reg_T0(ot, d);
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
-        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
-        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
-        set_cc_op(s1, CC_OP_DYNAMIC);
+        gen_op_update3_cc(cpu_tmp4);
+        set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
-        /*
-         * No need to store cpu_cc_srcT, because it is used only
-         * when the cc_op is known.
-         */
         gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
@@ -1461,12 +1445,8 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
             gen_op_mov_reg_T0(ot, d);
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
-        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
-        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
-        set_cc_op(s1, CC_OP_DYNAMIC);
+        gen_op_update3_cc(cpu_tmp4);
+        set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
         gen_op_addl_T0_T1();
@@ -7788,6 +7768,8 @@ void optimize_flags_init(void)
                                     "cc_dst");
     cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
                                     "cc_src");
+    cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
+                                     "cc_src2");
 
 #ifdef TARGET_X86_64
     cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
commit db9f2597722d5d8bc5f2330f186288d893114338
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:10:49 2013 -0800

    target-i386: Make helper_cc_compute_{all,c} const
    
    Pass the data in explicitly, rather than indirectly via env.
    This avoids all sorts of unnecessary register spillage.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 61427dd..a5d8181 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,10 +75,8 @@ const uint8_t parity_table[256] = {
 
 #endif
 
-uint32_t helper_cc_compute_all(CPUX86State *env, int op)
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
 {
-    target_ulong dst = CC_DST, src1 = CC_SRC;
-
     switch (op) {
     default: /* should never happen */
         return 0;
@@ -183,13 +181,11 @@ uint32_t helper_cc_compute_all(CPUX86State *env, int op)
 
 uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
 {
-    return helper_cc_compute_all(env, op);
+    return helper_cc_compute_all(CC_DST, CC_SRC, op);
 }
 
-uint32_t helper_cc_compute_c(CPUX86State *env, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
 {
-    target_ulong dst = CC_DST, src1 = CC_SRC;
-
     switch (op) {
     default: /* should never happen */
     case CC_OP_LOGICB:
@@ -281,7 +277,7 @@ target_ulong helper_read_eflags(CPUX86State *env)
 {
     uint32_t eflags;
 
-    eflags = helper_cc_compute_all(env, CC_OP);
+    eflags = cpu_cc_compute_all(env, CC_OP);
     eflags |= (DF & DF_MASK);
     eflags |= env->eflags & ~(VM_MASK | RF_MASK);
     return eflags;
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 9ed720d..901ff73 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
 #include "exec/def-helper.h"
 
-DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int)
-DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int)
+DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
+DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
 
 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 31e3442..5235aff 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -882,13 +882,37 @@ static void gen_op_update_neg_cc(void)
 /* compute all eflags to cc_src */
 static void gen_compute_eflags(DisasContext *s)
 {
+    TCGv zero, dst, src1;
+    int live, dead;
+
     if (s->cc_op == CC_OP_EFLAGS) {
         return;
     }
+
+    TCGV_UNUSED(zero);
+    dst = cpu_cc_dst;
+    src1 = cpu_cc_src;
+
+    /* Take care to not read values that are not live.  */
+    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
+    dead = live ^ (USES_CC_DST | USES_CC_SRC);
+    if (dead) {
+        zero = tcg_const_tl(0);
+        if (dead & USES_CC_DST) {
+            dst = zero;
+        }
+        if (dead & USES_CC_SRC) {
+            src1 = zero;
+        }
+    }
+
     gen_update_cc_op(s);
-    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op);
     set_cc_op(s, CC_OP_EFLAGS);
-    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+
+    if (dead) {
+        tcg_temp_free(zero);
+    }
 }
 
 typedef struct CCPrepare {
@@ -980,8 +1004,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
        /* The need to compute only C from CC_OP_DYNAMIC is important
           in efficiently implementing e.g. INC at the start of a TB.  */
        gen_update_cc_op(s);
-       gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
-       tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                             .mask = -1, .no_setcond = true };
     }
commit 8601c0b6c553a018fc62007efa8ac2a71d77f449
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 16:06:38 2013 -0800

    target-i386: Don't reference ENV through most of cc helpers
    
    In preparation for making this a const helper.
    
    By using the proper types in the parameters to the helper functions,
    we get to avoid quite a lot of subsequent casting.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
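
As a stand-alone illustration of what the reworked template computes from
(dst, src1) alone, here is the 8-bit add case written out with the usual
eflags bit positions (CF=0x01, PF=0x04, AF=0x10, ZF=0x40, SF=0x80,
OF=0x800); the function name is invented, and the second addend is
reconstructed as dst - src1 just as compute_all_addb does:

    #include <stdint.h>

    static int add8_flags(uint8_t dst, uint8_t src1)
    {
        uint8_t src2 = dst - src1;       /* recover the other addend */
        int p, cf, pf, af, zf, sf, of;

        p = dst;
        p ^= p >> 4;
        p ^= p >> 2;
        p ^= p >> 1;                     /* bit 0 now holds the parity */

        cf = dst < src1;                          /* unsigned carry out */
        pf = (p & 1) ? 0 : 0x04;                  /* PF set on even parity */
        af = (dst ^ src1 ^ src2) & 0x10;          /* carry out of bit 3 */
        zf = (dst == 0) << 6;
        sf = dst & 0x80;
        of = ((src1 ^ src2 ^ -1) & (src1 ^ dst) & 0x80) ? 0x800 : 0;
        return cf | pf | af | zf | sf | of;
    }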

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 9422003..61427dd 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,125 +75,108 @@ const uint8_t parity_table[256] = {
 
 #endif
 
-static int compute_all_eflags(CPUX86State *env)
-{
-    return CC_SRC;
-}
-
-static int compute_c_eflags(CPUX86State *env)
-{
-    return CC_SRC & CC_C;
-}
-
 uint32_t helper_cc_compute_all(CPUX86State *env, int op)
 {
+    target_ulong dst = CC_DST, src1 = CC_SRC;
+
     switch (op) {
     default: /* should never happen */
         return 0;
 
     case CC_OP_EFLAGS:
-        return compute_all_eflags(env);
+        return src1;
 
     case CC_OP_MULB:
-        return compute_all_mulb(env);
+        return compute_all_mulb(dst, src1);
     case CC_OP_MULW:
-        return compute_all_mulw(env);
+        return compute_all_mulw(dst, src1);
     case CC_OP_MULL:
-        return compute_all_mull(env);
+        return compute_all_mull(dst, src1);
 
     case CC_OP_ADDB:
-        return compute_all_addb(env);
+        return compute_all_addb(dst, src1);
     case CC_OP_ADDW:
-        return compute_all_addw(env);
+        return compute_all_addw(dst, src1);
     case CC_OP_ADDL:
-        return compute_all_addl(env);
+        return compute_all_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_all_adcb(env);
+        return compute_all_adcb(dst, src1);
     case CC_OP_ADCW:
-        return compute_all_adcw(env);
+        return compute_all_adcw(dst, src1);
     case CC_OP_ADCL:
-        return compute_all_adcl(env);
+        return compute_all_adcl(dst, src1);
 
     case CC_OP_SUBB:
-        return compute_all_subb(env);
+        return compute_all_subb(dst, src1);
     case CC_OP_SUBW:
-        return compute_all_subw(env);
+        return compute_all_subw(dst, src1);
     case CC_OP_SUBL:
-        return compute_all_subl(env);
+        return compute_all_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_all_sbbb(env);
+        return compute_all_sbbb(dst, src1);
     case CC_OP_SBBW:
-        return compute_all_sbbw(env);
+        return compute_all_sbbw(dst, src1);
     case CC_OP_SBBL:
-        return compute_all_sbbl(env);
+        return compute_all_sbbl(dst, src1);
 
     case CC_OP_LOGICB:
-        return compute_all_logicb(env);
+        return compute_all_logicb(dst, src1);
     case CC_OP_LOGICW:
-        return compute_all_logicw(env);
+        return compute_all_logicw(dst, src1);
     case CC_OP_LOGICL:
-        return compute_all_logicl(env);
+        return compute_all_logicl(dst, src1);
 
     case CC_OP_INCB:
-        return compute_all_incb(env);
+        return compute_all_incb(dst, src1);
     case CC_OP_INCW:
-        return compute_all_incw(env);
+        return compute_all_incw(dst, src1);
     case CC_OP_INCL:
-        return compute_all_incl(env);
+        return compute_all_incl(dst, src1);
 
     case CC_OP_DECB:
-        return compute_all_decb(env);
+        return compute_all_decb(dst, src1);
     case CC_OP_DECW:
-        return compute_all_decw(env);
+        return compute_all_decw(dst, src1);
     case CC_OP_DECL:
-        return compute_all_decl(env);
+        return compute_all_decl(dst, src1);
 
     case CC_OP_SHLB:
-        return compute_all_shlb(env);
+        return compute_all_shlb(dst, src1);
     case CC_OP_SHLW:
-        return compute_all_shlw(env);
+        return compute_all_shlw(dst, src1);
     case CC_OP_SHLL:
-        return compute_all_shll(env);
+        return compute_all_shll(dst, src1);
 
     case CC_OP_SARB:
-        return compute_all_sarb(env);
+        return compute_all_sarb(dst, src1);
     case CC_OP_SARW:
-        return compute_all_sarw(env);
+        return compute_all_sarw(dst, src1);
     case CC_OP_SARL:
-        return compute_all_sarl(env);
+        return compute_all_sarl(dst, src1);
 
 #ifdef TARGET_X86_64
     case CC_OP_MULQ:
-        return compute_all_mulq(env);
-
+        return compute_all_mulq(dst, src1);
     case CC_OP_ADDQ:
-        return compute_all_addq(env);
-
+        return compute_all_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_all_adcq(env);
-
+        return compute_all_adcq(dst, src1);
     case CC_OP_SUBQ:
-        return compute_all_subq(env);
-
+        return compute_all_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_all_sbbq(env);
-
+        return compute_all_sbbq(dst, src1);
     case CC_OP_LOGICQ:
-        return compute_all_logicq(env);
-
+        return compute_all_logicq(dst, src1);
     case CC_OP_INCQ:
-        return compute_all_incq(env);
-
+        return compute_all_incq(dst, src1);
     case CC_OP_DECQ:
-        return compute_all_decq(env);
-
+        return compute_all_decq(dst, src1);
     case CC_OP_SHLQ:
-        return compute_all_shlq(env);
-
+        return compute_all_shlq(dst, src1);
     case CC_OP_SARQ:
-        return compute_all_sarq(env);
+        return compute_all_sarq(dst, src1);
 #endif
     }
 }
@@ -205,113 +188,85 @@ uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
 
 uint32_t helper_cc_compute_c(CPUX86State *env, int op)
 {
+    target_ulong dst = CC_DST, src1 = CC_SRC;
+
     switch (op) {
     default: /* should never happen */
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_LOGICQ:
         return 0;
 
     case CC_OP_EFLAGS:
-        return compute_c_eflags(env);
+    case CC_OP_SARB:
+    case CC_OP_SARW:
+    case CC_OP_SARL:
+    case CC_OP_SARQ:
+        return src1 & 1;
+
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_INCQ:
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_DECQ:
+        return src1;
 
     case CC_OP_MULB:
-        return compute_c_mull(env);
     case CC_OP_MULW:
-        return compute_c_mull(env);
     case CC_OP_MULL:
-        return compute_c_mull(env);
+    case CC_OP_MULQ:
+        return src1 != 0;
 
     case CC_OP_ADDB:
-        return compute_c_addb(env);
+        return compute_c_addb(dst, src1);
     case CC_OP_ADDW:
-        return compute_c_addw(env);
+        return compute_c_addw(dst, src1);
     case CC_OP_ADDL:
-        return compute_c_addl(env);
+        return compute_c_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_c_adcb(env);
+        return compute_c_adcb(dst, src1);
     case CC_OP_ADCW:
-        return compute_c_adcw(env);
+        return compute_c_adcw(dst, src1);
     case CC_OP_ADCL:
-        return compute_c_adcl(env);
+        return compute_c_adcl(dst, src1);
 
     case CC_OP_SUBB:
-        return compute_c_subb(env);
+        return compute_c_subb(dst, src1);
     case CC_OP_SUBW:
-        return compute_c_subw(env);
+        return compute_c_subw(dst, src1);
     case CC_OP_SUBL:
-        return compute_c_subl(env);
+        return compute_c_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_c_sbbb(env);
+        return compute_c_sbbb(dst, src1);
     case CC_OP_SBBW:
-        return compute_c_sbbw(env);
+        return compute_c_sbbw(dst, src1);
     case CC_OP_SBBL:
-        return compute_c_sbbl(env);
-
-    case CC_OP_LOGICB:
-        return compute_c_logicb();
-    case CC_OP_LOGICW:
-        return compute_c_logicw();
-    case CC_OP_LOGICL:
-        return compute_c_logicl();
-
-    case CC_OP_INCB:
-        return compute_c_incl(env);
-    case CC_OP_INCW:
-        return compute_c_incl(env);
-    case CC_OP_INCL:
-        return compute_c_incl(env);
-
-    case CC_OP_DECB:
-        return compute_c_incl(env);
-    case CC_OP_DECW:
-        return compute_c_incl(env);
-    case CC_OP_DECL:
-        return compute_c_incl(env);
+        return compute_c_sbbl(dst, src1);
 
     case CC_OP_SHLB:
-        return compute_c_shlb(env);
+        return compute_c_shlb(dst, src1);
     case CC_OP_SHLW:
-        return compute_c_shlw(env);
+        return compute_c_shlw(dst, src1);
     case CC_OP_SHLL:
-        return compute_c_shll(env);
-
-    case CC_OP_SARB:
-        return compute_c_sarl(env);
-    case CC_OP_SARW:
-        return compute_c_sarl(env);
-    case CC_OP_SARL:
-        return compute_c_sarl(env);
+        return compute_c_shll(dst, src1);
 
 #ifdef TARGET_X86_64
-    case CC_OP_MULQ:
-        return compute_c_mull(env);
-
     case CC_OP_ADDQ:
-        return compute_c_addq(env);
-
+        return compute_c_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_c_adcq(env);
-
+        return compute_c_adcq(dst, src1);
     case CC_OP_SUBQ:
-        return compute_c_subq(env);
-
+        return compute_c_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_c_sbbq(env);
-
-    case CC_OP_LOGICQ:
-        return compute_c_logicq();
-
-    case CC_OP_INCQ:
-        return compute_c_incl(env);
-
-    case CC_OP_DECQ:
-        return compute_c_incl(env);
-
+        return compute_c_sbbq(dst, src1);
     case CC_OP_SHLQ:
-        return compute_c_shlq(env);
-
-    case CC_OP_SARQ:
-        return compute_c_sarl(env);
+        return compute_c_shlq(dst, src1);
 #endif
     }
 }
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 1f94e11..522b462 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -18,255 +18,198 @@
  */
 
 #define DATA_BITS (1 << (3 + SHIFT))
-#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))
 
 #if DATA_BITS == 8
 #define SUFFIX b
 #define DATA_TYPE uint8_t
-#define DATA_MASK 0xff
 #elif DATA_BITS == 16
 #define SUFFIX w
 #define DATA_TYPE uint16_t
-#define DATA_MASK 0xffff
 #elif DATA_BITS == 32
 #define SUFFIX l
 #define DATA_TYPE uint32_t
-#define DATA_MASK 0xffffffff
 #elif DATA_BITS == 64
 #define SUFFIX q
 #define DATA_TYPE uint64_t
-#define DATA_MASK 0xffffffffffffffffULL
 #else
 #error unhandled operand size
 #endif
 
+#define SIGN_MASK (((DATA_TYPE)1) << (DATA_BITS - 1))
+
 /* dynamic flags computation */
 
-static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_SRC;
-    src2 = CC_DST - CC_SRC;
-    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src2 = dst - src1;
+
+    cf = dst < src1;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
-    int cf;
-    target_long src1;
-
-    src1 = CC_SRC;
-    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
-    return cf;
+    return dst < src1;
 }
 
-static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_SRC;
-    src2 = CC_DST - CC_SRC - 1;
-    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src2 = dst - src1 - 1;
+
+    cf = dst <= src1;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & 0x10;
+    zf = (dst == 0) << 6;
+    sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
-    int cf;
-    target_long src1;
-
-    src1 = CC_SRC;
-    cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
-    return cf;
+    return dst <= src1;
 }
 
-static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_DST + CC_SRC;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src1 = dst + src2;
+
+    cf = src1 < src2;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
-    int cf;
-    target_long src1, src2;
+    DATA_TYPE src1 = dst + src2;
 
-    src1 = CC_DST + CC_SRC;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
-    return cf;
+    return src1 < src2;
 }
 
-static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
-
-    src1 = CC_DST + CC_SRC + 1;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    DATA_TYPE src1 = dst + src2 + 1;
+
+    cf = src1 <= src2;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & 0x10;
+    zf = (dst == 0) << 6;
+    sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+    of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
 {
-    int cf;
-    target_long src1, src2;
+    DATA_TYPE src1 = dst + src2 + 1;
 
-    src1 = CC_DST + CC_SRC + 1;
-    src2 = CC_SRC;
-    cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
-    return cf;
+    return src1 <= src2;
 }
 
-static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
 
     cf = 0;
-    pf = parity_table[(uint8_t)CC_DST];
+    pf = parity_table[(uint8_t)dst];
     af = 0;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
     of = 0;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_logic, SUFFIX)(void)
-{
-    return 0;
-}
-
-static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
+    DATA_TYPE src2;
 
-    src1 = CC_DST - 1;
+    cf = src1;
+    src1 = dst - 1;
     src2 = 1;
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = (dst == SIGN_MASK) * CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-#if DATA_BITS == 32
-static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
-{
-    return CC_SRC;
-}
-#endif
-
-static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
-    target_long src1, src2;
+    DATA_TYPE src2;
 
-    src1 = CC_DST + 1;
+    cf = src1;
+    src1 = dst + 1;
     src2 = 1;
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+    pf = parity_table[(uint8_t)dst];
+    af = (dst ^ src1 ^ src2) & CC_A;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = (dst == SIGN_MASK - 1) * CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
 
-    cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = (src1 >> (DATA_BITS - 1)) & CC_C;
+    pf = parity_table[(uint8_t)dst];
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    /* of is defined if shift count == 1 */
-    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    /* of is defined iff shift count == 1 */
+    of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
-    return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+    return (src1 >> (DATA_BITS - 1)) & CC_C;
 }
 
-#if DATA_BITS == 32
-static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
-{
-    return CC_SRC & 1;
-}
-#endif
-
-static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
 {
     int cf, pf, af, zf, sf, of;
 
-    cf = CC_SRC & 1;
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = src1 & 1;
+    pf = parity_table[(uint8_t)dst];
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    /* of is defined if shift count == 1 */
-    of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    /* of is defined iff shift count == 1 */
+    of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
     return cf | pf | af | zf | sf | of;
 }
 
-#if DATA_BITS == 32
-static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
-{
-    int cf;
-
-    cf = (CC_SRC != 0);
-    return cf;
-}
-#endif
-
 /* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
-   CF are modified and it is slower to do that. */
-static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
+   CF are modified and it is slower to do that.  Note as well that we
+   don't truncate SRC1 for computing carry to DATA_TYPE.  */
+static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1)
 {
     int cf, pf, af, zf, sf, of;
 
-    cf = (CC_SRC != 0);
-    pf = parity_table[(uint8_t)CC_DST];
+    cf = (src1 != 0);
+    pf = parity_table[(uint8_t)dst];
     af = 0; /* undefined */
-    zf = ((DATA_TYPE)CC_DST == 0) << 6;
-    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
-    of = cf << 11;
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = cf * CC_O;
     return cf | pf | af | zf | sf | of;
 }
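
What one instantiation of the template above boils down to can be sketched in
isolation.  The snippet below hard-codes the 8-bit case with plain C types, so
names and structure are simplified and it is not the actual
cc_helper_template.h code, but the flag arithmetic (src1 reconstructed as
dst + src2, OF taken from the sign of (src1 ^ src2) & (src1 ^ dst)) follows the
lines shown in the diff:

/* Illustrative sketch only: 8-bit version of the (dst, src2) calling
 * convention used by the new helpers.  The flag bit positions match the
 * x86 EFLAGS layout; everything else is simplified. */
#include <stdint.h>
#include <stdio.h>

#define CC_C 0x0001
#define CC_P 0x0004
#define CC_A 0x0010
#define CC_Z 0x0040
#define CC_S 0x0080
#define CC_O 0x0800

/* PF is set when the low byte of the result has an even number of 1 bits. */
static int parity8(uint8_t v)
{
    v ^= v >> 4;
    v ^= v >> 2;
    v ^= v >> 1;
    return (~v & 1) * CC_P;
}

/* dst is the subtraction result, src2 the subtrahend; the original first
 * operand is reconstructed as src1 = dst + src2. */
static int compute_all_sub8(uint8_t dst, uint8_t src2)
{
    uint8_t src1 = dst + src2;
    int cf = (src1 < src2) * CC_C;
    int pf = parity8(dst);
    int af = (dst ^ src1 ^ src2) & CC_A;
    int zf = (dst == 0) * CC_Z;
    int sf = dst & CC_S;
    int of = (((src1 ^ src2) & (src1 ^ dst)) & 0x80) ? CC_O : 0;
    return cf | pf | af | zf | sf | of;
}

int main(void)
{
    /* 0x21 - 0x40 borrows: expect CF, PF and SF set, ZF/OF clear (0x85). */
    printf("%#x\n", compute_all_sub8((uint8_t)(0x21 - 0x40), 0x40));
    return 0;
}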
 
commit a3251186fc6a04d421e9c4b65aa04ec32379ec38
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 15:43:03 2013 -0800

    target-i386: optimize flags checking after sub using CC_SRCT
    
    After a comparison or subtraction, the original value of the LHS will
    currently be reconstructed using an addition.  However, in most cases
    it is already available: store it in a temp-local variable and save 1
    or 2 TCG ops (2 if the result of the addition needs to be extended).
    
    The temp-local can be declared dead as soon as the cc_op changes again,
    or also before the translation block ends because gen_prepare_cc will
    always make a copy before returning it.  All this magic, plus copy
    propagation and dead-code elimination, ensures that the temp local will
    (almost) never be spilled.
    
    Example (cmp $0x21,%rax + jbe):
    
     Before                                     After
    ----------------------------------------------------------------------------
     movi_i64 tmp1,$0x21                        movi_i64 tmp1,$0x21
     movi_i64 cc_src,$0x21                      movi_i64 cc_src,$0x21
     sub_i64 cc_dst,rax,tmp1                    sub_i64 cc_dst,rax,tmp1
     add_i64 tmp7,cc_dst,cc_src
     movi_i32 cc_op,$0x11                       movi_i32 cc_op,$0x11
     brcond_i64 tmp7,cc_src,leu,$0x0            discard loc11
                                                brcond_i64 rax,cc_src,leu,$0x0
    
     Before                                     After
    ----------------------------------------------------------------------------
      mov    (%r14),%rbp                        mov    (%r14),%rbp
      mov    %rbp,%rbx                          mov    %rbp,%rbx
      sub    $0x21,%rbx                         sub    $0x21,%rbx
      lea    0x21(%rbx),%r12
      movl   $0x11,0xa0(%r14)                   movl   $0x11,0xa0(%r14)
      movq   $0x21,0x90(%r14)                   movq   $0x21,0x90(%r14)
      mov    %rbx,0x98(%r14)                    mov    %rbx,0x98(%r14)
      cmp    $0x21,%r12                     |   cmp    $0x21,%rbp
      jbe    ...                                jbe    ...
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
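
The identity this relies on is easy to check in isolation: after dst = a - b
with src = b, the original a equals dst + src modulo the operand width, which
is exactly the addition that keeping a copy in cc_srcT makes unnecessary.  A
tiny standalone check (plain C, not QEMU code):

/* Demonstrates the arithmetic identity only; cc_dst/cc_src here are just
 * local variables named after the translator globals. */
#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint64_t a = 0x1234, b = 0x21;             /* e.g. cmp $0x21,%rax       */
    uint64_t cc_dst = a - b;                   /* result of the subtraction */
    uint64_t cc_src = b;                       /* second operand            */
    uint64_t reconstructed = cc_dst + cc_src;  /* the add the patch removes */

    assert(reconstructed == a);                /* cc_srcT holds a directly  */
    return (a <= b) == (reconstructed <= cc_src) ? 0 : 1;
}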

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7f2d65f..31e3442 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -60,7 +60,8 @@
 
 /* global register indexes */
 static TCGv_ptr cpu_env;
-static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst;
+static TCGv cpu_A0;
+static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 /* local temps */
@@ -185,8 +186,9 @@ enum {
 };
 
 enum {
-    USES_CC_DST = 1,
-    USES_CC_SRC = 2,
+    USES_CC_DST  = 1,
+    USES_CC_SRC  = 2,
+    USES_CC_SRCT = 4,
 };
 
 /* Bit set if the global variable is live after setting CC_OP to X.  */
@@ -196,7 +198,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
-    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
     [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
     [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
@@ -221,6 +223,9 @@ static void set_cc_op(DisasContext *s, CCOp op)
     if (dead & USES_CC_SRC) {
         tcg_gen_discard_tl(cpu_cc_src);
     }
+    if (dead & USES_CC_SRCT) {
+        tcg_gen_discard_tl(cpu_cc_srcT);
+    }
 
     s->cc_op = op;
     /* The DYNAMIC setting is translator only, and should never be
@@ -869,8 +874,9 @@ static inline void gen_op_testl_T0_T1_cc(void)
 
 static void gen_op_update_neg_cc(void)
 {
-    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+    tcg_gen_movi_tl(cpu_cc_srcT, 0);
 }
 
 /* compute all eflags to cc_src */
@@ -903,12 +909,12 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 
     switch (s->cc_op) {
     case CC_OP_SUBB ... CC_OP_SUBQ:
-        /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
+        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
         t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
         t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
-        tcg_gen_add_tl(t0, cpu_cc_dst, cpu_cc_src);
+        tcg_gen_mov_tl(t0, cpu_cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
 
@@ -1052,7 +1058,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
             gen_extu(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
@@ -1065,7 +1071,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
             gen_exts(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
@@ -1421,6 +1427,10 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_SBBL:
+        /*
+         * No need to store cpu_cc_srcT, because it is used only
+         * when the cc_op is known.
+         */
         gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
@@ -1445,6 +1455,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
+        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         if (d != OR_TMP0)
             gen_op_mov_reg_T0(ot, d);
@@ -1483,6 +1494,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         break;
     case OP_CMPL:
         tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
         tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
@@ -2799,8 +2811,9 @@ static void gen_eob(DisasContext *s)
    direct call to the next block may occur */
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
+    gen_update_cc_op(s);
+    set_cc_op(s, CC_OP_DYNAMIC);
     if (s->jmp_opt) {
-        gen_update_cc_op(s);
         gen_goto_tb(s, tb_num, eip);
         s->is_jmp = DISAS_TB_JUMP;
     } else {
@@ -5017,9 +5030,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = 0; /* avoid warning */
             }
             label1 = gen_new_label();
-            tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0);
+            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
+            gen_extu(ot, t0);
             gen_extu(ot, t2);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
             label2 = gen_new_label();
             if (mod == 3) {
                 gen_op_mov_reg_v(ot, R_EAX, t0);
@@ -5038,7 +5052,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
             gen_set_label(label2);
             tcg_gen_mov_tl(cpu_cc_src, t0);
-            tcg_gen_mov_tl(cpu_cc_dst, t2);
+            tcg_gen_mov_tl(cpu_cc_srcT, t2);
+            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
             set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(t0);
             tcg_temp_free(t1);
@@ -7746,10 +7761,10 @@ void optimize_flags_init(void)
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
     cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
                                        offsetof(CPUX86State, cc_op), "cc_op");
-    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
-                                    "cc_src");
     cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
                                     "cc_dst");
+    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
+                                    "cc_src");
 
 #ifdef TARGET_X86_64
     cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
@@ -7885,6 +7900,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     cpu_tmp5 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
+    cpu_cc_srcT = tcg_temp_local_new();
 
     gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
commit 891a5133f1637296c3823229180b5851132ed5f5
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Jan 18 10:06:55 2013 -0800

    target-i386: Update cc_op before TCG branches
    
    Placing CC_OP_DYNAMIC at the join point is less effective than placing
    it before the branch, because by the join the branch will already have
    forced the global registers to their home locations.  Setting it earlier
    gives us a chance to discard CC_SRC2 before it gets stored.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 948a048..7f2d65f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1559,8 +1559,9 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_T0(ot, op1);
     }
 
-    /* update eflags */
+    /* Update eflags data because we cannot predict flags afterward.  */
     gen_update_cc_op(s);
+    set_cc_op(s, CC_OP_DYNAMIC);
 
     tcg_gen_mov_tl(t1, cpu_T[0]);
 
@@ -1587,7 +1588,6 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
     }
 
     gen_set_label(shift_label);
-    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1972,8 +1972,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_v(ot, op1, t0);
     }
     
-    /* update eflags */
+    /* Update eflags data because we cannot predict flags afterward.  */
     gen_update_cc_op(s);
+    set_cc_op(s, CC_OP_DYNAMIC);
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
@@ -1986,7 +1987,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
     }
     gen_set_label(label2);
-    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
commit dc259201f8b471f27136ffe50cc7019c8311ccb6
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 15:01:35 2013 -0800

    target-i386: introduce gen_jcc1_noeob
    
    A jump that ends a basic block or otherwise falls back to CC_OP_DYNAMIC
    will always have to call gen_op_set_cc_op.  However, not all jumps end
    a basic block, so introduce a variant that does not do this.
    
    This was partially undone earlier (i386: drop cc_op argument of gen_jcc1);
    redo it now, also to prepare for the introduction of src2.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index f8d5e68..948a048 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1169,14 +1169,34 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 
 /* generate a conditional jump to label 'l1' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
+static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1)
+{
+    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
+
+    if (cc.mask != -1) {
+        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
+        cc.reg = cpu_T[0];
+    }
+    if (cc.use_reg2) {
+        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
+    } else {
+        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
+    }
+}
+
+/* Generate a conditional jump to label 'l1' according to jump opcode
+   value 'b'. In the fast case, T0 is guaranted not to be used.
+   A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, int l1)
 {
     CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
 
+    gen_update_cc_op(s);
     if (cc.mask != -1) {
         tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
         cc.reg = cpu_T[0];
     }
+    set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
     } else {
@@ -1310,7 +1330,6 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
     gen_jmp(s, cur_eip);                                                      \
-    set_cc_op(s, CC_OP_DYNAMIC);                                              \
 }
 
 GEN_REPZ(movs)
@@ -2379,11 +2398,9 @@ static inline void gen_jcc(DisasContext *s, int b,
     int l1, l2;
 
     if (s->jmp_opt) {
-        gen_update_cc_op(s);
         l1 = gen_new_label();
         gen_jcc1(s, b, l1);
-        set_cc_op(s, CC_OP_DYNAMIC);
-        
+
         gen_goto_tb(s, 0, next_eip);
 
         gen_set_label(l1);
@@ -6077,7 +6094,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     };
                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                     l1 = gen_new_label();
-                    gen_jcc1(s, op1, l1);
+                    gen_jcc1_noeob(s, op1, l1);
                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                     gen_set_label(l1);
                 }
commit 63633fe6eb15107d688f3b7f61a4b379f57fc4ca
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 14:51:34 2013 -0800

    target-i386: use gen_op for cmps/scas
    
    Replace low-level ops with a higher-level "cmp %al, (A0)" in the case
    of scas, and "cmp T0, (A0)" in the case of cmps.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 60c1fdd..f8d5e68 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -123,6 +123,7 @@ typedef struct DisasContext {
 static void gen_eob(DisasContext *s);
 static void gen_jmp(DisasContext *s, target_ulong eip);
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+static void gen_op(DisasContext *s1, int op, int ot, int d);
 
 /* i386 arith/logic operations */
 enum {
@@ -861,12 +862,6 @@ static void gen_op_update2_cc(void)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
-static inline void gen_op_cmpl_T0_T1_cc(void)
-{
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
-}
-
 static inline void gen_op_testl_T0_T1_cc(void)
 {
     tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
@@ -1224,26 +1219,22 @@ static inline void gen_lods(DisasContext *s, int ot)
 
 static inline void gen_scas(DisasContext *s, int ot)
 {
-    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
     gen_string_movl_A0_EDI(s);
     gen_op_ld_T1_A0(ot + s->mem_index);
-    gen_op_cmpl_T0_T1_cc();
+    gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
-    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_cmps(DisasContext *s, int ot)
 {
-    gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0(ot + s->mem_index);
     gen_string_movl_A0_EDI(s);
     gen_op_ld_T1_A0(ot + s->mem_index);
-    gen_op_cmpl_T0_T1_cc();
+    gen_string_movl_A0_ESI(s);
+    gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
-    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_ins(DisasContext *s, int ot)
@@ -1472,7 +1463,8 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
-        gen_op_cmpl_T0_T1_cc();
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+        tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
commit 3b9d3cf1609ec98411508c1e8b6dde711117825f
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 12 15:04:10 2012 +0200

    target-i386: kill cpu_T3
    
    It is almost unused, and it is simpler to pass a TCG value directly
    to gen_shiftd_rm_T1_T3.  This value is then written to t2 without
    going through a temporary register.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9d5467d..60c1fdd 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -64,7 +64,7 @@ static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 /* local temps */
-static TCGv cpu_T[2], cpu_T3;
+static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
@@ -1858,8 +1858,8 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 }
 
 /* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, 
-                                int is_right)
+static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
+                             int is_right, TCGv count)
 {
     int label1, label2, data_bits;
     target_ulong mask;
@@ -1883,10 +1883,8 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
         gen_op_mov_v_reg(ot, t0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
-
+    tcg_gen_andi_tl(t2, count, mask);
     tcg_gen_mov_tl(t1, cpu_T[1]);
-    tcg_gen_mov_tl(t2, cpu_T3);
 
     /* Must test zero case to avoid using undefined behaviour in TCG
        shifts. */
@@ -5583,12 +5581,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_op_mov_TN_reg(ot, 1, reg);
 
         if (shift) {
-            val = cpu_ldub_code(env, s->pc++);
-            tcg_gen_movi_tl(cpu_T3, val);
+            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
+            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
+            tcg_temp_free(imm);
         } else {
-            tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
         }
-        gen_shiftd_rm_T1_T3(s, ot, opreg, op);
         break;
 
         /************************/
@@ -7869,7 +7867,6 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     cpu_T[0] = tcg_temp_new();
     cpu_T[1] = tcg_temp_new();
     cpu_A0 = tcg_temp_new();
-    cpu_T3 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
commit 57eb0cc85469a8948d1036ab830951e63aa32f66
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 16 11:00:14 2013 -0800

    target-i386: expand cmov via movcond
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
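
The point of the change is to replace control flow with a select.  As a plain
C stand-in for the movcond-based sequence in the diff below (the helper name
here is made up; TCG's movcond computes ret = cond(c1, c2) ? v1 : v2 without
a branch):

/* Branchless conditional-move sketch: the destination either takes the new
 * operand or keeps its old value, depending on the prepared condition. */
#include <stdint.h>
#include <stdio.h>

static uint32_t cmov_if_nonzero(uint32_t cond, uint32_t new_val, uint32_t old_val)
{
    return cond != 0 ? new_val : old_val;
}

int main(void)
{
    printf("%u\n", cmov_if_nonzero(1, 42, 7));  /* condition holds: prints 42 */
    printf("%u\n", cmov_if_nonzero(0, 42, 7));  /* condition fails: prints 7  */
    return 0;
}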

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 4b0a701..9d5467d 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2417,35 +2417,30 @@ static inline void gen_jcc(DisasContext *s, int b,
 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b,
                         int modrm, int reg)
 {
-    int l1, mod = (modrm >> 6) & 3;
-    TCGv t0 = tcg_temp_local_new();
+    CCPrepare cc;
 
-    if (mod != 3) {
-        int reg_addr, offset_addr;
-        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-        gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
-    } else {
-        int rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(ot, t0, rm);
-    }
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-    l1 = gen_new_label();
-    gen_jcc1(s, b ^ 1, l1);
-    switch (ot) {
-#ifdef TARGET_X86_64
-    case OT_LONG:
-        tcg_gen_mov_tl(cpu_regs[reg], t0);
-        gen_set_label(l1);
-        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
-        break;
-#endif
-    default:
-        gen_op_mov_reg_v(ot, reg, t0);
-        gen_set_label(l1);
-        break;
+    cc = gen_prepare_cc(s, b, cpu_T[1]);
+    if (cc.mask != -1) {
+        TCGv t0 = tcg_temp_new();
+        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
+        cc.reg = t0;
+    }
+    if (!cc.use_reg2) {
+        cc.reg2 = tcg_const_tl(cc.imm);
     }
 
-    tcg_temp_free(t0);
+    tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
+                       cpu_T[0], cpu_regs[reg]);
+    gen_op_mov_reg_T0(ot, reg);
+
+    if (cc.mask != -1) {
+        tcg_temp_free(cc.reg);
+    }
+    if (!cc.use_reg2) {
+        tcg_temp_free(cc.reg2);
+    }
 }
 
 static inline void gen_op_movl_T0_seg(int seg_reg)
commit f32d3781de8328237c2db45ff774cbd4b30134d6
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sun Oct 7 17:55:26 2012 +0200

    target-i386: introduce gen_cmovcc1
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index c83b56f..4b0a701 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2414,6 +2414,40 @@ static inline void gen_jcc(DisasContext *s, int b,
     }
 }
 
+static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b,
+                        int modrm, int reg)
+{
+    int l1, mod = (modrm >> 6) & 3;
+    TCGv t0 = tcg_temp_local_new();
+
+    if (mod != 3) {
+        int reg_addr, offset_addr;
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+        gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+    } else {
+        int rm = (modrm & 7) | REX_B(s);
+        gen_op_mov_v_reg(ot, t0, rm);
+    }
+
+    l1 = gen_new_label();
+    gen_jcc1(s, b ^ 1, l1);
+    switch (ot) {
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        tcg_gen_mov_tl(cpu_regs[reg], t0);
+        gen_set_label(l1);
+        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
+        break;
+#endif
+    default:
+        gen_op_mov_reg_v(ot, reg, t0);
+        gen_set_label(l1);
+        break;
+    }
+
+    tcg_temp_free(t0);
+}
+
 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
     tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
@@ -6427,40 +6461,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
-        {
-            int l1;
-            TCGv t0;
-
-            ot = dflag + OT_WORD;
-            modrm = cpu_ldub_code(env, s->pc++);
-            reg = ((modrm >> 3) & 7) | rex_r;
-            mod = (modrm >> 6) & 3;
-            t0 = tcg_temp_local_new();
-            if (mod != 3) {
-                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
-            } else {
-                rm = (modrm & 7) | REX_B(s);
-                gen_op_mov_v_reg(ot, t0, rm);
-            }
-#ifdef TARGET_X86_64
-            if (ot == OT_LONG) {
-                /* XXX: specific Intel behaviour ? */
-                l1 = gen_new_label();
-                gen_jcc1(s, b ^ 1, l1);
-                tcg_gen_mov_tl(cpu_regs[reg], t0);
-                gen_set_label(l1);
-                tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
-            } else
-#endif
-            {
-                l1 = gen_new_label();
-                gen_jcc1(s, b ^ 1, l1);
-                gen_op_mov_reg_v(ot, reg, t0);
-                gen_set_label(l1);
-            }
-            tcg_temp_free(t0);
-        }
+        ot = dflag + OT_WORD;
+        modrm = cpu_ldub_code(env, s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        gen_cmovcc1(env, s, ot, b, modrm, reg);
         break;
 
         /************************/
commit cc8b6f5b39ae47a93074a5384faa734bf2a6ae61
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Oct 8 09:42:48 2012 +0200

    target-i386: cleanup temporary macros for CCPrepare
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0b88eae..c83b56f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1040,41 +1040,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     }
 }
 
-#define gen_compute_eflags_c(s, reg, inv) \
-    gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv)
-
-static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
-{
-    if (inv) {
-        cc.cond = tcg_invert_cond(cc.cond);
-    }
-
-    if (cc.no_setcond) {
-        if (cc.cond == TCG_COND_EQ) {
-            tcg_gen_xori_tl(reg, cc.reg, 1);
-        } else {
-            tcg_gen_mov_tl(reg, cc.reg);
-        }
-        return;
-    }
-
-    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
-        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
-        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
-        tcg_gen_andi_tl(reg, reg, 1);
-        return;
-    }
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
-        cc.reg = reg;
-    }
-    if (cc.use_reg2) {
-        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
-    } else {
-        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
-    }
-}
-
 /* perform a conditional store into register 'reg' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
@@ -1172,8 +1137,40 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
     return cc;
 }
 
-#define gen_setcc1(s, b, reg) \
-    gen_do_setcc(reg, gen_prepare_cc(s, b, reg), false)
+static void gen_setcc1(DisasContext *s, int b, TCGv reg)
+{
+    CCPrepare cc = gen_prepare_cc(s, b, reg);
+
+    if (cc.no_setcond) {
+        if (cc.cond == TCG_COND_EQ) {
+            tcg_gen_xori_tl(reg, cc.reg, 1);
+        } else {
+            tcg_gen_mov_tl(reg, cc.reg);
+        }
+        return;
+    }
+
+    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
+        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
+        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
+        tcg_gen_andi_tl(reg, reg, 1);
+        return;
+    }
+    if (cc.mask != -1) {
+        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
+        cc.reg = reg;
+    }
+    if (cc.use_reg2) {
+        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
+    } else {
+        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
+    }
+}
+
+static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
+{
+    gen_setcc1(s, JCC_B << 1, reg);
+}
 
 /* generate a conditional jump to label 'l1' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
@@ -1399,7 +1396,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
     }
     switch(op) {
     case OP_ADCL:
-        gen_compute_eflags_c(s1, cpu_tmp4, false);
+        gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1414,7 +1411,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_SBBL:
-        gen_compute_eflags_c(s1, cpu_tmp4, false);
+        gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1488,7 +1485,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_mov_TN_reg(ot, 0, d);
     else
         gen_op_ld_T0_A0(ot + s1->mem_index);
-    gen_compute_eflags_c(s1, cpu_cc_src, false);
+    gen_compute_eflags_c(s1, cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
         set_cc_op(s1, CC_OP_INCB + ot);
@@ -2417,11 +2414,6 @@ static inline void gen_jcc(DisasContext *s, int b,
     }
 }
 
-static void gen_setcc(DisasContext *s, int b)
-{
-    gen_setcc1(s, b, cpu_T[0]);
-}
-
 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
     tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
@@ -6431,7 +6423,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
-        gen_setcc(s, b);
+        gen_setcc1(s, b, cpu_T[0]);
         gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
@@ -6889,7 +6881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T[0], false);
+        gen_compute_eflags_c(s, cpu_T[0]);
         tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
commit 69d1aa31f7551050bf918dc22f0fe3307b779186
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 14:41:21 2013 -0800

    target-i386: inline gen_prepare_cc_slow
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index b081fc0..0b88eae 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1075,44 +1075,6 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
     }
 }
 
-static CCPrepare gen_prepare_cc_slow(DisasContext *s, int jcc_op, TCGv reg)
-{
-    switch(jcc_op) {
-    case JCC_O:
-        return gen_prepare_eflags_o(s, reg);
-    case JCC_B:
-        return gen_prepare_eflags_c(s, reg);
-    case JCC_Z:
-        return gen_prepare_eflags_z(s, reg);
-    case JCC_BE:
-        gen_compute_eflags(s);
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_Z | CC_C };
-    case JCC_S:
-        return gen_prepare_eflags_s(s, reg);
-    case JCC_P:
-        return gen_prepare_eflags_p(s, reg);
-    case JCC_L:
-        gen_compute_eflags(s);
-        if (TCGV_EQUAL(reg, cpu_cc_src)) {
-            reg = cpu_tmp0;
-        }
-        tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
-        tcg_gen_xor_tl(reg, reg, cpu_cc_src);
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = CC_S };
-    default:
-    case JCC_LE:
-        gen_compute_eflags(s);
-        if (TCGV_EQUAL(reg, cpu_cc_src)) {
-            reg = cpu_tmp0;
-        }
-        tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
-        tcg_gen_xor_tl(reg, reg, cpu_cc_src);
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                             .mask = CC_S | CC_Z };
-    }
-}
-
 /* perform a conditional store into register 'reg' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
@@ -1125,11 +1087,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
     jcc_op = (b >> 1) & 7;
 
     switch (s->cc_op) {
-        /* we optimize relational operators for the cmp/jcc case */
-    case CC_OP_SUBB:
-    case CC_OP_SUBW:
-    case CC_OP_SUBL:
-    case CC_OP_SUBQ:
+    case CC_OP_SUBB ... CC_OP_SUBQ:
+        /* We optimize relational operators for the cmp/jcc case.  */
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
@@ -1160,8 +1119,50 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 
     default:
     slow_jcc:
-        /* gen_prepare_cc_slow actually generates good code for JC, JZ and JS */
-        cc = gen_prepare_cc_slow(s, jcc_op, reg);
+        /* This actually generates good code for JC, JZ and JS.  */
+        switch (jcc_op) {
+        case JCC_O:
+            cc = gen_prepare_eflags_o(s, reg);
+            break;
+        case JCC_B:
+            cc = gen_prepare_eflags_c(s, reg);
+            break;
+        case JCC_Z:
+            cc = gen_prepare_eflags_z(s, reg);
+            break;
+        case JCC_BE:
+            gen_compute_eflags(s);
+            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                               .mask = CC_Z | CC_C };
+            break;
+        case JCC_S:
+            cc = gen_prepare_eflags_s(s, reg);
+            break;
+        case JCC_P:
+            cc = gen_prepare_eflags_p(s, reg);
+            break;
+        case JCC_L:
+            gen_compute_eflags(s);
+            if (TCGV_EQUAL(reg, cpu_cc_src)) {
+                reg = cpu_tmp0;
+            }
+            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+                               .mask = CC_S };
+            break;
+        default:
+        case JCC_LE:
+            gen_compute_eflags(s);
+            if (TCGV_EQUAL(reg, cpu_cc_src)) {
+                reg = cpu_tmp0;
+            }
+            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+                               .mask = CC_S | CC_Z };
+            break;
+        }
         break;
     }
 
commit 943131ca98af142da7b99111b410e741a5d42338
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sun Oct 7 15:53:23 2012 +0200

    target-i386: use CCPrepare to generate conditional jumps
    
    This simplifies all the jump generation code.  CCPrepare lets the
    code always create an efficient brcond, so there is no need to
    duplicate the setcc and jcc code.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 046d82f..b081fc0 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1178,117 +1178,16 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1(DisasContext *s, int b, int l1)
 {
-    int inv, jcc_op, size, cond;
-    TCGv t0;
+    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
 
-    inv = b & 1;
-    jcc_op = (b >> 1) & 7;
-
-    switch (s->cc_op) {
-        /* we optimize the cmp/jcc case */
-    case CC_OP_SUBB:
-    case CC_OP_SUBW:
-    case CC_OP_SUBL:
-    case CC_OP_SUBQ:
-        
-        size = s->cc_op - CC_OP_SUBB;
-        switch(jcc_op) {
-        case JCC_Z:
-        fast_jcc_z:
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false);
-            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
-            break;
-        case JCC_S:
-        fast_jcc_s:
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true);
-            tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1);
-            break;
-
-        case JCC_B:
-            cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
-            goto fast_jcc_b;
-        case JCC_BE:
-            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
-        fast_jcc_b:
-            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            gen_extu(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
-            break;
-            
-        case JCC_L:
-            cond = inv ? TCG_COND_GE : TCG_COND_LT;
-            goto fast_jcc_l;
-        case JCC_LE:
-            cond = inv ? TCG_COND_GT : TCG_COND_LE;
-        fast_jcc_l:
-            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            gen_exts(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
-            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
-            break;
-            
-        default:
-            goto slow_jcc;
-        }
-        break;
-        
-        /* some jumps are easy to compute */
-    case CC_OP_ADDB:
-    case CC_OP_ADDW:
-    case CC_OP_ADDL:
-    case CC_OP_ADDQ:
-        
-    case CC_OP_ADCB:
-    case CC_OP_ADCW:
-    case CC_OP_ADCL:
-    case CC_OP_ADCQ:
-        
-    case CC_OP_SBBB:
-    case CC_OP_SBBW:
-    case CC_OP_SBBL:
-    case CC_OP_SBBQ:
-        
-    case CC_OP_LOGICB:
-    case CC_OP_LOGICW:
-    case CC_OP_LOGICL:
-    case CC_OP_LOGICQ:
-        
-    case CC_OP_INCB:
-    case CC_OP_INCW:
-    case CC_OP_INCL:
-    case CC_OP_INCQ:
-        
-    case CC_OP_DECB:
-    case CC_OP_DECW:
-    case CC_OP_DECL:
-    case CC_OP_DECQ:
-        
-    case CC_OP_SHLB:
-    case CC_OP_SHLW:
-    case CC_OP_SHLL:
-    case CC_OP_SHLQ:
-        
-    case CC_OP_SARB:
-    case CC_OP_SARW:
-    case CC_OP_SARL:
-    case CC_OP_SARQ:
-        switch(jcc_op) {
-        case JCC_Z:
-            size = (s->cc_op - CC_OP_ADDB) & 3;
-            goto fast_jcc_z;
-        case JCC_S:
-            size = (s->cc_op - CC_OP_ADDB) & 3;
-            goto fast_jcc_s;
-        default:
-            goto slow_jcc;
-        }
-        break;
-    default:
-    slow_jcc:
-        gen_setcc1(s, b, cpu_T[0]);
-        tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
-        break;
+    if (cc.mask != -1) {
+        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
+        cc.reg = cpu_T[0];
+    }
+    if (cc.use_reg2) {
+        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
+    } else {
+        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
     }
 }
 
commit 276e6b5f069e189e204a4320f824daa07db10286
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 14:33:45 2013 -0800

    target-i386: introduce gen_prepare_cc
    
    This makes the i386 front-end able to create CCPrepare structs for all
    conditions, not just those that come from a single flag.  In particular,
    JCC_L and JCC_LE can be optimized because gen_prepare_cc is not forced
    to return a result in bit 0 (unlike gen_setcc_slow).
    
    However, for now the slow jcc operations will still go through CC
    computation in a single-bit temporary, followed by a brcond if the
    temporary is nonzero.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
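
For JCC_L the useful observation is that once the full flags word has been
computed, signed "less" is SF != OF, and shifting the word right by 4 lines
the OF bit (0x800) up with the SF bit (0x80).  The condition can then be
consumed as a masked nonzero test instead of being squeezed into bit 0 first.
A standalone illustration of the bit trick (not the translator code):

#include <stdio.h>

#define CC_S 0x0080
#define CC_O 0x0800

/* "Less" (signed) from an EFLAGS-style word: SF xor OF, tested in place. */
static int is_less(unsigned eflags)
{
    return (((eflags >> 4) ^ eflags) & CC_S) != 0;
}

int main(void)
{
    printf("%d\n", is_less(CC_S));          /* SF=1, OF=0 -> 1 (less)     */
    printf("%d\n", is_less(CC_S | CC_O));   /* SF=1, OF=1 -> 0 (not less) */
    return 0;
}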

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 06f0fbc..046d82f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1042,14 +1042,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 
 #define gen_compute_eflags_c(s, reg, inv) \
     gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv)
-#define gen_compute_eflags_p(s, reg) \
-    gen_do_setcc(reg, gen_prepare_eflags_p(s, reg), false)
-#define gen_compute_eflags_s(s, reg, inv) \
-    gen_do_setcc(reg, gen_prepare_eflags_s(s, reg), inv)
-#define gen_compute_eflags_o(s, reg) \
-    gen_do_setcc(reg, gen_prepare_eflags_o(s, reg), false)
-#define gen_compute_eflags_z(s, reg, inv) \
-    gen_do_setcc(reg, gen_prepare_eflags_z(s, reg), inv)
 
 static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
 {
@@ -1074,6 +1066,7 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
     }
     if (cc.mask != -1) {
         tcg_gen_andi_tl(reg, cc.reg, cc.mask);
+        cc.reg = reg;
     }
     if (cc.use_reg2) {
         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
@@ -1082,58 +1075,50 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
     }
 }
 
-static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
+static CCPrepare gen_prepare_cc_slow(DisasContext *s, int jcc_op, TCGv reg)
 {
     switch(jcc_op) {
     case JCC_O:
-        gen_compute_eflags_o(s, reg);
-        break;
+        return gen_prepare_eflags_o(s, reg);
     case JCC_B:
-        gen_compute_eflags_c(s, reg, inv);
-        inv = false;
-        break;
+        return gen_prepare_eflags_c(s, reg);
     case JCC_Z:
-        gen_compute_eflags_z(s, reg, inv);
-        inv = false;
-        break;
+        return gen_prepare_eflags_z(s, reg);
     case JCC_BE:
         gen_compute_eflags(s);
-        tcg_gen_andi_tl(reg, cpu_cc_src, CC_Z | CC_C);
-        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
-        return;
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = CC_Z | CC_C };
     case JCC_S:
-        gen_compute_eflags_s(s, reg, inv);
-        inv = false;
-        break;
+        return gen_prepare_eflags_s(s, reg);
     case JCC_P:
-        gen_compute_eflags_p(s, reg);
-        break;
+        return gen_prepare_eflags_p(s, reg);
     case JCC_L:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 11); /* CC_O */
-        tcg_gen_shri_tl(reg, cpu_cc_src, 7); /* CC_S */
-        tcg_gen_xor_tl(reg, reg, cpu_tmp0);
-        tcg_gen_andi_tl(reg, reg, 1);
-        break;
+        if (TCGV_EQUAL(reg, cpu_cc_src)) {
+            reg = cpu_tmp0;
+        }
+        tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+        tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = CC_S };
     default:
     case JCC_LE:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 4); /* CC_O -> CC_S */
-        tcg_gen_xor_tl(reg, cpu_tmp0, cpu_cc_src);
-        tcg_gen_andi_tl(reg, reg, CC_S | CC_Z);
-        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
-        break;
-    }
-    if (inv) {
-        tcg_gen_xori_tl(reg, reg, 1);
+        if (TCGV_EQUAL(reg, cpu_cc_src)) {
+            reg = cpu_tmp0;
+        }
+        tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+        tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+                             .mask = CC_S | CC_Z };
     }
 }
 
 /* perform a conditional store into register 'reg' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
-static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
+static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
 {
     int inv, jcc_op, size, cond;
+    CCPrepare cc;
     TCGv t0;
 
     inv = b & 1;
@@ -1148,23 +1133,24 @@ static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
             gen_extu(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-            tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
+                               .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
         case JCC_L:
-            cond = inv ? TCG_COND_GE : TCG_COND_LT;
+            cond = TCG_COND_LT;
             goto fast_jcc_l;
         case JCC_LE:
-            cond = inv ? TCG_COND_GT : TCG_COND_LE;
+            cond = TCG_COND_LE;
         fast_jcc_l:
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
             gen_exts(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
-            tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
+                               .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
         default:
@@ -1174,12 +1160,20 @@ static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
 
     default:
     slow_jcc:
-        /* gen_setcc_slow actually generates good code for JC, JZ and JS */
-        gen_setcc_slow(s, jcc_op, reg, inv);
+        /* gen_prepare_cc_slow actually generates good code for JC, JZ and JS */
+        cc = gen_prepare_cc_slow(s, jcc_op, reg);
         break;
     }
+
+    if (inv) {
+        cc.cond = tcg_invert_cond(cc.cond);
+    }
+    return cc;
 }
 
+#define gen_setcc1(s, b, reg) \
+    gen_do_setcc(reg, gen_prepare_cc(s, b, reg), false)
+
 /* generate a conditional jump to label 'l1' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1(DisasContext *s, int b, int l1)
@@ -1292,9 +1286,8 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
         break;
     default:
     slow_jcc:
-        gen_setcc_slow(s, jcc_op, cpu_T[0], false);
-        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
-                           cpu_T[0], 0, l1);
+        gen_setcc1(s, b, cpu_T[0]);
+        tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
         break;
     }
 }
commit bec93d7283b635aabaf0bbff67b6da7fc99e020a
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 14:21:52 2013 -0800

    target-i386: introduce CCPrepare
    
    Introduce a struct that describes how to build a *cond operation
    that checks for a given x86 condition code.  For now, just change
    gen_compute_eflags_* to return the new struct, generate code for
    the CCPrepare struct, and go on as before.
    
    [rth: Use ctz with the proper width rather than ffs.]
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
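
The pattern being introduced is "describe the condition once, let each
consumer decide how to emit it".  The sketch below mimics that with plain C
integers standing in for TCG values; the field names mirror the struct added
in the diff, but the little evaluator is only an illustration of how a
consumer interprets cond/reg/reg2/imm/mask, not translator code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { COND_NE, COND_EQ, COND_LTU, COND_LEU } Cond;

typedef struct {
    Cond cond;
    uint64_t reg;    /* stands in for a TCGv        */
    uint64_t reg2;
    uint64_t imm;
    uint64_t mask;   /* all-ones means "no masking" */
    bool use_reg2;
} CCPrepareSketch;

/* A "setcc"-style consumer: turn the description into 0 or 1. */
static int eval_cc(CCPrepareSketch cc)
{
    uint64_t a = (cc.mask != (uint64_t)-1) ? (cc.reg & cc.mask) : cc.reg;
    uint64_t b = cc.use_reg2 ? cc.reg2 : cc.imm;

    switch (cc.cond) {
    case COND_NE:  return a != b;
    case COND_EQ:  return a == b;
    case COND_LTU: return a < b;
    default:       return a <= b;   /* COND_LEU */
    }
}

int main(void)
{
    /* "Carry set" as a descriptor: test bit 0 of a saved flags word. */
    CCPrepareSketch carry = { .cond = COND_NE, .reg = 0x85, .mask = 0x0001 };
    printf("%d\n", eval_cc(carry));  /* prints 1 */
    return 0;
}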

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5c9211f..06f0fbc 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -23,6 +23,7 @@
 #include <inttypes.h>
 #include <signal.h>
 
+#include "qemu/host-utils.h"
 #include "cpu.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
@@ -47,6 +48,14 @@
 #define REX_B(s) 0
 #endif
 
+#ifdef TARGET_X86_64
+# define ctztl  ctz64
+# define clztl  clz64
+#else
+# define ctztl  ctz32
+# define clztl  clz32
+#endif
+
 //#define MACRO_TEST   1
 
 /* global register indexes */
@@ -881,11 +890,21 @@ static void gen_compute_eflags(DisasContext *s)
     tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
 
+typedef struct CCPrepare {
+    TCGCond cond;
+    TCGv reg;
+    TCGv reg2;
+    target_ulong imm;
+    target_ulong mask;
+    bool use_reg2;
+    bool no_setcond;
+} CCPrepare;
+
 /* compute eflags.C to reg */
-static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
+static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 {
     TCGv t0, t1;
-    int size;
+    int size, shift;
 
     switch (s->cc_op) {
     case CC_OP_SUBB ... CC_OP_SUBQ:
@@ -904,9 +923,8 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
         t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     add_sub:
-        tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1);
-        inv = false;
-        break;
+        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
+                             .reg2 = t1, .mask = -1, .use_reg2 = true };
 
     case CC_OP_SBBB ... CC_OP_SBBQ:
         /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
@@ -929,42 +947,33 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
         t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     adc_sbb:
-        tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
-        inv = false;
-        break;
+        return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
+                             .reg2 = t1, .mask = -1, .use_reg2 = true };
 
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
-        tcg_gen_movi_tl(reg, 0);
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 
     case CC_OP_INCB ... CC_OP_INCQ:
     case CC_OP_DECB ... CC_OP_DECQ:
-        if (inv) {
-            tcg_gen_xori_tl(reg, cpu_cc_src, 1);
-        } else {
-            tcg_gen_mov_tl(reg, cpu_cc_src);
-        }
-        inv = false;
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = -1, .no_setcond = true };
 
     case CC_OP_SHLB ... CC_OP_SHLQ:
         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
         size = s->cc_op - CC_OP_SHLB;
-        tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1);
-        tcg_gen_andi_tl(reg, reg, 1);
-        break;
+        shift = (8 << size) - 1;
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = (target_ulong)1 << shift };
 
     case CC_OP_MULB ... CC_OP_MULQ:
-        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
-                            reg, cpu_cc_src, 0);
-        inv = false;
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NE,
+                             .reg = cpu_cc_src, .mask = -1 };
 
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
-        tcg_gen_andi_tl(reg, cpu_cc_src, 1);
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NE,
+                             .reg = cpu_cc_src, .mask = CC_C };
 
     default:
        /* The need to compute only C from CC_OP_DYNAMIC is important
@@ -972,74 +981,104 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
        gen_update_cc_op(s);
        gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
        tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
-       break;
-    }
-    if (inv) {
-        tcg_gen_xori_tl(reg, reg, 1);
+       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+                            .mask = -1, .no_setcond = true };
     }
 }
 
 /* compute eflags.P to reg */
-static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
+static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
 {
     gen_compute_eflags(s);
-    tcg_gen_shri_tl(reg, cpu_cc_src, 2);
-    tcg_gen_andi_tl(reg, reg, 1);
+    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                         .mask = CC_P };
 }
 
 /* compute eflags.S to reg */
-static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv)
+static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
     case CC_OP_DYNAMIC:
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
-        tcg_gen_shri_tl(reg, cpu_cc_src, 7);
-        tcg_gen_andi_tl(reg, reg, 1);
-        if (inv) {
-            tcg_gen_xori_tl(reg, reg, 1);
-        }
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = CC_S };
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
-            tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, t0, 0);
+            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
         }
-        break;
     }
 }
 
 /* compute eflags.O to reg */
-static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
+static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 {
     gen_compute_eflags(s);
-    tcg_gen_shri_tl(reg, cpu_cc_src, 11);
-    tcg_gen_andi_tl(reg, reg, 1);
+    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                         .mask = CC_O };
 }
 
 /* compute eflags.Z to reg */
-static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
+static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
     case CC_OP_DYNAMIC:
         gen_compute_eflags(s);
         /* FALLTHRU */
     case CC_OP_EFLAGS:
-        tcg_gen_shri_tl(reg, cpu_cc_src, 6);
-        tcg_gen_andi_tl(reg, reg, 1);
-        if (inv) {
-            tcg_gen_xori_tl(reg, reg, 1);
-        }
-        break;
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+                             .mask = CC_Z };
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-            tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, t0, 0);
+            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
         }
-        break;
+    }
+}
+
+#define gen_compute_eflags_c(s, reg, inv) \
+    gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv)
+#define gen_compute_eflags_p(s, reg) \
+    gen_do_setcc(reg, gen_prepare_eflags_p(s, reg), false)
+#define gen_compute_eflags_s(s, reg, inv) \
+    gen_do_setcc(reg, gen_prepare_eflags_s(s, reg), inv)
+#define gen_compute_eflags_o(s, reg) \
+    gen_do_setcc(reg, gen_prepare_eflags_o(s, reg), false)
+#define gen_compute_eflags_z(s, reg, inv) \
+    gen_do_setcc(reg, gen_prepare_eflags_z(s, reg), inv)
+
+static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv)
+{
+    if (inv) {
+        cc.cond = tcg_invert_cond(cc.cond);
+    }
+
+    if (cc.no_setcond) {
+        if (cc.cond == TCG_COND_EQ) {
+            tcg_gen_xori_tl(reg, cc.reg, 1);
+        } else {
+            tcg_gen_mov_tl(reg, cc.reg);
+        }
+        return;
+    }
+
+    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
+        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
+        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
+        tcg_gen_andi_tl(reg, reg, 1);
+        return;
+    }
+    if (cc.mask != -1) {
+        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
+    }
+    if (cc.use_reg2) {
+        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
+    } else {
+        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
     }
 }
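
The gen_do_setcc() added above lowers a CCPrepare descriptor (condition, register, mask, optional immediate) into at most one setcond. Below is a minimal plain-C sketch of that idea, with invented names and none of the TCG plumbing; it is an illustration of the descriptor-then-lower pattern, not the QEMU code.

    /* Simplified model of the CCPrepare idea: a flag test is described as
     * "reg (optionally masked), compared against imm with cond", and only
     * lowered to an actual 0/1 value when one is really needed. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum cond { COND_NE, COND_EQ, COND_LTU };

    struct cc_prepare {
        enum cond cond;
        uint64_t reg;     /* value standing in for a TCG register */
        uint64_t imm;     /* comparison operand */
        uint64_t mask;    /* single bit, or -1 for "whole value" */
    };

    /* Describe "carry flag set" for an EFLAGS-style word: test bit 0. */
    static struct cc_prepare prepare_cf(uint64_t eflags)
    {
        return (struct cc_prepare) { .cond = COND_NE, .reg = eflags, .mask = 1 };
    }

    /* Lower the description to a 0/1 result, optionally inverted. */
    static int do_setcc(struct cc_prepare cc, bool inv)
    {
        uint64_t v = (cc.mask != (uint64_t)-1) ? (cc.reg & cc.mask) : cc.reg;
        int r;

        switch (cc.cond) {
        case COND_NE:  r = (v != cc.imm); break;
        case COND_EQ:  r = (v == cc.imm); break;
        default:       r = (v < cc.imm);  break;   /* COND_LTU */
        }
        return inv ? !r : r;
    }

    int main(void)
    {
        uint64_t eflags = 0x44;   /* ZF and PF set, CF clear */

        printf("CF  = %d\n", do_setcc(prepare_cf(eflags), false));  /* 0 */
        printf("NCF = %d\n", do_setcc(prepare_cf(eflags), true));   /* 1 */
        return 0;
    }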
 
commit c365395e9bd2b3bcac48ef562c187ea6ab9820ad
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 5 23:00:10 2012 +0200

    target-i386: optimize setcc instructions
    
    Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE,
    L, LE).  For the other cases, do it via setcond and gen_setcc_slow
    (which is not that slow in many cases).
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
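
A rough worked example of the reconstruction, assuming the usual convention that after a SUB the translator keeps CC_DST = src1 - src2 and CC_SRC = src2, so src1 can be recovered as CC_DST + CC_SRC. Only the 8-bit case is shown and the helper names are made up for the illustration.

    /* After an 8-bit SUB, cc_dst holds src1 - src2 and cc_src holds src2.
     * BE and L can then be evaluated as a single comparison on the
     * reconstructed operands instead of going through CF/ZF/SF/OF. */
    #include <assert.h>
    #include <stdint.h>

    static int setcc_be(uint8_t cc_dst, uint8_t cc_src)
    {
        uint8_t src1 = (uint8_t)(cc_dst + cc_src);   /* reconstruct src1 */
        return src1 <= cc_src;                       /* unsigned <= */
    }

    static int setcc_l(uint8_t cc_dst, uint8_t cc_src)
    {
        int8_t src1 = (int8_t)(cc_dst + cc_src);     /* sign-extended src1 */
        return src1 < (int8_t)cc_src;                /* signed < */
    }

    int main(void)
    {
        uint8_t src1 = 0x10, src2 = 0x90;
        uint8_t cc_dst = (uint8_t)(src1 - src2), cc_src = src2;

        /* 0x10 <=u 0x90 holds, while 16 <s -112 does not */
        assert(setcc_be(cc_dst, cc_src) == 1);
        assert(setcc_l(cc_dst, cc_src) == 0);
        return 0;
    }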

diff --git a/target-i386/translate.c b/target-i386/translate.c
index fea43c7..5c9211f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1090,55 +1090,55 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
     }
 }
 
-/* return true if setcc_slow is not needed (WARNING: must be kept in
-   sync with gen_jcc1) */
-static int is_fast_jcc_case(DisasContext *s, int b)
+/* perform a conditional store into register 'reg' according to jump opcode
+   value 'b'. In the fast case, T0 is guaranteed not to be used. */
+static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
 {
-    int jcc_op;
+    int inv, jcc_op, size, cond;
+    TCGv t0;
+
+    inv = b & 1;
     jcc_op = (b >> 1) & 7;
-    switch(s->cc_op) {
-        /* we optimize the cmp/jcc case */
+
+    switch (s->cc_op) {
+        /* we optimize relational operators for the cmp/jcc case */
     case CC_OP_SUBB:
     case CC_OP_SUBW:
     case CC_OP_SUBL:
     case CC_OP_SUBQ:
-        if (jcc_op == JCC_O || jcc_op == JCC_P)
-            goto slow_jcc;
-        break;
-
-        /* some jumps are easy to compute */
-    case CC_OP_ADDB:
-    case CC_OP_ADDW:
-    case CC_OP_ADDL:
-    case CC_OP_ADDQ:
-
-    case CC_OP_LOGICB:
-    case CC_OP_LOGICW:
-    case CC_OP_LOGICL:
-    case CC_OP_LOGICQ:
-
-    case CC_OP_INCB:
-    case CC_OP_INCW:
-    case CC_OP_INCL:
-    case CC_OP_INCQ:
+        size = s->cc_op - CC_OP_SUBB;
+        switch (jcc_op) {
+        case JCC_BE:
+            cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
+            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            gen_extu(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+            tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+            break;
 
-    case CC_OP_DECB:
-    case CC_OP_DECW:
-    case CC_OP_DECL:
-    case CC_OP_DECQ:
+        case JCC_L:
+            cond = inv ? TCG_COND_GE : TCG_COND_LT;
+            goto fast_jcc_l;
+        case JCC_LE:
+            cond = inv ? TCG_COND_GT : TCG_COND_LE;
+        fast_jcc_l:
+            tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+            gen_exts(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
+            tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+            break;
 
-    case CC_OP_SHLB:
-    case CC_OP_SHLW:
-    case CC_OP_SHLL:
-    case CC_OP_SHLQ:
-        if (jcc_op != JCC_Z && jcc_op != JCC_S)
+        default:
             goto slow_jcc;
+        }
         break;
+
     default:
     slow_jcc:
-        return 0;
+        /* gen_setcc_slow actually generates good code for JC, JZ and JS */
+        gen_setcc_slow(s, jcc_op, reg, inv);
+        break;
     }
-    return 1;
 }
 
 /* generate a conditional jump to label 'l1' according to jump opcode
@@ -2487,28 +2487,7 @@ static inline void gen_jcc(DisasContext *s, int b,
 
 static void gen_setcc(DisasContext *s, int b)
 {
-    int inv, jcc_op, l1;
-    TCGv t0;
-
-    if (is_fast_jcc_case(s, b)) {
-        /* nominal case: we use a jump */
-        /* XXX: make it faster by adding new instructions in TCG */
-        t0 = tcg_temp_local_new();
-        tcg_gen_movi_tl(t0, 0);
-        l1 = gen_new_label();
-        gen_jcc1(s, b ^ 1, l1);
-        tcg_gen_movi_tl(t0, 1);
-        gen_set_label(l1);
-        tcg_gen_mov_tl(cpu_T[0], t0);
-        tcg_temp_free(t0);
-    } else {
-        /* slow case: it is more efficient not to generate a jump,
-           although it is questionnable whether this optimization is
-           worth to */
-        inv = b & 1;
-        jcc_op = (b >> 1) & 7;
-        gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
-    }
+    gen_setcc1(s, b, cpu_T[0]);
 }
 
 static inline void gen_op_movl_T0_seg(int seg_reg)
commit be10b289d697420b6e0d8d1a681aa64555066639
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:53:51 2013 -0800

    target-i386: optimize setle
    
    And allow gen_setcc_slow to operate on cpu_cc_src.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
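
The LE trick in the hunk below, sketched in plain C on a flags word that, like cc_src here, holds only the arithmetic flags (x86 bit positions assumed: ZF = 0x40, SF = 0x80, OF = 0x800): shifting right by 4 lines OF up with SF, so one xor and one mask test ZF | (SF ^ OF) with a single comparison.

    /* setle: LE is ZF | (SF ^ OF).  Shifting OF (bit 11) down by 4 aligns
     * it with SF (bit 7), so ((flags >> 4) ^ flags) & (CC_S | CC_Z) is
     * nonzero exactly when LE holds -- assuming only the arithmetic flags
     * are present in the word. */
    #include <assert.h>
    #include <stdint.h>

    #define CC_Z 0x0040
    #define CC_S 0x0080
    #define CC_O 0x0800

    static int setle(uint32_t flags)
    {
        return (((flags >> 4) ^ flags) & (CC_S | CC_Z)) != 0;
    }

    int main(void)
    {
        assert(setle(CC_Z) == 1);          /* equal */
        assert(setle(CC_S) == 1);          /* SF != OF: less */
        assert(setle(CC_S | CC_O) == 0);   /* SF == OF, ZF clear: greater */
        assert(setle(0) == 0);
        return 0;
    }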

diff --git a/target-i386/translate.c b/target-i386/translate.c
index dab6983..fea43c7 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1045,7 +1045,6 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
 
 static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
 {
-    assert(!TCGV_EQUAL(reg, cpu_cc_src));
     switch(jcc_op) {
     case JCC_O:
         gen_compute_eflags_o(s, reg);
@@ -1072,20 +1071,18 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
         break;
     case JCC_L:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(reg, cpu_cc_src, 7); /* CC_S */
         tcg_gen_xor_tl(reg, reg, cpu_tmp0);
         tcg_gen_andi_tl(reg, reg, 1);
         break;
     default:
     case JCC_LE:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
-        tcg_gen_xor_tl(reg, reg, cpu_tmp4);
-        tcg_gen_or_tl(reg, reg, cpu_tmp0);
-        tcg_gen_andi_tl(reg, reg, 1);
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 4); /* CC_O -> CC_S */
+        tcg_gen_xor_tl(reg, cpu_tmp0, cpu_cc_src);
+        tcg_gen_andi_tl(reg, reg, CC_S | CC_Z);
+        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
         break;
     }
     if (inv) {
commit 2cb4764577f270eec259123955a6396ad6a2f161
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:49:43 2013 -0800

    target-i386: optimize setbe
    
    This is looking at EFLAGS, but it can do so more efficiently with
    setcond.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
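
BE is just CF | ZF, so a single mask-and-compare replaces the old shift/or/and sequence. A tiny stand-alone version, assuming the usual x86 bit values for CC_C and CC_Z:

    /* setbe: nonzero iff CF or ZF is set in the computed flags word. */
    #include <assert.h>
    #include <stdint.h>

    #define CC_C 0x0001
    #define CC_Z 0x0040

    static int setbe(uint32_t flags)
    {
        return (flags & (CC_Z | CC_C)) != 0;
    }

    int main(void)
    {
        assert(setbe(CC_C) == 1);
        assert(setbe(CC_Z) == 1);
        assert(setbe(0) == 0);
        return 0;
    }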

diff --git a/target-i386/translate.c b/target-i386/translate.c
index c510732..dab6983 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1060,10 +1060,9 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
         break;
     case JCC_BE:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(reg, cpu_cc_src, 6);
-        tcg_gen_or_tl(reg, reg, cpu_cc_src);
-        tcg_gen_andi_tl(reg, reg, 1);
-        break;
+        tcg_gen_andi_tl(reg, cpu_cc_src, CC_Z | CC_C);
+        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
+        return;
     case JCC_S:
         gen_compute_eflags_s(s, reg, inv);
         inv = false;
commit 1a5c635947e60167c4626dd274531b8b0eacc2e5
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 5 22:54:34 2012 +0200

    target-i386: change gen_setcc_slow_T0 to gen_setcc_slow
    
    Do not hard code the destination register.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index ea1b003..c510732 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1043,53 +1043,54 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
     }
 }
 
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
+static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
 {
+    assert(!TCGV_EQUAL(reg, cpu_cc_src));
     switch(jcc_op) {
     case JCC_O:
-        gen_compute_eflags_o(s, cpu_T[0]);
+        gen_compute_eflags_o(s, reg);
         break;
     case JCC_B:
-        gen_compute_eflags_c(s, cpu_T[0], inv);
+        gen_compute_eflags_c(s, reg, inv);
         inv = false;
         break;
     case JCC_Z:
-        gen_compute_eflags_z(s, cpu_T[0], inv);
+        gen_compute_eflags_z(s, reg, inv);
         inv = false;
         break;
     case JCC_BE:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        tcg_gen_shri_tl(reg, cpu_cc_src, 6);
+        tcg_gen_or_tl(reg, reg, cpu_cc_src);
+        tcg_gen_andi_tl(reg, reg, 1);
         break;
     case JCC_S:
-        gen_compute_eflags_s(s, cpu_T[0], inv);
+        gen_compute_eflags_s(s, reg, inv);
         inv = false;
         break;
     case JCC_P:
-        gen_compute_eflags_p(s, cpu_T[0]);
+        gen_compute_eflags_p(s, reg);
         break;
     case JCC_L:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
         tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
-        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        tcg_gen_xor_tl(reg, reg, cpu_tmp0);
+        tcg_gen_andi_tl(reg, reg, 1);
         break;
     default:
     case JCC_LE:
         gen_compute_eflags(s);
-        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
         tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
         tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
-        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        tcg_gen_xor_tl(reg, reg, cpu_tmp4);
+        tcg_gen_or_tl(reg, reg, cpu_tmp0);
+        tcg_gen_andi_tl(reg, reg, 1);
         break;
     }
     if (inv) {
-        tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+        tcg_gen_xori_tl(reg, reg, 1);
     }
 }
 
@@ -1256,7 +1257,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
         break;
     default:
     slow_jcc:
-        gen_setcc_slow_T0(s, jcc_op, false);
+        gen_setcc_slow(s, jcc_op, cpu_T[0], false);
         tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
                            cpu_T[0], 0, l1);
         break;
@@ -2510,7 +2511,7 @@ static void gen_setcc(DisasContext *s, int b)
            worth to */
         inv = b & 1;
         jcc_op = (b >> 1) & 7;
-        gen_setcc_slow_T0(s, jcc_op, inv);
+        gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
     }
 }
 
commit 06847f1f1a7cff71f68dc6416cdd729c01ae2305
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:46:02 2013 -0800

    target-i386: convert gen_compute_eflags_c to TCG
    
    Do the switch at translation time, converting the helper templates to
    TCG opcodes.  In some cases CF can be computed with a single setcond,
    though others it may require a little more work.
    
    In the CC_OP_DYNAMIC case, compute the whole EFLAGS, same as for ZF/SF/PF.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
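
A plain-C illustration of the carry recoveries named in the comments of the hunk below, for the 8-bit case only (the translator does the same per operand size; the helper names here are invented for the example):

    /* Carry recovered from the saved operands.
     * After ADD: cc_dst = src1 + src2, cc_src = src2, CF iff dst <u src.
     * After SUB: cc_dst = src1 - src2, cc_src = src2, CF iff src1 <u src2,
     * where src1 is reconstructed as dst + src. */
    #include <assert.h>
    #include <stdint.h>

    static int carry_after_add(uint8_t cc_dst, uint8_t cc_src)
    {
        return cc_dst < cc_src;
    }

    static int carry_after_sub(uint8_t cc_dst, uint8_t cc_src)
    {
        return (uint8_t)(cc_dst + cc_src) < cc_src;
    }

    int main(void)
    {
        /* 0xf0 + 0x20 wraps: carry out */
        assert(carry_after_add((uint8_t)(0xf0 + 0x20), 0x20) == 1);
        /* 0x10 - 0x20 borrows: carry (borrow) out */
        assert(carry_after_sub((uint8_t)(0x10 - 0x20), 0x20) == 1);
        /* 0x30 - 0x20 does not borrow */
        assert(carry_after_sub((uint8_t)(0x30 - 0x20), 0x20) == 0);
        return 0;
    }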

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 06aa7bf..ea1b003 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -869,17 +869,6 @@ static void gen_op_update_neg_cc(void)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
-/* compute eflags.C to reg */
-static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
-{
-    gen_update_cc_op(s);
-    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
-    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
-    if (inv) {
-        tcg_gen_xori_tl(reg, reg, 1);
-    }
-}
-
 /* compute all eflags to cc_src */
 static void gen_compute_eflags(DisasContext *s)
 {
@@ -892,6 +881,104 @@ static void gen_compute_eflags(DisasContext *s)
     tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
 
+/* compute eflags.C to reg */
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
+{
+    TCGv t0, t1;
+    int size;
+
+    switch (s->cc_op) {
+    case CC_OP_SUBB ... CC_OP_SUBQ:
+        /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_SUBB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        /* If no temporary was used, be careful not to alias t1 and t0.  */
+        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
+        tcg_gen_add_tl(t0, cpu_cc_dst, cpu_cc_src);
+        gen_extu(size, t0);
+        goto add_sub;
+
+    case CC_OP_ADDB ... CC_OP_ADDQ:
+        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_ADDB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+    add_sub:
+        tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1);
+        inv = false;
+        break;
+
+    case CC_OP_SBBB ... CC_OP_SBBQ:
+        /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_SBBB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) {
+            tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
+            t1 = cpu_tmp0;
+        }
+
+        tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
+        tcg_gen_addi_tl(reg, reg, 1);
+        gen_extu(size, reg);
+        t0 = reg;
+        goto adc_sbb;
+
+    case CC_OP_ADCB ... CC_OP_ADCQ:
+        /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
+        size = s->cc_op - CC_OP_ADCB;
+        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+    adc_sbb:
+        tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
+        inv = false;
+        break;
+
+    case CC_OP_LOGICB ... CC_OP_LOGICQ:
+        tcg_gen_movi_tl(reg, 0);
+        break;
+
+    case CC_OP_INCB ... CC_OP_INCQ:
+    case CC_OP_DECB ... CC_OP_DECQ:
+        if (inv) {
+            tcg_gen_xori_tl(reg, cpu_cc_src, 1);
+        } else {
+            tcg_gen_mov_tl(reg, cpu_cc_src);
+        }
+        inv = false;
+        break;
+
+    case CC_OP_SHLB ... CC_OP_SHLQ:
+        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
+        size = s->cc_op - CC_OP_SHLB;
+        tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1);
+        tcg_gen_andi_tl(reg, reg, 1);
+        break;
+
+    case CC_OP_MULB ... CC_OP_MULQ:
+        tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
+                            reg, cpu_cc_src, 0);
+        inv = false;
+        break;
+
+    case CC_OP_EFLAGS:
+    case CC_OP_SARB ... CC_OP_SARQ:
+        /* CC_SRC & 1 */
+        tcg_gen_andi_tl(reg, cpu_cc_src, 1);
+        break;
+
+    default:
+       /* The need to compute only C from CC_OP_DYNAMIC is important
+          in efficiently implementing e.g. INC at the start of a TB.  */
+       gen_update_cc_op(s);
+       gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+       tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+       break;
+    }
+    if (inv) {
+        tcg_gen_xori_tl(reg, reg, 1);
+    }
+}
+
 /* compute eflags.P to reg */
 static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
 {
commit 8115f117357a63bff84522caac6c3bcadee0a285
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:37:28 2013 -0800

    target-i386: use inverted setcond when computing NS or NZ
    
    Make gen_compute_eflags_z and gen_compute_eflags_s able to compute the
    inverted condition, and use this in gen_setcc_slow_T0.  We cannot do it
    yet in gen_compute_eflags_c, but prepare the code for it anyway.  It is
    not worthwhile for PF, as usual.
    
    shr+and+xor could be replaced by and+setcond.  I'm not doing it yet.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 026fbd6..06aa7bf 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -870,11 +870,14 @@ static void gen_op_update_neg_cc(void)
 }
 
 /* compute eflags.C to reg */
-static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
 {
     gen_update_cc_op(s);
     gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+    if (inv) {
+        tcg_gen_xori_tl(reg, reg, 1);
+    }
 }
 
 /* compute all eflags to cc_src */
@@ -898,7 +901,7 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
 }
 
 /* compute eflags.S to reg */
-static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv)
 {
     switch (s->cc_op) {
     case CC_OP_DYNAMIC:
@@ -907,12 +910,15 @@ static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
     case CC_OP_EFLAGS:
         tcg_gen_shri_tl(reg, cpu_cc_src, 7);
         tcg_gen_andi_tl(reg, reg, 1);
+        if (inv) {
+            tcg_gen_xori_tl(reg, reg, 1);
+        }
         break;
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
-            tcg_gen_setcondi_tl(TCG_COND_LT, reg, t0, 0);
+            tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, t0, 0);
         }
         break;
     }
@@ -927,7 +933,7 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
 }
 
 /* compute eflags.Z to reg */
-static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
 {
     switch (s->cc_op) {
     case CC_OP_DYNAMIC:
@@ -936,27 +942,33 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
     case CC_OP_EFLAGS:
         tcg_gen_shri_tl(reg, cpu_cc_src, 6);
         tcg_gen_andi_tl(reg, reg, 1);
+        if (inv) {
+            tcg_gen_xori_tl(reg, reg, 1);
+        }
         break;
     default:
         {
             int size = (s->cc_op - CC_OP_ADDB) & 3;
             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-            tcg_gen_setcondi_tl(TCG_COND_EQ, reg, t0, 0);
+            tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, t0, 0);
         }
+        break;
     }
 }
 
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
 {
     switch(jcc_op) {
     case JCC_O:
         gen_compute_eflags_o(s, cpu_T[0]);
         break;
     case JCC_B:
-        gen_compute_eflags_c(s, cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T[0], inv);
+        inv = false;
         break;
     case JCC_Z:
-        gen_compute_eflags_z(s, cpu_T[0]);
+        gen_compute_eflags_z(s, cpu_T[0], inv);
+        inv = false;
         break;
     case JCC_BE:
         gen_compute_eflags(s);
@@ -965,7 +977,8 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_S:
-        gen_compute_eflags_s(s, cpu_T[0]);
+        gen_compute_eflags_s(s, cpu_T[0], inv);
+        inv = false;
         break;
     case JCC_P:
         gen_compute_eflags_p(s, cpu_T[0]);
@@ -988,6 +1001,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     }
+    if (inv) {
+        tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+    }
 }
 
 /* return true if setcc_slow is not needed (WARNING: must be kept in
@@ -1153,7 +1169,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
         break;
     default:
     slow_jcc:
-        gen_setcc_slow_T0(s, jcc_op);
+        gen_setcc_slow_T0(s, jcc_op, false);
         tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
                            cpu_T[0], 0, l1);
         break;
@@ -1367,7 +1383,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
     }
     switch(op) {
     case OP_ADCL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4, false);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1382,7 +1398,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_SBBL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4, false);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1456,7 +1472,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_mov_TN_reg(ot, 0, d);
     else
         gen_op_ld_T0_A0(ot + s1->mem_index);
-    gen_compute_eflags_c(s1, cpu_cc_src);
+    gen_compute_eflags_c(s1, cpu_cc_src, false);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
         set_cc_op(s1, CC_OP_INCB + ot);
@@ -2407,10 +2423,7 @@ static void gen_setcc(DisasContext *s, int b)
            worth to */
         inv = b & 1;
         jcc_op = (b >> 1) & 7;
-        gen_setcc_slow_T0(s, jcc_op);
-        if (inv) {
-            tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
-        }
+        gen_setcc_slow_T0(s, jcc_op, inv);
     }
 }
 
@@ -6881,7 +6894,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T[0], false);
         tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
commit 086c40778485f9a52d41a66fd4ef0d8723a2ac0a
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:33:59 2013 -0800

    target-i386: do not call helper to compute ZF/SF
    
    ZF, SF and PF can always be computed from CC_DST except in the
    CC_OP_EFLAGS case (and CC_OP_DYNAMIC, which just resolves to CC_OP_EFLAGS
    in gen_compute_eflags).  Use setcond to compute ZF and SF.
    
    We could also use a table lookup to compute PF.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
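
A stand-alone sketch of the idea, assuming the convention that CC_DST holds the (possibly wider-than-needed) result and the operand size is implied by cc_op; only the 8-bit case is shown and the function names are illustrative:

    /* ZF and SF straight from CC_DST: truncate or sign-extend to the
     * operand size, then compare against zero. */
    #include <assert.h>
    #include <stdint.h>

    static int zf_from_dst(uint32_t cc_dst)
    {
        return (uint8_t)cc_dst == 0;     /* zero-extend, compare with 0 */
    }

    static int sf_from_dst(uint32_t cc_dst)
    {
        return (int8_t)cc_dst < 0;       /* sign-extend, test the sign */
    }

    int main(void)
    {
        assert(zf_from_dst(0x100) == 1); /* low byte is zero */
        assert(sf_from_dst(0x80) == 1);  /* low byte is negative */
        assert(sf_from_dst(0x7f) == 0);
        return 0;
    }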

diff --git a/target-i386/translate.c b/target-i386/translate.c
index a767b50..026fbd6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -900,9 +900,22 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
 /* compute eflags.S to reg */
 static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s);
-    tcg_gen_shri_tl(reg, cpu_cc_src, 7);
-    tcg_gen_andi_tl(reg, reg, 1);
+    switch (s->cc_op) {
+    case CC_OP_DYNAMIC:
+        gen_compute_eflags(s);
+        /* FALLTHRU */
+    case CC_OP_EFLAGS:
+        tcg_gen_shri_tl(reg, cpu_cc_src, 7);
+        tcg_gen_andi_tl(reg, reg, 1);
+        break;
+    default:
+        {
+            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
+            tcg_gen_setcondi_tl(TCG_COND_LT, reg, t0, 0);
+        }
+        break;
+    }
 }
 
 /* compute eflags.O to reg */
@@ -916,9 +929,21 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
 /* compute eflags.Z to reg */
 static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s);
-    tcg_gen_shri_tl(reg, cpu_cc_src, 6);
-    tcg_gen_andi_tl(reg, reg, 1);
+    switch (s->cc_op) {
+    case CC_OP_DYNAMIC:
+        gen_compute_eflags(s);
+        /* FALLTHRU */
+    case CC_OP_EFLAGS:
+        tcg_gen_shri_tl(reg, cpu_cc_src, 6);
+        tcg_gen_andi_tl(reg, reg, 1);
+        break;
+    default:
+        {
+            int size = (s->cc_op - CC_OP_ADDB) & 3;
+            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, reg, t0, 0);
+        }
+    }
 }
 
 static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
commit b666265b2071e4288110f6553b598efe00246d06
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:26:38 2013 -0800

    target-i386: Move CC discards to set_cc_op
    
    This gets us universal coverage, rather than scattering discards
    at various places.  As a bonus, we do not emit redundant
    discards e.g. between sequential logic insns.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
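
The liveness-table scheme in miniature, with two made-up cc_op values and printf standing in for tcg_gen_discard_tl(); the real table and op set are in the hunk below.

    /* Each op records which globals it still needs; switching ops
     * discards whatever the new op no longer reads. */
    #include <stdio.h>

    enum { USES_DST = 1, USES_SRC = 2 };
    enum op { OP_EFLAGS, OP_LOGIC, OP_NB };

    static const unsigned char live[OP_NB] = {
        [OP_EFLAGS] = USES_SRC,          /* only cc_src still matters */
        [OP_LOGIC]  = USES_DST,          /* only cc_dst still matters */
    };

    static void set_op(enum op *cur, enum op next)
    {
        unsigned dead;

        if (*cur == next) {
            return;
        }
        dead = live[*cur] & ~live[next];
        if (dead & USES_DST) {
            printf("discard cc_dst\n");
        }
        if (dead & USES_SRC) {
            printf("discard cc_src\n");
        }
        *cur = next;
    }

    int main(void)
    {
        enum op op = OP_EFLAGS;
        set_op(&op, OP_LOGIC);           /* prints "discard cc_src" */
        set_op(&op, OP_LOGIC);           /* no change, nothing discarded */
        return 0;
    }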

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 71104fb..a767b50 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -174,14 +174,48 @@ enum {
     OR_A0, /* temporary register used when doing address evaluation */
 };
 
+enum {
+    USES_CC_DST = 1,
+    USES_CC_SRC = 2,
+};
+
+/* Bit set if the global variable is live after setting CC_OP to X.  */
+static const uint8_t cc_op_live[CC_OP_NB] = {
+    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_EFLAGS] = USES_CC_SRC,
+    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
+    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
+};
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
-    if (s->cc_op != op) {
-        s->cc_op = op;
-        /* The DYNAMIC setting is translator only, and should never be
-           stored.  Thus we always consider it clean.  */
-        s->cc_op_dirty = (op != CC_OP_DYNAMIC);
+    int dead;
+
+    if (s->cc_op == op) {
+        return;
+    }
+
+    /* Discard CC computation that will no longer be used.  */
+    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
+    if (dead & USES_CC_DST) {
+        tcg_gen_discard_tl(cpu_cc_dst);
     }
+    if (dead & USES_CC_SRC) {
+        tcg_gen_discard_tl(cpu_cc_src);
+    }
+
+    s->cc_op = op;
+    /* The DYNAMIC setting is translator only, and should never be
+       stored.  Thus we always consider it clean.  */
+    s->cc_op_dirty = (op != CC_OP_DYNAMIC);
 }
 
 static void gen_update_cc_op(DisasContext *s)
@@ -809,7 +843,6 @@ static inline void gen_movs(DisasContext *s, int ot)
 
 static void gen_op_update1_cc(void)
 {
-    tcg_gen_discard_tl(cpu_cc_src);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
@@ -827,7 +860,6 @@ static inline void gen_op_cmpl_T0_T1_cc(void)
 
 static inline void gen_op_testl_T0_T1_cc(void)
 {
-    tcg_gen_discard_tl(cpu_cc_src);
     tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
 }
 
@@ -853,7 +885,6 @@ static void gen_compute_eflags(DisasContext *s)
     }
     gen_update_cc_op(s);
     gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
-    tcg_gen_discard_tl(cpu_cc_dst);
     set_cc_op(s, CC_OP_EFLAGS);
     tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
@@ -6640,7 +6671,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_mov_reg_T0(ot, reg);
                 tcg_gen_movi_tl(cpu_cc_dst, 1);
                 gen_set_label(label1);
-                tcg_gen_discard_tl(cpu_cc_src);
                 set_cc_op(s, CC_OP_LOGICB + ot);
             }
             tcg_temp_free(t0);
commit ccfcdd09bf91aabe039d2dae0b5ec3a05f083e59
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:07:10 2013 -0800

    target-i386: no need to flush out cc_op before gen_eob
    
    This makes code more similar to the other callers of gen_eob, especially
    loopz/loopnz/jcxz.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6204764..71104fb 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2303,8 +2303,8 @@ static inline void gen_jcc(DisasContext *s, int b,
 {
     int l1, l2;
 
-    gen_update_cc_op(s);
     if (s->jmp_opt) {
+        gen_update_cc_op(s);
         l1 = gen_new_label();
         gen_jcc1(s, b, l1);
         set_cc_op(s, CC_OP_DYNAMIC);
@@ -2315,11 +2315,9 @@ static inline void gen_jcc(DisasContext *s, int b,
         gen_goto_tb(s, 1, val);
         s->is_jmp = DISAS_TB_JUMP;
     } else {
-
         l1 = gen_new_label();
         l2 = gen_new_label();
         gen_jcc1(s, b, l1);
-        set_cc_op(s, CC_OP_DYNAMIC);
 
         gen_jmp_im(next_eip);
         tcg_gen_br(l2);
commit d229edce1c58e6bb13d386bef4c31fc2e3850cb6
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 13:03:26 2013 -0800

    target-i386: do not compute eflags multiple times consecutively
    
    After calling gen_compute_eflags, leave the computed value in cc_reg_src
    and set cc_op to CC_OP_EFLAGS.  The next few patches will remove most
    of the calls to gen_compute_eflags anyway.
    
    As a result of this change it is more natural to remove the register
    argument from gen_compute_eflags and change all the callers.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
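
The caching pattern in isolation: once the flags have been fully materialized, cc_op records that fact and repeat requests become no-ops. A toy model with invented names; the counter just makes the effect visible.

    #include <assert.h>
    #include <stdint.h>

    enum cc_op { CC_OP_DYNAMIC, CC_OP_EFLAGS };

    struct ctx {
        enum cc_op cc_op;
        uint32_t cc_src;      /* holds the flags once materialized */
        int helper_calls;
    };

    static uint32_t expensive_compute_all(struct ctx *s)
    {
        s->helper_calls++;
        return 0x46;          /* pretend result: ZF | PF */
    }

    static void compute_eflags(struct ctx *s)
    {
        if (s->cc_op == CC_OP_EFLAGS) {
            return;           /* already in cc_src, nothing to do */
        }
        s->cc_src = expensive_compute_all(s);
        s->cc_op = CC_OP_EFLAGS;
    }

    int main(void)
    {
        struct ctx s = { CC_OP_DYNAMIC, 0, 0 };
        compute_eflags(&s);
        compute_eflags(&s);   /* second call is free */
        assert(s.helper_calls == 1);
        return 0;
    }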

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9bbe969..6204764 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -845,47 +845,48 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
-/* compute all eflags to reg */
-static void gen_compute_eflags(DisasContext *s, TCGv reg)
+/* compute all eflags to cc_src */
+static void gen_compute_eflags(DisasContext *s)
 {
+    if (s->cc_op == CC_OP_EFLAGS) {
+        return;
+    }
     gen_update_cc_op(s);
     gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
-    if (TCGV_EQUAL(reg, cpu_cc_src)) {
-        tcg_gen_discard_tl(cpu_cc_dst);
-        set_cc_op(s, CC_OP_EFLAGS);
-    }
-    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    set_cc_op(s, CC_OP_EFLAGS);
+    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
 
 /* compute eflags.P to reg */
 static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 2);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 2);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.S to reg */
 static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 7);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 7);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.O to reg */
 static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 11);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 11);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.Z to reg */
 static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 6);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 6);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
@@ -902,9 +903,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         gen_compute_eflags_z(s, cpu_T[0]);
         break;
     case JCC_BE:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_S:
@@ -914,18 +915,18 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         gen_compute_eflags_p(s, cpu_T[0]);
         break;
     case JCC_L:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     default:
     case JCC_LE:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
@@ -1619,7 +1620,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
     }
     
     /* update eflags.  It is needed anyway most of the time, do it always.  */
-    gen_compute_eflags(s, cpu_cc_src);
+    gen_compute_eflags(s);
     assert(s->cc_op == CC_OP_EFLAGS);
 
     label2 = gen_new_label();
@@ -1696,7 +1697,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 
     if (op2 != 0) {
         /* update eflags */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         assert(s->cc_op == CC_OP_EFLAGS);
 
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
@@ -1720,8 +1721,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
                            int is_right)
 {
-    gen_update_cc_op(s);
-    gen_compute_eflags(s, cpu_cc_src);
+    gen_compute_eflags(s);
     assert(s->cc_op == CC_OP_EFLAGS);
 
     /* load */
@@ -6483,7 +6483,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
@@ -6491,21 +6491,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_compute_eflags(s, cpu_T[0]);
+        gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
         gen_op_mov_reg_T0(OT_BYTE, R_AH);
         break;
     case 0xf5: /* cmc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xf8: /* clc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
         break;
     case 0xf9: /* stc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xfc: /* cld */
@@ -7381,7 +7381,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
            } else {
                 gen_op_mov_reg_v(ot, rm, t0);
             }
-            gen_compute_eflags(s, cpu_cc_src);
+            gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
             tcg_temp_free(t0);
commit 1608ecca95188dcf4f78072be48f41dbe2062b25
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 5 18:42:59 2012 +0200

    target-i386: add helper functions to get other flags
    
    Introduce new functions to extract PF, SF, OF, ZF in addition to CF.
    These provide single entry points for optimizing accesses to a single
    flag.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
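
What the single entry points boil down to, assuming the usual x86 bit positions (PF = 2, ZF = 6, SF = 7, OF = 11); each helper just isolates its bit of the computed flags word.

    #include <assert.h>
    #include <stdint.h>

    static int flag_p(uint32_t f) { return (f >> 2) & 1; }
    static int flag_z(uint32_t f) { return (f >> 6) & 1; }
    static int flag_s(uint32_t f) { return (f >> 7) & 1; }
    static int flag_o(uint32_t f) { return (f >> 11) & 1; }

    int main(void)
    {
        uint32_t f = 0x0844;             /* OF, ZF, PF set */
        assert(flag_p(f) && flag_z(f) && flag_o(f) && !flag_s(f));
        return 0;
    }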

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9dd3081..9bbe969 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -857,21 +857,49 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg)
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
+/* compute eflags.P to reg */
+static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s, reg);
+    tcg_gen_shri_tl(reg, reg, 2);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.S to reg */
+static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s, reg);
+    tcg_gen_shri_tl(reg, reg, 7);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.O to reg */
+static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s, reg);
+    tcg_gen_shri_tl(reg, reg, 11);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.Z to reg */
+static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
+{
+    gen_compute_eflags(s, reg);
+    tcg_gen_shri_tl(reg, reg, 6);
+    tcg_gen_andi_tl(reg, reg, 1);
+}
+
 static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
 {
     switch(jcc_op) {
     case JCC_O:
-        gen_compute_eflags(s, cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_o(s, cpu_T[0]);
         break;
     case JCC_B:
         gen_compute_eflags_c(s, cpu_T[0]);
         break;
     case JCC_Z:
-        gen_compute_eflags(s, cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_z(s, cpu_T[0]);
         break;
     case JCC_BE:
         gen_compute_eflags(s, cpu_tmp0);
@@ -880,14 +908,10 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_S:
-        gen_compute_eflags(s, cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_s(s, cpu_T[0]);
         break;
     case JCC_P:
-        gen_compute_eflags(s, cpu_T[0]);
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+        gen_compute_eflags_p(s, cpu_T[0]);
         break;
     case JCC_L:
         gen_compute_eflags(s, cpu_tmp0);
commit 773cdfccb835cc82aca2b2ff34277b4bf58d6bb9
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 12:43:12 2013 -0800

    target-i386: Use gen_update_cc_op everywhere
    
    All of the conditional calls to gen_op_set_cc_op go away, and
    gen_op_set_cc_op itself gets inlined into its only remaining caller.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
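
The pattern that gen_update_cc_op() centralizes, reduced to a dirty-flag model with invented field names: the translator flushes the known cc_op value at most once, instead of every caller open-coding the "is it dynamic" check.

    #include <assert.h>

    struct disas {
        int cc_op;            /* value known at translation time */
        int cc_op_dirty;      /* does it still need to be stored? */
        int stores;           /* how many flushes were emitted */
    };

    static void update_cc_op(struct disas *s)
    {
        if (s->cc_op_dirty) {
            s->stores++;      /* stands in for storing cc_op */
            s->cc_op_dirty = 0;
        }
    }

    int main(void)
    {
        struct disas s = { 7, 1, 0 };
        update_cc_op(&s);     /* flushes once */
        update_cc_op(&s);     /* already clean, no store */
        assert(s.stores == 1);
        return 0;
    }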

diff --git a/target-i386/translate.c b/target-i386/translate.c
index cabdeda..9dd3081 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -184,15 +184,10 @@ static void set_cc_op(DisasContext *s, CCOp op)
     }
 }
 
-static inline void gen_op_set_cc_op(int32_t val)
-{
-    tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
 static void gen_update_cc_op(DisasContext *s)
 {
     if (s->cc_op_dirty) {
-        gen_op_set_cc_op(s->cc_op);
+        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
         s->cc_op_dirty = false;
     }
 }
@@ -771,8 +766,7 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
 
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -790,8 +784,7 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
     }
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(cur_eip);
         }
         svm_flags |= (1 << (4 + ot));
@@ -847,9 +840,7 @@ static void gen_op_update_neg_cc(void)
 /* compute eflags.C to reg */
 static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 {
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-    }
+    gen_update_cc_op(s);
     gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
@@ -857,9 +848,7 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 /* compute all eflags to reg */
 static void gen_compute_eflags(DisasContext *s, TCGv reg)
 {
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-    }
+    gen_update_cc_op(s);
     gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     if (TCGV_EQUAL(reg, cpu_cc_src)) {
         tcg_gen_discard_tl(cpu_cc_dst);
@@ -1215,7 +1204,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
     gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
-    gen_op_set_cc_op(s->cc_op);                                               \
+    gen_update_cc_op(s);                                                      \
     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
@@ -1449,10 +1438,8 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_T0(ot, op1);
     }
 
-    /* update eflags if non zero shift */
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-    }
+    /* update eflags */
+    gen_update_cc_op(s);
 
     tcg_gen_mov_tl(t1, cpu_T[0]);
 
@@ -1709,8 +1696,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
                            int is_right)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_compute_eflags(s, cpu_cc_src);
     assert(s->cc_op == CC_OP_EFLAGS);
 
@@ -1869,8 +1855,7 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
     }
     
     /* update eflags */
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
@@ -2294,9 +2279,7 @@ static inline void gen_jcc(DisasContext *s, int b,
 {
     int l1, l2;
 
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-    }
+    gen_update_cc_op(s);
     if (s->jmp_opt) {
         l1 = gen_new_label();
         gen_jcc1(s, b, l1);
@@ -2375,8 +2358,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
 {
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2405,8 +2387,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     /* no SVM activated; fast case */
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
@@ -2653,8 +2634,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
@@ -2665,8 +2645,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
@@ -2675,8 +2654,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     gen_jmp_im(cur_eip);
     gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
@@ -2686,8 +2664,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
    if needed */
 static void gen_eob(DisasContext *s)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
+    gen_update_cc_op(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
         gen_helper_reset_inhibit_irq(cpu_env);
     }
@@ -4695,8 +4672,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_lcall:
             if (s->pe && !s->vm86) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4722,8 +4698,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4967,8 +4942,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
@@ -4977,8 +4951,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
@@ -5651,8 +5624,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
@@ -5662,8 +5634,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
@@ -5673,27 +5644,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
             case 0x1d: /* fldt mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fldt_ST0(cpu_env, cpu_A0);
                 break;
             case 0x1f: /* fstpt mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fstt_ST0(cpu_env, cpu_A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x2e: /* fnsave mem */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
@@ -5703,14 +5670,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
             case 0x3c: /* fbld */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fbld_ST0(cpu_env, cpu_A0);
                 break;
             case 0x3e: /* fbstp */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fbst_ST0(cpu_env, cpu_A0);
                 gen_helper_fpop(cpu_env);
@@ -5748,8 +5713,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 switch(rm) {
                 case 0: /* fnop */
                     /* check exceptions (FreeBSD FPU probe) */
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_fwait(cpu_env);
                     break;
@@ -5930,15 +5894,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             case 0x1d: /* fucomi */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fucomi_ST0_FT0(cpu_env);
                 set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x1e: /* fcomi */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fcomi_ST0_FT0(cpu_env);
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -5993,16 +5955,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             case 0x3d: /* fucomip */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fucomi_ST0_FT0(cpu_env);
                 gen_helper_fpop(cpu_env);
                 set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x3e: /* fcomip */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fcomi_ST0_FT0(cpu_env);
                 gen_helper_fpop(cpu_env);
@@ -6255,8 +6215,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         s->pc += 2;
     do_lret:
         if (s->pe && !s->vm86) {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(val));
@@ -6295,8 +6254,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 set_cc_op(s, CC_OP_EFLAGS);
             }
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -6434,8 +6392,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_helper_read_eflags(cpu_T[0], cpu_env);
             gen_push_T0(s);
         }
@@ -6672,32 +6629,28 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x27: /* daa */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_helper_daa(cpu_env);
         set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x2f: /* das */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_helper_das(cpu_env);
         set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x37: /* aaa */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_helper_aaa(cpu_env);
         set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x3f: /* aas */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_helper_aas(cpu_env);
         set_cc_op(s, CC_OP_EFLAGS);
         break;
@@ -6739,8 +6692,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             (HF_MP_MASK | HF_TS_MASK)) {
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
@@ -6759,8 +6711,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xce: /* into */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
@@ -6906,8 +6857,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             if (b & 2) {
                 gen_helper_rdmsr(cpu_env);
@@ -6917,8 +6867,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0x131: /* rdtsc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
         if (use_icount)
             gen_io_start();
@@ -6929,8 +6878,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0x133: /* rdpmc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_rdpmc(cpu_env);
         break;
@@ -6984,8 +6932,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
 #endif
     case 0x1a2: /* cpuid */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
+        gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_cpuid(cpu_env);
         break;
@@ -6993,8 +6940,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
@@ -7056,8 +7002,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
             } else {
@@ -7095,8 +7040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
                         s->cpl != 0)
                         goto illegal_op;
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
 #ifdef TARGET_X86_64
                     if (s->aflag == 2) {
@@ -7156,8 +7100,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 2: /* lgdt */
         case 3: /* lidt */
             if (mod == 3) {
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 switch(rm) {
                 case 0: /* VMRUN */
@@ -7285,8 +7228,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_helper_invlpg(cpu_env, cpu_A0);
@@ -7319,8 +7261,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 case 1: /* rdtscp */
                     if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                         goto illegal_op;
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
+                    gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
@@ -7436,8 +7377,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
             t0 = tcg_temp_local_new();
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             if (b == 0x102) {
                 gen_helper_lar(t0, cpu_env, cpu_T[0]);
             } else {
@@ -7502,8 +7442,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 3:
             case 4:
             case 8:
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
+                gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_TN_reg(ot, 0, rm);
@@ -7592,8 +7531,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 break;
             }
             gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
             break;
@@ -7606,8 +7544,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 break;
             }
             gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
+            gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fxrstor(cpu_env, cpu_A0,
                                tcg_const_i32((s->dflag == 2)));
commit e207582f6660e0e2d10a2e79e664e456e80b2887
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 12:34:26 2013 -0800

    target-i386: Don't clobber s->cc_op in gen_update_cc_op
    
    Use a dirty flag to know whether env->cc_op is up to date,
    rather than forcing s->cc_op to DYNAMIC and losing info.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
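
For orientation, here is a minimal standalone sketch of the dirty-flag pattern in plain C, using placeholder names (Ctx, env_cc_op) rather than the real DisasContext and TCG calls; the authoritative change is the diff that follows.

    #include <stdbool.h>
    #include <stdio.h>

    enum { CC_OP_DYNAMIC, CC_OP_EFLAGS, CC_OP_ADDB };   /* trimmed stand-in */

    typedef struct {
        int  cc_op;         /* the translator's private view of the flags state */
        bool cc_op_dirty;   /* true while the stored env->cc_op lags behind */
        int  env_cc_op;     /* stand-in for the value written to CPU state */
    } Ctx;

    static void set_cc_op(Ctx *s, int op)
    {
        if (s->cc_op != op) {
            s->cc_op = op;
            /* DYNAMIC exists only in the translator, so it is never dirty. */
            s->cc_op_dirty = (op != CC_OP_DYNAMIC);
        }
    }

    static void gen_update_cc_op(Ctx *s)
    {
        if (s->cc_op_dirty) {
            s->env_cc_op = s->cc_op;  /* in QEMU: tcg_gen_movi_i32(cpu_cc_op, op) */
            s->cc_op_dirty = false;   /* note: s->cc_op itself is not clobbered */
        }
    }

    int main(void)
    {
        Ctx s = { CC_OP_DYNAMIC, false, CC_OP_DYNAMIC };
        set_cc_op(&s, CC_OP_ADDB);    /* e.g. after translating an ADD */
        gen_update_cc_op(&s);         /* flush before a helper that reads cc_op */
        printf("cc_op=%d dirty=%d\n", s.cc_op, (int)s.cc_op_dirty);
        return 0;
    }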

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6df76d6..cabdeda 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -90,6 +90,7 @@ typedef struct DisasContext {
 #endif
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
+    bool cc_op_dirty;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -173,9 +174,27 @@ enum {
     OR_A0, /* temporary register used when doing address evaluation */
 };
 
-static inline void set_cc_op(DisasContext *s, CCOp op)
+static void set_cc_op(DisasContext *s, CCOp op)
 {
-    s->cc_op = op;
+    if (s->cc_op != op) {
+        s->cc_op = op;
+        /* The DYNAMIC setting is translator only, and should never be
+           stored.  Thus we always consider it clean.  */
+        s->cc_op_dirty = (op != CC_OP_DYNAMIC);
+    }
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+    tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
+static void gen_update_cc_op(DisasContext *s)
+{
+    if (s->cc_op_dirty) {
+        gen_op_set_cc_op(s->cc_op);
+        s->cc_op_dirty = false;
+    }
 }
 
 static inline void gen_op_movl_T0_0(void)
@@ -444,11 +463,6 @@ static inline void gen_op_add_reg_T0(int size, int reg)
     }
 }
 
-static inline void gen_op_set_cc_op(int32_t val)
-{
-    tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 {
     tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
@@ -800,14 +814,6 @@ static inline void gen_movs(DisasContext *s, int ot)
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
-static inline void gen_update_cc_op(DisasContext *s)
-{
-    if (s->cc_op != CC_OP_DYNAMIC) {
-        gen_op_set_cc_op(s->cc_op);
-        set_cc_op(s, CC_OP_DYNAMIC);
-    }
-}
-
 static void gen_op_update1_cc(void)
 {
     tcg_gen_discard_tl(cpu_cc_src);
@@ -7816,6 +7822,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     dc->tf = (flags >> TF_SHIFT) & 1;
     dc->singlestep_enabled = env->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
+    dc->cc_op_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
commit 3ca51d07dae5b2d2301431c55b08d4faaad95d91
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 23 12:30:52 2013 -0800

    target-i386: Introduce set_cc_op
    
    This will provide a good hook into which we can consolidate
    all of the cc variable discards.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index cf71878..6df76d6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -173,6 +173,11 @@ enum {
     OR_A0, /* temporary register used when doing address evaluation */
 };
 
+static inline void set_cc_op(DisasContext *s, CCOp op)
+{
+    s->cc_op = op;
+}
+
 static inline void gen_op_movl_T0_0(void)
 {
     tcg_gen_movi_tl(cpu_T[0], 0);
@@ -799,7 +804,7 @@ static inline void gen_update_cc_op(DisasContext *s)
 {
     if (s->cc_op != CC_OP_DYNAMIC) {
         gen_op_set_cc_op(s->cc_op);
-        s->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s, CC_OP_DYNAMIC);
     }
 }
 
@@ -852,7 +857,7 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg)
     gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     if (TCGV_EQUAL(reg, cpu_cc_src)) {
         tcg_gen_discard_tl(cpu_cc_dst);
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
     }
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
@@ -1120,7 +1125,7 @@ static inline void gen_scas(DisasContext *s, int ot)
     gen_op_cmpl_T0_T1_cc();
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
-    s->cc_op = CC_OP_SUBB + ot;
+    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_cmps(DisasContext *s, int ot)
@@ -1133,7 +1138,7 @@ static inline void gen_cmps(DisasContext *s, int ot)
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
-    s->cc_op = CC_OP_SUBB + ot;
+    set_cc_op(s, CC_OP_SUBB + ot);
 }
 
 static inline void gen_ins(DisasContext *s, int ot)
@@ -1209,7 +1214,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
     gen_jmp(s, cur_eip);                                                      \
-    s->cc_op = CC_OP_DYNAMIC;                                                 \
+    set_cc_op(s, CC_OP_DYNAMIC);                                              \
 }
 
 GEN_REPZ(movs)
@@ -1298,7 +1303,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
         tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
         tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
-        s1->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
@@ -1313,7 +1318,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
         tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
         tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
-        s1->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s1, CC_OP_DYNAMIC);
         break;
     case OP_ADDL:
         gen_op_addl_T0_T1();
@@ -1322,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update2_cc();
-        s1->cc_op = CC_OP_ADDB + ot;
+        set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1331,7 +1336,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update2_cc();
-        s1->cc_op = CC_OP_SUBB + ot;
+        set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
@@ -1341,7 +1346,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1350,7 +1355,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1359,11 +1364,11 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
         gen_op_update1_cc();
-        s1->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
         gen_op_cmpl_T0_T1_cc();
-        s1->cc_op = CC_OP_SUBB + ot;
+        set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
 }
@@ -1378,10 +1383,10 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
     gen_compute_eflags_c(s1, cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
-        s1->cc_op = CC_OP_INCB + ot;
+        set_cc_op(s1, CC_OP_INCB + ot);
     } else {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
-        s1->cc_op = CC_OP_DECB + ot;
+        set_cc_op(s1, CC_OP_DECB + ot);
     }
     if (d != OR_TMP0)
         gen_op_mov_reg_T0(ot, d);
@@ -1468,7 +1473,7 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
     }
 
     gen_set_label(shift_label);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1519,10 +1524,7 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
         tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-        if (is_right)
-            s->cc_op = CC_OP_SARB + ot;
-        else
-            s->cc_op = CC_OP_SHLB + ot;
+        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
 }
 
@@ -1875,7 +1877,7 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
     }
     gen_set_label(label2);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+    set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -2292,7 +2294,7 @@ static inline void gen_jcc(DisasContext *s, int b,
     if (s->jmp_opt) {
         l1 = gen_new_label();
         gen_jcc1(s, b, l1);
-        s->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s, CC_OP_DYNAMIC);
         
         gen_goto_tb(s, 0, next_eip);
 
@@ -2304,7 +2306,7 @@ static inline void gen_jcc(DisasContext *s, int b,
         l1 = gen_new_label();
         l2 = gen_new_label();
         gen_jcc1(s, b, l1);
-        s->cc_op = CC_OP_DYNAMIC;
+        set_cc_op(s, CC_OP_DYNAMIC);
 
         gen_jmp_im(next_eip);
         tcg_gen_br(l2);
@@ -3792,8 +3794,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
 
-            if (b == 0x17)
-                s->cc_op = CC_OP_EFLAGS;
+            if (b == 0x17) {
+                set_cc_op(s, CC_OP_EFLAGS);
+            }
             break;
         case 0x338: /* crc32 */
         crc32:
@@ -3995,7 +3998,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             val = cpu_ldub_code(env, s->pc++);
 
             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
 
                 if (s->dflag == 2)
                     /* The helper must use entire 64-bit gp registers */
@@ -4116,7 +4119,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         }
         if (b == 0x2e || b == 0x2f) {
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
         }
     }
 }
@@ -4300,7 +4303,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 xor_zero:
                     /* xor reg, reg optimisation */
                     gen_op_movl_T0_0();
-                    s->cc_op = CC_OP_LOGICB + ot;
+                    set_cc_op(s, CC_OP_LOGICB + ot);
                     gen_op_mov_reg_T0(ot, reg);
                     gen_op_update1_cc();
                     break;
@@ -4415,7 +4418,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             val = insn_get(env, s, ot);
             gen_op_movl_T1_im(val);
             gen_op_testl_T0_T1_cc();
-            s->cc_op = CC_OP_LOGICB + ot;
+            set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
             tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
@@ -4433,7 +4436,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_mov_reg_T0(ot, rm);
             }
             gen_op_update_neg_cc();
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
             switch(ot) {
@@ -4446,7 +4449,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_op_mov_reg_T0(OT_WORD, R_EAX);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
-                s->cc_op = CC_OP_MULB;
+                set_cc_op(s, CC_OP_MULB);
                 break;
             case OT_WORD:
                 gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4459,7 +4462,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                 gen_op_mov_reg_T0(OT_WORD, R_EDX);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                s->cc_op = CC_OP_MULW;
+                set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case OT_LONG:
@@ -4491,12 +4494,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 }
 #endif
-                s->cc_op = CC_OP_MULL;
+                set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
-                s->cc_op = CC_OP_MULQ;
+                set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
             }
@@ -4513,7 +4516,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
                 tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                s->cc_op = CC_OP_MULB;
+                set_cc_op(s, CC_OP_MULB);
                 break;
             case OT_WORD:
                 gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4527,7 +4530,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
                 gen_op_mov_reg_T0(OT_WORD, R_EDX);
-                s->cc_op = CC_OP_MULW;
+                set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case OT_LONG:
@@ -4561,12 +4564,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 }
 #endif
-                s->cc_op = CC_OP_MULL;
+                set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
-                s->cc_op = CC_OP_MULQ;
+                set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
             }
@@ -4747,7 +4750,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_TN_reg(ot, 1, reg);
         gen_op_testl_T0_T1_cc();
-        s->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0xa8: /* test eAX, Iv */
@@ -4761,7 +4764,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_op_mov_TN_reg(ot, 0, OR_EAX);
         gen_op_movl_T1_im(val);
         gen_op_testl_T0_T1_cc();
-        s->cc_op = CC_OP_LOGICB + ot;
+        set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
     case 0x98: /* CWDE/CBW */
@@ -4862,7 +4865,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
         }
         gen_op_mov_reg_T0(ot, reg);
-        s->cc_op = CC_OP_MULB + ot;
+        set_cc_op(s, CC_OP_MULB + ot);
         break;
     case 0x1c0:
     case 0x1c1: /* xadd Ev, Gv */
@@ -4889,7 +4892,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_T1(ot, reg);
         }
         gen_op_update2_cc();
-        s->cc_op = CC_OP_ADDB + ot;
+        set_cc_op(s, CC_OP_ADDB + ot);
         break;
     case 0x1b0:
     case 0x1b1: /* cmpxchg Ev, Gv */
@@ -4941,7 +4944,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_set_label(label2);
             tcg_gen_mov_tl(cpu_cc_src, t0);
             tcg_gen_mov_tl(cpu_cc_dst, t2);
-            s->cc_op = CC_OP_SUBB + ot;
+            set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(t0);
             tcg_temp_free(t1);
             tcg_temp_free(t2);
@@ -4973,7 +4976,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
 
         /**************************/
@@ -5925,14 +5928,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_op_set_cc_op(s->cc_op);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fucomi_ST0_FT0(cpu_env);
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x1e: /* fcomi */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fcomi_ST0_FT0(cpu_env);
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x28: /* ffree sti */
                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
@@ -5989,7 +5992,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fucomi_ST0_FT0(cpu_env);
                 gen_helper_fpop(cpu_env);
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x3e: /* fcomip */
                 if (s->cc_op != CC_OP_DYNAMIC)
@@ -5997,7 +6000,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                 gen_helper_fcomi_ST0_FT0(cpu_env);
                 gen_helper_fpop(cpu_env);
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
                 break;
             case 0x10 ... 0x13: /* fcmovxx */
             case 0x18 ... 0x1b:
@@ -6277,13 +6280,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (!s->pe) {
             /* real mode */
             gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
         } else if (s->vm86) {
             if (s->iopl != 3) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
-                s->cc_op = CC_OP_EFLAGS;
+                set_cc_op(s, CC_OP_EFLAGS);
             }
         } else {
             if (s->cc_op != CC_OP_DYNAMIC)
@@ -6291,7 +6294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
         }
         gen_eob(s);
         break;
@@ -6483,7 +6486,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
             }
             gen_pop_update(s);
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF/AC flag may change */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -6606,7 +6609,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         }
-        s->cc_op = CC_OP_SARB + ot;
+        set_cc_op(s, CC_OP_SARB + ot);
         if (op != 0) {
             if (mod != 3)
                 gen_op_st_T0_A0(ot + s->mem_index);
@@ -6653,7 +6656,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 tcg_gen_movi_tl(cpu_cc_dst, 1);
                 gen_set_label(label1);
                 tcg_gen_discard_tl(cpu_cc_src);
-                s->cc_op = CC_OP_LOGICB + ot;
+                set_cc_op(s, CC_OP_LOGICB + ot);
             }
             tcg_temp_free(t0);
         }
@@ -6666,7 +6669,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_helper_daa(cpu_env);
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x2f: /* das */
         if (CODE64(s))
@@ -6674,7 +6677,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_helper_das(cpu_env);
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x37: /* aaa */
         if (CODE64(s))
@@ -6682,7 +6685,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_helper_aaa(cpu_env);
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x3f: /* aas */
         if (CODE64(s))
@@ -6690,7 +6693,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_helper_aas(cpu_env);
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0xd4: /* aam */
         if (CODE64(s))
@@ -6700,7 +6703,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
         } else {
             gen_helper_aam(cpu_env, tcg_const_i32(val));
-            s->cc_op = CC_OP_LOGICB;
+            set_cc_op(s, CC_OP_LOGICB);
         }
         break;
     case 0xd5: /* aad */
@@ -6708,7 +6711,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         val = cpu_ldub_code(env, s->pc++);
         gen_helper_aad(cpu_env, tcg_const_i32(val));
-        s->cc_op = CC_OP_LOGICB;
+        set_cc_op(s, CC_OP_LOGICB);
         break;
         /************************/
         /* misc */
@@ -6967,8 +6970,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag));
             /* condition codes are modified only in long mode */
-            if (s->lma)
-                s->cc_op = CC_OP_EFLAGS;
+            if (s->lma) {
+                set_cc_op(s, CC_OP_EFLAGS);
+            }
             gen_eob(s);
         }
         break;
@@ -7053,7 +7057,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_helper_verw(cpu_env, cpu_T[0]);
             }
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
             break;
         default:
             goto illegal_op;
@@ -7438,7 +7442,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
             gen_op_mov_reg_v(ot, reg, t0);
             gen_set_label(label1);
-            s->cc_op = CC_OP_EFLAGS;
+            set_cc_op(s, CC_OP_EFLAGS);
             tcg_temp_free(t0);
         }
         break;
@@ -7681,7 +7685,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_T0(ot, reg);
 
-        s->cc_op = CC_OP_EFLAGS;
+        set_cc_op(s, CC_OP_EFLAGS);
         break;
     case 0x10e ... 0x10f:
         /* 3DNow! instructions, ignore prefixes */
commit fee71888a29ab9f31b23386383812a4f5c953829
Author: Richard Henderson <rth at twiddle.net>
Date:   Wed Jan 16 16:23:46 2013 -0800

    target-i386: Name the cc_op enumeration
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index cd35cd5..8c4c605 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -582,7 +582,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3
 
 
-enum {
+typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
     CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
 
@@ -637,7 +637,7 @@ enum {
     CC_OP_SARQ,
 
     CC_OP_NB,
-};
+} CCOp;
 
 typedef struct SegmentCache {
     uint32_t selector;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 89f2908..cf71878 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -89,7 +89,7 @@ typedef struct DisasContext {
     int rex_x, rex_b;
 #endif
     int ss32;   /* 32 bit stack segment */
-    int cc_op;  /* current CC operation */
+    CCOp cc_op;  /* current CC operation */
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
commit c7b3c87397a3458d3d26499c483e0badaf79849c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 5 18:29:21 2012 +0200

    target-i386: factor gen_op_set_cc_op/tcg_gen_discard_tl around computing flags
    
    Before computing flags we need to store the cc_op to memory.  Move this
    to gen_compute_eflags_c and gen_compute_eflags rather than doing it all
    over the place.
    
    Also, after computing the flags in cpu_cc_src we are in EFLAGS mode.
    Set s->cc_op and discard cpu_cc_dst in gen_compute_eflags, rather than
    doing it all over the place.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
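
The shape of the refactoring, as a hedged plain-C sketch with invented names (Ctx, env_cc_op, compute_eflags): the store of cc_op moves from every call site into the helper, and the helper also records the state it leaves behind. The real code additionally discards cpu_cc_dst; the diff below is authoritative.

    #include <stdio.h>

    enum { CC_OP_DYNAMIC, CC_OP_EFLAGS, CC_OP_SUBB };

    typedef struct {
        int cc_op;          /* translator's view of the flags state */
        int env_cc_op;      /* models the value stored to CPU state */
        unsigned cc_src;    /* models the cc_src global */
    } Ctx;

    static void compute_eflags(Ctx *s, unsigned *reg)
    {
        if (s->cc_op != CC_OP_DYNAMIC) {
            s->env_cc_op = s->cc_op;   /* was repeated at every caller before */
        }
        *reg = 0x02 | 0x40;            /* pretend result of cc_compute_all() */
        if (reg == &s->cc_src) {
            s->cc_op = CC_OP_EFLAGS;   /* the flags now live in cc_src */
        }
    }

    int main(void)
    {
        Ctx s = { CC_OP_SUBB, CC_OP_DYNAMIC, 0 };
        compute_eflags(&s, &s.cc_src); /* callers no longer repeat the flush */
        printf("cc_op=%d stored=%d src=0x%02x\n", s.cc_op, s.env_cc_op, s.cc_src);
        return 0;
    }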

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd0f6..89f2908 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -834,55 +834,63 @@ static void gen_op_update_neg_cc(void)
 }
 
 /* compute eflags.C to reg */
-static void gen_compute_eflags_c(TCGv reg)
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 {
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
     gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
-/* compute all eflags to cc_src */
-static void gen_compute_eflags(TCGv reg)
+/* compute all eflags to reg */
+static void gen_compute_eflags(DisasContext *s, TCGv reg)
 {
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
     gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+    if (TCGV_EQUAL(reg, cpu_cc_src)) {
+        tcg_gen_discard_tl(cpu_cc_dst);
+        s->cc_op = CC_OP_EFLAGS;
+    }
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
 static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
 {
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
     switch(jcc_op) {
     case JCC_O:
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s, cpu_T[0]);
         tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_B:
-        gen_compute_eflags_c(cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T[0]);
         break;
     case JCC_Z:
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s, cpu_T[0]);
         tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_BE:
-        gen_compute_eflags(cpu_tmp0);
+        gen_compute_eflags(s, cpu_tmp0);
         tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_S:
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s, cpu_T[0]);
         tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_P:
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s, cpu_T[0]);
         tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_L:
-        gen_compute_eflags(cpu_tmp0);
+        gen_compute_eflags(s, cpu_tmp0);
         tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
         tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
@@ -890,7 +898,7 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         break;
     default:
     case JCC_LE:
-        gen_compute_eflags(cpu_tmp0);
+        gen_compute_eflags(s, cpu_tmp0);
         tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
         tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
         tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
@@ -1278,9 +1286,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
     }
     switch(op) {
     case OP_ADCL:
-        if (s1->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s1->cc_op);
-        gen_compute_eflags_c(cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1295,9 +1301,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
         s1->cc_op = CC_OP_DYNAMIC;
         break;
     case OP_SBBL:
-        if (s1->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s1->cc_op);
-        gen_compute_eflags_c(cpu_tmp4);
+        gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         if (d != OR_TMP0)
@@ -1371,9 +1375,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_mov_TN_reg(ot, 0, d);
     else
         gen_op_ld_T0_A0(ot + s1->mem_index);
-    if (s1->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s1->cc_op);
-    gen_compute_eflags_c(cpu_cc_src);
+    gen_compute_eflags_c(s1, cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
         s1->cc_op = CC_OP_INCB + ot;
@@ -1598,11 +1600,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
     }
     
     /* update eflags.  It is needed anyway most of the time, do it always.  */
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
-    gen_compute_eflags(cpu_cc_src);
-    tcg_gen_discard_tl(cpu_cc_dst);
-    s->cc_op = CC_OP_EFLAGS;
+    gen_compute_eflags(s, cpu_cc_src);
+    assert(s->cc_op == CC_OP_EFLAGS);
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
@@ -1678,12 +1677,8 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 
     if (op2 != 0) {
         /* update eflags */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-
-        gen_compute_eflags(cpu_cc_src);
-        tcg_gen_discard_tl(cpu_cc_dst);
-        s->cc_op = CC_OP_EFLAGS;
+        gen_compute_eflags(s, cpu_cc_src);
+        assert(s->cc_op == CC_OP_EFLAGS);
 
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
         tcg_gen_xor_tl(cpu_tmp0, t1, t0);
@@ -1708,9 +1703,8 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 {
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
-    gen_compute_eflags(cpu_cc_src);
-    tcg_gen_discard_tl(cpu_cc_dst);
-    s->cc_op = CC_OP_EFLAGS;
+    gen_compute_eflags(s, cpu_cc_src);
+    assert(s->cc_op == CC_OP_EFLAGS);
 
     /* load */
     if (op1 == OR_TMP0)
@@ -6499,12 +6493,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
-        tcg_gen_discard_tl(cpu_cc_dst);
-        s->cc_op = CC_OP_EFLAGS;
-
+        gen_compute_eflags(s, cpu_cc_src);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
@@ -6512,33 +6501,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_T[0]);
+        gen_compute_eflags(s, cpu_T[0]);
         /* Note: gen_compute_eflags() only gives the condition codes */
         tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
         gen_op_mov_reg_T0(OT_BYTE, R_AH);
         break;
     case 0xf5: /* cmc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s, cpu_cc_src);
         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xf8: /* clc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s, cpu_cc_src);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xf9: /* stc */
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags(cpu_cc_src);
+        gen_compute_eflags(s, cpu_cc_src);
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xfc: /* cld */
         tcg_gen_movi_i32(cpu_tmp2_i32, 1);
@@ -6866,9 +6844,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_compute_eflags_c(cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T[0]);
         tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
@@ -6892,8 +6868,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             switch(b) {
             case 0: /* loopnz */
             case 1: /* loopz */
-                if (s->cc_op != CC_OP_DYNAMIC)
-                    gen_op_set_cc_op(s->cc_op);
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
                 gen_op_jz_ecx(s->aflag, l3);
                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
@@ -7432,12 +7406,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
            } else {
                 gen_op_mov_reg_v(ot, rm, t0);
             }
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
-            gen_compute_eflags(cpu_cc_src);
+            gen_compute_eflags(s, cpu_cc_src);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
-            s->cc_op = CC_OP_EFLAGS;
             tcg_temp_free(t0);
             tcg_temp_free(t1);
             tcg_temp_free(t2);
commit 5bdb91b0dd66b7e0fdfc801601c433ad4752aeb0
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 12 13:35:40 2012 +0200

    target-i386: use gen_jcc1 to compile loopz
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 64564e0..6fcd0f6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6896,13 +6896,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_op_set_cc_op(s->cc_op);
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
                 gen_op_jz_ecx(s->aflag, l3);
-                gen_compute_eflags(cpu_tmp0);
-                tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
-                if (b == 0) {
-                    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
-                } else {
-                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
-                }
+                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                 break;
             case 2: /* loop */
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
commit 6fa38ed219587723fcab9b878f42269489d51705
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 00:18:55 2012 +0200

    target-i386: clean up sahf
    
    Discard CC_DST and set s->cc_op immediately after computing EFLAGS.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
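
As background (an editorial sketch, not part of the patch): SAHF copies SF/ZF/AF/PF/CF from AH into the arithmetic flags and leaves OF alone, which is exactly the masking the diff below applies to cpu_cc_src and cpu_T[0]. In plain C, with the CC_* masks at their x86 bit positions:

    #include <stdio.h>

    #define CC_C 0x0001
    #define CC_P 0x0004
    #define CC_A 0x0010
    #define CC_Z 0x0040
    #define CC_S 0x0080
    #define CC_O 0x0800

    /* arithmetic flags after SAHF: OF is kept, the rest come from AH */
    static unsigned sahf_flags(unsigned eflags, unsigned char ah)
    {
        return (eflags & CC_O) | (ah & (CC_S | CC_Z | CC_A | CC_P | CC_C));
    }

    int main(void)
    {
        unsigned f = sahf_flags(CC_O, 0x41 /* ZF|CF in AH */);
        printf("flags=0x%03x\n", f);        /* 0x841: OF, ZF, CF */
        return 0;
    }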

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 80483c0..64564e0 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6502,10 +6502,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_compute_eflags(cpu_cc_src);
+        tcg_gen_discard_tl(cpu_cc_dst);
+        s->cc_op = CC_OP_EFLAGS;
+
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
commit f5847c912d62d60a9917ed1e88cd6d4548fd40f3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 00:18:55 2012 +0200

    target-i386: compute eflags outside rcl/rcr helper
    
    Always compute EFLAGS first since it is needed whenever
    the shift is non-zero, i.e. most of the time.  This makes it possible
    to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly
    removes cases where s->cc_op becomes CC_OP_DYNAMIC.  Also, we can
    remove cc_tmp and just modify cc_src from within the helper.
    
    Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
    and discarding cpu_cc_dst.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
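
A plain-C rendering of the 8-bit RCL path after this change (a sketch, not the helper itself): the flags are assumed to have been computed into cc_src already, the rotate consumes that carry, and the new CF/OF are written straight back into the same word. count is taken as already reduced to 1..8, as rclb_table does in the real helper; the diff below is authoritative.

    #include <stdio.h>

    #define CC_C 0x0001
    #define CC_O 0x0800

    static unsigned char rcl8(unsigned char t0, int count, unsigned *cc_src)
    {
        unsigned eflags = *cc_src;
        unsigned src = t0;
        unsigned res = ((unsigned)t0 << count) | ((eflags & CC_C) << (count - 1));

        if (count > 1) {
            res |= (unsigned)t0 >> (9 - count);    /* bits rotated around the top */
        }
        t0 = (unsigned char)res;
        *cc_src = (eflags & ~(CC_C | CC_O))
                  | (((src ^ t0) << 4) & CC_O)     /* OF: top bit changed */
                  | ((src >> (8 - count)) & CC_C); /* CF: last bit rotated out */
        return t0;
    }

    int main(void)
    {
        unsigned cc_src = CC_C;                    /* carry in = 1 */
        unsigned char v = rcl8(0x81, 1, &cc_src);
        printf("v=0x%02x CF=%u\n", (unsigned)v, cc_src & CC_C);  /* v=0x03, CF=1 */
        return 0;
    }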

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 7577e4f..cd35cd5 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -764,7 +764,6 @@ typedef struct CPUX86State {
     XMMReg xmm_regs[CPU_NB_REGS];
     XMMReg xmm_t0;
     MMXReg mmx_t0;
-    target_ulong cc_tmp; /* temporary for rcr/rcl */
 
     /* sysenter registers */
     uint32_t sysenter_cs;
diff --git a/target-i386/shift_helper_template.h b/target-i386/shift_helper_template.h
index dda0da3..cf91a2d 100644
--- a/target-i386/shift_helper_template.h
+++ b/target-i386/shift_helper_template.h
@@ -55,7 +55,7 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
     count = rclb_table[count];
 #endif
     if (count) {
-        eflags = helper_cc_compute_all(env, CC_OP);
+        eflags = env->cc_src;
         t0 &= DATA_MASK;
         src = t0;
         res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
@@ -63,11 +63,9 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
             res |= t0 >> (DATA_BITS + 1 - count);
         }
         t0 = res;
-        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+        env->cc_src = (eflags & ~(CC_C | CC_O)) |
             (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
             ((src >> (DATA_BITS - count)) & CC_C);
-    } else {
-        env->cc_tmp = -1;
     }
     return t0;
 }
@@ -86,7 +84,7 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
     count = rclb_table[count];
 #endif
     if (count) {
-        eflags = helper_cc_compute_all(env, CC_OP);
+        eflags = env->cc_src;
         t0 &= DATA_MASK;
         src = t0;
         res = (t0 >> count) |
@@ -95,11 +93,9 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
             res |= t0 << (DATA_BITS + 1 - count);
         }
         t0 = res;
-        env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+        env->cc_src = (eflags & ~(CC_C | CC_O)) |
             (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
             ((src >> (count - 1)) & CC_C);
-    } else {
-        env->cc_tmp = -1;
     }
     return t0;
 }
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0970954..80483c0 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -51,7 +51,7 @@
 
 /* global register indexes */
 static TCGv_ptr cpu_env;
-static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 /* local temps */
@@ -1706,10 +1706,11 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
                            int is_right)
 {
-    int label1;
-
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
+    gen_compute_eflags(cpu_cc_src);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    s->cc_op = CC_OP_EFLAGS;
 
     /* load */
     if (op1 == OR_TMP0)
@@ -1757,17 +1758,6 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_st_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_reg_T0(ot, op1);
-
-    /* update eflags */
-    label1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
-
-    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
-    gen_set_label(label1);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 }
 
 /* XXX: add faster immediate case */
@@ -7763,8 +7753,6 @@ void optimize_flags_init(void)
                                     "cc_src");
     cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
                                     "cc_dst");
-    cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp),
-                                    "cc_tmp");
 
 #ifdef TARGET_X86_64
     cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
commit 0ff6addd92979b9759efa1c0945526e6ac78ce5b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 00:18:55 2012 +0200

    target-i386: move eflags computation closer to gen_op_set_cc_op
    
    This ensures the invariant that cpu_cc_op matches s->cc_op when calling
    the helpers.  The next patches need this because gen_compute_eflags and
    gen_compute_eflags_c will take care of setting cpu_cc_op.
    
    Always compute EFLAGS first since it is needed whenever the shift is
    non-zero, i.e. most of the time.  This makes it possible to remove some
    writes of CC_OP_EFLAGS to cpu_cc_op and more importantly removes cases
    where s->cc_op becomes CC_OP_DYNAMIC.  These are slow and we want to
    avoid them: CC_OP_EFLAGS is quite efficient once we paid the initial
    cost of computing the flags.
    
    Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
    and discarding cpu_cc_dst.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index ed373c3..0970954 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1597,14 +1597,16 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_v(ot, op1, t0);
     }
     
-    /* update eflags */
+    /* update eflags.  It is needed anyway most of the time, do it always.  */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
+    gen_compute_eflags(cpu_cc_src);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    s->cc_op = CC_OP_EFLAGS;
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
 
-    gen_compute_eflags(cpu_cc_src);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
     tcg_gen_xor_tl(cpu_tmp0, t2, t0);
     tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1615,12 +1617,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
     }
     tcg_gen_andi_tl(t0, t0, CC_C);
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-    
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
+
     gen_set_label(label2);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1684,6 +1682,9 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
             gen_op_set_cc_op(s->cc_op);
 
         gen_compute_eflags(cpu_cc_src);
+        tcg_gen_discard_tl(cpu_cc_dst);
+        s->cc_op = CC_OP_EFLAGS;
+
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
         tcg_gen_xor_tl(cpu_tmp0, t1, t0);
         tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1694,10 +1695,6 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
         }
         tcg_gen_andi_tl(t0, t0, CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
-        tcg_gen_discard_tl(cpu_cc_dst);
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        s->cc_op = CC_OP_EFLAGS;
     }
 
     tcg_temp_free(t0);
commit 52320e15dbb0c2531501a924972e63cdb59742a7
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 00:18:55 2012 +0200

    target-i386: move carry computation for inc/dec closer to gen_op_set_cc_op
    
    This ensures the invariant that cpu_cc_op matches s->cc_op when calling
    the helpers.  The next patches need this because gen_compute_eflags and
    gen_compute_eflags_c will take care of setting cpu_cc_op.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
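
The reason the carry has to be captured first (background, not from the patch): INC and DEC update OF/SF/ZF/AF/PF but leave CF untouched, so the old carry must be saved into cpu_cc_src before cc_op is switched to CC_OP_INCB/CC_OP_DECB. A minimal 8-bit model, with AF and PF omitted:

    #include <stdio.h>

    #define CC_C 0x0001
    #define CC_Z 0x0040
    #define CC_S 0x0080
    #define CC_O 0x0800

    static unsigned char inc8(unsigned char x, unsigned *eflags)
    {
        unsigned cf = *eflags & CC_C;       /* CF survives an INC unchanged */
        unsigned char r = (unsigned char)(x + 1);
        unsigned f = cf;

        if (r == 0)    f |= CC_Z;
        if (r & 0x80)  f |= CC_S;
        if (x == 0x7f) f |= CC_O;           /* only 0x7f -> 0x80 overflows */
        *eflags = f;                        /* AF and PF left out for brevity */
        return r;
    }

    int main(void)
    {
        unsigned eflags = CC_C;             /* pretend a prior op set CF */
        unsigned char r = inc8(0x7f, &eflags);
        printf("r=0x%02x CF=%u OF=%u\n", (unsigned)r,
               eflags & CC_C, (eflags & CC_O) ? 1u : 0u);
        return 0;
    }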

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 48a3255..ed373c3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1373,6 +1373,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_ld_T0_A0(ot + s1->mem_index);
     if (s1->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s1->cc_op);
+    gen_compute_eflags_c(cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
         s1->cc_op = CC_OP_INCB + ot;
@@ -1384,7 +1385,6 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_mov_reg_T0(ot, d);
     else
         gen_op_st_T0_A0(ot + s1->mem_index);
-    gen_compute_eflags_c(cpu_cc_src);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
commit b27fc131fe8dc18924904e4dd0b82dfd77dc51c7
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 01:36:45 2012 +0200

    target-i386: drop cc_op argument of gen_jcc1
    
    As in the gen_repz_scas/gen_repz_cmps case, delay setting
    CC_OP_DYNAMIC in gen_jcc until after code generation.  All of
    gen_jcc1/is_fast_jcc/gen_setcc_slow_T0 now work on s->cc_op, which makes
    things a bit easier to follow and to patch.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9ac66b9..48a3255 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -954,7 +954,7 @@ static int is_fast_jcc_case(DisasContext *s, int b)
 
 /* generate a conditional jump to label 'l1' according to jump opcode
    value 'b'. In the fast case, T0 is guaranted not to be used. */
-static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+static inline void gen_jcc1(DisasContext *s, int b, int l1)
 {
     int inv, jcc_op, size, cond;
     TCGv t0;
@@ -962,14 +962,14 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
     inv = b & 1;
     jcc_op = (b >> 1) & 7;
 
-    switch(cc_op) {
+    switch (s->cc_op) {
         /* we optimize the cmp/jcc case */
     case CC_OP_SUBB:
     case CC_OP_SUBW:
     case CC_OP_SUBL:
     case CC_OP_SUBQ:
         
-        size = cc_op - CC_OP_SUBB;
+        size = s->cc_op - CC_OP_SUBB;
         switch(jcc_op) {
         case JCC_Z:
         fast_jcc_z:
@@ -1053,10 +1053,10 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
     case CC_OP_SARQ:
         switch(jcc_op) {
         case JCC_Z:
-            size = (cc_op - CC_OP_ADDB) & 3;
+            size = (s->cc_op - CC_OP_ADDB) & 3;
             goto fast_jcc_z;
         case JCC_S:
-            size = (cc_op - CC_OP_ADDB) & 3;
+            size = (s->cc_op - CC_OP_ADDB) & 3;
             goto fast_jcc_s;
         default:
             goto slow_jcc;
@@ -1197,7 +1197,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
     gen_ ## op(s, ot);                                                        \
     gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
     gen_op_set_cc_op(s->cc_op);                                               \
-    gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2);                       \
+    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
     gen_jmp(s, cur_eip);                                                      \
@@ -2303,13 +2303,15 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
 static inline void gen_jcc(DisasContext *s, int b,
                            target_ulong val, target_ulong next_eip)
 {
-    int l1, l2, cc_op;
+    int l1, l2;
 
-    cc_op = s->cc_op;
-    gen_update_cc_op(s);
+    if (s->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s->cc_op);
+    }
     if (s->jmp_opt) {
         l1 = gen_new_label();
-        gen_jcc1(s, cc_op, b, l1);
+        gen_jcc1(s, b, l1);
+        s->cc_op = CC_OP_DYNAMIC;
         
         gen_goto_tb(s, 0, next_eip);
 
@@ -2320,7 +2322,8 @@ static inline void gen_jcc(DisasContext *s, int b,
 
         l1 = gen_new_label();
         l2 = gen_new_label();
-        gen_jcc1(s, cc_op, b, l1);
+        gen_jcc1(s, b, l1);
+        s->cc_op = CC_OP_DYNAMIC;
 
         gen_jmp_im(next_eip);
         tcg_gen_br(l2);
@@ -2343,7 +2346,7 @@ static void gen_setcc(DisasContext *s, int b)
         t0 = tcg_temp_local_new();
         tcg_gen_movi_tl(t0, 0);
         l1 = gen_new_label();
-        gen_jcc1(s, s->cc_op, b ^ 1, l1);
+        gen_jcc1(s, b ^ 1, l1);
         tcg_gen_movi_tl(t0, 1);
         gen_set_label(l1);
         tcg_gen_mov_tl(cpu_T[0], t0);
@@ -6027,7 +6030,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     };
                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                     l1 = gen_new_label();
-                    gen_jcc1(s, s->cc_op, op1, l1);
+                    gen_jcc1(s, op1, l1);
                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                     gen_set_label(l1);
                 }
@@ -6418,7 +6421,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (ot == OT_LONG) {
                 /* XXX: specific Intel behaviour ? */
                 l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                gen_jcc1(s, b ^ 1, l1);
                 tcg_gen_mov_tl(cpu_regs[reg], t0);
                 gen_set_label(l1);
                 tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
@@ -6426,7 +6429,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #endif
             {
                 l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
+                gen_jcc1(s, b ^ 1, l1);
                 gen_op_mov_reg_v(ot, reg, t0);
                 gen_set_label(l1);
             }
commit 91642ff80607ad90c66ba044fe91e4a53b09bdbb
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 01:22:09 2012 +0200

    target-i386: factor setting of s->cc_op handling for string functions
    
    Set it to the appropriate CC_OP_SUBx constant in gen_scas/gen_cmps.
    In the repz case it can be overridden to CC_OP_DYNAMIC after generating
    the code.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-i386/translate.c b/target-i386/translate.c
index ccb06e2..9ac66b9 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1112,6 +1112,7 @@ static inline void gen_scas(DisasContext *s, int ot)
     gen_op_cmpl_T0_T1_cc();
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    s->cc_op = CC_OP_SUBB + ot;
 }
 
 static inline void gen_cmps(DisasContext *s, int ot)
@@ -1124,6 +1125,7 @@ static inline void gen_cmps(DisasContext *s, int ot)
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    s->cc_op = CC_OP_SUBB + ot;
 }
 
 static inline void gen_ins(DisasContext *s, int ot)
@@ -1194,11 +1196,12 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot,                   \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
     gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
-    gen_op_set_cc_op(CC_OP_SUBB + ot);                                        \
-    gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2);                \
+    gen_op_set_cc_op(s->cc_op);                                               \
+    gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2);                       \
     if (!s->jmp_opt)                                                          \
         gen_op_jz_ecx(s->aflag, l2);                                          \
     gen_jmp(s, cur_eip);                                                      \
+    s->cc_op = CC_OP_DYNAMIC;                                                 \
 }
 
 GEN_REPZ(movs)
@@ -6088,7 +6091,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
         } else {
             gen_scas(s, ot);
-            s->cc_op = CC_OP_SUBB + ot;
         }
         break;
 
@@ -6104,7 +6106,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
         } else {
             gen_cmps(s, ot);
-            s->cc_op = CC_OP_SUBB + ot;
         }
         break;
     case 0x6c: /* insS */
commit d824df34e8cdd2fbe55258f26731d7ef3ac7ced2
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 5 18:02:41 2012 +0200

    target-i386: introduce gen_ext_tl
    
    Introduce a function that abstracts extracting an 8, 16, 32 or 64-bit value
    with or without sign, generalizing gen_extu and gen_exts.
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
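
For readers not fluent in TCG, this is what the emitted code computes at the value level; a sketch only, not the TCG version, with sizes following the OT_* encoding (0 = byte, 1 = word, 2 = long, 3 = quad).

    #include <stdbool.h>
    #include <stdint.h>

    /* Widen an 8-, 16- or 32-bit value to full width, with or without sign. */
    static uint64_t ext_tl(uint64_t src, int size, bool sign)
    {
        switch (size) {
        case 0:  /* OT_BYTE */
            return sign ? (uint64_t)(int64_t)(int8_t)src  : (uint8_t)src;
        case 1:  /* OT_WORD */
            return sign ? (uint64_t)(int64_t)(int16_t)src : (uint16_t)src;
        case 2:  /* OT_LONG */
            return sign ? (uint64_t)(int64_t)(int32_t)src : (uint32_t)src;
        default: /* OT_QUAD: already full width */
            return src;
        }
    }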

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 94e1434..ccb06e2 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -659,38 +659,45 @@ static inline void gen_op_movl_T0_Dshift(int ot)
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
 };
 
-static void gen_extu(int ot, TCGv reg)
+static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
 {
-    switch(ot) {
+    switch (size) {
     case OT_BYTE:
-        tcg_gen_ext8u_tl(reg, reg);
-        break;
+        if (sign) {
+            tcg_gen_ext8s_tl(dst, src);
+        } else {
+            tcg_gen_ext8u_tl(dst, src);
+        }
+        return dst;
     case OT_WORD:
-        tcg_gen_ext16u_tl(reg, reg);
-        break;
+        if (sign) {
+            tcg_gen_ext16s_tl(dst, src);
+        } else {
+            tcg_gen_ext16u_tl(dst, src);
+        }
+        return dst;
+#ifdef TARGET_X86_64
     case OT_LONG:
-        tcg_gen_ext32u_tl(reg, reg);
-        break;
+        if (sign) {
+            tcg_gen_ext32s_tl(dst, src);
+        } else {
+            tcg_gen_ext32u_tl(dst, src);
+        }
+        return dst;
+#endif
     default:
-        break;
+        return src;
     }
 }
 
+static void gen_extu(int ot, TCGv reg)
+{
+    gen_ext_tl(reg, reg, ot, false);
+}
+
 static void gen_exts(int ot, TCGv reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        tcg_gen_ext8s_tl(reg, reg);
-        break;
-    case OT_WORD:
-        tcg_gen_ext16s_tl(reg, reg);
-        break;
-    case OT_LONG:
-        tcg_gen_ext32s_tl(reg, reg);
-        break;
-    default:
-        break;
-    }
+    gen_ext_tl(reg, reg, ot, true);
 }
 
 static inline void gen_op_jnz_ecx(int size, int label1)
@@ -966,54 +973,15 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
         switch(jcc_op) {
         case JCC_Z:
         fast_jcc_z:
-            switch(size) {
-            case 0:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
-                t0 = cpu_tmp0;
-                break;
-            case 1:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
-                t0 = cpu_tmp0;
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
-                t0 = cpu_tmp0;
-                break;
-#endif
-            default:
-                t0 = cpu_cc_dst;
-                break;
-            }
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false);
             tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
             break;
         case JCC_S:
         fast_jcc_s:
-            switch(size) {
-            case 0:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
-                break;
-            case 1:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
-                break;
-#endif
-            default:
-                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
-                                   0, l1);
-                break;
-            }
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true);
+            tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1);
             break;
-            
+
         case JCC_B:
             cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
             goto fast_jcc_b;
@@ -1021,28 +989,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
             cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
         fast_jcc_b:
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            switch(size) {
-            case 0:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
-                break;
-            case 1:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                t0 = cpu_tmp0;
-                tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
-                tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
-                break;
-#endif
-            default:
-                t0 = cpu_cc_src;
-                break;
-            }
+            gen_extu(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
             
@@ -1053,28 +1001,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
             cond = inv ? TCG_COND_GT : TCG_COND_LE;
         fast_jcc_l:
             tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
-            switch(size) {
-            case 0:
-                t0 = cpu_tmp0;
-                tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext8s_tl(t0, cpu_cc_src);
-                break;
-            case 1:
-                t0 = cpu_tmp0;
-                tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext16s_tl(t0, cpu_cc_src);
-                break;
-#ifdef TARGET_X86_64
-            case 2:
-                t0 = cpu_tmp0;
-                tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
-                tcg_gen_ext32s_tl(t0, cpu_cc_src);
-                break;
-#endif
-            default:
-                t0 = cpu_cc_src;
-                break;
-            }
+            gen_exts(size, cpu_tmp4);
+            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
             
commit 93ab25d7d129fbe47a99fd8c91292ea99bff747e
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Oct 6 01:56:03 2012 +0200

    target-i386: use OT_* consistently
    
    Reviewed-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
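
For reference when reading the hunks below, the OT_* constants that replace the bare case numbers encode log2 of the operand size in bytes (values as defined near the top of target-i386/translate.c in this era; reproduced here as a plain enum for illustration):

    enum {
        OT_BYTE = 0,   /*  8-bit */
        OT_WORD = 1,   /* 16-bit */
        OT_LONG = 2,   /* 32-bit */
        OT_QUAD = 3,   /* 64-bit */
    };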

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 112c310..94e1434 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -323,17 +323,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg)
 static inline void gen_op_mov_reg_A0(int size, int reg)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
         break;
     default: /* XXX this shouldn't be reached;  abort? */
-    case 1:
+    case OT_WORD:
         /* For x86_64, this sets the higher half of register to zero.
            For i386, this is equivalent to a mov. */
         tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
         break;
 #ifdef TARGET_X86_64
-    case 2:
+    case OT_LONG:
         tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
         break;
 #endif
@@ -398,11 +398,11 @@ static inline void gen_op_jmp_T0(void)
 static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
         /* For x86_64, this sets the higher half of register to zero.
            For i386, this is equivalent to a nop. */
@@ -410,7 +410,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
         tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
         break;
 #ifdef TARGET_X86_64
-    case 2:
+    case OT_LONG:
         tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
         break;
 #endif
@@ -420,11 +420,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
 static inline void gen_op_add_reg_T0(int size, int reg)
 {
     switch(size) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
         /* For x86_64, this sets the higher half of register to zero.
            For i386, this is equivalent to a nop. */
@@ -432,7 +432,7 @@ static inline void gen_op_add_reg_T0(int size, int reg)
         tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
         break;
 #ifdef TARGET_X86_64
-    case 2:
+    case OT_LONG:
         tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
         break;
 #endif
@@ -506,14 +506,14 @@ static inline void gen_op_lds_T0_A0(int idx)
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
         break;
     default:
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
         break;
     }
@@ -523,17 +523,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_ld8u(t0, a0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_ld16u(t0, a0, mem_index);
         break;
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_ld32u(t0, a0, mem_index);
         break;
     default:
-    case 3:
+    case OT_QUAD:
         /* Should never happen on 32-bit targets.  */
 #ifdef TARGET_X86_64
         tcg_gen_qemu_ld64(t0, a0, mem_index);
@@ -562,17 +562,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
-    case 0:
+    case OT_BYTE:
         tcg_gen_qemu_st8(t0, a0, mem_index);
         break;
-    case 1:
+    case OT_WORD:
         tcg_gen_qemu_st16(t0, a0, mem_index);
         break;
-    case 2:
+    case OT_LONG:
         tcg_gen_qemu_st32(t0, a0, mem_index);
         break;
     default:
-    case 3:
+    case OT_QUAD:
         /* Should never happen on 32-bit targets.  */
 #ifdef TARGET_X86_64
         tcg_gen_qemu_st64(t0, a0, mem_index);
@@ -710,21 +710,31 @@ static inline void gen_op_jz_ecx(int size, int label1)
 static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
 {
     switch (ot) {
-    case 0: gen_helper_inb(v, n); break;
-    case 1: gen_helper_inw(v, n); break;
-    case 2: gen_helper_inl(v, n); break;
+    case OT_BYTE:
+        gen_helper_inb(v, n);
+        break;
+    case OT_WORD:
+        gen_helper_inw(v, n);
+        break;
+    case OT_LONG:
+        gen_helper_inl(v, n);
+        break;
     }
-
 }
 
 static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
 {
     switch (ot) {
-    case 0: gen_helper_outb(v, n); break;
-    case 1: gen_helper_outw(v, n); break;
-    case 2: gen_helper_outl(v, n); break;
+    case OT_BYTE:
+        gen_helper_outb(v, n);
+        break;
+    case OT_WORD:
+        gen_helper_outw(v, n);
+        break;
+    case OT_LONG:
+        gen_helper_outl(v, n);
+        break;
     }
-
 }
 
 static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
@@ -741,13 +751,13 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         switch (ot) {
-        case 0:
+        case OT_BYTE:
             gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
             break;
-        case 1:
+        case OT_WORD:
             gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
             break;
-        case 2:
+        case OT_LONG:
             gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
             break;
         }
@@ -1781,34 +1791,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
     
     if (is_right) {
         switch (ot) {
-        case 0:
+        case OT_BYTE:
             gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case 1:
+        case OT_WORD:
             gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case 2:
+        case OT_LONG:
             gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #ifdef TARGET_X86_64
-        case 3:
+        case OT_QUAD:
             gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #endif
         }
     } else {
         switch (ot) {
-        case 0:
+        case OT_BYTE:
             gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case 1:
+        case OT_WORD:
             gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
-        case 2:
+        case OT_LONG:
             gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #ifdef TARGET_X86_64
-        case 3:
+        case OT_QUAD:
             gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
             break;
 #endif
commit 1b99f83e3946c447eefb3417ec1ea4c2f3b44582
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 12 16:40:21 2012 +0200

    test-i386: make it compile with a recent gcc
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
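
The change replaces the old "=wq" output constraint, which recent GCC rejects, with the generic "=r" (any general-purpose register). A minimal stand-alone illustration of the accepted form, x86 GCC syntax; the operands and the lea expression here are made up, not taken from the test:

    #include <stdio.h>

    int main(void)
    {
        int in = 5, res;
        /* "=r": let the compiler pick any general register for the output */
        asm("leal 4(%1,%1,2), %0"   /* res = 3*in + 4 */
            : "=r" (res)
            : "r" (in));
        printf("%d\n", res);        /* prints 19 */
        return 0;
    }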

diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index b18fe20..b05572b 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -209,7 +209,7 @@ static inline long i2l(long v)
 #define TEST_LEA16(STR)\
 {\
     asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
-        : "=wq" (res)\
+        : "=r" (res)\
         : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
     printf("lea %s = %08lx\n", STR, res);\
 }
@@ -1828,7 +1828,7 @@ void test_exceptions(void)
     printf("lock nop exception:\n");
     if (setjmp(jmp_env) == 0) {
         /* now execute an invalid instruction */
-        asm volatile(".byte 0xf0, 0x90"); /* lock nop */
+        asm volatile(".byte 0xf0, 0x90");
     }
 
     printf("INT exception:\n");
commit 40475087a5ee80f5251dac6087142458d8dc7d99
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 12 16:40:41 2012 +0200

    test-i386: QEMU_PACKED is not defined here
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
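
test-i386.c is built stand-alone and does not pull in QEMU's headers, so the QEMU_PACKED shorthand (essentially __attribute__((packed)) from qemu's compiler header) is unavailable there; the patch spells the attribute out. A small self-contained example of why packing matters for the far-pointer struct used by the test; the field names mirror the diff, and the sizes assume typical x86 alignment rules:

    #include <assert.h>
    #include <stdint.h>

    /* Without the attribute the compiler pads the struct out to a multiple
       of 4, breaking the exact 6-byte offset:selector layout that an
       lcall/ljmp memory operand needs. */
    struct __attribute__((__packed__)) segoff {
        uint32_t offset;
        uint16_t seg;
    };

    int main(void)
    {
        assert(sizeof(struct segoff) == 6);   /* 8 would be typical unpacked */
        return 0;
    }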

diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index 6dc730d..b18fe20 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -925,7 +925,7 @@ void test_fbcd(double a)
 
 void test_fenv(void)
 {
-    struct QEMU_PACKED {
+    struct __attribute__((__packed__)) {
         uint16_t fpuc;
         uint16_t dummy1;
         uint16_t fpus;
@@ -935,7 +935,7 @@ void test_fenv(void)
         uint32_t ignored[4];
         long double fpregs[8];
     } float_env32;
-    struct QEMU_PACKED {
+    struct __attribute__((__packed__)) {
         uint16_t fpuc;
         uint16_t fpus;
         uint16_t fptag;
@@ -1280,7 +1280,7 @@ void test_segs(void)
     struct {
         uint32_t offset;
         uint16_t seg;
-    } QEMU_PACKED segoff;
+    } __attribute__((__packed__)) segoff;
 
     ldt.entry_number = 1;
     ldt.base_addr = (unsigned long)&seg_data1;

