[Spice-commits] 90 commits - audio/audio_template.h configure disas.c exec-all.h exec.c fpu/softfloat-specialize.h fpu/softfloat.h gdbstub.c hw/usb linux-user/qemu.h oslib-win32.c qemu-os-win32.h qemu-timer.h target-alpha/translate.c target-sh4/helper.h target-sh4/op_helper.c target-sh4/translate.c target-xtensa/cpu.h target-xtensa/helper.h target-xtensa/op_helper.c target-xtensa/overlay_tool.h target-xtensa/translate.c tcg/README tcg/arm tcg/hppa tcg/i386 tcg/ia64 tcg/mips tcg/optimize.c tcg/ppc tcg/ppc64 tcg/s390 tcg/sparc tcg/tcg-op.h tcg/tcg-opc.h tcg/tcg.c tcg/tcg.h tcg/tci trace-events

Gerd Hoffmann kraxel at kemper.freedesktop.org
Tue Sep 25 04:03:15 PDT 2012


 audio/audio_template.h       |    6 
 configure                    |   54 --
 disas.c                      |    2 
 exec-all.h                   |    9 
 exec.c                       |   12 
 fpu/softfloat-specialize.h   |   72 +++
 fpu/softfloat.h              |    7 
 gdbstub.c                    |  152 ++++--
 hw/usb/hcd-ehci.c            |  241 ++++++----
 hw/usb/hcd-uhci.c            |    5 
 hw/usb/host-linux.c          |    2 
 hw/usb/redirect.c            |  504 ++++++++++++++++++++--
 linux-user/qemu.h            |   47 --
 oslib-win32.c                |   24 +
 qemu-os-win32.h              |    6 
 qemu-timer.h                 |    8 
 target-alpha/translate.c     |  102 ++--
 target-sh4/helper.h          |   24 -
 target-sh4/op_helper.c       |  123 -----
 target-sh4/translate.c       |  181 ++++---
 target-xtensa/cpu.h          |    8 
 target-xtensa/helper.h       |   21 
 target-xtensa/op_helper.c    |  134 +++++
 target-xtensa/overlay_tool.h |    1 
 target-xtensa/translate.c    |  338 ++++++++++++++
 tcg/README                   |    9 
 tcg/arm/tcg-target.c         |    6 
 tcg/arm/tcg-target.h         |    1 
 tcg/hppa/tcg-target.c        |  214 ++++++---
 tcg/hppa/tcg-target.h        |    1 
 tcg/i386/tcg-target.c        |  139 +++---
 tcg/i386/tcg-target.h        |   11 
 tcg/ia64/tcg-target.c        |    6 
 tcg/ia64/tcg-target.h        |    2 
 tcg/mips/tcg-target.c        |  342 ++++++++++-----
 tcg/mips/tcg-target.h        |   24 -
 tcg/optimize.c               |  390 +++++++++++------
 tcg/ppc/tcg-target.c         |   81 +++
 tcg/ppc/tcg-target.h         |    1 
 tcg/ppc64/tcg-target.c       |    6 
 tcg/ppc64/tcg-target.h       |    2 
 tcg/s390/tcg-target.c        |    5 
 tcg/s390/tcg-target.h        |    2 
 tcg/sparc/tcg-target.c       |  976 ++++++++++++++++++++-----------------------
 tcg/sparc/tcg-target.h       |   35 -
 tcg/tcg-op.h                 |   40 +
 tcg/tcg-opc.h                |    2 
 tcg/tcg.c                    |   28 -
 tcg/tcg.h                    |    3 
 tcg/tci/tcg-target.c         |    6 
 tcg/tci/tcg-target.h         |    2 
 trace-events                 |    9 
 52 files changed, 2921 insertions(+), 1505 deletions(-)

New commits:
commit d3e8f95753114a827f9cd8e819b1d5cc8333f76b
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sat Sep 22 22:26:19 2012 +0200

    w32: Add implementation of gmtime_r, localtime_r
    
    Those functions are missing in MinGW.
    
    Some versions of MinGW-w64 include defines for gmtime_r and localtime_r.
    Older versions of these macros are buggy (they return a pointer to a
    static variable), so we don't want them. Newer versions are similar to
    the code used here, but without the memset.

    The implementation used here is not strictly reentrant, but it is
    sufficiently good for QEMU on w32 or w64.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    [blauwirbel at gmail.com: added comment about locking]
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
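
For context, a minimal caller sketch (not part of the patch; names and the
output format are illustrative) showing the intended usage: the caller
supplies the struct tm storage, which is why copying gmtime()'s static
result out is good enough here.

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        time_t now = time(NULL);
        struct tm tm_buf;                /* caller-provided storage */

        /* On MinGW this resolves to the wrapper added below. */
        if (gmtime_r(&now, &tm_buf)) {
            printf("%04d-%02d-%02d %02d:%02d:%02d UTC\n",
                   tm_buf.tm_year + 1900, tm_buf.tm_mon + 1, tm_buf.tm_mday,
                   tm_buf.tm_hour, tm_buf.tm_min, tm_buf.tm_sec);
        }
        return 0;
    }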

diff --git a/oslib-win32.c b/oslib-win32.c
index ffbc6d0..51b33e8 100644
--- a/oslib-win32.c
+++ b/oslib-win32.c
@@ -74,6 +74,30 @@ void qemu_vfree(void *ptr)
     VirtualFree(ptr, 0, MEM_RELEASE);
 }
 
+/* FIXME: add proper locking */
+struct tm *gmtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = gmtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
+/* FIXME: add proper locking */
+struct tm *localtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = localtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
 void socket_set_block(int fd)
 {
     unsigned long opt = 0;
diff --git a/qemu-os-win32.h b/qemu-os-win32.h
index 753679b..3b5a35b 100644
--- a/qemu-os-win32.h
+++ b/qemu-os-win32.h
@@ -68,6 +68,12 @@
 /* Declaration of ffs() is missing in MinGW's strings.h. */
 int ffs(int i);
 
+/* Missing POSIX functions. Don't use MinGW-w64 macros. */
+#undef gmtime_r
+struct tm *gmtime_r(const time_t *timep, struct tm *result);
+#undef localtime_r
+struct tm *localtime_r(const time_t *timep, struct tm *result);
+
 static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
 static inline void os_setup_post(void) {}
commit 93b6599734f81328ee3d608f57667742cafeea72
Author: Stefan Weil <sw at weilnetz.de>
Date:   Mon Sep 3 09:25:16 2012 +0000

    audio: Fix warning from static code analysis
    
    smatch report:
    audio/audio_template.h:416 AUD_open_out(18) warn:
     variable dereferenced before check 'as' (see line 414)
    
    Moving the ldebug statement after the statement which checks 'as'
    fixes that warning.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: malc <av1474 at comtv.ru>

diff --git a/audio/audio_template.h b/audio/audio_template.h
index 519432a..16f7880 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -410,15 +410,15 @@ SW *glue (AUD_open_, TYPE) (
     SW *old_sw = NULL;
 #endif
 
-    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
-            name, as->freq, as->nchannels, as->fmt);
-
     if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) {
         dolog ("card=%p name=%p callback_fn=%p as=%p\n",
                card, name, callback_fn, as);
         goto fail;
     }
 
+    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
+            name, as->freq, as->nchannels, as->fmt);
+
     if (audio_bug (AUDIO_FUNC, audio_validate_settings (as))) {
         audio_print_settings (as);
         goto fail;
commit 04cbbdeefdda2615b11f7890f1e61b750e83129f
Merge: ef04a84... f4bf0b9...
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sat Sep 22 17:59:15 2012 +0000

    Merge branch 'tcg-sparc' of git://repo.or.cz/qemu/rth
    
    * 'tcg-sparc' of git://repo.or.cz/qemu/rth:
      tcg-sparc: Preserve branch destinations during retranslation
      tcg-sparc: Fix and enable direct TB chaining.
      tcg-sparc: Add %g/%o registers to alloc_order
      tcg-sparc: Use defines for temporaries.
      tcg-sparc: Mask shift immediates to avoid illegal insns.
      tcg-sparc: Clean up cruft stemming from attempts to use global registers.
      tcg-sparc: Change AREG0 in generated code to %i0.
      tcg-sparc: Support GUEST_BASE.
      tcg-sparc: Fix qemu_ld/st to handle 32-bit host.
      tcg-sparc: Assume v9 cpu always, i.e. force v8plus in 32-bit mode.
      tcg-sparc: Don't MAP_FIXED on top of the program
      tcg-sparc: Fix ADDX opcode.
      tcg-sparc: Hack in qemu_ld/st64 for 32-bit.
      linux-user: Use memcpy in get_user/put_user.

commit ef04a8467eae31bc93b9458ad3d30a6ad1303327
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:59 2012 +0400

    target-xtensa: implement coprocessor context option
    
    When the Coprocessor Context option is enabled, the CPENABLE SR bits
    control whether access to a coprocessor is allowed or raises one of the
    CoprocessorXDisabled exceptions.
    
    See ISA, 4.4.5 for more details.
    
    FP is coprocessor 0.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
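
As a rough model of the check added below (a stand-alone sketch, not the
actual TCG code; the cause value and helper names here are illustrative),
coprocessor access is gated per instruction by the corresponding CPENABLE
bit whenever the option is configured:

    #include <stdbool.h>
    #include <stdio.h>

    enum { COPROCESSOR0_DISABLED = 32 };    /* illustrative cause code */

    /* Mirrors the bit test in gen_check_cpenable(): with the option
       enabled, coprocessor cp is usable only if CPENABLE bit cp is set;
       otherwise CoprocessorXDisabled (cause base + cp) would be raised. */
    static bool cp_access_ok(bool cp_option, unsigned cpenable, unsigned cp)
    {
        return !cp_option || (cpenable & (1u << cp)) != 0;
    }

    int main(void)
    {
        unsigned cpenable = 0x01;                /* only cp0 (FP) enabled */

        printf("cp0 (FP) ok: %d\n", cp_access_ok(true, cpenable, 0));
        printf("cp1 ok: %d (would raise cause %d)\n",
               cp_access_ok(true, cpenable, 1), COPROCESSOR0_DISABLED + 1);
        return 0;
    }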

diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index b456283..7348277 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -468,6 +468,8 @@ static inline int cpu_mmu_index(CPUXtensaState *env)
 #define XTENSA_TBFLAG_LITBASE 0x8
 #define XTENSA_TBFLAG_DEBUG 0x10
 #define XTENSA_TBFLAG_ICOUNT 0x20
+#define XTENSA_TBFLAG_CPENABLE_MASK 0x3fc0
+#define XTENSA_TBFLAG_CPENABLE_SHIFT 6
 
 static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
         target_ulong *cs_base, int *flags)
@@ -491,6 +493,9 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
             *flags |= XTENSA_TBFLAG_ICOUNT;
         }
     }
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_COPROCESSOR)) {
+        *flags |= env->sregs[CPENABLE] << XTENSA_TBFLAG_CPENABLE_SHIFT;
+    }
 }
 
 #include "cpu-all.h"
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 9308cdf..ba3ffcb 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -65,6 +65,8 @@ typedef struct DisasContext {
     bool debug;
     bool icount;
     TCGv_i32 next_icount;
+
+    unsigned cpenable;
 } DisasContext;
 
 static TCGv_ptr cpu_env;
@@ -331,6 +333,15 @@ static void gen_check_privilege(DisasContext *dc)
     }
 }
 
+static void gen_check_cpenable(DisasContext *dc, unsigned cp)
+{
+    if (option_enabled(dc, XTENSA_OPTION_COPROCESSOR) &&
+            !(dc->cpenable & (1 << cp))) {
+        gen_exception_cause(dc, COPROCESSOR0_DISABLED + cp);
+        dc->is_jmp = DISAS_UPDATE;
+    }
+}
+
 static void gen_jump_slot(DisasContext *dc, TCGv dest, int slot)
 {
     tcg_gen_mov_i32(cpu_pc, dest);
@@ -579,6 +590,13 @@ static void gen_wsr_dbreakc(DisasContext *dc, uint32_t sr, TCGv_i32 v)
     }
 }
 
+static void gen_wsr_cpenable(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+    tcg_gen_andi_i32(cpu_SR[sr], v, 0xff);
+    /* This can change tb->flags, so exit tb */
+    gen_jumpi_check_loop_end(dc, -1);
+}
+
 static void gen_wsr_intset(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
     tcg_gen_andi_i32(cpu_SR[sr], v,
@@ -681,6 +699,7 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
         [DBREAKA + 1] = gen_wsr_dbreaka,
         [DBREAKC] = gen_wsr_dbreakc,
         [DBREAKC + 1] = gen_wsr_dbreakc,
+        [CPENABLE] = gen_wsr_cpenable,
         [INTSET] = gen_wsr_intset,
         [INTCLEAR] = gen_wsr_intclear,
         [INTENABLE] = gen_wsr_intenable,
@@ -1850,6 +1869,7 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 5: /*SSXUf*/
                 HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
                 gen_window_check2(dc, RRR_S, RRR_T);
+                gen_check_cpenable(dc, 0);
                 {
                     TCGv_i32 addr = tcg_temp_new_i32();
                     tcg_gen_add_i32(addr, cpu_R[RRR_S], cpu_R[RRR_T]);
@@ -1909,26 +1929,31 @@ static void disas_xtensa_insn(DisasContext *dc)
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
             switch (OP2) {
             case 0: /*ADD.Sf*/
+                gen_check_cpenable(dc, 0);
                 gen_helper_add_s(cpu_FR[RRR_R], cpu_env,
                         cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
 
             case 1: /*SUB.Sf*/
+                gen_check_cpenable(dc, 0);
                 gen_helper_sub_s(cpu_FR[RRR_R], cpu_env,
                         cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
 
             case 2: /*MUL.Sf*/
+                gen_check_cpenable(dc, 0);
                 gen_helper_mul_s(cpu_FR[RRR_R], cpu_env,
                         cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
 
             case 4: /*MADD.Sf*/
+                gen_check_cpenable(dc, 0);
                 gen_helper_madd_s(cpu_FR[RRR_R], cpu_env,
                         cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
 
             case 5: /*MSUB.Sf*/
+                gen_check_cpenable(dc, 0);
                 gen_helper_msub_s(cpu_FR[RRR_R], cpu_env,
                         cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
@@ -1939,6 +1964,7 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 11: /*CEIL.Sf*/
             case 14: /*UTRUNC.Sf*/
                 gen_window_check1(dc, RRR_R);
+                gen_check_cpenable(dc, 0);
                 {
                     static const unsigned rounding_mode_const[] = {
                         float_round_nearest_even,
@@ -1967,6 +1993,7 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 12: /*FLOAT.Sf*/
             case 13: /*UFLOAT.Sf*/
                 gen_window_check1(dc, RRR_S);
+                gen_check_cpenable(dc, 0);
                 {
                     TCGv_i32 scale = tcg_const_i32(-RRR_T);
 
@@ -1984,24 +2011,29 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 15: /*FP1OP*/
                 switch (RRR_T) {
                 case 0: /*MOV.Sf*/
+                    gen_check_cpenable(dc, 0);
                     tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
                     break;
 
                 case 1: /*ABS.Sf*/
+                    gen_check_cpenable(dc, 0);
                     gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
                     break;
 
                 case 4: /*RFRf*/
                     gen_window_check1(dc, RRR_R);
+                    gen_check_cpenable(dc, 0);
                     tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]);
                     break;
 
                 case 5: /*WFRf*/
                     gen_window_check1(dc, RRR_S);
+                    gen_check_cpenable(dc, 0);
                     tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]);
                     break;
 
                 case 6: /*NEG.Sf*/
+                    gen_check_cpenable(dc, 0);
                     gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
                     break;
 
@@ -2024,6 +2056,7 @@ static void disas_xtensa_insn(DisasContext *dc)
     do { \
         TCGv_i32 bit = tcg_const_i32(1 << br); \
         \
+        gen_check_cpenable(dc, 0); \
         gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \
         tcg_temp_free(bit); \
     } while (0)
@@ -2064,6 +2097,7 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 10: /*MOVLTZ.Sf*/
             case 11: /*MOVGEZ.Sf*/
                 gen_window_check1(dc, RRR_T);
+                gen_check_cpenable(dc, 0);
                 {
                     static const TCGCond cond[] = {
                         TCG_COND_NE,
@@ -2081,6 +2115,7 @@ static void disas_xtensa_insn(DisasContext *dc)
             case 12: /*MOVF.Sf*/
             case 13: /*MOVT.Sf*/
                 HAS_OPTION(XTENSA_OPTION_BOOLEAN);
+                gen_check_cpenable(dc, 0);
                 {
                     int label = gen_new_label();
                     TCGv_i32 tmp = tcg_temp_new_i32();
@@ -2336,6 +2371,7 @@ static void disas_xtensa_insn(DisasContext *dc)
         case 12: /*SSIUf*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
             gen_window_check1(dc, RRI8_S);
+            gen_check_cpenable(dc, 0);
             {
                 TCGv_i32 addr = tcg_temp_new_i32();
                 tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2);
@@ -2853,6 +2889,8 @@ static void gen_intermediate_code_internal(
     dc.ccount_delta = 0;
     dc.debug = tb->flags & XTENSA_TBFLAG_DEBUG;
     dc.icount = tb->flags & XTENSA_TBFLAG_ICOUNT;
+    dc.cpenable = (tb->flags & XTENSA_TBFLAG_CPENABLE_MASK) >>
+        XTENSA_TBFLAG_CPENABLE_SHIFT;
 
     init_litbase(&dc);
     init_sar_tracker(&dc);
commit 4e273869671d78660cd709183d3c5dec3a17de79
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:58 2012 +0400

    target-xtensa: implement FP1 group
    
    These are comparison and conditional move opcodes.
    See ISA, 4.3.10 for more details.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 9557347..4cc0088 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -49,4 +49,12 @@ DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
 DEF_HELPER_3(itof, f32, env, i32, i32)
 DEF_HELPER_3(uitof, f32, env, i32, i32)
 
+DEF_HELPER_4(un_s, void, env, i32, f32, f32)
+DEF_HELPER_4(oeq_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ueq_s, void, env, i32, f32, f32)
+DEF_HELPER_4(olt_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ult_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ole_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ule_s, void, env, i32, f32, f32)
+
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 5cf9c02..ae0c099 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -858,3 +858,50 @@ float32 HELPER(uitof)(CPUXtensaState *env, uint32_t v, uint32_t scale)
     return float32_scalbn(uint32_to_float32(v, &env->fp_status),
             (int32_t)scale, &env->fp_status);
 }
+
+static inline void set_br(CPUXtensaState *env, bool v, uint32_t br)
+{
+    if (v) {
+        env->sregs[BR] |= br;
+    } else {
+        env->sregs[BR] &= ~br;
+    }
+}
+
+void HELPER(un_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_unordered_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(oeq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_eq_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ueq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v == float_relation_equal || v == float_relation_unordered, br);
+}
+
+void HELPER(olt_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_lt_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ult_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v == float_relation_less || v == float_relation_unordered, br);
+}
+
+void HELPER(ole_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_le_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ule_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v != float_relation_greater, br);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 1627ad0..9308cdf 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2019,7 +2019,86 @@ static void disas_xtensa_insn(DisasContext *dc)
 
         case 11: /*FP1*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-            TBD();
+
+#define gen_compare(rel, br, a, b) \
+    do { \
+        TCGv_i32 bit = tcg_const_i32(1 << br); \
+        \
+        gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \
+        tcg_temp_free(bit); \
+    } while (0)
+
+            switch (OP2) {
+            case 1: /*UN.Sf*/
+                gen_compare(un_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 2: /*OEQ.Sf*/
+                gen_compare(oeq_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 3: /*UEQ.Sf*/
+                gen_compare(ueq_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 4: /*OLT.Sf*/
+                gen_compare(olt_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 5: /*ULT.Sf*/
+                gen_compare(ult_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 6: /*OLE.Sf*/
+                gen_compare(ole_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 7: /*ULE.Sf*/
+                gen_compare(ule_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+#undef gen_compare
+
+            case 8: /*MOVEQZ.Sf*/
+            case 9: /*MOVNEZ.Sf*/
+            case 10: /*MOVLTZ.Sf*/
+            case 11: /*MOVGEZ.Sf*/
+                gen_window_check1(dc, RRR_T);
+                {
+                    static const TCGCond cond[] = {
+                        TCG_COND_NE,
+                        TCG_COND_EQ,
+                        TCG_COND_GE,
+                        TCG_COND_LT
+                    };
+                    int label = gen_new_label();
+                    tcg_gen_brcondi_i32(cond[OP2 - 8], cpu_R[RRR_T], 0, label);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    gen_set_label(label);
+                }
+                break;
+
+            case 12: /*MOVF.Sf*/
+            case 13: /*MOVT.Sf*/
+                HAS_OPTION(XTENSA_OPTION_BOOLEAN);
+                {
+                    int label = gen_new_label();
+                    TCGv_i32 tmp = tcg_temp_new_i32();
+
+                    tcg_gen_andi_i32(tmp, cpu_SR[BR], 1 << RRR_T);
+                    tcg_gen_brcondi_i32(
+                            OP2 & 1 ? TCG_COND_EQ : TCG_COND_NE,
+                            tmp, 0, label);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    gen_set_label(label);
+                    tcg_temp_free(tmp);
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         default: /*reserved*/
commit b7ee8c6a32987e5039f0a2c2976fad556965727d
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:57 2012 +0400

    target-xtensa: implement FP0 conversions
    
    These are FP to integer and integer to FP conversion opcodes.
    See ISA, 4.3.10 for more details.
    
    Note that the ISA description for utrunc.s is currently incorrect and
    will be fixed in future revisions.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 4e6e417..9557347 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -44,5 +44,9 @@ DEF_HELPER_3(sub_s, f32, env, f32, f32)
 DEF_HELPER_3(mul_s, f32, env, f32, f32)
 DEF_HELPER_4(madd_s, f32, env, f32, f32, f32)
 DEF_HELPER_4(msub_s, f32, env, f32, f32, f32)
+DEF_HELPER_FLAGS_3(ftoi, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
+DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
+DEF_HELPER_3(itof, f32, env, i32, i32)
+DEF_HELPER_3(uitof, f32, env, i32, i32)
 
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index ba935a8..5cf9c02 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -821,3 +821,40 @@ float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
     return float32_muladd(b, c, a, float_muladd_negate_product,
             &env->fp_status);
 }
+
+uint32_t HELPER(ftoi)(float32 v, uint32_t rounding_mode, uint32_t scale)
+{
+    float_status fp_status = {0};
+
+    set_float_rounding_mode(rounding_mode, &fp_status);
+    return float32_to_int32(
+            float32_scalbn(v, scale, &fp_status), &fp_status);
+}
+
+uint32_t HELPER(ftoui)(float32 v, uint32_t rounding_mode, uint32_t scale)
+{
+    float_status fp_status = {0};
+    float32 res;
+
+    set_float_rounding_mode(rounding_mode, &fp_status);
+
+    res = float32_scalbn(v, scale, &fp_status);
+
+    if (float32_is_neg(v) && !float32_is_any_nan(v)) {
+        return float32_to_int32(res, &fp_status);
+    } else {
+        return float32_to_uint32(res, &fp_status);
+    }
+}
+
+float32 HELPER(itof)(CPUXtensaState *env, uint32_t v, uint32_t scale)
+{
+    return float32_scalbn(int32_to_float32(v, &env->fp_status),
+            (int32_t)scale, &env->fp_status);
+}
+
+float32 HELPER(uitof)(CPUXtensaState *env, uint32_t v, uint32_t scale)
+{
+    return float32_scalbn(uint32_to_float32(v, &env->fp_status),
+            (int32_t)scale, &env->fp_status);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 2125b1d..1627ad0 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1933,6 +1933,54 @@ static void disas_xtensa_insn(DisasContext *dc)
                         cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
                 break;
 
+            case 8: /*ROUND.Sf*/
+            case 9: /*TRUNC.Sf*/
+            case 10: /*FLOOR.Sf*/
+            case 11: /*CEIL.Sf*/
+            case 14: /*UTRUNC.Sf*/
+                gen_window_check1(dc, RRR_R);
+                {
+                    static const unsigned rounding_mode_const[] = {
+                        float_round_nearest_even,
+                        float_round_to_zero,
+                        float_round_down,
+                        float_round_up,
+                        [6] = float_round_to_zero,
+                    };
+                    TCGv_i32 rounding_mode = tcg_const_i32(
+                            rounding_mode_const[OP2 & 7]);
+                    TCGv_i32 scale = tcg_const_i32(RRR_T);
+
+                    if (OP2 == 14) {
+                        gen_helper_ftoui(cpu_R[RRR_R], cpu_FR[RRR_S],
+                                rounding_mode, scale);
+                    } else {
+                        gen_helper_ftoi(cpu_R[RRR_R], cpu_FR[RRR_S],
+                                rounding_mode, scale);
+                    }
+
+                    tcg_temp_free(rounding_mode);
+                    tcg_temp_free(scale);
+                }
+                break;
+
+            case 12: /*FLOAT.Sf*/
+            case 13: /*UFLOAT.Sf*/
+                gen_window_check1(dc, RRR_S);
+                {
+                    TCGv_i32 scale = tcg_const_i32(-RRR_T);
+
+                    if (OP2 == 13) {
+                        gen_helper_uitof(cpu_FR[RRR_R], cpu_env,
+                                cpu_R[RRR_S], scale);
+                    } else {
+                        gen_helper_itof(cpu_FR[RRR_R], cpu_env,
+                                cpu_R[RRR_S], scale);
+                    }
+                    tcg_temp_free(scale);
+                }
+                break;
+
             case 15: /*FP1OP*/
                 switch (RRR_T) {
                 case 0: /*MOV.Sf*/
commit 0b6df8385d24a4eed0ff546493c59e047b16537b
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:56 2012 +0400

    target-xtensa: implement FP0 arithmetic
    
    These are FP arithmetic opcodes.
    See ISA, 4.3.10 for more details.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
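
For reference, the fused helpers in the diff below pass the accumulator as
the first float argument, so with a = fr, b = fs, c = ft the mapping onto
softfloat's fused muladd works out as:

    madd_s(env, a, b, c) = float32_muladd(b, c, a, 0)
                         = (b * c) + a     /* MADD.S: fr = fr + fs * ft */
    msub_s(env, a, b, c) = float32_muladd(b, c, a, float_muladd_negate_product)
                         = a - (b * c)     /* MSUB.S: fr = fr - fs * ft */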

diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 1662552..4e6e417 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -37,5 +37,12 @@ DEF_HELPER_3(wsr_dbreaka, void, env, i32, i32)
 DEF_HELPER_3(wsr_dbreakc, void, env, i32, i32)
 
 DEF_HELPER_2(wur_fcr, void, env, i32)
+DEF_HELPER_FLAGS_1(abs_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(neg_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_3(add_s, f32, env, f32, f32)
+DEF_HELPER_3(sub_s, f32, env, f32, f32)
+DEF_HELPER_3(mul_s, f32, env, f32, f32)
+DEF_HELPER_4(madd_s, f32, env, f32, f32, f32)
+DEF_HELPER_4(msub_s, f32, env, f32, f32, f32)
 
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 3bf7339..ba935a8 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -784,3 +784,40 @@ void HELPER(wur_fcr)(CPUXtensaState *env, uint32_t v)
     env->uregs[FCR] = v & 0xfffff07f;
     set_float_rounding_mode(rounding_mode[v & 3], &env->fp_status);
 }
+
+float32 HELPER(abs_s)(float32 v)
+{
+    return float32_abs(v);
+}
+
+float32 HELPER(neg_s)(float32 v)
+{
+    return float32_chs(v);
+}
+
+float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_add(a, b, &env->fp_status);
+}
+
+float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_sub(a, b, &env->fp_status);
+}
+
+float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_mul(a, b, &env->fp_status);
+}
+
+float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
+{
+    return float32_muladd(b, c, a, 0,
+            &env->fp_status);
+}
+
+float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
+{
+    return float32_muladd(b, c, a, float_muladd_negate_product,
+            &env->fp_status);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 0f85b36..2125b1d 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1907,7 +1907,66 @@ static void disas_xtensa_insn(DisasContext *dc)
 
         case 10: /*FP0*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-            TBD();
+            switch (OP2) {
+            case 0: /*ADD.Sf*/
+                gen_helper_add_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 1: /*SUB.Sf*/
+                gen_helper_sub_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 2: /*MUL.Sf*/
+                gen_helper_mul_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 4: /*MADD.Sf*/
+                gen_helper_madd_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 5: /*MSUB.Sf*/
+                gen_helper_msub_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 15: /*FP1OP*/
+                switch (RRR_T) {
+                case 0: /*MOV.Sf*/
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 1: /*ABS.Sf*/
+                    gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 4: /*RFRf*/
+                    gen_window_check1(dc, RRR_R);
+                    tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 5: /*WFRf*/
+                    gen_window_check1(dc, RRR_S);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]);
+                    break;
+
+                case 6: /*NEG.Sf*/
+                    gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                default: /*reserved*/
+                    RESERVED();
+                    break;
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         case 11: /*FP1*/
commit 9ed7ae12afacdd234a749aa5af27dd93ea1594ec
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:55 2012 +0400

    target-xtensa: implement LSCX and LSCI groups
    
    These are load/store instructions for FP registers with immediate or
    register index and optional base post-update.
    See ISA, 4.3.10 for more details.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 74b1b54..0f85b36 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1843,8 +1843,33 @@ static void disas_xtensa_insn(DisasContext *dc)
             break;
 
         case 8: /*LSCXp*/
-            HAS_OPTION(XTENSA_OPTION_COPROCESSOR);
-            TBD();
+            switch (OP2) {
+            case 0: /*LSXf*/
+            case 1: /*LSXUf*/
+            case 4: /*SSXf*/
+            case 5: /*SSXUf*/
+                HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
+                gen_window_check2(dc, RRR_S, RRR_T);
+                {
+                    TCGv_i32 addr = tcg_temp_new_i32();
+                    tcg_gen_add_i32(addr, cpu_R[RRR_S], cpu_R[RRR_T]);
+                    gen_load_store_alignment(dc, 2, addr, false);
+                    if (OP2 & 0x4) {
+                        tcg_gen_qemu_st32(cpu_FR[RRR_R], addr, dc->cring);
+                    } else {
+                        tcg_gen_qemu_ld32u(cpu_FR[RRR_R], addr, dc->cring);
+                    }
+                    if (OP2 & 0x1) {
+                        tcg_gen_mov_i32(cpu_R[RRR_S], addr);
+                    }
+                    tcg_temp_free(addr);
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         case 9: /*LSC4*/
@@ -2118,8 +2143,33 @@ static void disas_xtensa_insn(DisasContext *dc)
         break;
 
     case 3: /*LSCIp*/
-        HAS_OPTION(XTENSA_OPTION_COPROCESSOR);
-        TBD();
+        switch (RRI8_R) {
+        case 0: /*LSIf*/
+        case 4: /*SSIf*/
+        case 8: /*LSIUf*/
+        case 12: /*SSIUf*/
+            HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
+            gen_window_check1(dc, RRI8_S);
+            {
+                TCGv_i32 addr = tcg_temp_new_i32();
+                tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2);
+                gen_load_store_alignment(dc, 2, addr, false);
+                if (RRI8_R & 0x4) {
+                    tcg_gen_qemu_st32(cpu_FR[RRI8_T], addr, dc->cring);
+                } else {
+                    tcg_gen_qemu_ld32u(cpu_FR[RRI8_T], addr, dc->cring);
+                }
+                if (RRI8_R & 0x8) {
+                    tcg_gen_mov_i32(cpu_R[RRI8_S], addr);
+                }
+                tcg_temp_free(addr);
+            }
+            break;
+
+        default: /*reserved*/
+            RESERVED();
+            break;
+        }
         break;
 
     case 4: /*MAC16d*/
commit dd519cbec63434f2b883f0b4a20f827bbd59793e
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:54 2012 +0400

    target-xtensa: add FP registers
    
    There are 16 32-bit FP registers (f0 - f15), plus the control and status
    user registers (fcr, fsr).
    
    See ISA, 4.3.10 for more details.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/gdbstub.c b/gdbstub.c
index a91709f..d02ec75 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1696,6 +1696,10 @@ static int cpu_gdb_read_register(CPUXtensaState *env, uint8_t *mem_buf, int n)
         GET_REG32(env->uregs[reg->targno & 0xff]);
         break;
 
+    case 4: /*f*/
+        GET_REG32(float32_val(env->fregs[reg->targno & 0x0f]));
+        break;
+
     case 8: /*a*/
         GET_REG32(env->regs[reg->targno & 0x0f]);
         break;
@@ -1736,6 +1740,10 @@ static int cpu_gdb_write_register(CPUXtensaState *env, uint8_t *mem_buf, int n)
         env->uregs[reg->targno & 0xff] = tmp;
         break;
 
+    case 4: /*f*/
+        env->fregs[reg->targno & 0x0f] = make_float32(tmp);
+        break;
+
     case 8: /*a*/
         env->regs[reg->targno & 0x0f] = tmp;
         break;
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 177094a..b456283 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -36,6 +36,7 @@
 #include "config.h"
 #include "qemu-common.h"
 #include "cpu-defs.h"
+#include "fpu/softfloat.h"
 
 #define TARGET_HAS_ICE 1
 
@@ -325,6 +326,8 @@ typedef struct CPUXtensaState {
     uint32_t sregs[256];
     uint32_t uregs[256];
     uint32_t phys_regs[MAX_NAREG];
+    float32 fregs[16];
+    float_status fp_status;
 
     xtensa_tlb_entry itlb[7][MAX_TLB_WAY_SIZE];
     xtensa_tlb_entry dtlb[10][MAX_TLB_WAY_SIZE];
diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 152fec0..1662552 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -36,4 +36,6 @@ DEF_HELPER_3(wsr_ibreaka, void, env, i32, i32)
 DEF_HELPER_3(wsr_dbreaka, void, env, i32, i32)
 DEF_HELPER_3(wsr_dbreakc, void, env, i32, i32)
 
+DEF_HELPER_2(wur_fcr, void, env, i32)
+
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 2659c0e..3bf7339 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -771,3 +771,16 @@ void HELPER(wsr_dbreakc)(CPUXtensaState *env, uint32_t i, uint32_t v)
     }
     env->sregs[DBREAKC + i] = v;
 }
+
+void HELPER(wur_fcr)(CPUXtensaState *env, uint32_t v)
+{
+    static const int rounding_mode[] = {
+        float_round_nearest_even,
+        float_round_to_zero,
+        float_round_up,
+        float_round_down,
+    };
+
+    env->uregs[FCR] = v & 0xfffff07f;
+    set_float_rounding_mode(rounding_mode[v & 3], &env->fp_status);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index b6643eb..74b1b54 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -70,6 +70,7 @@ typedef struct DisasContext {
 static TCGv_ptr cpu_env;
 static TCGv_i32 cpu_pc;
 static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_FR[16];
 static TCGv_i32 cpu_SR[256];
 static TCGv_i32 cpu_UR[256];
 
@@ -155,6 +156,12 @@ void xtensa_translate_init(void)
         "ar8", "ar9", "ar10", "ar11",
         "ar12", "ar13", "ar14", "ar15",
     };
+    static const char * const fregnames[] = {
+        "f0", "f1", "f2", "f3",
+        "f4", "f5", "f6", "f7",
+        "f8", "f9", "f10", "f11",
+        "f12", "f13", "f14", "f15",
+    };
     int i;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
@@ -167,6 +174,12 @@ void xtensa_translate_init(void)
                 regnames[i]);
     }
 
+    for (i = 0; i < 16; i++) {
+        cpu_FR[i] = tcg_global_mem_new_i32(TCG_AREG0,
+                offsetof(CPUXtensaState, fregs[i]),
+                fregnames[i]);
+    }
+
     for (i = 0; i < 256; ++i) {
         if (sregnames[i]) {
             cpu_SR[i] = tcg_global_mem_new_i32(TCG_AREG0,
@@ -692,6 +705,23 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
     }
 }
 
+static void gen_wur(uint32_t ur, TCGv_i32 s)
+{
+    switch (ur) {
+    case FCR:
+        gen_helper_wur_fcr(cpu_env, s);
+        break;
+
+    case FSR:
+        tcg_gen_andi_i32(cpu_UR[ur], s, 0xffffff80);
+        break;
+
+    default:
+        tcg_gen_mov_i32(cpu_UR[ur], s);
+        break;
+    }
+}
+
 static void gen_load_store_alignment(DisasContext *dc, int shift,
         TCGv_i32 addr, bool no_hw_alignment)
 {
@@ -1761,13 +1791,11 @@ static void disas_xtensa_insn(DisasContext *dc)
 
             case 15: /*WUR*/
                 gen_window_check1(dc, RRR_T);
-                {
-                    if (uregnames[RSR_SR]) {
-                        tcg_gen_mov_i32(cpu_UR[RSR_SR], cpu_R[RRR_T]);
-                    } else {
-                        qemu_log("WUR %d not implemented, ", RSR_SR);
-                        TBD();
-                    }
+                if (uregnames[RSR_SR]) {
+                    gen_wur(RSR_SR, cpu_R[RRR_T]);
+                } else {
+                    qemu_log("WUR %d not implemented, ", RSR_SR);
+                    TBD();
                 }
                 break;
 
@@ -2730,6 +2758,16 @@ void cpu_dump_state(CPUXtensaState *env, FILE *f, fprintf_function cpu_fprintf,
         cpu_fprintf(f, "AR%02d=%08x%c", i, env->phys_regs[i],
                 (i % 4) == 3 ? '\n' : ' ');
     }
+
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_FP_COPROCESSOR)) {
+        cpu_fprintf(f, "\n");
+
+        for (i = 0; i < 16; ++i) {
+            cpu_fprintf(f, "F%02d=%08x (%+10.8e)%c", i,
+                    float32_val(env->fregs[i]),
+                    *(float *)&env->fregs[i], (i % 2) == 1 ? '\n' : ' ');
+        }
+    }
 }
 
 void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, int pc_pos)
commit b81fe822dad134871cd336e26ed55d165e597f4e
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:53 2012 +0400

    target-xtensa: specialize softfloat NaN rules
    
    NaN propagation rule: leftmost NaN in the expression gets propagated to
    the result.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
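
The PowerPC rule that xtensa now shares reduces to picking the first
operand whenever it is any kind of NaN (the actual pickNaN() body lives in
softfloat-specialize.h and is not part of the hunk below); a sketch of that
selection, with illustrative naming:

    /* Returns 0 to propagate operand a, 1 to propagate operand b;
       the chosen NaN is additionally quieted before being returned. */
    int pick_leftmost_nan(int aIsQNaN, int aIsSNaN, int bIsQNaN, int bIsSNaN)
    {
        if (aIsSNaN || aIsQNaN) {
            return 0;               /* a is a NaN: propagate a */
        } else {
            return 1;               /* otherwise propagate b */
        }
    }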

diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 50b54b8..a1d489e 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -64,7 +64,8 @@ const float16 float16_default_nan = const_float16(0xFE00);
 *----------------------------------------------------------------------------*/
 #if defined(TARGET_SPARC)
 const float32 float32_default_nan = const_float32(0x7FFFFFFF);
-#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
+      defined(TARGET_XTENSA)
 const float32 float32_default_nan = const_float32(0x7FC00000);
 #elif SNAN_BIT_IS_ONE
 const float32 float32_default_nan = const_float32(0x7FBFFFFF);
@@ -403,7 +404,7 @@ static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
         return 1;
     }
 }
-#elif defined(TARGET_PPC)
+#elif defined(TARGET_PPC) || defined(TARGET_XTENSA)
 static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
                    flag aIsLargerSignificand)
 {
commit 10f6ca034089e201aaff1da5ed99b127811081d5
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:52 2012 +0400

    target-xtensa: handle boolean option in overlays
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/overlay_tool.h b/target-xtensa/overlay_tool.h
index a3a5650..e395053 100644
--- a/target-xtensa/overlay_tool.h
+++ b/target-xtensa/overlay_tool.h
@@ -58,6 +58,7 @@
     XCHAL_OPTION(XCHAL_HAVE_SEXT, XTENSA_OPTION_MISC_OP_SEXT) | \
     XCHAL_OPTION(XCHAL_HAVE_CLAMPS, XTENSA_OPTION_MISC_OP_CLAMPS) | \
     XCHAL_OPTION(XCHAL_HAVE_CP, XTENSA_OPTION_COPROCESSOR) | \
+    XCHAL_OPTION(XCHAL_HAVE_BOOLEANS, XTENSA_OPTION_BOOLEAN) | \
     XCHAL_OPTION(XCHAL_HAVE_FP, XTENSA_OPTION_FP_COPROCESSOR) | \
     XCHAL_OPTION(XCHAL_HAVE_RELEASE_SYNC, XTENSA_OPTION_MP_SYNCHRO) | \
     XCHAL_OPTION(XCHAL_HAVE_S32C1I, XTENSA_OPTION_CONDITIONAL_STORE) | \
commit 213ff4e6df0f74e93995a5d3d7429acb4ad3ee86
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:51 2012 +0400

    softfloat: add NO_SIGNALING_NANS
    
    Architectures that don't have signaling NaNs can define
    NO_SIGNALING_NANS; it makes float*_is_quiet_nan return 1 for any NaN
    and float*_is_signaling_nan always return 0.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 4902450..50b54b8 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -41,6 +41,13 @@ these four paragraphs for those parts of this code that are retained.
 #define SNAN_BIT_IS_ONE		0
 #endif
 
+#if defined(TARGET_XTENSA)
+/* Define for architectures which deviate from IEEE in not supporting
+ * signaling NaNs (so all NaNs are treated as quiet).
+ */
+#define NO_SIGNALING_NANS 1
+#endif
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated half-precision NaN.
 *----------------------------------------------------------------------------*/
@@ -127,6 +134,17 @@ typedef struct {
     uint64_t high, low;
 } commonNaNT;
 
+#ifdef NO_SIGNALING_NANS
+int float16_is_quiet_nan(float16 a_)
+{
+    return float16_is_any_nan(a_);
+}
+
+int float16_is_signaling_nan(float16 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the half-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -156,6 +174,7 @@ int float16_is_signaling_nan(float16 a_)
     return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the half-precision floating point value `a' is a
@@ -217,6 +236,17 @@ static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float32_is_quiet_nan(float32 a_)
+{
+    return float32_is_any_nan(a_);
+}
+
+int float32_is_signaling_nan(float32 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -246,6 +276,7 @@ int float32_is_signaling_nan( float32 a_ )
     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the single-precision floating point value `a' is a
@@ -586,6 +617,17 @@ static float32 propagateFloat32MulAddNaN(float32 a, float32 b,
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float64_is_quiet_nan(float64 a_)
+{
+    return float64_is_any_nan(a_);
+}
+
+int float64_is_signaling_nan(float64 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the double-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -619,6 +661,7 @@ int float64_is_signaling_nan( float64 a_ )
         && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the double-precision floating point value `a' is a
@@ -773,6 +816,17 @@ static float64 propagateFloat64MulAddNaN(float64 a, float64 b,
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int floatx80_is_quiet_nan(floatx80 a_)
+{
+    return floatx80_is_any_nan(a_);
+}
+
+int floatx80_is_signaling_nan(floatx80 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the extended double-precision floating-point value `a' is a
 | quiet NaN; otherwise returns 0. This slightly differs from the same
@@ -816,6 +870,7 @@ int floatx80_is_signaling_nan( floatx80 a )
         && ( a.low == aLow );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the extended double-precision floating point value
@@ -929,6 +984,17 @@ static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float128_is_quiet_nan(float128 a_)
+{
+    return float128_is_any_nan(a_);
+}
+
+int float128_is_signaling_nan(float128 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -964,6 +1030,7 @@ int float128_is_signaling_nan( float128 a )
         && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the quadruple-precision floating point value `a' is
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index 2860ca0..d8999b3 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -251,6 +251,11 @@ int float16_is_quiet_nan( float16 );
 int float16_is_signaling_nan( float16 );
 float16 float16_maybe_silence_nan( float16 );
 
+INLINE int float16_is_any_nan(float16 a)
+{
+    return ((float16_val(a) & ~0x8000) > 0x7c00);
+}
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated half-precision NaN.
 *----------------------------------------------------------------------------*/
commit 66176802960ad1f7c3d061d17b784088c2e28d59
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Wed Sep 19 04:23:50 2012 +0400

    softfloat: make float_muladd_negate_* flags independent
    
    Flags passed into float{32,64}_muladd are treated as bits; assign
    independent bits to float_muladd_negate_* to allow precise control over
    what gets negated in float{32,64}_muladd.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Reviewed-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>
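
A quick illustration of the aliasing the old encoding had (sketch, assuming
only the enum values shown in the hunk below): with negate_result defined
as 3 it was indistinguishable from negate_c | negate_product, so the three
requests could not be treated as independent bits.

    #include <stdio.h>

    enum {
        float_muladd_negate_c       = 1,
        float_muladd_negate_product = 2,
        float_muladd_negate_result  = 4,   /* was 3 == negate_c | negate_product */
    };

    int main(void)
    {
        int flags = float_muladd_negate_c | float_muladd_negate_product;

        /* With the old value (3), this combination also matched negate_result. */
        printf("negate_c|negate_product = %d, negate_result = %d\n",
               flags, float_muladd_negate_result);
        return 0;
    }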

diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index feec3a1..2860ca0 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -219,7 +219,7 @@ void float_raise( int8 flags STATUS_PARAM);
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
-    float_muladd_negate_result = 3,
+    float_muladd_negate_result = 4,
 };
 
 /*----------------------------------------------------------------------------
commit e809c0dc70522cba07dc2f16f677d1adf01523fc
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 22 19:24:49 2012 +0200

    Revert "tcg/mips"
    
    This reverts commit ad49d1f75115663731bfe06dec61eed6775526ad.
    
    This commit was not supposed to be pushed.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index dc80abd..f70910a 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1681,8 +1681,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                  + CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
-    tcg_set_frame(s, TCG_REG_SP, frame_size
-                  - CPU_TEMP_BUF_NLONGS * sizeof(long),
+    tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+                  + TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* TB prologue */
commit 23f3ff2604679a0ba7a23be7576529169e5e7921
Author: malc <av1474 at comtv.ru>
Date:   Sat Sep 22 19:14:33 2012 +0400

    tcg/ppc32: Implement movcond32
    
    Thanks to Richard Henderson
    
    Signed-off-by: malc <av1474 at comtv.ru>

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index ec02867..90c275d 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -384,6 +384,7 @@ static int tcg_target_const_match(tcg_target_long val,
 #define ORC    XO31(412)
 #define EQV    XO31(284)
 #define NAND   XO31(476)
+#define ISEL   XO31( 15)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -1263,6 +1264,72 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
         );
 }
 
+static void tcg_out_movcond (TCGContext *s, TCGCond cond,
+                             TCGArg dest,
+                             TCGArg c1, TCGArg c2,
+                             TCGArg v1, TCGArg v2,
+                             int const_c2)
+{
+    tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
+
+    if (1) {
+        /* At least here on 7747A bit twiddling hacks are outperformed
+           by jumpy code (the testing was not scientific) */
+        if (dest == v2) {
+            cond = tcg_invert_cond (cond);
+            v2 = v1;
+        }
+        else {
+            if (dest != v1) {
+                tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+            }
+        }
+        /* Branch forward over one insn */
+        tcg_out32 (s, tcg_to_bc[cond] | 8);
+        tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
+    }
+    else {
+        /* isel version, "if (1)" above should be replaced once a way
+           to figure out availability of isel on the underlying
+           hardware is found */
+        int tab, bc;
+
+        switch (cond) {
+        case TCG_COND_EQ:
+            tab = TAB (dest, v1, v2);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_NE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_LTU:
+        case TCG_COND_LT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_LT;
+            break;
+        case TCG_COND_GEU:
+        case TCG_COND_GE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_LT;
+            break;
+        case TCG_COND_LEU:
+        case TCG_COND_LE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_GT;
+            break;
+        case TCG_COND_GTU:
+        case TCG_COND_GT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_GT;
+            break;
+        default:
+            tcg_abort ();
+        }
+        tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
+    }
+}
+
 static void tcg_out_brcond (TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index)
@@ -1820,6 +1887,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             );
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond (s, args[5], args[0],
+                         args[1], args[2],
+                         args[3], args[4],
+                         const_args[2]);
+        break;
+
     default:
         tcg_dump_ops (s);
         tcg_abort ();
@@ -1916,6 +1990,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "r" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
 
     { -1 },
 };
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 177eea1..3259d89 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,7 +92,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #define TCG_AREG0 TCG_REG_R27
 
commit ad49d1f75115663731bfe06dec61eed6775526ad
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 22 17:07:23 2012 +0200

    tcg/mips

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index f70910a..dc80abd 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1681,8 +1681,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                  + CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
-    tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
-                  + TCG_STATIC_CALL_ARGS_SIZE,
+    tcg_set_frame(s, TCG_REG_SP, frame_size
+                  - CPU_TEMP_BUF_NLONGS * sizeof(long),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* TB prologue */
commit 6e17d0c5cdf7b3d0086708ba1d2df931e18cb5b5
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 13 19:37:46 2012 +0200

    tcg: Remove tcg_target_get_call_iarg_regs_count
    
    The TCG targets no longer need individual implementations.
    
    Since commit 6a18ae2d2947532d5c26439548afa0481c4529f9,
    'flags' is no longer used in tcg_target_get_call_iarg_regs_count.
    
    The remaining tcg_target_get_call_iarg_regs_count is trivial and only
    called once. Therefore the patch eliminates it completely.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index aed3b53..2bad0a2 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -145,12 +145,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 793b90d..2c79c10 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -175,12 +175,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     *insn_ptr = insn;
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 43c9719..122d636 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -124,12 +124,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return ARRAY_SIZE(tcg_target_call_iarg_regs);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 1745038..dc9c12c 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -176,12 +176,6 @@ static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_R8
 };
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 8;
-}
-
 /*
  * opcode formation
  */
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c272b38..f70910a 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -185,12 +185,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33..ec02867 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -221,12 +221,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 337cd41..19944bc 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -208,12 +208,6 @@ static void patch_reloc (uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count (int flags)
-{
-    return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index aac11d9..3b90605 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -356,11 +356,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return sizeof(tcg_target_call_iarg_regs) / sizeof(int);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index baed3b4..014bbfd 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -133,12 +133,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 6;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 4afd035..6edf170 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -89,7 +89,6 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                        tcg_target_long arg2);
 static int tcg_target_const_match(tcg_target_long val,
                                   const TCGArgConstraint *arg_ct);
-static int tcg_target_get_call_iarg_regs_count(int flags);
 
 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -1853,7 +1852,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
 
     flags = args[nb_oargs + nb_iargs];
 
-    nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
     if (nb_regs > nb_params)
         nb_regs = nb_params;
 
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 003244c..3f4a24b 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -863,12 +863,6 @@ static int tcg_target_const_match(tcg_target_long val,
     return arg_ct->ct & TCG_CT_CONST;
 }
 
-/* Maximum number of register used for input function arguments. */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return ARRAY_SIZE(tcg_target_call_iarg_regs);
-}
-
 static void tcg_target_init(TCGContext *s)
 {
 #if defined(CONFIG_DEBUG_TCG_INTERPRETER)
commit d73685e3c3936fd7851a32eff5443405cb5368be
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 13 19:37:45 2012 +0200

    tcg/i386: Remove unused registers from tcg_target_call_iarg_regs
    
    32 bit x86 hosts don't need registers for helper function arguments
    because they use the default stack based calling convention.
    
    Removing the registers allows simpler code for function
    tcg_target_get_call_iarg_regs_count.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index a87c8ba..43c9719 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -75,9 +75,7 @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_R8,
     TCG_REG_R9,
 #else
-    TCG_REG_EAX,
-    TCG_REG_EDX,
-    TCG_REG_ECX
+    /* 32 bit mode uses stack based calling convention (GCC default). */
 #endif
 };
 
@@ -129,11 +127,7 @@ static void patch_reloc(uint8_t *code_ptr, int type,
 /* maximum number of register used for input function arguments */
 static inline int tcg_target_get_call_iarg_regs_count(int flags)
 {
-    if (TCG_TARGET_REG_BITS == 64) {
-        return ARRAY_SIZE(tcg_target_call_iarg_regs);
-    }
-
-    return 0;
+    return ARRAY_SIZE(tcg_target_call_iarg_regs);
 }
 
 /* parse target specific constraints */
commit b18212c66870f119111e0777fcc19462ae07eb70
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 13 19:37:44 2012 +0200

    tcg/i386: Add shortcuts for registers used in L constraint
    
    While 64 bit hosts use the first three registers, which are also used
    as function input parameters, 32 bit hosts use TCG_REG_EAX and
    TCG_REG_EDX, which are not used in parameter passing.
    
    After defining new register macros for the registers used in the L
    constraint, the patch replaces most occurrences of
    tcg_target_call_iarg_regs[0], tcg_target_call_iarg_regs[1] and
    tcg_target_call_iarg_regs[2] with those new macros.
    
    tcg_target_call_iarg_regs remains unchanged when it is used for input
    arguments (only with 64 bit hosts) before tcg_out_calli.
    
    A comment related to those registers was fixed, too.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    [aurel32: build fix on i386, small optimization for i386 in the prologue]
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 85c6b81..a87c8ba 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -88,6 +88,18 @@ static const int tcg_target_call_oarg_regs[] = {
 #endif
 };
 
+/* Registers used with L constraint, which are the first argument 
+   registers on x86_64, and two random call clobbered registers on
+   i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -179,16 +191,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
+#if TCG_TARGET_REG_BITS == 64
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
-        } else {
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
+#else
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
-        }
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+#endif
         break;
 
     case 'e':
@@ -1029,8 +1041,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                     uint8_t **label_ptr, int which)
 {
     const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
+    const int r0 = TCG_REG_L0;
+    const int r1 = TCG_REG_L1;
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;
 
@@ -1192,8 +1204,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1226,14 +1237,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1299,11 +1306,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -1324,8 +1329,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
-       means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+       means that TCG_REG_L1 is definitely free here.  */
+    const int scratch = TCG_REG_L1;
 
     switch (sizeop) {
     case 0:
@@ -1398,8 +1403,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1434,18 +1438,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     stack_adjust += 4;
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                tcg_target_call_iarg_regs[1], data_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+                TCG_REG_L1, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
     stack_adjust = 0;
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
@@ -1472,11 +1472,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -2061,15 +2059,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 #if TCG_TARGET_REG_BITS == 32
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
                (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
-    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
-               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+    /* jmp *tb.  */
+    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+		         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+			 + stack_addend);
 #else
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
-#endif
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
     /* jmp *tb.  */
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
 
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
commit 1b7621ad99d352629c096cba40eadc6fb2b0c294
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 13 19:37:43 2012 +0200

    w64: Fix TCG helper functions with 5 arguments
    
    TCG uses 6 registers for function arguments on 64 bit Linux hosts,
    but only 4 registers on W64 hosts.
    
    Commit 2999a0b20074a7e4a58f56572bb1436749368f59 increased the number
    of arguments for some important helper functions from 4 to 5, which
    triggered a bug on W64 hosts: QEMU aborts when executing
    helper_lcall_real in the guest's BIOS because
    tcg_target_get_call_iarg_regs_count always returned 6.
    
    As W64 has only 4 registers for arguments, the 5th argument must be
    passed on the stack using a correct stack offset.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index aa1fa9f..85c6b81 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -118,7 +118,7 @@ static void patch_reloc(uint8_t *code_ptr, int type,
 static inline int tcg_target_get_call_iarg_regs_count(int flags)
 {
     if (TCG_TARGET_REG_BITS == 64) {
-        return 6;
+        return ARRAY_SIZE(tcg_target_call_iarg_regs);
     }
 
     return 0;
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b356d76..ace63ba 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -67,7 +67,11 @@ typedef enum {
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_ESP 
 #define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
 #define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
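A note on where the value 32 comes from (my reading of the Win64 calling convention, not spelled out in the commit message): the first four integer arguments travel in registers, but the caller still reserves a 32-byte shadow area for them on the stack, so a fifth argument starts right after it.

    /* Assumed W64 call-site stack layout (illustration, not QEMU code):
     *   [rsp +  0 .. 31]   shadow space for the register args rcx, rdx, r8, r9
     *   [rsp + 32]         fifth argument
     * hence TCG_TARGET_CALL_STACK_OFFSET == 32 when _WIN64 is defined.
     */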
commit 9bacf4143190ca57771c8db5981d040f9c625ae5
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Fri Sep 21 04:18:07 2012 +0400

    tcg/README: document tcg_gen_goto_tb restrictions
    
    See
    http://lists.nongnu.org/archive/html/qemu-devel/2012-09/msg03196.html
    for the whole story.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/README b/tcg/README
index d03ae05..33783ee 100644
--- a/tcg/README
+++ b/tcg/README
@@ -392,7 +392,8 @@ Exit the current TB and return the value t0 (word type).
 
 Exit the current TB and jump to the TB index 'index' (constant) if the
 current TB was linked to this TB. Otherwise execute the next
-instructions.
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
 
 * qemu_ld8u t0, t1, flags
 qemu_ld8s t0, t1, flags
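As a sketch of the usage the new wording constrains, this is roughly how a target translator ends a TB with a direct jump; the helper name and the DisasContext field are assumptions for illustration, not part of the patch.

    static void gen_goto_tb_sketch(DisasContext *dc, int slot, target_ulong dest)
    {
        /* 'slot' must be 0 or 1, and each slot may be issued at most once
           per translation block. */
        tcg_gen_goto_tb(slot);
        /* ... target-specific code storing 'dest' into the guest PC ... */
        tcg_gen_exit_tb((tcg_target_long)dc->tb + slot);
    }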
commit f0da375754666511acefa89b57487a22a2b9f945
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 18:46:32 2012 -0700

    tcg-hppa: Implement movcond
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 5385d45..793b90d 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -912,6 +912,18 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
     tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
 }
 
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret,
+                            TCGArg c1, TCGArg c2, int c2const,
+                            TCGArg v1, int v1const)
+{
+    tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const);
+    if (v1const) {
+        tcg_out_movi(s, TCG_TYPE_I32, ret, v1);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_I32, ret, v1);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
@@ -1520,6 +1532,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                          args[3], const_args[3], args[4], const_args[4]);
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2],
+                        args[3], const_args[3]);
+        break;
+
     case INDEX_op_add2_i32:
         tcg_out_add2(s, args[0], args[1], args[2], args[3],
                      args[4], args[5], const_args[4]);
@@ -1628,6 +1645,10 @@ static const TCGTargetOpDef hppa_op_defs[] = {
     { INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
+    /* ??? We can actually support a signed 14-bit arg3, but we
+       only have existing constraints for a signed 11-bit.  */
+    { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } },
+
     { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
     { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
 
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 4defd28..5351353 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -96,7 +96,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub rd, 0, rs */
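For readers unfamiliar with PA-RISC, the way I read the emitted sequence: comclr compares its operands and nullifies the following instruction when its condition holds, and the "0" constraint ties v2 to the output register, which together give the usual movcond semantics ret = (c1 cond c2) ? v1 : v2.

    /* Illustrative reading of the generated code (not from the commit):
     *   comclr,!cond c1, c2, %r0   ; nullify the next insn if !(c1 cond c2)
     *   copy/ldi     v1, ret       ; executed only when (c1 cond c2) holds
     * If the copy is nullified, ret keeps its previous contents, which the
     * "0" constraint forces to be v2.
     */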
commit 7ef55fc91926f518f905692db19ed0b4a8018989
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 11:07:29 2012 +0200

    tcg/optimize: add constant folding for deposit
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c8ae50b..35532a1 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -668,6 +668,26 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             args += 3;
             break;
+        CASE_OP_32_64(deposit):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = ((1ull << args[4]) - 1);
+                tmp = (temps[args[1]].val & ~(tmp << args[3]))
+                      | ((temps[args[2]].val & tmp) << args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args += 5;
+            }
+            args += 5;
+            break;
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
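A stand-alone C model of the folded computation, with made-up values, just to show what ends up in the movi (not part of the patch; like the code above it assumes len < 64):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t fold_deposit(uint64_t t1, uint64_t t2, unsigned pos, unsigned len)
    {
        uint64_t mask = (1ull << len) - 1;
        return (t1 & ~(mask << pos)) | ((t2 & mask) << pos);
    }

    int main(void)
    {
        /* prints 1122ab44: the byte at bit 8 of 0x11223344 replaced by 0xab */
        printf("%llx\n", (unsigned long long)fold_deposit(0x11223344, 0xab, 8, 8));
        return 0;
    }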
commit fba3161fd289f1282f5afce5f91c32e2575e5e8e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 10:02:45 2012 +0200

    tcg: remove #ifdef #endif around TCGOpcode tests
    
    Commit 25c4d9cc made all TCGOpcode enums available, so we no longer
    need to wrap the ones that only exist on some targets in #ifdef/#endif.
    This makes the code easier to read.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index bb9c995..4afd035 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -937,11 +937,7 @@ void tcg_dump_ops(TCGContext *s)
                                                        args[nb_oargs + i]));
                 }
             }
-        } else if (c == INDEX_op_movi_i32 
-#if TCG_TARGET_REG_BITS == 64
-                   || c == INDEX_op_movi_i64
-#endif
-                   ) {
+        } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
             tcg_target_ulong val;
             TCGHelperInfo *th;
 
@@ -993,14 +989,11 @@ void tcg_dump_ops(TCGContext *s)
             case INDEX_op_brcond_i32:
             case INDEX_op_setcond_i32:
             case INDEX_op_movcond_i32:
-#if TCG_TARGET_REG_BITS == 32
             case INDEX_op_brcond2_i32:
             case INDEX_op_setcond2_i32:
-#else
             case INDEX_op_brcond_i64:
             case INDEX_op_setcond_i64:
             case INDEX_op_movcond_i64:
-#endif
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                     qemu_log(",%s", cond_name[args[k++]]);
                 } else {
@@ -2102,16 +2095,12 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
 #endif
         switch(opc) {
         case INDEX_op_mov_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_mov_i64:
-#endif
             dead_args = s->op_dead_args[op_index];
             tcg_reg_alloc_mov(s, def, args, dead_args);
             break;
         case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_movi_i64:
-#endif
             tcg_reg_alloc_movi(s, args);
             break;
         case INDEX_op_debug_insn_start:
commit c2b0e2fea2ef7a183233d3b86c37c5d4bcb89544
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Wed Sep 19 22:00:22 2012 +0200

    tcg/optimize: prefer the "op a, a, b" form for commutative ops
    
    The "op a, a, b" form is better handled on non-RISC host than the "op
    a, b, a" form, so swap the arguments to this form when possible, and
    when b is not a constant.
    
    This reduces the number of generated instructions by a tiny bit.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index abe016a..c8ae50b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -434,7 +434,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
+            /* Prefer the constant in second argument, and then the form
+               op a, a, b, which is better handled on non-RISC hosts. */
+            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
+                && temps[args[2]].state != TCG_TEMP_CONST)) {
                 tmp = args[1];
                 args[1] = args[2];
                 args[2] = tmp;
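A concrete instance of the swap (my own example, derived from the condition above): when the output aliases the second input and that input is not a constant, the arguments are exchanged.

    /* before the pass:   add_i32 r0, r1, r0
     * after the pass:    add_i32 r0, r0, r1
     * The second form matches the two-operand "add r0, r1" pattern directly,
     * so the backend does not need an extra register move.
     */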
commit b336ceb6918b8f9eb54dcbb1043521482c7be83b
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 18 19:37:00 2012 +0200

    tcg/optimize: further optimize brcond/movcond/setcond
    
    When both arguments of brcond/movcond/setcond are the same or when one
    of the two values is a constant equal to zero, it's possible to do
    further optimizations.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index ceea644..abe016a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,58 +292,88 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
     return res;
 }
 
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                        TCGArg y, TCGCond c)
 {
-    switch (op_bits(op)) {
-    case 32:
+    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+        switch (op_bits(op)) {
+        case 32:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int32_t)temps[x].val < (int32_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int32_t)temps[x].val > (int32_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+            }
+            break;
+        case 64:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int64_t)temps[x].val < (int64_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int64_t)temps[x].val > (int64_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+            }
+            break;
+        }
+    } else if (temps_are_copies(x, y)) {
         switch (c) {
-        case TCG_COND_EQ:
-            return (uint32_t)x == (uint32_t)y;
-        case TCG_COND_NE:
-            return (uint32_t)x != (uint32_t)y;
-        case TCG_COND_LT:
-            return (int32_t)x < (int32_t)y;
-        case TCG_COND_GE:
-            return (int32_t)x >= (int32_t)y;
-        case TCG_COND_LE:
-            return (int32_t)x <= (int32_t)y;
         case TCG_COND_GT:
-            return (int32_t)x > (int32_t)y;
         case TCG_COND_LTU:
-            return (uint32_t)x < (uint32_t)y;
-        case TCG_COND_GEU:
-            return (uint32_t)x >= (uint32_t)y;
-        case TCG_COND_LEU:
-            return (uint32_t)x <= (uint32_t)y;
+        case TCG_COND_LT:
         case TCG_COND_GTU:
-            return (uint32_t)x > (uint32_t)y;
-        }
-        break;
-    case 64:
-        switch (c) {
-        case TCG_COND_EQ:
-            return (uint64_t)x == (uint64_t)y;
         case TCG_COND_NE:
-            return (uint64_t)x != (uint64_t)y;
-        case TCG_COND_LT:
-            return (int64_t)x < (int64_t)y;
+            return 0;
         case TCG_COND_GE:
-            return (int64_t)x >= (int64_t)y;
+        case TCG_COND_GEU:
         case TCG_COND_LE:
-            return (int64_t)x <= (int64_t)y;
-        case TCG_COND_GT:
-            return (int64_t)x > (int64_t)y;
+        case TCG_COND_LEU:
+        case TCG_COND_EQ:
+            return 1;
+        }
+    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+        switch (c) {
         case TCG_COND_LTU:
-            return (uint64_t)x < (uint64_t)y;
+            return 0;
         case TCG_COND_GEU:
-            return (uint64_t)x >= (uint64_t)y;
-        case TCG_COND_LEU:
-            return (uint64_t)x <= (uint64_t)y;
-        case TCG_COND_GTU:
-            return (uint64_t)x > (uint64_t)y;
+            return 1;
+        default:
+            return 2;
         }
-        break;
+    } else {
+        return 2;
     }
 
     fprintf(stderr,
@@ -636,11 +666,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 3;
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tmp = do_constant_folding_cond(op, temps[args[1]].val,
-                                               temps[args[2]].val, args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
@@ -654,10 +682,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 4;
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state == TCG_TEMP_CONST) {
-                if (do_constant_folding_cond(op, temps[args[0]].val,
-                                             temps[args[1]].val, args[2])) {
+            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            if (tmp != 2) {
+                if (tmp) {
                     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
                     gen_opc_buf[op_index] = INDEX_op_br;
                     gen_args[0] = args[3];
@@ -677,10 +704,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args += 4;
             break;
         CASE_OP_32_64(movcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding_cond(op, temps[args[1]].val,
-                                               temps[args[2]].val, args[5]);
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            if (tmp != 2) {
                 if (temps_are_copies(args[0], args[4-tmp])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
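A few concrete cases the extended do_constant_folding_cond now catches (my own examples, following the code above; "zero" stands for a temp known to hold the constant 0):

    /* setcond_i32 d, x, x,   TCG_COND_EQ   ->  movi_i32 d, 1
     * setcond_i32 d, x, x,   TCG_COND_LTU  ->  movi_i32 d, 0
     * brcond_i32  x, zero,   TCG_COND_GEU  ->  br (always taken)
     * brcond_i32  x, zero,   TCG_COND_LTU  ->  removed (never taken)
     * anything else with non-constant args ->  2 is returned, op kept as is
     */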
commit 3c94193e0bbdd855bcbedabd27e3cbe1e6bc6242
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 18 19:12:36 2012 +0200

    tcg/optimize: optimize "op r, a, a => movi r, 0"
    
    Now that it's possible to detect copies, we can optimize the
    "op r, a, a => movi r, 0" case. This helps in the computation of
    overflow flags when one of the two args is 0.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index b9a7da9..ceea644 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -540,6 +540,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* Simplify expression for "op r, a, a => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(xor):
+            if (temps_are_copies(args[1], args[2])) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                gen_args += 2;
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
         /* Propagate constants through copy operations and do constant
            folding.  Constants will be substituted to arguments by register
            allocator where needed and possible.  Also detect copies. */
commit 0aba1c7376ad325202f328211ab6dfcae94c7f2a
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 18 19:11:32 2012 +0200

    tcg/optimize: optimize "op r, a, a => mov r, a"
    
    Now that we can easily detect all copies, we can optimize the
    "op r, a, a => mov r, a" case a bit more.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index aeb2225..b9a7da9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -524,7 +524,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
-            if (args[1] == args[2]) {
+            if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
commit 1ff8c5418a680d6766493908eaa07cc11dce7f13
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 16:18:49 2012 +0200

    tcg/optimize: do copy propagation for all operations
    
    It is possible to do copy propagation for all operations, even the
    ones that have side effects or clobber arguments (it only concerns
    input arguments). That said, the call operation has to be handled
    differently because of its variable number of arguments.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1904b39..aeb2225 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -378,8 +378,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         op = gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         /* Do copy propagation */
-        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
-            assert(op != INDEX_op_call);
+        if (op == INDEX_op_call) {
+            int nb_oargs = args[0] >> 16;
+            int nb_iargs = args[0] & 0xffff;
+            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY) {
+                    args[i] = find_better_copy(s, args[i]);
+                }
+            }
+        } else {
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
                     args[i] = find_better_copy(s, args[i]);
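For reference, the argument layout of a call op that the loop above walks, derived from the code itself (the description of the trailing words is my reading, not stated in the commit):

    /* args[0]                                 (nb_oargs << 16) | nb_iargs
     * args[1 .. nb_oargs]                     output temps (reset, not propagated)
     * args[nb_oargs+1 .. nb_oargs+nb_iargs]   input temps (copy-propagated here)
     * args[nb_oargs+nb_iargs+1]               call flags (checked further down)
     */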
commit e590d4e6b3c73b38a9d9ed10c898f73ed8a29f1d
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 12:31:21 2012 +0200

    tcg/optimize: rework copy propagation
    
    The copy propagation pass tries to keep track of what is a copy of
    what and what has a copy of what, and in addition it keeps a circular
    list of all the copies. Unfortunately this doesn't fully work: a mov
    from a temp in the "COPY" state changes it into the "HAS_COPY" state.
    Later, when this temp is used again, it is considered as not having a
    copy and thus no propagation is done.
    
    This patch fixes that by removing the hierarchy between copies, keeping
    only a "COPY" state that means both "is a copy" and "has a copy".
    The decision of which copy to use is deferred to the actual temp
    replacement. At this stage there is no single best choice, only
    choices that are better than others. To make the best choice, the
    operations would have to be parsed in reverse to know whether a temp
    is going to be used later or not; that is what the liveness analysis
    does. At this stage it is known that globals are always live, that
    local temps are dead at the end of the translation block, and that
    plain temps are dead at the end of the basic block. This means that
    this stage should try to replace temps by local temps or globals,
    and local temps by globals.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index da8dffe..1904b39 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,7 +39,6 @@ typedef enum {
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -51,39 +50,19 @@ struct tcg_temp_info {
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
-   class of equivalent temp's, a new representative should be chosen in this
-   class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
+   the copy flag from the left temp.  */
+static void reset_temp(TCGArg temp)
 {
-    int i;
-    TCGArg new_base = (TCGArg)-1;
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (i >= nb_globals) {
-                temps[i].state = TCG_TEMP_HAS_COPY;
-                new_base = i;
-                break;
-            }
-        }
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_UNDEF;
-            } else {
-                temps[i].val = new_base;
-            }
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+        } else {
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         }
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-        new_base = temps[temp].val;
     }
     temps[temp].state = TCG_TEMP_UNDEF;
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_UNDEF;
-    }
 }
 
 static int op_bits(TCGOpcode op)
@@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+    TCGArg i;
+
+    /* If this is already a global, we can't do better. */
+    if (temp < s->nb_globals) {
+        return temp;
+    }
+
+    /* Search for a global first. */
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+        if (i < s->nb_globals) {
+            return i;
+        }
+    }
+
+    /* If it is a temp, search for a temp local. */
+    if (!s->temps[temp].temp_local) {
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+            if (s->temps[i].temp_local) {
+                return i;
+            }
+        }
+    }
+
+    /* Failure to find a better representation, return the same temp. */
+    return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+    TCGArg i;
+
+    if (arg1 == arg2) {
+        return true;
+    }
+
+    if (temps[arg1].state != TCG_TEMP_COPY
+        || temps[arg2].state != TCG_TEMP_COPY) {
+        return false;
+    }
+
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+        if (i == arg2) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                             TCGArg dst, TCGArg src)
 {
-        reset_temp(dst, s->nb_temps, s->nb_globals);
-        assert(temps[src].state != TCG_TEMP_COPY);
-        /* Only consider temps with the same type (width) as copies. */
-        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
-            assert(temps[src].state != TCG_TEMP_CONST);
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
-                temps[src].state = TCG_TEMP_HAS_COPY;
+        reset_temp(dst);
+        assert(temps[src].state != TCG_TEMP_CONST);
+
+        if (s->temps[src].type == s->temps[dst].type) {
+            if (temps[src].state != TCG_TEMP_COPY) {
+                temps[src].state = TCG_TEMP_COPY;
                 temps[src].next_copy = src;
                 temps[src].prev_copy = src;
             }
             temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].val = src;
             temps[dst].next_copy = temps[src].next_copy;
             temps[dst].prev_copy = src;
             temps[temps[dst].next_copy].prev_copy = dst;
             temps[src].next_copy = dst;
         }
+
         gen_args[0] = dst;
         gen_args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
-                             int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst);
         temps[dst].state = TCG_TEMP_CONST;
         temps[dst].val = val;
         gen_args[0] = dst;
@@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     tcg_abort();
 }
 
-
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
-       If this temp is a copy of other ones then this equivalence class'
-       representative is kept in VALS' element.
-       If this temp is neither copy nor constant then corresponding VALS'
-       element is unused. */
+       If this temp is a copy of other ones then the other copies are
+       available through the doubly linked circular list. */
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
@@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             assert(op != INDEX_op_call);
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = temps[args[i]].val;
+                    args[i] = find_better_copy(s, args[i]);
                 }
             }
         }
@@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             if (temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
-                if ((temps[args[0]].state == TCG_TEMP_COPY
-                    && temps[args[0]].val == args[1])
-                    || args[0] == args[1]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
-                if (args[1] == args[0]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            allocator where needed and possible.  Also detect copies. */
         switch (op) {
         CASE_OP_32_64(mov):
-            if ((temps[args[1]].state == TCG_TEMP_COPY
-                && temps[args[1]].val == args[0])
-                || args[0] == args[1]) {
+            if (temps_are_copies(args[0], args[1])) {
                 args += 2;
                 gen_opc_buf[op_index] = INDEX_op_nop;
                 break;
@@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
             gen_args += 2;
             args += 2;
             break;
@@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
             }
@@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                                temps[args[2]].val, args[3]);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
             } else {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 && temps[args[2]].state == TCG_TEMP_CONST) {
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                                temps[args[2]].val, args[5]);
-                if (args[0] == args[4-tmp]
-                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
-                        && temps[args[4-tmp]].val == args[0])) {
+                if (temps_are_copies(args[0], args[4-tmp])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                     gen_opc_buf[op_index] = op_to_movi(op);
-                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
-                                     nb_temps, nb_globals);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                     gen_args += 2;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
-                                    nb_temps, nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                 for (i = 0; i < nb_globals; i++) {
-                    reset_temp(i, nb_temps, nb_globals);
+                    reset_temp(i);
                 }
             }
             for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1], nb_temps, nb_globals);
+                reset_temp(args[i + 1]);
             }
             i = nb_call_args + 3;
             while (i) {
@@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
                 for (i = 0; i < def->nb_oargs; i++) {
-                    reset_temp(args[i], nb_temps, nb_globals);
+                    reset_temp(args[i]);
                 }
             }
             for (i = 0; i < def->nb_args; i++) {
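A minimal stand-alone model of the new copy tracking, to make the list manipulation above easier to follow (illustrative only; field and function names are mine, not QEMU's):

    /* Each temp in an equivalence class sits on one circular, doubly linked
       list; dropping a temp just unlinks it, and a class with one remaining
       member stops being a copy at all (mirrors reset_temp above). */
    struct copy_info {
        int next_copy;
        int prev_copy;
        int is_copy;            /* stands in for the TCG_TEMP_COPY state */
    };

    static struct copy_info info[64];

    static void unlink_copy(int t)
    {
        if (!info[t].is_copy) {
            return;
        }
        if (info[t].prev_copy == info[t].next_copy) {
            /* only one other member left: it is no longer a copy either */
            info[info[t].next_copy].is_copy = 0;
        } else {
            info[info[t].next_copy].prev_copy = info[t].prev_copy;
            info[info[t].prev_copy].next_copy = info[t].next_copy;
        }
        info[t].is_copy = 0;
    }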
commit b80bb016d8c8e9d74345a90ab6dac1cb547904e0
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 12:26:23 2012 +0200

    tcg/optimize: check types in copy propagation
    
    The copy propagation code doesn't check the types of the temps.
    However, TCG uses mov_i32 for the i64 to i32 conversion, so two temps
    of different types are not equivalent.
    
    With this patch tcg_opt_gen_mov() no longer considers two temps of
    different types as copies.
    
    So far it seems the optimization was not aggressive enough to trigger
    this bug, but it will be triggered later in this series once the copy
    propagation is improved.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 308b7f9..da8dffe 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -106,12 +106,13 @@ static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
-static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
-                            int nb_temps, int nb_globals)
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+                            TCGArg dst, TCGArg src)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst, s->nb_temps, s->nb_globals);
         assert(temps[src].state != TCG_TEMP_COPY);
-        if (src >= nb_globals) {
+        /* Only consider temps with the same type (width) as copies. */
+        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
             assert(temps[src].state != TCG_TEMP_CONST);
             if (temps[src].state != TCG_TEMP_HAS_COPY) {
                 temps[src].state = TCG_TEMP_HAS_COPY;
@@ -461,8 +462,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[1],
-                                    nb_temps, nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
                 args += 3;
@@ -499,8 +499,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
-                                    nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
                 }
                 args += 3;
@@ -524,8 +523,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(gen_args, args[0], args[1],
-                                nb_temps, nb_globals);
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                 gen_args += 2;
                 args += 2;
                 break;
commit 48b56ce1683dec02a29448f31861fca4dd0a0b33
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Sep 10 23:51:42 2012 +0200

    tcg/optimize: remove TCG_TEMP_ANY
    
    TCG_TEMP_ANY has no meaning distinct from TCG_TEMP_UNDEF, so use
    the latter instead.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1be7631..308b7f9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,8 +39,7 @@ typedef enum {
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY,
-    TCG_TEMP_ANY
+    TCG_TEMP_HAS_COPY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -52,7 +51,7 @@ struct tcg_temp_info {
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
    class of equivalent temp's, a new representative should be chosen in this
    class. */
 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
@@ -69,7 +68,7 @@ static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
         }
         for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
             if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_ANY;
+                temps[i].state = TCG_TEMP_UNDEF;
             } else {
                 temps[i].val = new_base;
             }
@@ -81,9 +80,9 @@ static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
         temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         new_base = temps[temp].val;
     }
-    temps[temp].state = TCG_TEMP_ANY;
+    temps[temp].state = TCG_TEMP_UNDEF;
     if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_ANY;
+        temps[new_base].state = TCG_TEMP_UNDEF;
     }
 }
 
commit 7d7c4930ab83349ee31945d93a1747331c37c0eb
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: implement movcond op on MIPS32R2
    
    movcond operation can be implemented on MIPS32 Release 2 using the MOVN,
    MOVZ, SLT and SLTU instructions.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index b2e1056..c272b38 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -308,6 +308,8 @@ enum {
     OPC_SRAV     = OPC_SPECIAL | 0x07,
     OPC_JR       = OPC_SPECIAL | 0x08,
     OPC_JALR     = OPC_SPECIAL | 0x09,
+    OPC_MOVZ     = OPC_SPECIAL | 0x0A,
+    OPC_MOVN     = OPC_SPECIAL | 0x0B,
     OPC_MFHI     = OPC_SPECIAL | 0x10,
     OPC_MFLO     = OPC_SPECIAL | 0x12,
     OPC_MULT     = OPC_SPECIAL | 0x18,
@@ -735,6 +737,68 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
     reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
 }
 
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGArg c1, TCGArg c2, TCGArg v)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+        if (c1 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2);
+        } else if (c2 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        }
+        break;
+    case TCG_COND_NE:
+        if (c1 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2);
+        } else if (c2 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        }
+        break;
+    case TCG_COND_LT:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GE:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LE:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GT:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+}
+
 static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                             TCGArg arg1, TCGArg arg2)
 {
@@ -1468,6 +1532,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]);
+        break;
+
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
         break;
@@ -1559,6 +1627,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
 
     { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 897a737..d147e70 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -86,7 +86,15 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
+
+/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
+#if defined(_MIPS_ARCH_MIPS4) || defined(_MIPS_ARCH_MIPS32) || \
+    defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_LOONGSON2E) || \
+    defined(_MIPS_ARCH_LOONGSON2F)
+#define TCG_TARGET_HAS_movcond_i32      1
+#else
 #define TCG_TARGET_HAS_movcond_i32      0
+#endif
 
 /* optional instructions only implemented on MIPS32R2 */
 #ifdef _MIPS_ARCH_MIPS32R2
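
For illustration only (not from the patch): a minimal C model of the MOVZ/MOVN
semantics the sequences above rely on. With the "0" constraint the destination
already holds the "false" value, so movcond reduces to a conditional overwrite
of ret with v.

    #include <stdint.h>

    /* MOVZ rd, rs, rt: rd = (rt == 0) ? rs : rd */
    static uint32_t movz(uint32_t rd, uint32_t rs, uint32_t rt)
    {
        return rt == 0 ? rs : rd;
    }

    /* MOVN rd, rs, rt: rd = (rt != 0) ? rs : rd */
    static uint32_t movn(uint32_t rd, uint32_t rs, uint32_t rt)
    {
        return rt != 0 ? rs : rd;
    }
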
commit 04f71aa3fd002d8c3aeb6db72a9219e1a8b3fef4
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: implement deposit op on MIPS32R2
    
    deposit operations can be optimized on MIPS32 Release 2 using the INS
    instruction.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 592e42a..b2e1056 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -328,6 +328,7 @@ enum {
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
 
     OPC_SPECIAL3 = 0x1f << 26,
+    OPC_INS      = OPC_SPECIAL3 | 0x004,
     OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
@@ -1455,6 +1456,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_ext16s(s, args[0], args[1]);
         break;
 
+    case INDEX_op_deposit_i32:
+        tcg_out_opc_imm(s, OPC_INS, args[0], args[2],
+                        ((args[3] + args[4] - 1) << 11) | (args[3] << 6));
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], args[3]);
         break;
@@ -1550,6 +1556,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
 
+    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
     { INDEX_op_brcond_i32, { "rZ", "rZ" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 470314c..897a737 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -86,7 +86,6 @@ typedef enum {
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
-#define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      0
 
 /* optional instructions only implemented on MIPS32R2 */
@@ -94,10 +93,12 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_deposit_i32      1
 #else
 #define TCG_TARGET_HAS_bswap16_i32      0
 #define TCG_TARGET_HAS_bswap32_i32      0
 #define TCG_TARGET_HAS_rot_i32          0
+#define TCG_TARGET_HAS_deposit_i32      0
 #endif
 
 /* optional instructions automatically implemented */
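
For illustration only (not from the patch): a C model of the deposit operation
and of the immediate field built above. The MIPS32R2 INS instruction encodes
msb = pos + len - 1 in bits 15..11 and lsb = pos in bits 10..6, which is what
the expression passed to tcg_out_opc_imm() produces.

    #include <stdint.h>

    /* deposit: insert the low `len` bits of src into dst at bit `pos`. */
    static uint32_t deposit32(uint32_t dst, uint32_t src, unsigned pos, unsigned len)
    {
        uint32_t mask = (len == 32 ? ~0u : (1u << len) - 1) << pos;
        return (dst & ~mask) | ((src << pos) & mask);
    }

    /* The msb/lsb immediate field used for OPC_INS. */
    static uint32_t ins_imm_field(unsigned pos, unsigned len)
    {
        return ((pos + len - 1) << 11) | (pos << 6);
    }
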
commit 9a152519a9f767297c92b5840c91a22235295c8d
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: implement rotl/rotr ops on MIPS32R2
    
    rotr operations can be optimized on MIPS32 Release 2 using the ROTR and
    ROTRV instructions. rotl operations are also implemented, by subtracting
    the shift amount from 32.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 8b2f9fc..592e42a 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -300,9 +300,11 @@ enum {
     OPC_SPECIAL  = 0x00 << 26,
     OPC_SLL      = OPC_SPECIAL | 0x00,
     OPC_SRL      = OPC_SPECIAL | 0x02,
+    OPC_ROTR     = OPC_SPECIAL | (0x01 << 21) | 0x02,
     OPC_SRA      = OPC_SPECIAL | 0x03,
     OPC_SLLV     = OPC_SPECIAL | 0x04,
     OPC_SRLV     = OPC_SPECIAL | 0x06,
+    OPC_ROTRV    = OPC_SPECIAL | (0x01 <<  6) | 0x06,
     OPC_SRAV     = OPC_SPECIAL | 0x07,
     OPC_JR       = OPC_SPECIAL | 0x08,
     OPC_JALR     = OPC_SPECIAL | 0x09,
@@ -1420,6 +1422,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]);
         }
         break;
+    case INDEX_op_rotl_i32:
+        if (const_args[2]) {
+            tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32);
+            tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]);
+            tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]);
+        }
+        break;
+    case INDEX_op_rotr_i32:
+        if (const_args[2]) {
+            tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]);
+        } else {
+            tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]);
+        }
+        break;
 
     /* The bswap routines do not work on non-R2 CPU. In that case
        we let TCG generating the corresponding code. */
@@ -1523,6 +1541,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
     { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
     { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
 
     { INDEX_op_bswap16_i32, { "r", "r" } },
     { INDEX_op_bswap32_i32, { "r", "r" } },
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c5c13f7..470314c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -80,7 +80,6 @@ typedef enum {
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_nor_i32          1
-#define TCG_TARGET_HAS_rot_i32          0
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
 #define TCG_TARGET_HAS_andc_i32         0
@@ -94,9 +93,11 @@ typedef enum {
 #ifdef _MIPS_ARCH_MIPS32R2
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
+#define TCG_TARGET_HAS_rot_i32          1
 #else
 #define TCG_TARGET_HAS_bswap16_i32      0
 #define TCG_TARGET_HAS_bswap32_i32      0
+#define TCG_TARGET_HAS_rot_i32          0
 #endif
 
 /* optional instructions automatically implemented */
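
For illustration only (not from the patch): MIPS32R2 only provides a
rotate-right, so a rotate-left by n is emitted as a rotate-right by 32 - n.
A minimal C model of that lowering:

    #include <stdint.h>

    static uint32_t rotr32(uint32_t x, unsigned n)
    {
        n &= 31;
        return n ? (x >> n) | (x << (32 - n)) : x;
    }

    /* rotl by n == rotr by (32 - n), as done above with 0x20 - args[2]. */
    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        return rotr32(x, (32 - n) & 31);
    }
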
commit c1cf85c9acb847c4871c0b241ced8fabf410d502
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: optimize bswap{16,16s,32} on MIPS32R2
    
    bswap operations can be optimized on MIPS32 Release 2 using the ROTR,
    WSBH and SEH instructions. We can't use the non-R2 code to implement the
    ops due to register constraints, so don't define the corresponding
    TCG_TARGET_HAS_bswap* values.
    
    Also, bswap16* operations are supposed to be called with the 16 high bits
    zeroed. This is the case everywhere (including for TCG by definition)
    except when called from the store helper, so remove the AND instructions
    from bswap16* and move them there.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 6aa4527..8b2f9fc 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -326,6 +326,7 @@ enum {
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
 
     OPC_SPECIAL3 = 0x1f << 26,
+    OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
 };
@@ -419,36 +420,45 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
 
 static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#ifdef _MIPS_ARCH_MIPS32R2
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+#else
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
     }
 
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
-
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
     tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
 static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#ifdef _MIPS_ARCH_MIPS32R2
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+    tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
+#else
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
     }
 
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
-
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
     tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
 static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#ifdef _MIPS_ARCH_MIPS32R2
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+    tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+#else
     /* ret and arg must be different and can't be register at */
     if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
@@ -466,6 +476,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
 static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
@@ -1188,7 +1199,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         break;
     case 1:
         if (TCG_NEED_BSWAP) {
-            tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
+            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
+            tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
             tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
         } else {
             tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
@@ -1409,6 +1421,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    /* The bswap routines do not work on non-R2 CPU. In that case
+       we let TCG generating the corresponding code. */
+    case INDEX_op_bswap16_i32:
+        tcg_out_bswap16(s, args[0], args[1]);
+        break;
+    case INDEX_op_bswap32_i32:
+        tcg_out_bswap32(s, args[0], args[1]);
+        break;
+
     case INDEX_op_ext8s_i32:
         tcg_out_ext8s(s, args[0], args[1]);
         break;
@@ -1503,6 +1524,9 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
     { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
 
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
+
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
 
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 9c68a32..c5c13f7 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -83,8 +83,6 @@ typedef enum {
 #define TCG_TARGET_HAS_rot_i32          0
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
-#define TCG_TARGET_HAS_bswap32_i32      0
-#define TCG_TARGET_HAS_bswap16_i32      0
 #define TCG_TARGET_HAS_andc_i32         0
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
@@ -92,6 +90,15 @@ typedef enum {
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      0
 
+/* optional instructions only implemented on MIPS32R2 */
+#ifdef _MIPS_ARCH_MIPS32R2
+#define TCG_TARGET_HAS_bswap16_i32      1
+#define TCG_TARGET_HAS_bswap32_i32      1
+#else
+#define TCG_TARGET_HAS_bswap16_i32      0
+#define TCG_TARGET_HAS_bswap32_i32      0
+#endif
+
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
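
For illustration only (not from the patch): a C model of the R2 sequences used
above. WSBH swaps the bytes within each 16-bit halfword; a 16-bit rotate then
completes bswap32, and SEH sign-extends the low halfword for bswap16s.

    #include <stdint.h>

    static uint32_t wsbh(uint32_t x)
    {
        return ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
    }

    static uint32_t bswap32_r2(uint32_t x)
    {
        uint32_t t = wsbh(x);
        return (t >> 16) | (t << 16);   /* ROTR by 16 */
    }

    static int32_t bswap16s_r2(uint32_t x) /* assumes high 16 bits are zero */
    {
        return (int16_t)wsbh(x);        /* SEH: sign-extend low halfword */
    }
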
commit 0f46c064eebc9c93b8f925416e0812cc7b4bb503
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: optimize brcond arg, 0
    
    MIPS has dedicated conditional branch instructions for comparing with
    zero. Use them.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c05169f..6aa4527 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -278,6 +278,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 enum {
     OPC_BEQ      = 0x04 << 26,
     OPC_BNE      = 0x05 << 26,
+    OPC_BLEZ     = 0x06 << 26,
+    OPC_BGTZ     = 0x07 << 26,
     OPC_ADDIU    = 0x09 << 26,
     OPC_SLTI     = 0x0A << 26,
     OPC_SLTIU    = 0x0B << 26,
@@ -319,6 +321,10 @@ enum {
     OPC_SLT      = OPC_SPECIAL | 0x2A,
     OPC_SLTU     = OPC_SPECIAL | 0x2B,
 
+    OPC_REGIMM   = 0x01 << 26,
+    OPC_BLTZ     = OPC_REGIMM | (0x00 << 16),
+    OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
+
     OPC_SPECIAL3 = 0x1f << 26,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
@@ -590,32 +596,48 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
         tcg_out_opc_br(s, OPC_BNE, arg1, arg2);
         break;
     case TCG_COND_LT:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
-        tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BLTZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+            tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_LTU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
         tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_GE:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
-        tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BGEZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+            tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_GEU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
         tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_LE:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
-        tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BLEZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+            tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_LEU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
         tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_GT:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
-        tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BGTZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+            tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_GTU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
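
For illustration only (not from the patch): the zero-compare branches used
above test the following signed conditions, replacing an SLT/SLTU plus
BNE/BEQ pair with a single instruction.

    #include <stdint.h>

    static int bltz(int32_t a) { return a <  0; }   /* TCG_COND_LT */
    static int bgez(int32_t a) { return a >= 0; }   /* TCG_COND_GE */
    static int blez(int32_t a) { return a <= 0; }   /* TCG_COND_LE */
    static int bgtz(int32_t a) { return a >  0; }   /* TCG_COND_GT */
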
commit 0d0b53a6708d0ae3ea37828ec6a967d55827ed6d
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: use stack for TCG temps
    
    Use the stack instead of the temp_buf array in CPUState for TCG
    temps.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 0ea6a76..c05169f 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1538,11 +1538,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 {
     int i, frame_size;
 
-    /* reserve some stack space */
+    /* reserve some stack space, also for TCG temps. */
     frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
-                 + TCG_STATIC_CALL_ARGS_SIZE;
+                 + TCG_STATIC_CALL_ARGS_SIZE
+                 + CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
+    tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+                  + TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* TB prologue */
     tcg_out_addi(s, TCG_REG_SP, -frame_size);
@@ -1597,6 +1601,4 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */
 
     tcg_add_target_add_op_defs(mips_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
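
For illustration only: a sketch of the frame-size computation performed in the
prologue above, with placeholder constants standing in for the real ones (the
actual values come from the target headers).

    #include <stddef.h>

    #define NB_CALLEE_SAVED       10   /* placeholder */
    #define STATIC_CALL_ARGS_SIZE 16   /* placeholder */
    #define TEMP_BUF_NLONGS       128  /* placeholder */
    #define STACK_ALIGN           8    /* placeholder */

    static size_t frame_size(void)
    {
        size_t size = NB_CALLEE_SAVED * 4
                    + STATIC_CALL_ARGS_SIZE
                    + TEMP_BUF_NLONGS * sizeof(long);
        /* round up to the stack alignment, as done for frame_size above */
        return (size + STACK_ALIGN - 1) & ~(size_t)(STACK_ALIGN - 1);
    }
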
commit 3314e0089f1bae4b0430f9bf3299c3b16d6ea32b
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: don't use global pointer
    
    Don't use the global pointer in TCG, in case helpers try to access
    global variables.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 8b38f98..0ea6a76 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1529,7 +1529,6 @@ static int tcg_target_callee_save_regs[] = {
     TCG_REG_S5,
     TCG_REG_S6,
     TCG_REG_S7,
-    TCG_REG_GP,
     TCG_REG_FP,
     TCG_REG_RA,       /* should be last for ABI compliance */
 };
@@ -1595,6 +1594,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);   /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);   /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */
 
     tcg_add_target_add_op_defs(mips_op_defs);
     tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
commit 5a0eed379ddff07f1c136c7acedd380b60eecd28
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:26 2012 +0200

    tcg/mips: use TCGArg or TCGReg instead of int
    
    Instead of int, use the correct TCGReg or TCGArg type: TCGReg when
    representing a TCG target register, TCGArg when representing either a
    register or a constant.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index a09c0d6..8b38f98 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -68,7 +68,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #endif
 
 /* check if we really need so many registers :P */
-static const int tcg_target_reg_alloc_order[] = {
+static const TCGReg tcg_target_reg_alloc_order[] = {
     TCG_REG_S0,
     TCG_REG_S1,
     TCG_REG_S2,
@@ -94,14 +94,14 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_V1
 };
 
-static const int tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[4] = {
     TCG_REG_A0,
     TCG_REG_A1,
     TCG_REG_A2,
     TCG_REG_A3
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const TCGReg tcg_target_call_oarg_regs[2] = {
     TCG_REG_V0,
     TCG_REG_V1
 };
@@ -327,7 +327,8 @@ enum {
 /*
  * Type reg
  */
-static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt)
+static inline void tcg_out_opc_reg(TCGContext *s, int opc,
+                                   TCGReg rd, TCGReg rs, TCGReg rt)
 {
     int32_t inst;
 
@@ -341,7 +342,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int r
 /*
  * Type immediate
  */
-static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm)
+static inline void tcg_out_opc_imm(TCGContext *s, int opc,
+                                   TCGReg rt, TCGReg rs, TCGArg imm)
 {
     int32_t inst;
 
@@ -355,7 +357,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int i
 /*
  * Type branch
  */
-static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
+static inline void tcg_out_opc_br(TCGContext *s, int opc,
+                                  TCGReg rt, TCGReg rs)
 {
     /* We pay attention here to not modify the branch target by reading
        the existing value and using it again. This ensure that caches and
@@ -368,7 +371,8 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
 /*
  * Type sa
  */
-static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa)
+static inline void tcg_out_opc_sa(TCGContext *s, int opc,
+                                  TCGReg rd, TCGReg rt, TCGArg sa)
 {
     int32_t inst;
 
@@ -407,7 +411,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
-static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
@@ -422,7 +426,7 @@ static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg)
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
 }
 
-static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
@@ -437,7 +441,7 @@ static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg)
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
 }
 
-static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     /* ret and arg must be different and can't be register at */
     if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
@@ -458,7 +462,7 @@ static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
 }
 
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
 #ifdef _MIPS_ARCH_MIPS32R2
     tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
@@ -468,7 +472,7 @@ static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
 #endif
 }
 
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
 #ifdef _MIPS_ARCH_MIPS32R2
     tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
@@ -478,8 +482,8 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
 #endif
 }
 
-static inline void tcg_out_ldst(TCGContext *s, int opc, int arg,
-                              int arg1, tcg_target_long arg2)
+static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg,
+                                TCGReg arg1, TCGArg arg2)
 {
     if (arg2 == (int16_t) arg2) {
         tcg_out_opc_imm(s, opc, arg, arg1, arg2);
@@ -502,7 +506,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
 }
 
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
 {
     if (val == (int16_t)val) {
         tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
@@ -543,7 +547,7 @@ DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg)
 #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
 #define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \
     tcg_out_movi(s, TCG_TYPE_I32, A, arg);
-DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg)
+DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg)
 #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
 
 /* We don't use the macro for this one to avoid an unnecessary reg-reg
@@ -573,8 +577,8 @@ static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num,
 #endif
 }
 
-static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
-                           int arg2, int label_index)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
+                           TCGArg arg2, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
 
@@ -631,8 +635,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
-                            int arg2, int arg3, int arg4, int label_index)
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
+                            TCGArg arg2, TCGArg arg3, TCGArg arg4,
+                            int label_index)
 {
     void *label_ptr;
 
@@ -694,8 +699,8 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
     reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
-                            int arg1, int arg2)
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGArg arg1, TCGArg arg2)
 {
     switch (cond) {
     case TCG_COND_EQ:
@@ -754,8 +759,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
-                             int arg1, int arg2, int arg3, int arg4)
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                             TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4)
 {
     switch (cond) {
     case TCG_COND_EQ:
@@ -842,7 +847,7 @@ static const void * const qemu_st_helpers[4] = {
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, data_regl, data_regh, data_reg1, data_reg2;
+    TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     void *label1_ptr, *label2_ptr;
     int arg_num;
@@ -850,7 +855,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     int addr_meml;
 # if TARGET_LONG_BITS == 64
     uint8_t *label3_ptr;
-    int addr_regh, addr_memh;
+    TCGReg addr_regh;
+    int addr_memh;
 # endif
 #endif
     data_regl = *args++;
@@ -1026,7 +1032,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, data_regl, data_regh, data_reg1, data_reg2;
+    TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     uint8_t *label1_ptr, *label2_ptr;
     int arg_num;
@@ -1036,7 +1042,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #if TARGET_LONG_BITS == 64
 # if defined(CONFIG_SOFTMMU)
     uint8_t *label3_ptr;
-    int addr_regh, addr_memh;
+    TCGReg addr_regh;
+    int addr_memh;
 # endif
 #endif
     data_regl = *args++;
commit 0834c9eac31d6a6780594be17c7b3a3d6403ac16
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:25 2012 +0200

    tcg/mips: kill warnings in user mode
    
    Recent versions of GCC emit warnings when compiling user-mode targets.
    Kill them by reordering the #ifdefs a bit.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 9293745..a09c0d6 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -842,18 +842,16 @@ static const void * const qemu_st_helpers[4] = {
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, addr_meml;
-    int data_regl, data_regh, data_reg1, data_reg2;
-    int mem_index, s_bits;
+    int addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     void *label1_ptr, *label2_ptr;
     int arg_num;
-#endif
-#if TARGET_LONG_BITS == 64
-# if defined(CONFIG_SOFTMMU)
+    int mem_index, s_bits;
+    int addr_meml;
+# if TARGET_LONG_BITS == 64
     uint8_t *label3_ptr;
-# endif
     int addr_regh, addr_memh;
+# endif
 #endif
     data_regl = *args++;
     if (opc == 3)
@@ -861,11 +859,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     else
         data_regh = 0;
     addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
     addr_regh = *args++;
-#endif
+#  if defined(TCG_TARGET_WORDS_BIGENDIAN)
+    addr_memh = 0;
+    addr_meml = 4;
+#  else
+    addr_memh = 4;
+    addr_meml = 0;
+#  endif
+# else
+    addr_meml = 0;
+# endif
     mem_index = *args;
     s_bits = opc & 3;
+#endif
 
     if (opc == 3) {
 #if defined(TCG_TARGET_WORDS_BIGENDIAN)
@@ -879,18 +888,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         data_reg1 = data_regl;
         data_reg2 = 0;
     }
-#if TARGET_LONG_BITS == 64
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
-    addr_memh = 0;
-    addr_meml = 4;
-# else
-    addr_memh = 4;
-    addr_meml = 0;
-# endif
-#else
-    addr_meml = 0;
-#endif
-
 #if defined(CONFIG_SOFTMMU)
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
@@ -1029,50 +1026,55 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, addr_meml;
-    int data_regl, data_regh, data_reg1, data_reg2;
-    int mem_index, s_bits;
+    int addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     uint8_t *label1_ptr, *label2_ptr;
     int arg_num;
+    int mem_index, s_bits;
+    int addr_meml;
 #endif
 #if TARGET_LONG_BITS == 64
 # if defined(CONFIG_SOFTMMU)
     uint8_t *label3_ptr;
-# endif
     int addr_regh, addr_memh;
+# endif
 #endif
-
     data_regl = *args++;
     if (opc == 3) {
         data_regh = *args++;
-#if defined(TCG_TARGET_WORDS_BIGENDIAN)
-        data_reg1 = data_regh;
-        data_reg2 = data_regl;
-#else
-        data_reg1 = data_regl;
-        data_reg2 = data_regh;
-#endif
     } else {
-        data_reg1 = data_regl;
-        data_reg2 = 0;
         data_regh = 0;
     }
     addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
     addr_regh = *args++;
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
+#  if defined(TCG_TARGET_WORDS_BIGENDIAN)
     addr_memh = 0;
     addr_meml = 4;
-# else
+#  else
     addr_memh = 4;
     addr_meml = 0;
-# endif
-#else
+#  endif
+# else
     addr_meml = 0;
-#endif
+# endif
     mem_index = *args;
     s_bits = opc;
+#endif
+
+    if (opc == 3) {
+#if defined(TCG_TARGET_WORDS_BIGENDIAN)
+        data_reg1 = data_regh;
+        data_reg2 = data_regl;
+#else
+        data_reg1 = data_regl;
+        data_reg2 = data_regh;
+#endif
+    } else {
+        data_reg1 = data_regl;
+        data_reg2 = 0;
+    }
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
commit 2ceb3a9e0f2110c7d96134f2db7593609ed50bc2
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 18:20:25 2012 +0200

    tcg-mips: fix wrong usage of 'Z' constraint
    
    The 'Z' constraint was introduced to map the zero register. However,
    when the op also accepts a constant, there is no point in accepting the
    zero register as well.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 74db83d..9293745 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1453,24 +1453,24 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_st16_i32, { "rZ", "r" } },
     { INDEX_op_st_i32, { "rZ", "r" } },
 
-    { INDEX_op_add_i32, { "r", "rZ", "rJZ" } },
+    { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
     { INDEX_op_div_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_divu_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_rem_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_remu_i32, { "r", "rZ", "rZ" } },
-    { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } },
+    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
 
-    { INDEX_op_and_i32, { "r", "rZ", "rIZ" } },
+    { INDEX_op_and_i32, { "r", "rZ", "rI" } },
     { INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_not_i32, { "r", "rZ" } },
     { INDEX_op_or_i32, { "r", "rZ", "rIZ" } },
     { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } },
 
-    { INDEX_op_shl_i32, { "r", "rZ", "riZ" } },
-    { INDEX_op_shr_i32, { "r", "rZ", "riZ" } },
-    { INDEX_op_sar_i32, { "r", "rZ", "riZ" } },
+    { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
 
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
@@ -1479,8 +1479,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
 
-    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
-    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
 
 #if TARGET_LONG_BITS == 32
commit f4bf0b912e780978a37979f7a9fad40f99aa2241
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 11:00:23 2012 -0700

    tcg-sparc: Preserve branch destinations during retranslation
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 1db0c9d..876da4f 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -488,30 +488,33 @@ static inline void tcg_out_nop(TCGContext *s)
 static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint32_t off22;
 
     if (l->has_value) {
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2)
-                      | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
+        off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
     } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
     }
+    tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
 }
 
 #if TCG_TARGET_REG_BITS == 64
 static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint32_t off19;
 
     if (l->has_value) {
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) |
-                      INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
+        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
     } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) | 0));
     }
+    tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
+                  (0x5 << 19) | off19));
 }
 #endif
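
For illustration only (field mask assumed): the point of the change above is
that when a branch is re-emitted during retranslation, the displacement a
previous pass may already have patched in must be carried over, so it is read
back from the existing instruction and masked before the opcode bits are
rewritten. A minimal C sketch:

    #include <stdint.h>

    /* Rebuild a branch, keeping the low 22-bit displacement field of the
       instruction already in the code buffer (mask is illustrative). */
    static uint32_t reemit_branch22(uint32_t old_insn, uint32_t opcode_bits)
    {
        uint32_t off22 = old_insn & 0x003fffffu;
        return opcode_bits | off22;
    }
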
 
commit 5bbd2cae8e087b4329e17c3f44439096e4c3ec6a
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:48:51 2012 -0700

    tcg-sparc: Fix and enable direct TB chaining.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/exec-all.h b/exec-all.h
index dba9609..6516da0 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -132,9 +132,10 @@ static inline void tlb_flush(CPUArchState *env, int flush_global)
 #define CODE_GEN_AVG_BLOCK_SIZE 64
 #endif
 
-#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__)
-#define USE_DIRECT_JUMP
-#elif defined(CONFIG_TCG_INTERPRETER)
+#if defined(__arm__) || defined(_ARCH_PPC) \
+    || defined(__x86_64__) || defined(__i386__) \
+    || defined(__sparc__) \
+    || defined(CONFIG_TCG_INTERPRETER)
 #define USE_DIRECT_JUMP
 #endif
 
@@ -244,6 +245,8 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
     __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
 #endif
 }
+#elif defined(__sparc__)
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
 #error tb_set_jmp_target1 is missing
 #endif
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 03c385a..1db0c9d 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1072,10 +1072,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            tcg_out_sethi(s, TCG_REG_T1, args[0] & 0xffffe000);
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
-                      INSN_IMM13((args[0] & 0x1fff)));
+            uint32_t old_insn = *(uint32_t *)s->code_ptr;
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            /* Make sure to preserve links during retranslation.  */
+            tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
         } else {
             /* indirect jump method */
             tcg_out_ld_ptr(s, TCG_REG_T1,
@@ -1595,3 +1595,18 @@ void tcg_register_jit(void *buf, size_t buf_size)
 
     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
 }
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+    uint32_t *ptr = (uint32_t *)jmp_addr;
+    tcg_target_long disp = (tcg_target_long)(addr - jmp_addr) >> 2;
+
+    /* We can reach the entire address space for 32-bit.  For 64-bit
+       the code_gen_buffer can't be larger than 2GB.  */
+    if (TCG_TARGET_REG_BITS == 64 && !check_fit_tl(disp, 30)) {
+        tcg_abort();
+    }
+
+    *ptr = CALL | (disp & 0x3fffffff);
+    flush_icache_range(jmp_addr, jmp_addr + 4);
+}
commit 26adfb759c4c6ad42958c365812945316f3468ae
Author: Richard Henderson <rth at twiddle.net>
Date:   Sun Mar 25 22:43:17 2012 +0200

    tcg-sparc: Add %g/%o registers to alloc_order
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index d401f8e..03c385a 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -78,12 +78,25 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L5,
     TCG_REG_L6,
     TCG_REG_L7,
+
     TCG_REG_I0,
     TCG_REG_I1,
     TCG_REG_I2,
     TCG_REG_I3,
     TCG_REG_I4,
     TCG_REG_I5,
+
+    TCG_REG_G2,
+    TCG_REG_G3,
+    TCG_REG_G4,
+    TCG_REG_G5,
+
+    TCG_REG_O0,
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
+    TCG_REG_O4,
+    TCG_REG_O5,
 };
 
 static const int tcg_target_call_iarg_regs[6] = {
commit 375816f84b820e087fab8ff584bd5b8f27811cdb
Author: Richard Henderson <rth at twiddle.net>
Date:   Sun Mar 25 22:04:59 2012 +0200

    tcg-sparc: Use defines for temporaries.
    
    And change from %i4/%i5 to %g1/%o7 to remove a v8plus fixme.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index be5c170..d401f8e 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,8 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+/* Define some temporary registers.  T2 is used for constant generation.  */
+#define TCG_REG_T1  TCG_REG_G1
+#define TCG_REG_T2  TCG_REG_O7
+
 #ifdef CONFIG_USE_GUEST_BASE
-# define TCG_GUEST_BASE_REG TCG_REG_I3
+# define TCG_GUEST_BASE_REG TCG_REG_I5
 #else
 # define TCG_GUEST_BASE_REG TCG_REG_G0
 #endif
@@ -79,6 +83,7 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_I2,
     TCG_REG_I3,
     TCG_REG_I4,
+    TCG_REG_I5,
 };
 
 static const int tcg_target_call_iarg_regs[6] = {
@@ -366,10 +371,10 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
         tcg_out_sethi(s, ret, ~arg);
         tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
     } else {
-        tcg_out_movi_imm32(s, TCG_REG_I4, arg >> (TCG_TARGET_REG_BITS / 2));
-        tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
-        tcg_out_movi_imm32(s, ret, arg);
-        tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
+        tcg_out_movi_imm32(s, ret, arg >> (TCG_TARGET_REG_BITS / 2));
+        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+        tcg_out_movi_imm32(s, TCG_REG_T2, arg);
+        tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
     }
 }
 
@@ -386,8 +391,8 @@ static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
                   INSN_IMM13(offset));
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
-        tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+        tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
     }
 }
 
@@ -428,8 +433,8 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         if (check_fit_tl(val, 13))
             tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
         else {
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, val);
-            tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_ADD);
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val);
+            tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD);
         }
     }
 }
@@ -441,8 +446,8 @@ static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
         if (check_fit_tl(val, 13))
             tcg_out_arithi(s, rd, rs, val, ARITH_AND);
         else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
-            tcg_out_arith(s, rd, rs, TCG_REG_I5, ARITH_AND);
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T1, val);
+            tcg_out_arith(s, rd, rs, TCG_REG_T1, ARITH_AND);
         }
     }
 }
@@ -454,8 +459,8 @@ static void tcg_out_div32(TCGContext *s, int rd, int rs1,
     if (uns) {
         tcg_out_sety(s, TCG_REG_G0);
     } else {
-        tcg_out_arithi(s, TCG_REG_I5, rs1, 31, SHIFT_SRA);
-        tcg_out_sety(s, TCG_REG_I5);
+        tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+        tcg_out_sety(s, TCG_REG_T1);
     }
 
     tcg_out_arithc(s, rd, rs1, val2, val2const,
@@ -601,8 +606,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
     case TCG_COND_GTU:
     case TCG_COND_GEU:
         if (c2const && c2 != 0) {
-            tcg_out_movi_imm13(s, TCG_REG_I5, c2);
-            c2 = TCG_REG_I5;
+            tcg_out_movi_imm13(s, TCG_REG_T1, c2);
+            c2 = TCG_REG_T1;
         }
         t = c1, c1 = c2, c2 = t, c2const = 0;
         cond = tcg_swap_cond(cond);
@@ -649,15 +654,15 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 
     switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_I5, al, bl, blconst);
+        tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst);
         tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_AND);
+        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND);
         break;
 
     case TCG_COND_NE:
-        tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_I5, al, al, blconst);
+        tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst);
         tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_OR);
+        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR);
         break;
 
     default:
@@ -935,8 +940,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 #else
     addr_reg = args[addrlo_idx];
     if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
-        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
-        addr_reg = TCG_REG_I5;
+        tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_T1;
     }
     if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
         int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
@@ -979,12 +984,11 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
                                 offsetof(CPUTLBEntry, addr_write));
 
     if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
-        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
-        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
-        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+        /* Reconstruct the full 64-bit value.  */
+        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
         tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
-        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
-        datalo = TCG_REG_G1;
+        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_O2;
     }
 
     /* The fast path is exactly one insn.  Thus we can perform the entire
@@ -1024,16 +1028,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
 #else
     addr_reg = args[addrlo_idx];
     if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
-        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
-        addr_reg = TCG_REG_I5;
+        tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_T1;
     }
     if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
-        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
-        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
-        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
         tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
-        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
-        datalo = TCG_REG_G1;
+        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_O2;
     }
     tcg_out_ldst_rr(s, datalo, addr_reg,
                     (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
@@ -1057,28 +1059,29 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            tcg_out_sethi(s, TCG_REG_I5, args[0] & 0xffffe000);
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
+            tcg_out_sethi(s, TCG_REG_T1, args[0] & 0xffffe000);
+            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
                       INSN_IMM13((args[0] & 0x1fff)));
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
         } else {
             /* indirect jump method */
-            tcg_out_ld_ptr(s, TCG_REG_I5, (tcg_target_long)(s->tb_next + args[0]));
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
+            tcg_out_ld_ptr(s, TCG_REG_T1,
+                           (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
                       INSN_RS2(TCG_REG_G0));
         }
         tcg_out_nop(s);
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
         break;
     case INDEX_op_call:
-        if (const_args[0])
+        if (const_args[0]) {
             tcg_out32(s, CALL | ((((tcg_target_ulong)args[0]
                                    - (tcg_target_ulong)s->code_ptr) >> 2)
                                  & 0x3fffffff));
-        else {
-            tcg_out_ld_ptr(s, TCG_REG_I5,
+        } else {
+            tcg_out_ld_ptr(s, TCG_REG_T1,
                            (tcg_target_long)(s->tb_next + args[0]));
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) |
+            tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_T1) |
                       INSN_RS2(TCG_REG_G0));
         }
         /* delay slot */
@@ -1184,11 +1187,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 
     case INDEX_op_rem_i32:
     case INDEX_op_remu_i32:
-        tcg_out_div32(s, TCG_REG_I5, args[1], args[2], const_args[2],
+        tcg_out_div32(s, TCG_REG_T1, args[1], args[2], const_args[2],
                       opc == INDEX_op_remu_i32);
-        tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
                        ARITH_UMUL);
-        tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+        tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
         break;
 
     case INDEX_op_brcond_i32:
@@ -1305,11 +1308,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         goto gen_arith;
     case INDEX_op_rem_i64:
     case INDEX_op_remu_i64:
-        tcg_out_arithc(s, TCG_REG_I5, args[1], args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, args[1], args[2], const_args[2],
                        opc == INDEX_op_rem_i64 ? ARITH_SDIVX : ARITH_UDIVX);
-        tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
                        ARITH_MULX);
-        tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+        tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
         break;
     case INDEX_op_ext32s_i64:
         if (const_args[1]) {
@@ -1507,15 +1510,15 @@ static void tcg_target_init(TCGContext *s)
                      (1 << TCG_REG_O7));
 
     tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0);
-#if TCG_TARGET_REG_BITS == 64
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use
-#endif
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O7);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
     tcg_add_target_add_op_defs(sparc_op_defs);
 }
 
commit 1fd959466574c3d46f4898f2e27cd3b1060338e4
Author: Richard Henderson <rth at twiddle.net>
Date:   Sun Mar 25 21:36:28 2012 +0200

    tcg-sparc: Mask shift immediates to avoid illegal insns.
    
    The xtensa-test image generates a sra_i32 with count 0x40.
    Whether this is an accident of tcg constant propagation or
    originating directly from the instruction stream is immaterial.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index e625aa3..be5c170 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1154,13 +1154,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         goto gen_arith;
     case INDEX_op_shl_i32:
         c = SHIFT_SLL;
-        goto gen_arith;
+    do_shift32:
+        /* Limit immediate shift count lest we create an illegal insn.  */
+        tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c);
+        break;
     case INDEX_op_shr_i32:
         c = SHIFT_SRL;
-        goto gen_arith;
+        goto do_shift32;
     case INDEX_op_sar_i32:
         c = SHIFT_SRA;
-        goto gen_arith;
+        goto do_shift32;
     case INDEX_op_mul_i32:
         c = ARITH_UMUL;
         goto gen_arith;
@@ -1281,13 +1284,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
     case INDEX_op_shl_i64:
         c = SHIFT_SLLX;
-        goto gen_arith;
+    do_shift64:
+        /* Limit immediate shift count lest we create an illegal insn.  */
+        tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c);
+        break;
     case INDEX_op_shr_i64:
         c = SHIFT_SRLX;
-        goto gen_arith;
+        goto do_shift64;
     case INDEX_op_sar_i64:
         c = SHIFT_SRAX;
-        goto gen_arith;
+        goto do_shift64;
     case INDEX_op_mul_i64:
         c = ARITH_MULX;
         goto gen_arith;
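
As a standalone illustration of the fix above (the helper below is
hypothetical, not QEMU code): SPARC shift instructions only encode a 5-bit
count for 32-bit shifts and a 6-bit count for 64-bit shifts, so an
out-of-range count such as the 0x40 from the xtensa-test image has to be
masked before it is encoded, which is exactly what the args[2] & 31 /
args[2] & 63 in the patch does.

    #include <stdio.h>

    /* Hypothetical helper mirroring the patch: SPARC shift insns encode a
       5-bit (32-bit op) or 6-bit (64-bit op) immediate count, so anything
       larger must be masked before encoding.  */
    static unsigned mask_shift_count(unsigned count, int is_64bit)
    {
        return count & (is_64bit ? 63 : 31);
    }

    int main(void)
    {
        printf("%u\n", mask_shift_count(0x40, 0));  /* the xtensa case: 0 */
        printf("%u\n", mask_shift_count(0x23, 1));  /* 35, unchanged      */
        return 0;
    }
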
commit 4c3204cb12a966d0d1255eeeaabf30ee4bd34629
Author: Richard Henderson <rth at twiddle.net>
Date:   Sun Mar 25 21:21:46 2012 +0200

    tcg-sparc: Clean up cruft stemming from attempts to use global registers.
    
    Don't use -ffixed-gN.  Don't link statically.  Don't save/restore
    AREG0 around calls.  Don't allocate space on the stack for AREG0 save.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/configure b/configure
index 0dc4170..1b86517 100755
--- a/configure
+++ b/configure
@@ -866,19 +866,11 @@ case "$cpu" in
     sparc)
            LDFLAGS="-m32 $LDFLAGS"
            QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
-           QEMU_CFLAGS="-ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
-           if test "$solaris" = "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
-           fi
            host_guest_base="yes"
            ;;
     sparc64)
            LDFLAGS="-m64 $LDFLAGS"
            QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
-           QEMU_CFLAGS="-ffixed-g5 -ffixed-g6 -ffixed-g7 $QEMU_CFLAGS"
-           if test "$solaris" != "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS"
-           fi
            host_guest_base="yes"
            ;;
     s390)
@@ -4101,10 +4093,6 @@ fi
 
 if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
   case "$ARCH" in
-  sparc)
-    # -static is used to avoid g1/g3 usage by the dynamic linker
-    ldflags="$linker_script -static $ldflags"
-    ;;
   alpha | s390x)
     # The default placement of the application is fine.
     ;;
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 9ab5746..e625aa3 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -161,7 +161,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -681,11 +680,22 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_CALL_STACK_OFFSET,
-                  CPU_TEMP_BUF_NLONGS * (int)sizeof(long));
+    int tmp_buf_size, frame_size;
+
+    /* The TCG temp buffer is at the top of the frame, immediately
+       below the frame pointer.  */
+    tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
+                  tmp_buf_size);
+
+    /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+       otherwise the minimal frame usable by callees.  */
+    frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+    frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+    frame_size += TCG_TARGET_STACK_ALIGN - 1;
+    frame_size &= -TCG_TARGET_STACK_ALIGN;
     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
-              INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME +
-                           CPU_TEMP_BUF_NLONGS * (int)sizeof(long))));
+              INSN_IMM13(-frame_size));
 
 #ifdef CONFIG_USE_GUEST_BASE
     if (GUEST_BASE != 0) {
@@ -698,6 +708,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
               INSN_RS2(TCG_REG_G0));
     /* delay slot */
     tcg_out_nop(s);
+
+    /* No epilogue required.  We issue ret + restore directly in the TB.  */
 }
 
 #if defined(CONFIG_SOFTMMU)
@@ -880,12 +892,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
     tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
                 args[addrlo_idx]);
 
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-               sizeof(long));
-
     /* qemu_ld_helper[s_bits](arg0, arg1) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
                            - (tcg_target_ulong)s->code_ptr) >> 2)
@@ -893,11 +899,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
 
-    /* Reload AREG0.  */
-    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-               sizeof(long));
-
     n = tcg_target_call_oarg_regs[0];
     /* datalo = sign_extend(arg0) */
     switch (sizeop) {
@@ -1011,12 +1012,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
     }
     tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
 
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-               sizeof(long));
-
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
                            - (tcg_target_ulong)s->code_ptr) >> 2)
@@ -1024,11 +1019,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
 
-    /* Reload AREG0.  */
-    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-               sizeof(long));
-
     *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
                              (unsigned long)label_ptr);
 #else
@@ -1091,15 +1081,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
             tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) |
                       INSN_RS2(TCG_REG_G0));
         }
-        /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-           global registers */
-        // delay slot
-        tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                   sizeof(long));
-        tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                   sizeof(long));
+        /* delay slot */
+        tcg_out_nop(s);
         break;
     case INDEX_op_jmp:
     case INDEX_op_br:
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index ee154d0..6314ffb 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -66,20 +66,16 @@ typedef enum {
 #define TCG_CT_CONST_S13 0x200
 
 /* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_I6
+#define TCG_REG_CALL_STACK TCG_REG_O6
 
 #if TCG_TARGET_REG_BITS == 64
-// Reserve space for AREG0
-#define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \
-                                   TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET (2047 - 16)
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_STACK_BIAS           2047
+#define TCG_TARGET_STACK_ALIGN          16
+#define TCG_TARGET_CALL_STACK_OFFSET    (128 + 6*8 + TCG_TARGET_STACK_BIAS)
 #else
-// AREG0 + one word for alignment
-#define TCG_TARGET_STACK_MINFRAME (92 + (2 + 1) * (int)sizeof(long) + \
-                                   TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET TCG_TARGET_STACK_MINFRAME
-#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_STACK_BIAS           0
+#define TCG_TARGET_STACK_ALIGN          8
+#define TCG_TARGET_CALL_STACK_OFFSET    (64 + 4 + 6*4)
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
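
The new prologue above rounds the computed frame size up to
TCG_TARGET_STACK_ALIGN with the usual add-then-mask idiom.  A minimal,
self-contained sketch of that idiom (the byte counts used here are made up;
only the rounding matches the patch):

    #include <stdio.h>

    /* The add-then-mask round-up used by the new prologue:
       size += ALIGN - 1; size &= -ALIGN;  with ALIGN a power of two.  */
    static int round_up(int size, int align)
    {
        return (size + align - 1) & -align;
    }

    int main(void)
    {
        /* Sizes here are illustrative; only the rounding matches the patch. */
        printf("%d\n", round_up(1330, 16));  /* 1344 */
        printf("%d\n", round_up(1328, 16));  /* 1328, already aligned */
        return 0;
    }
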
commit 0c554161b6fadf75e42d4228e952ff100176ff8a
Author: Richard Henderson <rth at twiddle.net>
Date:   Sun Mar 25 19:52:11 2012 +0200

    tcg-sparc: Change AREG0 in generated code to %i0.
    
    We can now move the TCG variable from %g[56] to a call-preserved
    windowed register.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 5acfeba..9ab5746 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -696,7 +696,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) |
               INSN_RS2(TCG_REG_G0));
-    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0);
+    /* delay slot */
+    tcg_out_nop(s);
 }
 
 #if defined(CONFIG_SOFTMMU)
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 99e9f57..ee154d0 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -130,13 +130,7 @@ typedef enum {
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-#ifdef CONFIG_SOLARIS
-#define TCG_AREG0 TCG_REG_G2
-#elif HOST_LONG_BITS == 64
-#define TCG_AREG0 TCG_REG_G5
-#else
-#define TCG_AREG0 TCG_REG_G6
-#endif
+#define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(tcg_target_ulong start,
                                       tcg_target_ulong stop)
commit c6f7e4fb9a0f82bb826f4bd7dff6f2c90d6a83be
Author: Richard Henderson <rth at twiddle.net>
Date:   Sat Mar 24 22:11:25 2012 +0100

    tcg-sparc: Support GUEST_BASE.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/configure b/configure
index df888f2..0dc4170 100755
--- a/configure
+++ b/configure
@@ -870,6 +870,7 @@ case "$cpu" in
            if test "$solaris" = "no" ; then
              QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
            fi
+           host_guest_base="yes"
            ;;
     sparc64)
            LDFLAGS="-m64 $LDFLAGS"
@@ -878,6 +879,7 @@ case "$cpu" in
            if test "$solaris" != "no" ; then
              QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS"
            fi
+           host_guest_base="yes"
            ;;
     s390)
            QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS"
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index d89c19b..5acfeba 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+#ifdef CONFIG_USE_GUEST_BASE
+# define TCG_GUEST_BASE_REG TCG_REG_I3
+#else
+# define TCG_GUEST_BASE_REG TCG_REG_G0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -680,6 +686,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
               INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME +
                            CPU_TEMP_BUF_NLONGS * (int)sizeof(long))));
+
+#ifdef CONFIG_USE_GUEST_BASE
+    if (GUEST_BASE != 0) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
     tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) |
               INSN_RS2(TCG_REG_G0));
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0);
@@ -925,14 +939,18 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
     if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
         int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
 
-        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
+        tcg_out_ldst_rr(s, reg64, addr_reg,
+                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                        qemu_ld_opc[sizeop]);
 
         tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
         if (reg64 != datalo) {
             tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
         }
     } else {
-        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
+        tcg_out_ldst_rr(s, datalo, addr_reg,
+                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                        qemu_ld_opc[sizeop]);
     }
 #endif /* CONFIG_SOFTMMU */
 }
@@ -1026,7 +1044,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
         tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
         datalo = TCG_REG_G1;
     }
-    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]);
+    tcg_out_ldst_rr(s, datalo, addr_reg,
+                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                    qemu_st_opc[sizeop]);
 #endif /* CONFIG_SOFTMMU */
 }
 
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index adca1d2..99e9f57 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -128,6 +128,8 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
+#define TCG_TARGET_HAS_GUEST_BASE
+
 #ifdef CONFIG_SOLARIS
 #define TCG_AREG0 TCG_REG_G2
 #elif HOST_LONG_BITS == 64
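
A rough model of what the reserved guest-base register buys (the helper is
hypothetical, not QEMU's API): in user-mode emulation every guest access is
simply the guest address plus a constant host offset, so keeping that offset
in %i3 (or using %g0, which always reads as zero, when GUEST_BASE is 0) lets
every qemu_ld/st use the same reg+reg addressing form.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical guest-to-host mapping helper: the point of
       TCG_GUEST_BASE_REG is that this addition costs a single reg+reg
       load/store, with %g0 standing in for a zero base.  */
    static uint8_t *guest_to_host(uint8_t *guest_base, uintptr_t guest_addr)
    {
        return guest_base + guest_addr;
    }

    int main(void)
    {
        uint8_t ram[64] = { 0 };
        ram[16] = 0x5a;
        printf("0x%02x\n", *guest_to_host(ram, 16));  /* 0x5a */
        return 0;
    }
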
commit a0ce341aac61ca3d56bcb56dcfb6ac11272fd567
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Mar 23 23:27:39 2012 +0100

    tcg-sparc: Fix qemu_ld/st to handle 32-bit host.
    
    At the same time, split out the tlb load logic to a new function.
    Fixes the cases of two data registers and two address registers.
    Fixes the signature of, and adds missing, qemu_ld/st opcodes.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 23c2fda..d89c19b 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,8 +59,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
-#define ARG_OFFSET 1
-
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -288,6 +286,16 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ASI_PRIMARY_LITTLE 0x88
 #endif
 
+#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
                                  int op)
 {
@@ -360,64 +368,43 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
-                                  tcg_target_long arg)
-{
-    tcg_out_sethi(s, ret, arg);
-    tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
-              INSN_IMM13(arg & 0x3ff));
-}
-
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
-                                  tcg_target_long arg)
+static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
+                                   int a2, int op)
 {
-    if (!check_fit_tl(arg, 10))
-        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
-                  INSN_IMM13(arg & 0x3ff));
-    } else {
-        tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
-                  INSN_IMM13(arg & 0x3ff));
-    }
+    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
 }
 
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
+                                int offset, int op)
 {
-    if (check_fit_tl(offset, 13))
+    if (check_fit_tl(offset, 13)) {
         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
                   INSN_IMM13(offset));
-    else {
+    } else {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
-        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
-                  INSN_RS2(addr));
+        tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op);
     }
 }
 
-static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
-                                    int offset, int op, int asi)
-{
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
-    tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
-              INSN_ASI(asi) | INSN_RS2(addr));
-}
-
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    if (type == TCG_TYPE_I32)
-        tcg_out_ldst(s, ret, arg1, arg2, LDUW);
-    else
-        tcg_out_ldst(s, ret, arg1, arg2, LDX);
+    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    if (type == TCG_TYPE_I32)
-        tcg_out_ldst(s, arg, arg1, arg2, STW);
-    else
-        tcg_out_ldst(s, arg, arg1, arg2, STX);
+    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
+                                  tcg_target_long arg)
+{
+    if (!check_fit_tl(arg, 10)) {
+        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
+    }
+    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
 }
 
 static inline void tcg_out_sety(TCGContext *s, int rs)
@@ -442,14 +429,15 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
     }
 }
 
-static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
+                                tcg_target_long val)
 {
     if (val != 0) {
         if (check_fit_tl(val, 13))
-            tcg_out_arithi(s, reg, reg, val, ARITH_AND);
+            tcg_out_arithi(s, rd, rs, val, ARITH_AND);
         else {
             tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
-            tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
+            tcg_out_arith(s, rd, rs, TCG_REG_I5, ARITH_AND);
         }
     }
 }
@@ -718,418 +706,328 @@ static const void * const qemu_st_helpers[4] = {
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#endif
 
-#if TARGET_LONG_BITS == 32
-#define TARGET_LD_OP LDUW
-#else
-#define TARGET_LD_OP LDX
-#endif
+/* Perform the TLB load and compare.
 
-#if defined(CONFIG_SOFTMMU)
-#if HOST_LONG_BITS == 32
-#define TARGET_ADDEND_LD_OP LDUW
+   Inputs:
+   ADDRLO_IDX contains the index into ARGS of the low part of the
+   address; the high part of the address is at ADDR_LOW_IDX+1.
+
+   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+   This should be offsetof addr_read or addr_write.
+
+   The result of the TLB comparison is in %[ix]cc.  The sanitized address
+   is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
+
+static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
+                            int s_bits, const TCGArg *args, int which)
+{
+    const int addrlo = args[addrlo_idx];
+    const int r0 = TCG_REG_O0;
+    const int r1 = TCG_REG_O1;
+    const int r2 = TCG_REG_O2;
+    int addr = addrlo;
+    int tlb_ofs;
+
+    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+        /* Assemble the 64-bit address in R0.  */
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
+        tcg_out_arith(s, r0, r0, r1, ARITH_OR);
+    }
+
+    /* Shift the page number down to tlb-entry.  */
+    tcg_out_arithi(s, r1, addrlo,
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
+
+    /* Mask out the page offset, except for the required alignment.  */
+    tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    /* Compute tlb index, modulo tlb size.  */
+    tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+
+    /* Relative to the current ENV.  */
+    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
+
+    /* Find a base address that can load both tlb comparator and addend.  */
+    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
+    if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
+        tcg_out_addi(s, r1, tlb_ofs);
+        tlb_ofs = 0;
+    }
+
+    /* Load the tlb comparator and the addend.  */
+    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
+
+    /* subcc arg0, arg2, %g0 */
+    tcg_out_cmp(s, r0, r2, 0);
+
+    /* If the guest address must be zero-extended, do so now.  */
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+        return r0;
+    }
+    return addrlo;
+}
+#endif /* CONFIG_SOFTMMU */
+
+static const int qemu_ld_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+    LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
 #else
-#define TARGET_ADDEND_LD_OP LDX
-#endif
+    LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
 #endif
+};
 
-#if TCG_TARGET_REG_BITS == 64
-#define HOST_LD_OP LDX
-#define HOST_ST_OP STX
-#define HOST_SLL_OP SHIFT_SLLX
-#define HOST_SRA_OP SHIFT_SRAX
+static const int qemu_st_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+    STB, STH, STW, STX
 #else
-#define HOST_LD_OP LDUW
-#define HOST_ST_OP STW
-#define HOST_SLL_OP SHIFT_SLL
-#define HOST_SRA_OP SHIFT_SRA
+    STB, STH_LE, STW_LE, STX_LE
 #endif
+};
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
-                            int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 {
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
 #if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
+    int memi_idx, memi, s_bits, n;
+    uint32_t *label_ptr[2];
 #endif
 
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-    s_bits = opc & 3;
-
-    arg0 = TCG_REG_O0;
-    arg1 = TCG_REG_O1;
-    arg2 = TCG_REG_O2;
+    datahi = datalo = args[0];
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        datahi = args[1];
+        addrlo_idx = 2;
+    }
 
 #if defined(CONFIG_SOFTMMU)
-    /* srl addr_reg, x, arg1 */
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
-                   SHIFT_SRL);
-    /* and addr_reg, x, arg0 */
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+    memi = args[memi_idx];
+    s_bits = sizeop & 3;
+
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
+                                offsetof(CPUTLBEntry, addr_read));
+
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        int reg64;
+
+        /* bne,pn %[xi]cc, label0 */
+        label_ptr[0] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
+                      | ((TARGET_LONG_BITS == 64) << 21)));
+
+        /* TLB Hit.  */
+        /* Load all 64-bits into an O/G register.  */
+        reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
+        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+
+        /* Move the two 32-bit pieces into the destination registers.  */
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+        if (reg64 != datalo) {
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+        }
 
-    /* and arg1, x, arg1 */
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+        /* b,a,pt label1 */
+        label_ptr[1] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
+                      | (1 << 29) | (1 << 19)));
+    } else {
+        /* The fast path is exactly one insn.  Thus we can perform the
+           entire TLB Hit in the (annulled) delay slot of the branch
+           over the TLB Miss case.  */
+
+        /* beq,a,pt %[xi]cc, label0 */
+        label_ptr[0] = NULL;
+        label_ptr[1] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+                      | ((TARGET_LONG_BITS == 64) << 21)
+                      | (1 << 29) | (1 << 19)));
+        /* delay slot */
+        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+    }
 
-    /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
-                                   tlb_table[mem_index][0].addr_read));
+    /* TLB Miss.  */
 
-    /* add env, arg1, arg1 */
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+    if (label_ptr[0]) {
+        *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
+                                    (unsigned long)label_ptr[0]);
+    }
+    n = 0;
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                    args[addrlo_idx + 1]);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                args[addrlo_idx]);
 
-    /* ld [arg1], arg2 */
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
-              INSN_RS2(TCG_REG_G0));
+    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
+       global registers */
+    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+               sizeof(long));
 
-    /* subcc arg0, arg2, %g0 */
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
-    /* will become:
-       be label1
-        or
-       be,pt %xcc label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
-    /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-
-    /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
                            - (tcg_target_ulong)s->code_ptr) >> 2)
                          & 0x3fffffff));
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    // delay slot
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_ST_OP);
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_LD_OP);
-
-    /* data_reg = sign_extend(arg0) */
-    switch(opc) {
+    /* delay slot */
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
+
+    /* Reload AREG0.  */
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+               sizeof(long));
+
+    n = tcg_target_call_oarg_regs[0];
+    /* datalo = sign_extend(arg0) */
+    switch (sizeop) {
     case 0 | 4:
-        /* sll arg0, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
-                       HOST_SLL_OP);
-        /* sra data_reg, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg,
-                       (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
+        /* Recall that SRA sign extends from bit 31 through bit 63.  */
+        tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
+        tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
         break;
     case 1 | 4:
-        /* sll arg0, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, arg0,
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
-        /* sra data_reg, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg,
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
+        tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
+        tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
         break;
     case 2 | 4:
-        /* sll arg0, 32, data_reg */
-        tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
-        /* sra data_reg, 32, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
+        tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
         break;
+    case 3:
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
+            tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
+            break;
+        }
+        /* FALLTHRU */
     case 0:
     case 1:
     case 2:
-    case 3:
     default:
         /* mov */
-        tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
+        tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
         break;
     }
 
-    /* will become:
-       ba label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* nop (delay slot */
-    tcg_out_nop(s);
-
-    /* label1: */
-#if TARGET_LONG_BITS == 32
-    /* be label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#else
-    /* be,pt %xcc label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#endif
-
-    /* ld [arg1 + x], arg1 */
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
-                 offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
-    /* and addr_reg, x, arg0 */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
-    /* add arg0, arg1, arg0 */
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
+    *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
+                                (unsigned long)label_ptr[1]);
 #else
-    /* add addr_reg, arg1, arg0 */
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
+    addr_reg = args[addrlo_idx];
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_I5;
+    }
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
 
-#else
-    arg0 = addr_reg;
-#endif
+        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
 
-    switch(opc) {
-    case 0:
-        /* ldub [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
-        break;
-    case 0 | 4:
-        /* ldsb [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
-        break;
-    case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* lduh [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
-#else
-        /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 1 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldsh [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
-#else
-        /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* lduw [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
-#else
-        /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldsw [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
-#else
-        /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldx [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDX);
-#else
-        /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    default:
-        tcg_abort();
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+        if (reg64 != datalo) {
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+        }
+    } else {
+        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
     }
-
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label2_ptr));
-#endif
+#endif /* CONFIG_SOFTMMU */
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
-                            int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
 {
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
 #if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
+    int memi_idx, memi, n;
+    uint32_t *label_ptr;
 #endif
 
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-
-    s_bits = opc;
-
-    arg0 = TCG_REG_O0;
-    arg1 = TCG_REG_O1;
-    arg2 = TCG_REG_O2;
+    datahi = datalo = args[0];
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        datahi = args[1];
+        addrlo_idx = 2;
+    }
 
 #if defined(CONFIG_SOFTMMU)
-    /* srl addr_reg, x, arg1 */
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
-                   SHIFT_SRL);
-
-    /* and addr_reg, x, arg0 */
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
-
-    /* and arg1, x, arg1 */
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
-                                   tlb_table[mem_index][0].addr_write));
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+    memi = args[memi_idx];
+
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
+                                offsetof(CPUTLBEntry, addr_write));
+
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
+        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
+        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_G1;
+    }
 
-    /* add env, arg1, arg1 */
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+    /* The fast path is exactly one insn.  Thus we can perform the entire
+       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
+    /* beq,a,pt %[xi]cc, label0 */
+    label_ptr = (uint32_t *)s->code_ptr;
+    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+                  | ((TARGET_LONG_BITS == 64) << 21)
+                  | (1 << 29) | (1 << 19)));
+    /* delay slot */
+    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+
+    /* TLB Miss.  */
+
+    n = 0;
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                    args[addrlo_idx + 1]);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                args[addrlo_idx]);
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
 
-    /* ld [arg1], arg2 */
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
-              INSN_RS2(TCG_REG_G0));
+    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
+       global registers */
+    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+               sizeof(long));
 
-    /* subcc arg0, arg2, %g0 */
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
-    /* will become:
-       be label1
-        or
-       be,pt %xcc label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
-    /* mov */
-    tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
-
-    /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-    /* XXX: move that code at the end of the TB */
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
-    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
+    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
                            - (tcg_target_ulong)s->code_ptr) >> 2)
                          & 0x3fffffff));
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    // delay slot
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_ST_OP);
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_LD_OP);
-
-    /* will become:
-       ba label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* nop (delay slot) */
-    tcg_out_nop(s);
+    /* delay slot */
+    tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
 
-#if TARGET_LONG_BITS == 32
-    /* be label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#else
-    /* be,pt %xcc label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#endif
-
-    /* ld [arg1 + x], arg1 */
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
-                 offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
+    /* Reload AREG0.  */
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+               sizeof(long));
 
-#if TARGET_LONG_BITS == 32
-    /* and addr_reg, x, arg0 */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
-    /* add arg0, arg1, arg0 */
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
+    *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
+                             (unsigned long)label_ptr);
 #else
-    /* add addr_reg, arg1, arg0 */
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
-
-#else
-    arg0 = addr_reg;
-#endif
-
-    switch(opc) {
-    case 0:
-        /* stb data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STB);
-        break;
-    case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* sth data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STH);
-#else
-        /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* stw data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STW);
-#else
-        /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* stx data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STX);
-#else
-        /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    default:
-        tcg_abort();
+    addr_reg = args[addrlo_idx];
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_I5;
     }
-
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label2_ptr));
-#endif
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
+        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
+        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_G1;
+    }
+    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]);
+#endif /* CONFIG_SOFTMMU */
 }
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
@@ -1175,12 +1073,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
            global registers */
         // delay slot
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                     sizeof(long), HOST_ST_OP);
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                     sizeof(long), HOST_LD_OP);
+        tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+                   sizeof(long));
+        tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
+                   sizeof(long));
         break;
     case INDEX_op_jmp:
     case INDEX_op_br:
@@ -1348,6 +1246,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_qemu_ld(s, args, 2 | 4);
         break;
 #endif
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
         break;
@@ -1357,6 +1258,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st32:
         tcg_out_qemu_st(s, args, 2);
         break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
 
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_movi_i64:
@@ -1421,13 +1325,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                             args[2], const_args[2]);
         break;
 
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
-        break;
-
 #endif
     gen_arith:
         tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
@@ -1492,20 +1389,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
 #endif
 
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-
 #if TCG_TARGET_REG_BITS == 64
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
@@ -1520,8 +1403,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_st16_i64, { "r", "r" } },
     { INDEX_op_st32_i64, { "r", "r" } },
     { INDEX_op_st_i64, { "r", "r" } },
-    { INDEX_op_qemu_ld64, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
 
     { INDEX_op_add_i64, { "r", "r", "rJ" } },
     { INDEX_op_mul_i64, { "r", "r", "rJ" } },
@@ -1548,10 +1429,48 @@ static const TCGTargetOpDef sparc_op_defs[] = {
 
     { INDEX_op_brcond_i64, { "r", "rJ" } },
     { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
-#else
-    { INDEX_op_qemu_ld64, { "L", "L", "L" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L", "L" } },
+#else
+    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
 #endif
+
     { -1 },
 };
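
For reference, the lookup that the new tcg_out_tlb_load() emits can be read
as the following C model: derive the table index from the page number, mask
the address down to its comparator form, and on a tag match add the per-page
addend to get the host address.  The table geometry and entry layout below
are illustrative only, not QEMU's CPUTLBEntry.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model of the compare tcg_out_tlb_load() emits; the sizes
       and the entry struct are illustrative, not QEMU's.  */
    #define TLB_SIZE        256
    #define PAGE_BITS       12
    #define GUEST_PAGE_MASK (~((1u << PAGE_BITS) - 1))

    typedef struct {
        uint32_t  addr_read;   /* tag: page-aligned guest address     */
        uintptr_t addend;      /* guest-to-host offset for this page  */
    } tlb_entry;

    static tlb_entry tlb[TLB_SIZE];

    static bool tlb_lookup(uint32_t addr, int s_bits, uintptr_t *host)
    {
        unsigned idx = (addr >> PAGE_BITS) & (TLB_SIZE - 1);
        /* Keep the low alignment bits in the compare so an unaligned
           access of size 1 << s_bits also falls through to the miss path,
           as TARGET_PAGE_MASK | ((1 << s_bits) - 1) does in the patch.  */
        uint32_t cmp = addr & (GUEST_PAGE_MASK | ((1u << s_bits) - 1));

        if (tlb[idx].addr_read == cmp) {
            *host = addr + tlb[idx].addend;  /* hit: add the addend  */
            return true;
        }
        return false;                        /* miss: call the helper */
    }

    int main(void)
    {
        uintptr_t host = 0;
        unsigned idx = (0x1234u >> PAGE_BITS) & (TLB_SIZE - 1);
        tlb[idx].addr_read = 0x1000;
        tlb[idx].addend = 0x100000;
        bool hit = tlb_lookup(0x1234, 0, &host);
        printf("hit=%d host=%#lx\n", hit, (unsigned long)host);
        return 0;
    }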
 
commit 9b9c37c36439ee0452632253dac7a31897f27f70
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:34:21 2012 -0700

    tcg-sparc: Assume v9 cpu always, i.e. force v8plus in 32-bit mode.
    
    Current code doesn't actually work in 32-bit mode at all.  Since
    no one really noticed, drop the complication of v7 and v8 cpus.
    Eliminate the --sparc_cpu configure option and standardize macro
    testing on TCG_TARGET_REG_BITS / HOST_LONG_BITS.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/configure b/configure
index 9caa545..df888f2 100755
--- a/configure
+++ b/configure
@@ -111,7 +111,6 @@ source_path=`dirname "$0"`
 cpu=""
 interp_prefix="/usr/gnemul/qemu-%M"
 static="no"
-sparc_cpu=""
 cross_prefix=""
 audio_drv_list=""
 audio_card_list="ac97 es1370 sb16 hda"
@@ -241,21 +240,6 @@ for opt do
   ;;
   --disable-debug-info) debug_info="no"
   ;;
-  --sparc_cpu=*)
-    sparc_cpu="$optarg"
-    case $sparc_cpu in
-    v7|v8|v8plus|v8plusa)
-      cpu="sparc"
-    ;;
-    v9)
-      cpu="sparc64"
-    ;;
-    *)
-      echo "undefined SPARC architecture. Exiting";
-      exit 1
-    ;;
-    esac
-  ;;
   esac
 done
 # OS specific
@@ -343,8 +327,6 @@ elif check_define __i386__ ; then
 elif check_define __x86_64__ ; then
   cpu="x86_64"
 elif check_define __sparc__ ; then
-  # We can't check for 64 bit (when gcc is biarch) or V8PLUSA
-  # They must be specified using --sparc_cpu
   if check_define __arch64__ ; then
     cpu="sparc64"
   else
@@ -792,8 +774,6 @@ for opt do
   ;;
   --enable-uname-release=*) uname_release="$optarg"
   ;;
-  --sparc_cpu=*)
-  ;;
   --enable-werror) werror="yes"
   ;;
   --disable-werror) werror="no"
@@ -881,31 +861,19 @@ for opt do
   esac
 done
 
-#
-# If cpu ~= sparc and  sparc_cpu hasn't been defined, plug in the right
-# QEMU_CFLAGS/LDFLAGS (assume sparc_v8plus for 32-bit and sparc_v9 for 64-bit)
-#
 host_guest_base="no"
 case "$cpu" in
-    sparc) case $sparc_cpu in
-           v7|v8)
-             QEMU_CFLAGS="-mcpu=${sparc_cpu} -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           v8plus|v8plusa)
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           *) # sparc_cpu not defined in the command line
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_v8plus__ $QEMU_CFLAGS"
-           esac
+    sparc)
            LDFLAGS="-m32 $LDFLAGS"
-           QEMU_CFLAGS="-m32 -ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
+           QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
+           QEMU_CFLAGS="-ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
            if test "$solaris" = "no" ; then
              QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
            fi
            ;;
     sparc64)
-           QEMU_CFLAGS="-m64 -mcpu=ultrasparc -D__sparc_v9__ $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
+           QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
            QEMU_CFLAGS="-ffixed-g5 -ffixed-g6 -ffixed-g7 $QEMU_CFLAGS"
            if test "$solaris" != "no" ; then
              QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS"
diff --git a/disas.c b/disas.c
index 7b2acc9..b801c8f 100644
--- a/disas.c
+++ b/disas.c
@@ -316,9 +316,7 @@ void disas(FILE *out, void *code, unsigned long size)
     print_insn = print_insn_alpha;
 #elif defined(__sparc__)
     print_insn = print_insn_sparc;
-#if defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
     disasm_info.mach = bfd_mach_sparc_v9b;
-#endif
 #elif defined(__arm__)
     print_insn = print_insn_arm;
 #elif defined(__MIPSEB__)
diff --git a/exec.c b/exec.c
index 4510985..bb6aa4a 100644
--- a/exec.c
+++ b/exec.c
@@ -86,7 +86,7 @@ static int nb_tbs;
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc_v9__)
+#if defined(__arm__) || defined(__sparc__)
 /* The prologue must be reachable with a direct jump. ARM and Sparc64
  have limited branch ranges (possibly also PPC) so place it in a
  section close to code segment. */
@@ -541,7 +541,7 @@ static void code_gen_alloc(unsigned long tb_size)
         /* Cannot map more than that */
         if (code_gen_buffer_size > (800 * 1024 * 1024))
             code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
         // Map the buffer below 2G, so we can use direct calls and branches
         start = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024))
@@ -581,7 +581,7 @@ static void code_gen_alloc(unsigned long tb_size)
         /* Cannot map more than that */
         if (code_gen_buffer_size > (800 * 1024 * 1024))
             code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
         // Map the buffer below 2G, so we can use direct calls and branches
         addr = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024)) {
diff --git a/qemu-timer.h b/qemu-timer.h
index f8af595..da7e97c 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -218,7 +218,7 @@ static inline int64_t cpu_get_real_ticks(void)
     return val;
 }
 
-#elif defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
+#elif defined(__sparc__)
 
 static inline int64_t cpu_get_real_ticks (void)
 {
@@ -227,6 +227,8 @@ static inline int64_t cpu_get_real_ticks (void)
     asm volatile("rd %%tick,%0" : "=r"(rval));
     return rval;
 #else
+    /* We need an %o or %g register for this.  For recent enough gcc
+       there is an "h" constraint for that.  Don't bother with that.  */
     union {
         uint64_t i64;
         struct {
@@ -234,8 +236,8 @@ static inline int64_t cpu_get_real_ticks (void)
             uint32_t low;
         }       i32;
     } rval;
-    asm volatile("rd %%tick,%1; srlx %1,32,%0"
-                 : "=r"(rval.i32.high), "=r"(rval.i32.low));
+    asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1"
+                 : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1");
     return rval.i64;
 #endif
 }
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 0a19313..23c2fda 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -621,18 +621,10 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 
     default:
         tcg_out_cmp(s, c1, c2, c2const);
-#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
         tcg_out_movi_imm13(s, ret, 0);
-        tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
-                   | INSN_RS1(tcg_cond_to_bcond[cond])
-                   | MOVCC_ICC | INSN_IMM11(1));
-#else
-        t = gen_new_label();
-        tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
-        tcg_out_movi_imm13(s, ret, 1);
-        tcg_out_movi_imm13(s, ret, 0);
-        tcg_out_label(s, t, s->code_ptr);
-#endif
+        tcg_out32(s, ARITH_MOVCC | INSN_RD(ret)
+                  | INSN_RS1(tcg_cond_to_bcond[cond])
+                  | MOVCC_ICC | INSN_IMM11(1));
         return;
     }
 
@@ -742,7 +734,7 @@ static const void * const qemu_st_helpers[4] = {
 #endif
 #endif
 
-#ifdef __arch64__
+#if TCG_TARGET_REG_BITS == 64
 #define HOST_LD_OP LDX
 #define HOST_ST_OP STX
 #define HOST_SLL_OP SHIFT_SLLX
@@ -1600,11 +1592,9 @@ static void tcg_target_init(TCGContext *s)
 
 #if TCG_TARGET_REG_BITS == 64
 # define ELF_HOST_MACHINE  EM_SPARCV9
-#elif defined(__sparc_v8plus__)
+#else
 # define ELF_HOST_MACHINE  EM_SPARC32PLUS
 # define ELF_HOST_FLAGS    EF_SPARC_32PLUS
-#else
-# define ELF_HOST_MACHINE  EM_SPARC
 #endif
 
 typedef struct {
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index d762574..adca1d2 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -67,7 +67,8 @@ typedef enum {
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_I6
-#ifdef __arch64__
+
+#if TCG_TARGET_REG_BITS == 64
 // Reserve space for AREG0
 #define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \
                                    TCG_STATIC_CALL_ARGS_SIZE)
@@ -81,7 +82,7 @@ typedef enum {
 #define TCG_TARGET_STACK_ALIGN 8
 #endif
 
-#ifdef __arch64__
+#if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_EXTEND_ARGS 1
 #endif
 
@@ -129,7 +130,7 @@ typedef enum {
 
 #ifdef CONFIG_SOLARIS
 #define TCG_AREG0 TCG_REG_G2
-#elif defined(__sparc_v9__)
+#elif HOST_LONG_BITS == 64
 #define TCG_AREG0 TCG_REG_G5
 #else
 #define TCG_AREG0 TCG_REG_G6
diff --git a/tcg/tcg.c b/tcg/tcg.c
index bb9c995..45a26f2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1457,7 +1457,8 @@ static void temp_allocate_frame(TCGContext *s, int temp)
 {
     TCGTemp *ts;
     ts = &s->temps[temp];
-#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+    /* Sparc64 stack is accessed with offset of 2047 */
     s->current_frame_offset = (s->current_frame_offset +
                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
         ~(sizeof(tcg_target_long) - 1);
commit d5dd696fe3049657c08eb14f40cb74b6e94e79cc
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:40:48 2012 -0700

    tcg-sparc: Don't MAP_FIXED on top of the program
    
    The address we pick in sparc64.ld is also 0x60000000, so doing a fixed map
    on top of that is guaranteed to blow up.  Choosing 0x40000000 is exactly
    right for the max of code_gen_buffer_size set below.
    
    No need to ever use MAP_FIXED.  While getting our desired address helps
    optimize the generated code, we won't fail if we don't get it.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
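
    A minimal standalone sketch of the mapping pattern this moves to
    (illustrative only, not the QEMU code): the preferred address is
    passed purely as a hint, so the kernel relocates the mapping when
    the range is busy instead of clobbering whatever is already there.

        #include <stdio.h>
        #include <sys/mman.h>

        int main(void)
        {
            size_t size = 512 * 1024 * 1024;
            void *hint = (void *) 0x40000000UL;   /* preferred, not mandatory */

            /* No MAP_FIXED: the kernel treats 'hint' as a suggestion and
               will never replace an existing mapping such as the program
               image. */
            void *buf = mmap(hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
            }
            printf("code buffer at %p (hint was %p)\n", buf, hint);
            munmap(buf, size);
            return 0;
        }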

diff --git a/exec.c b/exec.c
index f22e9e6..4510985 100644
--- a/exec.c
+++ b/exec.c
@@ -543,8 +543,7 @@ static void code_gen_alloc(unsigned long tb_size)
             code_gen_buffer_size = (800 * 1024 * 1024);
 #elif defined(__sparc_v9__)
         // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        start = (void *) 0x60000000UL;
+        start = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024))
             code_gen_buffer_size = (512 * 1024 * 1024);
 #elif defined(__arm__)
@@ -584,8 +583,7 @@ static void code_gen_alloc(unsigned long tb_size)
             code_gen_buffer_size = (800 * 1024 * 1024);
 #elif defined(__sparc_v9__)
         // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        addr = (void *) 0x60000000UL;
+        addr = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024)) {
             code_gen_buffer_size = (512 * 1024 * 1024);
         }
commit a221ae3fcc55fcc3911587bc572af88eed3342dd
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Mar 23 23:57:12 2012 +0100

    tcg-sparc: Fix ADDX opcode.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 608fc46..0a19313 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -236,7 +236,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03))
 #define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04))
 #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
-#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08))
 #define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c))
 #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
 #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
commit 3ee60ad44a3a8673ad38e3da5af05455fed377b8
Author: Richard Henderson <rth at twiddle.net>
Date:   Sat Mar 24 21:30:20 2012 +0100

    tcg-sparc: Hack in qemu_ld/st64 for 32-bit.
    
    Not actually implemented, but at least we avoid the tcg assert at startup.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index baed3b4..608fc46 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1556,6 +1556,9 @@ static const TCGTargetOpDef sparc_op_defs[] = {
 
     { INDEX_op_brcond_i64, { "r", "rJ" } },
     { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+#else
+    { INDEX_op_qemu_ld64, { "L", "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L", "L" } },
 #endif
     { -1 },
 };
commit 62724cc5ebd7fb9113f156cbc0d2bb5e3cb0a9ae
Author: Richard Henderson <rth at twiddle.net>
Date:   Sat Mar 24 21:31:25 2012 +0100

    linux-user: Use memcpy in get_user/put_user.
    
    When host and target have differing alignment rules, using a cast
    and direct memory operation can result in SIGBUS.  Use memcpy instead,
    which the compiler will happily optimize when alignment is satisfied.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
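
    A self-contained sketch of the idea (illustrative helpers, not the
    actual __get_user/__put_user macros): dereferencing a cast pointer
    that is not suitably aligned can fault on strict-alignment hosts,
    while memcpy is always legal and compiles down to a single load or
    store whenever the access happens to be aligned.

        #include <stdint.h>
        #include <string.h>

        /* Read a 32-bit value from a possibly misaligned host pointer. */
        static inline uint32_t read_u32(const void *hptr)
        {
            uint32_t v;
            memcpy(&v, hptr, sizeof(v));   /* safe for any alignment */
            return v;
        }

        /* Write a 32-bit value to a possibly misaligned host pointer. */
        static inline void write_u32(void *hptr, uint32_t v)
        {
            memcpy(hptr, &v, sizeof(v));
        }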

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 69b27d7..fc4cc00 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -289,46 +289,29 @@ static inline int access_ok(int type, abi_ulong addr, abi_ulong size)
  * struct has been locked - usually with lock_user_struct().
  */
 #define __put_user(x, hptr)\
-({\
+({ __typeof(*hptr) pu_ = (x);\
     switch(sizeof(*hptr)) {\
-    case 1:\
-        *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\
-        break;\
-    case 2:\
-        *(uint16_t *)(hptr) = tswap16((uint16_t)(typeof(*hptr))(x));\
-        break;\
-    case 4:\
-        *(uint32_t *)(hptr) = tswap32((uint32_t)(typeof(*hptr))(x));\
-        break;\
-    case 8:\
-        *(uint64_t *)(hptr) = tswap64((typeof(*hptr))(x));\
-        break;\
-    default:\
-        abort();\
+    case 1: break;\
+    case 2: pu_ = tswap16(pu_); break; \
+    case 4: pu_ = tswap32(pu_); break; \
+    case 8: pu_ = tswap64(pu_); break; \
+    default: abort();\
     }\
+    memcpy(hptr, &pu_, sizeof(pu_)); \
     0;\
 })
 
 #define __get_user(x, hptr) \
-({\
+({ __typeof(*hptr) gu_; \
+    memcpy(&gu_, hptr, sizeof(gu_)); \
     switch(sizeof(*hptr)) {\
-    case 1:\
-        x = (typeof(*hptr))*(uint8_t *)(hptr);\
-        break;\
-    case 2:\
-        x = (typeof(*hptr))tswap16(*(uint16_t *)(hptr));\
-        break;\
-    case 4:\
-        x = (typeof(*hptr))tswap32(*(uint32_t *)(hptr));\
-        break;\
-    case 8:\
-        x = (typeof(*hptr))tswap64(*(uint64_t *)(hptr));\
-        break;\
-    default:\
-        /* avoid warning */\
-        x = 0;\
-        abort();\
+    case 1: break; \
+    case 2: gu_ = tswap16(gu_); break; \
+    case 4: gu_ = tswap32(gu_); break; \
+    case 8: gu_ = tswap64(gu_); break; \
+    default: abort();\
     }\
+    (x) = gu_; \
     0;\
 })
 
commit cfb75cb9807463ebe18b127096b48b5d0db1ce03
Merge: e55f523... 72a04d0...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Sep 21 19:53:26 2012 +0200

    Merge branch 'usb.65' of git://git.kraxel.org/qemu
    
    * 'usb.65' of git://git.kraxel.org/qemu:
      uhci: Don't queue up packets after one with the SPD flag set
      usb-redir: Revert usb-redir part of commit 93bfef4c
      usb-redir: Add chardev open / close debug logging
      usb-redir: Add support for migration
      usb-redir: Store max_packet_size in endp_data
      usb-redir: Add an already_in_flight packet-id queue
      usb-redir: Change cancelled packet code into a generic packet-id queue
      ehci: Walk async schedule before and after migration
      ehci: Don't set seen to 0 when removing unseen queue-heads
      configure: usbredir fixes
      ehci: Don't process too much frames in 1 timer tick (v2)
      ehci: Fix interrupts stopping when Interrupt Threshold Control is 8
      ehci: switch to new-style memory ops
      usb-host: allow emulated (non-async) control requests without USBPacket

commit e55f523d7977480462151d8abb0ebb4b1747eabf
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Sep 18 19:59:48 2012 -0700

    tcg-hppa: Fix broken load/store helpers
    
    The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers
    was not respecting the ABI requirement that 64-bit values
    be aligned in registers.
    
    Mirror the ARM port's use of helper functions to marshal
    arguments into the correct registers.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
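
    A hypothetical model of the slot-alignment rule the new
    tcg_out_arg_reg64() helper encodes (the slot array and names below
    are illustrative): a 64-bit argument must start in an even argument
    slot, so an odd next-slot index is bumped before the two 32-bit
    halves are stored.

        #include <stdint.h>

        /* Place a 64-bit argument into 32-bit argument slots, keeping
           the pair aligned as the ABI requires; returns the next free
           slot index. */
        static int put_arg64(uint32_t slots[], int next, uint64_t value)
        {
            if (next & 1) {
                next++;                              /* realign to an even slot */
            }
            slots[next++] = (uint32_t)value;         /* low half (order illustrative) */
            slots[next++] = (uint32_t)(value >> 32); /* high half */
            return next;
        }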

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index a76569d..5385d45 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
     }
 
-    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
-       of the address.  We include the low N bits of the address to catch unaligned
-       accesses and force them onto the slow path.  Do this computation after having
-       issued the load from the TLB slot to give the load time to complete.  */
+    /* Compute the value that ought to appear in the TLB for a hit, namely,
+       the page of the address.  We include the low N bits of the address
+       to catch unaligned accesses and force them onto the slow path.  Do
+       this computation after having issued the load from the TLB slot to
+       give the load time to complete.  */
     tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
     /* If not equal, jump to lab_miss. */
@@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
 
     return ret;
 }
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+    if (argno < 4) {
+        if (vconst) {
+            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        }
+    } else {
+        if (vconst && v != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+            v = TCG_REG_R20;
+        }
+        tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+    }
+    return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+    /* 64-bit arguments must go in even reg pairs and stack slots.  */
+    if (argno & 1) {
+        argno++;
+    }
+    argno = tcg_out_arg_reg32(s, argno, vl, false);
+    argno = tcg_out_arg_reg32(s, argno, vh, false);
+    return argno;
+}
 #endif
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc & 3, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc & 3, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+                           TCG_REG_R20, opc);
     tcg_out_branch(s, lab2, 1);
 
     /* TLB Miss.  */
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+                                   int datahi_reg, int addr_reg, int opc)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 0;
@@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, next, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
 
     /* There are no indexed stores, so we must do this addition explitly.
@@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
 
+    next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
     switch(opc) {
     case 0:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 1:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xffff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
         break;
     case 3:
-        /* Because of the alignment required by the 64-bit data argument,
-           we will always use R23/R24.  Also, we will always run out of
-           argument registers for storing mem_index, so that will have
-           to go on the stack.  */
-        if (mem_index == 0) {
-            argreg = TCG_REG_R0;
-        } else {
-            argreg = TCG_REG_R20;
-            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        }
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
-        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - 4);
+        argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
         break;
     default:
         tcg_abort();
     }
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
     tcg_out_label(s, lab2, s->code_ptr);
 #else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
-       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
+    /* There are no indexed stores, so if GUEST_BASE is set we must do
+       the add explicitly.  Careful to avoid R20, which is used for the
+       bswaps to follow.  */
     if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+        tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+                      TCG_GUEST_BASE_REG, INSN_ADDL);
         addrlo_reg = TCG_REG_R31;
     }
     tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
commit c08d9ee38d240ef45496620cbfe8e1b5ed81accb
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Sep 18 19:59:47 2012 -0700

    tcg-hppa: Fix brcond2 and setcond2
    
    Neither of these functions was performing double-word
    compares properly.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
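
    For reference, the decomposition brcond2/setcond2 must implement,
    written as a plain C model (illustrative, assuming 32-bit halves):
    the high halves decide the result unless they are equal, in which
    case the low halves are compared with the unsigned form of the
    condition.

        #include <stdbool.h>
        #include <stdint.h>

        /* 64-bit "less than" built from 32-bit halves. */
        static bool lt64(uint32_t ah, uint32_t al, uint32_t bh, uint32_t bl,
                         bool is_signed)
        {
            if (ah != bh) {
                return is_signed ? (int32_t)ah < (int32_t)bh : ah < bh;
            }
            /* High halves equal: low halves always compare unsigned. */
            return al < bl;
        }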

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 8b81b70..a76569d 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -820,19 +820,34 @@ static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
     tcg_out32(s, op);
 }
 
+static TCGCond const tcg_high_cond[] = {
+    [TCG_COND_EQ] = TCG_COND_EQ,
+    [TCG_COND_NE] = TCG_COND_NE,
+    [TCG_COND_LT] = TCG_COND_LT,
+    [TCG_COND_LE] = TCG_COND_LT,
+    [TCG_COND_GT] = TCG_COND_GT,
+    [TCG_COND_GE] = TCG_COND_GT,
+    [TCG_COND_LTU] = TCG_COND_LTU,
+    [TCG_COND_LEU] = TCG_COND_LTU,
+    [TCG_COND_GTU] = TCG_COND_GTU,
+    [TCG_COND_GEU] = TCG_COND_GTU
+};
+
 static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
                             TCGArg bl, int blconst, TCGArg bh, int bhconst,
                             int label_index)
 {
     switch (cond) {
     case TCG_COND_EQ:
+        tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst);
+        tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index);
+        break;
     case TCG_COND_NE:
-        tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        tcg_out_brcond(s, TCG_COND_NE, al, bl, bhconst, label_index);
+        tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
         break;
-
     default:
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        tcg_out_brcond(s, tcg_high_cond[cond], ah, bh, bhconst, label_index);
         tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_brcond(s, tcg_unsigned_cond(cond),
                        al, bl, blconst, label_index);
@@ -853,9 +868,8 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
 {
     int scratch = TCG_REG_R20;
 
-    if (ret != al && ret != ah
-        && (blconst || ret != bl)
-        && (bhconst || ret != bh)) {
+    /* Note that the low parts are fully consumed before scratch is set.  */
+    if (ret != ah && (bhconst || ret != bh)) {
         scratch = ret;
     }
 
@@ -867,13 +881,32 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
         tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
         break;
 
-    default:
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        /* Optimize compares with low part zero.  */
+        if (bl == 0) {
+            tcg_out_setcond(s, cond, ret, ah, bh, bhconst);
+            return;
+        }
+        /* FALLTHRU */
+
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* <= : ah < bh | (ah == bh && al <= bl) */
         tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
         tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
-        tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond[cond]),
+                       TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
         break;
+
+    default:
+        tcg_abort();
     }
 
     tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
commit fe7e1d3ec4410d91a1a687ff4a9288870aefac40
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 17 08:28:52 2012 -0700

    tcg: Fix !USE_DIRECT_JUMP
    
    Commit 6375e09e changed the type of TranslationBlock.tb_next,
    but failed to change the type of TCGContext.tb_next.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7e903f3..48a56f0 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -344,7 +344,7 @@ struct TCGContext {
 
     /* goto_tb support */
     uint8_t *code_buf;
-    unsigned long *tb_next;
+    uintptr_t *tb_next;
     uint16_t *tb_next_offset;
     uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
 
commit eca5c303339016c8640567f2e5a31ef1d6487df9
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:21 2012 +0200

    gdbstub/sh4: fix build with USE_SOFTFLOAT_STRUCT_TYPES
    
    We have to use a different type to access float values when
    USE_SOFTFLOAT_STRUCT_TYPES is defined.
    
    Rework the SH4 version of cpu_gdb_{read,write}_register() to use
    a single switch statement and fix the coding style. Use ldfl_p() and
    stfl_p() to access float values.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/gdbstub.c b/gdbstub.c
index 5d37dd9..a91709f 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1226,33 +1226,48 @@ static int cpu_gdb_write_register(CPUOpenRISCState *env,
 
 static int cpu_gdb_read_register(CPUSH4State *env, uint8_t *mem_buf, int n)
 {
-    if (n < 8) {
+    switch (n) {
+    case 0 ... 7:
         if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
             GET_REGL(env->gregs[n + 16]);
         } else {
             GET_REGL(env->gregs[n]);
         }
-    } else if (n < 16) {
+    case 8 ... 15:
         GET_REGL(env->gregs[n]);
-    } else if (n >= 25 && n < 41) {
-	GET_REGL(env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)]);
-    } else if (n >= 43 && n < 51) {
-	GET_REGL(env->gregs[n - 43]);
-    } else if (n >= 51 && n < 59) {
-	GET_REGL(env->gregs[n - (51 - 16)]);
-    }
-    switch (n) {
-    case 16: GET_REGL(env->pc);
-    case 17: GET_REGL(env->pr);
-    case 18: GET_REGL(env->gbr);
-    case 19: GET_REGL(env->vbr);
-    case 20: GET_REGL(env->mach);
-    case 21: GET_REGL(env->macl);
-    case 22: GET_REGL(env->sr);
-    case 23: GET_REGL(env->fpul);
-    case 24: GET_REGL(env->fpscr);
-    case 41: GET_REGL(env->ssr);
-    case 42: GET_REGL(env->spc);
+    case 16:
+        GET_REGL(env->pc);
+    case 17:
+        GET_REGL(env->pr);
+    case 18:
+        GET_REGL(env->gbr);
+    case 19:
+        GET_REGL(env->vbr);
+    case 20:
+        GET_REGL(env->mach);
+    case 21:
+        GET_REGL(env->macl);
+    case 22:
+        GET_REGL(env->sr);
+    case 23:
+        GET_REGL(env->fpul);
+    case 24:
+        GET_REGL(env->fpscr);
+    case 25 ... 40:
+        if (env->fpscr & FPSCR_FR) {
+            stfl_p(mem_buf, env->fregs[n - 9]);
+        } else {
+            stfl_p(mem_buf, env->fregs[n - 25]);
+        }
+        return 4;
+    case 41:
+        GET_REGL(env->ssr);
+    case 42:
+        GET_REGL(env->spc);
+    case 43 ... 50:
+        GET_REGL(env->gregs[n - 43]);
+    case 51 ... 58:
+        GET_REGL(env->gregs[n - (51 - 16)]);
     }
 
     return 0;
@@ -1260,42 +1275,63 @@ static int cpu_gdb_read_register(CPUSH4State *env, uint8_t *mem_buf, int n)
 
 static int cpu_gdb_write_register(CPUSH4State *env, uint8_t *mem_buf, int n)
 {
-    uint32_t tmp;
-
-    tmp = ldl_p(mem_buf);
-
-    if (n < 8) {
+    switch (n) {
+    case 0 ... 7:
         if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
-            env->gregs[n + 16] = tmp;
+            env->gregs[n + 16] = ldl_p(mem_buf);
         } else {
-            env->gregs[n] = tmp;
+            env->gregs[n] = ldl_p(mem_buf);
         }
-	return 4;
-    } else if (n < 16) {
-        env->gregs[n] = tmp;
-	return 4;
-    } else if (n >= 25 && n < 41) {
-	env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)] = tmp;
-	return 4;
-    } else if (n >= 43 && n < 51) {
-	env->gregs[n - 43] = tmp;
-	return 4;
-    } else if (n >= 51 && n < 59) {
-	env->gregs[n - (51 - 16)] = tmp;
-	return 4;
-    }
-    switch (n) {
-    case 16: env->pc = tmp; break;
-    case 17: env->pr = tmp; break;
-    case 18: env->gbr = tmp; break;
-    case 19: env->vbr = tmp; break;
-    case 20: env->mach = tmp; break;
-    case 21: env->macl = tmp; break;
-    case 22: env->sr = tmp; break;
-    case 23: env->fpul = tmp; break;
-    case 24: env->fpscr = tmp; break;
-    case 41: env->ssr = tmp; break;
-    case 42: env->spc = tmp; break;
+        break;
+    case 8 ... 15:
+        env->gregs[n] = ldl_p(mem_buf);
+        break;
+    case 16:
+        env->pc = ldl_p(mem_buf);
+        break;
+    case 17:
+        env->pr = ldl_p(mem_buf);
+        break;
+    case 18:
+        env->gbr = ldl_p(mem_buf);
+        break;
+    case 19:
+        env->vbr = ldl_p(mem_buf);
+        break;
+    case 20:
+        env->mach = ldl_p(mem_buf);
+        break;
+    case 21:
+        env->macl = ldl_p(mem_buf);
+        break;
+    case 22:
+        env->sr = ldl_p(mem_buf);
+        break;
+    case 23:
+        env->fpul = ldl_p(mem_buf);
+        break;
+    case 24:
+        env->fpscr = ldl_p(mem_buf);
+        break;
+    case 25 ... 40:
+        if (env->fpscr & FPSCR_FR) {
+            env->fregs[n - 9] = ldfl_p(mem_buf);
+        } else {
+            env->fregs[n - 25] = ldfl_p(mem_buf);
+        }
+        break;
+    case 41:
+        env->ssr = ldl_p(mem_buf);
+        break;
+    case 42:
+        env->spc = ldl_p(mem_buf);
+        break;
+    case 43 ... 50:
+        env->gregs[n - 43] = ldl_p(mem_buf);
+        break;
+    case 51 ... 58:
+        env->gregs[n - (51 - 16)] = ldl_p(mem_buf);
+        break;
     default: return 0;
     }
 
commit 5d8f53630011f93dc774f1b2dc9557c7eac3ad89
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:13:38 2012 -0700

    tcg: Optimize two-address commutative operations
    
    While swapping constants to the second operand, swap
    sources matching destinations to the first operand.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 26038a6..1be7631 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -334,6 +334,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
+    TCGCond cond;
+
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
        If this temp is a copy of other ones then this equivalence class'
@@ -395,13 +397,24 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             break;
         CASE_OP_32_64(movcond):
+            cond = args[5];
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state != TCG_TEMP_CONST) {
                 tmp = args[1];
                 args[1] = args[2];
                 args[2] = tmp;
-                args[5] = tcg_swap_cond(args[5]);
+                cond = tcg_swap_cond(cond);
+            }
+            /* For movcond, we canonicalize the "false" input reg to match
+               the destination reg so that the tcg backend can implement
+               a "move if true" operation.  */
+            if (args[0] == args[3]) {
+                tmp = args[3];
+                args[3] = args[4];
+                args[4] = tmp;
+                cond = tcg_invert_cond(cond);
             }
+            args[5] = cond;
         default:
             break;
         }
commit fa01a2084e5643504b28ebba2d9dbdb73d2f74a4
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:13:37 2012 -0700

    tcg: Optimize movcond for constant comparisons
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
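
    A rough model of the folding rule (illustrative, shown for the EQ
    condition only): when both comparison inputs are known constants the
    condition is decided at translation time, and the movcond collapses
    to either a nop (the selected source already is the destination) or
    a plain mov/movi.

        typedef enum { FOLD_NOP, FOLD_MOV } FoldResult;

        /* movcond_eq dest, c1, c2, v_true, v_false with constant c1/c2. */
        static FoldResult fold_movcond_eq(unsigned c1, unsigned c2,
                                          int dest, int v_true, int v_false,
                                          int *chosen_src)
        {
            int chosen = (c1 == c2) ? v_true : v_false;
            *chosen_src = chosen;
            return (chosen == dest) ? FOLD_NOP : FOLD_MOV;
        }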

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9da333c..26038a6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -394,6 +394,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
+        CASE_OP_32_64(movcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[5] = tcg_swap_cond(args[5]);
+            }
         default:
             break;
         }
@@ -614,6 +622,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             args += 4;
             break;
+        CASE_OP_32_64(movcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                tmp = do_constant_folding_cond(op, temps[args[1]].val,
+                                               temps[args[2]].val, args[5]);
+                if (args[0] == args[4-tmp]
+                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
+                        && temps[args[4-tmp]].val == args[0])) {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+                    gen_opc_buf[op_index] = op_to_movi(op);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
+                                     nb_temps, nb_globals);
+                    gen_args += 2;
+                } else {
+                    gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
+                                    nb_temps, nb_globals);
+                    gen_args += 2;
+                }
+            } else {
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args[5] = args[5];
+                gen_args += 6;
+            }
+            args += 6;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
commit d0a16297c97ecf6be259a9a98af7a2dac9230939
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:13:36 2012 -0700

    tcg-i386: Implement movcond
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 3017858..aa1fa9f 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -249,6 +249,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP	(0xc8 | P_EXT)
 #define OPC_CALL_Jz	(0xe8)
+#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
 #define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
 #define OPC_DEC_r32	(0x48)
 #define OPC_IMUL_GvEv	(0xaf | P_EXT)
@@ -936,6 +937,24 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
 }
 #endif
 
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, 0);
+    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
 {
     tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
@@ -1668,6 +1687,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_setcond32(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond32(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     OP_32_64(bswap16):
         tcg_out_rolw_8(s, args[0]);
@@ -1796,6 +1819,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_setcond64(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond64(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     case INDEX_op_bswap64_i64:
         tcg_out_bswap64(s, args[0]);
@@ -1880,6 +1907,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
 
     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
 
 #if TCG_TARGET_REG_BITS == 32
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
@@ -1934,6 +1962,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 504f953..b356d76 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -86,7 +86,12 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#if defined(__x86_64__) || defined(__i686__)
+/* Use cmov only if the compiler is already doing so.  */
+#define TCG_TARGET_HAS_movcond_i32      1
+#else
 #define TCG_TARGET_HAS_movcond_i32      0
+#endif
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -108,7 +113,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
commit 57e289de7330a97ff4c9919a1dad54ca0650cfa7
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:13:35 2012 -0700

    target-alpha: Use movcond
    
    For proper cmov insns, as well as the non-goto-tb case
    of conditional branch.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 12de6a3..4a9011a 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -426,27 +426,15 @@ static ExitStatus gen_bcond_internal(DisasContext *ctx, TCGCond cond,
 
         return EXIT_GOTO_TB;
     } else {
-        int lab_over = gen_new_label();
-
-        /* ??? Consider using either
-             movi pc, next
-             addi tmp, pc, disp
-             movcond pc, cond, 0, tmp, pc
-           or
-             setcond tmp, cond, 0
-             movi pc, next
-             neg tmp, tmp
-             andi tmp, tmp, disp
-             add pc, pc, tmp
-           The current diamond subgraph surely isn't efficient.  */
+        TCGv_i64 z = tcg_const_i64(0);
+        TCGv_i64 d = tcg_const_i64(dest);
+        TCGv_i64 p = tcg_const_i64(ctx->pc);
 
-        tcg_gen_brcondi_i64(cond, cmp, 0, lab_true);
-        tcg_gen_movi_i64(cpu_pc, ctx->pc);
-        tcg_gen_br(lab_over);
-        gen_set_label(lab_true);
-        tcg_gen_movi_i64(cpu_pc, dest);
-        gen_set_label(lab_over);
+        tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p);
 
+        tcg_temp_free_i64(z);
+        tcg_temp_free_i64(d);
+        tcg_temp_free_i64(p);
         return EXIT_PC_UPDATED;
     }
 }
@@ -521,61 +509,67 @@ static ExitStatus gen_fbcond(DisasContext *ctx, TCGCond cond, int ra,
 static void gen_cmov(TCGCond cond, int ra, int rb, int rc,
                      int islit, uint8_t lit, int mask)
 {
-    TCGCond inv_cond = tcg_invert_cond(cond);
-    int l1;
+    TCGv_i64 c1, z, v1;
 
-    if (unlikely(rc == 31))
+    if (unlikely(rc == 31)) {
         return;
+    }
 
-    l1 = gen_new_label();
-
-    if (ra != 31) {
-        if (mask) {
-            TCGv tmp = tcg_temp_new();
-            tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
-            tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-            tcg_temp_free(tmp);
-        } else
-            tcg_gen_brcondi_i64(inv_cond, cpu_ir[ra], 0, l1);
-    } else {
+    if (ra == 31) {
         /* Very uncommon case - Do not bother to optimize.  */
-        TCGv tmp = tcg_const_i64(0);
-        tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-        tcg_temp_free(tmp);
+        c1 = tcg_const_i64(0);
+    } else if (mask) {
+        c1 = tcg_const_i64(1);
+        tcg_gen_and_i64(c1, c1, cpu_ir[ra]);
+    } else {
+        c1 = cpu_ir[ra];
     }
+    if (islit) {
+        v1 = tcg_const_i64(lit);
+    } else {
+        v1 = cpu_ir[rb];
+    }
+    z = tcg_const_i64(0);
 
-    if (islit)
-        tcg_gen_movi_i64(cpu_ir[rc], lit);
-    else
-        tcg_gen_mov_i64(cpu_ir[rc], cpu_ir[rb]);
-    gen_set_label(l1);
+    tcg_gen_movcond_i64(cond, cpu_ir[rc], c1, z, v1, cpu_ir[rc]);
+
+    tcg_temp_free_i64(z);
+    if (ra == 31 || mask) {
+        tcg_temp_free_i64(c1);
+    }
+    if (islit) {
+        tcg_temp_free_i64(v1);
+    }
 }
 
 static void gen_fcmov(TCGCond cond, int ra, int rb, int rc)
 {
-    TCGv cmp_tmp;
-    int l1;
+    TCGv_i64 c1, z, v1;
 
     if (unlikely(rc == 31)) {
         return;
     }
 
-    cmp_tmp = tcg_temp_new();
+    c1 = tcg_temp_new_i64();
     if (unlikely(ra == 31)) {
-        tcg_gen_movi_i64(cmp_tmp, 0);
+        tcg_gen_movi_i64(c1, 0);
+    } else {
+        gen_fold_mzero(cond, c1, cpu_fir[ra]);
+    }
+    if (rb == 31) {
+        v1 = tcg_const_i64(0);
     } else {
-        gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]);
+        v1 = cpu_fir[rb];
     }
+    z = tcg_const_i64(0);
 
-    l1 = gen_new_label();
-    tcg_gen_brcondi_i64(tcg_invert_cond(cond), cmp_tmp, 0, l1);
-    tcg_temp_free(cmp_tmp);
+    tcg_gen_movcond_i64(cond, cpu_fir[rc], c1, z, v1, cpu_fir[rc]);
 
-    if (rb != 31)
-        tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]);
-    else
-        tcg_gen_movi_i64(cpu_fir[rc], 0);
-    gen_set_label(l1);
+    tcg_temp_free_i64(z);
+    tcg_temp_free_i64(c1);
+    if (rb == 31) {
+        tcg_temp_free_i64(v1);
+    }
 }
 
 #define QUAL_RM_N       0x080   /* Round mode nearest even */
commit ffc5ea09afb8f9487ed9d660f54a492889a067c7
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Sep 21 10:13:34 2012 -0700

    tcg: Introduce movcond
    
    Implemented with setcond if the target does not provide
    the optional opcode.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
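
    The generic fallback added to tcg-op.h is the usual branch-free
    select; a minimal C model (illustrative, with unsigned-less-than as
    the example condition) looks like this:

        #include <stdint.h>

        /* dest = (c1 < c2) ? v1 : v2 without a branch: setcond yields
           0 or 1, and negating it gives an all-zeros/all-ones mask. */
        static uint32_t movcond_ltu(uint32_t c1, uint32_t c2,
                                    uint32_t v1, uint32_t v2)
        {
            uint32_t mask = -(uint32_t)(c1 < c2);   /* 0 or 0xffffffff */
            return (v1 & mask) | (v2 & ~mask);
        }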

diff --git a/tcg/README b/tcg/README
index cfdfd96..d03ae05 100644
--- a/tcg/README
+++ b/tcg/README
@@ -307,6 +307,12 @@ dest = (t1 cond t2)
 
 Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
 
+* movcond_i32/i64 cond, dest, c1, c2, v1, v2
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index c0b8f72..e2299ca 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,6 +73,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 01ef960..4defd28 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -96,6 +96,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub rd, 0, rs */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 8be42f3..504f953 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -86,6 +86,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -107,6 +108,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index c22962a..368aee4 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -133,6 +133,8 @@ typedef enum {
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i64      0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 1c61931..9c68a32 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -90,6 +90,7 @@ typedef enum {
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 2f37fd2..177eea1 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,6 +92,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 97eec08..57569e8 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -83,6 +83,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rot_i64          0
@@ -103,6 +104,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 4f7dfab..ed55c33 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,6 +63,7 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -84,6 +85,7 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #define TCG_TARGET_HAS_GUEST_BASE
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 0ea87be..d762574 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -102,6 +102,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -123,6 +124,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #ifdef CONFIG_SOLARIS
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 169d3b2..6d28f82 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2118,6 +2118,44 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
     tcg_temp_free_i64(t1);
 }
 
+static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
+                                       TCGv_i32 c1, TCGv_i32 c2,
+                                       TCGv_i32 v1, TCGv_i32 v2)
+{
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_setcond_i32(cond, t0, c1, c2);
+        tcg_gen_neg_i32(t0, t0);
+        tcg_gen_and_i32(t1, v1, t0);
+        tcg_gen_andc_i32(ret, v2, t0);
+        tcg_gen_or_i32(ret, ret, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
+}
+
+static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
+                                       TCGv_i64 c1, TCGv_i64 c2,
+                                       TCGv_i64 v1, TCGv_i64 v2)
+{
+    if (TCG_TARGET_HAS_movcond_i64) {
+        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond, t0, c1, c2);
+        tcg_gen_neg_i64(t0, t0);
+        tcg_gen_and_i64(t1, v1, t0);
+        tcg_gen_andc_i64(ret, v2, t0);
+        tcg_gen_or_i64(ret, ret, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2434,6 +2472,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_deposit_tl tcg_gen_deposit_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2505,6 +2544,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_deposit_tl tcg_gen_deposit_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
 #endif
 
 #if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index d12e8d0..dbb0e39 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -51,6 +51,7 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF(mov_i32, 1, 1, 0, 0)
 DEF(movi_i32, 1, 0, 1, 0)
 DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
 /* load/store */
 DEF(ld8u_i32, 1, 1, 1, 0)
 DEF(ld8s_i32, 1, 1, 1, 0)
@@ -107,6 +108,7 @@ DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
 DEF(mov_i64, 1, 1, 0, IMPL64)
 DEF(movi_i64, 1, 0, 1, IMPL64)
 DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
 /* load/store */
 DEF(ld8u_i64, 1, 1, 1, IMPL64)
 DEF(ld8s_i64, 1, 1, 1, IMPL64)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b8a1bec..bb9c995 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -991,16 +991,15 @@ void tcg_dump_ops(TCGContext *s)
             }
             switch (c) {
             case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 32
-            case INDEX_op_brcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
-            case INDEX_op_brcond_i64:
-#endif
             case INDEX_op_setcond_i32:
+            case INDEX_op_movcond_i32:
 #if TCG_TARGET_REG_BITS == 32
+            case INDEX_op_brcond2_i32:
             case INDEX_op_setcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
+#else
+            case INDEX_op_brcond_i64:
             case INDEX_op_setcond_i64:
+            case INDEX_op_movcond_i64:
 #endif
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                     qemu_log(",%s", cond_name[args[k++]]);
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 477775d..7e903f3 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -79,6 +79,7 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #ifndef TCG_TARGET_deposit_i32_valid
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 30a0f21..6d89495 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -75,6 +75,7 @@
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_bswap16_i64      1
@@ -98,6 +99,7 @@
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_orc_i64          0
 #define TCG_TARGET_HAS_rot_i64          1
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
 /* Offset to user memory in user mode. */
commit 080df491db6400fade2bdf1bdeee3a459529398b
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:21 2012 +0200

    target-sh4: remove useless code
    
    Almost dead code.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index e5478cb..0fa83ca 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -18,7 +18,6 @@
  */
 
 #define DEBUG_DISAS
-#define SH4_DEBUG_DISAS
 //#define SH4_SINGLE_STEP
 
 #include "cpu.h"
@@ -2069,9 +2068,6 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
     }
 
 #ifdef DEBUG_DISAS
-#ifdef SH4_DEBUG_DISAS
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n");
-#endif
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
 	qemu_log("IN:\n");	/* , lookup_symbol(pc_start)); */
 	log_target_disas(pc_start, ctx.pc - pc_start, 0);
commit 7a64244fda7b9b6257de30cfe93498ccd6017630
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:21 2012 +0200

    target-sh4: cleanup DisasContext
    
    We should avoid accessing env at translation time, except of course for
    static values such as the supported features.
    
    Remove the variables copied from env into DisasContext and use the TB
    flags instead.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 6305db8..e5478cb 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -32,8 +32,6 @@
 typedef struct DisasContext {
     struct TranslationBlock *tb;
     target_ulong pc;
-    uint32_t sr;
-    uint32_t fpscr;
     uint16_t opcode;
     uint32_t flags;
     int bstate;
@@ -47,7 +45,7 @@ typedef struct DisasContext {
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(ctx) 1
 #else
-#define IS_USER(ctx) (!(ctx->sr & SR_MD))
+#define IS_USER(ctx) (!(ctx->flags & SR_MD))
 #endif
 
 enum {
@@ -413,15 +411,15 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB) ? \
-		(cpu_gregs[x + 16]) : (cpu_gregs[x]))
+#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \
+                ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define ALTREG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) != (SR_MD | SR_RB) \
+#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\
 		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define FREG(x) (ctx->fpscr & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) (ctx->fpscr & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
+#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
 #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
 
 #define CHECK_NOT_DELAY_SLOT \
@@ -537,7 +535,7 @@ static void _decode_opc(DisasContext * ctx)
 	ctx->bstate = BS_STOP;
 	return;
     case 0xf3fd:		/* fschg */
-	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
+        tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
 	ctx->bstate = BS_STOP;
 	return;
     case 0x0009:		/* nop */
@@ -1080,7 +1078,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, XREG(B7_4));
 	    gen_store_fpr64(fp, XREG(B11_8));
@@ -1091,7 +1089,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B7_4);
 	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
@@ -1104,7 +1102,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -1117,7 +1115,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -1132,7 +1130,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr = tcg_temp_new_i32();
 	    int fr = XREG(B7_4);
 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
@@ -1155,7 +1153,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv addr = tcg_temp_new_i32();
 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
-	    if (ctx->fpscr & FPSCR_SZ) {
+            if (ctx->flags & FPSCR_SZ) {
 		int fr = XREG(B11_8);
 		tcg_gen_qemu_ld32u(cpu_fregs[fr	 ], addr, ctx->memidx);
 		tcg_gen_addi_i32(addr, addr, 4);
@@ -1171,7 +1169,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
-	    if (ctx->fpscr & FPSCR_SZ) {
+            if (ctx->flags & FPSCR_SZ) {
 		int fr = XREG(B7_4);
 		tcg_gen_qemu_ld32u(cpu_fregs[fr	 ], addr, ctx->memidx);
 		tcg_gen_addi_i32(addr, addr, 4);
@@ -1190,7 +1188,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	{
 	    CHECK_FPU_ENABLED
-	    if (ctx->fpscr & FPSCR_PR) {
+            if (ctx->flags & FPSCR_PR) {
                 TCGv_i64 fp0, fp1;
 
 		if (ctx->opcode & 0x0110)
@@ -1259,7 +1257,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf00e: /* fmac FR0,RM,Rn */
         {
             CHECK_FPU_ENABLED
-            if (ctx->fpscr & FPSCR_PR) {
+            if (ctx->flags & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
                 gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
@@ -1789,7 +1787,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1804,7 +1802,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1825,7 +1823,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf05d: /* fabs FRn/DRn */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1839,7 +1837,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf06d: /* fsqrt FRn */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1857,13 +1855,13 @@ static void _decode_opc(DisasContext * ctx)
 	break;
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-	if (!(ctx->fpscr & FPSCR_PR)) {
+        if (!(ctx->flags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
 	}
 	return;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-	if (!(ctx->fpscr & FPSCR_PR)) {
+        if (!(ctx->flags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
 	}
 	return;
@@ -1887,7 +1885,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xf0ed: /* fipr FVm,FVn */
         CHECK_FPU_ENABLED
-        if ((ctx->fpscr & FPSCR_PR) == 0) {
+        if ((ctx->flags & FPSCR_PR) == 0) {
             TCGv m, n;
             m = tcg_const_i32((ctx->opcode >> 8) & 3);
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
@@ -1900,7 +1898,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0xf0fd: /* ftrv XMTRX,FVn */
         CHECK_FPU_ENABLED
         if ((ctx->opcode & 0x0300) == 0x0100 &&
-            (ctx->fpscr & FPSCR_PR) == 0) {
+            (ctx->flags & FPSCR_PR) == 0) {
             TCGv n;
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
             gen_helper_ftrv(cpu_env, n);
@@ -1974,16 +1972,14 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
     ctx.pc = pc_start;
     ctx.flags = (uint32_t)tb->flags;
     ctx.bstate = BS_NONE;
-    ctx.sr = env->sr;
-    ctx.fpscr = env->fpscr;
-    ctx.memidx = (env->sr & SR_MD) == 0 ? 1 : 0;
+    ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
     ctx.tb = tb;
     ctx.singlestep_enabled = env->singlestep_enabled;
     ctx.features = env->features;
-    ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA);
+    ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
 
     ii = -1;
     num_insns = 0;
commit 1012740098d0307ce5d957ebbe9a7f020da7f574
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:21 2012 +0200

    target-sh4: rework exceptions handling
    
    Since commit fd4bab102, the PC is restored on exception through code
    retranslation. While this is clearly the right thing to do when it is
    not known at translation time whether a helper will trigger an
    exception (e.g. for load/store, FPU, etc.), it just makes things slower
    when the exception is already known at translation time.
    
    Partially revert this commit and save the PC in the TCG code instead.
    Set bstate to BS_BRANCH so that no TCG exit code is generated.
    Micro-optimize the sleep helper. Make all the exception helpers call
    raise_exception and mark it as noreturn.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index a00b7dd..6c1a47d 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -1,13 +1,13 @@
 #include "def-helper.h"
 
 DEF_HELPER_1(ldtlb, void, env)
-DEF_HELPER_1(raise_illegal_instruction, void, env)
-DEF_HELPER_1(raise_slot_illegal_instruction, void, env)
-DEF_HELPER_1(raise_fpu_disable, void, env)
-DEF_HELPER_1(raise_slot_fpu_disable, void, env)
-DEF_HELPER_1(debug, void, env)
-DEF_HELPER_2(sleep, void, env, i32)
-DEF_HELPER_2(trapa, void, env, i32)
+DEF_HELPER_1(raise_illegal_instruction, noreturn, env)
+DEF_HELPER_1(raise_slot_illegal_instruction, noreturn, env)
+DEF_HELPER_1(raise_fpu_disable, noreturn, env)
+DEF_HELPER_1(raise_slot_fpu_disable, noreturn, env)
+DEF_HELPER_1(debug, noreturn, env)
+DEF_HELPER_1(sleep, noreturn, env)
+DEF_HELPER_2(trapa, noreturn, env, i32)
 
 DEF_HELPER_3(movcal, void, env, i32, i32)
 DEF_HELPER_1(discard_movcal_backup, void, env)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 4f1f754..60ec4cb 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -21,7 +21,8 @@
 #include "cpu.h"
 #include "helper.h"
 
-static void cpu_restore_state_from_retaddr(CPUSH4State *env, uintptr_t retaddr)
+static inline void cpu_restore_state_from_retaddr(CPUSH4State *env,
+                                                  uintptr_t retaddr)
 {
     TranslationBlock *tb;
 
@@ -77,8 +78,8 @@ void helper_ldtlb(CPUSH4State *env)
 #endif
 }
 
-static inline void raise_exception(CPUSH4State *env, int index,
-                                   uintptr_t retaddr)
+static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index,
+                                                 uintptr_t retaddr)
 {
     env->exception_index = index;
     cpu_restore_state_from_retaddr(env, retaddr);
@@ -87,43 +88,40 @@ static inline void raise_exception(CPUSH4State *env, int index,
 
 void helper_raise_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(env, 0x180, GETPC());
+    raise_exception(env, 0x180, 0);
 }
 
 void helper_raise_slot_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(env, 0x1a0, GETPC());
+    raise_exception(env, 0x1a0, 0);
 }
 
 void helper_raise_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(env, 0x800, GETPC());
+    raise_exception(env, 0x800, 0);
 }
 
 void helper_raise_slot_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(env, 0x820, GETPC());
+    raise_exception(env, 0x820, 0);
 }
 
 void helper_debug(CPUSH4State *env)
 {
-    env->exception_index = EXCP_DEBUG;
-    cpu_loop_exit(env);
+    raise_exception(env, EXCP_DEBUG, 0);
 }
 
-void helper_sleep(CPUSH4State *env, uint32_t next_pc)
+void helper_sleep(CPUSH4State *env)
 {
     env->halted = 1;
     env->in_sleep = 1;
-    env->exception_index = EXCP_HLT;
-    env->pc = next_pc;
-    cpu_loop_exit(env);
+    raise_exception(env, EXCP_HLT, 0);
 }
 
 void helper_trapa(CPUSH4State *env, uint32_t tra)
 {
     env->tra = tra << 2;
-    raise_exception(env, 0x160, GETPC());
+    raise_exception(env, 0x160, 0);
 }
 
 void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value)
@@ -385,9 +383,7 @@ static void update_fpscr(CPUSH4State *env, uintptr_t retaddr)
         cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT;
         enable = (env->fpscr & FPSCR_ENABLE_MASK) >> FPSCR_ENABLE_SHIFT;
         if (cause & enable) {
-            cpu_restore_state_from_retaddr(env, retaddr);
-            env->exception_index = 0x120;
-            cpu_loop_exit(env);
+            raise_exception(env, 0x120, retaddr);
         }
     }
 }
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index cdc4e3b..6305db8 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -427,30 +427,33 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 #define CHECK_NOT_DELAY_SLOT \
   if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
   {                                                           \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                      \
       gen_helper_raise_slot_illegal_instruction(cpu_env);     \
-      ctx->bstate = BS_EXCP;                                  \
+      ctx->bstate = BS_BRANCH;                                \
       return;                                                 \
   }
 
 #define CHECK_PRIVILEGED                                        \
   if (IS_USER(ctx)) {                                           \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
           gen_helper_raise_slot_illegal_instruction(cpu_env);   \
       } else {                                                  \
           gen_helper_raise_illegal_instruction(cpu_env);        \
       }                                                         \
-      ctx->bstate = BS_EXCP;                                    \
+      ctx->bstate = BS_BRANCH;                                  \
       return;                                                   \
   }
 
 #define CHECK_FPU_ENABLED                                       \
   if (ctx->flags & SR_FD) {                                     \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
           gen_helper_raise_slot_fpu_disable(cpu_env);           \
       } else {                                                  \
           gen_helper_raise_fpu_disable(cpu_env);                \
       }                                                         \
-      ctx->bstate = BS_EXCP;                                    \
+      ctx->bstate = BS_BRANCH;                                  \
       return;                                                   \
   }
 
@@ -541,7 +544,8 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x001b:		/* sleep */
 	CHECK_PRIVILEGED
-        gen_helper_sleep(cpu_env, tcg_const_i32(ctx->pc + 2));
+        tcg_gen_movi_i32(cpu_pc, ctx->pc + 2);
+        gen_helper_sleep(cpu_env);
 	return;
     }
 
@@ -1411,6 +1415,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
 	    TCGv imm;
 	    CHECK_NOT_DELAY_SLOT
+            tcg_gen_movi_i32(cpu_pc, ctx->pc);
 	    imm = tcg_const_i32(B7_0);
             gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
@@ -1909,12 +1914,13 @@ static void _decode_opc(DisasContext * ctx)
 	    ctx->opcode, ctx->pc);
     fflush(stderr);
 #endif
+    tcg_gen_movi_i32(cpu_pc, ctx->pc);
     if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
         gen_helper_raise_illegal_instruction(cpu_env);
     }
-    ctx->bstate = BS_EXCP;
+    ctx->bstate = BS_BRANCH;
 }
 
 static void decode_opc(DisasContext * ctx)
@@ -1992,7 +1998,7 @@ gen_intermediate_code_internal(CPUSH4State * env, TranslationBlock * tb,
 		    /* We have hit a breakpoint - make sure PC is up-to-date */
 		    tcg_gen_movi_i32(cpu_pc, ctx.pc);
                     gen_helper_debug(cpu_env);
-		    ctx.bstate = BS_EXCP;
+                    ctx.bstate = BS_BRANCH;
 		    break;
 		}
 	    }
commit ed22e6f30e7f8c723f9b1be30869b9c0368e3e4e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: remove gen_clr_t() and gen_set_t()
    
    gen_clr_t() and gen_set_t() have very few callers and can each be
    replaced by a single line. Remove them.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 9ecbe47..cdc4e3b 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -339,16 +339,6 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     gen_jump(ctx);
 }
 
-static inline void gen_set_t(void)
-{
-    tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
-}
-
-static inline void gen_clr_t(void)
-{
-    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-}
-
 static inline void gen_cmp(int cond, TCGv t0, TCGv t1)
 {
     TCGv t;
@@ -519,7 +509,7 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S);
 	return;
     case 0x0008:		/* clrt */
-	gen_clr_t();
+        tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
 	return;
     case 0x0038:		/* ldtlb */
 	CHECK_PRIVILEGED
@@ -537,7 +527,7 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S);
 	return;
     case 0x0018:		/* sett */
-	gen_set_t();
+        tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
 	return;
     case 0xfbfd:		/* frchg */
 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
@@ -1660,7 +1650,7 @@ static void _decode_opc(DisasContext * ctx)
         */
         if (ctx->features & SH_FEATURE_SH4A) {
 	    int label = gen_new_label();
-	    gen_clr_t();
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
 	    tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst);
 	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
 	    tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
commit c53b36d210a55c67eca3ae89ebfdf665c0f2c3e2
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: optimize swap.w
    
    It's possible to swap the two 16-bit words of a 32-bit register using a
    rotation. If the TCG target doesn't implement rotation, the fallback
    code it generates is similar to the previously implemented sequence.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
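    
    For illustration only (not part of the patch): swapping the two 16-bit
    halves of a 32-bit value is exactly a rotation by 16 bits, which is why
    a single rotli op can replace the old shift/shift/or sequence. A
    minimal standalone C check of that identity:
    
        #include <assert.h>
        #include <stdint.h>
    
        /* Swapping the 16-bit halves of a 32-bit word == rotate by 16. */
        static uint32_t swap_halves(uint32_t x)
        {
            return (x << 16) | (x >> 16);
        }
    
        int main(void)
        {
            assert(swap_halves(0x12345678u) == 0x56781234u);
            return 0;
        }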

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 92c5a1f..9ecbe47 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -732,17 +732,7 @@ static void _decode_opc(DisasContext * ctx)
 	}
 	return;
     case 0x6009:		/* swap.w Rm,Rn */
-	{
-	    TCGv high, low;
-	    high = tcg_temp_new();
-	    tcg_gen_shli_i32(high, REG(B7_4), 16);
-	    low = tcg_temp_new();
-	    tcg_gen_shri_i32(low, REG(B7_4), 16);
-	    tcg_gen_ext16u_i32(low, low);
-	    tcg_gen_or_i32(REG(B11_8), high, low);
-	    tcg_temp_free(low);
-	    tcg_temp_free(high);
-	}
+        tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16);
 	return;
     case 0x200d:		/* xtrct Rm,Rn */
 	{
commit f16640f47bb0b8f907fff08a2e8d1c891ce82480
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: optimize xtrct
    
    The register is 32 bits wide, so after a right shift by 16 bits the
    upper 16 bits are already clear. There is no need to call ext16u to
    clear them.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
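    
    A tiny standalone illustration of the reasoning (not part of the
    patch): a logical right shift of an unsigned 32-bit value by 16 already
    leaves the upper 16 bits zero, so a following zero-extension cannot
    change the result.
    
        #include <assert.h>
        #include <stdint.h>
    
        int main(void)
        {
            uint32_t rm = 0xdeadbeefu;
            uint32_t low = rm >> 16;            /* 0x0000dead */
            assert(low == (low & 0xffffu));     /* ext16u would be a no-op */
            return 0;
        }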

diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 41a1f22..92c5a1f 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -751,7 +751,6 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_gen_shli_i32(high, REG(B7_4), 16);
 	    low = tcg_temp_new();
 	    tcg_gen_shri_i32(low, REG(B11_8), 16);
-	    tcg_gen_ext16u_i32(low, low);
 	    tcg_gen_or_i32(REG(B11_8), high, low);
 	    tcg_temp_free(low);
 	    tcg_temp_free(high);
commit ad8d25a11f13cb8acc69558d03cd69202f5a3cc2
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: implement addv and subv using TCG
    
    The addv and subv helper implementations are copied directly from the
    SH4 manual and look quite complex. The overflow condition can however
    be expressed without branches, so these instructions can be implemented
    with TCG.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
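    
    For illustration (not part of the patch): the branch-free form referred
    to above is the classic XOR-based signed-overflow test; the generated
    TCG code below builds the test from the same kind of xor/and/shift ops.
    A standalone C sketch of the idea:
    
        #include <assert.h>
        #include <stdint.h>
    
        /* a + b overflows iff both operands have the same sign and the
           result's sign differs from it. */
        static uint32_t add_overflows(uint32_t a, uint32_t b)
        {
            uint32_t r = a + b;
            return ((r ^ a) & (r ^ b)) >> 31;   /* 1 on overflow */
        }
    
        /* a - b overflows iff the operands have different signs and the
           result's sign differs from a's. */
        static uint32_t sub_overflows(uint32_t a, uint32_t b)
        {
            uint32_t r = a - b;
            return ((a ^ b) & (r ^ a)) >> 31;   /* 1 on overflow */
        }
    
        int main(void)
        {
            assert(add_overflows(0x7fffffffu, 1) == 1);  /* INT_MAX + 1 */
            assert(add_overflows(1, 2) == 0);
            assert(sub_overflows(0x80000000u, 1) == 1);  /* INT_MIN - 1 */
            assert(sub_overflows(2, 1) == 0);
            return 0;
        }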

diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 92d6dd7..a00b7dd 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -13,8 +13,6 @@ DEF_HELPER_3(movcal, void, env, i32, i32)
 DEF_HELPER_1(discard_movcal_backup, void, env)
 DEF_HELPER_2(ocbi, void, env, i32)
 
-DEF_HELPER_3(addv, i32, env, i32, i32)
-DEF_HELPER_3(subv, i32, env, i32, i32)
 DEF_HELPER_3(div1, i32, env, i32, i32)
 DEF_HELPER_3(macl, void, env, i32, i32)
 DEF_HELPER_3(macw, void, env, i32, i32)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 3ad10ba..4f1f754 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -177,35 +177,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
     }
 }
 
-uint32_t helper_addv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
-    uint32_t dest, src, ans;
-
-    if ((int32_t) arg1 >= 0)
-	dest = 0;
-    else
-	dest = 1;
-    if ((int32_t) arg0 >= 0)
-	src = 0;
-    else
-	src = 1;
-    src += dest;
-    arg1 += arg0;
-    if ((int32_t) arg1 >= 0)
-	ans = 0;
-    else
-	ans = 1;
-    ans += dest;
-    if (src == 0 || src == 2) {
-	if (ans == 1)
-	    env->sr |= SR_T;
-	else
-	    env->sr &= ~SR_T;
-    } else
-	env->sr &= ~SR_T;
-    return arg1;
-}
-
 #define T (env->sr & SR_T)
 #define Q (env->sr & SR_Q ? 1 : 0)
 #define M (env->sr & SR_M ? 1 : 0)
@@ -359,35 +330,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
     }
 }
 
-uint32_t helper_subv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
-    int32_t dest, src, ans;
-
-    if ((int32_t) arg1 >= 0)
-	dest = 0;
-    else
-	dest = 1;
-    if ((int32_t) arg0 >= 0)
-	src = 0;
-    else
-	src = 1;
-    src += dest;
-    arg1 -= arg0;
-    if ((int32_t) arg1 >= 0)
-	ans = 0;
-    else
-	ans = 1;
-    ans += dest;
-    if (src == 1) {
-	if (ans == 1)
-	    env->sr |= SR_T;
-	else
-	    env->sr &= ~SR_T;
-    } else
-	env->sr &= ~SR_T;
-    return arg1;
-}
-
 static inline void set_t(CPUSH4State *env)
 {
     env->sr |= SR_T;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 92f9b46..41a1f22 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -781,7 +781,23 @@ static void _decode_opc(DisasContext * ctx)
         }
 	return;
     case 0x300f:		/* addv Rm,Rn */
-        gen_helper_addv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
+            t1 = tcg_temp_new();
+            tcg_gen_xor_i32(t1, t0, REG(B11_8));
+            t2 = tcg_temp_new();
+            tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
+            tcg_gen_andc_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_shri_i32(t1, t1, 31);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B7_4), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x2009:		/* and Rm,Rn */
 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
@@ -1050,7 +1066,23 @@ static void _decode_opc(DisasContext * ctx)
         }
 	return;
     case 0x300b:		/* subv Rm,Rn */
-        gen_helper_subv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
+            t1 = tcg_temp_new();
+            tcg_gen_xor_i32(t1, t0, REG(B7_4));
+            t2 = tcg_temp_new();
+            tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
+            tcg_gen_and_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_shri_i32(t1, t1, 31);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x2008:		/* tst Rm,Rn */
 	{
commit 22b88fd77e2fbb8aefb6e50a6a24d670b0ecb022
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: implement addc and subc using TCG
    
    Now that setcond is available, addc and subc can easily be implemented
    using TCG.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
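    
    For illustration (not part of the patch): the carry and the borrow can
    be recovered after the fact with unsigned compares, which is what the
    setcond ops below do. A standalone C sketch of the idea:
    
        #include <assert.h>
        #include <stdint.h>
    
        /* r = a + b + t (t is 0 or 1); carry is set iff either partial
           addition wrapped around. */
        static uint32_t addc(uint32_t a, uint32_t b, uint32_t t, uint32_t *carry)
        {
            uint32_t r1 = a + b;
            uint32_t r2 = r1 + t;
            *carry = (r1 < b) | (r2 < r1);
            return r2;
        }
    
        /* r = a - b - t; borrow is set iff either partial subtraction
           wrapped around. */
        static uint32_t subc(uint32_t a, uint32_t b, uint32_t t, uint32_t *borrow)
        {
            uint32_t r1 = a - b;
            uint32_t r2 = r1 - t;
            *borrow = (r1 > a) | (r2 > r1);
            return r2;
        }
    
        int main(void)
        {
            uint32_t flag;
            assert(addc(0xffffffffu, 1, 0, &flag) == 0 && flag == 1);
            assert(addc(1, 2, 1, &flag) == 4 && flag == 0);
            assert(subc(0, 1, 0, &flag) == 0xffffffffu && flag == 1);
            assert(subc(2, 1, 1, &flag) == 0 && flag == 0);
            return 0;
        }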

diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index d498719..92d6dd7 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -14,9 +14,7 @@ DEF_HELPER_1(discard_movcal_backup, void, env)
 DEF_HELPER_2(ocbi, void, env, i32)
 
 DEF_HELPER_3(addv, i32, env, i32, i32)
-DEF_HELPER_3(addc, i32, env, i32, i32)
 DEF_HELPER_3(subv, i32, env, i32, i32)
-DEF_HELPER_3(subc, i32, env, i32, i32)
 DEF_HELPER_3(div1, i32, env, i32, i32)
 DEF_HELPER_3(macl, void, env, i32, i32)
 DEF_HELPER_3(macw, void, env, i32, i32)
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index bf835fa..3ad10ba 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -177,22 +177,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
     }
 }
 
-uint32_t helper_addc(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
-    uint32_t tmp0, tmp1;
-
-    tmp1 = arg0 + arg1;
-    tmp0 = arg1;
-    arg1 = tmp1 + (env->sr & 1);
-    if (tmp0 > tmp1)
-	env->sr |= SR_T;
-    else
-	env->sr &= ~SR_T;
-    if (tmp1 > arg1)
-	env->sr |= SR_T;
-    return arg1;
-}
-
 uint32_t helper_addv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t dest, src, ans;
@@ -375,22 +359,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
     }
 }
 
-uint32_t helper_subc(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-{
-    uint32_t tmp0, tmp1;
-
-    tmp1 = arg1 - arg0;
-    tmp0 = arg1;
-    arg1 = tmp1 - (env->sr & SR_T);
-    if (tmp0 < tmp1)
-	env->sr |= SR_T;
-    else
-	env->sr &= ~SR_T;
-    if (tmp1 < arg1)
-	env->sr |= SR_T;
-    return arg1;
-}
-
 uint32_t helper_subv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int32_t dest, src, ans;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index d05c74c..92f9b46 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -761,7 +761,24 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300e:		/* addc Rm,Rn */
-        gen_helper_addc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            t1 = tcg_temp_new();
+            tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8));
+            tcg_gen_add_i32(t0, t0, t1);
+            t2 = tcg_temp_new();
+            tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1);
+            tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0);
+            tcg_gen_or_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x300f:		/* addv Rm,Rn */
         gen_helper_addv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
@@ -1013,7 +1030,24 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300a:		/* subc Rm,Rn */
-        gen_helper_subc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            t1 = tcg_temp_new();
+            tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
+            tcg_gen_sub_i32(t0, t1, t0);
+            t2 = tcg_temp_new();
+            tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1);
+            tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
+            tcg_gen_or_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x300b:		/* subv Rm,Rn */
         gen_helper_subv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
commit ff2086fed268d2b2fedb273f6794949ed2ef9e10
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: use float32_muladd() to implement fmac
    
    There is no need to add an SH4-specific pickNaNMulAdd() to softfloat,
    as SH4 always returns the default NaN.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
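    
    For context (not part of the patch): the point of a fused multiply-add
    is that the product is not rounded before the addition, so the result
    can differ from a separate mul followed by an add. The same effect can
    be observed with the standard C fmaf() function:
    
        #include <math.h>
        #include <stdio.h>
    
        int main(void)
        {
            float a = 1.0f + 0x1.0p-12f;        /* 1 + 2^-12 */
            float prod = a * a;                 /* rounded to float here */
            float separate = prod - 1.0f;       /* two roundings in total */
            float fused = fmaf(a, a, -1.0f);    /* one rounding only */
            printf("separate = %a, fused = %a\n", separate, fused);
            return 0;
        }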

diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 9b4328d..bf835fa 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -623,8 +623,7 @@ float64 helper_float_DT(CPUSH4State *env, uint32_t t0)
 float32 helper_fmac_FT(CPUSH4State *env, float32 t0, float32 t1, float32 t2)
 {
     set_float_exception_flags(0, &env->fp_status);
-    t0 = float32_mul(t0, t1, &env->fp_status);
-    t0 = float32_add(t0, t2, &env->fp_status);
+    t0 = float32_muladd(t0, t1, t2, 0, &env->fp_status);
     update_fpscr(env, GETPC());
     return t0;
 }
commit f8e7343956c91ba6cf26bf41b5a9a94e97ad4261
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Sep 16 13:12:20 2012 +0200

    target-sh4: mark a few helpers const and pure
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 6e4f108..d498719 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -23,8 +23,8 @@ DEF_HELPER_3(macw, void, env, i32, i32)
 
 DEF_HELPER_2(ld_fpscr, void, env, i32)
 
-DEF_HELPER_1(fabs_FT, f32, f32)
-DEF_HELPER_1(fabs_DT, f64, f64)
+DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
 DEF_HELPER_3(fadd_FT, f32, env, f32, f32)
 DEF_HELPER_3(fadd_DT, f64, env, f64, f64)
 DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32)
@@ -41,7 +41,7 @@ DEF_HELPER_2(float_DT, f64, env, i32)
 DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32)
 DEF_HELPER_3(fmul_FT, f32, env, f32, f32)
 DEF_HELPER_3(fmul_DT, f64, env, f64, f64)
-DEF_HELPER_1(fneg_T, f32, f32)
+DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
 DEF_HELPER_3(fsub_FT, f32, env, f32, f32)
 DEF_HELPER_3(fsub_DT, f64, env, f64, f64)
 DEF_HELPER_2(fsqrt_FT, f32, env, f32)
commit 72a04d0c178f01908d74539230d9de64ffc6da19
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:40 2012 +0200

    uhci: Don't queue up packets after one with the SPD flag set
    
    Don't queue up packets after a packet with the SPD (short packet detect)
    flag set: we won't know whether the packet actually is short until it
    has completed, and if it is short we must stop the queue.
    
    This fixes a miniature photo frame which emulates a USB cdrom not
    working with its Windows software.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index c7c8786..cdc8bc3 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1000,6 +1000,9 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
         }
         assert(ret == TD_RESULT_ASYNC_START);
         assert(int_mask == 0);
+        if (ptd.ctrl & TD_CTRL_SPD) {
+            break;
+        }
         plink = ptd.link;
     }
 }
@@ -1097,7 +1100,7 @@ static void uhci_process_frame(UHCIState *s)
 
         case TD_RESULT_ASYNC_START:
             trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf);
-            if (is_valid(td.link)) {
+            if (is_valid(td.link) && !(td.ctrl & TD_CTRL_SPD)) {
                 uhci_fill_queue(s, &td);
             }
             link = curr_qh ? qh.link : td.link;
commit 35efba2cc6812dc980c336d7b9bf81dbfb5daf00
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:39 2012 +0200

    usb-redir: Revert usb-redir part of commit 93bfef4c
    
    Commit 93bfef4c6e4b23caea9d51e1099d06433d8835a4 makes qemu devices that
    report the qemu version string to the guest in some way use a
    qemu_get_version function, which reports a machine-specific version
    string.
    
    However, usb-redir does not expose the qemu version to the guest, only
    to the usbredir-host as part of the initial handshake. There it can be
    logged on the usbredir-host side for debugging purposes, and it is
    otherwise completely unused. For debugging purposes it is important to
    have the real qemu version in there, rather than the machine-specific
    version.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 9a4be6d..b10241a 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -142,6 +142,8 @@ static void usbredir_interrupt_packet(void *priv, uint64_t id,
 static int usbredir_handle_status(USBRedirDevice *dev,
                                        int status, int actual_len);
 
+#define VERSION "qemu usb-redir guest " QEMU_VERSION
+
 /*
  * Logging stuff
  */
@@ -863,7 +865,6 @@ static void usbredir_chardev_close_bh(void *opaque)
 static void usbredir_chardev_open(USBRedirDevice *dev)
 {
     uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
-    char version[32];
     int flags = 0;
 
     /* Make sure any pending closes are handled (no-op if none pending) */
@@ -872,9 +873,6 @@ static void usbredir_chardev_open(USBRedirDevice *dev)
 
     DPRINTF("creating usbredirparser\n");
 
-    strcpy(version, "qemu usb-redir guest ");
-    pstrcat(version, sizeof(version), qemu_get_version());
-
     dev->parser = qemu_oom_check(usbredirparser_create());
     dev->parser->priv = dev;
     dev->parser->log_func = usbredir_log;
@@ -906,7 +904,7 @@ static void usbredir_chardev_open(USBRedirDevice *dev)
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         flags |= usbredirparser_fl_no_hello;
     }
-    usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE,
+    usbredirparser_init(dev->parser, VERSION, caps, USB_REDIR_CAPS_SIZE,
                         flags);
     usbredirparser_do_write(dev->parser);
 }
commit 09054d19e72f6991cd359d41c479badd92f62a06
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:38 2012 +0200

    usb-redir: Add chardev open / close debug logging
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 5e987e9..9a4be6d 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -854,6 +854,7 @@ static void usbredir_chardev_close_bh(void *opaque)
     usbredir_device_disconnect(dev);
 
     if (dev->parser) {
+        DPRINTF("destroying usbredirparser\n");
         usbredirparser_destroy(dev->parser);
         dev->parser = NULL;
     }
@@ -869,6 +870,8 @@ static void usbredir_chardev_open(USBRedirDevice *dev)
     usbredir_chardev_close_bh(dev);
     qemu_bh_cancel(dev->chardev_close_bh);
 
+    DPRINTF("creating usbredirparser\n");
+
     strcpy(version, "qemu usb-redir guest ");
     pstrcat(version, sizeof(version), qemu_get_version());
 
@@ -980,9 +983,11 @@ static void usbredir_chardev_event(void *opaque, int event)
 
     switch (event) {
     case CHR_EVENT_OPENED:
+        DPRINTF("chardev open\n");
         usbredir_chardev_open(dev);
         break;
     case CHR_EVENT_CLOSED:
+        DPRINTF("chardev close\n");
         qemu_bh_schedule(dev->chardev_close_bh);
         break;
     }
@@ -1228,6 +1233,7 @@ static void usbredir_device_disconnect(void *priv)
     qemu_del_timer(dev->attach_timer);
 
     if (dev->dev.attached) {
+        DPRINTF("detaching device\n");
         usb_device_detach(&dev->dev);
         /*
          * Delay next usb device attach to give the guest a chance to see
commit fc3f6e1b106abcf6b8cf487ac8f8e5fc2fd86776
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:37 2012 +0200

    usb-redir: Add support for migration
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 3196665..5e987e9 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -65,8 +65,8 @@ struct endp_data {
     uint8_t bufpq_prefilled;
     uint8_t bufpq_dropping_packets;
     QTAILQ_HEAD(, buf_packet) bufpq;
-    int bufpq_size;
-    int bufpq_target_size;
+    int32_t bufpq_size;
+    int32_t bufpq_target_size;
 };
 
 struct PacketIdQueueEntry {
@@ -240,6 +240,11 @@ static int usbredir_write(void *priv, uint8_t *data, int count)
         return 0;
     }
 
+    /* Don't send new data to the chardev until our state is fully synced */
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        return 0;
+    }
+
     return qemu_chr_fe_write(dev->cs, data, count);
 }
 
@@ -858,6 +863,7 @@ static void usbredir_chardev_open(USBRedirDevice *dev)
 {
     uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
     char version[32];
+    int flags = 0;
 
     /* Make sure any pending closes are handled (no-op if none pending) */
     usbredir_chardev_close_bh(dev);
@@ -893,7 +899,12 @@ static void usbredir_chardev_open(USBRedirDevice *dev)
     usbredirparser_caps_set_cap(caps, usb_redir_cap_filter);
     usbredirparser_caps_set_cap(caps, usb_redir_cap_ep_info_max_packet_size);
     usbredirparser_caps_set_cap(caps, usb_redir_cap_64bits_ids);
-    usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE, 0);
+
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        flags |= usbredirparser_fl_no_hello;
+    }
+    usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE,
+                        flags);
     usbredirparser_do_write(dev->parser);
 }
 
@@ -939,6 +950,11 @@ static int usbredir_chardev_can_read(void *opaque)
         return 0;
     }
 
+    /* Don't read new data from the chardev until our state is fully synced */
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        return 0;
+    }
+
     /* usbredir_parser_do_read will consume *all* data we give it */
     return 1024 * 1024;
 }
@@ -976,6 +992,15 @@ static void usbredir_chardev_event(void *opaque, int event)
  * init + destroy
  */
 
+static void usbredir_vm_state_change(void *priv, int running, RunState state)
+{
+    USBRedirDevice *dev = priv;
+
+    if (state == RUN_STATE_RUNNING && dev->parser != NULL) {
+        usbredirparser_do_write(dev->parser); /* Flush any pending writes */
+    }
+}
+
 static int usbredir_initfn(USBDevice *udev)
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
@@ -1014,6 +1039,7 @@ static int usbredir_initfn(USBDevice *udev)
     qemu_chr_add_handlers(dev->cs, usbredir_chardev_can_read,
                           usbredir_chardev_read, usbredir_chardev_event, dev);
 
+    qemu_add_vm_change_state_handler(usbredir_vm_state_change, dev);
     add_boot_device_path(dev->bootindex, &udev->qdev, NULL);
     return 0;
 }
@@ -1503,6 +1529,322 @@ static void usbredir_interrupt_packet(void *priv, uint64_t id,
     }
 }
 
+/*
+ * Migration code
+ */
+
+static void usbredir_pre_save(void *priv)
+{
+    USBRedirDevice *dev = priv;
+
+    usbredir_fill_already_in_flight(dev);
+}
+
+static int usbredir_post_load(void *priv, int version_id)
+{
+    USBRedirDevice *dev = priv;
+    struct USBEndpoint *usb_ep;
+    int i;
+
+    switch (dev->device_info.speed) {
+    case usb_redir_speed_low:
+        dev->dev.speed = USB_SPEED_LOW;
+        break;
+    case usb_redir_speed_full:
+        dev->dev.speed = USB_SPEED_FULL;
+        break;
+    case usb_redir_speed_high:
+        dev->dev.speed = USB_SPEED_HIGH;
+        break;
+    case usb_redir_speed_super:
+        dev->dev.speed = USB_SPEED_SUPER;
+        break;
+    default:
+        dev->dev.speed = USB_SPEED_FULL;
+    }
+    dev->dev.speedmask = (1 << dev->dev.speed);
+
+    for (i = 0; i < MAX_ENDPOINTS; i++) {
+        usb_ep = usb_ep_get(&dev->dev,
+                            (i & 0x10) ? USB_TOKEN_IN : USB_TOKEN_OUT,
+                            i & 0x0f);
+        usb_ep->type = dev->endpoint[i].type;
+        usb_ep->ifnum = dev->endpoint[i].interface;
+        usb_ep->max_packet_size = dev->endpoint[i].max_packet_size;
+        if (dev->endpoint[i].type == usb_redir_type_bulk) {
+            usb_ep->pipeline = true;
+        }
+    }
+    return 0;
+}
+
+/* For usbredirparser migration */
+static void usbredir_put_parser(QEMUFile *f, void *priv, size_t unused)
+{
+    USBRedirDevice *dev = priv;
+    uint8_t *data;
+    int len;
+
+    if (dev->parser == NULL) {
+        qemu_put_be32(f, 0);
+        return;
+    }
+
+    usbredirparser_serialize(dev->parser, &data, &len);
+    qemu_oom_check(data);
+
+    qemu_put_be32(f, len);
+    qemu_put_buffer(f, data, len);
+
+    free(data);
+}
+
+static int usbredir_get_parser(QEMUFile *f, void *priv, size_t unused)
+{
+    USBRedirDevice *dev = priv;
+    uint8_t *data;
+    int len, ret;
+
+    len = qemu_get_be32(f);
+    if (len == 0) {
+        return 0;
+    }
+
+    /*
+     * Our chardev should be open already at this point, otherwise
+     * the usbredir channel will be broken (ie spice without seamless)
+     */
+    if (dev->parser == NULL) {
+        ERROR("get_parser called with closed chardev, failing migration\n");
+        return -1;
+    }
+
+    data = g_malloc(len);
+    qemu_get_buffer(f, data, len);
+
+    ret = usbredirparser_unserialize(dev->parser, data, len);
+
+    g_free(data);
+
+    return ret;
+}
+
+static const VMStateInfo usbredir_parser_vmstate_info = {
+    .name = "usb-redir-parser",
+    .put  = usbredir_put_parser,
+    .get  = usbredir_get_parser,
+};
+
+
+/* For buffered packets (iso/irq) queue migration */
+static void usbredir_put_bufpq(QEMUFile *f, void *priv, size_t unused)
+{
+    struct endp_data *endp = priv;
+    struct buf_packet *bufp;
+    int remain = endp->bufpq_size;
+
+    qemu_put_be32(f, endp->bufpq_size);
+    QTAILQ_FOREACH(bufp, &endp->bufpq, next) {
+        qemu_put_be32(f, bufp->len);
+        qemu_put_be32(f, bufp->status);
+        qemu_put_buffer(f, bufp->data, bufp->len);
+        remain--;
+    }
+    assert(remain == 0);
+}
+
+static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused)
+{
+    struct endp_data *endp = priv;
+    struct buf_packet *bufp;
+    int i;
+
+    endp->bufpq_size = qemu_get_be32(f);
+    for (i = 0; i < endp->bufpq_size; i++) {
+        bufp = g_malloc(sizeof(struct buf_packet));
+        bufp->len = qemu_get_be32(f);
+        bufp->status = qemu_get_be32(f);
+        bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */
+        qemu_get_buffer(f, bufp->data, bufp->len);
+        QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next);
+    }
+    return 0;
+}
+
+static const VMStateInfo usbredir_ep_bufpq_vmstate_info = {
+    .name = "usb-redir-bufpq",
+    .put  = usbredir_put_bufpq,
+    .get  = usbredir_get_bufpq,
+};
+
+
+/* For endp_data migration */
+static const VMStateDescription usbredir_ep_vmstate = {
+    .name = "usb-redir-ep",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(type, struct endp_data),
+        VMSTATE_UINT8(interval, struct endp_data),
+        VMSTATE_UINT8(interface, struct endp_data),
+        VMSTATE_UINT16(max_packet_size, struct endp_data),
+        VMSTATE_UINT8(iso_started, struct endp_data),
+        VMSTATE_UINT8(iso_error, struct endp_data),
+        VMSTATE_UINT8(interrupt_started, struct endp_data),
+        VMSTATE_UINT8(interrupt_error, struct endp_data),
+        VMSTATE_UINT8(bufpq_prefilled, struct endp_data),
+        VMSTATE_UINT8(bufpq_dropping_packets, struct endp_data),
+        {
+            .name         = "bufpq",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_ep_bufpq_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_INT32(bufpq_target_size, struct endp_data),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For PacketIdQueue migration */
+static void usbredir_put_packet_id_q(QEMUFile *f, void *priv, size_t unused)
+{
+    struct PacketIdQueue *q = priv;
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
+    int remain = q->size;
+
+    DPRINTF("put_packet_id_q %s size %d\n", q->name, q->size);
+    qemu_put_be32(f, q->size);
+    QTAILQ_FOREACH(e, &q->head, next) {
+        qemu_put_be64(f, e->id);
+        remain--;
+    }
+    assert(remain == 0);
+}
+
+static int usbredir_get_packet_id_q(QEMUFile *f, void *priv, size_t unused)
+{
+    struct PacketIdQueue *q = priv;
+    USBRedirDevice *dev = q->dev;
+    int i, size;
+    uint64_t id;
+
+    size = qemu_get_be32(f);
+    DPRINTF("get_packet_id_q %s size %d\n", q->name, size);
+    for (i = 0; i < size; i++) {
+        id = qemu_get_be64(f);
+        packet_id_queue_add(q, id);
+    }
+    assert(q->size == size);
+    return 0;
+}
+
+static const VMStateInfo usbredir_ep_packet_id_q_vmstate_info = {
+    .name = "usb-redir-packet-id-q",
+    .put  = usbredir_put_packet_id_q,
+    .get  = usbredir_get_packet_id_q,
+};
+
+static const VMStateDescription usbredir_ep_packet_id_queue_vmstate = {
+    .name = "usb-redir-packet-id-queue",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        {
+            .name         = "queue",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_ep_packet_id_q_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For usb_redir_device_connect_header migration */
+static const VMStateDescription usbredir_device_info_vmstate = {
+    .name = "usb-redir-device-info",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(speed, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_class, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_subclass, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_protocol, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(vendor_id, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(product_id, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(device_version_bcd,
+                       struct usb_redir_device_connect_header),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For usb_redir_interface_info_header migration */
+static const VMStateDescription usbredir_interface_info_vmstate = {
+    .name = "usb-redir-interface-info",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(interface_count,
+                       struct usb_redir_interface_info_header),
+        VMSTATE_UINT8_ARRAY(interface,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_class,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_subclass,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_protocol,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* And finally the USBRedirDevice vmstate itself */
+static const VMStateDescription usbredir_vmstate = {
+    .name = "usb-redir",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_save = usbredir_pre_save,
+    .post_load = usbredir_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_USB_DEVICE(dev, USBRedirDevice),
+        VMSTATE_TIMER(attach_timer, USBRedirDevice),
+        {
+            .name         = "parser",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_parser_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_STRUCT_ARRAY(endpoint, USBRedirDevice, MAX_ENDPOINTS, 1,
+                             usbredir_ep_vmstate, struct endp_data),
+        VMSTATE_STRUCT(cancelled, USBRedirDevice, 1,
+                       usbredir_ep_packet_id_queue_vmstate,
+                       struct PacketIdQueue),
+        VMSTATE_STRUCT(already_in_flight, USBRedirDevice, 1,
+                       usbredir_ep_packet_id_queue_vmstate,
+                       struct PacketIdQueue),
+        VMSTATE_STRUCT(device_info, USBRedirDevice, 1,
+                       usbredir_device_info_vmstate,
+                       struct usb_redir_device_connect_header),
+        VMSTATE_STRUCT(interface_info, USBRedirDevice, 1,
+                       usbredir_interface_info_vmstate,
+                       struct usb_redir_interface_info_header),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static Property usbredir_properties[] = {
     DEFINE_PROP_CHR("chardev", USBRedirDevice, cs),
     DEFINE_PROP_UINT8("debug", USBRedirDevice, debug, 0),
@@ -1523,6 +1865,7 @@ static void usbredir_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usbredir_handle_reset;
     uc->handle_data    = usbredir_handle_data;
     uc->handle_control = usbredir_handle_control;
+    dc->vmsd           = &usbredir_vmstate;
     dc->props          = usbredir_properties;
 }
 
commit 3f4be32824ccc5f6a5566b93c900df989a96d048
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:36 2012 +0200

    usb-redir: Store max_packet_size in endp_data
    
    This gives us a place to migrate it to / from, so that it can be
    restored after migration.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index f474da8..3196665 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -57,6 +57,7 @@ struct endp_data {
     uint8_t type;
     uint8_t interval;
     uint8_t interface; /* bInterfaceNumber this ep belongs to */
+    uint16_t max_packet_size; /* In bytes, not wMaxPacketSize format !! */
     uint8_t iso_started;
     uint8_t iso_error; /* For reporting iso errors to the HC */
     uint8_t interrupt_started;
@@ -1278,7 +1279,8 @@ static void usbredir_ep_info(void *priv,
         usb_ep->ifnum = dev->endpoint[i].interface;
         if (usbredirparser_peer_has_cap(dev->parser,
                                      usb_redir_cap_ep_info_max_packet_size)) {
-            usb_ep->max_packet_size = ep_info->max_packet_size[i];
+            dev->endpoint[i].max_packet_size =
+                usb_ep->max_packet_size = ep_info->max_packet_size[i];
         }
         if (ep_info->type[i] == usb_redir_type_bulk) {
             usb_ep->pipeline = true;
commit 9a8d4067a63702dfb62d0ae3399600a853ad5daa
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:35 2012 +0200

    usb-redir: Add an already_in_flight packet-id queue
    
    After a live migration, the usb-hcd will re-queue all packets by walking
    over the schedule in guest memory again, but requests which were already
    encountered on the migration source will still be in flight, so these
    should *not* be re-sent to the usbredir-host.
    
    This patch adds an already-in-flight packet-id queue, which is filled by
    the source before migration and then moved over to the migration
    destination; any asynchronously handled packets are then checked against
    this queue to avoid sending the same packet to the usbredir-host twice.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat,com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
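    
    A toy model of the mechanism, for illustration only (invented names,
    not the usb-redir API): ids recorded on the source just before saving
    state are consumed on the destination when the host controller
    resubmits the corresponding packets, so nothing goes out twice.
    
        #include <assert.h>
        #include <stdbool.h>
        #include <stdint.h>
    
        #define MAX_IDS 64
        static uint64_t in_flight[MAX_IDS];
        static int in_flight_count;
    
        /* Source side, pre-save: remember a packet id that is in flight. */
        static void mark_in_flight(uint64_t id)
        {
            in_flight[in_flight_count++] = id;
        }
    
        /* Destination side, on resubmit: true means "skip the resend". */
        static bool already_in_flight(uint64_t id)
        {
            for (int i = 0; i < in_flight_count; i++) {
                if (in_flight[i] == id) {
                    in_flight[i] = in_flight[--in_flight_count];
                    return true;
                }
            }
            return false;
        }
    
        int main(void)
        {
            mark_in_flight(42);
            assert(already_in_flight(42));    /* first resubmit: skipped */
            assert(!already_in_flight(42));   /* entry consumed: later ok */
            return 0;
        }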

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 603262a..f474da8 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -98,6 +98,7 @@ struct USBRedirDevice {
     struct usbredirparser *parser;
     struct endp_data endpoint[MAX_ENDPOINTS];
     struct PacketIdQueue cancelled;
+    struct PacketIdQueue already_in_flight;
     /* Data for device filtering */
     struct usb_redir_device_connect_header device_info;
     struct usb_redir_interface_info_header interface_info;
@@ -316,6 +317,34 @@ static int usbredir_is_cancelled(USBRedirDevice *dev, uint64_t id)
     return packet_id_queue_remove(&dev->cancelled, id);
 }
 
+static void usbredir_fill_already_in_flight_from_ep(USBRedirDevice *dev,
+    struct USBEndpoint *ep)
+{
+    static USBPacket *p;
+
+    QTAILQ_FOREACH(p, &ep->queue, queue) {
+        packet_id_queue_add(&dev->already_in_flight, p->id);
+    }
+}
+
+static void usbredir_fill_already_in_flight(USBRedirDevice *dev)
+{
+    int ep;
+    struct USBDevice *udev = &dev->dev;
+
+    usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_ctl);
+
+    for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
+        usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_in[ep]);
+        usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_out[ep]);
+    }
+}
+
+static int usbredir_already_in_flight(USBRedirDevice *dev, uint64_t id)
+{
+    return packet_id_queue_remove(&dev->already_in_flight, id);
+}
+
 static USBPacket *usbredir_find_packet_by_id(USBRedirDevice *dev,
     uint8_t ep, uint64_t id)
 {
@@ -531,6 +560,10 @@ static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p,
 
     DPRINTF("bulk-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id);
 
+    if (usbredir_already_in_flight(dev, p->id)) {
+        return USB_RET_ASYNC;
+    }
+
     bulk_packet.endpoint  = ep;
     bulk_packet.length    = p->iov.size;
     bulk_packet.stream_id = 0;
@@ -611,6 +644,10 @@ static int usbredir_handle_interrupt_data(USBRedirDevice *dev,
         DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep,
                 p->iov.size, p->id);
 
+        if (usbredir_already_in_flight(dev, p->id)) {
+            return USB_RET_ASYNC;
+        }
+
         interrupt_packet.endpoint  = ep;
         interrupt_packet.length    = p->iov.size;
 
@@ -753,6 +790,10 @@ static int usbredir_handle_control(USBDevice *udev, USBPacket *p,
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
     struct usb_redir_control_packet_header control_packet;
 
+    if (usbredir_already_in_flight(dev, p->id)) {
+        return USB_RET_ASYNC;
+    }
+
     /* Special cases for certain standard device requests */
     switch (request) {
     case DeviceOutRequest | USB_REQ_SET_ADDRESS:
@@ -959,6 +1000,7 @@ static int usbredir_initfn(USBDevice *udev)
     dev->attach_timer = qemu_new_timer_ms(vm_clock, usbredir_do_attach, dev);
 
     packet_id_queue_init(&dev->cancelled, dev, "cancelled");
+    packet_id_queue_init(&dev->already_in_flight, dev, "already-in-flight");
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         QTAILQ_INIT(&dev->endpoint[i].bufpq);
     }
@@ -980,6 +1022,7 @@ static void usbredir_cleanup_device_queues(USBRedirDevice *dev)
     int i;
 
     packet_id_queue_empty(&dev->cancelled);
+    packet_id_queue_empty(&dev->already_in_flight);
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         usbredir_free_bufpq(dev, I2EP(i));
     }
commit 8e60452a954978f2f52026aa5f5cfd948e4e2be0
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:34 2012 +0200

    usb-redir: Change cancelled packet code into a generic packet-id queue
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 5301a69..603262a 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -43,7 +43,6 @@
 #define EP2I(ep_address) (((ep_address & 0x80) >> 3) | (ep_address & 0x0f))
 #define I2EP(i) (((i & 0x10) << 3) | (i & 0x0f))
 
-typedef struct Cancelled Cancelled;
 typedef struct USBRedirDevice USBRedirDevice;
 
 /* Struct to hold buffered packets (iso or int input packets) */
@@ -69,6 +68,18 @@ struct endp_data {
     int bufpq_target_size;
 };
 
+struct PacketIdQueueEntry {
+    uint64_t id;
+    QTAILQ_ENTRY(PacketIdQueueEntry)next;
+};
+
+struct PacketIdQueue {
+    USBRedirDevice *dev;
+    const char *name;
+    QTAILQ_HEAD(, PacketIdQueueEntry) head;
+    int size;
+};
+
 struct USBRedirDevice {
     USBDevice dev;
     /* Properties */
@@ -86,7 +97,7 @@ struct USBRedirDevice {
     int64_t next_attach_time;
     struct usbredirparser *parser;
     struct endp_data endpoint[MAX_ENDPOINTS];
-    QTAILQ_HEAD(, Cancelled) cancelled;
+    struct PacketIdQueue cancelled;
     /* Data for device filtering */
     struct usb_redir_device_connect_header device_info;
     struct usb_redir_interface_info_header interface_info;
@@ -94,11 +105,6 @@ struct USBRedirDevice {
     int filter_rules_count;
 };
 
-struct Cancelled {
-    uint64_t id;
-    QTAILQ_ENTRY(Cancelled)next;
-};
-
 static void usbredir_hello(void *priv, struct usb_redir_hello_header *h);
 static void usbredir_device_connect(void *priv,
     struct usb_redir_device_connect_header *device_connect);
@@ -239,37 +245,75 @@ static int usbredir_write(void *priv, uint8_t *data, int count)
  * Cancelled and buffered packets helpers
  */
 
-static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p)
+static void packet_id_queue_init(struct PacketIdQueue *q,
+    USBRedirDevice *dev, const char *name)
 {
-    USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
-    Cancelled *c;
+    q->dev = dev;
+    q->name = name;
+    QTAILQ_INIT(&q->head);
+    q->size = 0;
+}
+
+static void packet_id_queue_add(struct PacketIdQueue *q, uint64_t id)
+{
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
+
+    DPRINTF("adding packet id %"PRIu64" to %s queue\n", id, q->name);
+
+    e = g_malloc0(sizeof(struct PacketIdQueueEntry));
+    e->id = id;
+    QTAILQ_INSERT_TAIL(&q->head, e, next);
+    q->size++;
+}
+
+static int packet_id_queue_remove(struct PacketIdQueue *q, uint64_t id)
+{
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
+
+    QTAILQ_FOREACH(e, &q->head, next) {
+        if (e->id == id) {
+            DPRINTF("removing packet id %"PRIu64" from %s queue\n",
+                    id, q->name);
+            QTAILQ_REMOVE(&q->head, e, next);
+            q->size--;
+            g_free(e);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static void packet_id_queue_empty(struct PacketIdQueue *q)
+{
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e, *next_e;
 
-    DPRINTF("cancel packet id %"PRIu64"\n", p->id);
+    DPRINTF("removing %d packet-ids from %s queue\n", q->size, q->name);
 
-    c = g_malloc0(sizeof(Cancelled));
-    c->id = p->id;
-    QTAILQ_INSERT_TAIL(&dev->cancelled, c, next);
+    QTAILQ_FOREACH_SAFE(e, &q->head, next, next_e) {
+        QTAILQ_REMOVE(&q->head, e, next);
+        g_free(e);
+    }
+    q->size = 0;
+}
 
+static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p)
+{
+    USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
+
+    packet_id_queue_add(&dev->cancelled, p->id);
     usbredirparser_send_cancel_data_packet(dev->parser, p->id);
     usbredirparser_do_write(dev->parser);
 }
 
 static int usbredir_is_cancelled(USBRedirDevice *dev, uint64_t id)
 {
-    Cancelled *c;
-
     if (!dev->dev.attached) {
         return 1; /* Treat everything as cancelled after a disconnect */
     }
-
-    QTAILQ_FOREACH(c, &dev->cancelled, next) {
-        if (c->id == id) {
-            QTAILQ_REMOVE(&dev->cancelled, c, next);
-            g_free(c);
-            return 1;
-        }
-    }
-    return 0;
+    return packet_id_queue_remove(&dev->cancelled, id);
 }
 
 static USBPacket *usbredir_find_packet_by_id(USBRedirDevice *dev,
@@ -914,7 +958,7 @@ static int usbredir_initfn(USBDevice *udev)
     dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
     dev->attach_timer = qemu_new_timer_ms(vm_clock, usbredir_do_attach, dev);
 
-    QTAILQ_INIT(&dev->cancelled);
+    packet_id_queue_init(&dev->cancelled, dev, "cancelled");
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         QTAILQ_INIT(&dev->endpoint[i].bufpq);
     }
@@ -933,13 +977,9 @@ static int usbredir_initfn(USBDevice *udev)
 
 static void usbredir_cleanup_device_queues(USBRedirDevice *dev)
 {
-    Cancelled *c, *next_c;
     int i;
 
-    QTAILQ_FOREACH_SAFE(c, &dev->cancelled, next, next_c) {
-        QTAILQ_REMOVE(&dev->cancelled, c, next);
-        g_free(c);
-    }
+    packet_id_queue_empty(&dev->cancelled);
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         usbredir_free_bufpq(dev, I2EP(i));
     }
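
The refactor above turns the old Cancelled list into a name-tagged, generic packet-id queue, which is what lets the later already-in-flight tracking (see the usbredir_already_in_flight hunks earlier in this mail) reuse the same helpers. As a rough standalone illustration of the contract (add an id; a later remove both answers the membership question and drops the entry), here is a toy sketch. A plain singly linked list stands in for QTAILQ, and the USBRedirDevice/DPRINTF plumbing is left out:

    /* Toy model of the generic packet-id queue: a plain singly linked list
     * stands in for QTAILQ; no USBRedirDevice, no DPRINTF. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct id_entry {
        uint64_t id;
        struct id_entry *next;
    };

    struct id_queue {
        const char *name;
        struct id_entry *head;
        int size;
    };

    static void id_queue_init(struct id_queue *q, const char *name)
    {
        q->name = name;
        q->head = NULL;
        q->size = 0;
    }

    static void id_queue_add(struct id_queue *q, uint64_t id)
    {
        struct id_entry *e = calloc(1, sizeof(*e));

        e->id = id;
        e->next = q->head;   /* ordering is irrelevant, only membership counts */
        q->head = e;
        q->size++;
    }

    /* Returns 1 and drops the entry if id was queued, 0 otherwise; the same
     * contract as packet_id_queue_remove() above. */
    static int id_queue_remove(struct id_queue *q, uint64_t id)
    {
        struct id_entry **p = &q->head;

        while (*p) {
            if ((*p)->id == id) {
                struct id_entry *e = *p;

                *p = e->next;
                free(e);
                q->size--;
                return 1;
            }
            p = &(*p)->next;
        }
        return 0;
    }

    int main(void)
    {
        struct id_queue cancelled, in_flight;

        id_queue_init(&cancelled, "cancelled");
        id_queue_init(&in_flight, "already-in-flight");

        /* cancel path: remember the id, a later lookup consumes it */
        id_queue_add(&cancelled, 42);
        printf("42 cancelled? %d\n", id_queue_remove(&cancelled, 42));  /* 1 */
        printf("42 again?     %d\n", id_queue_remove(&cancelled, 42));  /* 0 */

        /* a second queue reuses the exact same helpers, just as the
         * already-in-flight queue reuses packet_id_queue_*() */
        id_queue_add(&in_flight, 7);
        printf("7 in flight?  %d\n", id_queue_remove(&in_flight, 7));   /* 1 */
        return 0;
    }
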
commit ceab6f96454fe6589d1b09ce64403c041d79f9d9
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:33 2012 +0200

    ehci: Walk async schedule before and after migration
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index bc86460..6a5da84 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -34,6 +34,7 @@
 #include "monitor.h"
 #include "trace.h"
 #include "dma.h"
+#include "sysemu.h"
 
 #define EHCI_DEBUG   0
 
@@ -2572,6 +2573,32 @@ static int usb_ehci_post_load(void *opaque, int version_id)
     return 0;
 }
 
+static void usb_ehci_vm_state_change(void *opaque, int running, RunState state)
+{
+    EHCIState *ehci = opaque;
+
+    /*
+     * We don't migrate the EHCIQueue-s, instead we rebuild them for the
+     * schedule in guest memory. We must do the rebuild ASAP, so that
+     * USB devices which have async handled packets have a packet in the
+     * ep queue to match the completion with.
+     */
+    if (state == RUN_STATE_RUNNING) {
+        ehci_advance_async_state(ehci);
+    }
+
+    /*
+     * The schedule rebuilt from guest memory could cause the migration dest
+     * to miss a QH unlink, and fail to cancel packets, since the unlinked QH
+     * will never have existed on the destination. Therefore we must flush the
+     * async schedule on savevm to catch any not yet noticed unlinks.
+     */
+    if (state == RUN_STATE_SAVE_VM) {
+        ehci_advance_async_state(ehci);
+        ehci_queues_rip_unseen(ehci, 1);
+    }
+}
+
 static const VMStateDescription vmstate_ehci = {
     .name        = "ehci",
     .version_id  = 2,
@@ -2721,6 +2748,7 @@ static int usb_ehci_initfn(PCIDevice *dev)
     usb_packet_init(&s->ipacket);
 
     qemu_register_reset(ehci_reset, s);
+    qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
 
     memory_region_init(&s->mem, "ehci", MMIO_SIZE);
     memory_region_init_io(&s->mem_caps, &ehci_mmio_caps_ops, s,
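
The hook added above is the standard VM change-state pattern: register a callback at init time and react to the run-state transitions you care about. A minimal sketch of that pattern for a hypothetical device follows; MyDevState and the rebuild/flush helpers are invented names, while the handler signature, the RUN_STATE_* values and the registration call are the ones used by the patch (it only builds inside a QEMU tree of this era):

    /* Sketch only: MyDevState and the rebuild/flush helpers are invented;
     * the handler signature and the registration call match the patch. */
    #include "sysemu.h"   /* RunState, qemu_add_vm_change_state_handler() */

    typedef struct MyDevState {
        int dummy;        /* whatever derived, non-migrated state the device keeps */
    } MyDevState;

    static void my_dev_rebuild(MyDevState *s)
    {
        /* rebuild state derived from guest memory (like the EHCI queues) */
    }

    static void my_dev_flush(MyDevState *s)
    {
        /* push out pending work so the migration stream sees a clean state */
    }

    static void my_dev_vm_state_change(void *opaque, int running, RunState state)
    {
        MyDevState *s = opaque;

        if (state == RUN_STATE_RUNNING) {
            /* resume, including the end of an incoming migration */
            my_dev_rebuild(s);
        }
        if (state == RUN_STATE_SAVE_VM) {
            /* savevm / outgoing migration is about to serialize the device */
            my_dev_flush(s);
        }
    }

    static void my_dev_init(MyDevState *s)
    {
        qemu_add_vm_change_state_handler(my_dev_vm_state_change, s);
    }
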
commit 8f5457eb04140714eaf57a99bc08dc661d83fa87
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Sep 12 15:08:32 2012 +0200

    ehci: Don't set seen to 0 when removing unseen queue-heads
    
    When removing unseen queue-heads from the async queue list, we should
    not clear the seen flag of the queue-heads we keep, as this may cause
    them to be removed by ehci_queues_rip_unused() during the next call to
    ehci_advance_async_state() if the timer is late or running at a low
    frequency.
    
    Note:
    1) This *may* have caused the instant unlink / relinks described in commit
       9bc3a3a216e2689bfcdd36c3e079333bbdbf3ba0
    
    2) Rather than putting more ifs inside ehci_queues_rip_unused, this patch
       instead introduces a new ehci_queues_rip_unseen function.
    
    3) This patch also makes it safe to call ehci_queues_rip_unseen() multiple
       times, which is used in the follow-up patch titled:
       "ehci: Walk async schedule before and after migration"
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 017a01d..bc86460 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -848,10 +848,10 @@ static EHCIQueue *ehci_find_queue_by_qh(EHCIState *ehci, uint32_t addr,
     return NULL;
 }
 
-static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
+static void ehci_queues_rip_unused(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
-    const char *warn = (async && !flush) ? "guest unlinked busy QH" : NULL;
+    const char *warn = async ? "guest unlinked busy QH" : NULL;
     uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4;
     EHCIQueue *q, *tmp;
 
@@ -861,13 +861,25 @@ static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
             q->ts = ehci->last_run_ns;
             continue;
         }
-        if (!flush && ehci->last_run_ns < q->ts + maxage) {
+        if (ehci->last_run_ns < q->ts + maxage) {
             continue;
         }
         ehci_free_queue(q, warn);
     }
 }
 
+static void ehci_queues_rip_unseen(EHCIState *ehci, int async)
+{
+    EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    EHCIQueue *q, *tmp;
+
+    QTAILQ_FOREACH_SAFE(q, head, next, tmp) {
+        if (!q->seen) {
+            ehci_free_queue(q, NULL);
+        }
+    }
+}
+
 static void ehci_queues_rip_device(EHCIState *ehci, USBDevice *dev, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
@@ -1700,7 +1712,7 @@ static int ehci_state_waitlisthead(EHCIState *ehci,  int async)
         ehci_set_usbsts(ehci, USBSTS_REC);
     }
 
-    ehci_queues_rip_unused(ehci, async, 0);
+    ehci_queues_rip_unused(ehci, async);
 
     /*  Find the head of the list (4.9.1.1) */
     for(i = 0; i < MAX_QH; i++) {
@@ -2332,7 +2344,7 @@ static void ehci_advance_async_state(EHCIState *ehci)
          */
         if (ehci->usbcmd & USBCMD_IAAD) {
             /* Remove all unseen qhs from the async qhs queue */
-            ehci_queues_rip_unused(ehci, async, 1);
+            ehci_queues_rip_unseen(ehci, async);
             trace_usb_ehci_doorbell_ack();
             ehci->usbcmd &= ~USBCMD_IAAD;
             ehci_raise_irq(ehci, USBSTS_IAA);
@@ -2385,7 +2397,7 @@ static void ehci_advance_periodic_state(EHCIState *ehci)
         ehci_set_fetch_addr(ehci, async,entry);
         ehci_set_state(ehci, async, EST_FETCHENTRY);
         ehci_advance_state(ehci, async);
-        ehci_queues_rip_unused(ehci, async, 0);
+        ehci_queues_rip_unused(ehci, async);
         break;
 
     default:
commit 56ab2ad177dc43d474dc0a0bd84e81ef00f31e11
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 20:57:58 2012 +0200

    configure: usbredir fixes
    
    usbredir is only used by system emulation, so add the libraries to
    libs_softmmu instead of LIBS.
    
    Cc: Michael Tokarev <mjt at tls.msk.ru>
    Cc: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/configure b/configure
index 7656c32..e42c812 100755
--- a/configure
+++ b/configure
@@ -2763,7 +2763,7 @@ if test "$usb_redir" != "no" ; then
         usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null)
         usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null)
         QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags"
-        LIBS="$LIBS $usb_redir_libs"
+        libs_softmmu="$libs_softmmu $usb_redir_libs"
     else
         if test "$usb_redir" = "yes"; then
             feature_not_found "usb-redir"
commit 8f74ed1e43263293301031a10e440549bab19a6e
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Mon Sep 10 12:44:11 2012 +0200

    ehci: Don't process too many frames in 1 timer tick (v2)
    
    The Linux ehci isoc scheduling code fills the entire schedule ahead of
    time minus 80 frames. If we make a large jump in where we are in the
    schedule, e.g. 40 frames, then the scheduler suddenly only has
    40 frames left to work in, causing it to fail packet submissions
    with error -27 (-EFBIG).
    
    Changes in v2:
    -Don't hardcode a maximum number of frames to process in one tick, instead:
     -Process a minimum number of frames to ensure we do eventually catch up
     -Stop (after the minimum number) when the guest has requested an irq
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 54273d7..017a01d 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -139,6 +139,7 @@
 #define NB_PORTS         6        // Number of downstream ports
 #define BUFF_SIZE        5*4096   // Max bytes to transfer per transaction
 #define MAX_QH           100      // Max allowable queue heads in a chain
+#define MIN_FR_PER_TICK  3        // Min frames to process when catching up
 
 /*  Internal periodic / asynchronous schedule state machine states
  */
@@ -2448,6 +2449,19 @@ static void ehci_frame_timer(void *opaque)
         }
 
         for (i = 0; i < frames; i++) {
+            /*
+             * If we're running behind schedule, we should not catch up
+             * too fast, as that will make some guests unhappy:
+             * 1) We must process a minimum of MIN_FR_PER_TICK frames,
+             *    otherwise we will never catch up
+             * 2) Process frames until the guest has requested an irq (IOC)
+             */
+            if (i >= MIN_FR_PER_TICK) {
+                ehci_commit_irq(ehci);
+                if ((ehci->usbsts & USBINTR_MASK) & ehci->usbintr) {
+                    break;
+                }
+            }
             ehci_update_frindex(ehci, 1);
             ehci_advance_periodic_state(ehci);
             ehci->last_run_ns += FRAME_TIMER_NS;
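
A standalone toy of the catch-up policy this hunk adds may make the intent easier to see: when the timer is behind, always process a minimum number of frames, but stop early once the guest has an interrupt pending. The numbers and the interrupt model below are invented for illustration; only the MIN_FR_PER_TICK / stop-on-pending-irq shape comes from the patch.

    /* Standalone toy of the bounded catch-up: process at least
     * MIN_FR_PER_TICK late frames per timer tick, but stop as soon as the
     * (simulated) guest has an interrupt pending.  Numbers are invented. */
    #include <stdio.h>

    #define MIN_FR_PER_TICK 3   /* same value the patch uses */

    static int irq_pending;

    static void process_frame(int frame)
    {
        /* stands in for ehci_advance_periodic_state(); pretend the transfer
         * scheduled in frame 5 has IOC set and raises an interrupt */
        if (frame == 5) {
            irq_pending = 1;
        }
    }

    static int run_timer_tick(int frames_behind)
    {
        int i;

        for (i = 0; i < frames_behind; i++) {
            /* same shape as the hunk above: always do the minimum, then stop
             * once an irq is pending so the guest driver can refill its
             * schedule before we jump further ahead */
            if (i >= MIN_FR_PER_TICK && irq_pending) {
                break;
            }
            process_frame(i);
        }
        return i;   /* frames actually processed this tick */
    }

    int main(void)
    {
        /* the timer fired 40 frames late; without the brake all 40 would be
         * consumed at once, eating most of the guest's isoc headroom */
        printf("processed %d of 40 late frames this tick\n", run_timer_tick(40));
        return 0;
    }
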
commit ffa1f2e088eb7e3d57f2fc35f21e7bdb23e592c5
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Mon Sep 10 12:44:10 2012 +0200

    ehci: Fix interrupts stopping when Interrupt Threshold Control is 8
    
    If Interrupt Threshold Control is 8 or a multiple of 8, then
    s->usbsts_frindex can become exactly 0x4000, at which point
    (s->usbsts_frindex > s->frindex) will never become true, as
    s->usbsts_frindex will not be lowered / reset in this case.
    
    This patch fixes this.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index f5ba8e1..54273d7 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2413,7 +2413,7 @@ static void ehci_update_frindex(EHCIState *ehci, int frames)
         if (ehci->frindex == 0x00004000) {
             ehci_raise_irq(ehci, USBSTS_FLR);
             ehci->frindex = 0;
-            if (ehci->usbsts_frindex > 0x00004000) {
+            if (ehci->usbsts_frindex >= 0x00004000) {
                 ehci->usbsts_frindex -= 0x00004000;
             } else {
                 ehci->usbsts_frindex = 0;
commit 3e4f910c8d490a1490409a7e381dbbb229f9d272
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 6 11:24:51 2012 +0200

    ehci: switch to new-style memory ops
    
    Also register different memory regions for capabilities,
    operational registers and port status registers.  Create
    separate tracepoints for operational regs and port status
    regs.  Ditch a bunch of sanity checks because the memory
    core now does these checks for us.
    
    Offloading the byte, word and dword access handling to the
    memory core also has the side effect of fixing ehci register
    access on bigendian hosts.
    
    Cc: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 2f3e9c0..f5ba8e1 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -389,6 +389,9 @@ struct EHCIState {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    MemoryRegion mem_caps;
+    MemoryRegion mem_opreg;
+    MemoryRegion mem_ports;
     int companion_count;
 
     /* properties */
@@ -398,10 +401,10 @@ struct EHCIState {
      *  EHCI spec version 1.0 Section 2.3
      *  Host Controller Operational Registers
      */
+    uint8_t caps[OPREGBASE];
     union {
-        uint8_t mmio[MMIO_SIZE];
+        uint32_t opreg[(PORTSC_BEGIN-OPREGBASE)/sizeof(uint32_t)];
         struct {
-            uint8_t cap[OPREGBASE];
             uint32_t usbcmd;
             uint32_t usbsts;
             uint32_t usbintr;
@@ -411,9 +414,9 @@ struct EHCIState {
             uint32_t asynclistaddr;
             uint32_t notused[9];
             uint32_t configflag;
-            uint32_t portsc[NB_PORTS];
         };
     };
+    uint32_t portsc[NB_PORTS];
 
     /*
      *  Internal states, shadow registers, etc
@@ -471,22 +474,12 @@ static const char *ehci_state_names[] = {
 };
 
 static const char *ehci_mmio_names[] = {
-    [CAPLENGTH]         = "CAPLENGTH",
-    [HCIVERSION]        = "HCIVERSION",
-    [HCSPARAMS]         = "HCSPARAMS",
-    [HCCPARAMS]         = "HCCPARAMS",
     [USBCMD]            = "USBCMD",
     [USBSTS]            = "USBSTS",
     [USBINTR]           = "USBINTR",
     [FRINDEX]           = "FRINDEX",
     [PERIODICLISTBASE]  = "P-LIST BASE",
     [ASYNCLISTADDR]     = "A-LIST ADDR",
-    [PORTSC_BEGIN]      = "PORTSC #0",
-    [PORTSC_BEGIN + 4]  = "PORTSC #1",
-    [PORTSC_BEGIN + 8]  = "PORTSC #2",
-    [PORTSC_BEGIN + 12] = "PORTSC #3",
-    [PORTSC_BEGIN + 16] = "PORTSC #4",
-    [PORTSC_BEGIN + 20] = "PORTSC #5",
     [CONFIGFLAG]        = "CONFIGFLAG",
 };
 
@@ -509,7 +502,8 @@ static const char *state2str(uint32_t state)
 
 static const char *addr2str(target_phys_addr_t addr)
 {
-    return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names), addr);
+    return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names),
+                  addr + OPREGBASE);
 }
 
 static void ehci_trace_usbsts(uint32_t mask, int state)
@@ -1018,7 +1012,7 @@ static int ehci_register_companion(USBBus *bus, USBPort *ports[],
     }
 
     s->companion_count++;
-    s->mmio[0x05] = (s->companion_count << 4) | portcount;
+    s->caps[0x05] = (s->companion_count << 4) | portcount;
 
     return 0;
 }
@@ -1063,7 +1057,8 @@ static void ehci_reset(void *opaque)
         }
     }
 
-    memset(&s->mmio[OPREGBASE], 0x00, MMIO_SIZE - OPREGBASE);
+    memset(&s->opreg, 0x00, sizeof(s->opreg));
+    memset(&s->portsc, 0x00, sizeof(s->portsc));
 
     s->usbcmd = NB_MAXINTRATE << USBCMD_ITC_SH;
     s->usbsts = USBSTS_HALT;
@@ -1090,50 +1085,35 @@ static void ehci_reset(void *opaque)
     qemu_bh_cancel(s->async_bh);
 }
 
-static uint32_t ehci_mem_readb(void *ptr, target_phys_addr_t addr)
+static uint64_t ehci_caps_read(void *ptr, target_phys_addr_t addr,
+                               unsigned size)
 {
     EHCIState *s = ptr;
-    uint32_t val;
-
-    val = s->mmio[addr];
-
-    return val;
+    return s->caps[addr];
 }
 
-static uint32_t ehci_mem_readw(void *ptr, target_phys_addr_t addr)
+static uint64_t ehci_opreg_read(void *ptr, target_phys_addr_t addr,
+                                unsigned size)
 {
     EHCIState *s = ptr;
     uint32_t val;
 
-    val = s->mmio[addr] | (s->mmio[addr+1] << 8);
-
+    val = s->opreg[addr >> 2];
+    trace_usb_ehci_opreg_read(addr + OPREGBASE, addr2str(addr), val);
     return val;
 }
 
-static uint32_t ehci_mem_readl(void *ptr, target_phys_addr_t addr)
+static uint64_t ehci_port_read(void *ptr, target_phys_addr_t addr,
+                               unsigned size)
 {
     EHCIState *s = ptr;
     uint32_t val;
 
-    val = s->mmio[addr] | (s->mmio[addr+1] << 8) |
-          (s->mmio[addr+2] << 16) | (s->mmio[addr+3] << 24);
-
-    trace_usb_ehci_mmio_readl(addr, addr2str(addr), val);
+    val = s->portsc[addr >> 2];
+    trace_usb_ehci_portsc_read(addr + PORTSC_BEGIN, addr >> 2, val);
     return val;
 }
 
-static void ehci_mem_writeb(void *ptr, target_phys_addr_t addr, uint32_t val)
-{
-    fprintf(stderr, "EHCI doesn't handle byte writes to MMIO\n");
-    exit(1);
-}
-
-static void ehci_mem_writew(void *ptr, target_phys_addr_t addr, uint32_t val)
-{
-    fprintf(stderr, "EHCI doesn't handle 16-bit writes to MMIO\n");
-    exit(1);
-}
-
 static void handle_port_owner_write(EHCIState *s, int port, uint32_t owner)
 {
     USBDevice *dev = s->ports[port].dev;
@@ -1162,11 +1142,17 @@ static void handle_port_owner_write(EHCIState *s, int port, uint32_t owner)
     }
 }
 
-static void handle_port_status_write(EHCIState *s, int port, uint32_t val)
+static void ehci_port_write(void *ptr, target_phys_addr_t addr,
+                            uint64_t val, unsigned size)
 {
+    EHCIState *s = ptr;
+    int port = addr >> 2;
     uint32_t *portsc = &s->portsc[port];
+    uint32_t old = *portsc;
     USBDevice *dev = s->ports[port].dev;
 
+    trace_usb_ehci_portsc_write(addr + PORTSC_BEGIN, addr >> 2, val);
+
     /* Clear rwc bits */
     *portsc &= ~(val & PORTSC_RWC_MASK);
     /* The guest may clear, but not set the PED bit */
@@ -1198,39 +1184,20 @@ static void handle_port_status_write(EHCIState *s, int port, uint32_t val)
 
     *portsc &= ~PORTSC_RO_MASK;
     *portsc |= val;
+    trace_usb_ehci_portsc_change(addr + PORTSC_BEGIN, addr >> 2, *portsc, old);
 }
 
-static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
+static void ehci_opreg_write(void *ptr, target_phys_addr_t addr,
+                             uint64_t val, unsigned size)
 {
     EHCIState *s = ptr;
-    uint32_t *mmio = (uint32_t *)(&s->mmio[addr]);
+    uint32_t *mmio = s->opreg + (addr >> 2);
     uint32_t old = *mmio;
     int i;
 
-    trace_usb_ehci_mmio_writel(addr, addr2str(addr), val);
-
-    /* Only aligned reads are allowed on OHCI */
-    if (addr & 3) {
-        fprintf(stderr, "usb-ehci: Mis-aligned write to addr 0x"
-                TARGET_FMT_plx "\n", addr);
-        return;
-    }
-
-    if (addr >= PORTSC && addr < PORTSC + 4 * NB_PORTS) {
-        handle_port_status_write(s, (addr-PORTSC)/4, val);
-        trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old);
-        return;
-    }
-
-    if (addr < OPREGBASE) {
-        fprintf(stderr, "usb-ehci: write attempt to read-only register"
-                TARGET_FMT_plx "\n", addr);
-        return;
-    }
-
+    trace_usb_ehci_opreg_write(addr + OPREGBASE, addr2str(addr), val);
 
-    /* Do any register specific pre-write processing here.  */
-    switch(addr) {
+    switch (addr + OPREGBASE) {
     case USBCMD:
         if (val & USBCMD_HCRESET) {
             ehci_reset(s);
@@ -1241,7 +1208,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
         /* not supporting dynamic frame list size at the moment */
         if ((val & USBCMD_FLS) && !(s->usbcmd & USBCMD_FLS)) {
             fprintf(stderr, "attempt to set frame list size -- value %d\n",
-                    val & USBCMD_FLS);
+                    (int)val & USBCMD_FLS);
             val &= ~USBCMD_FLS;
         }
 
@@ -1308,7 +1275,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
     }
 
     *mmio = val;
-    trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old);
+    trace_usb_ehci_opreg_change(addr + OPREGBASE, addr2str(addr), *mmio, old);
 }
 
 
@@ -2520,11 +2487,28 @@ static void ehci_async_bh(void *opaque)
     ehci_advance_async_state(ehci);
 }
 
-static const MemoryRegionOps ehci_mem_ops = {
-    .old_mmio = {
-        .read = { ehci_mem_readb, ehci_mem_readw, ehci_mem_readl },
-        .write = { ehci_mem_writeb, ehci_mem_writew, ehci_mem_writel },
-    },
+static const MemoryRegionOps ehci_mmio_caps_ops = {
+    .read = ehci_caps_read,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 1,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static const MemoryRegionOps ehci_mmio_opreg_ops = {
+    .read = ehci_opreg_read,
+    .write = ehci_opreg_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static const MemoryRegionOps ehci_mmio_port_ops = {
+    .read = ehci_port_read,
+    .write = ehci_port_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
@@ -2681,19 +2665,19 @@ static int usb_ehci_initfn(PCIDevice *dev)
     pci_conf[0x6e] = 0x00;
     pci_conf[0x6f] = 0xc0;  // USBLEFCTLSTS
 
-    // 2.2 host controller interface version
-    s->mmio[0x00] = (uint8_t) OPREGBASE;
-    s->mmio[0x01] = 0x00;
-    s->mmio[0x02] = 0x00;
-    s->mmio[0x03] = 0x01;        // HC version
-    s->mmio[0x04] = NB_PORTS;    // Number of downstream ports
-    s->mmio[0x05] = 0x00;        // No companion ports at present
-    s->mmio[0x06] = 0x00;
-    s->mmio[0x07] = 0x00;
-    s->mmio[0x08] = 0x80;        // We can cache whole frame, not 64-bit capable
-    s->mmio[0x09] = 0x68;        // EECP
-    s->mmio[0x0a] = 0x00;
-    s->mmio[0x0b] = 0x00;
+    /* 2.2 host controller interface version */
+    s->caps[0x00] = (uint8_t) OPREGBASE;
+    s->caps[0x01] = 0x00;
+    s->caps[0x02] = 0x00;
+    s->caps[0x03] = 0x01;        /* HC version */
+    s->caps[0x04] = NB_PORTS;    /* Number of downstream ports */
+    s->caps[0x05] = 0x00;        /* No companion ports at present */
+    s->caps[0x06] = 0x00;
+    s->caps[0x07] = 0x00;
+    s->caps[0x08] = 0x80;        /* We can cache whole frame, no 64-bit */
+    s->caps[0x09] = 0x68;        /* EECP */
+    s->caps[0x0a] = 0x00;
+    s->caps[0x0b] = 0x00;
 
     s->irq = s->dev.irq[3];
 
@@ -2712,7 +2696,18 @@ static int usb_ehci_initfn(PCIDevice *dev)
 
     qemu_register_reset(ehci_reset, s);
 
-    memory_region_init_io(&s->mem, &ehci_mem_ops, s, "ehci", MMIO_SIZE);
+    memory_region_init(&s->mem, "ehci", MMIO_SIZE);
+    memory_region_init_io(&s->mem_caps, &ehci_mmio_caps_ops, s,
+                          "capabilities", OPREGBASE);
+    memory_region_init_io(&s->mem_opreg, &ehci_mmio_opreg_ops, s,
+                          "operational", PORTSC_BEGIN - OPREGBASE);
+    memory_region_init_io(&s->mem_ports, &ehci_mmio_port_ops, s,
+                          "ports", PORTSC_END - PORTSC_BEGIN);
+
+    memory_region_add_subregion(&s->mem, 0,            &s->mem_caps);
+    memory_region_add_subregion(&s->mem, OPREGBASE,    &s->mem_opreg);
+    memory_region_add_subregion(&s->mem, PORTSC_BEGIN, &s->mem_ports);
+
     pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mem);
 
     return 0;
diff --git a/trace-events b/trace-events
index b25ae1c..a58b0b7 100644
--- a/trace-events
+++ b/trace-events
@@ -243,9 +243,12 @@ usb_port_release(int bus, const char *port) "bus %d, port %s"
 
 # hw/usb/hcd-ehci.c
 usb_ehci_reset(void) "=== RESET ==="
-usb_ehci_mmio_readl(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x"
-usb_ehci_mmio_writel(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x"
-usb_ehci_mmio_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)"
+usb_ehci_opreg_read(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x"
+usb_ehci_opreg_write(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x"
+usb_ehci_opreg_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)"
+usb_ehci_portsc_read(uint32_t addr, uint32_t port, uint32_t val) "rd mmio %04x [port %d] = %x"
+usb_ehci_portsc_write(uint32_t addr, uint32_t port, uint32_t val) "wr mmio %04x [port %d] = %x"
+usb_ehci_portsc_change(uint32_t addr, uint32_t port, uint32_t new, uint32_t old) "ch mmio %04x [port %d] = %x (old: %x)"
 usb_ehci_usbsts(const char *sts, int state) "usbsts %s %d"
 usb_ehci_state(const char *schedule, const char *state) "%s schedule %s"
 usb_ehci_qh_ptrs(void *q, uint32_t addr, uint32_t nxt, uint32_t c_qtd, uint32_t n_qtd, uint32_t a_qtd) "q %p - QH @ %08x: next %08x qtds %08x,%08x,%08x"
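
For reference, the new-style split used above (one container region with per-range subregions, each with its own MemoryRegionOps and access-size constraints) generalizes to other devices. The sketch below shows the pattern for a hypothetical "mydev"; the register layout and all mydev names are invented, while the ops fields and the memory_region_* calls are the ones the patch uses, and it only builds inside a QEMU tree of this era:

    /* Sketch only: "mydev" and its register layout are hypothetical; the ops
     * fields, access-size constraints and container/subregion calls are the
     * ones used by the patch above. */
    #include "memory.h"          /* MemoryRegion API (top-level header in this era) */

    #define MYDEV_MMIO_SIZE 0x100
    #define MYDEV_REGS_BASE 0x40  /* byte-wide info block below, 32-bit regs above */

    typedef struct MyDevState {
        MemoryRegion mem;         /* container covering the whole BAR */
        MemoryRegion mem_info;
        MemoryRegion mem_regs;
        uint8_t  info[MYDEV_REGS_BASE];
        uint32_t regs[(MYDEV_MMIO_SIZE - MYDEV_REGS_BASE) / sizeof(uint32_t)];
    } MyDevState;

    static uint64_t mydev_info_read(void *ptr, target_phys_addr_t addr, unsigned size)
    {
        MyDevState *s = ptr;
        return s->info[addr];            /* impl is byte-wide; the core splits accesses */
    }

    static uint64_t mydev_regs_read(void *ptr, target_phys_addr_t addr, unsigned size)
    {
        MyDevState *s = ptr;
        return s->regs[addr >> 2];       /* the core guarantees aligned 32-bit access */
    }

    static void mydev_regs_write(void *ptr, target_phys_addr_t addr,
                                 uint64_t val, unsigned size)
    {
        MyDevState *s = ptr;
        s->regs[addr >> 2] = val;
    }

    static const MemoryRegionOps mydev_info_ops = {
        .read = mydev_info_read,         /* read-only block: no .write handler */
        .valid.min_access_size = 1,
        .valid.max_access_size = 4,
        .impl.min_access_size = 1,
        .impl.max_access_size = 1,
        .endianness = DEVICE_LITTLE_ENDIAN,
    };

    static const MemoryRegionOps mydev_regs_ops = {
        .read = mydev_regs_read,
        .write = mydev_regs_write,
        .valid.min_access_size = 4,      /* reject sub-word guest accesses */
        .valid.max_access_size = 4,
        .endianness = DEVICE_LITTLE_ENDIAN,
    };

    static void mydev_init_mmio(MyDevState *s)
    {
        memory_region_init(&s->mem, "mydev", MYDEV_MMIO_SIZE);
        memory_region_init_io(&s->mem_info, &mydev_info_ops, s, "mydev-info",
                              MYDEV_REGS_BASE);
        memory_region_init_io(&s->mem_regs, &mydev_regs_ops, s, "mydev-regs",
                              MYDEV_MMIO_SIZE - MYDEV_REGS_BASE);
        memory_region_add_subregion(&s->mem, 0, &s->mem_info);
        memory_region_add_subregion(&s->mem, MYDEV_REGS_BASE, &s->mem_regs);
    }
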
commit 63587e31353b6652cadfcfb869f5692a2b69daeb
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 6 12:03:41 2012 +0200

    usb-host: allow emulated (non-async) control requests without USBPacket
    
    xhci needs this for USB_REQ_SET_ADDRESS due to the way
    usb addressing is handled by the xhci hardware.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c
index 8df9207..44f1a64 100644
--- a/hw/usb/host-linux.c
+++ b/hw/usb/host-linux.c
@@ -1045,7 +1045,6 @@ static int usb_host_handle_control(USBDevice *dev, USBPacket *p,
 
     /* Note request is (bRequestType << 8) | bRequest */
     trace_usb_host_req_control(s->bus_num, s->addr, p, request, value, index);
-    assert(p->result == 0);
 
     switch (request) {
     case DeviceOutRequest | USB_REQ_SET_ADDRESS:
@@ -1074,6 +1073,7 @@ static int usb_host_handle_control(USBDevice *dev, USBPacket *p,
     }
 
     /* The rest are asynchronous */
+    assert(p && p->result == 0);
 
     if (length > sizeof(dev->data_buf)) {
         fprintf(stderr, "husb: ctrl buffer too small (%d > %zu)\n",

