[Spice-commits] 114 commits - arch_init.c async.c block.c block.h block/blkdebug.c block/commit.c block/mirror.c block/qcow2-cluster.c block/qcow2.c block/qcow2.h block/raw-posix.c block/raw-win32.c block/rbd.c block/stream.c block/vpc.c block_int.h blockdev.c blockdev.h configure console.h exec-all.h exec.c hw/arm_boot.c hw/arm_gic.c hw/arm_gic_common.c hw/armv7m_nvic.c hw/boards.h hw/device-hotplug.c hw/ds1338.c hw/exynos4210.c hw/exynos4210_mct.c hw/highbank.c hw/ide hw/kvmvapic.c hw/leon3.c hw/mips_jazz.c hw/msix.c hw/openpic.c hw/openpic.h hw/pc_sysfw.c hw/ppc hw/ppc_newworld.c hw/ppce500_pci.c hw/ppce500_pci.h hw/puv3.c hw/realview.c hw/s390-virtio.c hw/spapr.c hw/spapr.h hw/spapr_iommu.c hw/spapr_nvram.c hw/spapr_rtas.c hw/sun4m.c hw/versatilepb.c hw/vexpress.c hw/vfio_pci.c hw/vga.c hw/virtio-blk.h hw/virtio-pci.c hw/watchdog.c hw/xics.c hw/xics.h hw/xilinx_zynq.c main-loop.c pc-bios/README pc-bios/acpi-dsdt.aml pc-bios/bios.bin pc-bios/q35-acpi-dsdt.aml pc-bios/slof.bin pix man qemu-aio.h qemu-config.c qemu-img.c qemu-io.c qemu-option.c qemu-option.h qemu-pixman.c qemu-pixman.h qemu-sockets.c roms/SLOF roms/seabios target-alpha/helper.c target-alpha/mem_helper.c target-arm/op_helper.c target-cris/op_helper.c target-i386/helper.c target-i386/mem_helper.c target-lm32/op_helper.c target-m68k/op_helper.c target-microblaze/op_helper.c target-mips/op_helper.c target-openrisc/mmu_helper.c target-ppc/cpu.h target-ppc/mem_helper.c target-s390x/mem_helper.c target-sh4/op_helper.c target-sparc/cpu.h target-sparc/helper.c target-sparc/ldst_helper.c target-unicore32/op_helper.c target-xtensa/helper.c target-xtensa/op_helper.c tests/qemu-iotests tests/test-aio.c tests/test-thread-pool.c trace-events translate-all.c translate-all.h ui/vnc-enc-tight.c ui/vnc.c user-exec.c vl.c

Gerd Hoffmann kraxel at kemper.freedesktop.org
Mon Dec 17 01:18:46 PST 2012


 arch_init.c                   |    5 
 async.c                       |    5 
 block.c                       |  219 +++--
 block.h                       |   11 
 block/blkdebug.c              |  128 ++-
 block/commit.c                |    2 
 block/mirror.c                |    2 
 block/qcow2-cluster.c         |  191 ++--
 block/qcow2.c                 |   87 +-
 block/qcow2.h                 |   49 +
 block/raw-posix.c             |   27 
 block/raw-win32.c             |   17 
 block/rbd.c                   |   20 
 block/stream.c                |    2 
 block/vpc.c                   |   24 
 block_int.h                   |    6 
 blockdev.c                    |   35 
 blockdev.h                    |    9 
 configure                     |    4 
 console.h                     |   10 
 exec-all.h                    |    6 
 exec.c                        | 1667 ----------------------------------------
 hw/arm_boot.c                 |   17 
 hw/arm_gic.c                  |    7 
 hw/arm_gic_common.c           |    6 
 hw/armv7m_nvic.c              |    4 
 hw/boards.h                   |    3 
 hw/device-hotplug.c           |    2 
 hw/ds1338.c                   |   81 +
 hw/exynos4210.c               |   10 
 hw/exynos4210_mct.c           |    2 
 hw/highbank.c                 |    9 
 hw/ide/core.c                 |    2 
 hw/kvmvapic.c                 |    4 
 hw/leon3.c                    |    1 
 hw/mips_jazz.c                |    4 
 hw/msix.c                     |    6 
 hw/openpic.c                  | 1397 +++++++++++----------------------
 hw/openpic.h                  |    7 
 hw/pc_sysfw.c                 |    2 
 hw/ppc/Makefile.objs          |    2 
 hw/ppc/e500-ccsr.h            |   17 
 hw/ppc/e500.c                 |  205 +++-
 hw/ppc/e500.h                 |    2 
 hw/ppc/e500plat.c             |    3 
 hw/ppc/mpc8544ds.c            |    2 
 hw/ppc_newworld.c             |   25 
 hw/ppce500_pci.c              |   58 +
 hw/ppce500_pci.h              |    9 
 hw/puv3.c                     |    1 
 hw/realview.c                 |    6 
 hw/s390-virtio.c              |   17 
 hw/spapr.c                    |   37 
 hw/spapr.h                    |    4 
 hw/spapr_iommu.c              |    6 
 hw/spapr_nvram.c              |  196 ++++
 hw/spapr_rtas.c               |    6 
 hw/sun4m.c                    |   24 
 hw/versatilepb.c              |    4 
 hw/vexpress.c                 |    4 
 hw/vfio_pci.c                 |    5 
 hw/vga.c                      |    2 
 hw/virtio-blk.h               |    1 
 hw/virtio-pci.c               |    1 
 hw/watchdog.c                 |    2 
 hw/xics.c                     |   47 -
 hw/xics.h                     |    1 
 hw/xilinx_zynq.c              |   11 
 main-loop.c                   |    5 
 pc-bios/README                |    2 
 pc-bios/acpi-dsdt.aml         |binary
 pc-bios/bios.bin              |binary
 pc-bios/q35-acpi-dsdt.aml     |binary
 pc-bios/slof.bin              |binary
 pixman                        |    2 
 qemu-aio.h                    |    9 
 qemu-config.c                 |    8 
 qemu-img.c                    |   21 
 qemu-io.c                     |   87 ++
 qemu-option.c                 |  110 +-
 qemu-option.h                 |    2 
 qemu-pixman.c                 |    6 
 qemu-pixman.h                 |    2 
 qemu-sockets.c                |   16 
 roms/SLOF                     |    2 
 roms/seabios                  |    2 
 target-alpha/helper.c         |   14 
 target-alpha/mem_helper.c     |    8 
 target-arm/op_helper.c        |    8 
 target-cris/op_helper.c       |    8 
 target-i386/helper.c          |    5 
 target-i386/mem_helper.c      |    8 
 target-lm32/op_helper.c       |    8 
 target-m68k/op_helper.c       |    8 
 target-microblaze/op_helper.c |    8 
 target-mips/op_helper.c       |    8 
 target-openrisc/mmu_helper.c  |   10 
 target-ppc/cpu.h              |    2 
 target-ppc/mem_helper.c       |    8 
 target-s390x/mem_helper.c     |    8 
 target-sh4/op_helper.c        |   23 
 target-sparc/cpu.h            |    1 
 target-sparc/helper.c         |   12 
 target-sparc/ldst_helper.c    |   24 
 target-unicore32/op_helper.c  |    9 
 target-xtensa/helper.c        |    3 
 target-xtensa/op_helper.c     |   14 
 tests/qemu-iotests/045        |  129 +++
 tests/qemu-iotests/045.out    |    5 
 tests/qemu-iotests/046        |  215 +++++
 tests/qemu-iotests/046.out    |  163 +++
 tests/qemu-iotests/group      |    2 
 tests/qemu-iotests/iotests.py |   12 
 tests/test-aio.c              |   31 
 tests/test-thread-pool.c      |   20 
 trace-events                  |   13 
 translate-all.c               | 1736 +++++++++++++++++++++++++++++++++++++++++-
 translate-all.h               |   34 
 ui/vnc-enc-tight.c            |    4 
 ui/vnc.c                      |    2 
 user-exec.c                   |    8 
 vl.c                          |   41 
 122 files changed, 4338 insertions(+), 3336 deletions(-)

New commits:
commit a8a826a3c3b8c8a1c4def0e9e22b46e78e6163a0
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Tue Dec 4 20:16:07 2012 +0000

    exec: refactor cpu_restore_state
    
    Refactor common code around calls to cpu_restore_state().
    
    tb_find_pc() has now no external users, make it static.
    
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/exec-all.h b/exec-all.h
index b18d4ca..e9b07cd 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -80,8 +80,8 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
 void cpu_gen_init(void);
 int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
                  int *gen_code_size_ptr);
-int cpu_restore_state(struct TranslationBlock *tb,
-                      CPUArchState *env, uintptr_t searched_pc);
+bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc);
+
 void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc);
 void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr);
 TranslationBlock *tb_gen_code(CPUArchState *env, 
@@ -275,8 +275,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }
 
-TranslationBlock *tb_find_pc(uintptr_t pc_ptr);
-
 #include "qemu-lock.h"
 
 extern spinlock_t tb_lock;
diff --git a/hw/kvmvapic.c b/hw/kvmvapic.c
index e04c401..60c8fc4 100644
--- a/hw/kvmvapic.c
+++ b/hw/kvmvapic.c
@@ -387,7 +387,6 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i
     VAPICHandlers *handlers;
     uint8_t opcode[2];
     uint32_t imm32;
-    TranslationBlock *current_tb;
     target_ulong current_pc = 0;
     target_ulong current_cs_base = 0;
     int current_flags = 0;
@@ -399,8 +398,7 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i
     }
 
     if (!kvm_enabled()) {
-        current_tb = tb_find_pc(env->mem_io_pc);
-        cpu_restore_state(current_tb, env, env->mem_io_pc);
+        cpu_restore_state(env, env->mem_io_pc);
         cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                              &current_flags);
     }
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index d9d7f75..2430f70 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -494,16 +494,6 @@ void cpu_dump_state (CPUAlphaState *env, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf(f, "\n");
 }
 
-void do_restore_state(CPUAlphaState *env, uintptr_t retaddr)
-{
-    if (retaddr) {
-        TranslationBlock *tb = tb_find_pc(retaddr);
-        if (tb) {
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 /* This should only be called from translate, via gen_excp.
    We expect that ENV->PC has already been updated.  */
 void QEMU_NORETURN helper_excp(CPUAlphaState *env, int excp, int error)
@@ -519,7 +509,9 @@ void QEMU_NORETURN dynamic_excp(CPUAlphaState *env, uintptr_t retaddr,
 {
     env->exception_index = excp;
     env->error_code = error;
-    do_restore_state(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     cpu_loop_exit(env);
 }
 
diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c
index 617836c..64b33f6 100644
--- a/target-alpha/mem_helper.c
+++ b/target-alpha/mem_helper.c
@@ -94,7 +94,9 @@ static void do_unaligned_access(CPUAlphaState *env, target_ulong addr,
     uint64_t pc;
     uint32_t insn;
 
-    do_restore_state(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
 
     pc = env->pc;
     insn = cpu_ldl_code(env, pc);
@@ -143,7 +145,9 @@ void tlb_fill(CPUAlphaState *env, target_ulong addr, int is_write,
 
     ret = cpu_alpha_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
-        do_restore_state(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         /* Exception index and error code are already set */
         cpu_loop_exit(env);
     }
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6e3ab90..1fcc975 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -74,19 +74,13 @@ uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
 void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         raise_exception(env, env->exception_index);
     }
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index a7468d4..31db424 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -57,7 +57,6 @@
 void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     D_LOG("%s pc=%x tpc=%x ra=%p\n", __func__,
@@ -66,12 +65,7 @@ void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-
+            if (cpu_restore_state(env, retaddr)) {
 		/* Evaluate flags after retranslation.  */
                 helper_top_evaluate_flags(env);
             }
diff --git a/target-i386/helper.c b/target-i386/helper.c
index bf206cf..00341c5 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1196,15 +1196,12 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 {
-    TranslationBlock *tb;
-
     if (kvm_enabled()) {
         env->tpr_access_type = access;
 
         cpu_interrupt(env, CPU_INTERRUPT_TPR);
     } else {
-        tb = tb_find_pc(env->mem_io_pc);
-        cpu_restore_state(tb, env, env->mem_io_pc);
+        cpu_restore_state(env, env->mem_io_pc);
 
         apic_handle_tpr_access_report(env->apic_state, env->eip, access);
     }
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 7f99c7c..d0be77b 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -135,19 +135,13 @@ void helper_boundl(CPUX86State *env, target_ulong a0, int v)
 void tlb_fill(CPUX86State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         raise_exception_err(env, env->exception_index, env->error_code);
     }
diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c
index 7b91d8c..97b9625 100644
--- a/target-lm32/op_helper.c
+++ b/target-lm32/op_helper.c
@@ -76,19 +76,13 @@ uint32_t helper_rcsr_jrx(CPULM32State *env)
 void tlb_fill(CPULM32State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_lm32_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index aa00504..b97ba5e 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -56,19 +56,13 @@ extern int semihosting_enabled;
 void tlb_fill(CPUM68KState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_m68k_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index 210296b..7593517 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -44,19 +44,13 @@
 void tlb_fill(CPUMBState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_mb_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index f45d494..2972ae3 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -38,7 +38,6 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
                                                         int error_code,
                                                         uintptr_t pc)
 {
-    TranslationBlock *tb;
 #if 1
     if (exception < 0x100)
         qemu_log("%s: %d %d\n", __func__, exception, error_code);
@@ -48,12 +47,7 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
 
     if (pc) {
         /* now we have a real cpu fault */
-        tb = tb_find_pc(pc);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, pc);
-        }
+        cpu_restore_state(env, pc);
     }
 
     cpu_loop_exit(env);
diff --git a/target-openrisc/mmu_helper.c b/target-openrisc/mmu_helper.c
index 59ed371..d2edebc 100644
--- a/target-openrisc/mmu_helper.c
+++ b/target-openrisc/mmu_helper.c
@@ -39,8 +39,6 @@
 void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
 {
-    TranslationBlock *tb;
-    unsigned long pc;
     int ret;
 
     ret = cpu_openrisc_handle_mmu_fault(env, addr, is_write, mmu_idx);
@@ -48,13 +46,7 @@ void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write,
     if (ret) {
         if (retaddr) {
             /* now we have a real cpu fault.  */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we
-                   have a virtual CPU fault.  */
-                cpu_restore_state(tb, env, pc);
-            }
+            cpu_restore_state(env, retaddr);
         }
         /* Raise Exception.  */
         cpu_loop_exit(env);
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 5b5f1bd..04c0144 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -275,19 +275,13 @@ STVE(stvewx, cpu_stl_data, bswap32, u32)
 void tlb_fill(CPUPPCState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_ppc_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
         if (likely(retaddr)) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (likely(tb)) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         helper_raise_exception_err(env, env->exception_index, env->error_code);
     }
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 6ebc22d..91b25e3 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -47,19 +47,13 @@
 void tlb_fill(CPUS390XState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_s390x_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
         if (likely(retaddr)) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (likely(tb)) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 60ec4cb..e8e87f5 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -21,21 +21,6 @@
 #include "cpu.h"
 #include "helper.h"
 
-static inline void cpu_restore_state_from_retaddr(CPUSH4State *env,
-                                                  uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-
-    if (retaddr) {
-        tb = tb_find_pc(retaddr);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
 
@@ -61,7 +46,9 @@ void tlb_fill(CPUSH4State *env, target_ulong addr, int is_write, int mmu_idx,
     ret = cpu_sh4_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         /* now we have a real cpu fault */
-        cpu_restore_state_from_retaddr(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         cpu_loop_exit(env);
     }
 }
@@ -82,7 +69,9 @@ static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index,
                                                  uintptr_t retaddr)
 {
     env->exception_index = index;
-    cpu_restore_state_from_retaddr(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     cpu_loop_exit(env);
 }
 
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 375f20a..013ecbd 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -710,7 +710,6 @@ uint64_t cpu_tick_get_count(CPUTimer *timer);
 void cpu_tick_set_limit(CPUTimer *timer, uint64_t limit);
 trap_state* cpu_tsptr(CPUSPARCState* env);
 #endif
-void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr);
 
 #define TB_FLAG_FPU_ENABLED (1 << 4)
 #define TB_FLAG_AM_ENABLED (1 << 5)
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 556ac28..3c8e865 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -75,7 +75,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -114,7 +114,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -147,7 +147,7 @@ int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     } else if (b == -1) {
         /* Avoid overflow trap with i386 divide insn.  */
@@ -161,7 +161,7 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
     return a / b;
@@ -193,7 +193,7 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state2(env, GETPC());
+    cpu_restore_state(env, GETPC());
     helper_raise_exception(env, TT_TOVF);
 }
 
@@ -222,6 +222,6 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state2(env, GETPC());
+    cpu_restore_state(env, GETPC());
     helper_raise_exception(env, TT_TOVF);
 }
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index f3e08fd..8d815e5 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -2393,22 +2393,6 @@ void cpu_unassigned_access(CPUSPARCState *env, hwaddr addr,
 #endif
 #endif
 
-/* XXX: make it generic ? */
-void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-
-    if (retaddr) {
-        /* now we have a real cpu fault */
-        tb = tb_find_pc(retaddr);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 #if !defined(CONFIG_USER_ONLY)
 static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
                                               target_ulong addr, int is_write,
@@ -2418,7 +2402,9 @@ static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
     printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
            "\n", addr, env->pc);
 #endif
-    cpu_restore_state2(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     helper_raise_exception(env, TT_UNALIGNED);
 }
 
@@ -2433,7 +2419,9 @@ void tlb_fill(CPUSPARCState *env, target_ulong addr, int is_write, int mmu_idx,
 
     ret = cpu_sparc_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
-        cpu_restore_state2(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         cpu_loop_exit(env);
     }
 }
diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c
index f474d1b..b8172ba 100644
--- a/target-unicore32/op_helper.c
+++ b/target-unicore32/op_helper.c
@@ -256,20 +256,13 @@ uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 void tlb_fill(CPUUniCore32State *env, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
 {
-    TranslationBlock *tb;
-    unsigned long pc;
     int ret;
 
     ret = uc32_cpu_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {/* the PC is inside the translated code.
-                        It means that we have a virtual CPU fault */
-                cpu_restore_state(tb, env, pc);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 0e0f21d..84f0449 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -47,22 +47,12 @@ static void do_unaligned_access(CPUXtensaState *env,
 #define SHIFT 3
 #include "softmmu_template.h"
 
-static void do_restore_state(CPUXtensaState *env, uintptr_t pc)
-{
-    TranslationBlock *tb;
-
-    tb = tb_find_pc(pc);
-    if (tb) {
-        cpu_restore_state(tb, env, pc);
-    }
-}
-
 static void do_unaligned_access(CPUXtensaState *env,
         target_ulong addr, int is_write, int is_user, uintptr_t retaddr)
 {
     if (xtensa_option_enabled(env->config, XTENSA_OPTION_UNALIGNED_EXCEPTION) &&
             !xtensa_option_enabled(env->config, XTENSA_OPTION_HW_ALIGNMENT)) {
-        do_restore_state(env, retaddr);
+        cpu_restore_state(env, retaddr);
         HELPER(exception_cause_vaddr)(env,
                 env->pc, LOAD_STORE_ALIGNMENT_CAUSE, addr);
     }
@@ -86,7 +76,7 @@ void tlb_fill(CPUXtensaState *env,
                 paddr & TARGET_PAGE_MASK,
                 access, mmu_idx, page_size);
     } else {
-        do_restore_state(env, retaddr);
+        cpu_restore_state(env, retaddr);
         HELPER(exception_cause_vaddr)(env, env->pc, ret, vaddr);
     }
 }
diff --git a/translate-all.c b/translate-all.c
index b958342..164870a 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -142,6 +142,7 @@ TCGContext tcg_ctx;
 
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
+static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
 
 void cpu_gen_init(void)
 {
@@ -211,8 +212,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
 
 /* The cpu state corresponding to 'searched_pc' is restored.
  */
-int cpu_restore_state(TranslationBlock *tb,
-                      CPUArchState *env, uintptr_t searched_pc)
+static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
+                                     uintptr_t searched_pc)
 {
     TCGContext *s = &tcg_ctx;
     int j;
@@ -266,6 +267,18 @@ int cpu_restore_state(TranslationBlock *tb,
     return 0;
 }
 
+bool cpu_restore_state(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(retaddr);
+    if (tb) {
+        cpu_restore_state_from_tb(tb, env, retaddr);
+        return true;
+    }
+    return false;
+}
+
 #ifdef _WIN32
 static inline void map_exec(void *addr, long size)
 {
@@ -1057,7 +1070,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                 restore the CPU state */
 
                 current_tb_modified = 1;
-                cpu_restore_state(current_tb, env, env->mem_io_pc);
+                cpu_restore_state_from_tb(current_tb, env, env->mem_io_pc);
                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                      &current_flags);
             }
@@ -1171,7 +1184,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
                    restore the CPU state */
 
             current_tb_modified = 1;
-            cpu_restore_state(current_tb, env, pc);
+            cpu_restore_state_from_tb(current_tb, env, pc);
             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                  &current_flags);
         }
@@ -1308,7 +1321,7 @@ bool is_tcg_gen_code(uintptr_t tc_ptr)
 
 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
    tb[1].tc_ptr. Return NULL if not found */
-TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
 {
     int m_min, m_max, m;
     uintptr_t v;
@@ -1435,7 +1448,7 @@ void tb_check_watchpoint(CPUArchState *env)
         cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
                   (void *)env->mem_io_pc);
     }
-    cpu_restore_state(tb, env, env->mem_io_pc);
+    cpu_restore_state_from_tb(tb, env, env->mem_io_pc);
     tb_phys_invalidate(tb, -1);
 }
 
@@ -1486,7 +1499,7 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
                   (void *)retaddr);
     }
     n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state(tb, env, retaddr);
+    cpu_restore_state_from_tb(tb, env, retaddr);
     /* Calculate how many instructions had been executed before the fault
        occurred.  */
     n = n - env->icount_decr.u16.low;
diff --git a/user-exec.c b/user-exec.c
index ef9b172..1185cb0 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -81,7 +81,6 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
                                     int is_write, sigset_t *old_set,
                                     void *puc)
 {
-    TranslationBlock *tb;
     int ret;
 
 #if defined(DEBUG_SIGNAL)
@@ -104,12 +103,7 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
         return 1; /* the MMU fault was handled without causing real CPU fault */
     }
     /* now we have a real cpu fault */
-    tb = tb_find_pc(pc);
-    if (tb) {
-        /* the PC is inside the translated code. It means that we have
-           a virtual CPU fault */
-        cpu_restore_state(tb, cpu_single_env, pc);
-    }
+    cpu_restore_state(cpu_single_env, pc);
 
     /* we restore the process signal mask as the sigreturn should
        do it (XXX: use sigsetjmp) */
commit 5b6dd8683dc30e8e0970db3dd9176732dc819410
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sun Dec 2 16:04:43 2012 +0000

    exec: move TB handling to translate-all.c
    
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/exec.c b/exec.c
index a1f617d..4c1246a 100644
--- a/exec.c
+++ b/exec.c
@@ -1,5 +1,5 @@
 /*
- *  virtual page mapping and translated block handling
+ *  Virtual page mapping
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *
@@ -38,63 +38,20 @@
 #include "exec-memory.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/param.h>
-#if __FreeBSD_version >= 700104
-#define HAVE_KINFO_GETVMMAP
-#define sigqueue sigqueue_freebsd  /* avoid redefinition */
-#include <sys/time.h>
-#include <sys/proc.h>
-#include <machine/profile.h>
-#define _KERNEL
-#include <sys/user.h>
-#undef _KERNEL
-#undef sigqueue
-#include <libutil.h>
-#endif
-#endif
 #else /* !CONFIG_USER_ONLY */
 #include "xen-mapcache.h"
 #include "trace.h"
 #endif
 
 #include "cputlb.h"
+#include "translate-all.h"
 
 #include "memory-internal.h"
 
-//#define DEBUG_TB_INVALIDATE
-//#define DEBUG_FLUSH
 //#define DEBUG_UNASSIGNED
-
-/* make various TB consistency checks */
-//#define DEBUG_TB_CHECK
-
-//#define DEBUG_IOPORT
 //#define DEBUG_SUBPAGE
 
 #if !defined(CONFIG_USER_ONLY)
-/* TB consistency checks only implemented for usermode emulation.  */
-#undef DEBUG_TB_CHECK
-#endif
-
-#define SMC_BITMAP_USE_THRESHOLD 10
-
-/* Code generation and translation blocks */
-static TranslationBlock *tbs;
-static int code_gen_max_blocks;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
-uint8_t *code_gen_prologue;
-static uint8_t *code_gen_buffer;
-static size_t code_gen_buffer_size;
-/* threshold to flush the translated code buffer */
-static size_t code_gen_buffer_max_size;
-static uint8_t *code_gen_ptr;
-
-#if !defined(CONFIG_USER_ONLY)
 int phys_ram_fd;
 static int in_migration;
 
@@ -121,59 +78,6 @@ DEFINE_TLS(CPUArchState *,cpu_single_env);
    2 = Adaptive rate instruction counting.  */
 int use_icount = 0;
 
-typedef struct PageDesc {
-    /* list of TBs intersecting this ram page */
-    TranslationBlock *first_tb;
-    /* in order to optimize self modifying code, we count the number
-       of lookups we do to a given page to use a bitmap */
-    unsigned int code_write_count;
-    uint8_t *code_bitmap;
-#if defined(CONFIG_USER_ONLY)
-    unsigned long flags;
-#endif
-} PageDesc;
-
-/* In system mode we want L1_MAP to be based on ram offsets,
-   while in user mode we want it to be based on virtual addresses.  */
-#if !defined(CONFIG_USER_ONLY)
-#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
-# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
-#else
-# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
-#endif
-#else
-# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
-#endif
-
-/* Size of the L2 (and L3, etc) page tables.  */
-#define L2_BITS 10
-#define L2_SIZE (1 << L2_BITS)
-
-#define P_L2_LEVELS \
-    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
-
-/* The bits remaining after N lower levels of page tables.  */
-#define V_L1_BITS_REM \
-    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
-
-#if V_L1_BITS_REM < 4
-#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
-#else
-#define V_L1_BITS  V_L1_BITS_REM
-#endif
-
-#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
-
-#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
-
-uintptr_t qemu_real_host_page_size;
-uintptr_t qemu_host_page_size;
-uintptr_t qemu_host_page_mask;
-
-/* This is a multi-level map on the virtual address space.
-   The bottom level has pointers to PageDesc.  */
-static void *l1_map[V_L1_SIZE];
-
 #if !defined(CONFIG_USER_ONLY)
 
 static MemoryRegionSection *phys_sections;
@@ -195,180 +99,6 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr);
 
 static MemoryRegion io_mem_watch;
 #endif
-static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
-                         tb_page_addr_t phys_page2);
-
-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
-#ifdef _WIN32
-static inline void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-}
-#else
-static inline void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
-static void page_init(void)
-{
-    /* NOTE: we can always suppose that qemu_host_page_size >=
-       TARGET_PAGE_SIZE */
-#ifdef _WIN32
-    {
-        SYSTEM_INFO system_info;
-
-        GetSystemInfo(&system_info);
-        qemu_real_host_page_size = system_info.dwPageSize;
-    }
-#else
-    qemu_real_host_page_size = getpagesize();
-#endif
-    if (qemu_host_page_size == 0) {
-        qemu_host_page_size = qemu_real_host_page_size;
-    }
-    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
-        qemu_host_page_size = TARGET_PAGE_SIZE;
-    }
-    qemu_host_page_mask = ~(qemu_host_page_size - 1);
-
-#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
-    {
-#ifdef HAVE_KINFO_GETVMMAP
-        struct kinfo_vmentry *freep;
-        int i, cnt;
-
-        freep = kinfo_getvmmap(getpid(), &cnt);
-        if (freep) {
-            mmap_lock();
-            for (i = 0; i < cnt; i++) {
-                unsigned long startaddr, endaddr;
-
-                startaddr = freep[i].kve_start;
-                endaddr = freep[i].kve_end;
-                if (h2g_valid(startaddr)) {
-                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
-
-                    if (h2g_valid(endaddr)) {
-                        endaddr = h2g(endaddr);
-                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-                    } else {
-#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
-                        endaddr = ~0ul;
-                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-#endif
-                    }
-                }
-            }
-            free(freep);
-            mmap_unlock();
-        }
-#else
-        FILE *f;
-
-        last_brk = (unsigned long)sbrk(0);
-
-        f = fopen("/compat/linux/proc/self/maps", "r");
-        if (f) {
-            mmap_lock();
-
-            do {
-                unsigned long startaddr, endaddr;
-                int n;
-
-                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
-
-                if (n == 2 && h2g_valid(startaddr)) {
-                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
-
-                    if (h2g_valid(endaddr)) {
-                        endaddr = h2g(endaddr);
-                    } else {
-                        endaddr = ~0ul;
-                    }
-                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-                }
-            } while (!feof(f));
-
-            fclose(f);
-            mmap_unlock();
-        }
-#endif
-    }
-#endif
-}
-
-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
-{
-    PageDesc *pd;
-    void **lp;
-    int i;
-
-#if defined(CONFIG_USER_ONLY)
-    /* We can't use g_malloc because it may recurse into a locked mutex. */
-# define ALLOC(P, SIZE)                                 \
-    do {                                                \
-        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
-                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
-    } while (0)
-#else
-# define ALLOC(P, SIZE) \
-    do { P = g_malloc0(SIZE); } while (0)
-#endif
-
-    /* Level 1.  Always allocated.  */
-    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
-
-    /* Level 2..N-1.  */
-    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
-        void **p = *lp;
-
-        if (p == NULL) {
-            if (!alloc) {
-                return NULL;
-            }
-            ALLOC(p, sizeof(void *) * L2_SIZE);
-            *lp = p;
-        }
-
-        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
-    }
-
-    pd = *lp;
-    if (pd == NULL) {
-        if (!alloc) {
-            return NULL;
-        }
-        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
-        *lp = pd;
-    }
-
-#undef ALLOC
-
-    return pd + (index & (L2_SIZE - 1));
-}
-
-static inline PageDesc *page_find(tb_page_addr_t index)
-{
-    return page_find_alloc(index, 0);
-}
 
 #if !defined(CONFIG_USER_ONLY)
 
@@ -474,1046 +204,93 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 {
     return mr != &io_mem_ram && mr != &io_mem_rom
         && mr != &io_mem_notdirty && !mr->rom_device
-        && mr != &io_mem_watch;
-}
-
-#define mmap_lock() do { } while(0)
-#define mmap_unlock() do { } while(0)
-#endif
-
-#if defined(CONFIG_USER_ONLY)
-/* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used.  */
-/* ??? 64-bit hosts ought to have no problem mmaping data outside the
-   region in which the guest needs to run.  Revisit this.  */
-#define USE_STATIC_CODE_GEN_BUFFER
-#endif
-
-/* ??? Should configure for this, not list operating systems here.  */
-#if (defined(__linux__) \
-    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__))
-# define USE_MMAP
-#endif
-
-/* Minimum size of the code gen buffer.  This number is randomly chosen,
-   but not so small that we can't have a fair number of TB's live.  */
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
-
-/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
-   indicated, this is constrained by the range of direct branches on the
-   host cpu, as used by the TCG implementation of goto_tb.  */
-#if defined(__x86_64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
-#elif defined(__sparc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
-#elif defined(__s390x__)
-  /* We have a +- 4GB range on the branches; leave some slop.  */
-# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
-#else
-# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
-#endif
-
-#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
-
-#define DEFAULT_CODE_GEN_BUFFER_SIZE \
-  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
-   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
-
-static inline size_t size_code_gen_buffer(size_t tb_size)
-{
-    /* Size the buffer.  */
-    if (tb_size == 0) {
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* ??? Needs adjustments.  */
-        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
-           static buffer, we could size this on RESERVED_VA, on the text
-           segment size of the executable, or continue to use the default.  */
-        tb_size = (unsigned long)(ram_size / 4);
-#endif
-    }
-    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
-    }
-    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
-    }
-    code_gen_buffer_size = tb_size;
-    return tb_size;
-}
-
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-    __attribute__((aligned(CODE_GEN_ALIGN)));
-
-static inline void *alloc_code_gen_buffer(void)
-{
-    map_exec(static_code_gen_buffer, code_gen_buffer_size);
-    return static_code_gen_buffer;
-}
-#elif defined(USE_MMAP)
-static inline void *alloc_code_gen_buffer(void)
-{
-    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-    uintptr_t start = 0;
-    void *buf;
-
-    /* Constrain the position of the buffer based on the host cpu.
-       Note that these addresses are chosen in concert with the
-       addresses assigned in the relevant linker script file.  */
-# if defined(__PIE__) || defined(__PIC__)
-    /* Don't bother setting a preferred location if we're building
-       a position-independent executable.  We're more likely to get
-       an address near the main executable if we let the kernel
-       choose the address.  */
-# elif defined(__x86_64__) && defined(MAP_32BIT)
-    /* Force the memory down into low memory with the executable.
-       Leave the choice of exact location with the kernel.  */
-    flags |= MAP_32BIT;
-    /* Cannot expect to map more than 800MB in low memory.  */
-    if (code_gen_buffer_size > 800u * 1024 * 1024) {
-        code_gen_buffer_size = 800u * 1024 * 1024;
-    }
-# elif defined(__sparc__)
-    start = 0x40000000ul;
-# elif defined(__s390x__)
-    start = 0x90000000ul;
-# endif
-
-    buf = mmap((void *)start, code_gen_buffer_size,
-               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
-    return buf == MAP_FAILED ? NULL : buf;
-}
-#else
-static inline void *alloc_code_gen_buffer(void)
-{
-    void *buf = g_malloc(code_gen_buffer_size);
-
-    if (buf) {
-        map_exec(buf, code_gen_buffer_size);
-    }
-    return buf;
-}
-#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
-
-static inline void code_gen_alloc(size_t tb_size)
-{
-    code_gen_buffer_size = size_code_gen_buffer(tb_size);
-    code_gen_buffer = alloc_code_gen_buffer();
-    if (code_gen_buffer == NULL) {
-        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-        exit(1);
-    }
-
-    qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
-
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
-    code_gen_buffer_size -= 1024;
-
-    code_gen_buffer_max_size = code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
-}
-
-/* Must be called before using the QEMU cpus. 'tb_size' is the size
-   (in bytes) allocated to the translation buffer. Zero means default
-   size. */
-void tcg_exec_init(unsigned long tb_size)
-{
-    cpu_gen_init();
-    code_gen_alloc(tb_size);
-    code_gen_ptr = code_gen_buffer;
-    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
-    page_init();
-#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
-    /* There's no guest base to take into account, so go ahead and
-       initialize the prologue now.  */
-    tcg_prologue_init(&tcg_ctx);
-#endif
-}
-
-bool tcg_enabled(void)
-{
-    return code_gen_buffer != NULL;
-}
-
-void cpu_exec_init_all(void)
-{
-#if !defined(CONFIG_USER_ONLY)
-    memory_map_init();
-    io_mem_init();
-#endif
-}
-
-#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
-
-static int cpu_common_post_load(void *opaque, int version_id)
-{
-    CPUArchState *env = opaque;
-
-    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
-       version_id is increased. */
-    env->interrupt_request &= ~0x01;
-    tlb_flush(env, 1);
-
-    return 0;
-}
-
-static const VMStateDescription vmstate_cpu_common = {
-    .name = "cpu_common",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .post_load = cpu_common_post_load,
-    .fields      = (VMStateField []) {
-        VMSTATE_UINT32(halted, CPUArchState),
-        VMSTATE_UINT32(interrupt_request, CPUArchState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-#endif
-
-CPUArchState *qemu_get_cpu(int cpu)
-{
-    CPUArchState *env = first_cpu;
-
-    while (env) {
-        if (env->cpu_index == cpu)
-            break;
-        env = env->next_cpu;
-    }
-
-    return env;
-}
-
-void cpu_exec_init(CPUArchState *env)
-{
-#ifndef CONFIG_USER_ONLY
-    CPUState *cpu = ENV_GET_CPU(env);
-#endif
-    CPUArchState **penv;
-    int cpu_index;
-
-#if defined(CONFIG_USER_ONLY)
-    cpu_list_lock();
-#endif
-    env->next_cpu = NULL;
-    penv = &first_cpu;
-    cpu_index = 0;
-    while (*penv != NULL) {
-        penv = &(*penv)->next_cpu;
-        cpu_index++;
-    }
-    env->cpu_index = cpu_index;
-    env->numa_node = 0;
-    QTAILQ_INIT(&env->breakpoints);
-    QTAILQ_INIT(&env->watchpoints);
-#ifndef CONFIG_USER_ONLY
-    cpu->thread_id = qemu_get_thread_id();
-#endif
-    *penv = env;
-#if defined(CONFIG_USER_ONLY)
-    cpu_list_unlock();
-#endif
-#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
-    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
-    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
-                    cpu_save, cpu_load, env);
-#endif
-}
-
-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
-static TranslationBlock *tb_alloc(target_ulong pc)
-{
-    TranslationBlock *tb;
-
-    if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
-        return NULL;
-    }
-    tb = &tbs[nb_tbs++];
-    tb->pc = pc;
-    tb->cflags = 0;
-    return tb;
-}
-
-void tb_free(TranslationBlock *tb)
-{
-    /* In practice this is mostly used for single use temporary TB
-       Ignore the hard cases and just back up if this TB happens to
-       be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
-        code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
-    }
-}
-
-static inline void invalidate_page_bitmap(PageDesc *p)
-{
-    if (p->code_bitmap) {
-        g_free(p->code_bitmap);
-        p->code_bitmap = NULL;
-    }
-    p->code_write_count = 0;
-}
-
-/* Set to NULL all the 'first_tb' fields in all PageDescs. */
-static void page_flush_tb_1(int level, void **lp)
-{
-    int i;
-
-    if (*lp == NULL) {
-        return;
-    }
-    if (level == 0) {
-        PageDesc *pd = *lp;
-
-        for (i = 0; i < L2_SIZE; ++i) {
-            pd[i].first_tb = NULL;
-            invalidate_page_bitmap(pd + i);
-        }
-    } else {
-        void **pp = *lp;
-
-        for (i = 0; i < L2_SIZE; ++i) {
-            page_flush_tb_1(level - 1, pp + i);
-        }
-    }
-}
-
-static void page_flush_tb(void)
-{
-    int i;
-
-    for (i = 0; i < V_L1_SIZE; i++) {
-        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
-    }
-}
-
-/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUArchState *env1)
-{
-    CPUArchState *env;
-
-#if defined(DEBUG_FLUSH)
-    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(code_gen_ptr - code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
-#endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer)
-        > code_gen_buffer_size) {
-        cpu_abort(env1, "Internal error: code buffer overflow\n");
-    }
-    nb_tbs = 0;
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
-    }
-
-    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
-    page_flush_tb();
-
-    code_gen_ptr = code_gen_buffer;
-    /* XXX: flush processor icache at this point if cache flush is
-       expensive */
-    tb_flush_count++;
-}
-
-#ifdef DEBUG_TB_CHECK
-
-static void tb_invalidate_check(target_ulong address)
-{
-    TranslationBlock *tb;
-    int i;
-
-    address &= TARGET_PAGE_MASK;
-    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
-                  address >= tb->pc + tb->size)) {
-                printf("ERROR invalidate: address=" TARGET_FMT_lx
-                       " PC=%08lx size=%04x\n",
-                       address, (long)tb->pc, tb->size);
-            }
-        }
-    }
-}
-
-/* verify that all the pages have correct rights for code */
-static void tb_page_check(void)
-{
-    TranslationBlock *tb;
-    int i, flags1, flags2;
-
-    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            flags1 = page_get_flags(tb->pc);
-            flags2 = page_get_flags(tb->pc + tb->size - 1);
-            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
-                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
-                       (long)tb->pc, tb->size, flags1, flags2);
-            }
-        }
-    }
-}
-
-#endif
-
-
-/* invalidate one TB */
-static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
-                             int next_offset)
-{
-    TranslationBlock *tb1;
-
-    for (;;) {
-        tb1 = *ptb;
-        if (tb1 == tb) {
-            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
-            break;
-        }
-        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
-    }
-}
-
-static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
-{
-    TranslationBlock *tb1;
-    unsigned int n1;
-
-    for (;;) {
-        tb1 = *ptb;
-        n1 = (uintptr_t)tb1 & 3;
-        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-        if (tb1 == tb) {
-            *ptb = tb1->page_next[n1];
-            break;
-        }
-        ptb = &tb1->page_next[n1];
-    }
-}
-
-static inline void tb_jmp_remove(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, **ptb;
-    unsigned int n1;
-
-    ptb = &tb->jmp_next[n];
-    tb1 = *ptb;
-    if (tb1) {
-        /* find tb(n) in circular list */
-        for (;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb) {
-                break;
-            }
-            if (n1 == 2) {
-                ptb = &tb1->jmp_first;
-            } else {
-                ptb = &tb1->jmp_next[n1];
-            }
-        }
-        /* now we can suppress tb(n) from the list */
-        *ptb = tb->jmp_next[n];
-
-        tb->jmp_next[n] = NULL;
-    }
-}
-
-/* reset the jump entry 'n' of a TB so that it is not chained to
-   another TB */
-static inline void tb_reset_jump(TranslationBlock *tb, int n)
-{
-    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
-}
-
-void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
-{
-    CPUArchState *env;
-    PageDesc *p;
-    unsigned int h, n1;
-    tb_page_addr_t phys_pc;
-    TranslationBlock *tb1, *tb2;
-
-    /* remove the TB from the hash list */
-    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_phys_hash_func(phys_pc);
-    tb_remove(&tb_phys_hash[h], tb,
-              offsetof(TranslationBlock, phys_hash_next));
-
-    /* remove the TB from the page list */
-    if (tb->page_addr[0] != page_addr) {
-        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
-        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-
-    tb_invalidated_flag = 1;
-
-    /* remove the TB from the hash list */
-    h = tb_jmp_cache_hash_func(tb->pc);
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (env->tb_jmp_cache[h] == tb) {
-            env->tb_jmp_cache[h] = NULL;
-        }
-    }
-
-    /* suppress this TB from the two jump lists */
-    tb_jmp_remove(tb, 0);
-    tb_jmp_remove(tb, 1);
-
-    /* suppress any remaining jumps to this TB */
-    tb1 = tb->jmp_first;
-    for (;;) {
-        n1 = (uintptr_t)tb1 & 3;
-        if (n1 == 2) {
-            break;
-        }
-        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-        tb2 = tb1->jmp_next[n1];
-        tb_reset_jump(tb1, n1);
-        tb1->jmp_next[n1] = NULL;
-        tb1 = tb2;
-    }
-    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
-
-    tb_phys_invalidate_count++;
-}
-
-static inline void set_bits(uint8_t *tab, int start, int len)
-{
-    int end, mask, end1;
-
-    end = start + len;
-    tab += start >> 3;
-    mask = 0xff << (start & 7);
-    if ((start & ~7) == (end & ~7)) {
-        if (start < end) {
-            mask &= ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    } else {
-        *tab++ |= mask;
-        start = (start + 8) & ~7;
-        end1 = end & ~7;
-        while (start < end1) {
-            *tab++ = 0xff;
-            start += 8;
-        }
-        if (start < end) {
-            mask = ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    }
-}
-
-static void build_page_bitmap(PageDesc *p)
-{
-    int n, tb_start, tb_end;
-    TranslationBlock *tb;
-
-    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
-
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->pc & ~TARGET_PAGE_MASK;
-            tb_end = tb_start + tb->size;
-            if (tb_end > TARGET_PAGE_SIZE) {
-                tb_end = TARGET_PAGE_SIZE;
-            }
-        } else {
-            tb_start = 0;
-            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
-        tb = tb->page_next[n];
-    }
-}
-
-TranslationBlock *tb_gen_code(CPUArchState *env,
-                              target_ulong pc, target_ulong cs_base,
-                              int flags, int cflags)
-{
-    TranslationBlock *tb;
-    uint8_t *tc_ptr;
-    tb_page_addr_t phys_pc, phys_page2;
-    target_ulong virt_page2;
-    int code_gen_size;
-
-    phys_pc = get_page_addr_code(env, pc);
-    tb = tb_alloc(pc);
-    if (!tb) {
-        /* flush must be done */
-        tb_flush(env);
-        /* cannot fail at this point */
-        tb = tb_alloc(pc);
-        /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
-    }
-    tc_ptr = code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
-    tb->cs_base = cs_base;
-    tb->flags = flags;
-    tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
-                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
-
-    /* check next page if needed */
-    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
-    phys_page2 = -1;
-    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
-        phys_page2 = get_page_addr_code(env, virt_page2);
-    }
-    tb_link_page(tb, phys_pc, phys_page2);
-    return tb;
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end may refer to *different* physical pages.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- */
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
-                              int is_cpu_write_access)
-{
-    while (start < end) {
-        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
-        start &= TARGET_PAGE_MASK;
-        start += TARGET_PAGE_SIZE;
-    }
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end must refer to the *same* physical page.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- */
-void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
-                                   int is_cpu_write_access)
-{
-    TranslationBlock *tb, *tb_next, *saved_tb;
-    CPUArchState *env = cpu_single_env;
-    tb_page_addr_t tb_start, tb_end;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    int current_tb_not_found = is_cpu_write_access;
-    TranslationBlock *current_tb = NULL;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif /* TARGET_HAS_PRECISE_SMC */
-
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p) {
-        return;
-    }
-    if (!p->code_bitmap &&
-        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
-        is_cpu_write_access) {
-        /* build code bitmap */
-        build_page_bitmap(p);
-    }
-
-    /* we remove all the TBs in the range [start, end[ */
-    /* XXX: see if in some cases it could be faster to invalidate all
-       the code */
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-        tb_next = tb->page_next[n];
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-            tb_end = tb_start + tb->size;
-        } else {
-            tb_start = tb->page_addr[1];
-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        if (!(tb_end <= start || tb_start >= end)) {
-#ifdef TARGET_HAS_PRECISE_SMC
-            if (current_tb_not_found) {
-                current_tb_not_found = 0;
-                current_tb = NULL;
-                if (env->mem_io_pc) {
-                    /* now we have a real cpu fault */
-                    current_tb = tb_find_pc(env->mem_io_pc);
-                }
-            }
-            if (current_tb == tb &&
-                (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                its execution. We could be more precise by checking
-                that the modification is after the current PC, but it
-                would require a specialized function to partially
-                restore the CPU state */
-
-                current_tb_modified = 1;
-                cpu_restore_state(current_tb, env, env->mem_io_pc);
-                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                     &current_flags);
-            }
-#endif /* TARGET_HAS_PRECISE_SMC */
-            /* we need to do that to handle the case where a signal
-               occurs while doing tb_phys_invalidate() */
-            saved_tb = NULL;
-            if (env) {
-                saved_tb = env->current_tb;
-                env->current_tb = NULL;
-            }
-            tb_phys_invalidate(tb, -1);
-            if (env) {
-                env->current_tb = saved_tb;
-                if (env->interrupt_request && env->current_tb) {
-                    cpu_interrupt(env, env->interrupt_request);
-                }
-            }
-        }
-        tb = tb_next;
-    }
-#if !defined(CONFIG_USER_ONLY)
-    /* if no code remaining, no need to continue to use slow writes */
-    if (!p->first_tb) {
-        invalidate_page_bitmap(p);
-        if (is_cpu_write_access) {
-            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
-        }
-    }
-#endif
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, NULL);
-    }
-#endif
-}
-
-/* len must be <= 8 and start must be a multiple of len */
-static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
-{
-    PageDesc *p;
-    int offset, b;
-
-#if 0
-    if (1) {
-        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                  cpu_single_env->mem_io_vaddr, len,
-                  cpu_single_env->eip,
-                  cpu_single_env->eip +
-                  (intptr_t)cpu_single_env->segs[R_CS].base);
-    }
-#endif
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p) {
-        return;
-    }
-    if (p->code_bitmap) {
-        offset = start & ~TARGET_PAGE_MASK;
-        b = p->code_bitmap[offset >> 3] >> (offset & 7);
-        if (b & ((1 << len) - 1)) {
-            goto do_invalidate;
-        }
-    } else {
-    do_invalidate:
-        tb_invalidate_phys_page_range(start, start + len, 1);
-    }
-}
-
-#if !defined(CONFIG_SOFTMMU)
-static void tb_invalidate_phys_page(tb_page_addr_t addr,
-                                    uintptr_t pc, void *puc)
-{
-    TranslationBlock *tb;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    TranslationBlock *current_tb = NULL;
-    CPUArchState *env = cpu_single_env;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif
-
-    addr &= TARGET_PAGE_MASK;
-    p = page_find(addr >> TARGET_PAGE_BITS);
-    if (!p) {
-        return;
-    }
-    tb = p->first_tb;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (tb && pc != 0) {
-        current_tb = tb_find_pc(pc);
-    }
-#endif
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-#ifdef TARGET_HAS_PRECISE_SMC
-        if (current_tb == tb &&
-            (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                   its execution. We could be more precise by checking
-                   that the modification is after the current PC, but it
-                   would require a specialized function to partially
-                   restore the CPU state */
-
-            current_tb_modified = 1;
-            cpu_restore_state(current_tb, env, pc);
-            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                 &current_flags);
-        }
-#endif /* TARGET_HAS_PRECISE_SMC */
-        tb_phys_invalidate(tb, addr);
-        tb = tb->page_next[n];
-    }
-    p->first_tb = NULL;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, puc);
-    }
-#endif
-}
-#endif
-
-/* add the tb in the target page and protect it if necessary */
-static inline void tb_alloc_page(TranslationBlock *tb,
-                                 unsigned int n, tb_page_addr_t page_addr)
-{
-    PageDesc *p;
-#ifndef CONFIG_USER_ONLY
-    bool page_already_protected;
-#endif
-
-    tb->page_addr[n] = page_addr;
-    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
-    tb->page_next[n] = p->first_tb;
-#ifndef CONFIG_USER_ONLY
-    page_already_protected = p->first_tb != NULL;
-#endif
-    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
-    invalidate_page_bitmap(p);
-
-#if defined(TARGET_HAS_SMC) || 1
-
-#if defined(CONFIG_USER_ONLY)
-    if (p->flags & PAGE_WRITE) {
-        target_ulong addr;
-        PageDesc *p2;
-        int prot;
-
-        /* force the host page as non writable (writes will have a
-           page fault + mprotect overhead) */
-        page_addr &= qemu_host_page_mask;
-        prot = 0;
-        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
-            addr += TARGET_PAGE_SIZE) {
-
-            p2 = page_find(addr >> TARGET_PAGE_BITS);
-            if (!p2) {
-                continue;
-            }
-            prot |= p2->flags;
-            p2->flags &= ~PAGE_WRITE;
-          }
-        mprotect(g2h(page_addr), qemu_host_page_size,
-                 (prot & PAGE_BITS) & ~PAGE_WRITE);
-#ifdef DEBUG_TB_INVALIDATE
-        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
-               page_addr);
-#endif
-    }
-#else
-    /* if some code is already present, then the pages are already
-       protected. So we handle the case where only the first TB is
-       allocated in a physical page */
-    if (!page_already_protected) {
-        tlb_protect_code(page_addr);
-    }
-#endif
-
-#endif /* TARGET_HAS_SMC */
-}
-
-/* add a new TB and link it to the physical page tables. phys_page2 is
-   (-1) to indicate that only one page contains the TB. */
-static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
-                         tb_page_addr_t phys_page2)
-{
-    unsigned int h;
-    TranslationBlock **ptb;
-
-    /* Grab the mmap lock to stop another thread invalidating this TB
-       before we are done.  */
-    mmap_lock();
-    /* add in the physical hash table */
-    h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
-    tb->phys_hash_next = *ptb;
-    *ptb = tb;
-
-    /* add in the page list */
-    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
-    if (phys_page2 != -1) {
-        tb_alloc_page(tb, 1, phys_page2);
-    } else {
-        tb->page_addr[1] = -1;
-    }
-
-    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
-    tb->jmp_next[0] = NULL;
-    tb->jmp_next[1] = NULL;
-
-    /* init original jump addresses */
-    if (tb->tb_next_offset[0] != 0xffff) {
-        tb_reset_jump(tb, 0);
-    }
-    if (tb->tb_next_offset[1] != 0xffff) {
-        tb_reset_jump(tb, 1);
-    }
-
-#ifdef DEBUG_TB_CHECK
-    tb_page_check();
-#endif
-    mmap_unlock();
+        && mr != &io_mem_watch;
 }
+#endif
 
-#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-/* check whether the given addr is in TCG generated code buffer or not */
-bool is_tcg_gen_code(uintptr_t tc_ptr)
+void cpu_exec_init_all(void)
 {
-    /* This can be called during code generation, code_gen_buffer_max_size
-       is used instead of code_gen_ptr for upper boundary checking */
-    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
-            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
-}
+#if !defined(CONFIG_USER_ONLY)
+    memory_map_init();
+    io_mem_init();
 #endif
+}
 
-/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
-   tb[1].tc_ptr. Return NULL if not found */
-TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
+
+static int cpu_common_post_load(void *opaque, int version_id)
 {
-    int m_min, m_max, m;
-    uintptr_t v;
-    TranslationBlock *tb;
+    CPUArchState *env = opaque;
 
-    if (nb_tbs <= 0) {
-        return NULL;
-    }
-    if (tc_ptr < (uintptr_t)code_gen_buffer ||
-        tc_ptr >= (uintptr_t)code_gen_ptr) {
-        return NULL;
-    }
-    /* binary search (cf Knuth) */
-    m_min = 0;
-    m_max = nb_tbs - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
-        v = (uintptr_t)tb->tc_ptr;
-        if (v == tc_ptr) {
-            return tb;
-        } else if (tc_ptr < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
-        }
-    }
-    return &tbs[m_max];
+    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
+       version_id is increased. */
+    env->interrupt_request &= ~0x01;
+    tlb_flush(env, 1);
+
+    return 0;
 }
 
-static void tb_reset_jump_recursive(TranslationBlock *tb);
+static const VMStateDescription vmstate_cpu_common = {
+    .name = "cpu_common",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .post_load = cpu_common_post_load,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT32(halted, CPUArchState),
+        VMSTATE_UINT32(interrupt_request, CPUArchState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
 
-static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+CPUArchState *qemu_get_cpu(int cpu)
 {
-    TranslationBlock *tb1, *tb_next, **ptb;
-    unsigned int n1;
-
-    tb1 = tb->jmp_next[n];
-    if (tb1 != NULL) {
-        /* find head of list */
-        for (;;) {
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == 2) {
-                break;
-            }
-            tb1 = tb1->jmp_next[n1];
-        }
-        /* we are now sure now that tb jumps to tb1 */
-        tb_next = tb1;
-
-        /* remove tb from the jmp_first list */
-        ptb = &tb_next->jmp_first;
-        for (;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb) {
-                break;
-            }
-            ptb = &tb1->jmp_next[n1];
-        }
-        *ptb = tb->jmp_next[n];
-        tb->jmp_next[n] = NULL;
-
-        /* suppress the jump to next tb in generated code */
-        tb_reset_jump(tb, n);
+    CPUArchState *env = first_cpu;
 
-        /* suppress jumps in the tb on which we could have jumped */
-        tb_reset_jump_recursive(tb_next);
+    while (env) {
+        if (env->cpu_index == cpu)
+            break;
+        env = env->next_cpu;
     }
+
+    return env;
 }
 
-static void tb_reset_jump_recursive(TranslationBlock *tb)
+void cpu_exec_init(CPUArchState *env)
 {
-    tb_reset_jump_recursive2(tb, 0);
-    tb_reset_jump_recursive2(tb, 1);
+#ifndef CONFIG_USER_ONLY
+    CPUState *cpu = ENV_GET_CPU(env);
+#endif
+    CPUArchState **penv;
+    int cpu_index;
+
+#if defined(CONFIG_USER_ONLY)
+    cpu_list_lock();
+#endif
+    env->next_cpu = NULL;
+    penv = &first_cpu;
+    cpu_index = 0;
+    while (*penv != NULL) {
+        penv = &(*penv)->next_cpu;
+        cpu_index++;
+    }
+    env->cpu_index = cpu_index;
+    env->numa_node = 0;
+    QTAILQ_INIT(&env->breakpoints);
+    QTAILQ_INIT(&env->watchpoints);
+#ifndef CONFIG_USER_ONLY
+    cpu->thread_id = qemu_get_thread_id();
+#endif
+    *penv = env;
+#if defined(CONFIG_USER_ONLY)
+    cpu_list_unlock();
+#endif
+#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
+    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
+    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
+                    cpu_save, cpu_load, env);
+#endif
 }
 
 #if defined(TARGET_HAS_ICE)
@@ -1523,22 +300,6 @@ static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 }
 #else
-void tb_invalidate_phys_addr(hwaddr addr)
-{
-    ram_addr_t ram_addr;
-    MemoryRegionSection *section;
-
-    section = phys_page_find(address_space_memory.dispatch,
-                             addr >> TARGET_PAGE_BITS);
-    if (!(memory_region_is_ram(section->mr)
-          || (section->mr->rom_device && section->mr->readable))) {
-        return;
-    }
-    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-        + memory_region_section_addr(section, addr);
-    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
-}
-
 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 {
     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
@@ -1720,67 +481,6 @@ void cpu_single_step(CPUArchState *env, int enabled)
 #endif
 }
 
-static void cpu_unlink_tb(CPUArchState *env)
-{
-    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
-       problem and hope the cpu will stop of its own accord.  For userspace
-       emulation this often isn't actually as bad as it sounds.  Often
-       signals are used primarily to interrupt blocking syscalls.  */
-    TranslationBlock *tb;
-    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
-
-    spin_lock(&interrupt_lock);
-    tb = env->current_tb;
-    /* if the cpu is currently executing code, we must unlink it and
-       all the potentially executing TB */
-    if (tb) {
-        env->current_tb = NULL;
-        tb_reset_jump_recursive(tb);
-    }
-    spin_unlock(&interrupt_lock);
-}
-
-#ifndef CONFIG_USER_ONLY
-/* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUArchState *env, int mask)
-{
-    CPUState *cpu = ENV_GET_CPU(env);
-    int old_mask;
-
-    old_mask = env->interrupt_request;
-    env->interrupt_request |= mask;
-
-    /*
-     * If called from iothread context, wake the target cpu in
-     * case its halted.
-     */
-    if (!qemu_cpu_is_self(cpu)) {
-        qemu_cpu_kick(cpu);
-        return;
-    }
-
-    if (use_icount) {
-        env->icount_decr.u16.high = 0xffff;
-        if (!can_do_io(env)
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(env, "Raised interrupt while not in I/O function");
-        }
-    } else {
-        cpu_unlink_tb(env);
-    }
-}
-
-CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
-
-#else /* CONFIG_USER_ONLY */
-
-void cpu_interrupt(CPUArchState *env, int mask)
-{
-    env->interrupt_request |= mask;
-    cpu_unlink_tb(env);
-}
-#endif /* CONFIG_USER_ONLY */
-
 void cpu_reset_interrupt(CPUArchState *env, int mask)
 {
     env->interrupt_request &= ~mask;
@@ -1859,21 +559,6 @@ CPUArchState *cpu_copy(CPUArchState *env)
 }
 
 #if !defined(CONFIG_USER_ONLY)
-void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
-{
-    unsigned int i;
-
-    /* Discard jump cache entries for any tb which might potentially
-       overlap the flushed page.  */
-    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset (&env->tb_jmp_cache[i], 0, 
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
-
-    i = tb_jmp_cache_hash_page(addr);
-    memset (&env->tb_jmp_cache[i], 0, 
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
-}
-
 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                       uintptr_t length)
 {
@@ -1963,272 +648,6 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 
     return iotlb;
 }
-
-#else
-/*
- * Walks guest process memory "regions" one by one
- * and calls callback function 'fn' for each region.
- */
-struct walk_memory_regions_data {
-    walk_memory_regions_fn fn;
-    void *priv;
-    uintptr_t start;
-    int prot;
-};
-
-static int walk_memory_regions_end(struct walk_memory_regions_data *data,
-                                   abi_ulong end, int new_prot)
-{
-    if (data->start != -1ul) {
-        int rc = data->fn(data->priv, data->start, end, data->prot);
-        if (rc != 0) {
-            return rc;
-        }
-    }
-
-    data->start = (new_prot ? end : -1ul);
-    data->prot = new_prot;
-
-    return 0;
-}
-
-static int walk_memory_regions_1(struct walk_memory_regions_data *data,
-                                 abi_ulong base, int level, void **lp)
-{
-    abi_ulong pa;
-    int i, rc;
-
-    if (*lp == NULL) {
-        return walk_memory_regions_end(data, base, 0);
-    }
-
-    if (level == 0) {
-        PageDesc *pd = *lp;
-
-        for (i = 0; i < L2_SIZE; ++i) {
-            int prot = pd[i].flags;
-
-            pa = base | (i << TARGET_PAGE_BITS);
-            if (prot != data->prot) {
-                rc = walk_memory_regions_end(data, pa, prot);
-                if (rc != 0) {
-                    return rc;
-                }
-            }
-        }
-    } else {
-        void **pp = *lp;
-
-        for (i = 0; i < L2_SIZE; ++i) {
-            pa = base | ((abi_ulong)i <<
-                (TARGET_PAGE_BITS + L2_BITS * level));
-            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
-            if (rc != 0) {
-                return rc;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
-{
-    struct walk_memory_regions_data data;
-    uintptr_t i;
-
-    data.fn = fn;
-    data.priv = priv;
-    data.start = -1ul;
-    data.prot = 0;
-
-    for (i = 0; i < V_L1_SIZE; i++) {
-        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
-                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
-
-        if (rc != 0) {
-            return rc;
-        }
-    }
-
-    return walk_memory_regions_end(&data, 0, 0);
-}
-
-static int dump_region(void *priv, abi_ulong start,
-    abi_ulong end, unsigned long prot)
-{
-    FILE *f = (FILE *)priv;
-
-    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
-        " "TARGET_ABI_FMT_lx" %c%c%c\n",
-        start, end, end - start,
-        ((prot & PAGE_READ) ? 'r' : '-'),
-        ((prot & PAGE_WRITE) ? 'w' : '-'),
-        ((prot & PAGE_EXEC) ? 'x' : '-'));
-
-    return 0;
-}
-
-/* dump memory mappings */
-void page_dump(FILE *f)
-{
-    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
-            "start", "end", "size", "prot");
-    walk_memory_regions(f, dump_region);
-}
-
-int page_get_flags(target_ulong address)
-{
-    PageDesc *p;
-
-    p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p) {
-        return 0;
-    }
-    return p->flags;
-}
-
-/* Modify the flags of a page and invalidate the code if necessary.
-   The flag PAGE_WRITE_ORG is positioned automatically depending
-   on PAGE_WRITE.  The mmap_lock should already be held.  */
-void page_set_flags(target_ulong start, target_ulong end, int flags)
-{
-    target_ulong addr, len;
-
-    /* This function should never be called with addresses outside the
-       guest address space.  If this assert fires, it probably indicates
-       a missing call to h2g_valid.  */
-#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
-#endif
-    assert(start < end);
-
-    start = start & TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
-
-    if (flags & PAGE_WRITE) {
-        flags |= PAGE_WRITE_ORG;
-    }
-
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
-
-        /* If the write protection bit is set, then we invalidate
-           the code inside.  */
-        if (!(p->flags & PAGE_WRITE) &&
-            (flags & PAGE_WRITE) &&
-            p->first_tb) {
-            tb_invalidate_phys_page(addr, 0, NULL);
-        }
-        p->flags = flags;
-    }
-}
-
-int page_check_range(target_ulong start, target_ulong len, int flags)
-{
-    PageDesc *p;
-    target_ulong end;
-    target_ulong addr;
-
-    /* This function should never be called with addresses outside the
-       guest address space.  If this assert fires, it probably indicates
-       a missing call to h2g_valid.  */
-#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
-#endif
-
-    if (len == 0) {
-        return 0;
-    }
-    if (start + len - 1 < start) {
-        /* We've wrapped around.  */
-        return -1;
-    }
-
-    /* must do before we loose bits in the next step */
-    end = TARGET_PAGE_ALIGN(start + len);
-    start = start & TARGET_PAGE_MASK;
-
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        p = page_find(addr >> TARGET_PAGE_BITS);
-        if (!p) {
-            return -1;
-        }
-        if (!(p->flags & PAGE_VALID)) {
-            return -1;
-        }
-
-        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
-            return -1;
-        }
-        if (flags & PAGE_WRITE) {
-            if (!(p->flags & PAGE_WRITE_ORG)) {
-                return -1;
-            }
-            /* unprotect the page if it was put read-only because it
-               contains translated code */
-            if (!(p->flags & PAGE_WRITE)) {
-                if (!page_unprotect(addr, 0, NULL)) {
-                    return -1;
-                }
-            }
-            return 0;
-        }
-    }
-    return 0;
-}
-
-/* called from signal handler: invalidate the code and unprotect the
-   page. Return TRUE if the fault was successfully handled. */
-int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
-{
-    unsigned int prot;
-    PageDesc *p;
-    target_ulong host_start, host_end, addr;
-
-    /* Technically this isn't safe inside a signal handler.  However we
-       know this only ever happens in a synchronous SEGV handler, so in
-       practice it seems to be ok.  */
-    mmap_lock();
-
-    p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p) {
-        mmap_unlock();
-        return 0;
-    }
-
-    /* if the page was really writable, then we change its
-       protection back to writable */
-    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
-        host_start = address & qemu_host_page_mask;
-        host_end = host_start + qemu_host_page_size;
-
-        prot = 0;
-        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
-            p = page_find(addr >> TARGET_PAGE_BITS);
-            p->flags |= PAGE_WRITE;
-            prot |= p->flags;
-
-            /* and since the content will be modified, we must invalidate
-               the corresponding translated code. */
-            tb_invalidate_phys_page(addr, pc, puc);
-#ifdef DEBUG_TB_CHECK
-            tb_invalidate_check(addr);
-#endif
-        }
-        mprotect((void *)g2h(host_start), qemu_host_page_size,
-                 prot & PAGE_BITS);
-
-        mmap_unlock();
-        return 1;
-    }
-    mmap_unlock();
-    return 0;
-}
 #endif /* defined(CONFIG_USER_ONLY) */
 
 #if !defined(CONFIG_USER_ONLY)
@@ -2987,19 +1406,6 @@ static const MemoryRegionOps notdirty_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void tb_check_watchpoint(CPUArchState *env)
-{
-    TranslationBlock *tb;
-
-    tb = tb_find_pc(env->mem_io_pc);
-    if (!tb) {
-        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
-                  (void *)env->mem_io_pc);
-    }
-    cpu_restore_state(tb, env, env->mem_io_pc);
-    tb_phys_invalidate(tb, -1);
-}
-
 /* Generate a debug exception if a watchpoint has been hit.  */
 static void check_watchpoint(int offset, int len_mask, int flags)
 {
@@ -4134,123 +2540,8 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 }
 #endif
 
-/* in deterministic execution mode, instructions doing device I/Os
-   must be at the end of the TB */
-void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-    uint32_t n, cflags;
-    target_ulong pc, cs_base;
-    uint64_t flags;
-
-    tb = tb_find_pc(retaddr);
-    if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
-                  (void *)retaddr);
-    }
-    n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state(tb, env, retaddr);
-    /* Calculate how many instructions had been executed before the fault
-       occurred.  */
-    n = n - env->icount_decr.u16.low;
-    /* Generate a new TB ending on the I/O insn.  */
-    n++;
-    /* On MIPS and SH, delay slot instructions can only be restarted if
-       they were already the first instruction in the TB.  If this is not
-       the first instruction in a TB then re-execute the preceding
-       branch.  */
-#if defined(TARGET_MIPS)
-    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
-        env->active_tc.PC -= 4;
-        env->icount_decr.u16.low++;
-        env->hflags &= ~MIPS_HFLAG_BMASK;
-    }
-#elif defined(TARGET_SH4)
-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
-            && n > 1) {
-        env->pc -= 2;
-        env->icount_decr.u16.low++;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-    }
-#endif
-    /* This should never happen.  */
-    if (n > CF_COUNT_MASK) {
-        cpu_abort(env, "TB too big during recompile");
-    }
-
-    cflags = n | CF_LAST_IO;
-    pc = tb->pc;
-    cs_base = tb->cs_base;
-    flags = tb->flags;
-    tb_phys_invalidate(tb, -1);
-    /* FIXME: In theory this could raise an exception.  In practice
-       we have already translated the block once so it's probably ok.  */
-    tb_gen_code(env, pc, cs_base, flags, cflags);
-    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-       the first in the TB) then we end up generating a whole new TB and
-       repeating the fault, which is horribly inefficient.
-       Better would be to execute just this insn uncached, or generate a
-       second new TB.  */
-    cpu_resume_from_signal(env, NULL);
-}
-
 #if !defined(CONFIG_USER_ONLY)
 
-void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
-{
-    int i, target_code_size, max_target_code_size;
-    int direct_jmp_count, direct_jmp2_count, cross_page;
-    TranslationBlock *tb;
-
-    target_code_size = 0;
-    max_target_code_size = 0;
-    cross_page = 0;
-    direct_jmp_count = 0;
-    direct_jmp2_count = 0;
-    for (i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
-        target_code_size += tb->size;
-        if (tb->size > max_target_code_size) {
-            max_target_code_size = tb->size;
-        }
-        if (tb->page_addr[1] != -1) {
-            cross_page++;
-        }
-        if (tb->tb_next_offset[0] != 0xffff) {
-            direct_jmp_count++;
-            if (tb->tb_next_offset[1] != 0xffff) {
-                direct_jmp2_count++;
-            }
-        }
-    }
-    /* XXX: avoid using doubles ? */
-    cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%zd\n",
-                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
-    cpu_fprintf(f, "TB count            %d/%d\n",
-                nb_tbs, code_gen_max_blocks);
-    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
-    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
-                / target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-            cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
-                direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
-                direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
-    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
-    tcg_dump_info(f, cpu_fprintf);
-}
-
 /*
  * A helper function for the _utterly broken_ virtio device model to find out if
  * it's running on a big endian machine. Don't do this at home kids!
diff --git a/translate-all.c b/translate-all.c
index f22e3ee..b958342 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -16,6 +16,12 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -24,15 +30,119 @@
 
 #include "config.h"
 
+#include "qemu-common.h"
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
 #include "disas.h"
 #include "tcg.h"
 #include "qemu-timer.h"
+#include "memory.h"
+#include "exec-memory.h"
+#if defined(CONFIG_USER_ONLY)
+#include "qemu.h"
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/param.h>
+#if __FreeBSD_version >= 700104
+#define HAVE_KINFO_GETVMMAP
+#define sigqueue sigqueue_freebsd  /* avoid redefinition */
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <machine/profile.h>
+#define _KERNEL
+#include <sys/user.h>
+#undef _KERNEL
+#undef sigqueue
+#include <libutil.h>
+#endif
+#endif
+#endif
+
+#include "cputlb.h"
+#include "translate-all.h"
+
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+
+#if !defined(CONFIG_USER_ONLY)
+/* TB consistency checks only implemented for usermode emulation.  */
+#undef DEBUG_TB_CHECK
+#endif
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+/* Code generation and translation blocks */
+static TranslationBlock *tbs;
+static int code_gen_max_blocks;
+TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+static int nb_tbs;
+/* any access to the tbs or the page table must use this lock */
+spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
+
+uint8_t *code_gen_prologue;
+static uint8_t *code_gen_buffer;
+static size_t code_gen_buffer_size;
+/* threshold to flush the translated code buffer */
+static size_t code_gen_buffer_max_size;
+static uint8_t *code_gen_ptr;
+
+typedef struct PageDesc {
+    /* list of TBs intersecting this ram page */
+    TranslationBlock *first_tb;
+    /* in order to optimize self modifying code, we count the number
+       of lookups we do to a given page to use a bitmap */
+    unsigned int code_write_count;
+    uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+    unsigned long flags;
+#endif
+} PageDesc;
+
+/* In system mode we want L1_MAP to be based on ram offsets,
+   while in user mode we want it to be based on virtual addresses.  */
+#if !defined(CONFIG_USER_ONLY)
+#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
+# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
+#endif
+
+/* The bits remaining after N lower levels of page tables.  */
+#define V_L1_BITS_REM \
+    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+
+#if V_L1_BITS_REM < 4
+#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
+#else
+#define V_L1_BITS  V_L1_BITS_REM
+#endif
+
+#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
+
+#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
+
+uintptr_t qemu_real_host_page_size;
+uintptr_t qemu_host_page_size;
+uintptr_t qemu_host_page_mask;
+
+/* This is a multi-level map on the virtual address space.
+   The bottom level has pointers to PageDesc.  */
+static void *l1_map[V_L1_SIZE];
+
+/* statistics */
+static int tb_flush_count;
+static int tb_phys_invalidate_count;
 
 /* code generation context */
 TCGContext tcg_ctx;
 
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2);
+
 void cpu_gen_init(void)
 {
     tcg_context_init(&tcg_ctx); 
@@ -155,3 +265,1612 @@ int cpu_restore_state(TranslationBlock *tb,
 #endif
     return 0;
 }
+
+#ifdef _WIN32
+static inline void map_exec(void *addr, long size)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size,
+                   PAGE_EXECUTE_READWRITE, &old_protect);
+}
+#else
+static inline void map_exec(void *addr, long size)
+{
+    unsigned long start, end, page_size;
+
+    page_size = getpagesize();
+    start = (unsigned long)addr;
+    start &= ~(page_size - 1);
+
+    end = (unsigned long)addr + size;
+    end += page_size - 1;
+    end &= ~(page_size - 1);
+
+    mprotect((void *)start, end - start,
+             PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+#endif
+
+static void page_init(void)
+{
+    /* NOTE: we can always suppose that qemu_host_page_size >=
+       TARGET_PAGE_SIZE */
+#ifdef _WIN32
+    {
+        SYSTEM_INFO system_info;
+
+        GetSystemInfo(&system_info);
+        qemu_real_host_page_size = system_info.dwPageSize;
+    }
+#else
+    qemu_real_host_page_size = getpagesize();
+#endif
+    if (qemu_host_page_size == 0) {
+        qemu_host_page_size = qemu_real_host_page_size;
+    }
+    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
+        qemu_host_page_size = TARGET_PAGE_SIZE;
+    }
+    qemu_host_page_mask = ~(qemu_host_page_size - 1);
+
+#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
+    {
+#ifdef HAVE_KINFO_GETVMMAP
+        struct kinfo_vmentry *freep;
+        int i, cnt;
+
+        freep = kinfo_getvmmap(getpid(), &cnt);
+        if (freep) {
+            mmap_lock();
+            for (i = 0; i < cnt; i++) {
+                unsigned long startaddr, endaddr;
+
+                startaddr = freep[i].kve_start;
+                endaddr = freep[i].kve_end;
+                if (h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                    } else {
+#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
+                        endaddr = ~0ul;
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+#endif
+                    }
+                }
+            }
+            free(freep);
+            mmap_unlock();
+        }
+#else
+        FILE *f;
+
+        last_brk = (unsigned long)sbrk(0);
+
+        f = fopen("/compat/linux/proc/self/maps", "r");
+        if (f) {
+            mmap_lock();
+
+            do {
+                unsigned long startaddr, endaddr;
+                int n;
+
+                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+
+                if (n == 2 && h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                    } else {
+                        endaddr = ~0ul;
+                    }
+                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                }
+            } while (!feof(f));
+
+            fclose(f);
+            mmap_unlock();
+        }
+#endif
+    }
+#endif
+}
+
+static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
+{
+    PageDesc *pd;
+    void **lp;
+    int i;
+
+#if defined(CONFIG_USER_ONLY)
+    /* We can't use g_malloc because it may recurse into a locked mutex. */
+# define ALLOC(P, SIZE)                                 \
+    do {                                                \
+        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+    } while (0)
+#else
+# define ALLOC(P, SIZE) \
+    do { P = g_malloc0(SIZE); } while (0)
+#endif
+
+    /* Level 1.  Always allocated.  */
+    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+    /* Level 2..N-1.  */
+    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            ALLOC(p, sizeof(void *) * L2_SIZE);
+            *lp = p;
+        }
+
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+    }
+
+    pd = *lp;
+    if (pd == NULL) {
+        if (!alloc) {
+            return NULL;
+        }
+        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+        *lp = pd;
+    }
+
+#undef ALLOC
+
+    return pd + (index & (L2_SIZE - 1));
+}
+
+static inline PageDesc *page_find(tb_page_addr_t index)
+{
+    return page_find_alloc(index, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+#define mmap_lock() do { } while (0)
+#define mmap_unlock() do { } while (0)
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+/* Currently it is not recommended to allocate big chunks of data in
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
+#endif
+
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
+{
+    /* Size the buffer.  */
+    if (tb_size == 0) {
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+#else
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
+#endif
+    }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
+    code_gen_buffer_size = tb_size;
+    return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024;
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
+#else
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
+    qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
+
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+    code_gen_buffer_size -= 1024;
+
+    code_gen_buffer_max_size = code_gen_buffer_size -
+        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
+    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+}
+
+/* Must be called before using the QEMU cpus. 'tb_size' is the size
+   (in bytes) allocated to the translation buffer. Zero means default
+   size. */
+void tcg_exec_init(unsigned long tb_size)
+{
+    cpu_gen_init();
+    code_gen_alloc(tb_size);
+    code_gen_ptr = code_gen_buffer;
+    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
+    page_init();
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+    /* There's no guest base to take into account, so go ahead and
+       initialize the prologue now.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
+}
+
+bool tcg_enabled(void)
+{
+    return code_gen_buffer != NULL;
+}
+
+/* Allocate a new translation block. Flush the translation buffer if
+   too many translation blocks or too much generated code. */
+static TranslationBlock *tb_alloc(target_ulong pc)
+{
+    TranslationBlock *tb;
+
+    if (nb_tbs >= code_gen_max_blocks ||
+        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
+        return NULL;
+    }
+    tb = &tbs[nb_tbs++];
+    tb->pc = pc;
+    tb->cflags = 0;
+    return tb;
+}
+
+void tb_free(TranslationBlock *tb)
+{
+    /* In practice this is mostly used for single use temporary TB
+       Ignore the hard cases and just back up if this TB happens to
+       be the last one generated.  */
+    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+        code_gen_ptr = tb->tc_ptr;
+        nb_tbs--;
+    }
+}
+
+static inline void invalidate_page_bitmap(PageDesc *p)
+{
+    if (p->code_bitmap) {
+        g_free(p->code_bitmap);
+        p->code_bitmap = NULL;
+    }
+    p->code_write_count = 0;
+}
+
+/* Set to NULL all the 'first_tb' fields in all PageDescs. */
+static void page_flush_tb_1(int level, void **lp)
+{
+    int i;
+
+    if (*lp == NULL) {
+        return;
+    }
+    if (level == 0) {
+        PageDesc *pd = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            pd[i].first_tb = NULL;
+            invalidate_page_bitmap(pd + i);
+        }
+    } else {
+        void **pp = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            page_flush_tb_1(level - 1, pp + i);
+        }
+    }
+}
+
+static void page_flush_tb(void)
+{
+    int i;
+
+    for (i = 0; i < V_L1_SIZE; i++) {
+        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+    }
+}
+
+/* flush all the translation blocks */
+/* XXX: tb_flush is currently not thread safe */
+void tb_flush(CPUArchState *env1)
+{
+    CPUArchState *env;
+
+#if defined(DEBUG_FLUSH)
+    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
+           (unsigned long)(code_gen_ptr - code_gen_buffer),
+           nb_tbs, nb_tbs > 0 ?
+           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+#endif
+    if ((unsigned long)(code_gen_ptr - code_gen_buffer)
+        > code_gen_buffer_size) {
+        cpu_abort(env1, "Internal error: code buffer overflow\n");
+    }
+    nb_tbs = 0;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
+    }
+
+    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    page_flush_tb();
+
+    code_gen_ptr = code_gen_buffer;
+    /* XXX: flush processor icache at this point if cache flush is
+       expensive */
+    tb_flush_count++;
+}
+
+#ifdef DEBUG_TB_CHECK
+
+static void tb_invalidate_check(target_ulong address)
+{
+    TranslationBlock *tb;
+    int i;
+
+    address &= TARGET_PAGE_MASK;
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+                  address >= tb->pc + tb->size)) {
+                printf("ERROR invalidate: address=" TARGET_FMT_lx
+                       " PC=%08lx size=%04x\n",
+                       address, (long)tb->pc, tb->size);
+            }
+        }
+    }
+}
+
+/* verify that all the pages have correct rights for code */
+static void tb_page_check(void)
+{
+    TranslationBlock *tb;
+    int i, flags1, flags2;
+
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            flags1 = page_get_flags(tb->pc);
+            flags2 = page_get_flags(tb->pc + tb->size - 1);
+            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+                       (long)tb->pc, tb->size, flags1, flags2);
+            }
+        }
+    }
+}
+
+#endif
+
+/* invalidate one TB */
+static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
+                             int next_offset)
+{
+    TranslationBlock *tb1;
+
+    for (;;) {
+        tb1 = *ptb;
+        if (tb1 == tb) {
+            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
+            break;
+        }
+        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
+    }
+}
+
+static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+    unsigned int n1;
+
+    for (;;) {
+        tb1 = *ptb;
+        n1 = (uintptr_t)tb1 & 3;
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        if (tb1 == tb) {
+            *ptb = tb1->page_next[n1];
+            break;
+        }
+        ptb = &tb1->page_next[n1];
+    }
+}
+
+static inline void tb_jmp_remove(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, **ptb;
+    unsigned int n1;
+
+    ptb = &tb->jmp_next[n];
+    tb1 = *ptb;
+    if (tb1) {
+        /* find tb(n) in circular list */
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break;
+            }
+            if (n1 == 2) {
+                ptb = &tb1->jmp_first;
+            } else {
+                ptb = &tb1->jmp_next[n1];
+            }
+        }
+        /* now we can suppress tb(n) from the list */
+        *ptb = tb->jmp_next[n];
+
+        tb->jmp_next[n] = NULL;
+    }
+}
+
+/* reset the jump entry 'n' of a TB so that it is not chained to
+   another TB */
+static inline void tb_reset_jump(TranslationBlock *tb, int n)
+{
+    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
+{
+    CPUArchState *env;
+    PageDesc *p;
+    unsigned int h, n1;
+    tb_page_addr_t phys_pc;
+    TranslationBlock *tb1, *tb2;
+
+    /* remove the TB from the hash list */
+    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    h = tb_phys_hash_func(phys_pc);
+    tb_remove(&tb_phys_hash[h], tb,
+              offsetof(TranslationBlock, phys_hash_next));
+
+    /* remove the TB from the page list */
+    if (tb->page_addr[0] != page_addr) {
+        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
+        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+
+    tb_invalidated_flag = 1;
+
+    /* remove the TB from the hash list */
+    h = tb_jmp_cache_hash_func(tb->pc);
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->tb_jmp_cache[h] == tb) {
+            env->tb_jmp_cache[h] = NULL;
+        }
+    }
+
+    /* suppress this TB from the two jump lists */
+    tb_jmp_remove(tb, 0);
+    tb_jmp_remove(tb, 1);
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for (;;) {
+        n1 = (uintptr_t)tb1 & 3;
+        if (n1 == 2) {
+            break;
+        }
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        tb2 = tb1->jmp_next[n1];
+        tb_reset_jump(tb1, n1);
+        tb1->jmp_next[n1] = NULL;
+        tb1 = tb2;
+    }
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
+
+    tb_phys_invalidate_count++;
+}
+
+static inline void set_bits(uint8_t *tab, int start, int len)
+{
+    int end, mask, end1;
+
+    end = start + len;
+    tab += start >> 3;
+    mask = 0xff << (start & 7);
+    if ((start & ~7) == (end & ~7)) {
+        if (start < end) {
+            mask &= ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    } else {
+        *tab++ |= mask;
+        start = (start + 8) & ~7;
+        end1 = end & ~7;
+        while (start < end1) {
+            *tab++ = 0xff;
+            start += 8;
+        }
+        if (start < end) {
+            mask = ~(0xff << (end & 7));
+            *tab |= mask;
+        }
+    }
+}
+
+static void build_page_bitmap(PageDesc *p)
+{
+    int n, tb_start, tb_end;
+    TranslationBlock *tb;
+
+    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
+
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->pc & ~TARGET_PAGE_MASK;
+            tb_end = tb_start + tb->size;
+            if (tb_end > TARGET_PAGE_SIZE) {
+                tb_end = TARGET_PAGE_SIZE;
+            }
+        } else {
+            tb_start = 0;
+            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
+        tb = tb->page_next[n];
+    }
+}
+
+TranslationBlock *tb_gen_code(CPUArchState *env,
+                              target_ulong pc, target_ulong cs_base,
+                              int flags, int cflags)
+{
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+    tb_page_addr_t phys_pc, phys_page2;
+    target_ulong virt_page2;
+    int code_gen_size;
+
+    phys_pc = get_page_addr_code(env, pc);
+    tb = tb_alloc(pc);
+    if (!tb) {
+        /* flush must be done */
+        tb_flush(env);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
+        /* Don't forget to invalidate previous TB info.  */
+        tb_invalidated_flag = 1;
+    }
+    tc_ptr = code_gen_ptr;
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = cs_base;
+    tb->flags = flags;
+    tb->cflags = cflags;
+    cpu_gen_code(env, tb, &code_gen_size);
+    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
+                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+    /* check next page if needed */
+    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
+    phys_page2 = -1;
+    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+        phys_page2 = get_page_addr_code(env, virt_page2);
+    }
+    tb_link_page(tb, phys_pc, phys_page2);
+    return tb;
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end may refer to *different* physical pages.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
+                              int is_cpu_write_access)
+{
+    while (start < end) {
+        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
+        start &= TARGET_PAGE_MASK;
+        start += TARGET_PAGE_SIZE;
+    }
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end must refer to the *same* physical page.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
+                                   int is_cpu_write_access)
+{
+    TranslationBlock *tb, *tb_next, *saved_tb;
+    CPUArchState *env = cpu_single_env;
+    tb_page_addr_t tb_start, tb_end;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    int current_tb_not_found = is_cpu_write_access;
+    TranslationBlock *current_tb = NULL;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif /* TARGET_HAS_PRECISE_SMC */
+
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    if (!p->code_bitmap &&
+        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
+        is_cpu_write_access) {
+        /* build code bitmap */
+        build_page_bitmap(p);
+    }
+
+    /* we remove all the TBs in the range [start, end[ */
+    /* XXX: see if in some cases it could be faster to invalidate all
+       the code */
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        tb_next = tb->page_next[n];
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + tb->size;
+        } else {
+            tb_start = tb->page_addr[1];
+            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        if (!(tb_end <= start || tb_start >= end)) {
+#ifdef TARGET_HAS_PRECISE_SMC
+            if (current_tb_not_found) {
+                current_tb_not_found = 0;
+                current_tb = NULL;
+                if (env->mem_io_pc) {
+                    /* now we have a real cpu fault */
+                    current_tb = tb_find_pc(env->mem_io_pc);
+                }
+            }
+            if (current_tb == tb &&
+                (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                its execution. We could be more precise by checking
+                that the modification is after the current PC, but it
+                would require a specialized function to partially
+                restore the CPU state */
+
+                current_tb_modified = 1;
+                cpu_restore_state(current_tb, env, env->mem_io_pc);
+                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                     &current_flags);
+            }
+#endif /* TARGET_HAS_PRECISE_SMC */
+            /* we need to do that to handle the case where a signal
+               occurs while doing tb_phys_invalidate() */
+            saved_tb = NULL;
+            if (env) {
+                saved_tb = env->current_tb;
+                env->current_tb = NULL;
+            }
+            tb_phys_invalidate(tb, -1);
+            if (env) {
+                env->current_tb = saved_tb;
+                if (env->interrupt_request && env->current_tb) {
+                    cpu_interrupt(env, env->interrupt_request);
+                }
+            }
+        }
+        tb = tb_next;
+    }
+#if !defined(CONFIG_USER_ONLY)
+    /* if no code remaining, no need to continue to use slow writes */
+    if (!p->first_tb) {
+        invalidate_page_bitmap(p);
+        if (is_cpu_write_access) {
+            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
+        }
+    }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+        cpu_resume_from_signal(env, NULL);
+    }
+#endif
+}
+
+/* len must be <= 8 and start must be a multiple of len */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
+{
+    PageDesc *p;
+    int offset, b;
+
+#if 0
+    if (1) {
+        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
+                  cpu_single_env->mem_io_vaddr, len,
+                  cpu_single_env->eip,
+                  cpu_single_env->eip +
+                  (intptr_t)cpu_single_env->segs[R_CS].base);
+    }
+#endif
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    if (p->code_bitmap) {
+        offset = start & ~TARGET_PAGE_MASK;
+        b = p->code_bitmap[offset >> 3] >> (offset & 7);
+        if (b & ((1 << len) - 1)) {
+            goto do_invalidate;
+        }
+    } else {
+    do_invalidate:
+        tb_invalidate_phys_page_range(start, start + len, 1);
+    }
+}
+
+#if !defined(CONFIG_SOFTMMU)
+static void tb_invalidate_phys_page(tb_page_addr_t addr,
+                                    uintptr_t pc, void *puc)
+{
+    TranslationBlock *tb;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    TranslationBlock *current_tb = NULL;
+    CPUArchState *env = cpu_single_env;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif
+
+    addr &= TARGET_PAGE_MASK;
+    p = page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        return;
+    }
+    tb = p->first_tb;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (tb && pc != 0) {
+        current_tb = tb_find_pc(pc);
+    }
+#endif
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3;
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+#ifdef TARGET_HAS_PRECISE_SMC
+        if (current_tb == tb &&
+            (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                   its execution. We could be more precise by checking
+                   that the modification is after the current PC, but it
+                   would require a specialized function to partially
+                   restore the CPU state */
+
+            current_tb_modified = 1;
+            cpu_restore_state(current_tb, env, pc);
+            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                 &current_flags);
+        }
+#endif /* TARGET_HAS_PRECISE_SMC */
+        tb_phys_invalidate(tb, addr);
+        tb = tb->page_next[n];
+    }
+    p->first_tb = NULL;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+        cpu_resume_from_signal(env, puc);
+    }
+#endif
+}
+#endif
+
+/* add the tb in the target page and protect it if necessary */
+static inline void tb_alloc_page(TranslationBlock *tb,
+                                 unsigned int n, tb_page_addr_t page_addr)
+{
+    PageDesc *p;
+#ifndef CONFIG_USER_ONLY
+    bool page_already_protected;
+#endif
+
+    tb->page_addr[n] = page_addr;
+    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
+    tb->page_next[n] = p->first_tb;
+#ifndef CONFIG_USER_ONLY
+    page_already_protected = p->first_tb != NULL;
+#endif
+    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
+    invalidate_page_bitmap(p);
+
+#if defined(TARGET_HAS_SMC) || 1
+
+#if defined(CONFIG_USER_ONLY)
+    if (p->flags & PAGE_WRITE) {
+        target_ulong addr;
+        PageDesc *p2;
+        int prot;
+
+        /* force the host page as non writable (writes will have a
+           page fault + mprotect overhead) */
+        page_addr &= qemu_host_page_mask;
+        prot = 0;
+        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
+            addr += TARGET_PAGE_SIZE) {
+
+            p2 = page_find(addr >> TARGET_PAGE_BITS);
+            if (!p2) {
+                continue;
+            }
+            prot |= p2->flags;
+            p2->flags &= ~PAGE_WRITE;
+          }
+        mprotect(g2h(page_addr), qemu_host_page_size,
+                 (prot & PAGE_BITS) & ~PAGE_WRITE);
+#ifdef DEBUG_TB_INVALIDATE
+        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
+               page_addr);
+#endif
+    }
+#else
+    /* if some code is already present, then the pages are already
+       protected. So we handle the case where only the first TB is
+       allocated in a physical page */
+    if (!page_already_protected) {
+        tlb_protect_code(page_addr);
+    }
+#endif
+
+#endif /* TARGET_HAS_SMC */
+}
+
+/* add a new TB and link it to the physical page tables. phys_page2 is
+   (-1) to indicate that only one page contains the TB. */
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2)
+{
+    unsigned int h;
+    TranslationBlock **ptb;
+
+    /* Grab the mmap lock to stop another thread invalidating this TB
+       before we are done.  */
+    mmap_lock();
+    /* add in the physical hash table */
+    h = tb_phys_hash_func(phys_pc);
+    ptb = &tb_phys_hash[h];
+    tb->phys_hash_next = *ptb;
+    *ptb = tb;
+
+    /* add in the page list */
+    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
+    if (phys_page2 != -1) {
+        tb_alloc_page(tb, 1, phys_page2);
+    } else {
+        tb->page_addr[1] = -1;
+    }
+
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
+    tb->jmp_next[0] = NULL;
+    tb->jmp_next[1] = NULL;
+
+    /* init original jump addresses */
+    if (tb->tb_next_offset[0] != 0xffff) {
+        tb_reset_jump(tb, 0);
+    }
+    if (tb->tb_next_offset[1] != 0xffff) {
+        tb_reset_jump(tb, 1);
+    }
+
+#ifdef DEBUG_TB_CHECK
+    tb_page_check();
+#endif
+    mmap_unlock();
+}
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* check whether the given addr is in TCG generated code buffer or not */
+bool is_tcg_gen_code(uintptr_t tc_ptr)
+{
+    /* This can be called during code generation, code_gen_buffer_max_size
+       is used instead of code_gen_ptr for upper boundary checking */
+    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
+            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
+}
+#endif
+
+/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
+   tb[1].tc_ptr. Return NULL if not found */
+TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+{
+    int m_min, m_max, m;
+    uintptr_t v;
+    TranslationBlock *tb;
+
+    if (nb_tbs <= 0) {
+        return NULL;
+    }
+    if (tc_ptr < (uintptr_t)code_gen_buffer ||
+        tc_ptr >= (uintptr_t)code_gen_ptr) {
+        return NULL;
+    }
+    /* binary search (cf Knuth) */
+    m_min = 0;
+    m_max = nb_tbs - 1;
+    while (m_min <= m_max) {
+        m = (m_min + m_max) >> 1;
+        tb = &tbs[m];
+        v = (uintptr_t)tb->tc_ptr;
+        if (v == tc_ptr) {
+            return tb;
+        } else if (tc_ptr < v) {
+            m_max = m - 1;
+        } else {
+            m_min = m + 1;
+        }
+    }
+    return &tbs[m_max];
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb);
+
+static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, *tb_next, **ptb;
+    unsigned int n1;
+
+    tb1 = tb->jmp_next[n];
+    if (tb1 != NULL) {
+        /* find head of list */
+        for (;;) {
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == 2) {
+                break;
+            }
+            tb1 = tb1->jmp_next[n1];
+        }
+        /* we are now sure now that tb jumps to tb1 */
+        tb_next = tb1;
+
+        /* remove tb from the jmp_first list */
+        ptb = &tb_next->jmp_first;
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break;
+            }
+            ptb = &tb1->jmp_next[n1];
+        }
+        *ptb = tb->jmp_next[n];
+        tb->jmp_next[n] = NULL;
+
+        /* suppress the jump to next tb in generated code */
+        tb_reset_jump(tb, n);
+
+        /* suppress jumps in the tb on which we could have jumped */
+        tb_reset_jump_recursive(tb_next);
+    }
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb)
+{
+    tb_reset_jump_recursive2(tb, 0);
+    tb_reset_jump_recursive2(tb, 1);
+}
+
+#if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
+void tb_invalidate_phys_addr(hwaddr addr)
+{
+    ram_addr_t ram_addr;
+    MemoryRegionSection *section;
+
+    section = phys_page_find(address_space_memory.dispatch,
+                             addr >> TARGET_PAGE_BITS);
+    if (!(memory_region_is_ram(section->mr)
+          || (section->mr->rom_device && section->mr->readable))) {
+        return;
+    }
+    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
+        + memory_region_section_addr(section, addr);
+    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+}
+#endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
+
+void cpu_unlink_tb(CPUArchState *env)
+{
+    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
+       problem and hope the cpu will stop of its own accord.  For userspace
+       emulation this often isn't actually as bad as it sounds.  Often
+       signals are used primarily to interrupt blocking syscalls.  */
+    TranslationBlock *tb;
+    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
+
+    spin_lock(&interrupt_lock);
+    tb = env->current_tb;
+    /* if the cpu is currently executing code, we must unlink it and
+       all the potentially executing TB */
+    if (tb) {
+        env->current_tb = NULL;
+        tb_reset_jump_recursive(tb);
+    }
+    spin_unlock(&interrupt_lock);
+}
+
+void tb_check_watchpoint(CPUArchState *env)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(env->mem_io_pc);
+    if (!tb) {
+        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
+                  (void *)env->mem_io_pc);
+    }
+    cpu_restore_state(tb, env, env->mem_io_pc);
+    tb_phys_invalidate(tb, -1);
+}
+
+#ifndef CONFIG_USER_ONLY
+/* mask must never be zero, except for A20 change call */
+static void tcg_handle_interrupt(CPUArchState *env, int mask)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    int old_mask;
+
+    old_mask = env->interrupt_request;
+    env->interrupt_request |= mask;
+
+    /*
+     * If called from iothread context, wake the target cpu in
+     * case its halted.
+     */
+    if (!qemu_cpu_is_self(cpu)) {
+        qemu_cpu_kick(cpu);
+        return;
+    }
+
+    if (use_icount) {
+        env->icount_decr.u16.high = 0xffff;
+        if (!can_do_io(env)
+            && (mask & ~old_mask) != 0) {
+            cpu_abort(env, "Raised interrupt while not in I/O function");
+        }
+    } else {
+        cpu_unlink_tb(env);
+    }
+}
+
+CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
+
+/* in deterministic execution mode, instructions doing device I/Os
+   must be at the end of the TB */
+void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+    uint32_t n, cflags;
+    target_ulong pc, cs_base;
+    uint64_t flags;
+
+    tb = tb_find_pc(retaddr);
+    if (!tb) {
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
+                  (void *)retaddr);
+    }
+    n = env->icount_decr.u16.low + tb->icount;
+    cpu_restore_state(tb, env, retaddr);
+    /* Calculate how many instructions had been executed before the fault
+       occurred.  */
+    n = n - env->icount_decr.u16.low;
+    /* Generate a new TB ending on the I/O insn.  */
+    n++;
+    /* On MIPS and SH, delay slot instructions can only be restarted if
+       they were already the first instruction in the TB.  If this is not
+       the first instruction in a TB then re-execute the preceding
+       branch.  */
+#if defined(TARGET_MIPS)
+    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
+        env->active_tc.PC -= 4;
+        env->icount_decr.u16.low++;
+        env->hflags &= ~MIPS_HFLAG_BMASK;
+    }
+#elif defined(TARGET_SH4)
+    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+            && n > 1) {
+        env->pc -= 2;
+        env->icount_decr.u16.low++;
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+    }
+#endif
+    /* This should never happen.  */
+    if (n > CF_COUNT_MASK) {
+        cpu_abort(env, "TB too big during recompile");
+    }
+
+    cflags = n | CF_LAST_IO;
+    pc = tb->pc;
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    tb_phys_invalidate(tb, -1);
+    /* FIXME: In theory this could raise an exception.  In practice
+       we have already translated the block once so it's probably ok.  */
+    tb_gen_code(env, pc, cs_base, flags, cflags);
+    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
+       the first in the TB) then we end up generating a whole new TB and
+       repeating the fault, which is horribly inefficient.
+       Better would be to execute just this insn uncached, or generate a
+       second new TB.  */
+    cpu_resume_from_signal(env, NULL);
+}
+
+void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
+{
+    unsigned int i;
+
+    /* Discard jump cache entries for any tb which might potentially
+       overlap the flushed page.  */
+    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
+    memset(&env->tb_jmp_cache[i], 0,
+           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+
+    i = tb_jmp_cache_hash_page(addr);
+    memset(&env->tb_jmp_cache[i], 0,
+           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+}
+
+void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
+{
+    int i, target_code_size, max_target_code_size;
+    int direct_jmp_count, direct_jmp2_count, cross_page;
+    TranslationBlock *tb;
+
+    target_code_size = 0;
+    max_target_code_size = 0;
+    cross_page = 0;
+    direct_jmp_count = 0;
+    direct_jmp2_count = 0;
+    for (i = 0; i < nb_tbs; i++) {
+        tb = &tbs[i];
+        target_code_size += tb->size;
+        if (tb->size > max_target_code_size) {
+            max_target_code_size = tb->size;
+        }
+        if (tb->page_addr[1] != -1) {
+            cross_page++;
+        }
+        if (tb->tb_next_offset[0] != 0xffff) {
+            direct_jmp_count++;
+            if (tb->tb_next_offset[1] != 0xffff) {
+                direct_jmp2_count++;
+            }
+        }
+    }
+    /* XXX: avoid using doubles ? */
+    cpu_fprintf(f, "Translation buffer state:\n");
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
+                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+    cpu_fprintf(f, "TB count            %d/%d\n",
+                nb_tbs, code_gen_max_blocks);
+    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
+                nb_tbs ? target_code_size / nb_tbs : 0,
+                max_target_code_size);
+    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
+                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
+                target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
+                / target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
+            cross_page,
+            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
+                direct_jmp_count,
+                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                direct_jmp2_count,
+                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "\nStatistics:\n");
+    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
+    tcg_dump_info(f, cpu_fprintf);
+}
+
+#else /* CONFIG_USER_ONLY */
+
+void cpu_interrupt(CPUArchState *env, int mask)
+{
+    env->interrupt_request |= mask;
+    cpu_unlink_tb(env);
+}
+
+/*
+ * Walks guest process memory "regions" one by one
+ * and calls callback function 'fn' for each region.
+ */
+struct walk_memory_regions_data {
+    walk_memory_regions_fn fn;
+    void *priv;
+    uintptr_t start;
+    int prot;
+};
+
+static int walk_memory_regions_end(struct walk_memory_regions_data *data,
+                                   abi_ulong end, int new_prot)
+{
+    if (data->start != -1ul) {
+        int rc = data->fn(data->priv, data->start, end, data->prot);
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    data->start = (new_prot ? end : -1ul);
+    data->prot = new_prot;
+
+    return 0;
+}
+
+static int walk_memory_regions_1(struct walk_memory_regions_data *data,
+                                 abi_ulong base, int level, void **lp)
+{
+    abi_ulong pa;
+    int i, rc;
+
+    if (*lp == NULL) {
+        return walk_memory_regions_end(data, base, 0);
+    }
+
+    if (level == 0) {
+        PageDesc *pd = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            int prot = pd[i].flags;
+
+            pa = base | (i << TARGET_PAGE_BITS);
+            if (prot != data->prot) {
+                rc = walk_memory_regions_end(data, pa, prot);
+                if (rc != 0) {
+                    return rc;
+                }
+            }
+        }
+    } else {
+        void **pp = *lp;
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            pa = base | ((abi_ulong)i <<
+                (TARGET_PAGE_BITS + L2_BITS * level));
+            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
+            if (rc != 0) {
+                return rc;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
+{
+    struct walk_memory_regions_data data;
+    uintptr_t i;
+
+    data.fn = fn;
+    data.priv = priv;
+    data.start = -1ul;
+    data.prot = 0;
+
+    for (i = 0; i < V_L1_SIZE; i++) {
+        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
+                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    return walk_memory_regions_end(&data, 0, 0);
+}
+
+static int dump_region(void *priv, abi_ulong start,
+    abi_ulong end, unsigned long prot)
+{
+    FILE *f = (FILE *)priv;
+
+    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
+        " "TARGET_ABI_FMT_lx" %c%c%c\n",
+        start, end, end - start,
+        ((prot & PAGE_READ) ? 'r' : '-'),
+        ((prot & PAGE_WRITE) ? 'w' : '-'),
+        ((prot & PAGE_EXEC) ? 'x' : '-'));
+
+    return 0;
+}
+
+/* dump memory mappings */
+void page_dump(FILE *f)
+{
+    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
+            "start", "end", "size", "prot");
+    walk_memory_regions(f, dump_region);
+}
+
+int page_get_flags(target_ulong address)
+{
+    PageDesc *p;
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p) {
+        return 0;
+    }
+    return p->flags;
+}
+
+/* Modify the flags of a page and invalidate the code if necessary.
+   The flag PAGE_WRITE_ORG is positioned automatically depending
+   on PAGE_WRITE.  The mmap_lock should already be held.  */
+void page_set_flags(target_ulong start, target_ulong end, int flags)
+{
+    target_ulong addr, len;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+    assert(start < end);
+
+    start = start & TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+
+    if (flags & PAGE_WRITE) {
+        flags |= PAGE_WRITE_ORG;
+    }
+
+    for (addr = start, len = end - start;
+         len != 0;
+         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+
+        /* If the write protection bit is set, then we invalidate
+           the code inside.  */
+        if (!(p->flags & PAGE_WRITE) &&
+            (flags & PAGE_WRITE) &&
+            p->first_tb) {
+            tb_invalidate_phys_page(addr, 0, NULL);
+        }
+        p->flags = flags;
+    }
+}
+
+int page_check_range(target_ulong start, target_ulong len, int flags)
+{
+    PageDesc *p;
+    target_ulong end;
+    target_ulong addr;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+
+    if (len == 0) {
+        return 0;
+    }
+    if (start + len - 1 < start) {
+        /* We've wrapped around.  */
+        return -1;
+    }
+
+    /* must do before we loose bits in the next step */
+    end = TARGET_PAGE_ALIGN(start + len);
+    start = start & TARGET_PAGE_MASK;
+
+    for (addr = start, len = end - start;
+         len != 0;
+         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+        p = page_find(addr >> TARGET_PAGE_BITS);
+        if (!p) {
+            return -1;
+        }
+        if (!(p->flags & PAGE_VALID)) {
+            return -1;
+        }
+
+        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
+            return -1;
+        }
+        if (flags & PAGE_WRITE) {
+            if (!(p->flags & PAGE_WRITE_ORG)) {
+                return -1;
+            }
+            /* unprotect the page if it was put read-only because it
+               contains translated code */
+            if (!(p->flags & PAGE_WRITE)) {
+                if (!page_unprotect(addr, 0, NULL)) {
+                    return -1;
+                }
+            }
+            return 0;
+        }
+    }
+    return 0;
+}
+
+/* called from signal handler: invalidate the code and unprotect the
+   page. Return TRUE if the fault was successfully handled. */
+int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
+{
+    unsigned int prot;
+    PageDesc *p;
+    target_ulong host_start, host_end, addr;
+
+    /* Technically this isn't safe inside a signal handler.  However we
+       know this only ever happens in a synchronous SEGV handler, so in
+       practice it seems to be ok.  */
+    mmap_lock();
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p) {
+        mmap_unlock();
+        return 0;
+    }
+
+    /* if the page was really writable, then we change its
+       protection back to writable */
+    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
+        host_start = address & qemu_host_page_mask;
+        host_end = host_start + qemu_host_page_size;
+
+        prot = 0;
+        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
+            p = page_find(addr >> TARGET_PAGE_BITS);
+            p->flags |= PAGE_WRITE;
+            prot |= p->flags;
+
+            /* and since the content will be modified, we must invalidate
+               the corresponding translated code. */
+            tb_invalidate_phys_page(addr, pc, puc);
+#ifdef DEBUG_TB_CHECK
+            tb_invalidate_check(addr);
+#endif
+        }
+        mprotect((void *)g2h(host_start), qemu_host_page_size,
+                 prot & PAGE_BITS);
+
+        mmap_unlock();
+        return 1;
+    }
+    mmap_unlock();
+    return 0;
+}
+#endif /* CONFIG_USER_ONLY */
diff --git a/translate-all.h b/translate-all.h
new file mode 100644
index 0000000..b181fb4
--- /dev/null
+++ b/translate-all.h
@@ -0,0 +1,34 @@
+/*
+ *  Translated block handling
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef TRANSLATE_ALL_H
+#define TRANSLATE_ALL_H
+
+/* Size of the L2 (and L3, etc) page tables.  */
+#define L2_BITS 10
+#define L2_SIZE (1 << L2_BITS)
+
+#define P_L2_LEVELS \
+    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
+
+/* translate-all.c */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
+void cpu_unlink_tb(CPUArchState *env);
+void tb_check_watchpoint(CPUArchState *env);
+
+#endif /* TRANSLATE_ALL_H */
commit 5a3165263ae6782a7fe712a0a0c29c17468b9b68
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sun Dec 2 21:28:09 2012 +0000

    exec: extract TB watchpoint check
    
    Will be moved by the next patch.
    
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/exec.c b/exec.c
index 2e56a8a..a1f617d 100644
--- a/exec.c
+++ b/exec.c
@@ -2987,12 +2987,24 @@ static const MemoryRegionOps notdirty_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void tb_check_watchpoint(CPUArchState *env)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(env->mem_io_pc);
+    if (!tb) {
+        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
+                  (void *)env->mem_io_pc);
+    }
+    cpu_restore_state(tb, env, env->mem_io_pc);
+    tb_phys_invalidate(tb, -1);
+}
+
 /* Generate a debug exception if a watchpoint has been hit.  */
 static void check_watchpoint(int offset, int len_mask, int flags)
 {
     CPUArchState *env = cpu_single_env;
     target_ulong pc, cs_base;
-    TranslationBlock *tb;
     target_ulong vaddr;
     CPUWatchpoint *wp;
     int cpu_flags;
@@ -3011,13 +3023,7 @@ static void check_watchpoint(int offset, int len_mask, int flags)
             wp->flags |= BP_WATCHPOINT_HIT;
             if (!env->watchpoint_hit) {
                 env->watchpoint_hit = wp;
-                tb = tb_find_pc(env->mem_io_pc);
-                if (!tb) {
-                    cpu_abort(env, "check_watchpoint: could not find TB for "
-                              "pc=%p", (void *)env->mem_io_pc);
-                }
-                cpu_restore_state(tb, env, env->mem_io_pc);
-                tb_phys_invalidate(tb, -1);
+                tb_check_watchpoint(env);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     env->exception_index = EXCP_DEBUG;
                     cpu_loop_exit(env);
commit 44209fc4edfd92464eb0413acfd434b687be945a
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sun Dec 2 17:25:06 2012 +0000

    exec: fix coding style
    
    Fix coding style in areas to be moved by later patches.
    
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/exec.c b/exec.c
index 0594b07..2e56a8a 100644
--- a/exec.c
+++ b/exec.c
@@ -79,6 +79,7 @@
 
 #define SMC_BITMAP_USE_THRESHOLD 10
 
+/* Code generation and translation blocks */
 static TranslationBlock *tbs;
 static int code_gen_max_blocks;
 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
@@ -207,21 +208,20 @@ static inline void map_exec(void *addr, long size)
     DWORD old_protect;
     VirtualProtect(addr, size,
                    PAGE_EXECUTE_READWRITE, &old_protect);
-    
 }
 #else
 static inline void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
-    
+
     page_size = getpagesize();
     start = (unsigned long)addr;
     start &= ~(page_size - 1);
-    
+
     end = (unsigned long)addr + size;
     end += page_size - 1;
     end &= ~(page_size - 1);
-    
+
     mprotect((void *)start, end - start,
              PROT_READ | PROT_WRITE | PROT_EXEC);
 }
@@ -241,10 +241,12 @@ static void page_init(void)
 #else
     qemu_real_host_page_size = getpagesize();
 #endif
-    if (qemu_host_page_size == 0)
+    if (qemu_host_page_size == 0) {
         qemu_host_page_size = qemu_real_host_page_size;
-    if (qemu_host_page_size < TARGET_PAGE_SIZE)
+    }
+    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
         qemu_host_page_size = TARGET_PAGE_SIZE;
+    }
     qemu_host_page_mask = ~(qemu_host_page_size - 1);
 
 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
@@ -291,7 +293,7 @@ static void page_init(void)
                 unsigned long startaddr, endaddr;
                 int n;
 
-                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
 
                 if (n == 2 && h2g_valid(startaddr)) {
                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
@@ -591,6 +593,7 @@ static inline void *alloc_code_gen_buffer(void)
 static inline void *alloc_code_gen_buffer(void)
 {
     void *buf = g_malloc(code_gen_buffer_size);
+
     if (buf) {
         map_exec(buf, code_gen_buffer_size);
     }
@@ -737,8 +740,9 @@ static TranslationBlock *tb_alloc(target_ulong pc)
     TranslationBlock *tb;
 
     if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
+        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
         return NULL;
+    }
     tb = &tbs[nb_tbs++];
     tb->pc = pc;
     tb->cflags = 0;
@@ -766,8 +770,7 @@ static inline void invalidate_page_bitmap(PageDesc *p)
 }
 
 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
-
-static void page_flush_tb_1 (int level, void **lp)
+static void page_flush_tb_1(int level, void **lp)
 {
     int i;
 
@@ -776,14 +779,16 @@ static void page_flush_tb_1 (int level, void **lp)
     }
     if (level == 0) {
         PageDesc *pd = *lp;
+
         for (i = 0; i < L2_SIZE; ++i) {
             pd[i].first_tb = NULL;
             invalidate_page_bitmap(pd + i);
         }
     } else {
         void **pp = *lp;
+
         for (i = 0; i < L2_SIZE; ++i) {
-            page_flush_tb_1 (level - 1, pp + i);
+            page_flush_tb_1(level - 1, pp + i);
         }
     }
 }
@@ -791,6 +796,7 @@ static void page_flush_tb_1 (int level, void **lp)
 static void page_flush_tb(void)
 {
     int i;
+
     for (i = 0; i < V_L1_SIZE; i++) {
         page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
     }
@@ -801,22 +807,24 @@ static void page_flush_tb(void)
 void tb_flush(CPUArchState *env1)
 {
     CPUArchState *env;
+
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
            (unsigned long)(code_gen_ptr - code_gen_buffer),
            nb_tbs, nb_tbs > 0 ?
            ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
 #endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
+    if ((unsigned long)(code_gen_ptr - code_gen_buffer)
+        > code_gen_buffer_size) {
         cpu_abort(env1, "Internal error: code buffer overflow\n");
-
+    }
     nb_tbs = 0;
 
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
     }
 
-    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
+    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
     page_flush_tb();
 
     code_gen_ptr = code_gen_buffer;
@@ -831,9 +839,10 @@ static void tb_invalidate_check(target_ulong address)
 {
     TranslationBlock *tb;
     int i;
+
     address &= TARGET_PAGE_MASK;
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
             if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                   address >= tb->pc + tb->size)) {
                 printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -850,8 +859,8 @@ static void tb_page_check(void)
     TranslationBlock *tb;
     int i, flags1, flags2;
 
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
             flags1 = page_get_flags(tb->pc);
             flags2 = page_get_flags(tb->pc + tb->size - 1);
             if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -864,12 +873,14 @@ static void tb_page_check(void)
 
 #endif
 
+
 /* invalidate one TB */
 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                              int next_offset)
 {
     TranslationBlock *tb1;
-    for(;;) {
+
+    for (;;) {
         tb1 = *ptb;
         if (tb1 == tb) {
             *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
@@ -884,7 +895,7 @@ static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
     TranslationBlock *tb1;
     unsigned int n1;
 
-    for(;;) {
+    for (;;) {
         tb1 = *ptb;
         n1 = (uintptr_t)tb1 & 3;
         tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
@@ -905,12 +916,13 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n)
     tb1 = *ptb;
     if (tb1) {
         /* find tb(n) in circular list */
-        for(;;) {
+        for (;;) {
             tb1 = *ptb;
             n1 = (uintptr_t)tb1 & 3;
             tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
+            if (n1 == n && tb1 == tb) {
                 break;
+            }
             if (n1 == 2) {
                 ptb = &tb1->jmp_first;
             } else {
@@ -961,9 +973,10 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (env->tb_jmp_cache[h] == tb)
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->tb_jmp_cache[h] == tb) {
             env->tb_jmp_cache[h] = NULL;
+        }
     }
 
     /* suppress this TB from the two jump lists */
@@ -972,10 +985,11 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 
     /* suppress any remaining jumps to this TB */
     tb1 = tb->jmp_first;
-    for(;;) {
+    for (;;) {
         n1 = (uintptr_t)tb1 & 3;
-        if (n1 == 2)
+        if (n1 == 2) {
             break;
+        }
         tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
         tb2 = tb1->jmp_next[n1];
         tb_reset_jump(tb1, n1);
@@ -1031,8 +1045,9 @@ static void build_page_bitmap(PageDesc *p)
                it is not a problem */
             tb_start = tb->pc & ~TARGET_PAGE_MASK;
             tb_end = tb_start + tb->size;
-            if (tb_end > TARGET_PAGE_SIZE)
+            if (tb_end > TARGET_PAGE_SIZE) {
                 tb_end = TARGET_PAGE_SIZE;
+            }
         } else {
             tb_start = 0;
             tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
@@ -1123,8 +1138,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 #endif /* TARGET_HAS_PRECISE_SMC */
 
     p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
+    if (!p) {
         return;
+    }
     if (!p->code_bitmap &&
         ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
         is_cpu_write_access) {
@@ -1133,7 +1149,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
     }
 
     /* we remove all the TBs in the range [start, end[ */
-    /* XXX: see if in some cases it could be faster to invalidate all the code */
+    /* XXX: see if in some cases it could be faster to invalidate all
+       the code */
     tb = p->first_tb;
     while (tb != NULL) {
         n = (uintptr_t)tb & 3;
@@ -1183,8 +1200,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
             tb_phys_invalidate(tb, -1);
             if (env) {
                 env->current_tb = saved_tb;
-                if (env->interrupt_request && env->current_tb)
+                if (env->interrupt_request && env->current_tb) {
                     cpu_interrupt(env, env->interrupt_request);
+                }
             }
         }
         tb = tb_next;
@@ -1215,6 +1233,7 @@ static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
 {
     PageDesc *p;
     int offset, b;
+
 #if 0
     if (1) {
         qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
@@ -1225,13 +1244,15 @@ static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
     }
 #endif
     p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
+    if (!p) {
         return;
+    }
     if (p->code_bitmap) {
         offset = start & ~TARGET_PAGE_MASK;
         b = p->code_bitmap[offset >> 3] >> (offset & 7);
-        if (b & ((1 << len) - 1))
+        if (b & ((1 << len) - 1)) {
             goto do_invalidate;
+        }
     } else {
     do_invalidate:
         tb_invalidate_phys_page_range(start, start + len, 1);
@@ -1256,8 +1277,9 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
 
     addr &= TARGET_PAGE_MASK;
     p = page_find(addr >> TARGET_PAGE_BITS);
-    if (!p)
+    if (!p) {
         return;
+    }
     tb = p->first_tb;
 #ifdef TARGET_HAS_PRECISE_SMC
     if (tb && pc != 0) {
@@ -1329,12 +1351,13 @@ static inline void tb_alloc_page(TranslationBlock *tb,
            page fault + mprotect overhead) */
         page_addr &= qemu_host_page_mask;
         prot = 0;
-        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
+        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
             addr += TARGET_PAGE_SIZE) {
 
-            p2 = page_find (addr >> TARGET_PAGE_BITS);
-            if (!p2)
+            p2 = page_find(addr >> TARGET_PAGE_BITS);
+            if (!p2) {
                 continue;
+            }
             prot |= p2->flags;
             p2->flags &= ~PAGE_WRITE;
           }
@@ -1376,20 +1399,23 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 
     /* add in the page list */
     tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
-    if (phys_page2 != -1)
+    if (phys_page2 != -1) {
         tb_alloc_page(tb, 1, phys_page2);
-    else
+    } else {
         tb->page_addr[1] = -1;
+    }
 
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
     tb->jmp_next[0] = NULL;
     tb->jmp_next[1] = NULL;
 
     /* init original jump addresses */
-    if (tb->tb_next_offset[0] != 0xffff)
+    if (tb->tb_next_offset[0] != 0xffff) {
         tb_reset_jump(tb, 0);
-    if (tb->tb_next_offset[1] != 0xffff)
+    }
+    if (tb->tb_next_offset[1] != 0xffff) {
         tb_reset_jump(tb, 1);
+    }
 
 #ifdef DEBUG_TB_CHECK
     tb_page_check();
@@ -1416,8 +1442,9 @@ TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     uintptr_t v;
     TranslationBlock *tb;
 
-    if (nb_tbs <= 0)
+    if (nb_tbs <= 0) {
         return NULL;
+    }
     if (tc_ptr < (uintptr_t)code_gen_buffer ||
         tc_ptr >= (uintptr_t)code_gen_ptr) {
         return NULL;
@@ -1429,9 +1456,9 @@ TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
         m = (m_min + m_max) >> 1;
         tb = &tbs[m];
         v = (uintptr_t)tb->tc_ptr;
-        if (v == tc_ptr)
+        if (v == tc_ptr) {
             return tb;
-        else if (tc_ptr < v) {
+        } else if (tc_ptr < v) {
             m_max = m - 1;
         } else {
             m_min = m + 1;
@@ -1450,11 +1477,12 @@ static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
     tb1 = tb->jmp_next[n];
     if (tb1 != NULL) {
         /* find head of list */
-        for(;;) {
+        for (;;) {
             n1 = (uintptr_t)tb1 & 3;
             tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == 2)
+            if (n1 == 2) {
                 break;
+            }
             tb1 = tb1->jmp_next[n1];
         }
         /* we are now sure now that tb jumps to tb1 */
@@ -1462,12 +1490,13 @@ static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
 
         /* remove tb from the jmp_first list */
         ptb = &tb_next->jmp_first;
-        for(;;) {
+        for (;;) {
             tb1 = *ptb;
             n1 = (uintptr_t)tb1 & 3;
             tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
+            if (n1 == n && tb1 == tb) {
                 break;
+            }
             ptb = &tb1->jmp_next[n1];
         }
         *ptb = tb->jmp_next[n];
@@ -1499,7 +1528,8 @@ void tb_invalidate_phys_addr(hwaddr addr)
     ram_addr_t ram_addr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch,
+                             addr >> TARGET_PAGE_BITS);
     if (!(memory_region_is_ram(section->mr)
           || (section->mr->rom_device && section->mr->readable))) {
         return;
@@ -1939,9 +1969,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
  * Walks guest process memory "regions" one by one
  * and calls callback function 'fn' for each region.
  */
-
-struct walk_memory_regions_data
-{
+struct walk_memory_regions_data {
     walk_memory_regions_fn fn;
     void *priv;
     uintptr_t start;
@@ -1976,6 +2004,7 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data,
 
     if (level == 0) {
         PageDesc *pd = *lp;
+
         for (i = 0; i < L2_SIZE; ++i) {
             int prot = pd[i].flags;
 
@@ -1989,6 +2018,7 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data,
         }
     } else {
         void **pp = *lp;
+
         for (i = 0; i < L2_SIZE; ++i) {
             pa = base | ((abi_ulong)i <<
                 (TARGET_PAGE_BITS + L2_BITS * level));
@@ -2015,6 +2045,7 @@ int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
     for (i = 0; i < V_L1_SIZE; i++) {
         int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
                                        V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+
         if (rc != 0) {
             return rc;
         }
@@ -2035,7 +2066,7 @@ static int dump_region(void *priv, abi_ulong start,
         ((prot & PAGE_WRITE) ? 'w' : '-'),
         ((prot & PAGE_EXEC) ? 'x' : '-'));
 
-    return (0);
+    return 0;
 }
 
 /* dump memory mappings */
@@ -2051,8 +2082,9 @@ int page_get_flags(target_ulong address)
     PageDesc *p;
 
     p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p)
+    if (!p) {
         return 0;
+    }
     return p->flags;
 }
 
@@ -2115,28 +2147,34 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
         return -1;
     }
 
-    end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
+    /* must do before we loose bits in the next step */
+    end = TARGET_PAGE_ALIGN(start + len);
     start = start & TARGET_PAGE_MASK;
 
     for (addr = start, len = end - start;
          len != 0;
          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
         p = page_find(addr >> TARGET_PAGE_BITS);
-        if( !p )
+        if (!p) {
             return -1;
-        if( !(p->flags & PAGE_VALID) )
+        }
+        if (!(p->flags & PAGE_VALID)) {
             return -1;
+        }
 
-        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
+        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
             return -1;
+        }
         if (flags & PAGE_WRITE) {
-            if (!(p->flags & PAGE_WRITE_ORG))
+            if (!(p->flags & PAGE_WRITE_ORG)) {
                 return -1;
+            }
             /* unprotect the page if it was put read-only because it
                contains translated code */
             if (!(p->flags & PAGE_WRITE)) {
-                if (!page_unprotect(addr, 0, NULL))
+                if (!page_unprotect(addr, 0, NULL)) {
                     return -1;
+                }
             }
             return 0;
         }
@@ -4101,7 +4139,7 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
 
     tb = tb_find_pc(retaddr);
     if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                   (void *)retaddr);
     }
     n = env->icount_decr.u16.low + tb->icount;
@@ -4130,8 +4168,9 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
     }
 #endif
     /* This should never happen.  */
-    if (n > CF_COUNT_MASK)
+    if (n > CF_COUNT_MASK) {
         cpu_abort(env, "TB too big during recompile");
+    }
 
     cflags = n | CF_LAST_IO;
     pc = tb->pc;
@@ -4162,13 +4201,15 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cross_page = 0;
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
-    for(i = 0; i < nb_tbs; i++) {
+    for (i = 0; i < nb_tbs; i++) {
         tb = &tbs[i];
         target_code_size += tb->size;
-        if (tb->size > max_target_code_size)
+        if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
-        if (tb->page_addr[1] != -1)
+        }
+        if (tb->page_addr[1] != -1) {
             cross_page++;
+        }
         if (tb->tb_next_offset[0] != 0xffff) {
             direct_jmp_count++;
             if (tb->tb_next_offset[1] != 0xffff) {
@@ -4180,14 +4221,15 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "Translation buffer state:\n");
     cpu_fprintf(f, "gen code size       %td/%zd\n",
                 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
-    cpu_fprintf(f, "TB count            %d/%d\n", 
+    cpu_fprintf(f, "TB count            %d/%d\n",
                 nb_tbs, code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                 nb_tbs ? target_code_size / nb_tbs : 0,
                 max_target_code_size);
     cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
+                target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
+                / target_code_size : 0);
     cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
             cross_page,
             nb_tbs ? (cross_page * 100) / nb_tbs : 0);
commit 6d4e18925a65306afabaad7b63a4588c18919e99
Merge: 659f807 a64ae61
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sat Dec 15 09:05:26 2012 +0000

    Merge branch 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf
    
    * 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf: (40 commits)
      pseries: Increase default NVRAM size
      target-ppc: Don't use hwaddr to represent hardware state
      PPC: e500: pci: Export slot2irq calculation
      PPC: E500plat: Make a lot of PCI slots available
      PPC: E500: Move PCI slot information into params
      PPC: E500: Generate dt pci irq map dynamically
      PPC: E500: PCI: Make IRQ calculation more generic
      PPC: E500: PCI: Make first slot qdev settable
      openpic: Accelerate pending irq search
      openpic: fix minor coding style issues
      MSI-X: Fix endianness
      PPC: e500: Declare pci bridge as bridge
      PPC: e500: Add MSI support
      openpic: add Shared MSI support
      openpic: make brr1 model specific
      openpic: convert to qdev
      openpic: remove irq_out
      openpic: rename openpic_t to OpenPICState
      openpic: convert simple reg operations to builtin bitops
      openpic: remove unused type variable
      ...

commit 659f807c0a700317a7a0fae7a6e6ebfe68bfbbc4
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Thu Dec 13 04:13:41 2012 +0400

    target-xtensa: fix ITLB/DTLB page protection flags
    
    With MMU option xtensa architecture has two TLBs: ITLB and DTLB. ITLB is
    only used for code access, DTLB is only for data. However TLB entries in
    both TLBs have attribute field controlling write and exec access. These
    bits need to be properly masked off depending on TLB type before being
    used as tlb_set_page prot argument. Otherwise the following happens:
    
    (1) ITLB entry for some PFN gets invalidated
    (2) DTLB entry for the same PFN gets updated, attributes allow code
        execution
    (3) code at the page with that PFN is executed (possible due to step 2),
        entry for the TB is written into the jump cache
    (4) QEMU TLB entry for the PFN gets replaced with an entry for some
        other PFN
    (5) code in the TB from step 3 is executed (possible due to jump cache)
        and it accesses data, for which there's no DTLB entry, causing DTLB
        miss exception
    (6) re-translation of the TB from step 5 is attempted, but there's no
        QEMU TLB entry nor xtensa ITLB entry for that PFN, which causes ITLB
        miss exception at the TB start address
    (7) ITLB miss exception is handled by the guest, but execution is
        resumed from the beginning of the faulting TB (the point where ITLB
        miss occured), not from the point where DTLB miss occured, which is
        wrong.
    
    With that fix the above scenario causes ITLB miss exception (that used
    to be step 7) at step 3, right at the beginning of the TB.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 200fb43..bf05575 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -522,7 +522,8 @@ static int get_physical_addr_mmu(CPUXtensaState *env, bool update_tlb,
             INST_FETCH_PRIVILEGE_CAUSE;
     }
 
-    *access = mmu_attr_to_access(entry->attr);
+    *access = mmu_attr_to_access(entry->attr) &
+        ~(dtlb ? PAGE_EXEC : PAGE_READ | PAGE_WRITE);
     if (!is_access_granted(*access, is_write)) {
         return dtlb ?
             (is_write ?
commit 4b4496dbccc5f286f0ef411f0ff702d67cb95145
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Fri Dec 14 08:54:25 2012 +0100

    console: clip update rectangle
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/console.h b/console.h
index 50a0512..edb1950 100644
--- a/console.h
+++ b/console.h
@@ -229,6 +229,16 @@ static inline void unregister_displaychangelistener(DisplayState *ds,
 static inline void dpy_gfx_update(DisplayState *s, int x, int y, int w, int h)
 {
     struct DisplayChangeListener *dcl;
+    int width = pixman_image_get_width(s->surface->image);
+    int height = pixman_image_get_height(s->surface->image);
+
+    x = MAX(x, 0);
+    y = MAX(y, 0);
+    x = MIN(x, width);
+    y = MIN(y, height);
+    w = MIN(w, width - x);
+    h = MIN(h, height - y);
+
     QLIST_FOREACH(dcl, &s->listeners, next) {
         if (dcl->dpy_gfx_update) {
             dcl->dpy_gfx_update(s, x, y, w, h);
commit bc210eb163b162ff2e94e5c8f4307715731257f8
Author: Gerd Hoffmann <1087974 at bugs.launchpad.net>
Date:   Fri Dec 14 07:54:24 2012 +0000

    pixman: fix vnc tight png/jpeg support
    
    This patch adds an x argument to qemu_pixman_linebuf_fill so it can
    also be used to convert a partial scanline.  Then fix tight + png/jpeg
    encoding by passing in the x+y offset, so the data is read from the
    correct screen location instead of the upper left corner.
    
    Cc: 1087974 at bugs.launchpad.net
    Cc: qemu-stable at nongnu.org
    Reported-by: Tim Hardeneck <thardeck at suse.de>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/vga.c b/hw/vga.c
index 2b0200a..c266161 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -2413,7 +2413,7 @@ void ppm_save(const char *filename, struct DisplaySurface *ds, Error **errp)
     }
     linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
     for (y = 0; y < height; y++) {
-        qemu_pixman_linebuf_fill(linebuf, ds->image, width, y);
+        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
         clearerr(f);
         ret = fwrite(pixman_image_get_data(linebuf), 1,
                      pixman_image_get_stride(linebuf), f);
diff --git a/qemu-pixman.c b/qemu-pixman.c
index 79e175b..e7263fb 100644
--- a/qemu-pixman.c
+++ b/qemu-pixman.c
@@ -52,10 +52,10 @@ pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
 }
 
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
-                              int width, int y)
+                              int width, int x, int y)
 {
     pixman_image_composite(PIXMAN_OP_SRC, fb, NULL, linebuf,
-                           0, y, 0, 0, 0, 0, width, 1);
+                           x, y, 0, 0, 0, 0, width, 1);
 }
 
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
diff --git a/qemu-pixman.h b/qemu-pixman.h
index bee55eb..3c05c83 100644
--- a/qemu-pixman.h
+++ b/qemu-pixman.h
@@ -31,7 +31,7 @@ pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf);
 pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
                                            int width);
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
-                              int width, int y);
+                              int width, int x, int y);
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
                                           pixman_image_t *image);
 void qemu_pixman_image_unref(pixman_image_t *image);
diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c
index 9ae4cab..62d0fde 100644
--- a/ui/vnc-enc-tight.c
+++ b/ui/vnc-enc-tight.c
@@ -1212,7 +1212,7 @@ static int send_jpeg_rect(VncState *vs, int x, int y, int w, int h, int quality)
     buf = (uint8_t *)pixman_image_get_data(linebuf);
     row[0] = buf;
     for (dy = 0; dy < h; dy++) {
-        qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy);
+        qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy);
         jpeg_write_scanlines(&cinfo, row, 1);
     }
     qemu_pixman_image_unref(linebuf);
@@ -1356,7 +1356,7 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h,
         if (color_type == PNG_COLOR_TYPE_PALETTE) {
             memcpy(buf, vs->tight.tight.buffer + (dy * w), w);
         } else {
-            qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy);
+            qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy);
         }
         png_write_row(png_ptr, buf);
     }
diff --git a/ui/vnc.c b/ui/vnc.c
index ba30362..04afcff 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2569,7 +2569,7 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
             uint8_t *server_ptr;
 
             if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
-                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, y);
+                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
                 guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
             } else {
                 guest_ptr = guest_row;
commit f27b2e1dfe79f993567652411d1ba16295b99719
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Fri Dec 14 08:54:23 2012 +0100

    pixman: update internal copy to pixman-0.28.2
    
    Some w64 fixes by Stefan Weil found their way into 0.28.2,
    so update the internal copy to that version to improve
    windows support.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/pixman b/pixman
index 97336fa..a5e5179 160000
--- a/pixman
+++ b/pixman
@@ -1 +1 @@
-Subproject commit 97336fad32acf802003855cd8bd6477fa49a12e3
+Subproject commit a5e5179b5624c99c812e9bf6e7b907e355a811e8
commit cb1d40d7ccfc18ba1fcb9e064402d930349ee047
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Fri Dec 14 08:54:22 2012 +0100

    Revert "pixman: require 0.18.4 or newer"
    
    This reverts commit 288fa40736e6eb63132d01aa6dc21ee831b796ae.
    
    The only reason old pixman versions didn't work was the missing
    PIXMAN_TYPE_BGRA, which is properly #ifdef'ed now.  So we don't
    have to require a minimum pixman version.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/configure b/configure
index 38b1cc6..ecdb33a 100755
--- a/configure
+++ b/configure
@@ -2127,7 +2127,7 @@ fi
 # pixman support probe
 
 if test "$pixman" = ""; then
-  if $pkg_config --atleast-version=0.18.4 pixman-1 > /dev/null 2>&1; then
+  if $pkg_config pixman-1 > /dev/null 2>&1; then
     pixman="system"
   else
     pixman="internal"
@@ -2138,7 +2138,7 @@ if test "$pixman" = "system"; then
   pixman_libs=`$pkg_config --libs pixman-1 2>/dev/null`
 else
   if test ! -d ${source_path}/pixman/pixman; then
-    echo "ERROR: pixman not present (or older than 0.18.4). Your options:"
+    echo "ERROR: pixman not present. Your options:"
     echo "  (1) Preferred: Install the pixman devel package (any recent"
     echo "      distro should have packages as Xorg needs pixman too)."
     echo "  (2) Fetch the pixman submodule, using:"
commit fbddfc727bde692f009a269e8e628d8c152b537b
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Fri Dec 14 08:54:21 2012 +0100

    pixman: fix version check for PIXMAN_TYPE_BGRA
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/qemu-pixman.c b/qemu-pixman.c
index e46e180..79e175b 100644
--- a/qemu-pixman.c
+++ b/qemu-pixman.c
@@ -21,7 +21,7 @@ int qemu_pixman_get_type(int rshift, int gshift, int bshift)
         if (rshift == 0) {
             type = PIXMAN_TYPE_ABGR;
         } else {
-#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 21, 8)
+#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 16, 0)
             type = PIXMAN_TYPE_BGRA;
 #endif
         }
commit a64ae610b978dfd8ccfb7f6c5d4cfe62d7542fbd
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Dec 3 16:42:16 2012 +0000

    pseries: Increase default NVRAM size
    
    If no image file for NVRAM is specified, the pseries machine currently
    creates a 16K non-persistent NVRAM by default.  This basically works, but
    is not large enough for current firmware and guest kernels to create all
    the NVRAM partitions they would like to.  Increasing the default size to
    64K addresses this and stops the guest generating error messages.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_nvram.c b/hw/spapr_nvram.c
index 641de48..512bb8d 100644
--- a/hw/spapr_nvram.c
+++ b/hw/spapr_nvram.c
@@ -37,7 +37,7 @@ typedef struct sPAPRNVRAM {
 } sPAPRNVRAM;
 
 #define MIN_NVRAM_SIZE 8192
-#define DEFAULT_NVRAM_SIZE 16384
+#define DEFAULT_NVRAM_SIZE 65536
 #define MAX_NVRAM_SIZE (UINT16_MAX * 16)
 
 static void rtas_nvram_fetch(sPAPREnvironment *spapr,
commit b162d02e9450201c656edce290f33994a6d2ad33
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Dec 3 16:42:14 2012 +0000

    target-ppc: Don't use hwaddr to represent hardware state
    
    The hwaddr type is somewhat vaguely defined as being able to contain bus
    addresses on the widest possible bus in the system.  For that reason it's
    discouraged for representing specific pieces of persistent hardware state,
    which should instead use an explicit width type that matches the bits
    available in real hardware.  In particular, because of the possibility that
    the size of hwaddr might change if different buses are added to the target
    in future, it's not suitable for use in vm state descriptions for savevm
    and migration.
    
    This patch purges such unwise uses of hwaddr from the ppc target code,
    which turns out to be just one.  The ppcemb_tlb_t struct, used on a number
    of embedded ppc models to represent a TLB entry contains a hwaddr for the
    real address field.  This patch changes it to be a fixed uint64_t which is
    suitable enough for all machine types which use this structure.
    
    Other uses of hwaddr in CPUPPCState turn out not to be problematic:
    htab_base and htab_mask are just used for the convenience of the TCG code;
    the underlying machine state is the SDR1 register, which is stored with
    a suitable type already.  Likewise the mpic_cpu_base field is only used
    internally and does not represent fundamental hardware state which needs to
    be saved.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 5f1dc8b..742d4f8 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -355,7 +355,7 @@ struct ppc6xx_tlb_t {
 
 typedef struct ppcemb_tlb_t ppcemb_tlb_t;
 struct ppcemb_tlb_t {
-    hwaddr RPN;
+    uint64_t RPN;
     target_ulong EPN;
     target_ulong PID;
     target_ulong size;
commit 9e2c12988bebca7b99c0cd064b23fb7ea6643c86
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Dec 13 01:16:24 2012 +0100

    PPC: e500: pci: Export slot2irq calculation
    
    We need the calculation method to get from a PCI slot ID to its respective
    interrupt line twice. Once in the internal map function and once when
    assembling the device tree.
    
    So let's extract the calculation to a separate function that can be called
    by both users.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 564f654..af6b671 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -34,6 +34,7 @@
 #include "hw/sysbus.h"
 #include "exec-memory.h"
 #include "host-utils.h"
+#include "hw/ppce500_pci.h"
 
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define UIMAGE_LOAD_BASE           0
@@ -72,6 +73,7 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
     int i = 0;
     int slot;
     int pci_irq;
+    int host_irq;
     int last_slot = first_slot + nr_slots;
     uint32_t *pci_map;
 
@@ -85,7 +87,8 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
             pci_map[i++] = cpu_to_be32(0x0);
             pci_map[i++] = cpu_to_be32(pci_irq + 1);
             pci_map[i++] = cpu_to_be32(mpic);
-            pci_map[i++] = cpu_to_be32(((pci_irq + slot) % 4) + 1);
+            host_irq = ppce500_pci_map_irq_slot(slot, pci_irq);
+            pci_map[i++] = cpu_to_be32(host_irq + 1);
             pci_map[i++] = cpu_to_be32(0x1);
         }
     }
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 561a776..09e3507 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -19,6 +19,7 @@
 #include "pci.h"
 #include "pci_host.h"
 #include "bswap.h"
+#include "ppce500_pci.h"
 
 #ifdef DEBUG_PCI
 #define pci_debug(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
@@ -256,7 +257,7 @@ static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
     int devno = pci_dev->devfn >> 3;
     int ret;
 
-    ret = (irq_num + devno) % 4;
+    ret = ppce500_pci_map_irq_slot(devno, irq_num);
 
     pci_debug("%s: devfn %x irq %d -> %d  devno:%x\n", __func__,
            pci_dev->devfn, irq_num, ret, devno);
diff --git a/hw/ppce500_pci.h b/hw/ppce500_pci.h
new file mode 100644
index 0000000..61f773e
--- /dev/null
+++ b/hw/ppce500_pci.h
@@ -0,0 +1,9 @@
+#ifndef PPCE500_PCI_H
+#define PPCE500_PCI_H
+
+static inline int ppce500_pci_map_irq_slot(int devno, int irq_num)
+{
+    return (devno + irq_num) % 4;
+}
+
+#endif
commit 3bb7e02a9725a24e5bf915b35f914f82f5b07a1f
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Dec 12 14:58:30 2012 +0100

    PPC: E500plat: Make a lot of PCI slots available
    
    The ppce500 machine doesn't have to stick to hardware limitations,
    as it's defined as being fully device tree based.
    
    Thus we can change the initial PCI slot ID to 0x1 which gives us a
    whopping 31 PCI devices we can support with this machine now!
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 9365213..2992bd9 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -14,6 +14,7 @@
 #include "e500.h"
 #include "../boards.h"
 #include "device_tree.h"
+#include "hw/pci.h"
 
 static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt)
 {
@@ -40,8 +41,8 @@ static void e500plat_init(QEMUMachineInitArgs *args)
         .kernel_cmdline = kernel_cmdline,
         .initrd_filename = initrd_filename,
         .cpu_model = cpu_model,
-        .pci_first_slot = 0x11,
-        .pci_nr_slots = 2,
+        .pci_first_slot = 0x1,
+        .pci_nr_slots = PCI_SLOT_MAX - 1,
         .fixup_devtree = e500plat_fixup_devtree,
     };
 
commit 492ec48dc2d99ca13b24d554e1970af7e2581e23
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Dec 12 13:53:53 2012 +0100

    PPC: E500: Move PCI slot information into params
    
    We have a params struct that allows us to expose differences between
    e500 machine models. Include PCI slot information there, so we can have
    different machines with different PCI slot topology.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index ebb6d96..564f654 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -339,7 +339,8 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     qemu_devtree_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0,
                                0x0, 0x7);
     pci_map = pci_map_create(fdt, qemu_devtree_get_phandle(fdt, mpic),
-                             0x11, 2, &len);
+                             params->pci_first_slot, params->pci_nr_slots,
+                             &len);
     qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, len);
     qemu_devtree_setprop_phandle(fdt, pci, "interrupt-parent", mpic);
     qemu_devtree_setprop_cells(fdt, pci, "interrupts", 24, 2);
@@ -569,6 +570,7 @@ void ppce500_init(PPCE500Params *params)
 
     /* PCI */
     dev = qdev_create(NULL, "e500-pcihost");
+    qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
     sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]);
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 7ae87f4..f5ff273 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -9,6 +9,8 @@ typedef struct PPCE500Params {
     const char *kernel_cmdline;
     const char *initrd_filename;
     const char *cpu_model;
+    int pci_first_slot;
+    int pci_nr_slots;
 
     /* e500-specific params */
 
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 4cfb940..9365213 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -40,6 +40,8 @@ static void e500plat_init(QEMUMachineInitArgs *args)
         .kernel_cmdline = kernel_cmdline,
         .initrd_filename = initrd_filename,
         .cpu_model = cpu_model,
+        .pci_first_slot = 0x11,
+        .pci_nr_slots = 2,
         .fixup_devtree = e500plat_fixup_devtree,
     };
 
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index e651661..7e1761d 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -40,6 +40,8 @@ static void mpc8544ds_init(QEMUMachineInitArgs *args)
         .kernel_cmdline = kernel_cmdline,
         .initrd_filename = initrd_filename,
         .cpu_model = cpu_model,
+        .pci_first_slot = 0x11,
+        .pci_nr_slots = 2,
         .fixup_devtree = mpc8544ds_fixup_devtree,
     };
 
commit 347dd79dccf41a679115213da673dfd06c4c8cc8
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Dec 12 13:47:07 2012 +0100

    PPC: E500: Generate dt pci irq map dynamically
    
    Today we're hardcoding the PCI interrupt map in the e500 machine file.
    Instead, let's write it dynamically so that different machine types
    can have different slot properties.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 1034f93..ebb6d96 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -66,25 +66,33 @@ struct boot_info
     uint32_t entry;
 };
 
-static void pci_map_create(void *fdt, uint32_t *pci_map, uint32_t mpic)
+static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
+                                int nr_slots, int *len)
 {
-    int i;
-    const uint32_t tmp[] = {
-                             /* IDSEL 0x11 J17 Slot 1 */
-                             0x8800, 0x0, 0x0, 0x1, mpic, 0x2, 0x1,
-                             0x8800, 0x0, 0x0, 0x2, mpic, 0x3, 0x1,
-                             0x8800, 0x0, 0x0, 0x3, mpic, 0x4, 0x1,
-                             0x8800, 0x0, 0x0, 0x4, mpic, 0x1, 0x1,
-
-                             /* IDSEL 0x12 J16 Slot 2 */
-                             0x9000, 0x0, 0x0, 0x1, mpic, 0x3, 0x1,
-                             0x9000, 0x0, 0x0, 0x2, mpic, 0x4, 0x1,
-                             0x9000, 0x0, 0x0, 0x3, mpic, 0x2, 0x1,
-                             0x9000, 0x0, 0x0, 0x4, mpic, 0x1, 0x1,
-                           };
-    for (i = 0; i < (7 * 8); i++) {
-        pci_map[i] = cpu_to_be32(tmp[i]);
+    int i = 0;
+    int slot;
+    int pci_irq;
+    int last_slot = first_slot + nr_slots;
+    uint32_t *pci_map;
+
+    *len = nr_slots * 4 * 7 * sizeof(uint32_t);
+    pci_map = g_malloc(*len);
+
+    for (slot = first_slot; slot < last_slot; slot++) {
+        for (pci_irq = 0; pci_irq < 4; pci_irq++) {
+            pci_map[i++] = cpu_to_be32(slot << 11);
+            pci_map[i++] = cpu_to_be32(0x0);
+            pci_map[i++] = cpu_to_be32(0x0);
+            pci_map[i++] = cpu_to_be32(pci_irq + 1);
+            pci_map[i++] = cpu_to_be32(mpic);
+            pci_map[i++] = cpu_to_be32(((pci_irq + slot) % 4) + 1);
+            pci_map[i++] = cpu_to_be32(0x1);
+        }
     }
+
+    assert((i * sizeof(uint32_t)) == *len);
+
+    return pci_map;
 }
 
 static void dt_serial_create(void *fdt, unsigned long long offset,
@@ -132,7 +140,8 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     char gutil[128];
     char pci[128];
     char msi[128];
-    uint32_t pci_map[7 * 8];
+    uint32_t *pci_map = NULL;
+    int len;
     uint32_t pci_ranges[14] =
         {
             0x2000000, 0x0, 0xc0000000,
@@ -329,8 +338,9 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     qemu_devtree_setprop_string(fdt, pci, "device_type", "pci");
     qemu_devtree_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0,
                                0x0, 0x7);
-    pci_map_create(fdt, pci_map, qemu_devtree_get_phandle(fdt, mpic));
-    qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, sizeof(pci_map));
+    pci_map = pci_map_create(fdt, qemu_devtree_get_phandle(fdt, mpic),
+                             0x11, 2, &len);
+    qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, len);
     qemu_devtree_setprop_phandle(fdt, pci, "interrupt-parent", mpic);
     qemu_devtree_setprop_cells(fdt, pci, "interrupts", 24, 2);
     qemu_devtree_setprop_cells(fdt, pci, "bus-range", 0, 255);
@@ -364,6 +374,7 @@ done:
     ret = fdt_size;
 
 out:
+    g_free(pci_map);
 
     return ret;
 }
commit 05f57d9de8e99bf5f7ca762c6dc2f1e054c2074c
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Dec 12 12:58:12 2012 +0100

    PPC: E500: PCI: Make IRQ calculation more generic
    
    The IRQ line calculation is more or less hardcoded today. Instead, let's
    write it as an algorithmic function that theoretically allows an arbitrary
    number of PCI slots.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 4cd4edc..561a776 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -253,17 +253,10 @@ static const MemoryRegionOps e500_pci_reg_ops = {
 
 static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
 {
-    int devno = pci_dev->devfn >> 3, ret = 0;
+    int devno = pci_dev->devfn >> 3;
+    int ret;
 
-    switch (devno) {
-        /* Two PCI slot */
-        case 0x11:
-        case 0x12:
-            ret = (irq_num + devno - 0x10) % 4;
-            break;
-        default:
-            printf("Error:%s:unknown dev number\n", __func__);
-    }
+    ret = (irq_num + devno) % 4;
 
     pci_debug("%s: devfn %x irq %d -> %d  devno:%x\n", __func__,
            pci_dev->devfn, irq_num, ret, devno);
commit eafb325fb11af21ed9df3f5a310fd26e70954318
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Dec 12 12:56:40 2012 +0100

    PPC: E500: PCI: Make first slot qdev settable
    
    Today the first slot id in our e500 pci implementation is hardcoded to
    0x11. Keep it there as default, but allow users to change the default to
    a different id.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index e534341..4cd4edc 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -87,6 +87,7 @@ struct PPCE500PCIState {
     struct pci_inbound pib[PPCE500_PCI_NR_PIBS];
     uint32_t gasket_time;
     qemu_irq irq[4];
+    uint32_t first_slot;
     /* mmio maps */
     MemoryRegion container;
     MemoryRegion iomem;
@@ -361,7 +362,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 
     b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
                          mpc85xx_pci_map_irq, s->irq, address_space_mem,
-                         &s->pio, PCI_DEVFN(0x11, 0), 4);
+                         &s->pio, PCI_DEVFN(s->first_slot, 0), 4);
     h->bus = b;
 
     pci_create_simple(b, 0, "e500-host-bridge");
@@ -401,12 +402,18 @@ static const TypeInfo e500_host_bridge_info = {
     .class_init    = e500_host_bridge_class_init,
 };
 
+static Property pcihost_properties[] = {
+    DEFINE_PROP_UINT32("first_slot", PPCE500PCIState, first_slot, 0x11),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void e500_pcihost_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = e500_pcihost_initfn;
+    dc->props = pcihost_properties;
     dc->vmsd = &vmstate_ppce500_pci;
 }
 
commit 76aec1f8b6549d14576a3eb739c731df8f678ffb
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Dec 13 12:48:14 2012 +0100

    openpic: Accelerate pending irq search
    
    When we're done with one interrupt, we need to search for the next pending
    interrupt in the queue. This search has grown quite big now that we have
    more than 256 possible irq lines.
    
    So let's memorize how many interrupts we have pending in our bitmaps, so
    that we can always bail out in the usual case - the one where we're all done.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 25d5cd7..3cbcea8 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -169,6 +169,7 @@ typedef struct IRQ_queue_t {
     uint32_t queue[BF_WIDTH(MAX_IRQ)];
     int next;
     int priority;
+    int pending;    /* nr of pending bits in queue */
 } IRQ_queue_t;
 
 typedef struct IRQ_src_t {
@@ -251,11 +252,13 @@ static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
 
 static inline void IRQ_setbit(IRQ_queue_t *q, int n_IRQ)
 {
+    q->pending++;
     set_bit(q->queue, n_IRQ);
 }
 
 static inline void IRQ_resetbit(IRQ_queue_t *q, int n_IRQ)
 {
+    q->pending--;
     reset_bit(q->queue, n_IRQ);
 }
 
@@ -271,6 +274,12 @@ static void IRQ_check(OpenPICState *opp, IRQ_queue_t *q)
 
     next = -1;
     priority = -1;
+
+    if (!q->pending) {
+        /* IRQ bitmap is empty */
+        goto out;
+    }
+
     for (i = 0; i < opp->max_irq; i++) {
         if (IRQ_testbit(q, i)) {
             DPRINTF("IRQ_check: irq %d set ipvp_pr=%d pr=%d\n",
@@ -281,6 +290,8 @@ static void IRQ_check(OpenPICState *opp, IRQ_queue_t *q)
             }
         }
     }
+
+out:
     q->next = next;
     q->priority = priority;
 }
commit dbe30e13e87a71e85e88ae3ffd3460173cbc8193
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Dec 13 00:44:22 2012 +0100

    openpic: fix minor coding style issues
    
    This patch removes all remaining occurences of spaces before function
    parameter indicating parenthesis.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 4bea1e7..25d5cd7 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -140,17 +140,17 @@
 #define BF_WIDTH(_bits_) \
 (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
 
-static inline void set_bit (uint32_t *field, int bit)
+static inline void set_bit(uint32_t *field, int bit)
 {
     field[bit >> 5] |= 1 << (bit & 0x1F);
 }
 
-static inline void reset_bit (uint32_t *field, int bit)
+static inline void reset_bit(uint32_t *field, int bit)
 {
     field[bit >> 5] &= ~(1 << (bit & 0x1F));
 }
 
-static inline int test_bit (uint32_t *field, int bit)
+static inline int test_bit(uint32_t *field, int bit)
 {
     return (field[bit >> 5] & 1 << (bit & 0x1F)) != 0;
 }
@@ -249,17 +249,17 @@ typedef struct OpenPICState {
 
 static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
 
-static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ)
+static inline void IRQ_setbit(IRQ_queue_t *q, int n_IRQ)
 {
     set_bit(q->queue, n_IRQ);
 }
 
-static inline void IRQ_resetbit (IRQ_queue_t *q, int n_IRQ)
+static inline void IRQ_resetbit(IRQ_queue_t *q, int n_IRQ)
 {
     reset_bit(q->queue, n_IRQ);
 }
 
-static inline int IRQ_testbit (IRQ_queue_t *q, int n_IRQ)
+static inline int IRQ_testbit(IRQ_queue_t *q, int n_IRQ)
 {
     return test_bit(q->queue, n_IRQ);
 }
commit 68d1e1f52d73ddcec4b0358f269d9a8c2ea216d9
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Dec 6 04:11:33 2012 +0100

    MSI-X: Fix endianness
    
    The MSI-X vector tables are usually stored in little endian in memory,
    so let's mark the accessors as such.
    
    This fixes MSI-X on e500 for me.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/msix.c b/hw/msix.c
index 136ef09..b57ae60 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -180,8 +180,7 @@ static void msix_table_mmio_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps msix_table_mmio_ops = {
     .read = msix_table_mmio_read,
     .write = msix_table_mmio_write,
-    /* TODO: MSIX should be LITTLE_ENDIAN. */
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -198,8 +197,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
 
 static const MemoryRegionOps msix_pba_mmio_ops = {
     .read = msix_pba_mmio_read,
-    /* TODO: MSIX should be LITTLE_ENDIAN. */
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
commit 997505065dc92e533debf5cb23012ba4e673d387
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 14:27:26 2012 +0100

    PPC: e500: Declare pci bridge as bridge
    
    The new PCI host bridge device needs to identify itself as PCI host bridge.
    Declare it as such.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 54c72b4..e534341 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -330,9 +330,15 @@ static int e500_pcihost_bridge_initfn(PCIDevice *d)
     PPCE500CCSRState *ccsr = CCSR(container_get(qdev_get_machine(),
                                   "/e500-ccsr"));
 
+    pci_config_set_class(d->config, PCI_CLASS_BRIDGE_PCI);
+    d->config[PCI_HEADER_TYPE] =
+        (d->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) |
+        PCI_HEADER_TYPE_BRIDGE;
+
     memory_region_init_alias(&b->bar0, "e500-pci-bar0", &ccsr->ccsr_space,
                              0, int128_get64(ccsr->ccsr_space.size));
     pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0);
+
     return 0;
 }
 
commit a911b7a92064d17b862ae85fe8e5ec91b7ba1aa9
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 14:26:37 2012 +0100

    PPC: e500: Add MSI support
    
    Now that our interrupt controller supports MSIs, let's expose that feature
    to the guest through the device tree!
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index fa9b8ed..1034f93 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -48,6 +48,7 @@
 #define MPC8544_CCSRBAR_BASE       0xE0000000ULL
 #define MPC8544_CCSRBAR_SIZE       0x00100000ULL
 #define MPC8544_MPIC_REGS_OFFSET   0x40000ULL
+#define MPC8544_MSI_REGS_OFFSET   0x41600ULL
 #define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL
 #define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL
 #define MPC8544_PCI_REGS_OFFSET    0x8000ULL
@@ -127,8 +128,10 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     char soc[128];
     char mpic[128];
     uint32_t mpic_ph;
+    uint32_t msi_ph;
     char gutil[128];
     char pci[128];
+    char msi[128];
     uint32_t pci_map[7 * 8];
     uint32_t pci_ranges[14] =
         {
@@ -300,6 +303,25 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000);
     qemu_devtree_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0);
 
+    snprintf(msi, sizeof(msi), "/%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET);
+    qemu_devtree_add_subnode(fdt, msi);
+    qemu_devtree_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi");
+    qemu_devtree_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200);
+    msi_ph = qemu_devtree_alloc_phandle(fdt);
+    qemu_devtree_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100);
+    qemu_devtree_setprop_phandle(fdt, msi, "interrupt-parent", mpic);
+    qemu_devtree_setprop_cells(fdt, msi, "interrupts",
+        0xe0, 0x0,
+        0xe1, 0x0,
+        0xe2, 0x0,
+        0xe3, 0x0,
+        0xe4, 0x0,
+        0xe5, 0x0,
+        0xe6, 0x0,
+        0xe7, 0x0);
+    qemu_devtree_setprop_cell(fdt, msi, "phandle", msi_ph);
+    qemu_devtree_setprop_cell(fdt, msi, "linux,phandle", msi_ph);
+
     snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE);
     qemu_devtree_add_subnode(fdt, pci);
     qemu_devtree_setprop_cell(fdt, pci, "cell-index", 0);
@@ -315,6 +337,7 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     for (i = 0; i < 14; i++) {
         pci_ranges[i] = cpu_to_be32(pci_ranges[i]);
     }
+    qemu_devtree_setprop_cell(fdt, pci, "fsl,msi", msi_ph);
     qemu_devtree_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges));
     qemu_devtree_setprop_cells(fdt, pci, "reg", MPC8544_PCI_REGS_BASE >> 32,
                                MPC8544_PCI_REGS_BASE, 0, 0x1000);
commit 732aa6ec2639ace8bcb0b27b9c0d71103bd1d153
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 14:18:00 2012 +0100

    openpic: add Shared MSI support
    
    The OpenPIC allows MSI access through shared MSI registers. Implement
    them for the MPC8544 MPIC, so we can support MSIs.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 5bf16ea..4bea1e7 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -38,6 +38,7 @@
 #include "pci.h"
 #include "openpic.h"
 #include "sysbus.h"
+#include "msi.h"
 
 //#define DEBUG_OPENPIC
 
@@ -52,6 +53,7 @@
 #define MAX_TMR     4
 #define VECTOR_BITS 8
 #define MAX_IPI     4
+#define MAX_MSI     8
 #define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
 #define VID         0x03 /* MPIC version ID */
 
@@ -63,6 +65,8 @@
 #define OPENPIC_GLB_REG_SIZE         0x10F0
 #define OPENPIC_TMR_REG_START        0x10F0
 #define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
 #define OPENPIC_SRC_REG_START        0x10000
 #define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
 #define OPENPIC_CPU_REG_START        0x20000
@@ -127,6 +131,12 @@
 #define IDR_P1_SHIFT      1
 #define IDR_P0_SHIFT      0
 
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
+
 #define BF_WIDTH(_bits_) \
 (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
 
@@ -209,7 +219,7 @@ typedef struct OpenPICState {
     uint32_t brr1;
 
     /* Sub-regions */
-    MemoryRegion sub_io_mem[7];
+    MemoryRegion sub_io_mem[5];
 
     /* Global registers */
     uint32_t frep; /* Feature reporting register */
@@ -227,9 +237,14 @@ typedef struct OpenPICState {
         uint32_t ticc;  /* Global timer current count register */
         uint32_t tibc;  /* Global timer base count register */
     } timers[MAX_TMR];
+    /* Shared MSI registers */
+    struct {
+        uint32_t msir;   /* Shared Message Signaled Interrupt Register */
+    } msi[MAX_MSI];
     uint32_t max_irq;
     uint32_t irq_ipi0;
     uint32_t irq_tim0;
+    uint32_t irq_msi;
 } OpenPICState;
 
 static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
@@ -704,6 +719,68 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
     return retval;
 }
 
+static void openpic_msi_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned size)
+{
+    OpenPICState *opp = opaque;
+    int idx = opp->irq_msi;
+    int srs, ibs;
+
+    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
+    if (addr & 0xF) {
+        return;
+    }
+
+    switch (addr) {
+    case MSIIR_OFFSET:
+        srs = val >> MSIIR_SRS_SHIFT;
+        idx += srs;
+        ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+        opp->msi[srs].msir |= 1 << ibs;
+        openpic_set_irq(opp, idx, 1);
+        break;
+    default:
+        /* most registers are read-only, thus ignored */
+        break;
+    }
+}
+
+static uint64_t openpic_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    OpenPICState *opp = opaque;
+    uint64_t r = 0;
+    int i, srs;
+
+    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
+    if (addr & 0xF) {
+        return -1;
+    }
+
+    srs = addr >> 4;
+
+    switch (addr) {
+    case 0x00:
+    case 0x10:
+    case 0x20:
+    case 0x30:
+    case 0x40:
+    case 0x50:
+    case 0x60:
+    case 0x70: /* MSIRs */
+        r = opp->msi[srs].msir;
+        /* Clear on read */
+        opp->msi[srs].msir = 0;
+        break;
+    case 0x120: /* MSISR */
+        for (i = 0; i < MAX_MSI; i++) {
+            r |= (opp->msi[i].msir ? 1 : 0) << i;
+        }
+        break;
+    }
+
+    return r;
+}
+
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx)
 {
@@ -932,6 +1009,26 @@ static const MemoryRegionOps openpic_src_ops_be = {
     },
 };
 
+static const MemoryRegionOps openpic_msi_ops_le = {
+    .read = openpic_msi_read,
+    .write = openpic_msi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps openpic_msi_ops_be = {
+    .read = openpic_msi_read,
+    .write = openpic_msi_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
 static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 {
     unsigned int i;
@@ -1039,6 +1136,7 @@ static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
 struct memreg {
     const char             *name;
     MemoryRegionOps const  *ops;
+    bool                   map;
     hwaddr      start_addr;
     ram_addr_t              size;
 };
@@ -1047,27 +1145,31 @@ static int openpic_init(SysBusDevice *dev)
 {
     OpenPICState *opp = FROM_SYSBUS(typeof (*opp), dev);
     int i, j;
-    const struct memreg list_le[] = {
-        {"glb", &openpic_glb_ops_le, OPENPIC_GLB_REG_START,
-                                     OPENPIC_GLB_REG_SIZE},
-        {"tmr", &openpic_tmr_ops_le, OPENPIC_TMR_REG_START,
-                                     OPENPIC_TMR_REG_SIZE},
-        {"src", &openpic_src_ops_le, OPENPIC_SRC_REG_START,
-                                     OPENPIC_SRC_REG_SIZE},
-        {"cpu", &openpic_cpu_ops_le, OPENPIC_CPU_REG_START,
-                                     OPENPIC_CPU_REG_SIZE},
+    struct memreg list_le[] = {
+        {"glb", &openpic_glb_ops_le, true,
+                OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_le, true,
+                OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
+        {"msi", &openpic_msi_ops_le, true,
+                OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE},
+        {"src", &openpic_src_ops_le, true,
+                OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_le, true,
+                OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
     };
-    const struct memreg list_be[] = {
-        {"glb", &openpic_glb_ops_be, OPENPIC_GLB_REG_START,
-                                     OPENPIC_GLB_REG_SIZE},
-        {"tmr", &openpic_tmr_ops_be, OPENPIC_TMR_REG_START,
-                                     OPENPIC_TMR_REG_SIZE},
-        {"src", &openpic_src_ops_be, OPENPIC_SRC_REG_START,
-                                     OPENPIC_SRC_REG_SIZE},
-        {"cpu", &openpic_cpu_ops_be, OPENPIC_CPU_REG_START,
-                                     OPENPIC_CPU_REG_SIZE},
+    struct memreg list_be[] = {
+        {"glb", &openpic_glb_ops_be, true,
+                OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_be, true,
+                OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
+        {"msi", &openpic_msi_ops_be, true,
+                OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE},
+        {"src", &openpic_src_ops_be, true,
+                OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_be, true,
+                OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
     };
-    struct memreg const *list;
+    struct memreg *list;
 
     switch (opp->model) {
     case OPENPIC_MODEL_FSL_MPIC_20:
@@ -1083,7 +1185,9 @@ static int openpic_init(SysBusDevice *dev)
         opp->max_irq = FSL_MPIC_20_MAX_IRQ;
         opp->irq_ipi0 = FSL_MPIC_20_IPI_IRQ;
         opp->irq_tim0 = FSL_MPIC_20_TMR_IRQ;
+        opp->irq_msi = FSL_MPIC_20_MSI_IRQ;
         opp->brr1 = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN;
+        msi_supported = true;
         list = list_be;
         break;
     case OPENPIC_MODEL_RAVEN:
@@ -1099,6 +1203,8 @@ static int openpic_init(SysBusDevice *dev)
         opp->irq_tim0 = RAVEN_TMR_IRQ;
         opp->brr1 = -1;
         list = list_le;
+        /* Don't map MSI region */
+        list[2].map = false;
 
         /* Only UP supported today */
         if (opp->nb_cpus != 1) {
@@ -1110,6 +1216,10 @@ static int openpic_init(SysBusDevice *dev)
     memory_region_init(&opp->mem, "openpic", 0x40000);
 
     for (i = 0; i < ARRAY_SIZE(list_le); i++) {
+        if (!list[i].map) {
+            continue;
+        }
+
         memory_region_init_io(&opp->sub_io_mem[i], list[i].ops, opp,
                               list[i].name, list[i].size);
 
commit dbbbfd6058dda61f57d1f72133aa54eb27330411
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 13:51:50 2012 +0100

    openpic: make brr1 model specific
    
    Now that we can properly distinguish between openpic model differences,
    let's move brr1 out of the raven code path.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 591b291..5bf16ea 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -206,6 +206,7 @@ typedef struct OpenPICState {
     uint32_t tifr_reset;
     uint32_t ipvp_reset;
     uint32_t ide_reset;
+    uint32_t brr1;
 
     /* Sub-regions */
     MemoryRegion sub_io_mem[7];
@@ -784,7 +785,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
     addr &= 0xFF0;
     switch (addr) {
     case 0x00: /* Block Revision Register1 (BRR1) */
-        retval = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN;
+        retval = opp->brr1;
         break;
     case 0x80: /* PCTP */
         retval = dst->pctp;
@@ -1082,6 +1083,7 @@ static int openpic_init(SysBusDevice *dev)
         opp->max_irq = FSL_MPIC_20_MAX_IRQ;
         opp->irq_ipi0 = FSL_MPIC_20_IPI_IRQ;
         opp->irq_tim0 = FSL_MPIC_20_TMR_IRQ;
+        opp->brr1 = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN;
         list = list_be;
         break;
     case OPENPIC_MODEL_RAVEN:
@@ -1095,6 +1097,7 @@ static int openpic_init(SysBusDevice *dev)
         opp->max_irq = RAVEN_MAX_IRQ;
         opp->irq_ipi0 = RAVEN_IPI_IRQ;
         opp->irq_tim0 = RAVEN_TMR_IRQ;
+        opp->brr1 = -1;
         list = list_le;
 
         /* Only UP supported today */
commit d0b7263134dfd4d487698b639f2069951f3fdb26
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 05:17:14 2012 +0100

    openpic: convert to qdev
    
    This patch converts the OpenPIC device to qdev. Along the way it
    renames the "openpic" target to "raven" and the "mpic" target to
    "fsl_mpic_20", to better reflect the actual models they implement.
    
    This way we have a generic OpenPIC device now that can handle
    different flavors of the OpenPIC specification.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 5116b3e..591b291 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -37,6 +37,7 @@
 #include "ppc_mac.h"
 #include "pci.h"
 #include "openpic.h"
+#include "sysbus.h"
 
 //#define DEBUG_OPENPIC
 
@@ -54,30 +55,10 @@
 #define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
 #define VID         0x03 /* MPIC version ID */
 
-enum {
-    IRQ_IPVP = 0,
-    IRQ_IDE,
-};
-
-/* OpenPIC */
-#define OPENPIC_MAX_CPU      2
-#define OPENPIC_MAX_IRQ     64
-#define OPENPIC_EXT_IRQ     48
-#define OPENPIC_MAX_TMR      MAX_TMR
-#define OPENPIC_MAX_IPI      MAX_IPI
-
-/* Interrupt definitions */
-#define OPENPIC_IRQ_FE     (OPENPIC_EXT_IRQ)     /* Internal functional IRQ */
-#define OPENPIC_IRQ_ERR    (OPENPIC_EXT_IRQ + 1) /* Error IRQ */
-#define OPENPIC_IRQ_TIM0   (OPENPIC_EXT_IRQ + 2) /* First timer IRQ */
-#if OPENPIC_MAX_IPI > 0
-#define OPENPIC_IRQ_IPI0   (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First IPI IRQ */
-#define OPENPIC_IRQ_DBL0   (OPENPIC_IRQ_IPI0 + (OPENPIC_MAX_CPU * OPENPIC_MAX_IPI)) /* First doorbell IRQ */
-#else
-#define OPENPIC_IRQ_DBL0   (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First doorbell IRQ */
-#define OPENPIC_IRQ_MBX0   (OPENPIC_IRQ_DBL0 + OPENPIC_MAX_DBL) /* First mailbox IRQ */
-#endif
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDE_CRIT     (1 << 0)
 
+/* OpenPIC address map */
 #define OPENPIC_GLB_REG_START        0x0
 #define OPENPIC_GLB_REG_SIZE         0x10F0
 #define OPENPIC_TMR_REG_START        0x10F0
@@ -87,31 +68,37 @@ enum {
 #define OPENPIC_CPU_REG_START        0x20000
 #define OPENPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
 
-/* MPIC */
-#define MPIC_MAX_CPU      1
-#define MPIC_MAX_EXT     12
-#define MPIC_MAX_INT     64
-#define MPIC_MAX_IRQ     MAX_IRQ
+/* Raven */
+#define RAVEN_MAX_CPU      2
+#define RAVEN_MAX_EXT     48
+#define RAVEN_MAX_IRQ     64
+#define RAVEN_MAX_TMR      MAX_TMR
+#define RAVEN_MAX_IPI      MAX_IPI
+
+/* Interrupt definitions */
+#define RAVEN_FE_IRQ     (RAVEN_MAX_EXT)     /* Internal functional IRQ */
+#define RAVEN_ERR_IRQ    (RAVEN_MAX_EXT + 1) /* Error IRQ */
+#define RAVEN_TMR_IRQ    (RAVEN_MAX_EXT + 2) /* First timer IRQ */
+#define RAVEN_IPI_IRQ    (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */
+/* First doorbell IRQ */
+#define RAVEN_DBL_IRQ    (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI))
+
+/* FSL_MPIC_20 */
+#define FSL_MPIC_20_MAX_CPU      1
+#define FSL_MPIC_20_MAX_EXT     12
+#define FSL_MPIC_20_MAX_INT     64
+#define FSL_MPIC_20_MAX_IRQ     MAX_IRQ
 
 /* Interrupt definitions */
 /* IRQs, accessible through the IRQ region */
-#define MPIC_EXT_IRQ      0x00
-#define MPIC_INT_IRQ      0x10
-#define MPIC_MSG_IRQ      0xb0
-#define MPIC_MSI_IRQ      0xe0
+#define FSL_MPIC_20_EXT_IRQ      0x00
+#define FSL_MPIC_20_INT_IRQ      0x10
+#define FSL_MPIC_20_MSG_IRQ      0xb0
+#define FSL_MPIC_20_MSI_IRQ      0xe0
 /* These are available through separate regions, but
    for simplicity's sake mapped into the same number space */
-#define MPIC_TMR_IRQ      0x100
-#define MPIC_IPI_IRQ      0x104
-
-#define MPIC_GLB_REG_START        0x0
-#define MPIC_GLB_REG_SIZE         0x10F0
-#define MPIC_TMR_REG_START        0x10F0
-#define MPIC_TMR_REG_SIZE         0x220
-#define MPIC_SRC_REG_START        0x10000
-#define MPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
-#define MPIC_CPU_REG_START        0x20000
-#define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
+#define FSL_MPIC_20_TMR_IRQ      0x100
+#define FSL_MPIC_20_IPI_IRQ      0x104
 
 /*
  * Block Revision Register1 (BRR1): QEMU does not fully emulate
@@ -129,6 +116,7 @@ enum {
 #define FREP_VID_SHIFT     0
 
 #define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
 
 #define VENI_GENERIC      0x00000000 /* Generic Vendor ID */
 
@@ -205,10 +193,11 @@ typedef struct IRQ_dst_t {
 } IRQ_dst_t;
 
 typedef struct OpenPICState {
-    PCIDevice pci_dev;
+    SysBusDevice busdev;
     MemoryRegion mem;
 
     /* Behavior control */
+    uint32_t model;
     uint32_t flags;
     uint32_t nb_irqs;
     uint32_t vid;
@@ -231,15 +220,15 @@ typedef struct OpenPICState {
     IRQ_src_t src[MAX_IRQ];
     /* Local registers per output pin */
     IRQ_dst_t dst[MAX_CPU];
-    int nb_cpus;
+    uint32_t nb_cpus;
     /* Timer registers */
     struct {
         uint32_t ticc;  /* Global timer current count register */
         uint32_t tibc;  /* Global timer base count register */
     } timers[MAX_TMR];
-    int max_irq;
-    int irq_ipi0;
-    int irq_tim0;
+    uint32_t max_irq;
+    uint32_t irq_ipi0;
+    uint32_t irq_tim0;
 } OpenPICState;
 
 static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
@@ -411,9 +400,9 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
     openpic_update_irq(opp, n_IRQ);
 }
 
-static void openpic_reset (void *opaque)
+static void openpic_reset(DeviceState *d)
 {
-    OpenPICState *opp = (OpenPICState *)opaque;
+    OpenPICState *opp = FROM_SYSBUS(typeof (*opp), sysbus_from_qdev(d));
     int i;
 
     opp->glbc = 0x80000000;
@@ -506,7 +495,7 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
         break;
     case 0x1020: /* GLBC */
         if (val & 0x80000000) {
-            openpic_reset(opp);
+            openpic_reset(&opp->busdev.qdev);
         }
         break;
     case 0x1080: /* VENI */
@@ -971,7 +960,7 @@ static void openpic_save(QEMUFile* f, void *opaque)
         qemu_put_sbe32s(f, &opp->src[i].pending);
     }
 
-    qemu_put_sbe32s(f, &opp->nb_cpus);
+    qemu_put_be32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
         qemu_put_be32s(f, &opp->dst[i].pctp);
@@ -984,8 +973,6 @@ static void openpic_save(QEMUFile* f, void *opaque)
         qemu_put_be32s(f, &opp->timers[i].ticc);
         qemu_put_be32s(f, &opp->timers[i].tibc);
     }
-
-    pci_device_save(&opp->pci_dev, f);
 }
 
 static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
@@ -1020,7 +1007,7 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
         qemu_get_sbe32s(f, &opp->src[i].pending);
     }
 
-    qemu_get_sbe32s(f, &opp->nb_cpus);
+    qemu_get_be32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
         qemu_get_be32s(f, &opp->dst[i].pctp);
@@ -1034,7 +1021,7 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
         qemu_get_be32s(f, &opp->timers[i].tibc);
     }
 
-    return pci_device_load(&opp->pci_dev, f);
+    return 0;
 }
 
 static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
@@ -1048,17 +1035,18 @@ static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
     }
 }
 
-qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs)
+struct memreg {
+    const char             *name;
+    MemoryRegionOps const  *ops;
+    hwaddr      start_addr;
+    ram_addr_t              size;
+};
+
+static int openpic_init(SysBusDevice *dev)
 {
-    OpenPICState *opp;
-    int i;
-    struct {
-        const char             *name;
-        MemoryRegionOps const  *ops;
-        hwaddr      start_addr;
-        ram_addr_t              size;
-    } const list[] = {
+    OpenPICState *opp = FROM_SYSBUS(typeof (*opp), dev);
+    int i, j;
+    const struct memreg list_le[] = {
         {"glb", &openpic_glb_ops_le, OPENPIC_GLB_REG_START,
                                      OPENPIC_GLB_REG_SIZE},
         {"tmr", &openpic_tmr_ops_le, OPENPIC_TMR_REG_START,
@@ -1068,16 +1056,57 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
         {"cpu", &openpic_cpu_ops_le, OPENPIC_CPU_REG_START,
                                      OPENPIC_CPU_REG_SIZE},
     };
+    const struct memreg list_be[] = {
+        {"glb", &openpic_glb_ops_be, OPENPIC_GLB_REG_START,
+                                     OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_be, OPENPIC_TMR_REG_START,
+                                     OPENPIC_TMR_REG_SIZE},
+        {"src", &openpic_src_ops_be, OPENPIC_SRC_REG_START,
+                                     OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_be, OPENPIC_CPU_REG_START,
+                                     OPENPIC_CPU_REG_SIZE},
+    };
+    struct memreg const *list;
 
-    /* XXX: for now, only one CPU is supported */
-    if (nb_cpus != 1)
-        return NULL;
-    opp = g_malloc0(sizeof(OpenPICState));
+    switch (opp->model) {
+    case OPENPIC_MODEL_FSL_MPIC_20:
+    default:
+        opp->flags |= OPENPIC_FLAG_IDE_CRIT;
+        opp->nb_irqs = 80;
+        opp->vid = VID_REVISION_1_2;
+        opp->veni = VENI_GENERIC;
+        opp->spve_mask = 0xFFFF;
+        opp->tifr_reset = 0x00000000;
+        opp->ipvp_reset = 0x80000000;
+        opp->ide_reset = 0x00000001;
+        opp->max_irq = FSL_MPIC_20_MAX_IRQ;
+        opp->irq_ipi0 = FSL_MPIC_20_IPI_IRQ;
+        opp->irq_tim0 = FSL_MPIC_20_TMR_IRQ;
+        list = list_be;
+        break;
+    case OPENPIC_MODEL_RAVEN:
+        opp->nb_irqs = RAVEN_MAX_EXT;
+        opp->vid = VID_REVISION_1_3;
+        opp->veni = VENI_GENERIC;
+        opp->spve_mask = 0xFF;
+        opp->tifr_reset = 0x003F7A00;
+        opp->ipvp_reset = 0xA0000000;
+        opp->ide_reset = 0x00000000;
+        opp->max_irq = RAVEN_MAX_IRQ;
+        opp->irq_ipi0 = RAVEN_IPI_IRQ;
+        opp->irq_tim0 = RAVEN_TMR_IRQ;
+        list = list_le;
+
+        /* Only UP supported today */
+        if (opp->nb_cpus != 1) {
+            return -EINVAL;
+        }
+        break;
+    }
 
     memory_region_init(&opp->mem, "openpic", 0x40000);
 
-    for (i = 0; i < ARRAY_SIZE(list); i++) {
-
+    for (i = 0; i < ARRAY_SIZE(list_le); i++) {
         memory_region_init_io(&opp->sub_io_mem[i], list[i].ops, opp,
                               list[i].name, list[i].size);
 
@@ -1085,83 +1114,48 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                                     &opp->sub_io_mem[i]);
     }
 
-    //    isu_base &= 0xFFFC0000;
-    opp->nb_cpus = nb_cpus;
-    opp->nb_irqs = OPENPIC_EXT_IRQ;
-    opp->vid = VID;
-    opp->veni = VENI_GENERIC;
-    opp->spve_mask = 0xFF;
-    opp->tifr_reset = 0x003F7A00;
-    opp->max_irq = OPENPIC_MAX_IRQ;
-    opp->irq_ipi0 = OPENPIC_IRQ_IPI0;
-    opp->irq_tim0 = OPENPIC_IRQ_TIM0;
-
-    for (i = 0; i < nb_cpus; i++)
-        opp->dst[i].irqs = irqs[i];
-
-    register_savevm(&opp->pci_dev.qdev, "openpic", 0, 2,
+    for (i = 0; i < opp->nb_cpus; i++) {
+        opp->dst[i].irqs = g_new(qemu_irq, OPENPIC_OUTPUT_NB);
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_init_irq(dev, &opp->dst[i].irqs[j]);
+        }
+    }
+
+    register_savevm(&opp->busdev.qdev, "openpic", 0, 2,
                     openpic_save, openpic_load, opp);
-    qemu_register_reset(openpic_reset, opp);
 
-    if (pmem)
-        *pmem = &opp->mem;
+    sysbus_init_mmio(dev, &opp->mem);
+    qdev_init_gpio_in(&dev->qdev, openpic_set_irq, opp->max_irq);
 
-    return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq);
+    return 0;
 }
 
-qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs)
-{
-    OpenPICState    *mpp;
-    int           i;
-    struct {
-        const char             *name;
-        MemoryRegionOps const  *ops;
-        hwaddr      start_addr;
-        ram_addr_t              size;
-    } const list[] = {
-        {"glb", &openpic_glb_ops_be, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
-        {"tmr", &openpic_tmr_ops_be, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
-        {"src", &openpic_src_ops_be, MPIC_SRC_REG_START, MPIC_SRC_REG_SIZE},
-        {"cpu", &openpic_cpu_ops_be, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
-    };
-
-    mpp = g_malloc0(sizeof(OpenPICState));
-
-    memory_region_init(&mpp->mem, "mpic", 0x40000);
-    memory_region_add_subregion(address_space, base, &mpp->mem);
+static Property openpic_properties[] = {
+    DEFINE_PROP_UINT32("model", OpenPICState, model, OPENPIC_MODEL_FSL_MPIC_20),
+    DEFINE_PROP_UINT32("nb_cpus", OpenPICState, nb_cpus, 1),
+    DEFINE_PROP_END_OF_LIST(),
+};
 
-    for (i = 0; i < sizeof(list)/sizeof(list[0]); i++) {
+static void openpic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-        memory_region_init_io(&mpp->sub_io_mem[i], list[i].ops, mpp,
-                              list[i].name, list[i].size);
+    k->init = openpic_init;
+    dc->props = openpic_properties;
+    dc->reset = openpic_reset;
+}
 
-        memory_region_add_subregion(&mpp->mem, list[i].start_addr,
-                                    &mpp->sub_io_mem[i]);
-    }
+static TypeInfo openpic_info = {
+    .name          = "openpic",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(OpenPICState),
+    .class_init    = openpic_class_init,
+};
 
-    mpp->nb_cpus = nb_cpus;
-    /* 12 external sources, 48 internal sources , 4 timer sources,
-       4 IPI sources, 4 messaging sources, and 8 Shared MSI sources */
-    mpp->nb_irqs = 80;
-    mpp->vid = VID_REVISION_1_2;
-    mpp->veni = VENI_GENERIC;
-    mpp->spve_mask = 0xFFFF;
-    mpp->tifr_reset = 0x00000000;
-    mpp->ipvp_reset = 0x80000000;
-    mpp->ide_reset = 0x00000001;
-    mpp->max_irq = MPIC_MAX_IRQ;
-    mpp->irq_ipi0 = MPIC_IPI_IRQ;
-    mpp->irq_tim0 = MPIC_TMR_IRQ;
-
-    for (i = 0; i < nb_cpus; i++)
-        mpp->dst[i].irqs = irqs[i];
-
-    /* Enable critical interrupt support */
-    mpp->flags |= OPENPIC_FLAG_IDE_CRIT;
-
-    register_savevm(NULL, "mpic", 0, 2, openpic_save, openpic_load, mpp);
-    qemu_register_reset(openpic_reset, mpp);
-
-    return qemu_allocate_irqs(openpic_set_irq, mpp, mpp->max_irq);
+static void openpic_register_types(void)
+{
+    type_register_static(&openpic_info);
 }
+
+type_init(openpic_register_types)
diff --git a/hw/openpic.h b/hw/openpic.h
index 8a68f20..e226d7b 100644
--- a/hw/openpic.h
+++ b/hw/openpic.h
@@ -11,11 +11,7 @@ enum {
     OPENPIC_OUTPUT_NB,
 };
 
-/* OpenPIC capability flags */
-#define OPENPIC_FLAG_IDE_CRIT    (1 << 0)
+#define OPENPIC_MODEL_RAVEN       0
+#define OPENPIC_MODEL_FSL_MPIC_20 1
 
-qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs);
-qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs);
 #endif /* __OPENPIC_H__ */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 3f6d58c..fa9b8ed 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -418,7 +418,7 @@ void ppce500_init(PPCE500Params *params)
     target_ulong dt_base = 0;
     target_ulong initrd_base = 0;
     target_long initrd_size=0;
-    int i=0;
+    int i = 0, j, k;
     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
@@ -492,13 +492,27 @@ void ppce500_init(PPCE500Params *params)
                                 ccsr_addr_space);
 
     /* MPIC */
-    mpic = mpic_init(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET,
-                     smp_cpus, irqs);
+    mpic = g_new(qemu_irq, 256);
+    dev = qdev_create(NULL, "openpic");
+    qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus);
+    qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_FSL_MPIC_20);
+    qdev_init_nofail(dev);
+    s = sysbus_from_qdev(dev);
+
+    k = 0;
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_connect_irq(s, k++, irqs[i][j]);
+        }
+    }
 
-    if (!mpic) {
-        cpu_abort(env, "MPIC failed to initialize\n");
+    for (i = 0; i < 256; i++) {
+        mpic[i] = qdev_get_gpio_in(dev, i);
     }
 
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET,
+                                s->mmio[0].memory);
+
     /* Serial */
     if (serial_hds[0]) {
         serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET,
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index b9c2cd8..8c2114e 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -67,6 +67,7 @@
 #include "hw/usb.h"
 #include "blockdev.h"
 #include "exec-memory.h"
+#include "sysbus.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -141,7 +142,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     char *filename;
     qemu_irq *pic, **openpic_irqs;
     MemoryRegion *unin_memory = g_new(MemoryRegion, 1);
-    int linux_boot, i;
+    int linux_boot, i, j, k;
     MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1);
     hwaddr kernel_base, initrd_base, cmdline_base = 0;
     long kernel_size, initrd_size;
@@ -156,6 +157,8 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     void *fw_cfg;
     void *dbdma;
     int machine_arch;
+    SysBusDevice *s;
+    DeviceState *dev;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -320,7 +323,25 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
             exit(1);
         }
     }
-    pic = openpic_init(&pic_mem, smp_cpus, openpic_irqs);
+
+    pic = g_new(qemu_irq, 64);
+
+    dev = qdev_create(NULL, "openpic");
+    qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_RAVEN);
+    qdev_init_nofail(dev);
+    s = sysbus_from_qdev(dev);
+    pic_mem = s->mmio[0].memory;
+    k = 0;
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_connect_irq(s, k++, openpic_irqs[i][j]);
+        }
+    }
+
+    for (i = 0; i < 64; i++) {
+        pic[i] = qdev_get_gpio_in(dev, i);
+    }
+
     if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) {
         /* 970 gets a U3 bus */
         pci_bus = pci_pmac_u3_init(pic, get_system_memory(), get_system_io());
commit 5bac0701113f4de4fee053a3939b0f569a04b88c
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 02:18:58 2012 +0100

    openpic: remove irq_out
    
    The current openpic emulation contains half-ready code for bypass mode.
    Remove it, so that when someone wants to finish it they can start from a
    clean state.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index d5c2705..5116b3e 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -237,8 +237,6 @@ typedef struct OpenPICState {
         uint32_t ticc;  /* Global timer current count register */
         uint32_t tibc;  /* Global timer base count register */
     } timers[MAX_TMR];
-    /* IRQ out is used when in bypass mode (not implemented) */
-    qemu_irq irq_out;
     int max_irq;
     int irq_ipi0;
     int irq_tim0;
@@ -1051,7 +1049,7 @@ static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
 }
 
 qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs, qemu_irq irq_out)
+                        qemu_irq **irqs)
 {
     OpenPICState *opp;
     int i;
@@ -1100,7 +1098,6 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
 
     for (i = 0; i < nb_cpus; i++)
         opp->dst[i].irqs = irqs[i];
-    opp->irq_out = irq_out;
 
     register_savevm(&opp->pci_dev.qdev, "openpic", 0, 2,
                     openpic_save, openpic_load, opp);
@@ -1113,7 +1110,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
 }
 
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs, qemu_irq irq_out)
+                     int nb_cpus, qemu_irq **irqs)
 {
     OpenPICState    *mpp;
     int           i;
@@ -1159,7 +1156,6 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
 
     for (i = 0; i < nb_cpus; i++)
         mpp->dst[i].irqs = irqs[i];
-    mpp->irq_out = irq_out;
 
     /* Enable critical interrupt support */
     mpp->flags |= OPENPIC_FLAG_IDE_CRIT;
diff --git a/hw/openpic.h b/hw/openpic.h
index 1232d10..8a68f20 100644
--- a/hw/openpic.h
+++ b/hw/openpic.h
@@ -15,7 +15,7 @@ enum {
 #define OPENPIC_FLAG_IDE_CRIT    (1 << 0)
 
 qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs, qemu_irq irq_out);
+                        qemu_irq **irqs);
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs, qemu_irq irq_out);
+                     int nb_cpus, qemu_irq **irqs);
 #endif /* __OPENPIC_H__ */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index f3e97d8..3f6d58c 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -493,7 +493,7 @@ void ppce500_init(PPCE500Params *params)
 
     /* MPIC */
     mpic = mpic_init(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET,
-                     smp_cpus, irqs, NULL);
+                     smp_cpus, irqs);
 
     if (!mpic) {
         cpu_abort(env, "MPIC failed to initialize\n");
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index 664747e..b9c2cd8 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -320,7 +320,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
             exit(1);
         }
     }
-    pic = openpic_init(&pic_mem, smp_cpus, openpic_irqs, NULL);
+    pic = openpic_init(&pic_mem, smp_cpus, openpic_irqs);
     if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) {
         /* 970 gets a U3 bus */
         pci_bus = pci_pmac_u3_init(pic, get_system_memory(), get_system_io());
commit 6d544ee8ac2097c87fc97b53d6a1310d9daa0562
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 01:59:20 2012 +0100

    openpic: rename openpic_t to OpenPICState
    
    Rename the openpic_t struct to OpenPICState, so it adheres better to
    the current coding style rules.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index d2038d8..d5c2705 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -204,7 +204,7 @@ typedef struct IRQ_dst_t {
     qemu_irq *irqs;
 } IRQ_dst_t;
 
-typedef struct openpic_t {
+typedef struct OpenPICState {
     PCIDevice pci_dev;
     MemoryRegion mem;
 
@@ -242,9 +242,9 @@ typedef struct openpic_t {
     int max_irq;
     int irq_ipi0;
     int irq_tim0;
-} openpic_t;
+} OpenPICState;
 
-static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src);
+static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
 
 static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ)
 {
@@ -261,7 +261,7 @@ static inline int IRQ_testbit (IRQ_queue_t *q, int n_IRQ)
     return test_bit(q->queue, n_IRQ);
 }
 
-static void IRQ_check (openpic_t *opp, IRQ_queue_t *q)
+static void IRQ_check(OpenPICState *opp, IRQ_queue_t *q)
 {
     int next, i;
     int priority;
@@ -282,7 +282,7 @@ static void IRQ_check (openpic_t *opp, IRQ_queue_t *q)
     q->priority = priority;
 }
 
-static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q)
+static int IRQ_get_next(OpenPICState *opp, IRQ_queue_t *q)
 {
     if (q->next == -1) {
         /* XXX: optimize */
@@ -292,7 +292,7 @@ static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q)
     return q->next;
 }
 
-static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
+static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ)
 {
     IRQ_dst_t *dst;
     IRQ_src_t *src;
@@ -334,7 +334,7 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
 }
 
 /* update pic state because registers for n_IRQ have changed value */
-static void openpic_update_irq(openpic_t *opp, int n_IRQ)
+static void openpic_update_irq(OpenPICState *opp, int n_IRQ)
 {
     IRQ_src_t *src;
     int i;
@@ -393,7 +393,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
 
 static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
 
     src = &opp->src[n_IRQ];
@@ -415,7 +415,7 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 
 static void openpic_reset (void *opaque)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = (OpenPICState *)opaque;
     int i;
 
     opp->glbc = 0x80000000;
@@ -450,17 +450,17 @@ static void openpic_reset (void *opaque)
     opp->glbc = 0x00000000;
 }
 
-static inline uint32_t read_IRQreg_ide(openpic_t *opp, int n_IRQ)
+static inline uint32_t read_IRQreg_ide(OpenPICState *opp, int n_IRQ)
 {
     return opp->src[n_IRQ].ide;
 }
 
-static inline uint32_t read_IRQreg_ipvp(openpic_t *opp, int n_IRQ)
+static inline uint32_t read_IRQreg_ipvp(OpenPICState *opp, int n_IRQ)
 {
     return opp->src[n_IRQ].ipvp;
 }
 
-static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val)
+static inline void write_IRQreg_ide(OpenPICState *opp, int n_IRQ, uint32_t val)
 {
     uint32_t tmp;
 
@@ -470,7 +470,7 @@ static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val)
     DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
 }
 
-static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
+static inline void write_IRQreg_ipvp(OpenPICState *opp, int n_IRQ, uint32_t val)
 {
     /* NOTE: not fully accurate for special IRQs, but simple and sufficient */
     /* ACTIVITY bit is read-only */
@@ -484,7 +484,7 @@ static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
 static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
                               unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_dst_t *dst;
     int idx;
 
@@ -547,7 +547,7 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
 
 static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     uint32_t retval;
 
     DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
@@ -599,10 +599,10 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len)
     return retval;
 }
 
-static void openpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
+static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val,
                                 unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     int idx;
 
     DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
@@ -635,9 +635,9 @@ static void openpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
     }
 }
 
-static uint64_t openpic_timer_read(void *opaque, hwaddr addr, unsigned len)
+static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     uint32_t retval = -1;
     int idx;
 
@@ -675,7 +675,7 @@ out:
 static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val,
                               unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     int idx;
 
     DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
@@ -694,7 +694,7 @@ static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val,
 
 static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     uint32_t retval;
     int idx;
 
@@ -719,7 +719,7 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
     int s_IRQ, n_IRQ;
@@ -783,7 +783,7 @@ static void openpic_cpu_write(void *opaque, hwaddr addr, uint64_t val,
 static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
                                           int idx)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
     uint32_t retval;
@@ -885,8 +885,8 @@ static const MemoryRegionOps openpic_glb_ops_be = {
 };
 
 static const MemoryRegionOps openpic_tmr_ops_le = {
-    .write = openpic_timer_write,
-    .read  = openpic_timer_read,
+    .write = openpic_tmr_write,
+    .read  = openpic_tmr_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
         .min_access_size = 4,
@@ -895,8 +895,8 @@ static const MemoryRegionOps openpic_tmr_ops_le = {
 };
 
 static const MemoryRegionOps openpic_tmr_ops_be = {
-    .write = openpic_timer_write,
-    .read  = openpic_timer_read,
+    .write = openpic_tmr_write,
+    .read  = openpic_tmr_read,
     .endianness = DEVICE_BIG_ENDIAN,
     .impl = {
         .min_access_size = 4,
@@ -957,7 +957,7 @@ static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 
 static void openpic_save(QEMUFile* f, void *opaque)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = (OpenPICState *)opaque;
     unsigned int i;
 
     qemu_put_be32s(f, &opp->glbc);
@@ -1003,7 +1003,7 @@ static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 
 static int openpic_load(QEMUFile* f, void *opaque, int version_id)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = (OpenPICState *)opaque;
     unsigned int i;
 
     if (version_id != 1)
@@ -1039,7 +1039,7 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     return pci_device_load(&opp->pci_dev, f);
 }
 
-static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src)
+static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
 {
     int n_ci = IDR_CI0_SHIFT - n_CPU;
 
@@ -1053,7 +1053,7 @@ static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src)
 qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                         qemu_irq **irqs, qemu_irq irq_out)
 {
-    openpic_t *opp;
+    OpenPICState *opp;
     int i;
     struct {
         const char             *name;
@@ -1074,7 +1074,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
     /* XXX: for now, only one CPU is supported */
     if (nb_cpus != 1)
         return NULL;
-    opp = g_malloc0(sizeof(openpic_t));
+    opp = g_malloc0(sizeof(OpenPICState));
 
     memory_region_init(&opp->mem, "openpic", 0x40000);
 
@@ -1115,7 +1115,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
                      int nb_cpus, qemu_irq **irqs, qemu_irq irq_out)
 {
-    openpic_t    *mpp;
+    OpenPICState    *mpp;
     int           i;
     struct {
         const char             *name;
@@ -1129,7 +1129,7 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
         {"cpu", &openpic_cpu_ops_be, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
     };
 
-    mpp = g_malloc0(sizeof(openpic_t));
+    mpp = g_malloc0(sizeof(OpenPICState));
 
     memory_region_init(&mpp->mem, "mpic", 0x40000);
     memory_region_add_subregion(address_space, base, &mpp->mem);
commit 1945dbc15f0f1ffdc9a10526448e9eba7c599d98
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 01:49:52 2012 +0100

    openpic: convert simple reg operations to builtin bitops
    
    The openpic code has its own bitmap code to access bits inside of a
    bitmap. However, that is overkill when we simply want to check for a
    bit inside of a uint32_t.
    
    So instead, let's use normal bit masks and C builtin shifts and ands.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 91e87b7..d2038d8 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -132,13 +132,12 @@ enum {
 
 #define VENI_GENERIC      0x00000000 /* Generic Vendor ID */
 
-enum mpic_ide_bits {
-    IDR_EP     = 31,
-    IDR_CI0     = 30,
-    IDR_CI1     = 29,
-    IDR_P1     = 1,
-    IDR_P0     = 0,
-};
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
 
 #define BF_WIDTH(_bits_) \
 (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
@@ -181,13 +180,17 @@ typedef struct IRQ_src_t {
     int pending;    /* TRUE if IRQ is pending */
 } IRQ_src_t;
 
-enum IPVP_bits {
-    IPVP_MASK     = 31,
-    IPVP_ACTIVITY = 30,
-    IPVP_MODE     = 29,
-    IPVP_POLARITY = 23,
-    IPVP_SENSE    = 22,
-};
+#define IPVP_MASK_SHIFT       31
+#define IPVP_MASK_MASK        (1 << IPVP_MASK_SHIFT)
+#define IPVP_ACTIVITY_SHIFT   30
+#define IPVP_ACTIVITY_MASK    (1 << IPVP_ACTIVITY_SHIFT)
+#define IPVP_MODE_SHIFT       29
+#define IPVP_MODE_MASK        (1 << IPVP_MODE_SHIFT)
+#define IPVP_POLARITY_SHIFT   23
+#define IPVP_POLARITY_MASK    (1 << IPVP_POLARITY_SHIFT)
+#define IPVP_SENSE_SHIFT      22
+#define IPVP_SENSE_MASK       (1 << IPVP_SENSE_SHIFT)
+
 #define IPVP_PRIORITY_MASK     (0x1F << 16)
 #define IPVP_PRIORITY(_ipvpr_) ((int)(((_ipvpr_) & IPVP_PRIORITY_MASK) >> 16))
 #define IPVP_VECTOR_MASK       ((1 << VECTOR_BITS) - 1)
@@ -310,7 +313,7 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
                 __func__, n_IRQ, n_CPU);
         return;
     }
-    set_bit(&src->ipvp, IPVP_ACTIVITY);
+    src->ipvp |= IPVP_ACTIVITY_MASK;
     IRQ_setbit(&dst->raised, n_IRQ);
     if (priority < dst->raised.priority) {
         /* An higher priority IRQ is already raised */
@@ -343,7 +346,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
         DPRINTF("%s: IRQ %d is not pending\n", __func__, n_IRQ);
         return;
     }
-    if (test_bit(&src->ipvp, IPVP_MASK)) {
+    if (src->ipvp & IPVP_MASK_MASK) {
         /* Interrupt source is disabled */
         DPRINTF("%s: IRQ %d is disabled\n", __func__, n_IRQ);
         return;
@@ -353,7 +356,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
         DPRINTF("%s: IRQ %d has 0 priority\n", __func__, n_IRQ);
         return;
     }
-    if (test_bit(&src->ipvp, IPVP_ACTIVITY)) {
+    if (src->ipvp & IPVP_ACTIVITY_MASK) {
         /* IRQ already active */
         DPRINTF("%s: IRQ %d is already active\n", __func__, n_IRQ);
         return;
@@ -367,18 +370,19 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
     if (src->ide == (1 << src->last_cpu)) {
         /* Only one CPU is allowed to receive this IRQ */
         IRQ_local_pipe(opp, src->last_cpu, n_IRQ);
-    } else if (!test_bit(&src->ipvp, IPVP_MODE)) {
+    } else if (!(src->ipvp & IPVP_MODE_MASK)) {
         /* Directed delivery mode */
         for (i = 0; i < opp->nb_cpus; i++) {
-            if (test_bit(&src->ide, i))
+            if (src->ide & (1 << i)) {
                 IRQ_local_pipe(opp, i, n_IRQ);
+            }
         }
     } else {
         /* Distributed delivery mode */
         for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
             if (i == opp->nb_cpus)
                 i = 0;
-            if (test_bit(&src->ide, i)) {
+            if (src->ide & (1 << i)) {
                 IRQ_local_pipe(opp, i, n_IRQ);
                 src->last_cpu = i;
                 break;
@@ -395,11 +399,12 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
     src = &opp->src[n_IRQ];
     DPRINTF("openpic: set irq %d = %d ipvp=%08x\n",
             n_IRQ, level, src->ipvp);
-    if (test_bit(&src->ipvp, IPVP_SENSE)) {
+    if (src->ipvp & IPVP_SENSE_MASK) {
         /* level-sensitive irq */
         src->pending = level;
-        if (!level)
-            reset_bit(&src->ipvp, IPVP_ACTIVITY);
+        if (!level) {
+            src->ipvp &= ~IPVP_ACTIVITY_MASK;
+        }
     } else {
         /* edge-sensitive irq */
         if (level)
@@ -810,13 +815,13 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
             retval = IPVP_VECTOR(opp->spve);
         } else {
             src = &opp->src[n_IRQ];
-            if (!test_bit(&src->ipvp, IPVP_ACTIVITY) ||
+            if (!(src->ipvp & IPVP_ACTIVITY_MASK) ||
                 !(IPVP_PRIORITY(src->ipvp) > dst->pctp)) {
                 /* - Spurious level-sensitive IRQ
                  * - Priorities has been changed
                  *   and the pending IRQ isn't allowed anymore
                  */
-                reset_bit(&src->ipvp, IPVP_ACTIVITY);
+                src->ipvp &= ~IPVP_ACTIVITY_MASK;
                 retval = IPVP_VECTOR(opp->spve);
             } else {
                 /* IRQ enter servicing state */
@@ -825,20 +830,20 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
             }
             IRQ_resetbit(&dst->raised, n_IRQ);
             dst->raised.next = -1;
-            if (!test_bit(&src->ipvp, IPVP_SENSE)) {
+            if (!(src->ipvp & IPVP_SENSE_MASK)) {
                 /* edge-sensitive IRQ */
-                reset_bit(&src->ipvp, IPVP_ACTIVITY);
+                src->ipvp &= ~IPVP_ACTIVITY_MASK;
                 src->pending = 0;
             }
 
             if ((n_IRQ >= opp->irq_ipi0) &&  (n_IRQ < (opp->irq_ipi0 + MAX_IPI))) {
                 src->ide &= ~(1 << idx);
-                if (src->ide && !test_bit(&src->ipvp, IPVP_SENSE)) {
+                if (src->ide && !(src->ipvp & IPVP_SENSE_MASK)) {
                     /* trigger on CPUs that didn't know about it yet */
                     openpic_set_irq(opp, n_IRQ, 1);
                     openpic_set_irq(opp, n_IRQ, 0);
                     /* if all CPUs knew about it, set active bit again */
-                    set_bit(&src->ipvp, IPVP_ACTIVITY);
+                    src->ipvp |= IPVP_ACTIVITY_MASK;
                 }
             }
         }
@@ -1036,9 +1041,9 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
 
 static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src)
 {
-    int n_ci = IDR_CI0 - n_CPU;
+    int n_ci = IDR_CI0_SHIFT - n_CPU;
 
-    if ((opp->flags & OPENPIC_FLAG_IDE_CRIT) && test_bit(&src->ide, n_ci)) {
+    if ((opp->flags & OPENPIC_FLAG_IDE_CRIT) && (src->ide & (1 << n_ci))) {
         qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]);
     } else {
         qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
commit e1d10851522d7262a266f95d54c48eb2b1d8eb9b
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 01:25:21 2012 +0100

    openpic: remove unused type variable
    
    The openpic source irqs are carrying around a type indicator that
    is never accessed by anything. Remove it.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 623c807..91e87b7 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -168,13 +168,6 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx);
 
-enum {
-    IRQ_EXTERNAL = 0x01,
-    IRQ_INTERNAL = 0x02,
-    IRQ_TIMER    = 0x04,
-    IRQ_SPECIAL  = 0x08,
-};
-
 typedef struct IRQ_queue_t {
     uint32_t queue[BF_WIDTH(MAX_IRQ)];
     int next;
@@ -184,7 +177,6 @@ typedef struct IRQ_queue_t {
 typedef struct IRQ_src_t {
     uint32_t ipvp;  /* IRQ vector/priority register */
     uint32_t ide;   /* IRQ destination register */
-    int type;
     int last_cpu;
     int pending;    /* TRUE if IRQ is pending */
 } IRQ_src_t;
@@ -972,7 +964,6 @@ static void openpic_save(QEMUFile* f, void *opaque)
     for (i = 0; i < opp->max_irq; i++) {
         qemu_put_be32s(f, &opp->src[i].ipvp);
         qemu_put_be32s(f, &opp->src[i].ide);
-        qemu_put_sbe32s(f, &opp->src[i].type);
         qemu_put_sbe32s(f, &opp->src[i].last_cpu);
         qemu_put_sbe32s(f, &opp->src[i].pending);
     }
@@ -1022,7 +1013,6 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     for (i = 0; i < opp->max_irq; i++) {
         qemu_get_be32s(f, &opp->src[i].ipvp);
         qemu_get_be32s(f, &opp->src[i].ide);
-        qemu_get_sbe32s(f, &opp->src[i].type);
         qemu_get_sbe32s(f, &opp->src[i].last_cpu);
         qemu_get_sbe32s(f, &opp->src[i].pending);
     }
@@ -1059,7 +1049,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                         qemu_irq **irqs, qemu_irq irq_out)
 {
     openpic_t *opp;
-    int i, m;
+    int i;
     struct {
         const char             *name;
         MemoryRegionOps const  *ops;
@@ -1102,20 +1092,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
     opp->max_irq = OPENPIC_MAX_IRQ;
     opp->irq_ipi0 = OPENPIC_IRQ_IPI0;
     opp->irq_tim0 = OPENPIC_IRQ_TIM0;
-    /* Set IRQ types */
-    for (i = 0; i < OPENPIC_EXT_IRQ; i++) {
-        opp->src[i].type = IRQ_EXTERNAL;
-    }
-    for (; i < OPENPIC_IRQ_TIM0; i++) {
-        opp->src[i].type = IRQ_SPECIAL;
-    }
-    m = OPENPIC_IRQ_IPI0;
-    for (; i < m; i++) {
-        opp->src[i].type = IRQ_TIMER;
-    }
-    for (; i < OPENPIC_MAX_IRQ; i++) {
-        opp->src[i].type = IRQ_INTERNAL;
-    }
+
     for (i = 0; i < nb_cpus; i++)
         opp->dst[i].irqs = irqs[i];
     opp->irq_out = irq_out;
commit 35732cb41e8f8608bb0fd6ae023daee56d439bf1
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 01:04:48 2012 +0100

    openpic: unify memory api subregions
    
    The only difference between the "openpic" and "mpic" memory api subregion
    descriptors is the endianness. Unify them as openpic accessors with explicit
    endianness markers in their names.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index e94529b..623c807 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -867,7 +867,7 @@ static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len)
     return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
 }
 
-static const MemoryRegionOps openpic_glb_ops = {
+static const MemoryRegionOps openpic_glb_ops_le = {
     .write = openpic_gbl_write,
     .read  = openpic_gbl_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -877,7 +877,17 @@ static const MemoryRegionOps openpic_glb_ops = {
     },
 };
 
-static const MemoryRegionOps openpic_tmr_ops = {
+static const MemoryRegionOps openpic_glb_ops_be = {
+    .write = openpic_gbl_write,
+    .read  = openpic_gbl_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps openpic_tmr_ops_le = {
     .write = openpic_timer_write,
     .read  = openpic_timer_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -887,7 +897,17 @@ static const MemoryRegionOps openpic_tmr_ops = {
     },
 };
 
-static const MemoryRegionOps openpic_cpu_ops = {
+static const MemoryRegionOps openpic_tmr_ops_be = {
+    .write = openpic_timer_write,
+    .read  = openpic_timer_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps openpic_cpu_ops_le = {
     .write = openpic_cpu_write,
     .read  = openpic_cpu_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -897,7 +917,17 @@ static const MemoryRegionOps openpic_cpu_ops = {
     },
 };
 
-static const MemoryRegionOps openpic_src_ops = {
+static const MemoryRegionOps openpic_cpu_ops_be = {
+    .write = openpic_cpu_write,
+    .read  = openpic_cpu_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps openpic_src_ops_le = {
     .write = openpic_src_write,
     .read  = openpic_src_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -907,6 +937,16 @@ static const MemoryRegionOps openpic_src_ops = {
     },
 };
 
+static const MemoryRegionOps openpic_src_ops_be = {
+    .write = openpic_src_write,
+    .read  = openpic_src_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
 static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 {
     unsigned int i;
@@ -1026,10 +1066,14 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
         hwaddr      start_addr;
         ram_addr_t              size;
     } const list[] = {
-        {"glb", &openpic_glb_ops, OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
-        {"tmr", &openpic_tmr_ops, OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
-        {"src", &openpic_src_ops, OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
-        {"cpu", &openpic_cpu_ops, OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
+        {"glb", &openpic_glb_ops_le, OPENPIC_GLB_REG_START,
+                                     OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_le, OPENPIC_TMR_REG_START,
+                                     OPENPIC_TMR_REG_SIZE},
+        {"src", &openpic_src_ops_le, OPENPIC_SRC_REG_START,
+                                     OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_le, OPENPIC_CPU_REG_START,
+                                     OPENPIC_CPU_REG_SIZE},
     };
 
     /* XXX: for now, only one CPU is supported */
@@ -1086,46 +1130,6 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
     return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq);
 }
 
-static const MemoryRegionOps mpic_glb_ops = {
-    .write = openpic_gbl_write,
-    .read  = openpic_gbl_read,
-    .endianness = DEVICE_BIG_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
-    },
-};
-
-static const MemoryRegionOps mpic_tmr_ops = {
-    .write = openpic_timer_write,
-    .read  = openpic_timer_read,
-    .endianness = DEVICE_BIG_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
-    },
-};
-
-static const MemoryRegionOps mpic_cpu_ops = {
-    .write = openpic_cpu_write,
-    .read  = openpic_cpu_read,
-    .endianness = DEVICE_BIG_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
-    },
-};
-
-static const MemoryRegionOps mpic_irq_ops = {
-    .write = openpic_src_write,
-    .read  = openpic_src_read,
-    .endianness = DEVICE_BIG_ENDIAN,
-    .impl = {
-        .min_access_size = 4,
-        .max_access_size = 4,
-    },
-};
-
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
                      int nb_cpus, qemu_irq **irqs, qemu_irq irq_out)
 {
@@ -1137,10 +1141,10 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
         hwaddr      start_addr;
         ram_addr_t              size;
     } const list[] = {
-        {"glb", &mpic_glb_ops, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
-        {"tmr", &mpic_tmr_ops, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
-        {"src", &mpic_irq_ops, MPIC_SRC_REG_START, MPIC_SRC_REG_SIZE},
-        {"cpu", &mpic_cpu_ops, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
+        {"glb", &openpic_glb_ops_be, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_be, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
+        {"src", &openpic_src_ops_be, MPIC_SRC_REG_START, MPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_be, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
     };
 
     mpp = g_malloc0(sizeof(openpic_t));
commit 825463b38414c9afb657caee1ce20eff2d521317
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 00:58:54 2012 +0100

    openpic: combine openpic and mpic reset functions
    
    The openpic and mpic reset handlers are almost identical. Combine
    them and extract the differences into state variables.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 80016a2..e94529b 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -53,7 +53,6 @@
 #define MAX_IPI     4
 #define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
 #define VID         0x03 /* MPIC version ID */
-#define VENI        0x00000000 /* Vendor ID */
 
 enum {
     IRQ_IPVP = 0,
@@ -125,6 +124,14 @@ enum {
 #define FSL_BRR1_IPMJ (0x00 << 8) /* 8 bit IP major number */
 #define FSL_BRR1_IPMN 0x00 /* 8 bit IP minor number */
 
+#define FREP_NIRQ_SHIFT   16
+#define FREP_NCPU_SHIFT    8
+#define FREP_VID_SHIFT     0
+
+#define VID_REVISION_1_2   2
+
+#define VENI_GENERIC      0x00000000 /* Generic Vendor ID */
+
 enum mpic_ide_bits {
     IDR_EP     = 31,
     IDR_CI0     = 30,
@@ -208,6 +215,13 @@ typedef struct openpic_t {
 
     /* Behavior control */
     uint32_t flags;
+    uint32_t nb_irqs;
+    uint32_t vid;
+    uint32_t veni; /* Vendor identification register */
+    uint32_t spve_mask;
+    uint32_t tifr_reset;
+    uint32_t ipvp_reset;
+    uint32_t ide_reset;
 
     /* Sub-regions */
     MemoryRegion sub_io_mem[7];
@@ -215,8 +229,6 @@ typedef struct openpic_t {
     /* Global registers */
     uint32_t frep; /* Feature reporting register */
     uint32_t glbc; /* Global configuration register  */
-    uint32_t micr; /* MPIC interrupt configuration register */
-    uint32_t veni; /* Vendor identification register */
     uint32_t pint; /* Processor initialization register */
     uint32_t spve; /* Spurious vector register */
     uint32_t tifr; /* Timer frequency reporting register */
@@ -235,7 +247,6 @@ typedef struct openpic_t {
     int max_irq;
     int irq_ipi0;
     int irq_tim0;
-    void (*reset) (void *);
 } openpic_t;
 
 static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src);
@@ -412,17 +423,17 @@ static void openpic_reset (void *opaque)
 
     opp->glbc = 0x80000000;
     /* Initialise controller registers */
-    opp->frep = ((OPENPIC_EXT_IRQ - 1) << 16) | ((MAX_CPU - 1) << 8) | VID;
-    opp->veni = VENI;
+    opp->frep = ((opp->nb_irqs -1) << FREP_NIRQ_SHIFT) |
+                ((opp->nb_cpus -1) << FREP_NCPU_SHIFT) |
+                (opp->vid << FREP_VID_SHIFT);
+
     opp->pint = 0x00000000;
-    opp->spve = 0x000000FF;
-    opp->tifr = 0x003F7A00;
-    /* ? */
-    opp->micr = 0x00000000;
+    opp->spve = -1 & opp->spve_mask;
+    opp->tifr = opp->tifr_reset;
     /* Initialise IRQ sources */
     for (i = 0; i < opp->max_irq; i++) {
-        opp->src[i].ipvp = 0xA0000000;
-        opp->src[i].ide  = 0x00000000;
+        opp->src[i].ipvp = opp->ipvp_reset;
+        opp->src[i].ide  = opp->ide_reset;
     }
     /* Initialise IRQ destinations */
     for (i = 0; i < MAX_CPU; i++) {
@@ -499,9 +510,9 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
     case 0x1000: /* FREP */
         break;
     case 0x1020: /* GLBC */
-        if (val & 0x80000000 && opp->reset)
-            opp->reset(opp);
-        opp->glbc = val & ~0x80000000;
+        if (val & 0x80000000) {
+            openpic_reset(opp);
+        }
         break;
     case 0x1080: /* VENI */
         break;
@@ -530,7 +541,7 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
         }
         break;
     case 0x10E0: /* SPVE */
-        opp->spve = val & 0x000000FF;
+        opp->spve = val & opp->spve_mask;
         break;
     default:
         break;
@@ -912,9 +923,7 @@ static void openpic_save(QEMUFile* f, void *opaque)
     openpic_t *opp = (openpic_t *)opaque;
     unsigned int i;
 
-    qemu_put_be32s(f, &opp->frep);
     qemu_put_be32s(f, &opp->glbc);
-    qemu_put_be32s(f, &opp->micr);
     qemu_put_be32s(f, &opp->veni);
     qemu_put_be32s(f, &opp->pint);
     qemu_put_be32s(f, &opp->spve);
@@ -964,9 +973,7 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     if (version_id != 1)
         return -EINVAL;
 
-    qemu_get_be32s(f, &opp->frep);
     qemu_get_be32s(f, &opp->glbc);
-    qemu_get_be32s(f, &opp->micr);
     qemu_get_be32s(f, &opp->veni);
     qemu_get_be32s(f, &opp->pint);
     qemu_get_be32s(f, &opp->spve);
@@ -1043,6 +1050,11 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
 
     //    isu_base &= 0xFFFC0000;
     opp->nb_cpus = nb_cpus;
+    opp->nb_irqs = OPENPIC_EXT_IRQ;
+    opp->vid = VID;
+    opp->veni = VENI_GENERIC;
+    opp->spve_mask = 0xFF;
+    opp->tifr_reset = 0x003F7A00;
     opp->max_irq = OPENPIC_MAX_IRQ;
     opp->irq_ipi0 = OPENPIC_IRQ_IPI0;
     opp->irq_tim0 = OPENPIC_IRQ_TIM0;
@@ -1068,51 +1080,12 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                     openpic_save, openpic_load, opp);
     qemu_register_reset(openpic_reset, opp);
 
-    opp->reset = openpic_reset;
-
     if (pmem)
         *pmem = &opp->mem;
 
     return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq);
 }
 
-static void mpic_reset (void *opaque)
-{
-    openpic_t *mpp = (openpic_t *)opaque;
-    int i;
-
-    mpp->glbc = 0x80000000;
-    /* Initialise controller registers */
-    mpp->frep = 0x004f0002 | ((mpp->nb_cpus - 1) << 8);
-    mpp->veni = VENI;
-    mpp->pint = 0x00000000;
-    mpp->spve = 0x0000FFFF;
-    /* Initialise IRQ sources */
-    for (i = 0; i < mpp->max_irq; i++) {
-        mpp->src[i].ipvp = 0x80800000;
-        mpp->src[i].ide  = 0x00000001;
-    }
-    /* Set IDE for IPIs to 0 so we don't get spurious interrupts */
-    for (i = mpp->irq_ipi0; i < (mpp->irq_ipi0 + MAX_IPI); i++) {
-        mpp->src[i].ide = 0;
-    }
-    /* Initialise IRQ destinations */
-    for (i = 0; i < MAX_CPU; i++) {
-        mpp->dst[i].pctp      = 0x0000000F;
-        memset(&mpp->dst[i].raised, 0, sizeof(IRQ_queue_t));
-        mpp->dst[i].raised.next = -1;
-        memset(&mpp->dst[i].servicing, 0, sizeof(IRQ_queue_t));
-        mpp->dst[i].servicing.next = -1;
-    }
-    /* Initialise timers */
-    for (i = 0; i < MAX_TMR; i++) {
-        mpp->timers[i].ticc = 0x00000000;
-        mpp->timers[i].tibc = 0x80000000;
-    }
-    /* Go out of RESET state */
-    mpp->glbc = 0x00000000;
-}
-
 static const MemoryRegionOps mpic_glb_ops = {
     .write = openpic_gbl_write,
     .read  = openpic_gbl_read,
@@ -1185,6 +1158,15 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
     }
 
     mpp->nb_cpus = nb_cpus;
+    /* 12 external sources, 48 internal sources , 4 timer sources,
+       4 IPI sources, 4 messaging sources, and 8 Shared MSI sources */
+    mpp->nb_irqs = 80;
+    mpp->vid = VID_REVISION_1_2;
+    mpp->veni = VENI_GENERIC;
+    mpp->spve_mask = 0xFFFF;
+    mpp->tifr_reset = 0x00000000;
+    mpp->ipvp_reset = 0x80000000;
+    mpp->ide_reset = 0x00000001;
     mpp->max_irq = MPIC_MAX_IRQ;
     mpp->irq_ipi0 = MPIC_IPI_IRQ;
     mpp->irq_tim0 = MPIC_TMR_IRQ;
@@ -1195,10 +1177,9 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
 
     /* Enable critical interrupt support */
     mpp->flags |= OPENPIC_FLAG_IDE_CRIT;
-    mpp->reset = mpic_reset;
 
     register_savevm(NULL, "mpic", 0, 2, openpic_save, openpic_load, mpp);
-    qemu_register_reset(mpic_reset, mpp);
+    qemu_register_reset(openpic_reset, mpp);
 
     return qemu_allocate_irqs(openpic_set_irq, mpp, mpp->max_irq);
 }
commit c38c0b8ad0551e470984f2ae7e8e54aae304ed4b
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Dec 8 00:43:42 2012 +0100

    openpic: merge mpic and openpic timer handling
    
    The openpic and mpic timer handling code is basically the same.
    Merge them.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index d709e36..80016a2 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -195,7 +195,6 @@ enum IPVP_bits {
 #define IPVP_VECTOR(_ipvpr_)   ((_ipvpr_) & IPVP_VECTOR_MASK)
 
 typedef struct IRQ_dst_t {
-    uint32_t tfrr;
     uint32_t pctp; /* CPU current task priority */
     uint32_t pcsr; /* CPU sensitivity register */
     IRQ_queue_t raised;
@@ -533,9 +532,6 @@ static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
     case 0x10E0: /* SPVE */
         opp->spve = val & 0x000000FF;
         break;
-    case 0x10F0: /* TIFR */
-        opp->tifr = val;
-        break;
     default:
         break;
     }
@@ -587,9 +583,6 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len)
     case 0x10E0: /* SPVE */
         retval = opp->spve;
         break;
-    case 0x10F0: /* TIFR */
-        retval = opp->tifr;
-        break;
     default:
         break;
     }
@@ -607,24 +600,28 @@ static void openpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
     DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
     if (addr & 0xF)
         return;
-    addr -= 0x10;
-    addr &= 0xFFFF;
-    idx = (addr & 0xFFF0) >> 6;
+    idx = (addr >> 6) & 0x3;
     addr = addr & 0x30;
-    switch (addr) {
-    case 0x00: /* TICC */
+
+    if (addr == 0x0) {
+        /* TIFR (TFRR) */
+        opp->tifr = val;
+        return;
+    }
+    switch (addr & 0x30) {
+    case 0x00: /* TICC (GTCCR) */
         break;
-    case 0x10: /* TIBC */
+    case 0x10: /* TIBC (GTBCR) */
         if ((opp->timers[idx].ticc & 0x80000000) != 0 &&
             (val & 0x80000000) == 0 &&
             (opp->timers[idx].tibc & 0x80000000) != 0)
             opp->timers[idx].ticc &= ~0x80000000;
         opp->timers[idx].tibc = val;
         break;
-    case 0x20: /* TIVP */
+    case 0x20: /* TIVP (GTIVPR) */
         write_IRQreg_ipvp(opp, opp->irq_tim0 + idx, val);
         break;
-    case 0x30: /* TIDE */
+    case 0x30: /* TIDE (GTIDR) */
         write_IRQreg_ide(opp, opp->irq_tim0 + idx, val);
         break;
     }
@@ -633,31 +630,35 @@ static void openpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
 static uint64_t openpic_timer_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *opp = opaque;
-    uint32_t retval;
+    uint32_t retval = -1;
     int idx;
 
     DPRINTF("%s: addr %08x\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-    addr -= 0x10;
-    addr &= 0xFFFF;
-    idx = (addr & 0xFFF0) >> 6;
-    addr = addr & 0x30;
-    switch (addr) {
-    case 0x00: /* TICC */
+    if (addr & 0xF) {
+        goto out;
+    }
+    idx = (addr >> 6) & 0x3;
+    if (addr == 0x0) {
+        /* TIFR (TFRR) */
+        retval = opp->tifr;
+        goto out;
+    }
+    switch (addr & 0x30) {
+    case 0x00: /* TICC (GTCCR) */
         retval = opp->timers[idx].ticc;
         break;
-    case 0x10: /* TIBC */
+    case 0x10: /* TIBC (GTBCR) */
         retval = opp->timers[idx].tibc;
         break;
-    case 0x20: /* TIPV */
+    case 0x20: /* TIPV (TIPV) */
         retval = read_IRQreg_ipvp(opp, opp->irq_tim0 + idx);
         break;
-    case 0x30: /* TIDE */
+    case 0x30: /* TIDE (TIDR) */
         retval = read_IRQreg_ide(opp, opp->irq_tim0 + idx);
         break;
     }
+
+out:
     DPRINTF("%s: => %08x\n", __func__, retval);
 
     return retval;
@@ -930,7 +931,6 @@ static void openpic_save(QEMUFile* f, void *opaque)
     qemu_put_sbe32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
-        qemu_put_be32s(f, &opp->dst[i].tfrr);
         qemu_put_be32s(f, &opp->dst[i].pctp);
         qemu_put_be32s(f, &opp->dst[i].pcsr);
         openpic_save_IRQ_queue(f, &opp->dst[i].raised);
@@ -983,7 +983,6 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     qemu_get_sbe32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
-        qemu_get_be32s(f, &opp->dst[i].tfrr);
         qemu_get_be32s(f, &opp->dst[i].pctp);
         qemu_get_be32s(f, &opp->dst[i].pcsr);
         openpic_load_IRQ_queue(f, &opp->dst[i].raised);
@@ -1100,7 +1099,6 @@ static void mpic_reset (void *opaque)
     /* Initialise IRQ destinations */
     for (i = 0; i < MAX_CPU; i++) {
         mpp->dst[i].pctp      = 0x0000000F;
-        mpp->dst[i].tfrr      = 0x00000000;
         memset(&mpp->dst[i].raised, 0, sizeof(IRQ_queue_t));
         mpp->dst[i].raised.next = -1;
         memset(&mpp->dst[i].servicing, 0, sizeof(IRQ_queue_t));
@@ -1115,73 +1113,6 @@ static void mpic_reset (void *opaque)
     mpp->glbc = 0x00000000;
 }
 
-static void mpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
-                             unsigned len)
-{
-    openpic_t *mpp = opaque;
-    int idx, cpu;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-    cpu = addr >> 12;
-    idx = (addr >> 6) & 0x3;
-    switch (addr & 0x30) {
-    case 0x00: /* gtccr */
-        break;
-    case 0x10: /* gtbcr */
-        if ((mpp->timers[idx].ticc & 0x80000000) != 0 &&
-            (val & 0x80000000) == 0 &&
-            (mpp->timers[idx].tibc & 0x80000000) != 0)
-            mpp->timers[idx].ticc &= ~0x80000000;
-        mpp->timers[idx].tibc = val;
-        break;
-    case 0x20: /* GTIVPR */
-        write_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx, val);
-        break;
-    case 0x30: /* GTIDR & TFRR */
-        if ((addr & 0xF0) == 0xF0)
-            mpp->dst[cpu].tfrr = val;
-        else
-            write_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx, val);
-        break;
-    }
-}
-
-static uint64_t mpic_timer_read(void *opaque, hwaddr addr, unsigned len)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx, cpu;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-    cpu = addr >> 12;
-    idx = (addr >> 6) & 0x3;
-    switch (addr & 0x30) {
-    case 0x00: /* gtccr */
-        retval = mpp->timers[idx].ticc;
-        break;
-    case 0x10: /* gtbcr */
-        retval = mpp->timers[idx].tibc;
-        break;
-    case 0x20: /* TIPV */
-        retval = read_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx);
-        break;
-    case 0x30: /* TIDR */
-        if ((addr &0xF0) == 0XF0)
-            retval = mpp->dst[cpu].tfrr;
-        else
-            retval = read_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx);
-        break;
-    }
-    DPRINTF("%s: => %08x\n", __func__, retval);
-
-    return retval;
-}
-
 static const MemoryRegionOps mpic_glb_ops = {
     .write = openpic_gbl_write,
     .read  = openpic_gbl_read,
@@ -1193,8 +1124,8 @@ static const MemoryRegionOps mpic_glb_ops = {
 };
 
 static const MemoryRegionOps mpic_tmr_ops = {
-    .write = mpic_timer_write,
-    .read  = mpic_timer_read,
+    .write = openpic_timer_write,
+    .read  = openpic_timer_read,
     .endianness = DEVICE_BIG_ENDIAN,
     .impl = {
         .min_access_size = 4,
commit 5861a33898bbddfd1a80c2e202cb9352e3b1ba62
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Dec 7 23:51:09 2012 +0100

    openpic: combine mpic and openpic irq raise functions
    
    The IRQ raise mechanisms of the OpenPIC and MPIC controllers is identical,
    just that the MPIC one can also raise critical interrupts.
    
    Combine those two and check for critical raise capability during runtime.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 2a3b56a..d709e36 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -207,6 +207,9 @@ typedef struct openpic_t {
     PCIDevice pci_dev;
     MemoryRegion mem;
 
+    /* Behavior control */
+    uint32_t flags;
+
     /* Sub-regions */
     MemoryRegion sub_io_mem[7];
 
@@ -234,9 +237,10 @@ typedef struct openpic_t {
     int irq_ipi0;
     int irq_tim0;
     void (*reset) (void *);
-    void (*irq_raise) (struct openpic_t *, int, IRQ_src_t *);
 } openpic_t;
 
+static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src);
+
 static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ)
 {
     set_bit(q->queue, n_IRQ);
@@ -321,7 +325,7 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
         return;
     }
     DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n", n_CPU, n_IRQ);
-    opp->irq_raise(opp, n_CPU, src);
+    openpic_irq_raise(opp, n_CPU, src);
 }
 
 /* update pic state because registers for n_IRQ have changed value */
@@ -753,7 +757,7 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
              IPVP_PRIORITY(src->ipvp) > dst->servicing.priority)) {
             DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n",
                     idx, n_IRQ);
-            opp->irq_raise(opp, idx, src);
+            openpic_irq_raise(opp, idx, src);
         }
         break;
     default:
@@ -996,7 +1000,13 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
 
 static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src)
 {
-    qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
+    int n_ci = IDR_CI0 - n_CPU;
+
+    if ((opp->flags & OPENPIC_FLAG_IDE_CRIT) && test_bit(&src->ide, n_ci)) {
+        qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]);
+    } else {
+        qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
+    }
 }
 
 qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
@@ -1059,7 +1069,6 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                     openpic_save, openpic_load, opp);
     qemu_register_reset(openpic_reset, opp);
 
-    opp->irq_raise = openpic_irq_raise;
     opp->reset = openpic_reset;
 
     if (pmem)
@@ -1068,18 +1077,6 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
     return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq);
 }
 
-static void mpic_irq_raise(openpic_t *mpp, int n_CPU, IRQ_src_t *src)
-{
-    int n_ci = IDR_CI0 - n_CPU;
-
-    if(test_bit(&src->ide, n_ci)) {
-        qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]);
-    }
-    else {
-        qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
-    }
-}
-
 static void mpic_reset (void *opaque)
 {
     openpic_t *mpp = (openpic_t *)opaque;
@@ -1265,7 +1262,8 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
         mpp->dst[i].irqs = irqs[i];
     mpp->irq_out = irq_out;
 
-    mpp->irq_raise = mpic_irq_raise;
+    /* Enable critical interrupt support */
+    mpp->flags |= OPENPIC_FLAG_IDE_CRIT;
     mpp->reset = mpic_reset;
 
     register_savevm(NULL, "mpic", 0, 2, openpic_save, openpic_load, mpp);
diff --git a/hw/openpic.h b/hw/openpic.h
index f50a1e4..1232d10 100644
--- a/hw/openpic.h
+++ b/hw/openpic.h
@@ -11,6 +11,9 @@ enum {
     OPENPIC_OUTPUT_NB,
 };
 
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDE_CRIT    (1 << 0)
+
 qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
                         qemu_irq **irqs, qemu_irq irq_out);
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
commit 780d16b77f992db81e90b7dd7474a9397ce393b9
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Dec 7 17:15:15 2012 +0100

    openpic: Convert subregions to memory api
    
    The "openpic" controller is currently using one big region and does
    subregion dispatching manually. Move this to the memory api.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index b671d9d..2a3b56a 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -79,6 +79,15 @@ enum {
 #define OPENPIC_IRQ_MBX0   (OPENPIC_IRQ_DBL0 + OPENPIC_MAX_DBL) /* First mailbox IRQ */
 #endif
 
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
+
 /* MPIC */
 #define MPIC_MAX_CPU      1
 #define MPIC_MAX_EXT     12
@@ -842,53 +851,39 @@ static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len)
     return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
 }
 
-static void openpic_write(void *opaque, hwaddr addr, uint64_t val,
-                          unsigned len)
-{
-    openpic_t *opp = opaque;
-
-    DPRINTF("%s: offset %08x val: %08x\n", __func__, (int)addr, val);
-    if (addr < 0x1100) {
-        /* Global registers */
-        openpic_gbl_write(opp, addr, val, len);
-    } else if (addr < 0x10000) {
-        /* Timers registers */
-        openpic_timer_write(opp, addr, val, len);
-    } else if (addr < 0x20000) {
-        /* Source registers */
-        openpic_src_write(opp, addr, val, len);
-    } else {
-        /* CPU registers */
-        openpic_cpu_write(opp, addr, val, len);
-    }
-}
-
-static uint64_t openpic_read(void *opaque, hwaddr addr, unsigned len)
-{
-    openpic_t *opp = opaque;
-    uint32_t retval;
+static const MemoryRegionOps openpic_glb_ops = {
+    .write = openpic_gbl_write,
+    .read  = openpic_gbl_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    DPRINTF("%s: offset %08x\n", __func__, (int)addr);
-    if (addr < 0x1100) {
-        /* Global registers */
-        retval = openpic_gbl_read(opp, addr, len);
-    } else if (addr < 0x10000) {
-        /* Timers registers */
-        retval = openpic_timer_read(opp, addr, len);
-    } else if (addr < 0x20000) {
-        /* Source registers */
-        retval = openpic_src_read(opp, addr, len);
-    } else {
-        /* CPU registers */
-        retval = openpic_cpu_read(opp, addr, len);
-    }
+static const MemoryRegionOps openpic_tmr_ops = {
+    .write = openpic_timer_write,
+    .read  = openpic_timer_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    return retval;
-}
+static const MemoryRegionOps openpic_cpu_ops = {
+    .write = openpic_cpu_write,
+    .read  = openpic_cpu_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-static const MemoryRegionOps openpic_ops = {
-    .read = openpic_read,
-    .write = openpic_write,
+static const MemoryRegionOps openpic_src_ops = {
+    .write = openpic_src_write,
+    .read  = openpic_src_read,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
         .min_access_size = 4,
@@ -1009,12 +1004,33 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
 {
     openpic_t *opp;
     int i, m;
+    struct {
+        const char             *name;
+        MemoryRegionOps const  *ops;
+        hwaddr      start_addr;
+        ram_addr_t              size;
+    } const list[] = {
+        {"glb", &openpic_glb_ops, OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops, OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
+        {"src", &openpic_src_ops, OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops, OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
+    };
 
     /* XXX: for now, only one CPU is supported */
     if (nb_cpus != 1)
         return NULL;
     opp = g_malloc0(sizeof(openpic_t));
-    memory_region_init_io(&opp->mem, &openpic_ops, opp, "openpic", 0x40000);
+
+    memory_region_init(&opp->mem, "openpic", 0x40000);
+
+    for (i = 0; i < ARRAY_SIZE(list); i++) {
+
+        memory_region_init_io(&opp->sub_io_mem[i], list[i].ops, opp,
+                              list[i].name, list[i].size);
+
+        memory_region_add_subregion(&opp->mem, list[i].start_addr,
+                                    &opp->sub_io_mem[i]);
+    }
 
     //    isu_base &= 0xFFFC0000;
     opp->nb_cpus = nb_cpus;
commit a285f1ca703a434fa8edf584f94a1dc29067ab29
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Dec 7 16:45:40 2012 +0100

    openpic: combine mpic and openpic src handlers
    
    The MPIC source irq handler suddenly became identical to the standard
    OpenPIC source irq handler. Combine them into the same function.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 1d714f4..b671d9d 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -100,8 +100,8 @@ enum {
 #define MPIC_GLB_REG_SIZE         0x10F0
 #define MPIC_TMR_REG_START        0x10F0
 #define MPIC_TMR_REG_SIZE         0x220
-#define MPIC_IRQ_REG_START        0x10000
-#define MPIC_IRQ_REG_SIZE         (MAX_SRC * 0x20)
+#define MPIC_SRC_REG_START        0x10000
+#define MPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
 #define MPIC_CPU_REG_START        0x20000
 #define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
 
@@ -1169,48 +1169,6 @@ static uint64_t mpic_timer_read(void *opaque, hwaddr addr, unsigned len)
     return retval;
 }
 
-static void mpic_src_irq_write(void *opaque, hwaddr addr,
-                               uint64_t val, unsigned len)
-{
-    openpic_t *mpp = opaque;
-    int idx = addr / 0x20;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08" PRIx64 "\n",
-            __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr & 0x10) {
-        /* EXDE / IFEDE / IEEDE */
-        write_IRQreg_ide(mpp, idx, val);
-    } else {
-        /* EXVP / IFEVP / IEEVP */
-        write_IRQreg_ipvp(mpp, idx, val);
-    }
-}
-
-static uint64_t mpic_src_irq_read(void *opaque, hwaddr addr, unsigned len)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = addr / 0x20;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    if (addr & 0xF)
-        return -1;
-
-    if (addr & 0x10) {
-        /* EXDE / IFEDE / IEEDE */
-        retval = read_IRQreg_ide(mpp, idx);
-    } else {
-        /* EXVP / IFEVP / IEEVP */
-        retval = read_IRQreg_ipvp(mpp, idx);
-    }
-    DPRINTF("%s: => %08x\n", __func__, retval);
-
-    return retval;
-}
-
 static const MemoryRegionOps mpic_glb_ops = {
     .write = openpic_gbl_write,
     .read  = openpic_gbl_read,
@@ -1242,8 +1200,8 @@ static const MemoryRegionOps mpic_cpu_ops = {
 };
 
 static const MemoryRegionOps mpic_irq_ops = {
-    .write = mpic_src_irq_write,
-    .read  = mpic_src_irq_read,
+    .write = openpic_src_write,
+    .read  = openpic_src_read,
     .endianness = DEVICE_BIG_ENDIAN,
     .impl = {
         .min_access_size = 4,
@@ -1264,7 +1222,7 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
     } const list[] = {
         {"glb", &mpic_glb_ops, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
         {"tmr", &mpic_tmr_ops, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
-        {"irq", &mpic_irq_ops, MPIC_IRQ_REG_START, MPIC_IRQ_REG_SIZE},
+        {"src", &mpic_irq_ops, MPIC_SRC_REG_START, MPIC_SRC_REG_SIZE},
         {"cpu", &mpic_cpu_ops, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
     };
 
commit b9b2aaa3c6926cf8af21fe75457ca6c6e0dc1f5d
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Dec 7 16:31:55 2012 +0100

    openpic: update to proper memory api
    
    The openpic code was still using the old mmio memory api. Convert it to
    be a generic memory api user and clean up some code that becomes redundant
    that way.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index 122ce76..1d714f4 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -461,7 +461,8 @@ static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
             opp->src[n_IRQ].ipvp);
 }
 
-static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
+static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
     openpic_t *opp = opaque;
     IRQ_dst_t *dst;
@@ -527,7 +528,7 @@ static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
     }
 }
 
-static uint32_t openpic_gbl_read (void *opaque, hwaddr addr)
+static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *opp = opaque;
     uint32_t retval;
@@ -584,7 +585,8 @@ static uint32_t openpic_gbl_read (void *opaque, hwaddr addr)
     return retval;
 }
 
-static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val)
+static void openpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned len)
 {
     openpic_t *opp = opaque;
     int idx;
@@ -615,7 +617,7 @@ static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t openpic_timer_read (void *opaque, uint32_t addr)
+static uint64_t openpic_timer_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *opp = opaque;
     uint32_t retval;
@@ -648,7 +650,8 @@ static uint32_t openpic_timer_read (void *opaque, uint32_t addr)
     return retval;
 }
 
-static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
+static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
     openpic_t *opp = opaque;
     int idx;
@@ -667,7 +670,7 @@ static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t openpic_src_read (void *opaque, uint32_t addr)
+static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
 {
     openpic_t *opp = opaque;
     uint32_t retval;
@@ -749,7 +752,8 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
     }
 }
 
-static void openpic_cpu_write(void *opaque, hwaddr addr, uint32_t val)
+static void openpic_cpu_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
     openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12);
 }
@@ -833,96 +837,63 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
     return retval;
 }
 
-static uint32_t openpic_cpu_read(void *opaque, hwaddr addr)
+static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len)
 {
     return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
 }
 
-static void openpic_buggy_write (void *opaque,
-                                 hwaddr addr, uint32_t val)
-{
-    printf("Invalid OPENPIC write access !\n");
-}
-
-static uint32_t openpic_buggy_read (void *opaque, hwaddr addr)
-{
-    printf("Invalid OPENPIC read access !\n");
-
-    return -1;
-}
-
-static void openpic_writel (void *opaque,
-                            hwaddr addr, uint32_t val)
+static void openpic_write(void *opaque, hwaddr addr, uint64_t val,
+                          unsigned len)
 {
     openpic_t *opp = opaque;
 
-    addr &= 0x3FFFF;
     DPRINTF("%s: offset %08x val: %08x\n", __func__, (int)addr, val);
     if (addr < 0x1100) {
         /* Global registers */
-        openpic_gbl_write(opp, addr, val);
+        openpic_gbl_write(opp, addr, val, len);
     } else if (addr < 0x10000) {
         /* Timers registers */
-        openpic_timer_write(opp, addr, val);
+        openpic_timer_write(opp, addr, val, len);
     } else if (addr < 0x20000) {
         /* Source registers */
-        openpic_src_write(opp, addr, val);
+        openpic_src_write(opp, addr, val, len);
     } else {
         /* CPU registers */
-        openpic_cpu_write(opp, addr, val);
+        openpic_cpu_write(opp, addr, val, len);
     }
 }
 
-static uint32_t openpic_readl (void *opaque,hwaddr addr)
+static uint64_t openpic_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *opp = opaque;
     uint32_t retval;
 
-    addr &= 0x3FFFF;
     DPRINTF("%s: offset %08x\n", __func__, (int)addr);
     if (addr < 0x1100) {
         /* Global registers */
-        retval = openpic_gbl_read(opp, addr);
+        retval = openpic_gbl_read(opp, addr, len);
     } else if (addr < 0x10000) {
         /* Timers registers */
-        retval = openpic_timer_read(opp, addr);
+        retval = openpic_timer_read(opp, addr, len);
     } else if (addr < 0x20000) {
         /* Source registers */
-        retval = openpic_src_read(opp, addr);
+        retval = openpic_src_read(opp, addr, len);
     } else {
         /* CPU registers */
-        retval = openpic_cpu_read(opp, addr);
+        retval = openpic_cpu_read(opp, addr, len);
     }
 
     return retval;
 }
 
-static uint64_t openpic_read(void *opaque, hwaddr addr,
-                             unsigned size)
-{
-    openpic_t *opp = opaque;
-
-    switch (size) {
-    case 4: return openpic_readl(opp, addr);
-    default: return openpic_buggy_read(opp, addr);
-    }
-}
-
-static void openpic_write(void *opaque, hwaddr addr,
-                          uint64_t data, unsigned size)
-{
-    openpic_t *opp = opaque;
-
-    switch (size) {
-    case 4: return openpic_writel(opp, addr, data);
-    default: return openpic_buggy_write(opp, addr, data);
-    }
-}
-
 static const MemoryRegionOps openpic_ops = {
     .read = openpic_read,
     .write = openpic_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
@@ -1131,7 +1102,8 @@ static void mpic_reset (void *opaque)
     mpp->glbc = 0x00000000;
 }
 
-static void mpic_timer_write (void *opaque, hwaddr addr, uint32_t val)
+static void mpic_timer_write(void *opaque, hwaddr addr, uint64_t val,
+                             unsigned len)
 {
     openpic_t *mpp = opaque;
     int idx, cpu;
@@ -1139,7 +1111,6 @@ static void mpic_timer_write (void *opaque, hwaddr addr, uint32_t val)
     DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
     if (addr & 0xF)
         return;
-    addr &= 0xFFFF;
     cpu = addr >> 12;
     idx = (addr >> 6) & 0x3;
     switch (addr & 0x30) {
@@ -1164,7 +1135,7 @@ static void mpic_timer_write (void *opaque, hwaddr addr, uint32_t val)
     }
 }
 
-static uint32_t mpic_timer_read (void *opaque, hwaddr addr)
+static uint64_t mpic_timer_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *mpp = opaque;
     uint32_t retval;
@@ -1174,7 +1145,6 @@ static uint32_t mpic_timer_read (void *opaque, hwaddr addr)
     retval = 0xFFFFFFFF;
     if (addr & 0xF)
         return retval;
-    addr &= 0xFFFF;
     cpu = addr >> 12;
     idx = (addr >> 6) & 0x3;
     switch (addr & 0x30) {
@@ -1242,45 +1212,33 @@ static uint64_t mpic_src_irq_read(void *opaque, hwaddr addr, unsigned len)
 }
 
 static const MemoryRegionOps mpic_glb_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   openpic_gbl_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   openpic_gbl_read,
-        },
-    },
+    .write = openpic_gbl_write,
+    .read  = openpic_gbl_read,
     .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static const MemoryRegionOps mpic_tmr_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_timer_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_timer_read,
-        },
-    },
+    .write = mpic_timer_write,
+    .read  = mpic_timer_read,
     .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static const MemoryRegionOps mpic_cpu_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   openpic_cpu_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   openpic_cpu_read,
-        },
-    },
+    .write = openpic_cpu_write,
+    .read  = openpic_cpu_read,
     .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static const MemoryRegionOps mpic_irq_ops = {
commit cdbb912a6f8b5f325df3a6fd42cb2843032050c9
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Dec 7 16:10:34 2012 +0100

    mpic: Unify numbering scheme
    
    MPIC interrupt numbers in Linux (device tree) and in QEMU are different,
    because QEMU takes the sparseness of the IRQ number space into account.
    
    Remove that cleverness and instead assume a flat number space. This makes
    the code easier to understand, because we are actually aligned with Linux
    on the view of our worlds.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/openpic.c b/hw/openpic.c
index b30c853..122ce76 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -46,11 +46,12 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
-#define MAX_CPU    15
-#define MAX_IRQ   128
+#define MAX_CPU     15
+#define MAX_SRC     256
 #define MAX_TMR     4
 #define VECTOR_BITS 8
 #define MAX_IPI     4
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
 #define VID         0x03 /* MPIC version ID */
 #define VENI        0x00000000 /* Vendor ID */
 
@@ -82,32 +83,25 @@ enum {
 #define MPIC_MAX_CPU      1
 #define MPIC_MAX_EXT     12
 #define MPIC_MAX_INT     64
-#define MPIC_MAX_MSG      4
-#define MPIC_MAX_MSI      8
-#define MPIC_MAX_TMR      MAX_TMR
-#define MPIC_MAX_IPI      MAX_IPI
-#define MPIC_MAX_IRQ     (MPIC_MAX_EXT + MPIC_MAX_INT + MPIC_MAX_TMR + MPIC_MAX_MSG + MPIC_MAX_MSI + (MPIC_MAX_IPI * MPIC_MAX_CPU))
+#define MPIC_MAX_IRQ     MAX_IRQ
 
 /* Interrupt definitions */
-#define MPIC_EXT_IRQ      0
-#define MPIC_INT_IRQ      (MPIC_EXT_IRQ + MPIC_MAX_EXT)
-#define MPIC_TMR_IRQ      (MPIC_INT_IRQ + MPIC_MAX_INT)
-#define MPIC_MSG_IRQ      (MPIC_TMR_IRQ + MPIC_MAX_TMR)
-#define MPIC_MSI_IRQ      (MPIC_MSG_IRQ + MPIC_MAX_MSG)
-#define MPIC_IPI_IRQ      (MPIC_MSI_IRQ + MPIC_MAX_MSI)
+/* IRQs, accessible through the IRQ region */
+#define MPIC_EXT_IRQ      0x00
+#define MPIC_INT_IRQ      0x10
+#define MPIC_MSG_IRQ      0xb0
+#define MPIC_MSI_IRQ      0xe0
+/* These are available through separate regions, but
+   for simplicity's sake mapped into the same number space */
+#define MPIC_TMR_IRQ      0x100
+#define MPIC_IPI_IRQ      0x104
 
 #define MPIC_GLB_REG_START        0x0
 #define MPIC_GLB_REG_SIZE         0x10F0
 #define MPIC_TMR_REG_START        0x10F0
 #define MPIC_TMR_REG_SIZE         0x220
-#define MPIC_EXT_REG_START        0x10000
-#define MPIC_EXT_REG_SIZE         0x180
-#define MPIC_INT_REG_START        0x10200
-#define MPIC_INT_REG_SIZE         0x800
-#define MPIC_MSG_REG_START        0x11600
-#define MPIC_MSG_REG_SIZE         0x100
-#define MPIC_MSI_REG_START        0x11C00
-#define MPIC_MSI_REG_SIZE         0x100
+#define MPIC_IRQ_REG_START        0x10000
+#define MPIC_IRQ_REG_SIZE         (MAX_SRC * 0x20)
 #define MPIC_CPU_REG_START        0x20000
 #define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
 
@@ -1205,193 +1199,44 @@ static uint32_t mpic_timer_read (void *opaque, hwaddr addr)
     return retval;
 }
 
-static void mpic_src_ext_write (void *opaque, hwaddr addr,
-                                uint32_t val)
+static void mpic_src_irq_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned len)
 {
     openpic_t *mpp = opaque;
-    int idx = MPIC_EXT_IRQ;
+    int idx = addr / 0x20;
 
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_EXT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-
-static uint32_t mpic_src_ext_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_EXT_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-
-    if (addr < MPIC_EXT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
-    }
-
-    return retval;
-}
-
-static void mpic_src_int_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_INT_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_INT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-
-static uint32_t mpic_src_int_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_INT_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-
-    if (addr < MPIC_INT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
-    }
-
-    return retval;
-}
-
-static void mpic_src_msg_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_MSG_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
+    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08" PRIx64 "\n",
+            __func__, addr, val);
     if (addr & 0xF)
         return;
 
-    if (addr < MPIC_MSG_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-
-static uint32_t mpic_src_msg_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_MSG_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-
-    if (addr < MPIC_MSG_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
+    if (addr & 0x10) {
+        /* EXDE / IFEDE / IEEDE */
+        write_IRQreg_ide(mpp, idx, val);
+    } else {
+        /* EXVP / IFEVP / IEEVP */
+        write_IRQreg_ipvp(mpp, idx, val);
     }
-
-    return retval;
 }
 
-static void mpic_src_msi_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_MSI_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_MSI_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-static uint32_t mpic_src_msi_read (void *opaque, hwaddr addr)
+static uint64_t mpic_src_irq_read(void *opaque, hwaddr addr, unsigned len)
 {
     openpic_t *mpp = opaque;
     uint32_t retval;
-    int idx = MPIC_MSI_IRQ;
+    int idx = addr / 0x20;
 
     DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
     if (addr & 0xF)
-        return retval;
+        return -1;
 
-    if (addr < MPIC_MSI_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
+    if (addr & 0x10) {
+        /* EXDE / IFEDE / IEEDE */
+        retval = read_IRQreg_ide(mpp, idx);
+    } else {
+        /* EXVP / IFEVP / IEEVP */
+        retval = read_IRQreg_ipvp(mpp, idx);
     }
+    DPRINTF("%s: => %08x\n", __func__, retval);
 
     return retval;
 }
@@ -1438,60 +1283,14 @@ static const MemoryRegionOps mpic_cpu_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
-static const MemoryRegionOps mpic_ext_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_ext_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_ext_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_int_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_int_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_int_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_msg_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_msg_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_msg_read,
-        },
-    },
+static const MemoryRegionOps mpic_irq_ops = {
+    .write = mpic_src_irq_write,
+    .read  = mpic_src_irq_read,
     .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_msi_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_msi_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_msi_read,
-        },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
     },
-    .endianness = DEVICE_BIG_ENDIAN,
 };
 
 qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
@@ -1507,10 +1306,7 @@ qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
     } const list[] = {
         {"glb", &mpic_glb_ops, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
         {"tmr", &mpic_tmr_ops, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
-        {"ext", &mpic_ext_ops, MPIC_EXT_REG_START, MPIC_EXT_REG_SIZE},
-        {"int", &mpic_int_ops, MPIC_INT_REG_START, MPIC_INT_REG_SIZE},
-        {"msg", &mpic_msg_ops, MPIC_MSG_REG_START, MPIC_MSG_REG_SIZE},
-        {"msi", &mpic_msi_ops, MPIC_MSI_REG_START, MPIC_MSI_REG_SIZE},
+        {"irq", &mpic_irq_ops, MPIC_IRQ_REG_START, MPIC_IRQ_REG_SIZE},
         {"cpu", &mpic_cpu_ops, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
     };
 
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 47e2d41..f3e97d8 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -502,13 +502,13 @@ void ppce500_init(PPCE500Params *params)
     /* Serial */
     if (serial_hds[0]) {
         serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET,
-                       0, mpic[12+26], 399193,
+                       0, mpic[42], 399193,
                        serial_hds[0], DEVICE_BIG_ENDIAN);
     }
 
     if (serial_hds[1]) {
         serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET,
-                       0, mpic[12+26], 399193,
+                       0, mpic[42], 399193,
                        serial_hds[1], DEVICE_BIG_ENDIAN);
     }
 
commit 38898d7ed803cee5105246150725487add78cbd7
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Dec 6 15:59:27 2012 +0100

    openpic: Remove unused code
    
    The openpic code had a few WIP bits left that nobody reanimated within
    the last few years. Remove that code.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Acked-by: Hervé Poussineau <hpoussin at reactos.org>

diff --git a/hw/openpic.c b/hw/openpic.c
index 8b3784a..b30c853 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -46,27 +46,8 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
-#define USE_MPCxxx /* Intel model is broken, for now */
-
-#if defined (USE_INTEL_GW80314)
-/* Intel GW80314 I/O Companion chip */
-
-#define MAX_CPU     4
-#define MAX_IRQ    32
-#define MAX_DBL     4
-#define MAX_MBX     4
-#define MAX_TMR     4
-#define VECTOR_BITS 8
-#define MAX_IPI     4
-
-#define VID (0x00000000)
-
-#elif defined(USE_MPCxxx)
-
 #define MAX_CPU    15
 #define MAX_IRQ   128
-#define MAX_DBL     0
-#define MAX_MBX     0
 #define MAX_TMR     4
 #define VECTOR_BITS 8
 #define MAX_IPI     4
@@ -149,12 +130,6 @@ enum mpic_ide_bits {
     IDR_P0     = 0,
 };
 
-#else
-#error "Please select which OpenPic implementation is to be emulated"
-#endif
-
-#define OPENPIC_PAGE_SIZE 4096
-
 #define BF_WIDTH(_bits_) \
 (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
 
@@ -250,19 +225,6 @@ typedef struct openpic_t {
         uint32_t ticc;  /* Global timer current count register */
         uint32_t tibc;  /* Global timer base count register */
     } timers[MAX_TMR];
-#if MAX_DBL > 0
-    /* Doorbell registers */
-    uint32_t dar;        /* Doorbell activate register */
-    struct {
-        uint32_t dmr;    /* Doorbell messaging register */
-    } doorbells[MAX_DBL];
-#endif
-#if MAX_MBX > 0
-    /* Mailbox registers */
-    struct {
-        uint32_t mbr;    /* Mailbox register */
-    } mailboxes[MAX_MAILBOXES];
-#endif
     /* IRQ out is used when in bypass mode (not implemented) */
     qemu_irq irq_out;
     int max_irq;
@@ -470,19 +432,6 @@ static void openpic_reset (void *opaque)
         opp->timers[i].ticc = 0x00000000;
         opp->timers[i].tibc = 0x80000000;
     }
-    /* Initialise doorbells */
-#if MAX_DBL > 0
-    opp->dar = 0x00000000;
-    for (i = 0; i < MAX_DBL; i++) {
-        opp->doorbells[i].dmr  = 0x00000000;
-    }
-#endif
-    /* Initialise mailboxes */
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MBX; i++) { /* ? */
-        opp->mailboxes[i].mbr   = 0x00000000;
-    }
-#endif
     /* Go out of RESET state */
     opp->glbc = 0x00000000;
 }
@@ -518,84 +467,6 @@ static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
             opp->src[n_IRQ].ipvp);
 }
 
-#if 0 // Code provision for Intel model
-#if MAX_DBL > 0
-static uint32_t read_doorbell_register (openpic_t *opp,
-                                        int n_dbl, uint32_t offset)
-{
-    uint32_t retval;
-
-    switch (offset) {
-    case DBL_IPVP_OFFSET:
-        retval = read_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl);
-        break;
-    case DBL_IDE_OFFSET:
-        retval = read_IRQreg_ide(opp, IRQ_DBL0 + n_dbl);
-        break;
-    case DBL_DMR_OFFSET:
-        retval = opp->doorbells[n_dbl].dmr;
-        break;
-    }
-
-    return retval;
-}
-
-static void write_doorbell_register (penpic_t *opp, int n_dbl,
-                                     uint32_t offset, uint32_t value)
-{
-    switch (offset) {
-    case DBL_IVPR_OFFSET:
-        write_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl, value);
-        break;
-    case DBL_IDE_OFFSET:
-        write_IRQreg_ide(opp, IRQ_DBL0 + n_dbl, value);
-        break;
-    case DBL_DMR_OFFSET:
-        opp->doorbells[n_dbl].dmr = value;
-        break;
-    }
-}
-#endif
-
-#if MAX_MBX > 0
-static uint32_t read_mailbox_register (openpic_t *opp,
-                                       int n_mbx, uint32_t offset)
-{
-    uint32_t retval;
-
-    switch (offset) {
-    case MBX_MBR_OFFSET:
-        retval = opp->mailboxes[n_mbx].mbr;
-        break;
-    case MBX_IVPR_OFFSET:
-        retval = read_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx);
-        break;
-    case MBX_DMR_OFFSET:
-        retval = read_IRQreg_ide(opp, IRQ_MBX0 + n_mbx);
-        break;
-    }
-
-    return retval;
-}
-
-static void write_mailbox_register (openpic_t *opp, int n_mbx,
-                                    uint32_t address, uint32_t value)
-{
-    switch (offset) {
-    case MBX_MBR_OFFSET:
-        opp->mailboxes[n_mbx].mbr = value;
-        break;
-    case MBX_IVPR_OFFSET:
-        write_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx, value);
-        break;
-    case MBX_DMR_OFFSET:
-        write_IRQreg_ide(opp, IRQ_MBX0 + n_mbx, value);
-        break;
-    }
-}
-#endif
-#endif /* 0 : Code provision for Intel model */
-
 static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
 {
     openpic_t *opp = opaque;
@@ -841,7 +712,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
     dst = &opp->dst[idx];
     addr &= 0xFF0;
     switch (addr) {
-#if MAX_IPI > 0
     case 0x40: /* IPIDR */
     case 0x50:
     case 0x60:
@@ -853,7 +723,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
         break;
-#endif
     case 0x80: /* PCTP */
         dst->pctp = val & 0x0000000F;
         break;
@@ -1109,20 +978,6 @@ static void openpic_save(QEMUFile* f, void *opaque)
         qemu_put_be32s(f, &opp->timers[i].tibc);
     }
 
-#if MAX_DBL > 0
-    qemu_put_be32s(f, &opp->dar);
-
-    for (i = 0; i < MAX_DBL; i++) {
-        qemu_put_be32s(f, &opp->doorbells[i].dmr);
-    }
-#endif
-
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MAILBOXES; i++) {
-        qemu_put_be32s(f, &opp->mailboxes[i].mbr);
-    }
-#endif
-
     pci_device_save(&opp->pci_dev, f);
 }
 
@@ -1176,20 +1031,6 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
         qemu_get_be32s(f, &opp->timers[i].tibc);
     }
 
-#if MAX_DBL > 0
-    qemu_get_be32s(f, &opp->dar);
-
-    for (i = 0; i < MAX_DBL; i++) {
-        qemu_get_be32s(f, &opp->doorbells[i].dmr);
-    }
-#endif
-
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MAILBOXES; i++) {
-        qemu_get_be32s(f, &opp->mailboxes[i].mbr);
-    }
-#endif
-
     return pci_device_load(&opp->pci_dev, f);
 }
 
@@ -1222,11 +1063,7 @@ qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
     for (; i < OPENPIC_IRQ_TIM0; i++) {
         opp->src[i].type = IRQ_SPECIAL;
     }
-#if MAX_IPI > 0
     m = OPENPIC_IRQ_IPI0;
-#else
-    m = OPENPIC_IRQ_DBL0;
-#endif
     for (; i < m; i++) {
         opp->src[i].type = IRQ_TIMER;
     }
commit 8b1853e7d8c2bf8c6a9f023ab98ba0e8a38bd086
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Dec 3 16:42:13 2012 +0000

    pseries: Don't allow TCE (iommu) tables to be registered with duplicate LIOBNs
    
    The PAPR specification requires that every bus or device mediated by the
    IOMMU have a unique Logical IO Bus Number (LIOBN).  This patch adds a check
    to enforce this, which will help catch errors in configuration earlier.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 02d78cc..3011b25 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -120,6 +120,12 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
 {
     sPAPRTCETable *tcet;
 
+    if (spapr_tce_find_by_liobn(liobn)) {
+        fprintf(stderr, "Attempted to create TCE table with duplicate"
+                " LIOBN 0x%x\n", liobn);
+        return NULL;
+    }
+
     if (!window_size) {
         return NULL;
     }
commit 3eddc1be1ccb26387f8f960f8a3d8c417064a91f
Author: Bharat Bhushan <r65777 at freescale.com>
Date:   Wed Oct 10 04:28:28 2012 +0000

    Adding BAR0 for e500 PCI controller
    
    PCI Root complex have TYPE-1 configuration header while PCI endpoint
    have type-0 configuration header. The type-1 configuration header have
    a BAR (BAR0). In Freescale PCI controller BAR0 is used for mapping pci
    address space to CCSR address space. This can used for 2 purposes: 1)
    for MSI interrupt generation 2) Allow CCSR registers access when configured
    as PCI endpoint, which I am not sure is a use case with QEMU-KVM guest.
    
    What I observed is that when guest read the size of BAR0 of host controller
    configuration header (TYPE1 header) then it always reads it as 0. When
    looking into the QEMU hw/ppce500_pci.c, I do not find the PCI controller
    device registering BAR0. I do not find any other controller also doing so
    may they do not use BAR0.
    
    There are two issues when BAR0 is not there (which I can think of):
    1) There should be BAR0 emulated for PCI Root complex (TYPE1 header) and
    when reading the size of BAR0, it should give size as per real h/w.
    
    2) Do we need this BAR0 inbound address translation?
            When BAR0 is of non-zero size then it will be configured for PCI
    address space to local address(CCSR) space translation on inbound access.
    The primary use case is for MSI interrupt generation. The device is
    configured with an address offsets in PCI address space, which will be
    translated to MSI interrupt generation MPIC registers. Currently I do
    not understand the MSI interrupt generation mechanism in QEMU and also
    IIRC we do not use QEMU MSI interrupt mechanism on e500 guest machines.
    But this BAR0 will be used when using MSI on e500.
    
    I can see one more issue, There are ATMUs emulated in hw/ppce500_pci.c,
    but i do not see these being used for address translation.
    So far that works because pci address space and local address space are 1:1
    mapped. BAR0 inbound translation + ATMU translation will complete the address
    translation of inbound traffic.
    
    Signed-off-by: Bharat Bhushan <bharat.bhushan at freescale.com>
    [agraf: fix double variable assignment w/o read]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h
new file mode 100644
index 0000000..f20f51b
--- /dev/null
+++ b/hw/ppc/e500-ccsr.h
@@ -0,0 +1,17 @@
+#ifndef E500_CCSR_H
+#define E500_CCSR_H
+
+#include "../sysbus.h"
+
+typedef struct PPCE500CCSRState {
+    /*< private >*/
+    SysBusDevice parent;
+    /*< public >*/
+
+    MemoryRegion ccsr_space;
+} PPCE500CCSRState;
+
+#define TYPE_CCSR "e500-ccsr"
+#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR)
+
+#endif /* E500_CCSR_H */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 8538933..47e2d41 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -17,6 +17,7 @@
 #include "config.h"
 #include "qemu-common.h"
 #include "e500.h"
+#include "e500-ccsr.h"
 #include "net.h"
 #include "hw/hw.h"
 #include "hw/serial.h"
@@ -422,8 +423,9 @@ void ppce500_init(PPCE500Params *params)
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
     CPUPPCState *firstenv = NULL;
-    MemoryRegion *ccsr;
+    MemoryRegion *ccsr_addr_space;
     SysBusDevice *s;
+    PPCE500CCSRState *ccsr;
 
     /* Setup CPUs */
     if (params->cpu_model == NULL) {
@@ -480,12 +482,17 @@ void ppce500_init(PPCE500Params *params)
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space_mem, 0, ram);
 
-    ccsr = g_malloc0(sizeof(MemoryRegion));
-    memory_region_init(ccsr, "e500-ccsr", MPC8544_CCSRBAR_SIZE);
-    memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE, ccsr);
+    dev = qdev_create(NULL, "e500-ccsr");
+    object_property_add_child(qdev_get_machine(), "e500-ccsr",
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+    ccsr = CCSR(dev);
+    ccsr_addr_space = &ccsr->ccsr_space;
+    memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE,
+                                ccsr_addr_space);
 
     /* MPIC */
-    mpic = mpic_init(ccsr, MPC8544_MPIC_REGS_OFFSET,
+    mpic = mpic_init(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET,
                      smp_cpus, irqs, NULL);
 
     if (!mpic) {
@@ -494,13 +501,13 @@ void ppce500_init(PPCE500Params *params)
 
     /* Serial */
     if (serial_hds[0]) {
-        serial_mm_init(ccsr, MPC8544_SERIAL0_REGS_OFFSET,
+        serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET,
                        0, mpic[12+26], 399193,
                        serial_hds[0], DEVICE_BIG_ENDIAN);
     }
 
     if (serial_hds[1]) {
-        serial_mm_init(ccsr, MPC8544_SERIAL1_REGS_OFFSET,
+        serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET,
                        0, mpic[12+26], 399193,
                        serial_hds[1], DEVICE_BIG_ENDIAN);
     }
@@ -509,7 +516,7 @@ void ppce500_init(PPCE500Params *params)
     dev = qdev_create(NULL, "mpc8544-guts");
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
-    memory_region_add_subregion(ccsr, MPC8544_UTIL_OFFSET,
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET,
                                 sysbus_mmio_get_region(s, 0));
 
     /* PCI */
@@ -520,7 +527,7 @@ void ppce500_init(PPCE500Params *params)
     sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]);
     sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]);
     sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]);
-    memory_region_add_subregion(ccsr, MPC8544_PCI_REGS_OFFSET,
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET,
                                 sysbus_mmio_get_region(s, 0));
 
     pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
@@ -595,3 +602,33 @@ void ppce500_init(PPCE500Params *params)
         kvmppc_init();
     }
 }
+
+static int e500_ccsr_initfn(SysBusDevice *dev)
+{
+    PPCE500CCSRState *ccsr;
+
+    ccsr = CCSR(dev);
+    memory_region_init(&ccsr->ccsr_space, "e500-ccsr",
+                       MPC8544_CCSRBAR_SIZE);
+    return 0;
+}
+
+static void e500_ccsr_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = e500_ccsr_initfn;
+}
+
+static const TypeInfo e500_ccsr_info = {
+    .name          = TYPE_CCSR,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(PPCE500CCSRState),
+    .class_init    = e500_ccsr_class_init,
+};
+
+static void e500_register_types(void)
+{
+    type_register_static(&e500_ccsr_info);
+}
+
+type_init(e500_register_types)
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 2ff7438..54c72b4 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -15,6 +15,7 @@
  */
 
 #include "hw.h"
+#include "hw/ppc/e500-ccsr.h"
 #include "pci.h"
 #include "pci_host.h"
 #include "bswap.h"
@@ -92,6 +93,19 @@ struct PPCE500PCIState {
     MemoryRegion pio;
 };
 
+#define TYPE_PPC_E500_PCI_BRIDGE "e500-host-bridge"
+#define PPC_E500_PCI_BRIDGE(obj) \
+    OBJECT_CHECK(PPCE500PCIBridgeState, (obj), TYPE_PPC_E500_PCI_BRIDGE)
+
+struct PPCE500PCIBridgeState {
+    /*< private >*/
+    PCIDevice parent;
+    /*< public >*/
+
+    MemoryRegion bar0;
+};
+
+typedef struct PPCE500PCIBridgeState PPCE500PCIBridgeState;
 typedef struct PPCE500PCIState PPCE500PCIState;
 
 static uint64_t pci_reg_read4(void *opaque, hwaddr addr,
@@ -310,6 +324,18 @@ static const VMStateDescription vmstate_ppce500_pci = {
 
 #include "exec-memory.h"
 
+static int e500_pcihost_bridge_initfn(PCIDevice *d)
+{
+    PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d);
+    PPCE500CCSRState *ccsr = CCSR(container_get(qdev_get_machine(),
+                                  "/e500-ccsr"));
+
+    memory_region_init_alias(&b->bar0, "e500-pci-bar0", &ccsr->ccsr_space,
+                             0, int128_get64(ccsr->ccsr_space.size));
+    pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0);
+    return 0;
+}
+
 static int e500_pcihost_initfn(SysBusDevice *dev)
 {
     PCIHostState *h;
@@ -355,6 +381,7 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
+    k->init = e500_pcihost_bridge_initfn;
     k->vendor_id = PCI_VENDOR_ID_FREESCALE;
     k->device_id = PCI_DEVICE_ID_MPC8533E;
     k->class_id = PCI_CLASS_PROCESSOR_POWERPC;
@@ -364,7 +391,7 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data)
 static const TypeInfo e500_host_bridge_info = {
     .name          = "e500-host-bridge",
     .parent        = TYPE_PCI_DEVICE,
-    .instance_size = sizeof(PCIDevice),
+    .instance_size = sizeof(PPCE500PCIBridgeState),
     .class_init    = e500_host_bridge_class_init,
 };
 
commit dffb1dc29fb364aaafc41b34100a06517d7f065e
Author: Bharat Bhushan <r65777 at freescale.com>
Date:   Wed Oct 10 04:28:27 2012 +0000

    e500: Adding CCSR memory region
    
    All devices are also placed under CCSR memory region.
    The CCSR memory region is exported to pci device. The MSI interrupt
    generation is the main reason to export the CCSR region to PCI device.
    This put the requirement to move mpic under CCSR region, but logically
    all devices should be under CCSR. So this patch places all emulated
    devices under ccsr region.
    
    Signed-off-by: Bharat Bhushan <bharat.bhushan at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 6749fff..8538933 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -46,13 +46,15 @@
 /* TODO: parameterize */
 #define MPC8544_CCSRBAR_BASE       0xE0000000ULL
 #define MPC8544_CCSRBAR_SIZE       0x00100000ULL
-#define MPC8544_MPIC_REGS_BASE     (MPC8544_CCSRBAR_BASE + 0x40000ULL)
-#define MPC8544_SERIAL0_REGS_BASE  (MPC8544_CCSRBAR_BASE + 0x4500ULL)
-#define MPC8544_SERIAL1_REGS_BASE  (MPC8544_CCSRBAR_BASE + 0x4600ULL)
-#define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + 0x8000ULL)
+#define MPC8544_MPIC_REGS_OFFSET   0x40000ULL
+#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL
+#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL
+#define MPC8544_PCI_REGS_OFFSET    0x8000ULL
+#define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + \
+                                    MPC8544_PCI_REGS_OFFSET)
 #define MPC8544_PCI_REGS_SIZE      0x1000ULL
 #define MPC8544_PCI_IO             0xE1000000ULL
-#define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe0000ULL)
+#define MPC8544_UTIL_OFFSET        0xe0000ULL
 #define MPC8544_SPIN_BASE          0xEF000000ULL
 
 struct boot_info
@@ -267,13 +269,12 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     /* XXX should contain a reasonable value */
     qemu_devtree_setprop_cell(fdt, soc, "bus-frequency", 0);
 
-    snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc,
-             MPC8544_MPIC_REGS_BASE - MPC8544_CCSRBAR_BASE);
+    snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET);
     qemu_devtree_add_subnode(fdt, mpic);
     qemu_devtree_setprop_string(fdt, mpic, "device_type", "open-pic");
     qemu_devtree_setprop_string(fdt, mpic, "compatible", "chrp,open-pic");
-    qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_BASE -
-                               MPC8544_CCSRBAR_BASE, 0x40000);
+    qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET,
+                               0x40000);
     qemu_devtree_setprop_cell(fdt, mpic, "#address-cells", 0);
     qemu_devtree_setprop_cell(fdt, mpic, "#interrupt-cells", 2);
     mpic_ph = qemu_devtree_alloc_phandle(fdt);
@@ -286,17 +287,16 @@ static int ppce500_load_device_tree(CPUPPCState *env,
      * device it finds in the dt as serial output device. And we generate
      * devices in reverse order to the dt.
      */
-    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_BASE - MPC8544_CCSRBAR_BASE,
+    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
                      soc, mpic, "serial1", 1, false);
-    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_BASE - MPC8544_CCSRBAR_BASE,
+    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
                      soc, mpic, "serial0", 0, true);
 
     snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
-             MPC8544_UTIL_BASE - MPC8544_CCSRBAR_BASE);
+             MPC8544_UTIL_OFFSET);
     qemu_devtree_add_subnode(fdt, gutil);
     qemu_devtree_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts");
-    qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_BASE -
-                               MPC8544_CCSRBAR_BASE, 0x1000);
+    qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000);
     qemu_devtree_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0);
 
     snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE);
@@ -422,6 +422,8 @@ void ppce500_init(PPCE500Params *params)
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
     CPUPPCState *firstenv = NULL;
+    MemoryRegion *ccsr;
+    SysBusDevice *s;
 
     /* Setup CPUs */
     if (params->cpu_model == NULL) {
@@ -450,7 +452,8 @@ void ppce500_init(PPCE500Params *params)
         irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
         irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
         env->spr[SPR_BOOKE_PIR] = env->cpu_index = i;
-        env->mpic_cpu_base = MPC8544_MPIC_REGS_BASE + 0x20000;
+        env->mpic_cpu_base = MPC8544_CCSRBAR_BASE +
+                              MPC8544_MPIC_REGS_OFFSET + 0x20000;
 
         ppc_booke_timers_init(env, 400000000, PPC_TIMER_E500);
 
@@ -477,8 +480,12 @@ void ppce500_init(PPCE500Params *params)
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space_mem, 0, ram);
 
+    ccsr = g_malloc0(sizeof(MemoryRegion));
+    memory_region_init(ccsr, "e500-ccsr", MPC8544_CCSRBAR_SIZE);
+    memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE, ccsr);
+
     /* MPIC */
-    mpic = mpic_init(address_space_mem, MPC8544_MPIC_REGS_BASE,
+    mpic = mpic_init(ccsr, MPC8544_MPIC_REGS_OFFSET,
                      smp_cpus, irqs, NULL);
 
     if (!mpic) {
@@ -487,25 +494,35 @@ void ppce500_init(PPCE500Params *params)
 
     /* Serial */
     if (serial_hds[0]) {
-        serial_mm_init(address_space_mem, MPC8544_SERIAL0_REGS_BASE,
+        serial_mm_init(ccsr, MPC8544_SERIAL0_REGS_OFFSET,
                        0, mpic[12+26], 399193,
                        serial_hds[0], DEVICE_BIG_ENDIAN);
     }
 
     if (serial_hds[1]) {
-        serial_mm_init(address_space_mem, MPC8544_SERIAL1_REGS_BASE,
+        serial_mm_init(ccsr, MPC8544_SERIAL1_REGS_OFFSET,
                        0, mpic[12+26], 399193,
                        serial_hds[1], DEVICE_BIG_ENDIAN);
     }
 
     /* General Utility device */
-    sysbus_create_simple("mpc8544-guts", MPC8544_UTIL_BASE, NULL);
+    dev = qdev_create(NULL, "mpc8544-guts");
+    qdev_init_nofail(dev);
+    s = SYS_BUS_DEVICE(dev);
+    memory_region_add_subregion(ccsr, MPC8544_UTIL_OFFSET,
+                                sysbus_mmio_get_region(s, 0));
 
     /* PCI */
-    dev = sysbus_create_varargs("e500-pcihost", MPC8544_PCI_REGS_BASE,
-                                mpic[pci_irq_nrs[0]], mpic[pci_irq_nrs[1]],
-                                mpic[pci_irq_nrs[2]], mpic[pci_irq_nrs[3]],
-                                NULL);
+    dev = qdev_create(NULL, "e500-pcihost");
+    qdev_init_nofail(dev);
+    s = SYS_BUS_DEVICE(dev);
+    sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]);
+    sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]);
+    sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]);
+    sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]);
+    memory_region_add_subregion(ccsr, MPC8544_PCI_REGS_OFFSET,
+                                sysbus_mmio_get_region(s, 0));
+
     pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
     if (!pci_bus)
         printf("couldn't create PCI controller!\n");
commit 4fd50339c0b55fa6387fa3c28f755c306997064c
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Nov 12 16:46:58 2012 +0000

    pseries: Update SLOF for NVRAM support
    
    Now that we have implemented PAPR compatible NVRAM interfaces in qemu, this
    updates the SLOF firmware to actually initialize and use the NVRAM as a
    PAPR guest firmware is expected to do.
    
    This SLOF update also includes an ugly but useful workaround for a bug in
    the SLES11 installer which caused it to fail under KVM.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/pc-bios/README b/pc-bios/README
index 3037130..eff3de7 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/dgibson/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20120731.
+  built from git tag qemu-slof-20121018.
 
 - sgabios (the Serial Graphics Adapter option ROM) provides a means for
   legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 84ba6b8..3410f4f 100644
Binary files a/pc-bios/slof.bin and b/pc-bios/slof.bin differ
diff --git a/roms/SLOF b/roms/SLOF
index f21f7a3..0ad10f2 160000
--- a/roms/SLOF
+++ b/roms/SLOF
@@ -1 +1 @@
-Subproject commit f21f7a3f46b557eb5923f899ce8b4401b3cc6d91
+Subproject commit 0ad10f26c94a86a0c9c3970e53f9a9f6a744055d
commit 639e8102ae71ce2e46ebeffc6080767e573c0c56
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Nov 12 16:46:57 2012 +0000

    pseries: Implement PAPR NVRAM
    
    The PAPR specification requires a certain amount of NVRAM, accessed via
    RTAS, which we don't currently implement in qemu.  This patch addresses
    this deficiency, implementing the NVRAM as a VIO device, with some glue to
    instantiate it automatically based on a machine option.
    
    The machine option specifies a drive id, which is used to back the NVRAM,
    making it persistent.  If nothing is specified, the driver instead simply
    allocates space for the NVRAM, which will not be persistent
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 8fe2123..4492127 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -11,7 +11,7 @@ obj-y += ppc_newworld.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
-obj-$(CONFIG_PSERIES) += spapr_events.o
+obj-$(CONFIG_PSERIES) += spapr_events.o spapr_nvram.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-y += ppc440_bamboo.o
diff --git a/hw/spapr.c b/hw/spapr.c
index eafee03..d23aa9d 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -657,6 +657,36 @@ static void spapr_cpu_reset(void *opaque)
         (spapr->htab_shift - 18);
 }
 
+static void spapr_create_nvram(sPAPREnvironment *spapr)
+{
+    QemuOpts *machine_opts;
+    DeviceState *dev;
+
+    dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
+
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *drivename;
+
+        drivename = qemu_opt_get(machine_opts, "nvram");
+        if (drivename) {
+            BlockDriverState *bs;
+
+            bs = bdrv_find(drivename);
+            if (!bs) {
+                fprintf(stderr, "No such block device \"%s\" for nvram\n",
+                        drivename);
+                exit(1);
+            }
+            qdev_prop_set_drive_nofail(dev, "drive", bs);
+        }
+    }
+
+    qdev_init_nofail(dev);
+
+    spapr->nvram = (struct sPAPRNVRAM *)dev;
+}
+
 /* Returns whether we want to use VGA or not */
 static int spapr_vga_init(PCIBus *pci_bus)
 {
@@ -818,6 +848,9 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
         }
     }
 
+    /* We always have at least the nvram device on VIO */
+    spapr_create_nvram(spapr);
+
     /* Set up PCI */
     spapr_pci_rtas_init();
 
diff --git a/hw/spapr.h b/hw/spapr.h
index 971a50a..600722f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -6,11 +6,13 @@
 
 struct VIOsPAPRBus;
 struct sPAPRPHBState;
+struct sPAPRNVRAM;
 struct icp_state;
 
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
+    struct sPAPRNVRAM *nvram;
     struct icp_state *icp;
 
     hwaddr ram_limit;
diff --git a/hw/spapr_nvram.c b/hw/spapr_nvram.c
new file mode 100644
index 0000000..641de48
--- /dev/null
+++ b/hw/spapr_nvram.c
@@ -0,0 +1,196 @@
+/*
+ * QEMU sPAPR NVRAM emulation
+ *
+ * Copyright (C) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <libfdt.h>
+
+#include "device_tree.h"
+#include "hw/sysbus.h"
+#include "hw/spapr.h"
+#include "hw/spapr_vio.h"
+
+typedef struct sPAPRNVRAM {
+    VIOsPAPRDevice sdev;
+    uint32_t size;
+    uint8_t *buf;
+    BlockDriverState *drive;
+} sPAPRNVRAM;
+
+#define MIN_NVRAM_SIZE 8192
+#define DEFAULT_NVRAM_SIZE 16384
+#define MAX_NVRAM_SIZE (UINT16_MAX * 16)
+
+static void rtas_nvram_fetch(sPAPREnvironment *spapr,
+                             uint32_t token, uint32_t nargs,
+                             target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    sPAPRNVRAM *nvram = spapr->nvram;
+    hwaddr offset, buffer, len;
+    int alen;
+    void *membuf;
+
+    if ((nargs != 3) || (nret != 2)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+
+    if (!nvram) {
+        rtas_st(rets, 0, -1);
+        rtas_st(rets, 1, 0);
+        return;
+    }
+
+    offset = rtas_ld(args, 0);
+    buffer = rtas_ld(args, 1);
+    len = rtas_ld(args, 2);
+
+    if (((offset + len) < offset)
+        || ((offset + len) > nvram->size)) {
+        rtas_st(rets, 0, -3);
+        rtas_st(rets, 1, 0);
+        return;
+    }
+
+    membuf = cpu_physical_memory_map(buffer, &len, 1);
+    if (nvram->drive) {
+        alen = bdrv_pread(nvram->drive, offset, membuf, len);
+    } else {
+        assert(nvram->buf);
+
+        memcpy(membuf, nvram->buf + offset, len);
+        alen = len;
+    }
+    cpu_physical_memory_unmap(membuf, len, 1, len);
+
+    rtas_st(rets, 0, (alen < len) ? -1 : 0);
+    rtas_st(rets, 1, (alen < 0) ? 0 : alen);
+}
+
+static void rtas_nvram_store(sPAPREnvironment *spapr,
+                             uint32_t token, uint32_t nargs,
+                             target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    sPAPRNVRAM *nvram = spapr->nvram;
+    hwaddr offset, buffer, len;
+    int alen;
+    void *membuf;
+
+    if ((nargs != 3) || (nret != 2)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+
+    if (!nvram) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    offset = rtas_ld(args, 0);
+    buffer = rtas_ld(args, 1);
+    len = rtas_ld(args, 2);
+
+    if (((offset + len) < offset)
+        || ((offset + len) > nvram->size)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+
+    membuf = cpu_physical_memory_map(buffer, &len, 0);
+    if (nvram->drive) {
+        alen = bdrv_pwrite(nvram->drive, offset, membuf, len);
+    } else {
+        assert(nvram->buf);
+
+        memcpy(nvram->buf + offset, membuf, len);
+        alen = len;
+    }
+    cpu_physical_memory_unmap(membuf, len, 0, len);
+
+    rtas_st(rets, 0, (alen < len) ? -1 : 0);
+    rtas_st(rets, 1, (alen < 0) ? 0 : alen);
+}
+
+static int spapr_nvram_init(VIOsPAPRDevice *dev)
+{
+    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+
+    if (nvram->drive) {
+        nvram->size = bdrv_getlength(nvram->drive);
+    } else {
+        nvram->size = DEFAULT_NVRAM_SIZE;
+        nvram->buf = g_malloc0(nvram->size);
+    }
+
+    if ((nvram->size < MIN_NVRAM_SIZE) || (nvram->size > MAX_NVRAM_SIZE)) {
+        fprintf(stderr, "spapr-nvram must be between %d and %d bytes in size\n",
+                MIN_NVRAM_SIZE, MAX_NVRAM_SIZE);
+        return -1;
+    }
+
+    spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
+    spapr_rtas_register("nvram-store", rtas_nvram_store);
+
+    return 0;
+}
+
+static int spapr_nvram_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
+{
+    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+
+    return fdt_setprop_cell(fdt, node_off, "#bytes", nvram->size);
+}
+
+static Property spapr_nvram_properties[] = {
+    DEFINE_SPAPR_PROPERTIES(sPAPRNVRAM, sdev),
+    DEFINE_PROP_DRIVE("drive", sPAPRNVRAM, drive),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_nvram_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VIOsPAPRDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass);
+
+    k->init = spapr_nvram_init;
+    k->devnode = spapr_nvram_devnode;
+    k->dt_name = "nvram";
+    k->dt_type = "nvram";
+    k->dt_compatible = "qemu,spapr-nvram";
+    dc->props = spapr_nvram_properties;
+}
+
+static const TypeInfo spapr_nvram_type_info = {
+    .name          = "spapr-nvram",
+    .parent        = TYPE_VIO_SPAPR_DEVICE,
+    .instance_size = sizeof(sPAPRNVRAM),
+    .class_init    = spapr_nvram_class_init,
+};
+
+static void spapr_nvram_register_types(void)
+{
+    type_register_static(&spapr_nvram_type_info);
+}
+
+type_init(spapr_nvram_register_types)
diff --git a/qemu-config.c b/qemu-config.c
index aa78fb9..de10051 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -579,6 +579,10 @@ static QemuOptsList qemu_machine_opts = {
             .name = "usb",
             .type = QEMU_OPT_BOOL,
             .help = "Set on/off to enable/disable usb",
+        }, {
+            .name = "nvram",
+            .type = QEMU_OPT_STRING,
+            .help = "Drive backing persistent NVRAM",
         },
         { /* End of list */ }
     },
commit 22a2611c9cef4a8c8ad96fe17b3511a6cc5fb3a1
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Nov 12 16:46:55 2012 +0000

    pseries: Split xics irq configuration from state information
    
    Currently the XICS irq controller code has a per-irq state structure which
    amongst other things includes whether the interrupt is level or message
    triggered - this is configured by the platform code, and is not directly
    visible to the guest.  This leads to a slightly awkward construct at reset
    time where we need to reset everything in the state structure _except_ the
    lsi/msi flag, which needs to retain the information given at platform init
    time.
    
    More importantly this flag will make matching the qemu state to the KVM
    state for the upcoming in-kernel XICS implementation more awkward.  This
    patch, therefore, removes this flag from the per-irq state structure,
    instead adding a parallel array giving the lsi/msi configuration per irq.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index 33f99c7..55899ce 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -179,13 +179,13 @@ struct ics_irq_state {
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
     uint8_t status;
-    bool lsi;
 };
 
 struct ics_state {
     int nr_irqs;
     int offset;
     qemu_irq *qirqs;
+    bool *islsi;
     struct ics_irq_state *irqs;
     struct icp_state *icp;
 };
@@ -254,9 +254,8 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 static void ics_set_irq(void *opaque, int srcno, int val)
 {
     struct ics_state *ics = (struct ics_state *)opaque;
-    struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->lsi) {
+    if (ics->islsi[srcno]) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -293,7 +292,7 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server,
 
     trace_xics_ics_write_xive(nr, srcno, server, priority);
 
-    if (irq->lsi) {
+    if (ics->islsi[srcno]) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -314,10 +313,8 @@ static void ics_resend(struct ics_state *ics)
     int i;
 
     for (i = 0; i < ics->nr_irqs; i++) {
-        struct ics_irq_state *irq = ics->irqs + i;
-
         /* FIXME: filter by server#? */
-        if (irq->lsi) {
+        if (ics->islsi[i]) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -332,7 +329,7 @@ static void ics_eoi(struct ics_state *ics, int nr)
 
     trace_xics_ics_eoi(nr);
 
-    if (irq->lsi) {
+    if (ics->islsi[srcno]) {
         irq->status &= ~XICS_STATUS_SENT;
     }
 }
@@ -354,7 +351,7 @@ void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi)
 {
     assert(ics_valid_irq(icp->ics, irq));
 
-    icp->ics->irqs[irq - icp->ics->offset].lsi = lsi;
+    icp->ics->islsi[irq - icp->ics->offset] = lsi;
 }
 
 static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
@@ -518,10 +515,8 @@ static void xics_reset(void *opaque)
         qemu_set_irq(icp->ss[i].output, 0);
     }
 
+    memset(ics->irqs, 0, sizeof(struct ics_irq_state) * ics->nr_irqs);
     for (i = 0; i < ics->nr_irqs; i++) {
-        /* Reset everything *except* the type */
-        ics->irqs[i].server = 0;
-        ics->irqs[i].status = 0;
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
     }
@@ -568,6 +563,7 @@ struct icp_state *xics_system_init(int nr_irqs)
     ics->nr_irqs = nr_irqs;
     ics->offset = XICS_IRQ_BASE;
     ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
+    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
 
     icp->ics = ics;
     ics->icp = icp;
commit 500efa2319d1f1074b1d61e5ceb7a0fd61d0831d
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Nov 12 16:46:54 2012 +0000

    pseries: Add tracepoints to the XICS interrupt controller
    
    This patch adds tracing / debugging calls to the XICS interrupt controller
    implementation used on the pseries machine.
    
    Signed-off-by: Ben Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index b8887cd..33f99c7 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -26,6 +26,7 @@
  */
 
 #include "hw.h"
+#include "trace.h"
 #include "hw/spapr.h"
 #include "hw/xics.h"
 
@@ -66,6 +67,8 @@ static void icp_check_ipi(struct icp_state *icp, int server)
         return;
     }
 
+    trace_xics_icp_check_ipi(server, ss->mfrr);
+
     if (XISR(ss)) {
         ics_reject(icp->ics, XISR(ss));
     }
@@ -120,11 +123,13 @@ static void icp_set_mfrr(struct icp_state *icp, int server, uint8_t mfrr)
 
 static uint32_t icp_accept(struct icp_server_state *ss)
 {
-    uint32_t xirr;
+    uint32_t xirr = ss->xirr;
 
     qemu_irq_lower(ss->output);
-    xirr = ss->xirr;
     ss->xirr = ss->pending_priority << 24;
+
+    trace_xics_icp_accept(xirr, ss->xirr);
+
     return xirr;
 }
 
@@ -134,6 +139,7 @@ static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
 
     /* Send EOI -> ICS */
     ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+    trace_xics_icp_eoi(server, xirr, ss->xirr);
     ics_eoi(icp->ics, xirr & XISR_MASK);
     if (!XISR(ss)) {
         icp_resend(icp, server);
@@ -144,6 +150,8 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
 {
     struct icp_server_state *ss = icp->ss + server;
 
+    trace_xics_icp_irq(server, nr, priority);
+
     if ((priority >= CPPR(ss))
         || (XISR(ss) && (ss->pending_priority <= priority))) {
         ics_reject(icp->ics, nr);
@@ -153,6 +161,7 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
         }
         ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
         ss->pending_priority = priority;
+        trace_xics_icp_raise(ss->xirr, ss->pending_priority);
         qemu_irq_raise(ss->output);
     }
 }
@@ -217,10 +226,12 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
+    trace_xics_set_irq_msi(srcno, srcno + ics->offset);
+
     if (val) {
         if (irq->priority == 0xff) {
             irq->status |= XICS_STATUS_MASKED_PENDING;
-            /* masked pending */ ;
+            trace_xics_masked_pending();
         } else  {
             icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
         }
@@ -231,6 +242,7 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
+    trace_xics_set_irq_lsi(srcno, srcno + ics->offset);
     if (val) {
         irq->status |= XICS_STATUS_ASSERTED;
     } else {
@@ -279,6 +291,8 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server,
     irq->priority = priority;
     irq->saved_priority = saved_priority;
 
+    trace_xics_ics_write_xive(nr, srcno, server, priority);
+
     if (irq->lsi) {
         write_xive_lsi(ics, srcno);
     } else {
@@ -290,6 +304,7 @@ static void ics_reject(struct ics_state *ics, int nr)
 {
     struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
 
+    trace_xics_ics_reject(nr, nr - ics->offset);
     irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */
     irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }
@@ -315,6 +330,8 @@ static void ics_eoi(struct ics_state *ics, int nr)
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
+    trace_xics_ics_eoi(nr);
+
     if (irq->lsi) {
         irq->status &= ~XICS_STATUS_SENT;
     }
diff --git a/trace-events b/trace-events
index 6c6cbf1..6cb450a 100644
--- a/trace-events
+++ b/trace-events
@@ -1022,3 +1022,16 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %
 spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+
+# hw/xics.c
+xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
+xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR %#"PRIx32"->%#"PRIx32
+xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr) "icp_eoi: server %d given XIRR %#"PRIx32" new XIRR %#"PRIx32
+xics_icp_irq(int server, int nr, uint8_t priority) "cpu %d trying to deliver irq %#"PRIx32" priority %#x"
+xics_icp_raise(uint32_t xirr, uint8_t pending_priority) "raising IRQ new XIRR=%#x new pending priority=%#x"
+xics_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]"
+xics_masked_pending(void) "set_irq_msi: masked pending"
+xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
+xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
+xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
+xics_ics_eoi(int nr) "ics_eoi: irq %#x"
commit d36b66f7a420737dcc24de15b35a465ff6b1798d
Author: Ben Herrenschmidt <benh at kernel.crashing.org>
Date:   Mon Nov 12 16:46:53 2012 +0000

    pseries: Allow RTAS tokens without a qemu handler
    
    Kernel-based RTAS calls will not have a qemu handler, but will
    still be registered in qemu in order to be assigned a token
    number and appear in the device-tree.
    
    Let's test for the name being NULL rather than the handler
    when deciding to skip an entry while building the device-tree
    
    Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index 45294e8..e618c2d 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -301,7 +301,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
     for (i = 0; i < TOKEN_MAX; i++) {
         struct rtas_call *call = &rtas_table[i];
 
-        if (!call->fn) {
+        if (!call->name) {
             continue;
         }
 
commit 4aac82c34675fcbd3722dfc3a2d04c839215ec6b
Author: Michael Ellerman <michael at ellerman.id.au>
Date:   Mon Nov 12 16:46:52 2012 +0000

    pseries: Return the token when we register an RTAS call
    
    The kernel will soon be able to service some RTAS calls. However the
    choice of tokens will still be up to userspace. To support this have
    spapr_rtas_register() return the token that is allocated for an
    RTAS call, that allows the calling code to tell the kernel what the
    token value is.
    
    Signed-off-by: Michael Ellerman <michael at ellerman.id.au>
    Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.h b/hw/spapr.h
index efe7f57..971a50a 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -320,7 +320,7 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
 typedef void (*spapr_rtas_fn)(sPAPREnvironment *spapr, uint32_t token,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
-void spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
 target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index 6d5c48a..45294e8 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -242,7 +242,7 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
     return H_PARAMETER;
 }
 
-void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
 {
     int i;
 
@@ -258,7 +258,7 @@ void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
     rtas_next->name = name;
     rtas_next->fn = fn;
 
-    rtas_next++;
+    return (rtas_next++ - rtas_table) + TOKEN_BASE;
 }
 
 int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
commit bf3bc4c4e992fb9914e2f1f7e8a569394d298b57
Author: Ben Herrenschmidt <benh at kernel.crashing.org>
Date:   Mon Nov 12 16:46:50 2012 +0000

    pseries: Use #define for XICS base irq number
    
    Currently the lowest "real" irq number for the XICS irq controller (as
    opposed to numbers reserved for IPIs and other special purposes) is
    hard coded as 16 in two places - in xics_system_init() and in spapr.c.
    
    As well as being generally bad practice, we're going to need to change this
    number soon to fit in with the in-kernel XICS implementation.  This patch
    adds a #define for this number to avoid future breakage.
    
    Signed-off-by: Michael Ellerman <michael at ellerman.id.au>
    Signed-off-by: Ben Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index ad3f0ea..eafee03 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -801,7 +801,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 
     /* Set up Interrupt Controller */
     spapr->icp = xics_system_init(XICS_IRQS);
-    spapr->next_irq = 16;
+    spapr->next_irq = XICS_IRQ_BASE;
 
     /* Set up EPOW events infrastructure */
     spapr_events_init(spapr);
diff --git a/hw/xics.c b/hw/xics.c
index edf5833..b8887cd 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -549,7 +549,7 @@ struct icp_state *xics_system_init(int nr_irqs)
 
     ics = g_malloc0(sizeof(*ics));
     ics->nr_irqs = nr_irqs;
-    ics->offset = 16;
+    ics->offset = XICS_IRQ_BASE;
     ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
 
     icp->ics = ics;
diff --git a/hw/xics.h b/hw/xics.h
index 6817268..c3bf008 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -28,6 +28,7 @@
 #define __XICS_H__
 
 #define XICS_IPI        0x2
+#define XICS_IRQ_BASE   0x10
 
 struct icp_state;
 
commit 044f4c8b0ee90290b6cbbc616c4be3c8aeffcaab
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Nov 12 16:46:49 2012 +0000

    pseries: Fix incorrect initialization of interrupt controller
    
    Currently in the reset code for the XICS interrupt controller, we
    initialize the pending_priority field to 0 (most favored, by XICS
    convention).  This is incorrect, since there is no pending interrupt, it
    should be set to least favored - 0xff.  At the moment our XICS
    implementation doesn't get hurt by this edge case, but it does confuse the
    upcoming kernel XICS implementation.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index 1da3106..edf5833 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -495,7 +495,7 @@ static void xics_reset(void *opaque)
 
     for (i = 0; i < icp->nr_servers; i++) {
         icp->ss[i].xirr = 0;
-        icp->ss[i].pending_priority = 0;
+        icp->ss[i].pending_priority = 0xff;
         icp->ss[i].mfrr = 0xff;
         /* Make all outputs are deasserted */
         qemu_set_irq(icp->ss[i].output, 0);
commit e376a788ae130454ad5e797f60cb70d0308babb6
Merge: df93300 226c3c2
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Dec 13 14:32:28 2012 -0600

    Merge remote-tracking branch 'kwolf/for-anthony' into staging
    
    * kwolf/for-anthony: (43 commits)
      qcow2: Factor out handle_dependencies()
      qcow2: Execute run_dependent_requests() without lock
      qcow2: Enable dirty flag in qcow2_alloc_cluster_link_l2
      qcow2: Allocate l2meta only for cluster allocations
      qcow2: Drop l2meta.cluster_offset
      qcow2: Allocate l2meta dynamically
      qcow2: Introduce Qcow2COWRegion
      qcow2: Round QCowL2Meta.offset down to cluster boundary
      atapi: reset cdrom tray statuses on ide_reset
      qemu-iotests: Test concurrent cluster allocations
      qcow2: Move BLKDBG_EVENT out of the lock
      qemu-io: Add AIO debugging commands
      blkdebug: Implement suspend/resume of AIO requests
      blkdebug: Factor out remove_rule()
      blkdebug: Allow usage without config file
      create new function: qemu_opt_set_number
      use qemu_opts_create_nofail
      introduce qemu_opts_create_nofail function
      qemu-option: qemu_opt_set_bool(): fix code duplication
      qemu-option: qemu_opts_validate(): fix duplicated code
      ...
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit df9330070e671134544f872dc5c027443878b764
Merge: aa1246a 871edc5
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Dec 13 11:41:57 2012 -0600

    Merge remote-tracking branch 'pmaydell/arm-devs.next' into staging
    
    * pmaydell/arm-devs.next:
      hw/ds1338.c: Fix handling of DAY (wday) register.
      hw/ds1338.c: Implement support for the control register.
      hw/ds1338.c: Ensure state is properly initialized.
      hw/ds1338.c: Fix handling of HOURS register.
      hw/ds1338.c: Add definitions for various flags in the RTC registers.
      hw/ds1338.c: Correct bug in conversion to BCD.
      exynos4210/mct: Avoid infinite loop on non incremental timers
      hw/arm_gic: fix target CPUs affected by set enable/pending ops
      xilinx_zynq: Add one variable to avoid overwriting QSPI bus
      hw/arm_gic_common: Correct GICC_PMR reset value for newer GICs
      hw/arm_gic: Fix comparison with priority mask register
      hw/arm_boot, exynos4210, highbank: Fix secondary boot GIC init
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit aa1246aee2914424f101a7e462bd1393ececef95
Merge: 5a58598 ff15629
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Dec 13 11:41:25 2012 -0600

    Merge remote-tracking branch 'kraxel/seabios-e8a76b0' into staging
    
    * kraxel/seabios-e8a76b0:
      seabios: update to e8a76b0f225bba5ba9d63ab227e0a37b3beb1059
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 5a58598090d1349d2bd3e9015e6999dcf87ec5b5
Merge: 45e6cee d281084
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Dec 13 11:40:23 2012 -0600

    qMerge remote-tracking branch 'awilliam/tags/vfio-pci-for-qemu-20121210.0' into staging
    
    vfio-pci: fix kvm disabled path
    
    * awilliam/tags/vfio-pci-for-qemu-20121210.0:
      vfio-pci: Don't use kvm_irqchip_in_kernel
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 226c3c26b9800b7c6a8d3100e1faad6d2b97b0f5
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:49 2012 +0100

    qcow2: Factor out handle_dependencies()
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 7a038ac..468ef1b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -753,38 +753,16 @@ out:
 }
 
 /*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
  */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, unsigned int *nb_clusters)
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+    unsigned int *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
     QCowL2Meta *old_alloc;
 
-    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
-                                         *host_offset, *nb_clusters);
-
-    /*
-     * Check if there already is an AIO write request in flight which allocates
-     * the same cluster. In this case we need to wait until the previous
-     * request has completed and updated the L2 table accordingly.
-     */
     QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
 
         uint64_t start = guest_offset >> s->cluster_bits;
@@ -817,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
         abort();
     }
 
+    return 0;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
+
+    ret = handle_dependencies(bs, guest_offset, nb_clusters);
+    if (ret < 0) {
+        return ret;
+    }
+
     /* Allocate new clusters */
     trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
     if (*host_offset == 0) {
@@ -828,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
         *host_offset = cluster_offset;
         return 0;
     } else {
-        int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+        ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
         if (ret < 0) {
             return ret;
         }
commit 4e95314e2bb7baa64f2a9026df5e2649081b7060
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:48 2012 +0100

    qcow2: Execute run_dependent_requests() without lock
    
    There's no reason for run_dependent_requests() to hold s->lock, and a
    later patch will require that in fact the lock is not held.
    
    Also, before this patch, run_dependent_requests() not only does what its
    name suggests, but also removes the l2meta from the list of in-flight
    requests. When changing this, it becomes an one-liner, so just inline it
    completely.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2.c b/block/qcow2.c
index dbcc0ff..8520bda 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -745,21 +745,6 @@ fail:
     return ret;
 }
 
-static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
-{
-    /* Take the request off the list of running requests */
-    if (m->nb_clusters != 0) {
-        QLIST_REMOVE(m, next_in_flight);
-    }
-
-    /* Restart all dependent requests */
-    if (!qemu_co_queue_empty(&m->dependent_requests)) {
-        qemu_co_mutex_unlock(&s->lock);
-        qemu_co_queue_restart_all(&m->dependent_requests);
-        qemu_co_mutex_lock(&s->lock);
-    }
-}
-
 static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                            int64_t sector_num,
                            int remaining_sectors,
@@ -845,7 +830,15 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                 goto fail;
             }
 
-            run_dependent_requests(s, l2meta);
+            /* Take the request off the list of running requests */
+            if (l2meta->nb_clusters != 0) {
+                QLIST_REMOVE(l2meta, next_in_flight);
+            }
+
+            qemu_co_mutex_unlock(&s->lock);
+            qemu_co_queue_restart_all(&l2meta->dependent_requests);
+            qemu_co_mutex_lock(&s->lock);
+
             g_free(l2meta);
             l2meta = NULL;
         }
@@ -858,13 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
     ret = 0;
 
 fail:
+    qemu_co_mutex_unlock(&s->lock);
+
     if (l2meta != NULL) {
-        run_dependent_requests(s, l2meta);
+        if (l2meta->nb_clusters != 0) {
+            QLIST_REMOVE(l2meta, next_in_flight);
+        }
+        qemu_co_queue_restart_all(&l2meta->dependent_requests);
         g_free(l2meta);
     }
 
-    qemu_co_mutex_unlock(&s->lock);
-
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
@@ -1152,7 +1148,7 @@ static int preallocate(BlockDriverState *bs)
         /* There are no dependent requests, but we need to remove our request
          * from the list of in-flight requests */
         if (meta != NULL) {
-            run_dependent_requests(bs->opaque, meta);
+            QLIST_REMOVE(meta, next_in_flight);
         }
 
         /* TODO Preallocate data if requested */
commit 280d373579558f73a8b70e329d9a6206933d3809
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:47 2012 +0100

    qcow2: Enable dirty flag in qcow2_alloc_cluster_link_l2
    
    This is closer to where the dirty flag is really needed, and it avoids
    having checks for special cases related to cluster allocation directly
    in the writev loop.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c2b59e7..7a038ac 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -668,11 +668,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     }
 
     /* Update L2 table. */
-
+    if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) {
+        qcow2_mark_dirty(bs);
+    }
     if (qcow2_need_accurate_refcounts(s)) {
         qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                    s->refcount_block_cache);
     }
+
     ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
     if (ret < 0) {
         goto err;
diff --git a/block/qcow2.c b/block/qcow2.c
index 08d92cc..dbcc0ff 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
  * updated successfully.  Therefore it is not required to check the return
  * value of this function.
  */
-static int qcow2_mark_dirty(BlockDriverState *bs)
+int qcow2_mark_dirty(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t val;
@@ -803,11 +803,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
-        if (l2meta->nb_clusters > 0 &&
-            (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
-            qcow2_mark_dirty(bs);
-        }
-
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
diff --git a/block/qcow2.h b/block/qcow2.h
index 6dc79b5..a60fcb4 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -303,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                   int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);
 
 /* qcow2-refcount.c functions */
commit f50f88b9fea09fef12cc293126cf45dcf0ef600b
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:46 2012 +0100

    qcow2: Allocate l2meta only for cluster allocations
    
    Even for writes to already allocated clusters, an l2meta is allocated,
    though it stays effectively unused. After this patch, only allocating
    requests still have one. Each l2meta now describes an in-flight request
    that writes to clusters that are not yet hooked up in the L2 table.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c4752ee..c2b59e7 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -652,9 +652,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     uint64_t cluster_offset = m->alloc_offset;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
-
-    if (m->nb_clusters == 0)
-        return 0;
+    assert(m->nb_clusters > 0);
 
     old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
 
@@ -856,7 +854,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
  * Return 0 on success and -errno in error cases
  */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m)
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
 {
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret, sectors;
@@ -928,11 +926,6 @@ again:
     }
 
     /* If there is something left to allocate, do that now */
-    *m = (QCowL2Meta) {
-        .nb_clusters        = 0,
-    };
-    qemu_co_queue_init(&m->dependent_requests);
-
     if (nb_clusters > 0) {
         uint64_t alloc_offset;
         uint64_t alloc_cluster_offset;
@@ -980,7 +973,9 @@ again:
                 cluster_offset = alloc_cluster_offset;
             }
 
-            *m = (QCowL2Meta) {
+            *m = g_malloc0(sizeof(**m));
+
+            **m = (QCowL2Meta) {
                 .alloc_offset   = alloc_cluster_offset,
                 .offset         = alloc_offset & ~(s->cluster_size - 1),
                 .nb_clusters    = nb_clusters,
@@ -995,8 +990,8 @@ again:
                     .nb_sectors = avail_sectors - nb_sectors,
                 },
             };
-            qemu_co_queue_init(&m->dependent_requests);
-            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+            qemu_co_queue_init(&(*m)->dependent_requests);
+            QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
         }
     }
 
@@ -1013,8 +1008,8 @@ again:
     return 0;
 
 fail:
-    if (m->nb_clusters > 0) {
-        QLIST_REMOVE(m, next_in_flight);
+    if (*m && (*m)->nb_clusters > 0) {
+        QLIST_REMOVE(*m, next_in_flight);
     }
     return ret;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index 66ca12f..08d92cc 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -787,8 +787,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     while (remaining_sectors != 0) {
 
-        l2meta = g_malloc0(sizeof(*l2meta));
-        qemu_co_queue_init(&l2meta->dependent_requests);
+        l2meta = NULL;
 
         trace_qcow2_writev_start_part(qemu_coroutine_self());
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -799,7 +798,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         }
 
         ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, l2meta);
+            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
         if (ret < 0) {
             goto fail;
         }
@@ -845,14 +844,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
-        if (ret < 0) {
-            goto fail;
-        }
+        if (l2meta != NULL) {
+            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+            if (ret < 0) {
+                goto fail;
+            }
 
-        run_dependent_requests(s, l2meta);
-        g_free(l2meta);
-        l2meta = NULL;
+            run_dependent_requests(s, l2meta);
+            g_free(l2meta);
+            l2meta = NULL;
+        }
 
         remaining_sectors -= cur_nr_sectors;
         sector_num += cur_nr_sectors;
@@ -1134,11 +1135,10 @@ static int preallocate(BlockDriverState *bs)
     uint64_t host_offset = 0;
     int num;
     int ret;
-    QCowL2Meta meta;
+    QCowL2Meta *meta;
 
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
-    qemu_co_queue_init(&meta.dependent_requests);
 
     while (nb_sectors) {
         num = MIN(nb_sectors, INT_MAX >> 9);
@@ -1148,15 +1148,17 @@ static int preallocate(BlockDriverState *bs)
             return ret;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, &meta);
+        ret = qcow2_alloc_cluster_link_l2(bs, meta);
         if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta.alloc_offset, meta.nb_clusters);
+            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
             return ret;
         }
 
         /* There are no dependent requests, but we need to remove our request
          * from the list of in-flight requests */
-        run_dependent_requests(bs->opaque, &meta);
+        if (meta != NULL) {
+            run_dependent_requests(bs->opaque, meta);
+        }
 
         /* TODO Preallocate data if requested */
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 24f1001..6dc79b5 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -207,7 +207,10 @@ typedef struct Qcow2COWRegion {
     int         nb_sectors;
 } Qcow2COWRegion;
 
-/* XXX This could be private for qcow2-cluster.c */
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
 typedef struct QCowL2Meta
 {
     /** Guest offset of the first newly allocated cluster */
@@ -333,7 +336,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m);
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          int compressed_size);
commit 060bee8943c27d4d53f65570fafaa2559fcd87c3
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:45 2012 +0100

    qcow2: Drop l2meta.cluster_offset
    
    There's no real reason to have an l2meta for normal requests that don't
    allocate anything. Before we can get rid of it, we must return the host
    cluster offset in a different way.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 94b7f13..c4752ee 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -856,7 +856,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
  * Return 0 on success and -errno in error cases
  */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, QCowL2Meta *m)
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m)
 {
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret, sectors;
@@ -929,7 +929,6 @@ again:
 
     /* If there is something left to allocate, do that now */
     *m = (QCowL2Meta) {
-        .cluster_offset     = cluster_offset,
         .nb_clusters        = 0,
     };
     qemu_co_queue_init(&m->dependent_requests);
@@ -977,9 +976,11 @@ again:
             int alloc_n_start = keep_clusters == 0 ? n_start : 0;
             int nb_sectors = MIN(requested_sectors, avail_sectors);
 
+            if (keep_clusters == 0) {
+                cluster_offset = alloc_cluster_offset;
+            }
+
             *m = (QCowL2Meta) {
-                .cluster_offset = keep_clusters == 0 ?
-                                  alloc_cluster_offset : cluster_offset,
                 .alloc_offset   = alloc_cluster_offset,
                 .offset         = alloc_offset & ~(s->cluster_size - 1),
                 .nb_clusters    = nb_clusters,
@@ -1007,6 +1008,7 @@ again:
 
     assert(sectors > n_start);
     *num = sectors - n_start;
+    *host_offset = cluster_offset;
 
     return 0;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 71f870d..66ca12f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -799,7 +799,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         }
 
         ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, l2meta);
+            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, l2meta);
         if (ret < 0) {
             goto fail;
         }
@@ -809,7 +809,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             qcow2_mark_dirty(bs);
         }
 
-        cluster_offset = l2meta->cluster_offset;
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
@@ -1132,6 +1131,7 @@ static int preallocate(BlockDriverState *bs)
 {
     uint64_t nb_sectors;
     uint64_t offset;
+    uint64_t host_offset = 0;
     int num;
     int ret;
     QCowL2Meta meta;
@@ -1139,18 +1139,18 @@ static int preallocate(BlockDriverState *bs)
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
     qemu_co_queue_init(&meta.dependent_requests);
-    meta.cluster_offset = 0;
 
     while (nb_sectors) {
         num = MIN(nb_sectors, INT_MAX >> 9);
-        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
+        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+                                         &host_offset, &meta);
         if (ret < 0) {
             return ret;
         }
 
         ret = qcow2_alloc_cluster_link_l2(bs, &meta);
         if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
+            qcow2_free_any_clusters(bs, meta.alloc_offset, meta.nb_clusters);
             return ret;
         }
 
@@ -1169,10 +1169,10 @@ static int preallocate(BlockDriverState *bs)
      * all of the allocated clusters (otherwise we get failing reads after
      * EOF). Extend the image to the last allocated sector.
      */
-    if (meta.cluster_offset != 0) {
+    if (host_offset != 0) {
         uint8_t buf[512];
         memset(buf, 0, 512);
-        ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1);
+        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
         if (ret < 0) {
             return ret;
         }
diff --git a/block/qcow2.h b/block/qcow2.h
index 1106b33..24f1001 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -213,9 +213,6 @@ typedef struct QCowL2Meta
     /** Guest offset of the first newly allocated cluster */
     uint64_t offset;
 
-    /** Host offset of the first cluster of the request */
-    uint64_t cluster_offset;
-
     /** Host offset of the first newly allocated cluster */
     uint64_t alloc_offset;
 
@@ -336,7 +333,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, QCowL2Meta *m);
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          int compressed_size);
commit cf5c1a231ee99ac21fe8258faf50bb1f65884343
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:44 2012 +0100

    qcow2: Allocate l2meta dynamically
    
    As soon as delayed COW is introduced, the l2meta struct is needed even
    after completion of the request, so it can't live on the stack.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2.c b/block/qcow2.c
index 0a08ec7..71f870d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -774,15 +774,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
     QEMUIOVector hd_qiov;
     uint64_t bytes_done = 0;
     uint8_t *cluster_data = NULL;
-    QCowL2Meta l2meta = {
-        .nb_clusters = 0,
-    };
+    QCowL2Meta *l2meta;
 
     trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
                                  remaining_sectors);
 
-    qemu_co_queue_init(&l2meta.dependent_requests);
-
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
@@ -791,6 +787,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     while (remaining_sectors != 0) {
 
+        l2meta = g_malloc0(sizeof(*l2meta));
+        qemu_co_queue_init(&l2meta->dependent_requests);
+
         trace_qcow2_writev_start_part(qemu_coroutine_self());
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         n_end = index_in_cluster + remaining_sectors;
@@ -800,17 +799,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         }
 
         ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, &l2meta);
+            index_in_cluster, n_end, &cur_nr_sectors, l2meta);
         if (ret < 0) {
             goto fail;
         }
 
-        if (l2meta.nb_clusters > 0 &&
+        if (l2meta->nb_clusters > 0 &&
             (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
             qcow2_mark_dirty(bs);
         }
 
-        cluster_offset = l2meta.cluster_offset;
+        cluster_offset = l2meta->cluster_offset;
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
@@ -847,12 +846,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, &l2meta);
+        ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
         if (ret < 0) {
             goto fail;
         }
 
-        run_dependent_requests(s, &l2meta);
+        run_dependent_requests(s, l2meta);
+        g_free(l2meta);
+        l2meta = NULL;
 
         remaining_sectors -= cur_nr_sectors;
         sector_num += cur_nr_sectors;
@@ -862,7 +863,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
     ret = 0;
 
 fail:
-    run_dependent_requests(s, &l2meta);
+    if (l2meta != NULL) {
+        run_dependent_requests(s, l2meta);
+        g_free(l2meta);
+    }
 
     qemu_co_mutex_unlock(&s->lock);
 
commit 593fb83cacf3818a5623f31a6c04c24d87519ad0
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:43 2012 +0100

    qcow2: Introduce Qcow2COWRegion
    
    This makes it easier to address the areas for which a COW must be
    performed. As a nice side effect, the COW code in
    qcow2_alloc_cluster_link_l2 becomes really trivial.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d17a37c..94b7f13 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -615,13 +615,41 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     return cluster_offset;
 }
 
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    if (r->nb_sectors == 0) {
+        return 0;
+    }
+
+    qemu_co_mutex_unlock(&s->lock);
+    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+                       r->offset / BDRV_SECTOR_SIZE,
+                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+    qemu_co_mutex_lock(&s->lock);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * Before we update the L2 table to actually point to the new cluster, we
+     * need to be sure that the refcounts have been increased and COW was
+     * handled.
+     */
+    qcow2_cache_depends_on_flush(s->l2_table_cache);
+
+    return 0;
+}
+
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcowState *s = bs->opaque;
     int i, j = 0, l2_index, ret;
-    uint64_t *old_cluster, start_sect, *l2_table;
+    uint64_t *old_cluster, *l2_table;
     uint64_t cluster_offset = m->alloc_offset;
-    bool cow = false;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
 
@@ -631,36 +659,17 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
 
     /* copy content of unmodified sectors */
-    start_sect = m->offset >> 9;
-    if (m->n_start) {
-        cow = true;
-        qemu_co_mutex_unlock(&s->lock);
-        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0)
-            goto err;
+    ret = perform_cow(bs, m, &m->cow_start);
+    if (ret < 0) {
+        goto err;
     }
 
-    if (m->nb_available & (s->cluster_sectors - 1)) {
-        cow = true;
-        qemu_co_mutex_unlock(&s->lock);
-        ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available,
-                           align_offset(m->nb_available, s->cluster_sectors));
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0)
-            goto err;
+    ret = perform_cow(bs, m, &m->cow_end);
+    if (ret < 0) {
+        goto err;
     }
 
-    /*
-     * Update L2 table.
-     *
-     * Before we update the L2 table to actually point to the new cluster, we
-     * need to be sure that the refcounts have been increased and COW was
-     * handled.
-     */
-    if (cow) {
-        qcow2_cache_depends_on_flush(s->l2_table_cache);
-    }
+    /* Update L2 table. */
 
     if (qcow2_need_accurate_refcounts(s)) {
         qcow2_cache_set_dependency(bs, s->l2_table_cache,
@@ -957,19 +966,33 @@ again:
              *
              * avail_sectors: Number of sectors from the start of the first
              * newly allocated to the end of the last newly allocated cluster.
+             *
+             * nb_sectors: The number of sectors from the start of the first
+             * newly allocated cluster to the end of the aread that the write
+             * request actually writes to (excluding COW at the end)
              */
             int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
             int avail_sectors = nb_clusters
                                 << (s->cluster_bits - BDRV_SECTOR_BITS);
+            int alloc_n_start = keep_clusters == 0 ? n_start : 0;
+            int nb_sectors = MIN(requested_sectors, avail_sectors);
 
             *m = (QCowL2Meta) {
                 .cluster_offset = keep_clusters == 0 ?
                                   alloc_cluster_offset : cluster_offset,
                 .alloc_offset   = alloc_cluster_offset,
                 .offset         = alloc_offset & ~(s->cluster_size - 1),
-                .n_start        = keep_clusters == 0 ? n_start : 0,
                 .nb_clusters    = nb_clusters,
-                .nb_available   = MIN(requested_sectors, avail_sectors),
+                .nb_available   = nb_sectors,
+
+                .cow_start = {
+                    .offset     = 0,
+                    .nb_sectors = alloc_n_start,
+                },
+                .cow_end = {
+                    .offset     = nb_sectors * BDRV_SECTOR_SIZE,
+                    .nb_sectors = avail_sectors - nb_sectors,
+                },
             };
             qemu_co_queue_init(&m->dependent_requests);
             QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
diff --git a/block/qcow2.h b/block/qcow2.h
index 2a406a7..1106b33 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -196,6 +196,17 @@ typedef struct QCowCreateState {
 
 struct QCowAIOCB;
 
+typedef struct Qcow2COWRegion {
+    /**
+     * Offset of the COW region in bytes from the start of the first cluster
+     * touched by the request.
+     */
+    uint64_t    offset;
+
+    /** Number of sectors to copy */
+    int         nb_sectors;
+} Qcow2COWRegion;
+
 /* XXX This could be private for qcow2-cluster.c */
 typedef struct QCowL2Meta
 {
@@ -209,12 +220,6 @@ typedef struct QCowL2Meta
     uint64_t alloc_offset;
 
     /**
-     * Number of sectors between the start of the first allocated cluster and
-     * the area that the guest actually writes to.
-     */
-    int n_start;
-
-    /**
      * Number of sectors from the start of the first allocated cluster to
      * the end of the (possibly shortened) request
      */
@@ -229,6 +234,18 @@ typedef struct QCowL2Meta
      */
     CoQueue dependent_requests;
 
+    /**
+     * The COW Region between the start of the first allocated cluster and the
+     * area the guest actually writes to.
+     */
+    Qcow2COWRegion cow_start;
+
+    /**
+     * The COW Region between the area the guest actually writes to and the
+     * end of the last allocated cluster.
+     */
+    Qcow2COWRegion cow_end;
+
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
 
commit 1d3afd649bc77aa14bc2741e2da6475822d41c5f
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Dec 7 18:08:42 2012 +0100

    qcow2: Round QCowL2Meta.offset down to cluster boundary
    
    The offset within the cluster is already present as n_start and this is
    what the code uses. QCowL2Meta.offset is only needed at a cluster
    granularity.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e179211..d17a37c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -631,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
 
     /* copy content of unmodified sectors */
-    start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
+    start_sect = m->offset >> 9;
     if (m->n_start) {
         cow = true;
         qemu_co_mutex_unlock(&s->lock);
@@ -966,7 +966,7 @@ again:
                 .cluster_offset = keep_clusters == 0 ?
                                   alloc_cluster_offset : cluster_offset,
                 .alloc_offset   = alloc_cluster_offset,
-                .offset         = alloc_offset,
+                .offset         = alloc_offset & ~(s->cluster_size - 1),
                 .n_start        = keep_clusters == 0 ? n_start : 0,
                 .nb_clusters    = nb_clusters,
                 .nb_available   = MIN(requested_sectors, avail_sectors),
diff --git a/block/qcow2.h b/block/qcow2.h
index b4eb654..2a406a7 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -199,12 +199,34 @@ struct QCowAIOCB;
 /* XXX This could be private for qcow2-cluster.c */
 typedef struct QCowL2Meta
 {
+    /** Guest offset of the first newly allocated cluster */
     uint64_t offset;
+
+    /** Host offset of the first cluster of the request */
     uint64_t cluster_offset;
+
+    /** Host offset of the first newly allocated cluster */
     uint64_t alloc_offset;
+
+    /**
+     * Number of sectors between the start of the first allocated cluster and
+     * the area that the guest actually writes to.
+     */
     int n_start;
+
+    /**
+     * Number of sectors from the start of the first allocated cluster to
+     * the end of the (possibly shortened) request
+     */
     int nb_available;
+
+    /** Number of newly allocated clusters */
     int nb_clusters;
+
+    /**
+     * Requests that overlap with this allocation and wait to be restarted
+     * when the allocating request has completed.
+     */
     CoQueue dependent_requests;
 
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
commit 871edc5fdba884929102b89d28ff363c94f0822d
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:28 2012 +0000

    hw/ds1338.c: Fix handling of DAY (wday) register.
    
    Per the datasheet, the DAY (wday) register is user defined. Implement this.
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index 94a2f54..1aefa3b 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -26,6 +26,7 @@
 typedef struct {
     I2CSlave i2c;
     int64_t offset;
+    uint8_t wday_offset;
     uint8_t nvram[NVRAM_SIZE];
     int32_t ptr;
     bool addr_byte;
@@ -33,12 +34,13 @@ typedef struct {
 
 static const VMStateDescription vmstate_ds1338 = {
     .name = "ds1338",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_I2C_SLAVE(i2c, DS1338State),
         VMSTATE_INT64(offset, DS1338State),
+        VMSTATE_UINT8_V(wday_offset, DS1338State, 2),
         VMSTATE_UINT8_ARRAY(nvram, DS1338State, NVRAM_SIZE),
         VMSTATE_INT32(ptr, DS1338State),
         VMSTATE_BOOL(addr_byte, DS1338State),
@@ -68,7 +70,7 @@ static void capture_current_time(DS1338State *s)
     } else {
         s->nvram[2] = to_bcd(now.tm_hour);
     }
-    s->nvram[3] = to_bcd(now.tm_wday + 1);
+    s->nvram[3] = (now.tm_wday + s->wday_offset) % 7 + 1;
     s->nvram[4] = to_bcd(now.tm_mday);
     s->nvram[5] = to_bcd(now.tm_mon + 1);
     s->nvram[6] = to_bcd(now.tm_year - 100);
@@ -152,7 +154,13 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
             }
             break;
         case 3:
-            now.tm_wday = from_bcd(data & 7) - 1;
+            {
+                /* The day field is supposed to contain a value in
+                   the range 1-7. Otherwise behavior is undefined.
+                 */
+                int user_wday = (data & 7) - 1;
+                s->wday_offset = (user_wday - now.tm_wday + 7) % 7;
+            }
             break;
         case 4:
             now.tm_mday = from_bcd(data & 0x3f);
@@ -194,6 +202,7 @@ static void ds1338_reset(DeviceState *dev)
 
     /* The clock is running and synchronized with the host */
     s->offset = 0;
+    s->wday_offset = 0;
     memset(s->nvram, 0, NVRAM_SIZE);
     s->ptr = 0;
     s->addr_byte = false;
commit 996e91f04b9cc55cf246052856abe9189a5a0f28
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:28 2012 +0000

    hw/ds1338.c: Implement support for the control register.
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index d2f52fc..94a2f54 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -125,7 +125,8 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
         s->addr_byte = false;
         return 0;
     }
-    if (s->ptr < 8) {
+    if (s->ptr < 7) {
+        /* Time register. */
         struct tm now;
         qemu_get_timedate(&now, s->offset);
         switch(s->ptr) {
@@ -162,11 +163,19 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
         case 6:
             now.tm_year = from_bcd(data) + 100;
             break;
-        case 7:
-            /* Control register. Currently ignored.  */
-            break;
         }
         s->offset = qemu_timedate_diff(&now);
+    } else if (s->ptr == 7) {
+        /* Control register. */
+
+        /* Ensure bits 2, 3 and 6 will read back as zero. */
+        data &= 0xB3;
+
+        /* Attempting to write the OSF flag to logic 1 leaves the
+           value unchanged. */
+        data = (data & ~CTRL_OSF) | (data & s->nvram[s->ptr] & CTRL_OSF);
+
+        s->nvram[s->ptr] = data;
     } else {
         s->nvram[s->ptr] = data;
     }
commit ed3d37d287300b7bcdb4605b921e5ec593afd214
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:28 2012 +0000

    hw/ds1338.c: Ensure state is properly initialized.
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index 0f88720..d2f52fc 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -179,6 +179,17 @@ static int ds1338_init(I2CSlave *i2c)
     return 0;
 }
 
+static void ds1338_reset(DeviceState *dev)
+{
+    DS1338State *s = FROM_I2C_SLAVE(DS1338State, I2C_SLAVE_FROM_QDEV(dev));
+
+    /* The clock is running and synchronized with the host */
+    s->offset = 0;
+    memset(s->nvram, 0, NVRAM_SIZE);
+    s->ptr = 0;
+    s->addr_byte = false;
+}
+
 static void ds1338_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -188,6 +199,7 @@ static void ds1338_class_init(ObjectClass *klass, void *data)
     k->event = ds1338_event;
     k->recv = ds1338_recv;
     k->send = ds1338_send;
+    dc->reset = ds1338_reset;
     dc->vmsd = &vmstate_ds1338;
 }
 
commit 59dda8e05b015471d456177141a7c2eeda3dab14
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:27 2012 +0000

    hw/ds1338.c: Fix handling of HOURS register.
    
    Per the datasheet, the mapping between 12 and 24 hours modes is:
          0      <->  12   PM
          1-12   <->  1-12 AM
          13-23  <->  1-11 PM
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index 69018bc..0f88720 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -55,10 +55,15 @@ static void capture_current_time(DS1338State *s)
     qemu_get_timedate(&now, s->offset);
     s->nvram[0] = to_bcd(now.tm_sec);
     s->nvram[1] = to_bcd(now.tm_min);
-    if (s->nvram[2] & 0x40) {
-        s->nvram[2] = (to_bcd((now.tm_hour % 12)) + 1) | 0x40;
-        if (now.tm_hour >= 12) {
-            s->nvram[2] |= 0x20;
+    if (s->nvram[2] & HOURS_12) {
+        int tmp = now.tm_hour;
+        if (tmp == 0) {
+            tmp = 24;
+        }
+        if (tmp <= 12) {
+            s->nvram[2] = HOURS_12 | to_bcd(tmp);
+        } else {
+            s->nvram[2] = HOURS_12 | HOURS_PM | to_bcd(tmp - 12);
         }
     } else {
         s->nvram[2] = to_bcd(now.tm_hour);
@@ -132,16 +137,18 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
             now.tm_min = from_bcd(data & 0x7f);
             break;
         case 2:
-            if (data & 0x40) {
-                if (data & 0x20) {
-                    data = from_bcd(data & 0x4f) + 11;
-                } else {
-                    data = from_bcd(data & 0x1f) - 1;
+            if (data & HOURS_12) {
+                int tmp = from_bcd(data & (HOURS_PM - 1));
+                if (data & HOURS_PM) {
+                    tmp += 12;
+                }
+                if (tmp == 24) {
+                    tmp = 0;
                 }
+                now.tm_hour = tmp;
             } else {
-                data = from_bcd(data);
+                now.tm_hour = from_bcd(data & (HOURS_12 - 1));
             }
-            now.tm_hour = data;
             break;
         case 3:
             now.tm_wday = from_bcd(data & 7) - 1;
commit 95c9361598e66de42facdac64e614e3de85186f5
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:27 2012 +0000

    hw/ds1338.c: Add definitions for various flags in the RTC registers.
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index faaa4a0..69018bc 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -17,6 +17,12 @@
  */
 #define NVRAM_SIZE 64
 
+/* Flags definitions */
+#define SECONDS_CH 0x80
+#define HOURS_12   0x40
+#define HOURS_PM   0x20
+#define CTRL_OSF   0x20
+
 typedef struct {
     I2CSlave i2c;
     int64_t offset;
commit 580f5c000809108f51a77ae74709100d32be6ea5
Author: Antoine Mathys <barsamin at gmail.com>
Date:   Thu Dec 13 14:05:27 2012 +0000

    hw/ds1338.c: Correct bug in conversion to BCD.
    
    Signed-off-by: Antoine Mathys <barsamin at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/ds1338.c b/hw/ds1338.c
index b576d56..faaa4a0 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -57,9 +57,9 @@ static void capture_current_time(DS1338State *s)
     } else {
         s->nvram[2] = to_bcd(now.tm_hour);
     }
-    s->nvram[3] = to_bcd(now.tm_wday) + 1;
+    s->nvram[3] = to_bcd(now.tm_wday + 1);
     s->nvram[4] = to_bcd(now.tm_mday);
-    s->nvram[5] = to_bcd(now.tm_mon) + 1;
+    s->nvram[5] = to_bcd(now.tm_mon + 1);
     s->nvram[6] = to_bcd(now.tm_year - 100);
 }
 
commit a7f3d65b65b8c86a5ff0c0abcfefb45e2ec6fe4c
Author: Pavel Hrdina <phrdina at redhat.com>
Date:   Tue Dec 11 08:55:48 2012 +0100

    atapi: reset cdrom tray statuses on ide_reset
    
    Tray statuses should be also reseted. Some guests may lock the tray
    and after reset before any kernel is loaded the tray should be unlocked.
    
    Also if you reset the real computer the tray is closed. We should
    do the same in qemu.
    
    Signed-off-by: Pavel Hrdina <phrdina at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/hw/ide/core.c b/hw/ide/core.c
index c4f93d0..1235612 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1869,6 +1869,8 @@ static void ide_reset(IDEState *s)
     s->io_buffer_index = 0;
     s->cd_sector_size = 0;
     s->atapi_dma = 0;
+    s->tray_locked = 0;
+    s->tray_open = 0;
     /* ATA DMA state */
     s->io_buffer_size = 0;
     s->req_nb_sectors = 0;
commit 45e6cee42b98d10e2e14885ab656541a9ffd5187
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Tue Dec 4 11:38:39 2012 +1100

    migration: Fix madvise breakage if host and guest have different page sizes
    
    madvise(DONTNEED) will throw away the contents of the whole page at the
    given address, even if the given length is less than the page size.  One
    can argue about whether that's the correct behaviour, but that's what it's
    done for a long time in Linux at least.
    
    That means that the madvise() in ram_load(), on a setup where
    TARGET_PAGE_SIZE is smaller than the host page size, can throw away data
    in guest pages adjacent to the one it's actually processing right now,
    leading to guest memory corruption on an incoming migration.
    
    This patch therefore, disables the madvise() if the host page size is
    larger than TARGET_PAGE_SIZE.  This means we don't get the benefits of that
    madvise() in this case, but a more complete fix is more difficult to
    accomplish.  This at least fixes the guest memory corruption.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Reported-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/arch_init.c b/arch_init.c
index b75a4c5..83dcc53 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -840,7 +840,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
             if (ch == 0 &&
-                (!kvm_enabled() || kvm_has_sync_mmu())) {
+                (!kvm_enabled() || kvm_has_sync_mmu()) &&
+                getpagesize() <= TARGET_PAGE_SIZE) {
                 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
             }
 #endif
commit 7ec81e56edc2b2007ce0ae3982aa5c18af9546ab
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Tue Dec 4 11:38:38 2012 +1100

    Fix off-by-1 error in RAM migration code
    
    The code for migrating (or savevm-ing) memory pages starts off by creating
    a dirty bitmap and filling it with 1s.  Except, actually, because bit
    addresses are 0-based it fills every bit except bit 0 with 1s and puts an
    extra 1 beyond the end of the bitmap, potentially corrupting unrelated
    memory.  Oops.  This patch fixes it.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/arch_init.c b/arch_init.c
index e6effe8..b75a4c5 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -568,7 +568,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
 
     migration_bitmap = bitmap_new(ram_pages);
-    bitmap_set(migration_bitmap, 1, ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
 
     bytes_transferred = 0;
commit 91d4093dce58e343e2336324794daa93517b86c2
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:33:00 2012 +0100

    qemu-iotests: Test concurrent cluster allocations
    
    This adds some first tests for qcow2's dependency handling when two
    parallel write requests access the same cluster.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
new file mode 100755
index 0000000..e0176f4
--- /dev/null
+++ b/tests/qemu-iotests/046
@@ -0,0 +1,215 @@
+#!/bin/bash
+#
+# Test concurrent cluster allocations
+#
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf at redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+CLUSTER_SIZE=64k
+size=128M
+
+echo
+echo "== creating backing file for COW tests =="
+
+_make_test_img $size
+
+function backing_io()
+{
+    local offset=$1
+    local sectors=$2
+    local op=$3
+    local pattern=0
+    local cur_sec=0
+
+    for i in $(seq 0 $((sectors - 1))); do
+        cur_sec=$((offset / 65536 + i))
+        pattern=$(( ( (cur_sec % 128) + (cur_sec / 128)) % 128 ))
+
+        echo "$op -P $pattern $((cur_sec * 64))k 64k"
+    done
+}
+
+backing_io 0 16 write | $QEMU_IO $TEST_IMG | _filter_qemu_io
+
+mv $TEST_IMG $TEST_IMG.base
+
+_make_test_img -b $TEST_IMG.base 6G
+
+echo
+echo "== Some concurrent requests touching the same cluster =="
+
+function overlay_io()
+{
+# Allocate middle of cluster 1, then write to somewhere before and after it
+cat  <<EOF
+break write_aio A
+aio_write -P 10 0x18000 0x2000
+wait_break A
+
+aio_write -P 11 0x12000 0x2000
+aio_write -P 12 0x1c000 0x2000
+
+resume A
+aio_flush
+EOF
+
+# Sequential write case: Alloc middle of cluster 2, then write overlapping
+# to next cluster
+cat  <<EOF
+break write_aio A
+aio_write -P 20 0x28000 0x2000
+wait_break A
+aio_write -P 21 0x2a000 0x10000
+resume A
+aio_flush
+EOF
+
+# The same with a gap between both requests
+cat  <<EOF
+break write_aio A
+aio_write -P 40 0x48000 0x2000
+wait_break A
+aio_write -P 41 0x4c000 0x10000
+resume A
+aio_flush
+EOF
+
+# Sequential write, but the next cluster is already allocated
+cat  <<EOF
+write -P 70 0x76000 0x8000
+aio_flush
+break write_aio A
+aio_write -P 60 0x66000 0x2000
+wait_break A
+aio_write -P 61 0x6a000 0xe000
+resume A
+aio_flush
+EOF
+
+# Sequential write, but the next cluster is already allocated
+# and phyiscally in the right position
+cat  <<EOF
+write -P 89 0x80000 0x1000
+write -P 90 0x96000 0x8000
+aio_flush
+discard 0x80000 0x10000
+aio_flush
+break write_aio A
+aio_write -P 80 0x86000 0x2000
+wait_break A
+aio_write -P 81 0x8a000 0xe000
+resume A
+aio_flush
+EOF
+
+# Sequential write, and the next cluster is compressed
+cat  <<EOF
+write    -P 109 0xa0000 0x1000
+write -c -P 110 0xb0000 0x10000
+aio_flush
+discard 0xa0000 0x10000
+aio_flush
+break write_aio A
+aio_write -P 100 0xa6000 0x2000
+wait_break A
+aio_write -P 101 0xaa000 0xe000
+resume A
+aio_flush
+EOF
+}
+
+overlay_io | $QEMU_IO blkdebug::$TEST_IMG | _filter_qemu_io |\
+	sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g'
+
+echo
+echo "== Verify image content =="
+
+function verify_io()
+{
+    echo read -P 0 0 0x10000
+
+    echo read -P 1  0x10000 0x2000
+    echo read -P 11 0x12000 0x2000
+    echo read -P 1  0x14000 0x4000
+    echo read -P 10 0x18000 0x2000
+    echo read -P 1  0x1a000 0x2000
+    echo read -P 12 0x1c000 0x2000
+    echo read -P 1  0x1e000 0x2000
+
+    echo read -P 2  0x20000 0x8000
+    echo read -P 20 0x28000 0x2000
+    echo read -P 21 0x2a000 0x10000
+    echo read -P 3  0x3a000 0x6000
+
+    echo read -P 4  0x40000 0x8000
+    echo read -P 40 0x48000 0x2000
+    echo read -P 4  0x4a000 0x2000
+    echo read -P 41 0x4c000 0x10000
+    echo read -P 5  0x5c000 0x4000
+
+    echo read -P 6  0x60000 0x6000
+    echo read -P 60 0x66000 0x2000
+    echo read -P 6  0x68000 0x2000
+    echo read -P 61 0x6a000 0xe000
+    echo read -P 70 0x78000 0x6000
+    echo read -P 7  0x7e000 0x2000
+
+    echo read -P 8  0x80000 0x6000
+    echo read -P 80 0x86000 0x2000
+    echo read -P 8  0x88000 0x2000
+    echo read -P 81 0x8a000 0xe000
+    echo read -P 90 0x98000 0x6000
+    echo read -P 9  0x9e000 0x2000
+
+    echo read -P 10  0xa0000 0x6000
+    echo read -P 100 0xa6000 0x2000
+    echo read -P 10  0xa8000 0x2000
+    echo read -P 101 0xaa000 0xe000
+    echo read -P 110 0xb8000 0x8000
+}
+
+verify_io | $QEMU_IO $TEST_IMG | _filter_qemu_io
+
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out
new file mode 100644
index 0000000..565360f
--- /dev/null
+++ b/tests/qemu-iotests/046.out
@@ -0,0 +1,163 @@
+QA output created by 046
+
+== creating backing file for COW tests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+qemu-io> wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 327680
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 393216
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 458752
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 524288
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 589824
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 655360
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 720896
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 786432
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 851968
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 917504
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 983040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' 
+
+== Some concurrent requests touching the same cluster ==
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 32768/32768 bytes at offset XXX
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 4096/4096 bytes at offset XXX
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 32768/32768 bytes at offset XXX
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 4096/4096 bytes at offset XXX
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> 
+== Verify image content ==
+qemu-io> read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 65536
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 73728
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 16384/16384 bytes at offset 81920
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 98304
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 106496
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 114688
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 122880
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 131072
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 163840
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 65536/65536 bytes at offset 172032
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 237568
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 262144
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 294912
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 303104
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 65536/65536 bytes at offset 311296
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 16384/16384 bytes at offset 376832
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 393216
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 417792
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 425984
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 434176
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 491520
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 516096
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 524288
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 548864
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 557056
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 565248
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 622592
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 647168
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 655360
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 679936
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 688128
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 696320
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 753664
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 5b39785..a0307de 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -52,3 +52,4 @@
 043 rw auto backing
 044 rw auto
 045 rw auto
+046 rw auto aio
commit 67a7a0ebe5ef0f337d5f7e7e618b08c562a55da0
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:32:59 2012 +0100

    qcow2: Move BLKDBG_EVENT out of the lock
    
    We want to use these events to suspend requests for testing concurrent
    AIO requests. Suspending requests while they are holding the CoMutex is
    rather boring for this purpose.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/qcow2.c b/block/qcow2.c
index c1ff31f..0a08ec7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -835,8 +835,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                 cur_nr_sectors * 512);
         }
 
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         qemu_co_mutex_unlock(&s->lock);
+        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
                                 (cluster_offset >> 9) + index_in_cluster);
         ret = bdrv_co_writev(bs->file,
commit 41c695c749b84d40e53e64faadedc0392aaea07e
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:32:58 2012 +0100

    qemu-io: Add AIO debugging commands
    
    This makes the blkdebug suspend/resume functionality available in
    qemu-io. Use it like this:
    
      $ ./qemu-io blkdebug::/tmp/test.qcow2
      qemu-io> break write_aio req_a
      qemu-io> aio_write 0 4k
      qemu-io> blkdebug: Suspended request 'req_a'
      qemu-io> resume req_a
      blkdebug: Resuming request 'req_a'
      qemu-io> wrote 4096/4096 bytes at offset 0
      4 KiB, 1 ops; 0:00:30.71 (133.359788 bytes/sec and 0.0326 ops/sec)
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index b3faf3a..0668c4b 100644
--- a/block.c
+++ b/block.c
@@ -3045,7 +3045,46 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
     }
 
     drv->bdrv_debug_event(bs, event);
+}
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                          const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
+        bs = bs->file;
+    }
+
+    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
+        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
+    }
+
+    return -ENOTSUP;
+}
+
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
+        bs = bs->file;
+    }
 
+    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
+        return bs->drv->bdrv_debug_resume(bs, tag);
+    }
+
+    return -ENOTSUP;
+}
+
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
+        bs = bs->file;
+    }
+
+    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
+        return bs->drv->bdrv_debug_is_suspended(bs, tag);
+    }
+
+    return false;
 }
 
 /**************************************************************/
diff --git a/block.h b/block.h
index 24bea09..893448a 100644
--- a/block.h
+++ b/block.h
@@ -431,4 +431,9 @@ typedef enum {
 #define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
 
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                           const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
 #endif
diff --git a/block_int.h b/block_int.h
index 9deedb8..bf3f79b 100644
--- a/block_int.h
+++ b/block_int.h
@@ -190,6 +190,12 @@ struct BlockDriver {
 
     void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
 
+    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+        const char *tag);
+    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
     /*
      * Returns 1 if newly created images are guaranteed to contain only
      * zeros, 0 otherwise.
diff --git a/qemu-io.c b/qemu-io.c
index b4b0898..1637773 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1671,6 +1671,67 @@ static const cmdinfo_t map_cmd = {
        .oneline        = "prints the allocated areas of a file",
 };
 
+static int break_f(int argc, char **argv)
+{
+    int ret;
+
+    ret = bdrv_debug_breakpoint(bs, argv[1], argv[2]);
+    if (ret < 0) {
+        printf("Could not set breakpoint: %s\n", strerror(-ret));
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t break_cmd = {
+       .name           = "break",
+       .argmin         = 2,
+       .argmax         = 2,
+       .cfunc          = break_f,
+       .args           = "event tag",
+       .oneline        = "sets a breakpoint on event and tags the stopped "
+                         "request as tag",
+};
+
+static int resume_f(int argc, char **argv)
+{
+    int ret;
+
+    ret = bdrv_debug_resume(bs, argv[1]);
+    if (ret < 0) {
+        printf("Could not resume request: %s\n", strerror(-ret));
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t resume_cmd = {
+       .name           = "resume",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = resume_f,
+       .args           = "tag",
+       .oneline        = "resumes the request tagged as tag",
+};
+
+static int wait_break_f(int argc, char **argv)
+{
+    while (!bdrv_debug_is_suspended(bs, argv[1])) {
+        qemu_aio_wait();
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t wait_break_cmd = {
+       .name           = "wait_break",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = wait_break_f,
+       .args           = "tag",
+       .oneline        = "waits for the suspension of a request",
+};
+
 static int abort_f(int argc, char **argv)
 {
     abort();
@@ -1934,6 +1995,9 @@ int main(int argc, char **argv)
     add_command(&discard_cmd);
     add_command(&alloc_cmd);
     add_command(&map_cmd);
+    add_command(&break_cmd);
+    add_command(&resume_cmd);
+    add_command(&wait_break_cmd);
     add_command(&abort_cmd);
 
     add_args_command(init_args_command);
commit 3c90c65d7adab49a41952ee14e1d65f81355e408
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:32:57 2012 +0100

    blkdebug: Implement suspend/resume of AIO requests
    
    This allows more systematic AIO testing. The patch adds three new
    operations to blkdebug:
    
     * Setting a "breakpoint" on a blkdebug event. The next request that
       triggers this breakpoint is suspended and is tagged with a name.
       The breakpoint is removed after a request has triggered it.
    
     * A suspended request (identified by it's tag) can be resumed
    
     * It's possible to check whether a suspended request with a given
       tag exists. This can be used for waiting for an event.
    
    Ideally, we would instead tag requests right when they are created and
    set breakpoints for individual requests. However, at this point the
    block layer doesn't allow this easily, and breakpoints that trigger for
    any request already allow a lot of useful testing.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 859792b..294e983 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -29,8 +29,10 @@
 typedef struct BDRVBlkdebugState {
     int state;
     int new_state;
+
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
+    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
 } BDRVBlkdebugState;
 
 typedef struct BlkdebugAIOCB {
@@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB {
     int ret;
 } BlkdebugAIOCB;
 
+typedef struct BlkdebugSuspendedReq {
+    Coroutine *co;
+    char *tag;
+    QLIST_ENTRY(BlkdebugSuspendedReq) next;
+} BlkdebugSuspendedReq;
+
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
 
 static const AIOCBInfo blkdebug_aiocb_info = {
@@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = {
 enum {
     ACTION_INJECT_ERROR,
     ACTION_SET_STATE,
+    ACTION_SUSPEND,
 };
 
 typedef struct BlkdebugRule {
@@ -65,6 +74,9 @@ typedef struct BlkdebugRule {
         struct {
             int new_state;
         } set_state;
+        struct {
+            char *tag;
+        } suspend;
     } options;
     QLIST_ENTRY(BlkdebugRule) next;
     QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
@@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque)
         rule->options.set_state.new_state =
             qemu_opt_get_number(opts, "new_state", 0);
         break;
+
+    case ACTION_SUSPEND:
+        rule->options.suspend.tag =
+            g_strdup(qemu_opt_get(opts, "tag"));
+        break;
     };
 
     /* Add the rule */
@@ -240,6 +257,9 @@ static void remove_rule(BlkdebugRule *rule)
     case ACTION_INJECT_ERROR:
     case ACTION_SET_STATE:
         break;
+    case ACTION_SUSPEND:
+        g_free(rule->options.suspend.tag);
+        break;
     }
 
     QLIST_REMOVE(rule, next);
@@ -406,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
+
 static void blkdebug_close(BlockDriverState *bs)
 {
     BDRVBlkdebugState *s = bs->opaque;
@@ -419,6 +440,27 @@ static void blkdebug_close(BlockDriverState *bs)
     }
 }
 
+static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq r;
+
+    r = (BlkdebugSuspendedReq) {
+        .co         = qemu_coroutine_self(),
+        .tag        = g_strdup(rule->options.suspend.tag),
+    };
+
+    remove_rule(rule);
+    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
+
+    printf("blkdebug: Suspended request '%s'\n", r.tag);
+    qemu_coroutine_yield();
+    printf("blkdebug: Resuming request '%s'\n", r.tag);
+
+    QLIST_REMOVE(&r, next);
+    g_free(r.tag);
+}
+
 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
     bool injected)
 {
@@ -442,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
     case ACTION_SET_STATE:
         s->new_state = rule->options.set_state.new_state;
         break;
+
+    case ACTION_SUSPEND:
+        suspend_request(bs, rule);
+        break;
     }
     return injected;
 }
@@ -449,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
 static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    struct BlkdebugRule *rule;
+    struct BlkdebugRule *rule, *next;
     bool injected;
 
     assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
 
     injected = false;
     s->new_state = s->state;
-    QLIST_FOREACH(rule, &s->rules[event], next) {
+    QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
         injected = process_rule(bs, rule, injected);
     }
     s->state = s->new_state;
 }
 
+static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
+                                     const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    struct BlkdebugRule *rule;
+    BlkDebugEvent blkdebug_event;
+
+    if (get_event_by_name(event, &blkdebug_event) < 0) {
+        return -ENOENT;
+    }
+
+
+    rule = g_malloc(sizeof(*rule));
+    *rule = (struct BlkdebugRule) {
+        .event  = blkdebug_event,
+        .action = ACTION_SUSPEND,
+        .state  = 0,
+        .options.suspend.tag = g_strdup(tag),
+    };
+
+    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
+
+    return 0;
+}
+
+static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq *r;
+
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+        if (!strcmp(r->tag, tag)) {
+            qemu_coroutine_enter(r->co, NULL);
+            return 0;
+        }
+    }
+    return -ENOENT;
+}
+
+
+static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq *r;
+
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+        if (!strcmp(r->tag, tag)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static int64_t blkdebug_getlength(BlockDriverState *bs)
 {
     return bdrv_getlength(bs->file);
@@ -480,7 +579,10 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_aio_readv     = blkdebug_aio_readv,
     .bdrv_aio_writev    = blkdebug_aio_writev,
 
-    .bdrv_debug_event   = blkdebug_debug_event,
+    .bdrv_debug_event           = blkdebug_debug_event,
+    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
+    .bdrv_debug_resume          = blkdebug_debug_resume,
+    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
 };
 
 static void bdrv_blkdebug_init(void)
commit 9e35542b0fc3871caac15ccd57548b99df2c94b7
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:32:56 2012 +0100

    blkdebug: Factor out remove_rule()
    
    The cleanup work to remove a rule depends on the type of the rule. It's
    easy for the existing rules as there is no data that must be cleaned up
    and is specific to a type yet, but the next patch will change this.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/blkdebug.c b/block/blkdebug.c
index c9041ec..859792b 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -234,6 +234,18 @@ static int add_rule(QemuOpts *opts, void *opaque)
     return 0;
 }
 
+static void remove_rule(BlkdebugRule *rule)
+{
+    switch (rule->action) {
+    case ACTION_INJECT_ERROR:
+    case ACTION_SET_STATE:
+        break;
+    }
+
+    QLIST_REMOVE(rule, next);
+    g_free(rule);
+}
+
 static int read_config(BDRVBlkdebugState *s, const char *filename)
 {
     FILE *f;
@@ -402,8 +414,7 @@ static void blkdebug_close(BlockDriverState *bs)
 
     for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
-            QLIST_REMOVE(rule, next);
-            g_free(rule);
+            remove_rule(rule);
         }
     }
 }
commit 312a2ba0eb8ab19646517aeaa785475d3fbcfd51
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Dec 6 14:32:55 2012 +0100

    blkdebug: Allow usage without config file
    
    As soon as new rules can be set during runtime, as introduced by the
    next patch, blkdebug makes sense even without a config file.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/blkdebug.c b/block/blkdebug.c
index d61ece8..c9041ec 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -240,6 +240,11 @@ static int read_config(BDRVBlkdebugState *s, const char *filename)
     int ret;
     struct add_rule_data d;
 
+    /* Allow usage without config file */
+    if (!*filename) {
+        return 0;
+    }
+
     f = fopen(filename, "r");
     if (f == NULL) {
         return -errno;
commit b83c18e225cf82a21535561270b6dfd86b1c9031
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:23 2012 +0800

    create new function: qemu_opt_set_number
    
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-option.c b/qemu-option.c
index 1303188..94557cf 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -695,6 +695,28 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val)
     return 0;
 }
 
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc = opts->list->desc;
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return -1;
+    }
+
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->value.uint = val;
+    opt->str = g_strdup_printf("%" PRId64, val);
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
+    return 0;
+}
+
 int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
                      int abort_on_failure)
 {
diff --git a/qemu-option.h b/qemu-option.h
index b0f8d1e..002dd07 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -126,6 +126,7 @@ int qemu_opt_set(QemuOpts *opts, const char *name, const char *value);
 void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
                       Error **errp);
 int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val);
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val);
 typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque);
 int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
                      int abort_on_failure);
commit e478b448d7c36046462733ffaeaea0961575790a
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:22 2012 +0800

    use qemu_opts_create_nofail
    
    We will use qemu_opts_create_nofail function, it can make code
    more readable.
    
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 463f4c2..9a05e57 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -568,7 +568,7 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
         break;
     case IF_VIRTIO:
         /* add virtio block device */
-        opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(qemu_find_opts("device"));
         if (arch_type == QEMU_ARCH_S390X) {
             qemu_opt_set(opts, "driver", "virtio-blk-s390");
         } else {
diff --git a/hw/watchdog.c b/hw/watchdog.c
index b52aced..5c82c17 100644
--- a/hw/watchdog.c
+++ b/hw/watchdog.c
@@ -66,7 +66,7 @@ int select_watchdog(const char *p)
     QLIST_FOREACH(model, &watchdog_list, entry) {
         if (strcasecmp(model->wdt_name, p) == 0) {
             /* add the device */
-            opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(qemu_find_opts("device"));
             qemu_opt_set(opts, "driver", p);
             return 0;
         }
diff --git a/qemu-config.c b/qemu-config.c
index aa78fb9..54db981 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -756,7 +756,7 @@ int qemu_global_option(const char *str)
         return -1;
     }
 
-    opts = qemu_opts_create(&qemu_global_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&qemu_global_opts);
     qemu_opt_set(opts, "driver", driver);
     qemu_opt_set(opts, "property", property);
     qemu_opt_set(opts, "value", str+offset+1);
@@ -843,7 +843,7 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
                 error_free(local_err);
                 goto out;
             }
-            opts = qemu_opts_create(list, NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(list);
             continue;
         }
         if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2) {
diff --git a/qemu-img.c b/qemu-img.c
index c4dae88..c989a52 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1934,7 +1934,7 @@ static int img_resize(int argc, char **argv)
     }
 
     /* Parse size */
-    param = qemu_opts_create(&resize_options, NULL, 0, NULL);
+    param = qemu_opts_create_nofail(&resize_options);
     if (qemu_opt_set(param, BLOCK_OPT_SIZE, size)) {
         /* Error message already printed when size parsing fails */
         ret = -1;
diff --git a/qemu-sockets.c b/qemu-sockets.c
index d314cf1..c52a40a 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -579,7 +579,7 @@ int inet_listen(const char *str, char *ostr, int olen,
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_listen_opts(opts, port_offset, errp);
@@ -618,7 +618,7 @@ int inet_connect(const char *str, Error **errp)
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, NULL, NULL);
@@ -652,7 +652,7 @@ int inet_nonblocking_connect(const char *str,
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, callback, opaque);
@@ -795,7 +795,7 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp)
     char *path, *optstr;
     int sock, len;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
 
     optstr = strchr(str, ',');
     if (optstr) {
@@ -823,7 +823,7 @@ int unix_connect(const char *path, Error **errp)
     QemuOpts *opts;
     int sock;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     qemu_opt_set(opts, "path", path);
     sock = unix_connect_opts(opts, errp, NULL, NULL);
     qemu_opts_del(opts);
@@ -840,7 +840,7 @@ int unix_nonblocking_connect(const char *path,
 
     g_assert(callback != NULL);
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     qemu_opt_set(opts, "path", path);
     sock = unix_connect_opts(opts, errp, callback, opaque);
     qemu_opts_del(opts);
@@ -891,7 +891,7 @@ int socket_connect(SocketAddress *addr, Error **errp,
     QemuOpts *opts;
     int fd;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     switch (addr->kind) {
     case SOCKET_ADDRESS_KIND_INET:
         inet_addr_to_opts(opts, addr->inet);
@@ -922,7 +922,7 @@ int socket_listen(SocketAddress *addr, Error **errp)
     QemuOpts *opts;
     int fd;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     switch (addr->kind) {
     case SOCKET_ADDRESS_KIND_INET:
         inet_addr_to_opts(opts, addr->inet);
diff --git a/vl.c b/vl.c
index 6b3827c..3ebf01f 100644
--- a/vl.c
+++ b/vl.c
@@ -1996,7 +1996,7 @@ static int balloon_parse(const char *arg)
                 return  -1;
         } else {
             /* create empty opts */
-            opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(qemu_find_opts("device"));
         }
         qemu_opt_set(opts, "driver", "virtio-balloon");
         return 0;
@@ -2246,14 +2246,14 @@ static int virtcon_parse(const char *devname)
         exit(1);
     }
 
-    bus_opts = qemu_opts_create(device, NULL, 0, NULL);
+    bus_opts = qemu_opts_create_nofail(device);
     if (arch_type == QEMU_ARCH_S390X) {
         qemu_opt_set(bus_opts, "driver", "virtio-serial-s390");
     } else {
         qemu_opt_set(bus_opts, "driver", "virtio-serial-pci");
     } 
 
-    dev_opts = qemu_opts_create(device, NULL, 0, NULL);
+    dev_opts = qemu_opts_create_nofail(device);
     qemu_opt_set(dev_opts, "driver", "virtconsole");
 
     snprintf(label, sizeof(label), "virtcon%d", index);
@@ -3105,8 +3105,7 @@ int main(int argc, char **argv, char **envp)
 
                 qemu_opt_set_bool(fsdev, "readonly",
                                 qemu_opt_get_bool(opts, "readonly", 0));
-                device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
-                                          NULL);
+                device = qemu_opts_create_nofail(qemu_find_opts("device"));
                 qemu_opt_set(device, "driver", "virtio-9p-pci");
                 qemu_opt_set(device, "fsdev",
                              qemu_opt_get(opts, "mount_tag"));
@@ -3126,8 +3125,7 @@ int main(int argc, char **argv, char **envp)
                 }
                 qemu_opt_set(fsdev, "fsdriver", "synth");
 
-                device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
-                                          NULL);
+                device = qemu_opts_create_nofail(qemu_find_opts("device"));
                 qemu_opt_set(device, "driver", "virtio-9p-pci");
                 qemu_opt_set(device, "fsdev", "v_synth");
                 qemu_opt_set(device, "mount_tag", "v_synth");
commit dd39244978627e41a66b98d20eceddb1d7d25def
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:21 2012 +0800

    introduce qemu_opts_create_nofail function
    
    While id is NULL, qemu_opts_create can not fail, so ignore
    errors is fine.
    
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-option.c b/qemu-option.c
index e0131ce..1303188 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -780,6 +780,15 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
     return opts;
 }
 
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list)
+{
+    QemuOpts *opts;
+    Error *errp = NULL;
+    opts = qemu_opts_create(list, NULL, 0, &errp);
+    assert_no_error(errp);
+    return opts;
+}
+
 void qemu_opts_reset(QemuOptsList *list)
 {
     QemuOpts *opts, *next_opts;
diff --git a/qemu-option.h b/qemu-option.h
index ca72986..b0f8d1e 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -133,6 +133,7 @@ int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
 QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id);
 QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
                            int fail_if_exists, Error **errp);
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list);
 void qemu_opts_reset(QemuOptsList *list);
 void qemu_opts_loc_restore(QemuOpts *opts);
 int qemu_opts_set(QemuOptsList *list, const char *id,
commit ad718d01ba0af531d10b0a8685cf5047edfd1891
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:20 2012 +0800

    qemu-option: qemu_opt_set_bool(): fix code duplication
    
    It will set opt->str in qemu_opt_set_bool, without opt->str, there
    will be some potential bugs.
    
    These are uses of opt->str, and what happens when it isn't set:
    
    * qemu_opt_get(): returns NULL, which means "not set".  Bug can bite
      when value isn't the default value.
    
    * qemu_opt_parse(): passes NULL to parse_option_bool(), which treats it
      like "on".  Wrong if the value is actually false.  Bug can bite when
      qemu_opts_validate() runs after qemu_opt_set_bool().
    
    * qemu_opt_del(): passes NULL to g_free(), which is just fine.
    
    * qemu_opt_foreach(): passes NULL to the callback, which is unlikely to
      be prepared for it.
    
    * qemu_opts_print(): prints NULL, which crashes on some systems.
    
    * qemu_opts_to_qdict(): passes NULL to qstring_from_str(), which
      crashes.
    
    It also makes qemu_opt_set_bool more readable by using find_desc_by_name
    and opts_accepts_any.
    
    It is based on Luiz's patch and uses Markus's comments. Discussions can
    be found at:
    http://lists.nongnu.org/archive/html/qemu-devel/2012-07/msg02716.html
    
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-option.c b/qemu-option.c
index 74321bb..e0131ce 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -677,30 +677,21 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val)
 {
     QemuOpt *opt;
     const QemuOptDesc *desc = opts->list->desc;
-    int i;
 
-    for (i = 0; desc[i].name != NULL; i++) {
-        if (strcmp(desc[i].name, name) == 0) {
-            break;
-        }
-    }
-    if (desc[i].name == NULL) {
-        if (i == 0) {
-            /* empty list -> allow any */;
-        } else {
-            qerror_report(QERR_INVALID_PARAMETER, name);
-            return -1;
-        }
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return -1;
     }
 
-    opt = g_malloc0(sizeof(*opt));
     opt->name = g_strdup(name);
     opt->opts = opts;
-    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
-    if (desc[i].name != NULL) {
-        opt->desc = desc+i;
-    }
     opt->value.boolean = !!val;
+    opt->str = g_strdup(val ? "on" : "off");
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
     return 0;
 }
 
commit db97ceba1e17db59188e91b66e61bf84a6a71081
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:19 2012 +0800

    qemu-option: qemu_opts_validate(): fix duplicated code
    
    Use opts_accepts_any() and find_desc_by_name().
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-option.c b/qemu-option.c
index 375daaa..74321bb 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -1076,23 +1076,15 @@ void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
     QemuOpt *opt;
     Error *local_err = NULL;
 
-    assert(opts->list->desc[0].name == NULL);
+    assert(opts_accepts_any(opts));
 
     QTAILQ_FOREACH(opt, &opts->head, next) {
-        int i;
-
-        for (i = 0; desc[i].name != NULL; i++) {
-            if (strcmp(desc[i].name, opt->name) == 0) {
-                break;
-            }
-        }
-        if (desc[i].name == NULL) {
+        opt->desc = find_desc_by_name(desc, opt->name);
+        if (!opt->desc) {
             error_set(errp, QERR_INVALID_PARAMETER, opt->name);
             return;
         }
 
-        opt->desc = &desc[i];
-
         qemu_opt_parse(opt, &local_err);
         if (error_is_set(&local_err)) {
             error_propagate(errp, local_err);
commit c474ced8fe6684265fbb6a3183eb0cbea561409f
Author: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
Date:   Thu Dec 6 14:47:18 2012 +0800

    qemu-option: opt_set(): split it up into more functions
    
    The new functions are opts_accepts_any() and find_desc_by_name(), which
    are also going to be used by qemu_opts_validate() (see next commit).
    
    This also makes opt_set() slightly more readable.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Dong Xu Wang <wdongxu at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-option.c b/qemu-option.c
index 27891e7..375daaa 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -602,26 +602,36 @@ static void qemu_opt_del(QemuOpt *opt)
     g_free(opt);
 }
 
-static void opt_set(QemuOpts *opts, const char *name, const char *value,
-                    bool prepend, Error **errp)
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+    return opts->list->desc[0].name == NULL;
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+                                            const char *name)
 {
-    QemuOpt *opt;
-    const QemuOptDesc *desc = opts->list->desc;
-    Error *local_err = NULL;
     int i;
 
     for (i = 0; desc[i].name != NULL; i++) {
         if (strcmp(desc[i].name, name) == 0) {
-            break;
+            return &desc[i];
         }
     }
-    if (desc[i].name == NULL) {
-        if (i == 0) {
-            /* empty list -> allow any */;
-        } else {
-            error_set(errp, QERR_INVALID_PARAMETER, name);
-            return;
-        }
+
+    return NULL;
+}
+
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+                    bool prepend, Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+    Error *local_err = NULL;
+
+    desc = find_desc_by_name(opts->list->desc, name);
+    if (!desc && !opts_accepts_any(opts)) {
+        error_set(errp, QERR_INVALID_PARAMETER, name);
+        return;
     }
 
     opt = g_malloc0(sizeof(*opt));
@@ -632,9 +642,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value,
     } else {
         QTAILQ_INSERT_TAIL(&opts->head, opt, next);
     }
-    if (desc[i].name != NULL) {
-        opt->desc = desc+i;
-    }
+    opt->desc = desc;
     if (value) {
         opt->str = g_strdup(value);
     }
commit 97331270e50f5858c82a0c6d146da81f5b776535
Author: Jean-Christophe DUBOIS <jcd at tribudubois.net>
Date:   Mon Dec 3 12:55:57 2012 +0000

    exynos4210/mct: Avoid infinite loop on non incremental timers
    
    Check for a 0 "distance" value to avoid infinite loop when the
    expired FCR timer was not programed with auto-increment.
    
    With this change the behavior is coherent with the same type
    of code in the exynos4210_gfrc_restart() function in the same
    file.
    
    Linux seems to mostly use this timer with auto-increment
    which explain why it is not a problem most of the time.
    
    However other OS might have a problem with this if they
    don't use the auto-increment feature.
    
    Signed-off-by: Jean-Christophe DUBOIS <jcd at tribudubois.net>
    Reviewed-by: Evgeny Voevodin <e.voevodin at samsung.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index e79cd6a..37dbda9 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -568,7 +568,7 @@ static void exynos4210_gfrc_event(void *opaque)
     /* Reload FRC to reach nearest comparator */
     s->g_timer.curr_comp = exynos4210_gcomp_find(s);
     distance = exynos4210_gcomp_get_distance(s, s->g_timer.curr_comp);
-    if (distance > MCT_GT_COUNTER_STEP) {
+    if (distance > MCT_GT_COUNTER_STEP || !distance) {
         distance = MCT_GT_COUNTER_STEP;
     }
     exynos4210_gfrc_set_count(&s->g_timer, distance);
commit f47b48fb678581d6ee369cfe26b3513100b7d53e
Author: Daniel Sangorrin <dsl at ertl.jp>
Date:   Tue Dec 11 11:30:38 2012 +0000

    hw/arm_gic: fix target CPUs affected by set enable/pending ops
    
    Fix a bug on the ARM GIC model where interrupts are not
    set pending on the correct target CPUs when they are
    triggered by writes to the Interrupt Set Enable or
    Set Pending registers.
    
    Signed-off-by: Daniel Sangorrin <dsl at ertl.jp>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index 672d539..8d769de 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -374,7 +374,8 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
           value = 0xff;
         for (i = 0; i < 8; i++) {
             if (value & (1 << i)) {
-                int mask = (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq);
+                int mask =
+                    (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq + i);
                 int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
 
                 if (!GIC_TEST_ENABLED(irq + i, cm)) {
@@ -417,7 +418,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
 
         for (i = 0; i < 8; i++) {
             if (value & (1 << i)) {
-                GIC_SET_PENDING(irq + i, GIC_TARGET(irq));
+                GIC_SET_PENDING(irq + i, GIC_TARGET(irq + i));
             }
         }
     } else if (offset < 0x300) {
commit 79f5d67e9db35d53b478699393590392f7be03ac
Author: walimis <walimisdev at gmail.com>
Date:   Tue Dec 11 11:30:37 2012 +0000

    xilinx_zynq: Add one variable to avoid overwriting QSPI bus
    
    commit 7b482bcf xilinx_zynq: added QSPI controller
    
    Adds one QSPI controller, which has two spi buses, one is for
    spi0, and another is for spi1. But when initializing the spi1
    bus, "dev" has been overwrited by the ssi_create_slave_no_init() function,
    so that qdev_get_child_bus() returns NULL and the last two m25p80 flashes
    won't be attached to the spi1 bus, but to main-system-bus.
    
    Here we add one variable to avoid overwriting.
    
    Signed-off-by: Liming Wang <walimisdev at gmail.com>
    Reviewed-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index 1f12a3d..49233d8 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -57,6 +57,7 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq,
     DeviceState *dev;
     SysBusDevice *busdev;
     SSIBus *spi;
+    DeviceState *flash_dev;
     int i, j;
     int num_busses =  is_qspi ? NUM_QSPI_BUSSES : 1;
     int num_ss = is_qspi ? NUM_QSPI_FLASHES : NUM_SPI_FLASHES;
@@ -81,11 +82,11 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq,
         spi = (SSIBus *)qdev_get_child_bus(dev, bus_name);
 
         for (j = 0; j < num_ss; ++j) {
-            dev = ssi_create_slave_no_init(spi, "m25p80");
-            qdev_prop_set_string(dev, "partname", "n25q128");
-            qdev_init_nofail(dev);
+            flash_dev = ssi_create_slave_no_init(spi, "m25p80");
+            qdev_prop_set_string(flash_dev, "partname", "n25q128");
+            qdev_init_nofail(flash_dev);
 
-            cs_line = qdev_get_gpio_in(dev, 0);
+            cs_line = qdev_get_gpio_in(flash_dev, 0);
             sysbus_connect_irq(busdev, i * num_ss + j + 1, cs_line);
         }
     }
commit ee3f095680e4f578f4f1371a90acc20375b48966
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Dec 11 11:30:37 2012 +0000

    hw/arm_gic_common: Correct GICC_PMR reset value for newer GICs
    
    The GIC architecture specification for v1 and v2 GICs (as found
    on the Cortex-A9 and newer) states that the GICC_PMR reset value
    is zero; this differs from the 0xf0 reset value used on 11MPCore.
    The NVIC is different again in not having a CPU interface; since
    we share the GIC code we must force the priority mask field to
    allow through all interrupts.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Igor Mitsyanko <i.mitsyanko at samsung.com>

diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c
index 8369309..73ae331 100644
--- a/hw/arm_gic_common.c
+++ b/hw/arm_gic_common.c
@@ -127,7 +127,11 @@ static void arm_gic_common_reset(DeviceState *dev)
     int i;
     memset(s->irq_state, 0, GIC_MAXIRQ * sizeof(gic_irq_state));
     for (i = 0 ; i < s->num_cpu; i++) {
-        s->priority_mask[i] = 0xf0;
+        if (s->revision == REV_11MPCORE) {
+            s->priority_mask[i] = 0xf0;
+        } else {
+            s->priority_mask[i] = 0;
+        }
         s->current_pending[i] = 1023;
         s->running_irq[i] = 1023;
         s->running_priority[i] = 0x100;
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index f0a2e7b..4963678 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -455,9 +455,11 @@ static void armv7m_nvic_reset(DeviceState *dev)
     nc->parent_reset(dev);
     /* Common GIC reset resets to disabled; the NVIC doesn't have
      * per-CPU interfaces so mark our non-existent CPU interface
-     * as enabled by default.
+     * as enabled by default, and with a priority mask which allows
+     * all interrupts through.
      */
     s->gic.cpu_enabled[0] = 1;
+    s->gic.priority_mask[0] = 0x100;
     /* The NVIC as a whole is always enabled. */
     s->gic.enabled = 1;
     systick_reset(s);
commit cad065f18e1ca7694385f42f560da637d4e651b6
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Dec 11 11:30:37 2012 +0000

    hw/arm_gic: Fix comparison with priority mask register
    
    The GIC spec states that only interrupts with higher priority
    than the value in the GICC_PMR priority mask register are
    passed through to the processor. We were incorrectly allowing
    through interrupts with a priority equal to the specified
    value: correct the comparison operation to match the spec.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Igor Mitsyanko <i.mitsyanko at samsung.com>

diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index f9e423f..672d539 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -73,7 +73,7 @@ void gic_update(GICState *s)
             }
         }
         level = 0;
-        if (best_prio <= s->priority_mask[cpu]) {
+        if (best_prio < s->priority_mask[cpu]) {
             s->current_pending[cpu] = best_irq;
             if (best_prio < s->running_priority[cpu]) {
                 DPRINTF("Raised pending IRQ %d\n", best_irq);
commit bf471f7950e9dc9416747b2774eb712f63afe5a7
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Dec 11 11:30:37 2012 +0000

    hw/arm_boot, exynos4210, highbank: Fix secondary boot GIC init
    
    Fix the code in the secondary CPU boot stubs so that it correctly
    initialises the GIC rather than relying on bugs or implementation
    dependent aspects of the QEMU GIC implementation:
     * set the GIC_PMR.Priority field to all-ones, so that all
       interrupts are passed through. The default of all-zeroes
       means all interrupts are masked, and QEMU only booted because
       of a bug in the priority masking in our GIC implementation.
     * add a barrier after GIC setup and before WFI to ensure that
       GIC config is complete before we go into a possible low power
       state. This isn't needed with the software GIC model but could
       be required when using KVM and executing this code on the
       real hardware CPU.
    
    Note that of the three secondary stub implementations, only
    the common generic one needs to support both v6 and v7 DSB
    encodings; highbank and exynos4210 will always be v7 CPUs.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Igor Mitsyanko <i.mitsyanko at samsung.com>

diff --git a/hw/arm_boot.c b/hw/arm_boot.c
index 92e2cab..ec3b8d5 100644
--- a/hw/arm_boot.c
+++ b/hw/arm_boot.c
@@ -44,11 +44,17 @@ static uint32_t bootloader[] = {
  * for an interprocessor interrupt and polling a configurable
  * location for the kernel secondary CPU entry point.
  */
+#define DSB_INSN 0xf57ff04f
+#define CP15_DSB_INSN 0xee070f9a /* mcr cp15, 0, r0, c7, c10, 4 */
+
 static uint32_t smpboot[] = {
-  0xe59f201c, /* ldr r2, gic_cpu_if */
-  0xe59f001c, /* ldr r0, startaddr */
+  0xe59f2028, /* ldr r2, gic_cpu_if */
+  0xe59f0028, /* ldr r0, startaddr */
   0xe3a01001, /* mov r1, #1 */
-  0xe5821000, /* str r1, [r2] */
+  0xe5821000, /* str r1, [r2] - set GICC_CTLR.Enable */
+  0xe3a010ff, /* mov r1, #0xff */
+  0xe5821004, /* str r1, [r2, 4] - set GIC_PMR.Priority to 0xff */
+  DSB_INSN,   /* dsb */
   0xe320f003, /* wfi */
   0xe5901000, /* ldr     r1, [r0] */
   0xe1110001, /* tst     r1, r1 */
@@ -65,6 +71,11 @@ static void default_write_secondary(ARMCPU *cpu,
     smpboot[ARRAY_SIZE(smpboot) - 1] = info->smp_bootreg_addr;
     smpboot[ARRAY_SIZE(smpboot) - 2] = info->gic_cpu_if_addr;
     for (n = 0; n < ARRAY_SIZE(smpboot); n++) {
+        /* Replace DSB with the pre-v7 DSB if necessary. */
+        if (!arm_feature(&cpu->env, ARM_FEATURE_V7) &&
+            smpboot[n] == DSB_INSN) {
+            smpboot[n] = CP15_DSB_INSN;
+        }
         smpboot[n] = tswap32(smpboot[n]);
     }
     rom_add_blob_fixed("smpboot", smpboot, sizeof(smpboot),
diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index 00d4db8..22148cd 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -80,12 +80,16 @@ void exynos4210_write_secondary(ARMCPU *cpu,
 {
     int n;
     uint32_t smpboot[] = {
-        0xe59f3024, /* ldr r3, External gic_cpu_if */
-        0xe59f2024, /* ldr r2, Internal gic_cpu_if */
-        0xe59f0024, /* ldr r0, startaddr */
+        0xe59f3034, /* ldr r3, External gic_cpu_if */
+        0xe59f2034, /* ldr r2, Internal gic_cpu_if */
+        0xe59f0034, /* ldr r0, startaddr */
         0xe3a01001, /* mov r1, #1 */
         0xe5821000, /* str r1, [r2] */
         0xe5831000, /* str r1, [r3] */
+        0xe3a010ff, /* mov r1, #0xff */
+        0xe5821004, /* str r1, [r2, #4] */
+        0xe5831004, /* str r1, [r3, #4] */
+        0xf57ff04f, /* dsb */
         0xe320f003, /* wfi */
         0xe5901000, /* ldr     r1, [r0] */
         0xe1110001, /* tst     r1, r1 */
diff --git a/hw/highbank.c b/hw/highbank.c
index afbb005..447e57d 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -44,9 +44,12 @@ static void hb_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info)
         0xe210000f, /* ands r0, r0, #0x0f */
         0xe3a03040, /* mov r3, #0x40 - jump address is 0x40 + 0x10 * core id */
         0xe0830200, /* add r0, r3, r0, lsl #4 */
-        0xe59f2018, /* ldr r2, privbase */
+        0xe59f2024, /* ldr r2, privbase */
         0xe3a01001, /* mov r1, #1 */
-        0xe5821100, /* str r1, [r2, #256] */
+        0xe5821100, /* str r1, [r2, #256] - set GICC_CTLR.Enable */
+        0xe3a010ff, /* mov r1, #0xff */
+        0xe5821104, /* str r1, [r2, #260] - set GICC_PMR.Priority to 0xff */
+        0xf57ff04f, /* dsb */
         0xe320f003, /* wfi */
         0xe5901000, /* ldr     r1, [r0] */
         0xe1110001, /* tst     r1, r1 */
commit fbcad04d6bfdff937536eb23088a01a280a1a3af
Author: Fabien Chouteau <chouteau at adacore.com>
Date:   Mon Dec 10 12:56:22 2012 +0100

    Fix error code checking for SetFilePointer() call
    
    An error has occurred if the return value is invalid_set_file_pointer
    and getlasterror doesn't return no_error.
    
    Signed-off-by: Fabien Chouteau <chouteau at adacore.com>
    Acked-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-win32.c b/block/raw-win32.c
index 0c05c58..ce207a3 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
     BDRVRawState *s = bs->opaque;
     LONG low, high;
+    DWORD dwPtrLow;
 
     low = offset;
     high = offset >> 32;
-    if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN))
-	return -EIO;
-    if (!SetEndOfFile(s->hfile))
+
+    /*
+     * An error has occurred if the return value is INVALID_SET_FILE_POINTER
+     * and GetLastError doesn't return NO_ERROR.
+     */
+    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
+    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
+        fprintf(stderr, "SetFilePointer error: %d\n", GetLastError());
+        return -EIO;
+    }
+    if (SetEndOfFile(s->hfile) == 0) {
+        fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError());
         return -EIO;
+    }
     return 0;
 }
 
commit 473c7f0255920bcaf37411990a3725898772817f
Author: Stefan Priebe <s.priebe at profihost.ag>
Date:   Fri Nov 30 09:55:46 2012 +0100

    rbd: Fix race between aio completition and aio cancel
    
    This one fixes a race which qemu had also in iscsi block driver
    between cancellation and io completition.
    
    qemu_rbd_aio_cancel was not synchronously waiting for the end of
    the command.
    
    To archieve this it introduces a new status flag which uses
    -EINPROGRESS.
    
    Signed-off-by: Stefan Priebe <s.priebe at profihost.ag>
    Reviewed-by: Stefan Hajnoczi <stefanha at gmail.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/rbd.c b/block/rbd.c
index f3becc7..737bab1 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -77,6 +77,7 @@ typedef struct RBDAIOCB {
     int error;
     struct BDRVRBDState *s;
     int cancelled;
+    int status;
 } RBDAIOCB;
 
 typedef struct RADOSCB {
@@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     RBDAIOCB *acb = rcb->acb;
     int64_t r;
 
-    if (acb->cancelled) {
-        qemu_vfree(acb->bounce);
-        qemu_aio_release(acb);
-        goto done;
-    }
-
     r = rcb->ret;
 
     if (acb->cmd == RBD_AIO_WRITE ||
@@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     /* Note that acb->bh can be NULL in case where the aio was cancelled */
     acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
-done:
     g_free(rcb);
 }
 
@@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     RBDAIOCB *acb = (RBDAIOCB *) blockacb;
     acb->cancelled = 1;
+
+    while (acb->status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+
+    qemu_aio_release(acb);
 }
 
 static const AIOCBInfo rbd_aiocb_info = {
@@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque)
     acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
+    acb->status = 0;
 
-    qemu_aio_release(acb);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }
 
 static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->s = s;
     acb->cancelled = 0;
     acb->bh = NULL;
+    acb->status = -EINPROGRESS;
 
     if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
commit 791bfa35ee00ca10b13bedfb048ffda385b151c7
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Tue Dec 4 16:35:12 2012 +0100

    qemu-io: Implement write -c for compressed clusters
    
    This makes it easier to create images with both compressed and
    uncompressed clusters for testing.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-io.c b/qemu-io.c
index 92cdb2a..b4b0898 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -265,6 +265,18 @@ static int do_co_write_zeroes(int64_t offset, int count, int *total)
     }
 }
 
+static int do_write_compressed(char *buf, int64_t offset, int count, int *total)
+{
+    int ret;
+
+    ret = bdrv_write_compressed(bs, offset >> 9, (uint8_t *)buf, count >> 9);
+    if (ret < 0) {
+        return ret;
+    }
+    *total = count;
+    return 1;
+}
+
 static int do_load_vmstate(char *buf, int64_t offset, int count, int *total)
 {
     *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count);
@@ -687,6 +699,7 @@ static void write_help(void)
 " Writes into a segment of the currently open file, using a buffer\n"
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
+" -c, -- write compressed data with bdrv_write_compressed\n"
 " -p, -- use bdrv_pwrite to write the file\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
@@ -703,7 +716,7 @@ static const cmdinfo_t write_cmd = {
     .cfunc      = write_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-bCpqz] [-P pattern ] off len",
+    .args       = "[-bcCpqz] [-P pattern ] off len",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = write_help,
 };
@@ -712,6 +725,7 @@ static int write_f(int argc, char **argv)
 {
     struct timeval t1, t2;
     int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0;
+    int cflag = 0;
     int c, cnt;
     char *buf = NULL;
     int64_t offset;
@@ -720,11 +734,14 @@ static int write_f(int argc, char **argv)
     int total = 0;
     int pattern = 0xcd;
 
-    while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) {
+    while ((c = getopt(argc, argv, "bcCpP:qz")) != EOF) {
         switch (c) {
         case 'b':
             bflag = 1;
             break;
+        case 'c':
+            cflag = 1;
+            break;
         case 'C':
             Cflag = 1;
             break;
@@ -801,6 +818,8 @@ static int write_f(int argc, char **argv)
         cnt = do_save_vmstate(buf, offset, count, &total);
     } else if (zflag) {
         cnt = do_co_write_zeroes(offset, count, &total);
+    } else if (cflag) {
+        cnt = do_write_compressed(buf, offset, count, &total);
     } else {
         cnt = do_write(buf, offset, count, &total);
     }
commit 23e956bfe6af6f71046772478ed08d4e5c9c62d4
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Wed Nov 14 17:53:16 2012 -0500

    tests: Add tests for fdsets
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Reviewed-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/qemu-iotests/045 b/tests/qemu-iotests/045
new file mode 100755
index 0000000..2b6f1af
--- /dev/null
+++ b/tests/qemu-iotests/045
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+#
+# Tests for fdsets.
+#
+# Copyright (C) 2012 IBM Corp.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+image0 = os.path.join(iotests.test_dir, 'image0')
+image1 = os.path.join(iotests.test_dir, 'image1')
+image2 = os.path.join(iotests.test_dir, 'image2')
+image3 = os.path.join(iotests.test_dir, 'image3')
+image4 = os.path.join(iotests.test_dir, 'image4')
+
+class TestFdSets(iotests.QMPTestCase):
+
+    def setUp(self):
+        self.vm = iotests.VM()
+        qemu_img('create', '-f', iotests.imgfmt, image0, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image1, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image2, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image3, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image4, '128K')
+        self.file0 = open(image0, 'r')
+        self.file1 = open(image1, 'w+')
+        self.file2 = open(image2, 'r')
+        self.file3 = open(image3, 'r')
+        self.file4 = open(image4, 'r')
+        self.vm.add_fd(self.file0.fileno(), 1, 'image0:r')
+        self.vm.add_fd(self.file1.fileno(), 1, 'image1:w+')
+        self.vm.add_fd(self.file2.fileno(), 0, 'image2:r')
+        self.vm.add_fd(self.file3.fileno(), 2, 'image3:r')
+        self.vm.add_fd(self.file4.fileno(), 2, 'image4:r')
+        self.vm.add_drive("/dev/fdset/1")
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        self.file0.close()
+        self.file1.close()
+        self.file2.close()
+        self.file3.close()
+        self.file4.close()
+        os.remove(image0)
+        os.remove(image1)
+        os.remove(image2)
+        os.remove(image3)
+        os.remove(image4)
+
+    def test_query_fdset(self):
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 2)
+        self.assert_qmp(result, 'return[1]/fdset-id', 1)
+        self.assert_qmp(result, 'return[2]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image3:r')
+        self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image4:r')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fdset(self):
+        result = self.vm.qmp('remove-fd', fdset_id=2)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 1)
+        self.assert_qmp(result, 'return[1]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fd(self):
+        result = self.vm.qmp('query-fdsets')
+        fd_image3 = result['return'][0]['fds'][0]['fd']
+        result = self.vm.qmp('remove-fd', fdset_id=2, fd=fd_image3)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 2)
+        self.assert_qmp(result, 'return[1]/fdset-id', 1)
+        self.assert_qmp(result, 'return[2]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image4:r')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fd_invalid_fdset(self):
+        result = self.vm.qmp('query-fdsets')
+        fd_image3 = result['return'][0]['fds'][0]['fd']
+        result = self.vm.qmp('remove-fd', fdset_id=3, fd=fd_image3)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+            'File descriptor named \'fdset-id:3, fd:%d\' not found' % fd_image3)
+        self.vm.shutdown()
+
+    def test_remove_fd_invalid_fd(self):
+        result = self.vm.qmp('query-fdsets')
+        result = self.vm.qmp('remove-fd', fdset_id=2, fd=999)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+            'File descriptor named \'fdset-id:2, fd:999\' not found')
+        self.vm.shutdown()
+
+    def test_add_fd_invalid_fd(self):
+        result = self.vm.qmp('add-fd', fdset_id=2)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+                'No file descriptor supplied via SCM_RIGHTS')
+        self.vm.shutdown()
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['raw'])
diff --git a/tests/qemu-iotests/045.out b/tests/qemu-iotests/045.out
new file mode 100644
index 0000000..3f8a935
--- /dev/null
+++ b/tests/qemu-iotests/045.out
@@ -0,0 +1,5 @@
+......
+----------------------------------------------------------------------
+Ran 6 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index a4a9044..5b39785 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -51,3 +51,4 @@
 042 rw auto quick
 043 rw auto backing
 044 rw auto
+045 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 0be5c7e..569ca3d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -79,6 +79,18 @@ class VM(object):
         self._num_drives += 1
         return self
 
+    def add_fd(self, fd, fdset, opaque, opts=''):
+        '''Pass a file descriptor to the VM'''
+        options = ['fd=%d' % fd,
+                   'set=%d' % fdset,
+                   'opaque=%s' % opaque]
+        if opts:
+            options.append(opts)
+
+        self._args.append('-add-fd')
+        self._args.append(','.join(options))
+        return self
+
     def launch(self):
         '''Launch the VM and establish a QMP connection'''
         devnull = open('/dev/null', 'rb')
commit d92ada2202a0730e396304908ff7b870168387d2
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:09 2012 -0200

    block: bdrv_img_create(): drop unused error handling code
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 066bd66..b3faf3a 100644
--- a/block.c
+++ b/block.c
@@ -4442,9 +4442,9 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
 }
 
-int bdrv_img_create(const char *filename, const char *fmt,
-                    const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags, Error **errp)
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags, Error **errp)
 {
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *backing_fmt, *backing_file, *size;
@@ -4456,18 +4456,14 @@ int bdrv_img_create(const char *filename, const char *fmt,
     /* Find driver and parse its options */
     drv = bdrv_find_format(fmt);
     if (!drv) {
-        error_report("Unknown file format '%s'", fmt);
         error_setg(errp, "Unknown file format '%s'", fmt);
-        ret = -EINVAL;
-        goto out;
+        return;
     }
 
     proto_drv = bdrv_find_protocol(filename);
     if (!proto_drv) {
-        error_report("Unknown protocol '%s'", filename);
         error_setg(errp, "Unknown protocol '%s'", filename);
-        ret = -EINVAL;
-        goto out;
+        return;
     }
 
     create_options = append_option_parameters(create_options,
@@ -4484,9 +4480,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (options) {
         param = parse_option_parameters(options, create_options, param);
         if (param == NULL) {
-            error_report("Invalid options for file format '%s'.", fmt);
             error_setg(errp, "Invalid options for file format '%s'.", fmt);
-            ret = -EINVAL;
             goto out;
         }
     }
@@ -4494,22 +4488,16 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (base_filename) {
         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                  base_filename)) {
-            error_report("Backing file not supported for file format '%s'",
-                         fmt);
             error_setg(errp, "Backing file not supported for file format '%s'",
                        fmt);
-            ret = -EINVAL;
             goto out;
         }
     }
 
     if (base_fmt) {
         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
-            error_report("Backing file format not supported for file "
-                         "format '%s'", fmt);
             error_setg(errp, "Backing file format not supported for file "
                              "format '%s'", fmt);
-            ret = -EINVAL;
             goto out;
         }
     }
@@ -4517,11 +4505,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
     if (backing_file && backing_file->value.s) {
         if (!strcmp(filename, backing_file->value.s)) {
-            error_report("Error: Trying to create an image with the "
-                         "same filename as the backing file");
             error_setg(errp, "Error: Trying to create an image with the "
                              "same filename as the backing file");
-            ret = -EINVAL;
             goto out;
         }
     }
@@ -4530,11 +4515,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (backing_fmt && backing_fmt->value.s) {
         backing_drv = bdrv_find_format(backing_fmt->value.s);
         if (!backing_drv) {
-            error_report("Unknown backing file format '%s'",
-                         backing_fmt->value.s);
             error_setg(errp, "Unknown backing file format '%s'",
                        backing_fmt->value.s);
-            ret = -EINVAL;
             goto out;
         }
     }
@@ -4556,7 +4538,6 @@ int bdrv_img_create(const char *filename, const char *fmt,
 
             ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
             if (ret < 0) {
-                error_report("Could not open '%s'", backing_file->value.s);
                 error_setg_errno(errp, -ret, "Could not open '%s'",
                                  backing_file->value.s);
                 goto out;
@@ -4567,9 +4548,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
             snprintf(buf, sizeof(buf), "%" PRId64, size);
             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
         } else {
-            error_report("Image creation needs a size parameter");
             error_setg(errp, "Image creation needs a size parameter");
-            ret = -EINVAL;
             goto out;
         }
     }
@@ -4579,21 +4558,14 @@ int bdrv_img_create(const char *filename, const char *fmt,
     puts("");
 
     ret = bdrv_create(drv, filename, param);
-
     if (ret < 0) {
         if (ret == -ENOTSUP) {
-            error_report("Formatting or formatting option not supported for "
-                         "file format '%s'", fmt);
             error_setg(errp,"Formatting or formatting option not supported for "
                             "file format '%s'", fmt);
         } else if (ret == -EFBIG) {
-            error_report("The image size is too large for file format '%s'",
-                         fmt);
             error_setg(errp, "The image size is too large for file format '%s'",
                        fmt);
         } else {
-            error_report("%s: error while creating %s: %s", filename, fmt,
-                         strerror(-ret));
             error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
                        strerror(-ret));
         }
@@ -4606,6 +4578,4 @@ out:
     if (bs) {
         bdrv_delete(bs);
     }
-
-    return ret;
 }
diff --git a/block.h b/block.h
index ff54d15..24bea09 100644
--- a/block.h
+++ b/block.h
@@ -343,9 +343,9 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                       int64_t pos, int size);
 
-int bdrv_img_create(const char *filename, const char *fmt,
-                    const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags, Error **errp);
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags, Error **errp);
 
 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
commit cf8f2426c55245f437a91f2fdabbed4ea24e7786
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:08 2012 -0200

    qmp: qmp_drive_mirror(): pass Error object to bdrv_img_create()
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 6fb3362..463f4c2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1264,8 +1264,8 @@ void qmp_drive_mirror(const char *device, const char *target,
         assert(format && drv);
         bdrv_get_geometry(bs, &size);
         size *= 512;
-        ret = bdrv_img_create(target, format,
-                              NULL, NULL, NULL, size, flags, NULL);
+        bdrv_img_create(target, format,
+                        NULL, NULL, NULL, size, flags, &local_err);
     } else {
         switch (mode) {
         case NEW_IMAGE_MODE_EXISTING:
@@ -1273,18 +1273,18 @@ void qmp_drive_mirror(const char *device, const char *target,
             break;
         case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
             /* create new image with backing file */
-            ret = bdrv_img_create(target, format,
-                                  source->filename,
-                                  source->drv->format_name,
-                                  NULL, -1, flags, NULL);
+            bdrv_img_create(target, format,
+                            source->filename,
+                            source->drv->format_name,
+                            NULL, -1, flags, &local_err);
             break;
         default:
             abort();
         }
     }
 
-    if (ret) {
-        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
         return;
     }
 
commit 43e17041156ddecac8a7500648e71287ba270c0a
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:07 2012 -0200

    qmp: qmp_transaction(): pass Error object to bdrv_img_create()
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 500f091..6fb3362 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -707,6 +707,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
     int ret = 0;
     BlockdevActionList *dev_entry = dev_list;
     BlkTransactionStates *states, *next;
+    Error *local_err = NULL;
 
     QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
     QSIMPLEQ_INIT(&snap_bdrv_states);
@@ -786,12 +787,12 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 
         /* create new image w/backing file */
         if (mode != NEW_IMAGE_MODE_EXISTING) {
-            ret = bdrv_img_create(new_image_file, format,
-                                  states->old_bs->filename,
-                                  states->old_bs->drv->format_name,
-                                  NULL, -1, flags, NULL);
-            if (ret) {
-                error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+            bdrv_img_create(new_image_file, format,
+                            states->old_bs->filename,
+                            states->old_bs->drv->format_name,
+                            NULL, -1, flags, &local_err);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
                 goto delete_and_fail;
             }
         }
commit a930091189cedcc0023dd38f705e2a46e530f4a4
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:06 2012 -0200

    qemu-img: img_create(): drop unneeded goto and ret variable
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index 595b6f5..c4dae88 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -294,7 +294,7 @@ static int add_old_style_options(const char *fmt, QEMUOptionParameter *list,
 
 static int img_create(int argc, char **argv)
 {
-    int c, ret = 0;
+    int c;
     uint64_t img_size = -1;
     const char *fmt = "raw";
     const char *base_fmt = NULL;
@@ -351,15 +351,13 @@ static int img_create(int argc, char **argv)
             error_report("Invalid image size specified! You may use k, M, G or "
                   "T suffixes for ");
             error_report("kilobytes, megabytes, gigabytes and terabytes.");
-            ret = -1;
-            goto out;
+            return 1;
         }
         img_size = (uint64_t)sval;
     }
 
     if (options && is_help_option(options)) {
-        ret = print_block_option_help(filename, fmt);
-        goto out;
+        return print_block_option_help(filename, fmt);
     }
 
     bdrv_img_create(filename, fmt, base_filename, base_fmt,
@@ -367,13 +365,9 @@ static int img_create(int argc, char **argv)
     if (error_is_set(&local_err)) {
         error_report("%s", error_get_pretty(local_err));
         error_free(local_err);
-        ret = -1;
-    }
-
-out:
-    if (ret) {
         return 1;
     }
+
     return 0;
 }
 
commit 9b37525a7dbc4f5eef0023fc92716259a3d94612
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:05 2012 -0200

    qemu-img: img_create(): pass Error object to bdrv_img_create()
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index 3896689..595b6f5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -301,6 +301,7 @@ static int img_create(int argc, char **argv)
     const char *filename;
     const char *base_filename = NULL;
     char *options = NULL;
+    Error *local_err = NULL;
 
     for(;;) {
         c = getopt(argc, argv, "F:b:f:he6o:");
@@ -361,8 +362,14 @@ static int img_create(int argc, char **argv)
         goto out;
     }
 
-    ret = bdrv_img_create(filename, fmt, base_filename, base_fmt,
-                          options, img_size, BDRV_O_FLAGS, NULL);
+    bdrv_img_create(filename, fmt, base_filename, base_fmt,
+                    options, img_size, BDRV_O_FLAGS, &local_err);
+    if (error_is_set(&local_err)) {
+        error_report("%s", error_get_pretty(local_err));
+        error_free(local_err);
+        ret = -1;
+    }
+
 out:
     if (ret) {
         return 1;
commit 71c79813d83b5b45ba934cf995436063da458f66
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Nov 30 10:52:04 2012 -0200

    block: bdrv_img_create(): add Error ** argument
    
    This commit adds an Error ** argument to bdrv_img_create() and set it
    appropriately on error.
    
    Callers of bdrv_img_create() pass NULL for the new argument and still
    rely on bdrv_img_create()'s return value. Next commits will change
    callers to use the Error object instead.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 2ec3afe..066bd66 100644
--- a/block.c
+++ b/block.c
@@ -4444,7 +4444,7 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
 
 int bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags)
+                    char *options, uint64_t img_size, int flags, Error **errp)
 {
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *backing_fmt, *backing_file, *size;
@@ -4457,6 +4457,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
     drv = bdrv_find_format(fmt);
     if (!drv) {
         error_report("Unknown file format '%s'", fmt);
+        error_setg(errp, "Unknown file format '%s'", fmt);
         ret = -EINVAL;
         goto out;
     }
@@ -4464,6 +4465,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
     proto_drv = bdrv_find_protocol(filename);
     if (!proto_drv) {
         error_report("Unknown protocol '%s'", filename);
+        error_setg(errp, "Unknown protocol '%s'", filename);
         ret = -EINVAL;
         goto out;
     }
@@ -4483,6 +4485,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
         param = parse_option_parameters(options, create_options, param);
         if (param == NULL) {
             error_report("Invalid options for file format '%s'.", fmt);
+            error_setg(errp, "Invalid options for file format '%s'.", fmt);
             ret = -EINVAL;
             goto out;
         }
@@ -4493,6 +4496,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
                                  base_filename)) {
             error_report("Backing file not supported for file format '%s'",
                          fmt);
+            error_setg(errp, "Backing file not supported for file format '%s'",
+                       fmt);
             ret = -EINVAL;
             goto out;
         }
@@ -4502,6 +4507,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
             error_report("Backing file format not supported for file "
                          "format '%s'", fmt);
+            error_setg(errp, "Backing file format not supported for file "
+                             "format '%s'", fmt);
             ret = -EINVAL;
             goto out;
         }
@@ -4512,6 +4519,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
         if (!strcmp(filename, backing_file->value.s)) {
             error_report("Error: Trying to create an image with the "
                          "same filename as the backing file");
+            error_setg(errp, "Error: Trying to create an image with the "
+                             "same filename as the backing file");
             ret = -EINVAL;
             goto out;
         }
@@ -4523,6 +4532,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
         if (!backing_drv) {
             error_report("Unknown backing file format '%s'",
                          backing_fmt->value.s);
+            error_setg(errp, "Unknown backing file format '%s'",
+                       backing_fmt->value.s);
             ret = -EINVAL;
             goto out;
         }
@@ -4546,6 +4557,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
             ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
             if (ret < 0) {
                 error_report("Could not open '%s'", backing_file->value.s);
+                error_setg_errno(errp, -ret, "Could not open '%s'",
+                                 backing_file->value.s);
                 goto out;
             }
             bdrv_get_geometry(bs, &size);
@@ -4555,6 +4568,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
         } else {
             error_report("Image creation needs a size parameter");
+            error_setg(errp, "Image creation needs a size parameter");
             ret = -EINVAL;
             goto out;
         }
@@ -4570,12 +4584,18 @@ int bdrv_img_create(const char *filename, const char *fmt,
         if (ret == -ENOTSUP) {
             error_report("Formatting or formatting option not supported for "
                          "file format '%s'", fmt);
+            error_setg(errp,"Formatting or formatting option not supported for "
+                            "file format '%s'", fmt);
         } else if (ret == -EFBIG) {
             error_report("The image size is too large for file format '%s'",
                          fmt);
+            error_setg(errp, "The image size is too large for file format '%s'",
+                       fmt);
         } else {
             error_report("%s: error while creating %s: %s", filename, fmt,
                          strerror(-ret));
+            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
+                       strerror(-ret));
         }
     }
 
diff --git a/block.h b/block.h
index 722c620..ff54d15 100644
--- a/block.h
+++ b/block.h
@@ -345,7 +345,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 
 int bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags);
+                    char *options, uint64_t img_size, int flags, Error **errp);
 
 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
diff --git a/blockdev.c b/blockdev.c
index 4a4d99f..500f091 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -789,7 +789,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
             ret = bdrv_img_create(new_image_file, format,
                                   states->old_bs->filename,
                                   states->old_bs->drv->format_name,
-                                  NULL, -1, flags);
+                                  NULL, -1, flags, NULL);
             if (ret) {
                 error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
                 goto delete_and_fail;
@@ -1264,7 +1264,7 @@ void qmp_drive_mirror(const char *device, const char *target,
         bdrv_get_geometry(bs, &size);
         size *= 512;
         ret = bdrv_img_create(target, format,
-                              NULL, NULL, NULL, size, flags);
+                              NULL, NULL, NULL, size, flags, NULL);
     } else {
         switch (mode) {
         case NEW_IMAGE_MODE_EXISTING:
@@ -1275,7 +1275,7 @@ void qmp_drive_mirror(const char *device, const char *target,
             ret = bdrv_img_create(target, format,
                                   source->filename,
                                   source->drv->format_name,
-                                  NULL, -1, flags);
+                                  NULL, -1, flags, NULL);
             break;
         default:
             abort();
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b..3896689 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -362,7 +362,7 @@ static int img_create(int argc, char **argv)
     }
 
     ret = bdrv_img_create(filename, fmt, base_filename, base_fmt,
-                          options, img_size, BDRV_O_FLAGS);
+                          options, img_size, BDRV_O_FLAGS, NULL);
 out:
     if (ret) {
         return 1;
commit 3c42ea66888f149d72d600bab63624b2d849e4bf
Author: Christian Borntraeger <borntraeger at de.ibm.com>
Date:   Thu Nov 22 21:02:55 2012 +0100

    block: simplify default_drive
    
    Markus Armbruster pointed out that there is only one caller
    to default_drive with IF_DEFAULT as a type. Lets get rid
    of the block_default_type parameter and adopt the caller
    to do the right thing (asking the machine struct).
    
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/vl.c b/vl.c
index ee10d21..6b3827c 100644
--- a/vl.c
+++ b/vl.c
@@ -899,17 +899,11 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
     return 0;
 }
 
-static void default_drive(int enable, int snapshot,
-                          BlockInterfaceType block_default_type,
-                          BlockInterfaceType type, int index,
-                          const char *optstr)
+static void default_drive(int enable, int snapshot, BlockInterfaceType type,
+                          int index, const char *optstr)
 {
     QemuOpts *opts;
 
-    if (type == IF_DEFAULT) {
-        type = block_default_type;
-    }
-
     if (!enable || drive_get_by_index(type, index)) {
         return;
     }
@@ -3776,12 +3770,10 @@ int main(int argc, char **argv, char **envp)
         exit(1);
     }
 
-    default_drive(default_cdrom, snapshot, machine->block_default_type,
-                  IF_DEFAULT, 2, CDROM_OPTS);
-    default_drive(default_floppy, snapshot, machine->block_default_type,
-                  IF_FLOPPY, 0, FD_OPTS);
-    default_drive(default_sdcard, snapshot, machine->block_default_type,
-                  IF_SD, 0, SD_OPTS);
+    default_drive(default_cdrom, snapshot, machine->block_default_type, 2,
+                  CDROM_OPTS);
+    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
+    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
 
commit 2d0d2837dcf786da415cf4165d37f4ddd684ff57
Author: Christian Borntraeger <borntraeger at de.ibm.com>
Date:   Tue Nov 20 15:30:34 2012 +0100

    Support default block interfaces per QEMUMachine
    
    There are QEMUMachines that have neither IF_IDE nor IF_SCSI as a
    default/standard interface to their block devices / drives. Therefore,
    this patch introduces a new field default_block_type per QEMUMachine
    struct. The prior use_scsi field becomes thereby obsolete and is
    replaced through .default_block_type = IF_SCSI.
    
    This patch also changes the default for s390x to IF_VIRTIO and
    removes an early hack that converts IF_IDE drives.
    Other parties have already claimed interest (e.g. IF_SD for exynos)
    
    To create a sane default, for machines that dont specify a
    default_block_type, this patch makes IF_IDE = 0 and IF_NONE = 1.
    I checked all users of IF_NONE (blockdev.c and ww/device-hotplug.c)
    as well as IF_IDE and it seems that it is ok to change the defines -
    in other words, I found no obvious (to me) assumption in the code
    regarding IF_NONE==0. IF_NONE is only set if there is an
    explicit if=none. Without if=* the interface becomes IF_DEFAULT.
    
    I would suggest to have some additional care, e.g. by letting
    this patch sit some days in the block tree.
    
    Based on an initial patch from Einar Lueck <elelueck at de.ibm.com>
    
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    CC: Igor Mitsyanko <i.mitsyanko at samsung.com>
    CC: Markus Armbruster <armbru at redhat.com>
    CC: Kevin Wolf <kwolf at redhat.com>
    Reviewed-by: Alexander Graf <agraf at suse.de>
    Acked-by: Igor Mitsyanko <i.mitsyanko at samsung.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index e73fd6e..4a4d99f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -275,7 +275,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits)
     return true;
 }
 
-DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
+DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
 {
     const char *buf;
     const char *file = NULL;
@@ -325,7 +325,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
             return NULL;
 	}
     } else {
-        type = default_to_scsi ? IF_SCSI : IF_IDE;
+        type = block_default_type;
     }
 
     max_devs = if_max_devs[type];
diff --git a/blockdev.h b/blockdev.h
index 5f27b64..d73d552 100644
--- a/blockdev.h
+++ b/blockdev.h
@@ -19,8 +19,13 @@ void blockdev_auto_del(BlockDriverState *bs);
 
 typedef enum {
     IF_DEFAULT = -1,            /* for use with drive_add() only */
+    /*
+     * IF_IDE must be zero, because we want QEMUMachine member
+     * block_default_type to default-initialize to IF_IDE
+     */
+    IF_IDE = 0,
     IF_NONE,
-    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
+    IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
     IF_COUNT
 } BlockInterfaceType;
 
@@ -51,7 +56,7 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs);
 QemuOpts *drive_def(const char *optstr);
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
                     const char *optstr);
-DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi);
+DriveInfo *drive_init(QemuOpts *arg, BlockInterfaceType block_default_type);
 
 /* device-hotplug */
 
diff --git a/hw/boards.h b/hw/boards.h
index 813d0e5..c66fa16 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -3,6 +3,7 @@
 #ifndef HW_BOARDS_H
 #define HW_BOARDS_H
 
+#include "blockdev.h"
 #include "qdev.h"
 
 typedef struct QEMUMachineInitArgs {
@@ -24,7 +25,7 @@ typedef struct QEMUMachine {
     const char *desc;
     QEMUMachineInitFunc *init;
     QEMUMachineResetFunc *reset;
-    int use_scsi;
+    BlockInterfaceType block_default_type;
     int max_cpus;
     unsigned int no_serial:1,
         no_parallel:1,
diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c
index 6d9c080..839b9ea 100644
--- a/hw/device-hotplug.c
+++ b/hw/device-hotplug.c
@@ -39,7 +39,7 @@ DriveInfo *add_init_drive(const char *optstr)
     if (!opts)
         return NULL;
 
-    dinfo = drive_init(opts, current_machine->use_scsi);
+    dinfo = drive_init(opts, current_machine->block_default_type);
     if (!dinfo) {
         qemu_opts_del(opts);
         return NULL;
diff --git a/hw/highbank.c b/hw/highbank.c
index afbb005..e8e5bf0 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -326,7 +326,7 @@ static QEMUMachine highbank_machine = {
     .name = "highbank",
     .desc = "Calxeda Highbank (ECX-1000)",
     .init = highbank_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/leon3.c b/hw/leon3.c
index 7742738..ef83dff 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -212,7 +212,6 @@ static QEMUMachine leon3_generic_machine = {
     .name     = "leon3_generic",
     .desc     = "Leon-3 generic",
     .init     = leon3_generic_hw_init,
-    .use_scsi = 0,
 };
 
 static void leon3_machine_init(void)
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index 0847427..ea1416a 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -324,14 +324,14 @@ static QEMUMachine mips_magnum_machine = {
     .name = "magnum",
     .desc = "MIPS Magnum",
     .init = mips_magnum_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine mips_pica61_machine = {
     .name = "pica61",
     .desc = "Acer Pica 61",
     .init = mips_pica61_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void mips_jazz_machine_init(void)
diff --git a/hw/pc_sysfw.c b/hw/pc_sysfw.c
index 40bced2..d7ea3a5 100644
--- a/hw/pc_sysfw.c
+++ b/hw/pc_sysfw.c
@@ -98,7 +98,7 @@ static void pc_fw_add_pflash_drv(void)
       return;
     }
 
-    if (!drive_init(opts, machine->use_scsi)) {
+    if (!drive_init(opts, machine->block_default_type)) {
         qemu_opts_del(opts);
     }
 }
diff --git a/hw/puv3.c b/hw/puv3.c
index 764799c..3d77349 100644
--- a/hw/puv3.c
+++ b/hw/puv3.c
@@ -122,7 +122,6 @@ static QEMUMachine puv3_machine = {
     .desc = "PKUnity Version-3 based on UniCore32",
     .init = puv3_init,
     .is_default = 1,
-    .use_scsi = 0,
 };
 
 static void puv3_machine_init(void)
diff --git a/hw/realview.c b/hw/realview.c
index e789c15..8ea4ad7 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -364,14 +364,14 @@ static QEMUMachine realview_eb_machine = {
     .name = "realview-eb",
     .desc = "ARM RealView Emulation Baseboard (ARM926EJ-S)",
     .init = realview_eb_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine realview_eb_mpcore_machine = {
     .name = "realview-eb-mpcore",
     .desc = "ARM RealView Emulation Baseboard (ARM11MPCore)",
     .init = realview_eb_mpcore_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -385,7 +385,7 @@ static QEMUMachine realview_pbx_a9_machine = {
     .name = "realview-pbx-a9",
     .desc = "ARM RealView Platform Baseboard Explore for Cortex-A9",
     .init = realview_pbx_a9_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index ca1bb09..7aca0c4 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -314,21 +314,6 @@ static void s390_init(QEMUMachineInitArgs *args)
         qdev_set_nic_properties(dev, nd);
         qdev_init_nofail(dev);
     }
-
-    /* Create VirtIO disk drives */
-    for(i = 0; i < MAX_BLK_DEVS; i++) {
-        DriveInfo *dinfo;
-        DeviceState *dev;
-
-        dinfo = drive_get(IF_IDE, 0, i);
-        if (!dinfo) {
-            continue;
-        }
-
-        dev = qdev_create((BusState *)s390_bus, "virtio-blk-s390");
-        qdev_prop_set_drive_nofail(dev, "drive", dinfo->bdrv);
-        qdev_init_nofail(dev);
-    }
 }
 
 static QEMUMachine s390_machine = {
@@ -336,6 +321,7 @@ static QEMUMachine s390_machine = {
     .alias = "s390",
     .desc = "VirtIO based S390 machine",
     .init = s390_init,
+    .block_default_type = IF_VIRTIO,
     .no_cdrom = 1,
     .no_floppy = 1,
     .no_serial = 1,
@@ -352,3 +338,4 @@ static void s390_machine_init(void)
 }
 
 machine_init(s390_machine_init);
+
diff --git a/hw/spapr.c b/hw/spapr.c
index ad3f0ea..d955f02 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -924,9 +924,9 @@ static QEMUMachine spapr_machine = {
     .desc = "pSeries Logical Partition (PAPR compliant)",
     .init = ppc_spapr_init,
     .reset = ppc_spapr_reset,
+    .block_default_type = IF_SCSI,
     .max_cpus = MAX_CPUS,
     .no_parallel = 1,
-    .use_scsi = 1,
 };
 
 static void spapr_machine_init(void)
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 1a78676..52cf82b 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1426,7 +1426,7 @@ static QEMUMachine ss5_machine = {
     .name = "SS-5",
     .desc = "Sun4m platform, SPARCstation 5",
     .init = ss5_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .is_default = 1,
 };
 
@@ -1434,7 +1434,7 @@ static QEMUMachine ss10_machine = {
     .name = "SS-10",
     .desc = "Sun4m platform, SPARCstation 10",
     .init = ss10_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1442,7 +1442,7 @@ static QEMUMachine ss600mp_machine = {
     .name = "SS-600MP",
     .desc = "Sun4m platform, SPARCserver 600MP",
     .init = ss600mp_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1450,7 +1450,7 @@ static QEMUMachine ss20_machine = {
     .name = "SS-20",
     .desc = "Sun4m platform, SPARCstation 20",
     .init = ss20_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1458,35 +1458,35 @@ static QEMUMachine voyager_machine = {
     .name = "Voyager",
     .desc = "Sun4m platform, SPARCstation Voyager",
     .init = vger_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine ss_lx_machine = {
     .name = "LX",
     .desc = "Sun4m platform, SPARCstation LX",
     .init = ss_lx_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine ss4_machine = {
     .name = "SS-4",
     .desc = "Sun4m platform, SPARCstation 4",
     .init = ss4_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine scls_machine = {
     .name = "SPARCClassic",
     .desc = "Sun4m platform, SPARCClassic",
     .init = scls_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine sbook_machine = {
     .name = "SPARCbook",
     .desc = "Sun4m platform, SPARCbook",
     .init = sbook_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static const struct sun4d_hwdef sun4d_hwdefs[] = {
@@ -1709,7 +1709,7 @@ static QEMUMachine ss1000_machine = {
     .name = "SS-1000",
     .desc = "Sun4d platform, SPARCserver 1000",
     .init = ss1000_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 8,
 };
 
@@ -1717,7 +1717,7 @@ static QEMUMachine ss2000_machine = {
     .name = "SS-2000",
     .desc = "Sun4d platform, SPARCcenter 2000",
     .init = ss2000_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 20,
 };
 
@@ -1896,7 +1896,7 @@ static QEMUMachine ss2_machine = {
     .name = "SS-2",
     .desc = "Sun4c platform, SPARCstation 2",
     .init = ss2_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void sun4m_register_types(void)
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 25e652b..4892c1d 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -358,14 +358,14 @@ static QEMUMachine versatilepb_machine = {
     .name = "versatilepb",
     .desc = "ARM Versatile/PB (ARM926EJ-S)",
     .init = vpb_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine versatileab_machine = {
     .name = "versatileab",
     .desc = "ARM Versatile/AB (ARM926EJ-S)",
     .init = vab_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void versatile_machine_init(void)
diff --git a/hw/vexpress.c b/hw/vexpress.c
index d93f057..e89694c 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -477,7 +477,7 @@ static QEMUMachine vexpress_a9_machine = {
     .name = "vexpress-a9",
     .desc = "ARM Versatile Express for Cortex-A9",
     .init = vexpress_a9_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -485,7 +485,7 @@ static QEMUMachine vexpress_a15_machine = {
     .name = "vexpress-a15",
     .desc = "ARM Versatile Express for Cortex-A15",
     .init = vexpress_a15_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index 1f12a3d..a4a71c2 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -200,7 +200,7 @@ static QEMUMachine zynq_machine = {
     .name = "xilinx-zynq-a9",
     .desc = "Xilinx Zynq Platform Baseboard for Cortex-A9",
     .init = zynq_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 1,
     .no_sdcard = 1
 };
diff --git a/vl.c b/vl.c
index a3ab384..ee10d21 100644
--- a/vl.c
+++ b/vl.c
@@ -886,9 +886,9 @@ static int cleanup_add_fd(QemuOpts *opts, void *opaque)
 
 static int drive_init_func(QemuOpts *opts, void *opaque)
 {
-    int *use_scsi = opaque;
+    BlockInterfaceType *block_default_type = opaque;
 
-    return drive_init(opts, *use_scsi) == NULL;
+    return drive_init(opts, *block_default_type) == NULL;
 }
 
 static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
@@ -899,14 +899,15 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
     return 0;
 }
 
-static void default_drive(int enable, int snapshot, int use_scsi,
+static void default_drive(int enable, int snapshot,
+                          BlockInterfaceType block_default_type,
                           BlockInterfaceType type, int index,
                           const char *optstr)
 {
     QemuOpts *opts;
 
     if (type == IF_DEFAULT) {
-        type = use_scsi ? IF_SCSI : IF_IDE;
+        type = block_default_type;
     }
 
     if (!enable || drive_get_by_index(type, index)) {
@@ -917,7 +918,7 @@ static void default_drive(int enable, int snapshot, int use_scsi,
     if (snapshot) {
         drive_enable_snapshot(opts, NULL);
     }
-    if (!drive_init(opts, use_scsi)) {
+    if (!drive_init(opts, type)) {
         exit(1);
     }
 }
@@ -3770,14 +3771,16 @@ int main(int argc, char **argv, char **envp)
     /* open the virtual block devices */
     if (snapshot)
         qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0);
-    if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0)
+    if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
+                          &machine->block_default_type, 1) != 0) {
         exit(1);
+    }
 
-    default_drive(default_cdrom, snapshot, machine->use_scsi,
+    default_drive(default_cdrom, snapshot, machine->block_default_type,
                   IF_DEFAULT, 2, CDROM_OPTS);
-    default_drive(default_floppy, snapshot, machine->use_scsi,
+    default_drive(default_floppy, snapshot, machine->block_default_type,
                   IF_FLOPPY, 0, FD_OPTS);
-    default_drive(default_sdcard, snapshot, machine->use_scsi,
+    default_drive(default_sdcard, snapshot, machine->block_default_type,
                   IF_SD, 0, SD_OPTS);
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
commit c208e8c2d88eea2bbafc2850d8856525637e495d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Nov 2 16:14:20 2012 +0100

    raw-posix: inline paio_ioctl into hdev_aio_ioctl
    
    clang now warns about an unused function:
      CC    block/raw-posix.o
    block/raw-posix.c:707:26: warning: unused function paio_ioctl
    [-Wunused-function]
    static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
                             ^
    1 warning generated.
    
    because the only use of paio_ioctl() is inside a #if defined(__linux__)
    guard and it is static now.
    
    Reported-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 550c81f..abfedbe 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
-static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
-
-    acb->bs = bs;
-    acb->aio_type = QEMU_AIO_IOCTL;
-    acb->aio_fildes = fd;
-    acb->aio_offset = 0;
-    acb->aio_ioctl_buf = buf;
-    acb->aio_ioctl_cmd = req;
-
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-
 static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
+    RawPosixAIOData *acb;
 
     if (fd_open(bs) < 0)
         return NULL;
-    return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
+
+    acb = g_slice_new(RawPosixAIOData);
+    acb->bs = bs;
+    acb->aio_type = QEMU_AIO_IOCTL;
+    acb->aio_fildes = s->fd;
+    acb->aio_offset = 0;
+    acb->aio_ioctl_buf = buf;
+    acb->aio_ioctl_cmd = req;
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
commit 258d2edbcd4bb5d267c96163333820332e1c14fa
Author: Charles Arnold <carnold at suse.com>
Date:   Tue Oct 30 20:59:32 2012 -0600

    block: vpc support for ~2 TB disks
    
    The VHD specification allows for up to a 2 TB disk size. The current
    implementation in qemu emulates EIDE and ATA-2 hardware which only allows
    for up to 127 GB.  This disk size limitation can be overridden by allowing
    up to 255 heads instead of the normal 4 bit limitation of 16.  Doing so
    allows disk images to be created of up to nearly 2 TB.  This change does
    not violate the VHD format specification nor does it change how smaller
    disks (ie, <=127GB) are defined.
    
    [Charles Arnold also writes: "In analyzing a 160 GB VHD fixed disk image
    created on Windows 2008 R2, it appears that MS is also ignoring the CHS
    values in the footer geometry field in whatever driver they use for
    accessing the image.  The CHS values are set at 65535,16,255 which
    obviously doesn't represent an image size of 160 GB." -- Stefan]
    
    Signed-off-by: Charles Arnold <carnold at suse.com>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/vpc.c b/block/vpc.c
index f14c6ae..566e9a3 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -201,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags)
     bs->total_sectors = (int64_t)
         be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
-    if (bs->total_sectors >= 65535 * 16 * 255) {
+    /* Allow a maximum disk size of approximately 2 TB */
+    if (bs->total_sectors >= 65535LL * 255 * 255) {
         err = -EFBIG;
         goto fail;
     }
@@ -527,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
  * Note that the geometry doesn't always exactly match total_sectors but
  * may round it down.
  *
- * Returns 0 on success, -EFBIG if the size is larger than 127 GB
+ * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
+ * and instead allow up to 255 heads.
  */
 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
     uint8_t* heads, uint8_t* secs_per_cyl)
 {
     uint32_t cyls_times_heads;
 
-    if (total_sectors > 65535 * 16 * 255)
+    /* Allow a maximum disk size of approximately 2 TB */
+    if (total_sectors > 65535LL * 255 * 255) {
         return -EFBIG;
+    }
 
     if (total_sectors > 65535 * 16 * 63) {
         *secs_per_cyl = 255;
-        *heads = 16;
+        if (total_sectors > 65535 * 16 * 255) {
+            *heads = 255;
+        } else {
+            *heads = 16;
+        }
         cyls_times_heads = total_sectors / *secs_per_cyl;
     } else {
         *secs_per_cyl = 17;
commit 1fe1fa510aa3d4eb1fb4246d4951ef48e4c949c2
Author: Charles Arnold <carnold at suse.com>
Date:   Fri Nov 2 09:54:24 2012 -0600

    block: vpc initialize the uuid footer field
    
    Initialize the uuid field in the footer with a generated uuid.
    
    Signed-off-by: Charles Arnold <carnold at suse.com>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/vpc.c b/block/vpc.c
index b6bf52f..f14c6ae 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,9 @@
 #include "block_int.h"
 #include "module.h"
 #include "migration.h"
+#if defined(CONFIG_UUID)
+#include <uuid/uuid.h>
+#endif
 
 /**************************************************************/
 
@@ -739,7 +742,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
 
     footer->type = be32_to_cpu(disk_type);
 
-    /* TODO uuid is missing */
+#if defined(CONFIG_UUID)
+    uuid_generate(footer->uuid);
+#endif
 
     footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
 
commit d567e62f98d5789ff4d273b924a0474931c71e8b
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Thu Nov 15 13:12:14 2012 +1100

    virtio-blk: Remove duplicate property definition
    
    For the virtio-blk device (via virtio-pci) the property "config-wce" is
    defined in two places.  First, it's defined from the
    DEFINE_VIRTIO_BLK_FEATURES macro, second it's defined directly in
    virtio-pci, just two lines above the call to that macro.
    
    The direct definition in virtio-pci.c is broken, since it operates on the
    'config_wce' field of VirtIOBlkConf, which is never used anywhere else.
    Therefore, this patch removes both the extra property definition and the
    redundant field it works on.
    
    Cc: Kevin Wolf <kwolf at redhat.com>
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Cc: Paul 'Rusty' Russell <rusty at rustcorp.com.au>
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index f0740d0..651a000 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -104,7 +104,6 @@ struct VirtIOBlkConf
     BlockConf conf;
     char *serial;
     uint32_t scsi;
-    uint32_t config_wce;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 71f4fb5..7684ac9 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -895,7 +895,6 @@ static Property virtio_blk_properties[] = {
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true),
 #endif
-    DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
commit f500a6d3c2b9ef0bb06d0080d91d8ed3c1d68f58
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Mon Nov 12 17:35:27 2012 +0100

    block: Avoid second open for format probing
    
    This fixes problems that are caused by the additional open/close cycle
    of the existing format probing, for example related to qemu-nbd without
    -t option or file descriptor passing.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index c4f5566..2ec3afe 100644
--- a/block.c
+++ b/block.c
@@ -518,22 +518,16 @@ BlockDriver *bdrv_find_protocol(const char *filename)
     return NULL;
 }
 
-static int find_image_format(const char *filename, BlockDriver **pdrv)
+static int find_image_format(BlockDriverState *bs, const char *filename,
+                             BlockDriver **pdrv)
 {
-    int ret, score, score_max;
+    int score, score_max;
     BlockDriver *drv1, *drv;
     uint8_t buf[2048];
-    BlockDriverState *bs;
-
-    ret = bdrv_file_open(&bs, filename, 0);
-    if (ret < 0) {
-        *pdrv = NULL;
-        return ret;
-    }
+    int ret = 0;
 
     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
     if (bs->sg || !bdrv_is_inserted(bs)) {
-        bdrv_delete(bs);
         drv = bdrv_find_format("raw");
         if (!drv) {
             ret = -ENOENT;
@@ -543,7 +537,6 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
     }
 
     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
-    bdrv_delete(bs);
     if (ret < 0) {
         *pdrv = NULL;
         return ret;
@@ -657,7 +650,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
 /*
  * Common part for opening disk images and files
  */
-static int bdrv_open_common(BlockDriverState *bs, const char *filename,
+static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+    const char *filename,
     int flags, BlockDriver *drv)
 {
     int ret, open_flags;
@@ -691,12 +685,16 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
 
     /* Open the image, either directly or using a protocol */
     if (drv->bdrv_file_open) {
-        ret = drv->bdrv_file_open(bs, filename, open_flags);
-    } else {
-        ret = bdrv_file_open(&bs->file, filename, open_flags);
-        if (ret >= 0) {
-            ret = drv->bdrv_open(bs, open_flags);
+        if (file != NULL) {
+            bdrv_swap(file, bs);
+            ret = 0;
+        } else {
+            ret = drv->bdrv_file_open(bs, filename, open_flags);
         }
+    } else {
+        assert(file != NULL);
+        bs->file = file;
+        ret = drv->bdrv_open(bs, open_flags);
     }
 
     if (ret < 0) {
@@ -716,10 +714,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     return 0;
 
 free_and_fail:
-    if (bs->file) {
-        bdrv_delete(bs->file);
-        bs->file = NULL;
-    }
+    bs->file = NULL;
     g_free(bs->opaque);
     bs->opaque = NULL;
     bs->drv = NULL;
@@ -741,7 +736,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
     }
 
     bs = bdrv_new("");
-    ret = bdrv_open_common(bs, filename, flags, drv);
+    ret = bdrv_open_common(bs, NULL, filename, flags, drv);
     if (ret < 0) {
         bdrv_delete(bs);
         return ret;
@@ -796,6 +791,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
     int ret;
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char tmp_filename[PATH_MAX + 1];
+    BlockDriverState *file = NULL;
 
     if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
@@ -855,25 +851,36 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
         bs->is_temporary = 1;
     }
 
+    /* Open image file without format layer */
+    if (flags & BDRV_O_RDWR) {
+        flags |= BDRV_O_ALLOW_RDWR;
+    }
+
+    ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags));
+    if (ret < 0) {
+        return ret;
+    }
+
     /* Find the right image format driver */
     if (!drv) {
-        ret = find_image_format(filename, &drv);
+        ret = find_image_format(file, filename, &drv);
     }
 
     if (!drv) {
         goto unlink_and_fail;
     }
 
-    if (flags & BDRV_O_RDWR) {
-        flags |= BDRV_O_ALLOW_RDWR;
-    }
-
     /* Open the image */
-    ret = bdrv_open_common(bs, filename, flags, drv);
+    ret = bdrv_open_common(bs, file, filename, flags, drv);
     if (ret < 0) {
         goto unlink_and_fail;
     }
 
+    if (bs->file != file) {
+        bdrv_delete(file);
+        file = NULL;
+    }
+
     /* If there is a backing file, use it */
     if ((flags & BDRV_O_NO_BACKING) == 0) {
         ret = bdrv_open_backing_file(bs);
@@ -895,6 +902,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
     return 0;
 
 unlink_and_fail:
+    if (file != NULL) {
+        bdrv_delete(file);
+    }
     if (bs->is_temporary) {
         unlink(filename);
     }
commit 7b272452398135e4f8e48341239705d03c82dae3
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Mon Nov 12 17:05:39 2012 +0100

    block: Factor out bdrv_open_flags
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index c7a1a3c..c4f5566 100644
--- a/block.c
+++ b/block.c
@@ -634,6 +634,26 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
     bs->copy_on_read--;
 }
 
+static int bdrv_open_flags(BlockDriverState *bs, int flags)
+{
+    int open_flags = flags | BDRV_O_CACHE_WB;
+
+    /*
+     * Clear flags that are internal to the block layer before opening the
+     * image.
+     */
+    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+    /*
+     * Snapshots should be writable.
+     */
+    if (bs->is_temporary) {
+        open_flags |= BDRV_O_RDWR;
+    }
+
+    return open_flags;
+}
+
 /*
  * Common part for opening disk images and files
  */
@@ -665,20 +685,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     bs->opaque = g_malloc0(drv->instance_size);
 
     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
-    open_flags = flags | BDRV_O_CACHE_WB;
-
-    /*
-     * Clear flags that are internal to the block layer before opening the
-     * image.
-     */
-    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-    /*
-     * Snapshots should be writable.
-     */
-    if (bs->is_temporary) {
-        open_flags |= BDRV_O_RDWR;
-    }
+    open_flags = bdrv_open_flags(bs, flags);
 
     bs->read_only = !(open_flags & BDRV_O_RDWR);
 
commit c57b6656c3168bccca7f78b3f740e9149893b3da
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Tue Nov 13 16:35:13 2012 +0100

    aio: Get rid of qemu_aio_flush()
    
    There are no remaining users, and new users should probably be
    using bdrv_drain_all() in the first place.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/async.c b/async.c
index 3f0e8f3..41ae0c1 100644
--- a/async.c
+++ b/async.c
@@ -215,8 +215,3 @@ void aio_context_unref(AioContext *ctx)
 {
     g_source_unref(&ctx->source);
 }
-
-void aio_flush(AioContext *ctx)
-{
-    while (aio_poll(ctx, true));
-}
diff --git a/block/commit.c b/block/commit.c
index fae7958..e2bb1e2 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque)
 
 wait:
         /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that qemu_aio_flush() returns.
+         * with no pending I/O here so that bdrv_drain_all() returns.
          */
         block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         if (block_job_is_cancelled(&s->common)) {
diff --git a/block/mirror.c b/block/mirror.c
index d6618a4..b1f5d4f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque)
             }
 
             /* Note that even when no rate limit is applied we need to yield
-             * with no pending I/O here so that qemu_aio_flush() returns.
+             * with no pending I/O here so that bdrv_drain_all() returns.
              */
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
             if (block_job_is_cancelled(&s->common)) {
diff --git a/block/stream.c b/block/stream.c
index 0c0fc7a..0dcd286 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque)
 
 wait:
         /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that qemu_aio_flush() returns.
+         * with no pending I/O here so that bdrv_drain_all() returns.
          */
         block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         if (block_job_is_cancelled(&s->common)) {
diff --git a/main-loop.c b/main-loop.c
index c87624e..7dba6f6 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -432,11 +432,6 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     return aio_bh_new(qemu_aio_context, cb, opaque);
 }
 
-void qemu_aio_flush(void)
-{
-    aio_flush(qemu_aio_context);
-}
-
 bool qemu_aio_wait(void)
 {
     return aio_poll(qemu_aio_context, true);
diff --git a/qemu-aio.h b/qemu-aio.h
index 3889fe9..31884a8 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -162,10 +162,6 @@ void qemu_bh_cancel(QEMUBH *bh);
  */
 void qemu_bh_delete(QEMUBH *bh);
 
-/* Flush any pending AIO operation. This function will block until all
- * outstanding AIO operations have been completed or cancelled. */
-void aio_flush(AioContext *ctx);
-
 /* Return whether there are any pending callbacks from the GSource
  * attached to the AioContext.
  *
@@ -196,7 +192,7 @@ typedef int (AioFlushHandler)(void *opaque);
 
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
- * be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ * be invoked when using qemu_aio_wait().
  *
  * Code that invokes AIO completion functions should rely on this function
  * instead of qemu_set_fd_handler[2].
@@ -211,7 +207,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
- * will be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ * will be invoked when using qemu_aio_wait().
  *
  * Code that invokes AIO completion functions should rely on this function
  * instead of event_notifier_set_handler.
@@ -228,7 +224,6 @@ GSource *aio_get_g_source(AioContext *ctx);
 
 /* Functions to operate on the main QEMU AioContext.  */
 
-void qemu_aio_flush(void);
 bool qemu_aio_wait(void);
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                  EventNotifierHandler *io_read,
commit d318aea9325c99b15c87a7c14865386c2fde0d2c
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Tue Nov 13 16:35:08 2012 +0100

    block: Improve bdrv_aio_co_cancel_em
    
    Instead of waiting for all requests to complete, wait just for the
    specific request that should be cancelled.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index c05875f..c7a1a3c 100644
--- a/block.c
+++ b/block.c
@@ -3778,12 +3778,20 @@ typedef struct BlockDriverAIOCBCoroutine {
     BlockDriverAIOCB common;
     BlockRequest req;
     bool is_write;
+    bool *done;
     QEMUBH* bh;
 } BlockDriverAIOCBCoroutine;
 
 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 {
-    qemu_aio_flush();
+    BlockDriverAIOCBCoroutine *acb =
+        container_of(blockacb, BlockDriverAIOCBCoroutine, common);
+    bool done = false;
+
+    acb->done = &done;
+    while (!done) {
+        qemu_aio_wait();
+    }
 }
 
 static const AIOCBInfo bdrv_em_co_aiocb_info = {
@@ -3796,6 +3804,11 @@ static void bdrv_co_em_bh(void *opaque)
     BlockDriverAIOCBCoroutine *acb = opaque;
 
     acb->common.cb(acb->common.opaque, acb->req.error);
+
+    if (acb->done) {
+        *acb->done = true;
+    }
+
     qemu_bh_delete(acb->bh);
     qemu_aio_release(acb);
 }
@@ -3834,6 +3847,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->req.nb_sectors = nb_sectors;
     acb->req.qiov = qiov;
     acb->is_write = is_write;
+    acb->done = NULL;
 
     co = qemu_coroutine_create(bdrv_co_do_rw);
     qemu_coroutine_enter(co, acb);
@@ -3860,6 +3874,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     BlockDriverAIOCBCoroutine *acb;
 
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->done = NULL;
+
     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
     qemu_coroutine_enter(co, acb);
 
@@ -3888,6 +3904,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
     acb->req.sector = sector_num;
     acb->req.nb_sectors = nb_sectors;
+    acb->done = NULL;
     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
     qemu_coroutine_enter(co, acb);
 
commit 8a805c222caa0e20bf11d2267f726d0bb5917d94
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Tue Dec 4 16:12:19 2012 +0100

    tests: avoid qemu_aio_flush() in test-thread-pool.c
    
    We need to eliminate calls to qemu_aio_flush() since the function is
    being removed.  Most callers will use bdrv_drain_all() instead but
    test-thread-pool.c is lower level.
    
    Since the test uses the global AioContext we can loop on qemu_aio_wait()
    to wait for aio and bh activity to complete.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Acked-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index fea0445..ea8e676 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -47,11 +47,19 @@ static void qemu_aio_wait_nonblocking(void)
     qemu_aio_wait();
 }
 
+/* Wait until all aio and bh activity has finished */
+static void qemu_aio_wait_all(void)
+{
+    while (qemu_aio_wait()) {
+        /* Do nothing */
+    }
+}
+
 static void test_submit(void)
 {
     WorkerTestData data = { .n = 0 };
     thread_pool_submit(worker_cb, &data);
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(data.n, ==, 1);
 }
 
@@ -63,7 +71,7 @@ static void test_submit_aio(void)
     /* The callbacks are not called until after the first wait.  */
     active = 1;
     g_assert_cmpint(data.ret, ==, -EINPROGRESS);
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(active, ==, 0);
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.ret, ==, 0);
@@ -84,7 +92,7 @@ static void co_test_cb(void *opaque)
     data->ret = 0;
     active--;
 
-    /* The test continues in test_submit_co, after qemu_aio_flush... */
+    /* The test continues in test_submit_co, after qemu_aio_wait_all... */
 }
 
 static void test_submit_co(void)
@@ -99,9 +107,9 @@ static void test_submit_co(void)
     g_assert_cmpint(active, ==, 1);
     g_assert_cmpint(data.ret, ==, -EINPROGRESS);
 
-    /* qemu_aio_flush will execute the rest of the coroutine.  */
+    /* qemu_aio_wait_all will execute the rest of the coroutine.  */
 
-    qemu_aio_flush();
+    qemu_aio_wait_all();
 
     /* Back here after the coroutine has finished.  */
 
@@ -184,7 +192,7 @@ static void test_cancel(void)
     }
 
     /* Finish execution and execute any remaining callbacks.  */
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(active, ==, 0);
     for (i = 0; i < 100; i++) {
         if (data[i].n == 3) {
commit 9fe3781f09f94f3ce76e52899bcdeb0d5164dbb1
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Tue Dec 4 16:12:18 2012 +0100

    tests: use aio_poll() instead of aio_flush() in test-aio.c
    
    There has been confusion between various aio wait and flush functions.
    It's time to get rid of qemu_aio_flush() but in the aio test cases we
    really do want this low-level functionality.
    
    Therefore declare a local wait_for_aio() helper for the test cases.
    Drop the aio_flush() test case.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Acked-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/test-aio.c b/tests/test-aio.c
index f53c908..a8a4f0c 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -15,6 +15,14 @@
 
 AioContext *ctx;
 
+/* Wait until there are no more BHs or AIO requests */
+static void wait_for_aio(void)
+{
+    while (aio_poll(ctx, true)) {
+        /* Do nothing */
+    }
+}
+
 /* Simple callbacks for testing.  */
 
 typedef struct {
@@ -78,14 +86,6 @@ static void test_notify(void)
     g_assert(!aio_poll(ctx, false));
 }
 
-static void test_flush(void)
-{
-    g_assert(!aio_poll(ctx, false));
-    aio_notify(ctx);
-    aio_flush(ctx);
-    g_assert(!aio_poll(ctx, false));
-}
-
 static void test_bh_schedule(void)
 {
     BHTestData data = { .n = 0 };
@@ -116,7 +116,7 @@ static void test_bh_schedule10(void)
     g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 10);
 
     g_assert(!aio_poll(ctx, false));
@@ -164,7 +164,7 @@ static void test_bh_delete_from_cb(void)
     qemu_bh_schedule(data1.bh);
     g_assert_cmpint(data1.n, ==, 0);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert(data1.bh == NULL);
 
@@ -200,7 +200,7 @@ static void test_bh_delete_from_cb_many(void)
     g_assert_cmpint(data4.n, ==, 1);
     g_assert(data1.bh == NULL);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert_cmpint(data2.n, ==, data2.max);
     g_assert_cmpint(data3.n, ==, data3.max);
@@ -219,7 +219,7 @@ static void test_bh_flush(void)
     qemu_bh_schedule(data.bh);
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 1);
 
     g_assert(!aio_poll(ctx, false));
@@ -281,7 +281,7 @@ static void test_flush_event_notifier(void)
     g_assert_cmpint(data.active, ==, 9);
     g_assert(aio_poll(ctx, false));
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 10);
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!aio_poll(ctx, false));
@@ -325,7 +325,7 @@ static void test_wait_event_notifier_noflush(void)
     g_assert_cmpint(data.n, ==, 2);
 
     event_notifier_set(&dummy.e);
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 2);
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
@@ -346,7 +346,7 @@ static void test_wait_event_notifier_noflush(void)
  * - sometimes both the AioContext and the glib main loop wake
  *   themselves up.  Hence, some "g_assert(!aio_poll(ctx, false));"
  *   are replaced by "while (g_main_context_iteration(NULL, false));".
- * - there is no exact replacement for aio_flush's blocking wait.
+ * - there is no exact replacement for a blocking wait.
  *   "while (g_main_context_iteration(NULL, true)" seems to work,
  *   but it is not documented _why_ it works.  For these tests a
  *   non-blocking loop like "while (g_main_context_iteration(NULL, false)"
@@ -637,7 +637,6 @@ int main(int argc, char **argv)
 
     g_test_init(&argc, &argv, NULL);
     g_test_add_func("/aio/notify",                  test_notify);
-    g_test_add_func("/aio/flush",                   test_flush);
     g_test_add_func("/aio/bh/schedule",             test_bh_schedule);
     g_test_add_func("/aio/bh/schedule10",           test_bh_schedule10);
     g_test_add_func("/aio/bh/cancel",               test_bh_cancel);
commit ff1562908d1da12362aa9e3f3bfc7ba0da8114a4
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Tue Dec 11 08:24:30 2012 +0100

    seabios: update to e8a76b0f225bba5ba9d63ab227e0a37b3beb1059
    
    This patch updates seabios to latest git master.  Changes:
    
      (1) q35 patches merged.
      (2) some acpi cleanups.
      (3) fixes irq 8 conflict.
    
    (3) makes this a candidate for the stable branch
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/pc-bios/acpi-dsdt.aml b/pc-bios/acpi-dsdt.aml
index bb3dd83..18b4dc1 100644
Binary files a/pc-bios/acpi-dsdt.aml and b/pc-bios/acpi-dsdt.aml differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index dc9b57d..3eefff4 100644
Binary files a/pc-bios/bios.bin and b/pc-bios/bios.bin differ
diff --git a/pc-bios/q35-acpi-dsdt.aml b/pc-bios/q35-acpi-dsdt.aml
new file mode 100644
index 0000000..8a50559
Binary files /dev/null and b/pc-bios/q35-acpi-dsdt.aml differ
diff --git a/roms/seabios b/roms/seabios
index b1c35f2..e8a76b0 160000
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@
-Subproject commit b1c35f2b28cc0c94ebed8176ff61ac0e0b377798
+Subproject commit e8a76b0f225bba5ba9d63ab227e0a37b3beb1059
commit d281084d3e51f03999d12a506491a0c6f31b40e8
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Mon Dec 10 11:30:03 2012 -0700

    vfio-pci: Don't use kvm_irqchip_in_kernel
    
    kvm_irqchip_in_kernel() has an architecture specific meaning, so
    we shouldn't be using it to determine whether to enabled KVM INTx
    bypass.  kvm_irqfds_enabled() seems most appropriate.  Also use this
    to protect our other call to kvm_check_extension() as that explodes
    when KVM isn't enabled.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Cc: qemu-stable at nongnu.org

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 7c27834..fbfe670 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -275,7 +275,7 @@ static void vfio_enable_intx_kvm(VFIODevice *vdev)
     int ret, argsz;
     int32_t *pfd;
 
-    if (!kvm_irqchip_in_kernel() ||
+    if (!kvm_irqfds_enabled() ||
         vdev->intx.route.mode != PCI_INTX_ENABLED ||
         !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
         return;
@@ -438,7 +438,8 @@ static int vfio_enable_intx(VFIODevice *vdev)
      * Only conditional to avoid generating error messages on platforms
      * where we won't actually use the result anyway.
      */
-    if (kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+    if (kvm_irqfds_enabled() &&
+        kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
         vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev,
                                                         vdev->intx.pin);
     }


More information about the Spice-commits mailing list