[Spice-commits] 231 commits - .gitignore HACKING MAINTAINERS Makefile Makefile.hw Makefile.objs Makefile.target QMP/qemu-ga-client QMP/qmp-events.txt QMP/qmp.py aio.c block-migration.c block.c block.h block/Makefile.objs block/blkdebug.c block/commit.c block/gluster.c block/qcow2.c block/qed-table.c block/sheepdog.c block/stream.c block/vmdk.c block_int.h blockdev.c blockjob.c blockjob.h configure cpu-all.h cpu-common.h cpu-exec.c cpus.c cutils.c device_tree.c device_tree.h dma.h dump.c exec-obsolete.h exec.c fpu/softfloat.c hmp-commands.hx hmp.c hmp.h hw/9pfs hw/Makefile.objs hw/acpi.c hw/alpha_dp264.c hw/alpha_pci.c hw/alpha_sys.h hw/armv7m_nvic.c hw/blizzard.c hw/bt-hci.c hw/cirrus_vga.c hw/device-hotplug.c hw/exynos4210_combiner.c hw/exynos4210_gic.c hw/exynos4210_mct.c hw/fdc.c hw/framebuffer.c hw/hw.h hw/ide hw/intel-hda.c hw/isa-bus.c hw/isa.h hw/ivshmem.c hw/kvm hw/leon3.c hw/lm32_hwsetup.h hw/mc146818rtc.c hw/mc146818rtc_regs.h hw/megasas.c hw/mips_fulong2e.c hw/mips_malta.c hw/msix.c hw/nand.c hw/onenand.c hw/pc.c hw/pc.h hw/pci.c hw/pci.h hw/pflash_cfi01.c hw/pflash_cfi02.c hw/pl190.c hw/ppc hw/ppc405_uc.c hw/ppc_newworld.c hw/ppc_oldworld.c hw/ppc_prep.c hw/pxa2xx_keypad.c hw/qdev.c hw/qdev.h hw/r2d.c hw/rtl8139.c hw/sb16.c hw/scsi-bus.c hw/scsi-disk.c hw/scsi-generic.c hw/sd.c hw/spapr.c hw/spapr.h hw/spapr_hcall.c hw/spapr_iommu.c hw/spapr_pci.c hw/spapr_rtas.c hw/spapr_vio.c hw/spapr_vio.h hw/spapr_vscsi.c hw/sun4u.c hw/sysbus.c hw/tcx.c hw/usb hw/versatilepb.c hw/vexpress.c hw/vfio_pci.c hw/vfio_pci_int.h hw/vga-pci.c hw/vga-pci.h hw/virtio-blk.c hw/virtio-net.c hw/virtio-serial-bus.c hw/virtio.c hw/virtio.h hw/vmware_vga.c hw/vmware_vga.h hw/xen.h hw/xen_domainbuild.c hw/xen_platform.c hw/xen_pt.c hw/xen_pt_config_init.c hw/xics.c hw/xics.h input.c iov.c iov.h libcacard/Makefile libcacard/vcard.c libcacard/vcard_emul_nss.c libcacard/vreader.c linux-headers/linux linux-user/elfload.c linux-user/signal.c migration-fd.c monitor.c monitor.h
os-posix.c qapi-schema.json qemu-barrier.h qemu-common.h qemu-sockets.c qemu-tool.c qerror.h qga/commands-posix.c qga/commands-win32.c qmp-commands.hx qmp.c savevm.c scripts/qapi-types.py scripts/qapi-visit.py scripts/qapi.py scripts/update-linux-headers.sh slirp/ip_icmp.h slirp/ip_input.c slirp/tcp_input.c slirp/udp.c sysemu.h target-alpha/translate.c target-arm/cpu.h target-arm/helper.h target-arm/op_helper.c target-arm/translate.c target-cris/translate.c target-i386/cc_helper.c target-i386/cpu.c target-i386/cpu.h target-i386/helper.c target-i386/helper.h target-i386/kvm.c target-i386/seg_helper.c target-i386/smm_helper.c target-i386/translate.c target-lm32/translate.c target-m68k/translate.c target-microblaze/translate.c target-mips/translate.c target-openrisc/translate.c target-ppc/cpu.h target-ppc/int_helper.c target-ppc/kvm.c target-ppc/kvm_ppc.h target-ppc/machine.c target-ppc/mmu_helper.c target-ppc/translate.c target-ppc/translate_init.c target-s390x/helper.c target-s390x/misc_helper.c target-s390x/translate.c target-sh4/translate.c target-sparc/cpu.c target-sparc/cpu.h target-sparc/helper.c target-sparc/helper.h target-sparc/int32_helper.c target-sparc/int64_helper.c target-sparc/ldst_helper.c target-sparc/translate.c target-unicore32/translate.c target-xtensa/translate.c targphys.h tcg/README tcg/arm tcg/hppa tcg/i386 tcg/ia64 tcg/mips tcg/optimize.c tcg/ppc tcg/ppc64 tcg/s390 tcg/sparc tcg/tcg-op.h tcg/tcg-opc.h tcg/tcg.c tcg/tcg.h tcg/tci tci.c tests/libqtest.c tests/qemu-iotests tests/rtc-test.c trace-events ui/vnc-auth-sasl.c ui/vnc-tls.c uri.c uri.h vl.c xen-all.c xen-stub.c

Gerd Hoffmann kraxel at kemper.freedesktop.org
Mon Oct 8 01:07:39 PDT 2012


 .gitignore                      |    2 
 HACKING                         |    9 
 MAINTAINERS                     |   55 
 Makefile                        |    2 
 Makefile.hw                     |   23 
 Makefile.objs                   |   14 
 Makefile.target                 |    4 
 QMP/qemu-ga-client              |  299 +++++
 QMP/qmp-events.txt              |   28 
 QMP/qmp.py                      |   12 
 aio.c                           |   14 
 block-migration.c               |    2 
 block.c                         |  351 +++---
 block.h                         |   25 
 block/Makefile.objs             |    5 
 block/blkdebug.c                |   12 
 block/commit.c                  |  268 ++++
 block/gluster.c                 |  624 +++++++++++
 block/qcow2.c                   |    1 
 block/qed-table.c               |    1 
 block/sheepdog.c                |   34 
 block/stream.c                  |   29 
 block/vmdk.c                    |    3 
 block_int.h                     |  178 ---
 blockdev.c                      |  150 ++
 blockjob.c                      |  249 ++++
 blockjob.h                      |  243 ++++
 configure                       |   77 -
 cpu-all.h                       |    3 
 cpu-common.h                    |    2 
 cpu-exec.c                      |    2 
 cpus.c                          |    8 
 cutils.c                        |    2 
 device_tree.c                   |   15 
 device_tree.h                   |    2 
 dma.h                           |    2 
 dump.c                          |   18 
 exec-obsolete.h                 |    2 
 exec.c                          |   65 -
 fpu/softfloat.c                 |    6 
 hmp-commands.hx                 |   43 
 hmp.c                           |   77 +
 hmp.h                           |    2 
 hw/9pfs/Makefile.objs           |   14 
 hw/9pfs/virtio-9p-posix-acl.c   |    6 
 hw/9pfs/virtio-9p-synth.c       |    4 
 hw/9pfs/virtio-9p-xattr-user.c  |    3 
 hw/9pfs/virtio-9p-xattr.c       |    3 
 hw/9pfs/virtio-9p.c             |    5 
 hw/Makefile.objs                |  229 ++--
 hw/acpi.c                       |   24 
 hw/alpha_dp264.c                |    2 
 hw/alpha_pci.c                  |   24 
 hw/alpha_sys.h                  |    2 
 hw/armv7m_nvic.c                |    3 
 hw/blizzard.c                   |    2 
 hw/bt-hci.c                     |    8 
 hw/cirrus_vga.c                 |    6 
 hw/device-hotplug.c             |    1 
 hw/exynos4210_combiner.c        |    4 
 hw/exynos4210_gic.c             |    4 
 hw/exynos4210_mct.c             |    2 
 hw/fdc.c                        |    6 
 hw/framebuffer.c                |    1 
 hw/hw.h                         |    2 
 hw/ide/Makefile.objs            |   20 
 hw/ide/core.c                   |   22 
 hw/ide/macio.c                  |    1 
 hw/ide/pci.c                    |    4 
 hw/ide/qdev.c                   |    2 
 hw/intel-hda.c                  |    8 
 hw/isa-bus.c                    |   22 
 hw/isa.h                        |    2 
 hw/ivshmem.c                    |    4 
 hw/kvm/pci-assign.c             |   12 
 hw/leon3.c                      |    2 
 hw/lm32_hwsetup.h               |    2 
 hw/mc146818rtc.c                |   40 
 hw/mc146818rtc_regs.h           |    4 
 hw/megasas.c                    |    1 
 hw/mips_fulong2e.c              |    2 
 hw/mips_malta.c                 |   10 
 hw/msix.c                       |    1 
 hw/nand.c                       |   34 
 hw/onenand.c                    |    2 
 hw/pc.c                         |   35 
 hw/pc.h                         |   13 
 hw/pci.c                        |   20 
 hw/pci.h                        |    3 
 hw/pflash_cfi01.c               |    1 
 hw/pflash_cfi02.c               |    1 
 hw/pl190.c                      |   18 
 hw/ppc/e500.c                   |   24 
 hw/ppc405_uc.c                  |   16 
 hw/ppc_newworld.c               |    1 
 hw/ppc_oldworld.c               |    1 
 hw/ppc_prep.c                   |    1 
 hw/pxa2xx_keypad.c              |    1 
 hw/qdev.c                       |    8 
 hw/qdev.h                       |    1 
 hw/r2d.c                        |    2 
 hw/rtl8139.c                    |    6 
 hw/sb16.c                       |    1 
 hw/scsi-bus.c                   |    8 
 hw/scsi-disk.c                  |   27 
 hw/scsi-generic.c               |    4 
 hw/sd.c                         |   16 
 hw/spapr.c                      |  336 +++--
 hw/spapr.h                      |   14 
 hw/spapr_hcall.c                |   49 
 hw/spapr_iommu.c                |   24 
 hw/spapr_pci.c                  |   12 
 hw/spapr_rtas.c                 |    5 
 hw/spapr_vio.c                  |   37 
 hw/spapr_vio.h                  |    2 
 hw/spapr_vscsi.c                |    2 
 hw/sun4u.c                      |    1 
 hw/sysbus.c                     |    2 
 hw/tcx.c                        |    1 
 hw/usb/Makefile.objs            |   14 
 hw/usb/dev-uas.c                |    2 
 hw/usb/hcd-ehci.c               |   40 
 hw/usb/hcd-ohci.c               |    2 
 hw/usb/hcd-xhci.c               |  180 +--
 hw/usb/libhw.c                  |   24 
 hw/versatilepb.c                |    3 
 hw/vexpress.c                   |   33 
 hw/vfio_pci.c                   | 1864 +++++++++++++++++++++++++++++++++
 hw/vfio_pci_int.h               |  114 ++
 hw/vga-pci.c                    |   10 
 hw/vga-pci.h                    |   12 
 hw/virtio-blk.c                 |   23 
 hw/virtio-net.c                 |    4 
 hw/virtio-serial-bus.c          |   11 
 hw/virtio.c                     |   32 
 hw/virtio.h                     |    5 
 hw/vmware_vga.c                 |    1 
 hw/vmware_vga.h                 |   15 
 hw/xen.h                        |    1 
 hw/xen_domainbuild.c            |    1 
 hw/xen_platform.c               |    8 
 hw/xen_pt.c                     |    7 
 hw/xen_pt_config_init.c         |   39 
 hw/xics.c                       |  114 +-
 hw/xics.h                       |    8 
 input.c                         |   75 -
 iov.c                           |    2 
 iov.h                           |    2 
 libcacard/Makefile              |    3 
 libcacard/vcard.c               |    1 
 libcacard/vcard_emul_nss.c      |    6 
 libcacard/vreader.c             |    1 
 linux-headers/linux/vfio.h      |  368 ++++++
 linux-user/elfload.c            |   12 
 linux-user/signal.c             |    2 
 migration-fd.c                  |    2 
 monitor.c                       |   61 -
 monitor.h                       |    3 
 os-posix.c                      |    4 
 qapi-schema.json                |  219 +++
 qemu-barrier.h                  |    4 
 qemu-common.h                   |   10 
 qemu-sockets.c                  |    3 
 qemu-tool.c                     |    6 
 qerror.h                        |    6 
 qga/commands-posix.c            |    4 
 qga/commands-win32.c            |    2 
 qmp-commands.hx                 |   49 
 qmp.c                           |   43 
 savevm.c                        |    1 
 scripts/qapi-types.py           |    4 
 scripts/qapi-visit.py           |    2 
 scripts/qapi.py                 |   10 
 scripts/update-linux-headers.sh |    2 
 slirp/ip_icmp.h                 |    4 
 slirp/ip_input.c                |    1 
 slirp/tcp_input.c               |    2 
 slirp/udp.c                     |    1 
 sysemu.h                        |    3 
 target-alpha/translate.c        |    2 
 target-arm/cpu.h                |    2 
 target-arm/helper.h             |   24 
 target-arm/op_helper.c          |   44 
 target-arm/translate.c          |  304 ++---
 target-cris/translate.c         |    3 
 target-i386/cc_helper.c         |   10 
 target-i386/cpu.c               |   95 +
 target-i386/cpu.h               |   49 
 target-i386/helper.c            |  154 ++
 target-i386/helper.h            |    2 
 target-i386/kvm.c               |    2 
 target-i386/seg_helper.c        |    4 
 target-i386/smm_helper.c        |    4 
 target-i386/translate.c         |   30 
 target-lm32/translate.c         |    2 
 target-m68k/translate.c         |    4 
 target-microblaze/translate.c   |    3 
 target-mips/translate.c         |    3 
 target-openrisc/translate.c     |    2 
 target-ppc/cpu.h                |    1 
 target-ppc/int_helper.c         |  127 --
 target-ppc/kvm.c                |  141 +-
 target-ppc/kvm_ppc.h            |   19 
 target-ppc/machine.c            |    4 
 target-ppc/mmu_helper.c         |    4 
 target-ppc/translate.c          |    3 
 target-ppc/translate_init.c     |    8 
 target-s390x/helper.c           |    7 
 target-s390x/misc_helper.c      |    3 
 target-s390x/translate.c        |   32 
 target-sh4/translate.c          |    2 
 target-sparc/cpu.c              |   11 
 target-sparc/cpu.h              |    1 
 target-sparc/helper.c           |   86 +
 target-sparc/helper.h           |    6 
 target-sparc/int32_helper.c     |    7 
 target-sparc/int64_helper.c     |    8 
 target-sparc/ldst_helper.c      |    4 
 target-sparc/translate.c        | 1136 +++++++++-----------
 target-unicore32/translate.c    |    6 
 target-xtensa/translate.c       |   24 
 targphys.h                      |   19 
 tcg/README                      |    7 
 tcg/arm/tcg-target.c            |   24 
 tcg/hppa/tcg-target.c           |   25 
 tcg/i386/tcg-target.c           |   11 
 tcg/ia64/tcg-target.c           |    4 
 tcg/mips/tcg-target.c           |    5 
 tcg/optimize.c                  |    6 
 tcg/ppc/tcg-target.c            |   12 
 tcg/ppc64/tcg-target.c          |   12 
 tcg/s390/tcg-target.c           |   12 
 tcg/sparc/tcg-target.c          |    4 
 tcg/tcg-op.h                    |   82 +
 tcg/tcg-opc.h                   |    1 
 tcg/tcg.c                       |    3 
 tcg/tcg.h                       |   60 -
 tcg/tci/tcg-target.c            |   14 
 tci.c                           |   14 
 tests/libqtest.c                |   38 
 tests/qemu-iotests/030          |  260 ++++
 tests/qemu-iotests/030.out      |    4 
 tests/qemu-iotests/040          |  178 +++
 tests/qemu-iotests/040.out      |    5 
 tests/qemu-iotests/group        |    3 
 tests/qemu-iotests/iotests.py   |   15 
 tests/rtc-test.c                |   73 +
 trace-events                    |    6 
 ui/vnc-auth-sasl.c              |    5 
 ui/vnc-tls.c                    |    2 
 uri.c                           | 2249 ++++++++++++++++++++++++++++++++++++++++
 uri.h                           |  113 ++
 vl.c                            |   28 
 xen-all.c                       |   40 
 xen-stub.c                      |    9 
 255 files changed, 10499 insertions(+), 2918 deletions(-)

New commits:
commit 4bb26682f70a5f626cad3e0ac82bf4b6252ea7a4
Merge: 2a484ec... f182978...
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sun Oct 7 18:42:18 2012 +0000

    Merge branch 'master' of git.qemu.org:/pub/git/qemu
    
    * 'master' of git.qemu.org:/pub/git/qemu:
      qemu-barrier: Fix compilation on i386 hosts

commit f1829782d6cb931973d71649f8ad1dad66188c34
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sun Oct 7 20:07:11 2012 +0200

    qemu-barrier: Fix compilation on i386 hosts
    
    Commit 1d31fca470648ec66afd8743491bfb5846306341 tried to fix bug
    introduced by 610b823ef66b993660f1ab1447a769f190e4f3b3 by including
    qemu-common.h, which breaks the build further.
    
    Include compiler.h instead, as suggested by Blue Swirl.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/qemu-barrier.h b/qemu-barrier.h
index 1de914e..faa83d2 100644
--- a/qemu-barrier.h
+++ b/qemu-barrier.h
@@ -6,7 +6,7 @@
 
 #if defined(__i386__)
 
-#include "qemu-common.h"        /* QEMU_GNUC_PREREQ */
+#include "compiler.h"        /* QEMU_GNUC_PREREQ */
 
 /*
  * Because of the strongly ordered x86 storage model, wmb() and rmb() are nops
commit 2a484ecf8283814c92b5743845e285f0efc23247
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:10 2012 -0700

    target-sparc: Optimize conditionals using SUBCC
    
    Aka "normal" comparisons.  We now have the infrastructure to
    pass back non-boolean results from gen_compare.  This will
    automatically get used by both branches and conditional moves.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index a7f6407..472eb51 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1046,83 +1046,153 @@ static void free_compare(DisasCompare *cmp)
     }
 }
 
-static void gen_compare(DisasCompare *cmp, unsigned int cc, unsigned int cond,
+static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
                         DisasContext *dc)
 {
+    static int subcc_cond[16] = {
+        -1, /* never */
+        TCG_COND_EQ,
+        TCG_COND_LE,
+        TCG_COND_LT,
+        TCG_COND_LEU,
+        TCG_COND_LTU,
+        -1, /* neg */
+        -1, /* overflow */
+        -1, /* always */
+        TCG_COND_NE,
+        TCG_COND_GT,
+        TCG_COND_GE,
+        TCG_COND_GTU,
+        TCG_COND_GEU,
+        -1, /* pos */
+        -1, /* no overflow */
+    };
+
     TCGv_i32 r_src;
     TCGv r_dst;
 
-    /* For now we still generate a straight boolean result.  */
-    cmp->cond = TCG_COND_NE;
-    cmp->is_bool = true;
-    cmp->g1 = cmp->g2 = false;
-    cmp->c1 = r_dst = tcg_temp_new();
-    cmp->c2 = tcg_const_tl(0);
-
 #ifdef TARGET_SPARC64
-    if (cc)
+    if (xcc) {
         r_src = cpu_xcc;
-    else
+    } else {
         r_src = cpu_psr;
+    }
 #else
     r_src = cpu_psr;
 #endif
+
     switch (dc->cc_op) {
-    case CC_OP_FLAGS:
+    case CC_OP_SUB:
+        switch (cond) {
+        case 6:  /* neg */
+        case 14: /* pos */
+            cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE);
+            cmp->is_bool = false;
+            cmp->g2 = false;
+            cmp->c2 = tcg_const_tl(0);
+#ifdef TARGET_SPARC64
+            if (!xcc) {
+                cmp->g1 = false;
+                cmp->c1 = tcg_temp_new();
+                tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
+                break;
+            }
+#endif
+            cmp->g1 = true;
+            cmp->c1 = cpu_cc_dst;
+            break;
+
+        case 0: /* never */
+        case 8: /* always */
+        case 7: /* overflow */
+        case 15: /* !overflow */
+            goto do_dynamic;
+
+        default:
+            cmp->cond = subcc_cond[cond];
+            cmp->is_bool = false;
+#ifdef TARGET_SPARC64
+            if (!xcc) {
+                /* Note that sign-extension works for unsigned compares as
+                   long as both operands are sign-extended.  */
+                cmp->g1 = cmp->g2 = false;
+                cmp->c1 = tcg_temp_new();
+                cmp->c2 = tcg_temp_new();
+                tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src);
+                tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2);
+            }
+#endif
+            cmp->g1 = cmp->g2 = true;
+            cmp->c1 = cpu_cc_src;
+            cmp->c2 = cpu_cc_src2;
+            break;
+        }
         break;
+
     default:
+    do_dynamic:
         gen_helper_compute_psr(cpu_env);
         dc->cc_op = CC_OP_FLAGS;
-        break;
-    }
-    switch (cond) {
-    case 0x0:
-        gen_op_eval_bn(r_dst);
-        break;
-    case 0x1:
-        gen_op_eval_be(r_dst, r_src);
-        break;
-    case 0x2:
-        gen_op_eval_ble(r_dst, r_src);
-        break;
-    case 0x3:
-        gen_op_eval_bl(r_dst, r_src);
-        break;
-    case 0x4:
-        gen_op_eval_bleu(r_dst, r_src);
-        break;
-    case 0x5:
-        gen_op_eval_bcs(r_dst, r_src);
-        break;
-    case 0x6:
-        gen_op_eval_bneg(r_dst, r_src);
-        break;
-    case 0x7:
-        gen_op_eval_bvs(r_dst, r_src);
-        break;
-    case 0x8:
-        gen_op_eval_ba(r_dst);
-        break;
-    case 0x9:
-        gen_op_eval_bne(r_dst, r_src);
-        break;
-    case 0xa:
-        gen_op_eval_bg(r_dst, r_src);
-        break;
-    case 0xb:
-        gen_op_eval_bge(r_dst, r_src);
-        break;
-    case 0xc:
-        gen_op_eval_bgu(r_dst, r_src);
-        break;
-    case 0xd:
-        gen_op_eval_bcc(r_dst, r_src);
-        break;
-    case 0xe:
-        gen_op_eval_bpos(r_dst, r_src);
-        break;
-    case 0xf:
-        gen_op_eval_bvc(r_dst, r_src);
+        /* FALLTHRU */
+
+    case CC_OP_FLAGS:
+        /* We're going to generate a boolean result.  */
+        cmp->cond = TCG_COND_NE;
+        cmp->is_bool = true;
+        cmp->g1 = cmp->g2 = false;
+        cmp->c1 = r_dst = tcg_temp_new();
+        cmp->c2 = tcg_const_tl(0);
+
+        switch (cond) {
+        case 0x0:
+            gen_op_eval_bn(r_dst);
+            break;
+        case 0x1:
+            gen_op_eval_be(r_dst, r_src);
+            break;
+        case 0x2:
+            gen_op_eval_ble(r_dst, r_src);
+            break;
+        case 0x3:
+            gen_op_eval_bl(r_dst, r_src);
+            break;
+        case 0x4:
+            gen_op_eval_bleu(r_dst, r_src);
+            break;
+        case 0x5:
+            gen_op_eval_bcs(r_dst, r_src);
+            break;
+        case 0x6:
+            gen_op_eval_bneg(r_dst, r_src);
+            break;
+        case 0x7:
+            gen_op_eval_bvs(r_dst, r_src);
+            break;
+        case 0x8:
+            gen_op_eval_ba(r_dst);
+            break;
+        case 0x9:
+            gen_op_eval_bne(r_dst, r_src);
+            break;
+        case 0xa:
+            gen_op_eval_bg(r_dst, r_src);
+            break;
+        case 0xb:
+            gen_op_eval_bge(r_dst, r_src);
+            break;
+        case 0xc:
+            gen_op_eval_bgu(r_dst, r_src);
+            break;
+        case 0xd:
+            gen_op_eval_bcc(r_dst, r_src);
+            break;
+        case 0xe:
+            gen_op_eval_bpos(r_dst, r_src);
+            break;
+        case 0xf:
+            gen_op_eval_bvc(r_dst, r_src);
+            break;
+        }
         break;
     }
 }
commit fe1755cbb21c34a40234e2c380d049b9b031c4e2
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:09 2012 -0700

    target-sparc: Fall through from not-taken trap
    
    Now that we've cleaned up global temporary allocation, we can
    continue translating the fallthru path of a conditional trap.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 48c245e..a7f6407 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2573,13 +2573,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 gen_helper_raise_exception(cpu_env, trap);
                 tcg_temp_free_i32(trap);
 
-                if (cond != 8) {
+                if (cond == 8) {
+                    /* An unconditional trap ends the TB.  */
+                    dc->is_br = 1;
+                    goto jmp_insn;
+                } else {
+                    /* A conditional trap falls through to the next insn.  */
                     gen_set_label(l1);
-                    gen_op_next_insn();
-                    tcg_gen_exit_tb(0);
+                    break;
                 }
-                dc->is_br = 1;
-                goto jmp_insn;
             } else if (xop == 0x28) {
                 rs1 = GET_FIELD(insn, 13, 17);
                 switch(rs1) {
commit b09b2fd30ce3079fbca1bbd7c1a87459378c5bd7
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:08 2012 -0700

    target-sparc: Cleanup "global" temporary allocation
    
    There are 6 temporaries that disas_sparc_insn relies on having been
    allocated.  Now that they are no longer referenced across branches,
    they need not be allocated as local temps.
    
    Move the allocation/free of these temporaries to make it clear that
    they are local to the translation of a single insn.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index d594284..48c245e 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -5177,16 +5177,6 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
     dc->singlestep = (env->singlestep_enabled || singlestep);
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
 
-    cpu_tmp0 = tcg_temp_new();
-    cpu_tmp32 = tcg_temp_new_i32();
-    cpu_tmp64 = tcg_temp_new_i64();
-
-    cpu_dst = tcg_temp_local_new();
-
-    // loads and stores
-    cpu_val = tcg_temp_local_new();
-    cpu_addr = tcg_temp_local_new();
-
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
@@ -5222,9 +5212,24 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
             gen_io_start();
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
+
+        cpu_tmp0 = tcg_temp_new();
+        cpu_tmp32 = tcg_temp_new_i32();
+        cpu_tmp64 = tcg_temp_new_i64();
+        cpu_dst = tcg_temp_new();
+        cpu_val = tcg_temp_new();
+        cpu_addr = tcg_temp_new();
+
         disas_sparc_insn(dc, insn);
         num_insns++;
 
+        tcg_temp_free(cpu_addr);
+        tcg_temp_free(cpu_val);
+        tcg_temp_free(cpu_dst);
+        tcg_temp_free_i64(cpu_tmp64);
+        tcg_temp_free_i32(cpu_tmp32);
+        tcg_temp_free(cpu_tmp0);
+
         if (dc->is_br)
             break;
         /* if the next PC is different, we abort now */
@@ -5244,23 +5249,18 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
              num_insns < max_insns);
 
  exit_gen_loop:
-    tcg_temp_free(cpu_addr);
-    tcg_temp_free(cpu_val);
-    tcg_temp_free(cpu_dst);
-    tcg_temp_free_i64(cpu_tmp64);
-    tcg_temp_free_i32(cpu_tmp32);
-    tcg_temp_free(cpu_tmp0);
-
-    if (tb->cflags & CF_LAST_IO)
+    if (tb->cflags & CF_LAST_IO) {
         gen_io_end();
+    }
     if (!dc->is_br) {
         if (dc->pc != DYNAMIC_PC &&
             (dc->npc != DYNAMIC_PC && dc->npc != JUMP_PC)) {
             /* static PC and NPC: we can use direct chaining */
             gen_goto_tb(dc, 0, dc->pc, dc->npc);
         } else {
-            if (dc->pc != DYNAMIC_PC)
+            if (dc->pc != DYNAMIC_PC) {
                 tcg_gen_movi_tl(cpu_pc, dc->pc);
+            }
             save_npc(dc);
             tcg_gen_exit_tb(0);
         }
commit 690995a615ed584a4d494a7b61ede4f849d83e4a
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:07 2012 -0700

    target-sparc: Use movcond for FMOV*R
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 614905e..d594284 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -3051,44 +3051,30 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 rs2 = GET_FIELD(insn, 27, 31);
                 xop = GET_FIELD(insn, 18, 26);
                 save_state(dc);
-#ifdef TARGET_SPARC64
-                if ((xop & 0x11f) == 0x005) { // V9 fmovsr
-                    int l1;
 
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
-                    gen_store_fpr_F(dc, rd, cpu_src1_32);
-                    gen_set_label(l1);
+#ifdef TARGET_SPARC64
+#define FMOVR(sz)                                                  \
+                do {                                               \
+                    DisasCompare cmp;                              \
+                    cond = GET_FIELD_SP(insn, 14, 17);             \
+                    cpu_src1 = get_src1(insn, cpu_src1);           \
+                    gen_compare_reg(&cmp, cond, cpu_src1);         \
+                    gen_fmov##sz(dc, &cmp, rd, rs2);               \
+                    free_compare(&cmp);                            \
+                } while (0)
+
+                if ((xop & 0x11f) == 0x005) { /* V9 fmovsr */
+                    FMOVR(s);
                     break;
                 } else if ((xop & 0x11f) == 0x006) { // V9 fmovdr
-                    int l1;
-
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    gen_store_fpr_D(dc, rd, cpu_src1_64);
-                    gen_set_label(l1);
+                    FMOVR(d);
                     break;
                 } else if ((xop & 0x11f) == 0x007) { // V9 fmovqr
-                    int l1;
-
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    gen_move_Q(rd, rs2);
-                    gen_set_label(l1);
+                    FMOVR(q);
                     break;
                 }
+#undef FMOVR
 #endif
                 switch (xop) {
 #ifdef TARGET_SPARC64
commit 6cb675b0f89131dcb7bc4f9e0801ac7620374185
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:06 2012 -0700

    target-sparc: Use movcond in mulscc
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 98efb84..614905e 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -582,22 +582,21 @@ static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
 
 static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
-    TCGv r_temp;
-    int l1;
+    TCGv r_temp, zero;
 
-    l1 = gen_new_label();
     r_temp = tcg_temp_new();
 
     /* old op:
     if (!(env->y & 1))
         T1 = 0;
     */
+    zero = tcg_const_tl(0);
     tcg_gen_andi_tl(cpu_cc_src, src1, 0xffffffff);
     tcg_gen_andi_tl(r_temp, cpu_y, 0x1);
     tcg_gen_andi_tl(cpu_cc_src2, src2, 0xffffffff);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp, 0, l1);
-    tcg_gen_movi_tl(cpu_cc_src2, 0);
-    gen_set_label(l1);
+    tcg_gen_movcond_tl(TCG_COND_EQ, cpu_cc_src2, r_temp, zero,
+                       zero, cpu_cc_src2);
+    tcg_temp_free(zero);
 
     // b2 = T0 & 1;
     // env->y = (b2 << 31) | (env->y >> 1);
commit a2ea4aa9898086c1e45e1db9b5f94d16dbf0762e
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:05 2012 -0700

    target-sparc: Move taddcctv and tsubcctv out of line
    
    The branches around the exception are maintaining an otherwise
    unnecessary use of local temps for the cpu destination.
    
    Note that gen_op_t{add,sub}_cc were identical to gen_op_{add,sub}_cc.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 4555d2b..556ac28 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -167,3 +167,61 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
     return a / b;
 }
 #endif
+
+target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
+                             target_ulong src2)
+{
+    target_ulong dst;
+
+    /* Tag overflow occurs if either input has bits 0 or 1 set.  */
+    if ((src1 | src2) & 3) {
+        goto tag_overflow;
+    }
+
+    dst = src1 + src2;
+
+    /* Tag overflow occurs if the addition overflows.  */
+    if (~(src1 ^ src2) & (src1 ^ dst) & (1u << 31)) {
+        goto tag_overflow;
+    }
+
+    /* Only modify the CC after any exceptions have been generated.  */
+    env->cc_op = CC_OP_TADDTV;
+    env->cc_src = src1;
+    env->cc_src2 = src2;
+    env->cc_dst = dst;
+    return dst;
+
+ tag_overflow:
+    cpu_restore_state2(env, GETPC());
+    helper_raise_exception(env, TT_TOVF);
+}
+
+target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
+                             target_ulong src2)
+{
+    target_ulong dst;
+
+    /* Tag overflow occurs if either input has bits 0 or 1 set.  */
+    if ((src1 | src2) & 3) {
+        goto tag_overflow;
+    }
+
+    dst = src1 - src2;
+
+    /* Tag overflow occurs if the subtraction overflows.  */
+    if ((src1 ^ src2) & (src1 ^ dst) & (1u << 31)) {
+        goto tag_overflow;
+    }
+
+    /* Only modify the CC after any exceptions have been generated.  */
+    env->cc_op = CC_OP_TSUBTV;
+    env->cc_src = src1;
+    env->cc_src2 = src2;
+    env->cc_dst = dst;
+    return dst;
+
+ tag_overflow:
+    cpu_restore_state2(env, GETPC());
+    helper_raise_exception(env, TT_TOVF);
+}
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 827df67..e1ae3c7 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -38,6 +38,8 @@ DEF_HELPER_3(udiv, tl, env, tl, tl)
 DEF_HELPER_3(udiv_cc, tl, env, tl, tl)
 DEF_HELPER_3(sdiv, tl, env, tl, tl)
 DEF_HELPER_3(sdiv_cc, tl, env, tl, tl)
+DEF_HELPER_3(taddcctv, tl, env, tl, tl)
+DEF_HELPER_3(tsubcctv, tl, env, tl, tl)
 #ifdef TARGET_SPARC64
 DEF_HELPER_3(sdivx, s64, env, s64, s64)
 DEF_HELPER_3(udivx, i64, env, i64, i64)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 111c025..98efb84 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -336,43 +336,6 @@ static inline void gen_mov_reg_C(TCGv reg, TCGv_i32 src)
     tcg_gen_andi_tl(reg, reg, 0x1);
 }
 
-static inline void gen_add_tv(TCGv dst, TCGv src1, TCGv src2)
-{
-    TCGv r_temp;
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-
-    r_temp = tcg_temp_new();
-    tcg_gen_xor_tl(r_temp, src1, src2);
-    tcg_gen_not_tl(r_temp, r_temp);
-    tcg_gen_xor_tl(cpu_tmp0, src1, dst);
-    tcg_gen_and_tl(r_temp, r_temp, cpu_tmp0);
-    tcg_gen_andi_tl(r_temp, r_temp, (1ULL << 31));
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_temp, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-    tcg_temp_free(r_temp);
-}
-
-static inline void gen_tag_tv(TCGv src1, TCGv src2)
-{
-    int l1;
-    TCGv_i32 r_const;
-
-    l1 = gen_new_label();
-    tcg_gen_or_tl(cpu_tmp0, src1, src2);
-    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0x3);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-}
-
 static inline void gen_op_addi_cc(TCGv dst, TCGv src1, target_long src2)
 {
     tcg_gen_mov_tl(cpu_cc_src, src1);
@@ -517,45 +480,6 @@ static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1,
     }
 }
 
-static inline void gen_op_tadd_cc(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_op_tadd_ccTV(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    gen_tag_tv(cpu_cc_src, cpu_cc_src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    gen_add_tv(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_sub_tv(TCGv dst, TCGv src1, TCGv src2)
-{
-    TCGv r_temp;
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-
-    r_temp = tcg_temp_new();
-    tcg_gen_xor_tl(r_temp, src1, src2);
-    tcg_gen_xor_tl(cpu_tmp0, src1, dst);
-    tcg_gen_and_tl(r_temp, r_temp, cpu_tmp0);
-    tcg_gen_andi_tl(r_temp, r_temp, (1ULL << 31));
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_temp, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-    tcg_temp_free(r_temp);
-}
-
 static inline void gen_op_subi_cc(TCGv dst, TCGv src1, target_long src2, DisasContext *dc)
 {
     tcg_gen_mov_tl(cpu_cc_src, src1);
@@ -656,24 +580,6 @@ static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1,
     }
 }
 
-static inline void gen_op_tsub_cc(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_op_tsub_ccTV(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    gen_tag_tv(cpu_cc_src, cpu_cc_src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    gen_sub_tv(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
 static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
     TCGv r_temp;
@@ -3602,29 +3508,27 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     cpu_src2 = get_src2(insn, cpu_src2);
                     switch (xop) {
                     case 0x20: /* taddcc */
-                        gen_op_tadd_cc(cpu_dst, cpu_src1, cpu_src2);
+                        gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADD);
                         dc->cc_op = CC_OP_TADD;
                         break;
                     case 0x21: /* tsubcc */
-                        gen_op_tsub_cc(cpu_dst, cpu_src1, cpu_src2);
+                        gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUB);
                         dc->cc_op = CC_OP_TSUB;
                         break;
                     case 0x22: /* taddcctv */
-                        save_state(dc);
-                        gen_op_tadd_ccTV(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_taddcctv(cpu_dst, cpu_env,
+                                            cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
-                        tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADDTV);
                         dc->cc_op = CC_OP_TADDTV;
                         break;
                     case 0x23: /* tsubcctv */
-                        save_state(dc);
-                        gen_op_tsub_ccTV(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_tsubcctv(cpu_dst, cpu_env,
+                                            cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
-                        tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUBTV);
                         dc->cc_op = CC_OP_TSUBTV;
                         break;
                     case 0x24: /* mulscc */
commit bd49ed41ebe518c79bd52e46ce5b9cf278f8a2af
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:04 2012 -0700

    target-sparc: Tidy Tcc
    
    Share more code between unconditional and conditional paths.
    
    Move the computation of the trap number into the conditional BB;
    avoid using temporaries that have gone out of scope (cpu_tmp32)
    or rely on local temps (cpu_dst).
    
    Fully fold the exception number when the trap number is %g0+imm.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 1628cf3..111c025 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2594,41 +2594,23 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
             if (xop == 0x3a) {  /* generate trap */
-                int cond;
+                int cond = GET_FIELD(insn, 3, 6);
+                TCGv_i32 trap;
+                int l1 = -1, mask;
 
-                cpu_src1 = get_src1(insn, cpu_src1);
-                if (IS_IMM) {
-                    rs2 = GET_FIELD(insn, 25, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, rs2);
-                } else {
-                    rs2 = GET_FIELD(insn, 27, 31);
-                    if (rs2 != 0) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                if (cond == 0) {
+                    /* Trap never.  */
+                    break;
                 }
 
-                cond = GET_FIELD(insn, 3, 6);
-                if (cond == 0x8) { /* Trap Always */
-                    save_state(dc);
-                    if ((dc->def->features & CPU_FEATURE_HYPV) &&
-                        supervisor(dc))
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, UA2005_HTRAP_MASK);
-                    else
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, V8_TRAP_MASK);
-                    tcg_gen_addi_tl(cpu_dst, cpu_dst, TT_TRAP);
-                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_dst);
-                    gen_helper_raise_exception(cpu_env, cpu_tmp32);
+                save_state(dc);
 
-                } else if (cond != 0) {
+                if (cond != 8) {
+                    /* Conditional trap.  */
                     DisasCompare cmp;
-                    int l1;
 #ifdef TARGET_SPARC64
                     /* V9 icc/xcc */
                     int cc = GET_FIELD_SP(insn, 11, 12);
-
-                    save_state(dc);
                     if (cc == 0) {
                         gen_compare(&cmp, 0, cond, dc);
                     } else if (cc == 2) {
@@ -2637,27 +2619,60 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         goto illegal_insn;
                     }
 #else
-                    save_state(dc);
                     gen_compare(&cmp, 0, cond, dc);
 #endif
                     l1 = gen_new_label();
                     tcg_gen_brcond_tl(tcg_invert_cond(cmp.cond),
                                       cmp.c1, cmp.c2, l1);
                     free_compare(&cmp);
+                }
 
-                    if ((dc->def->features & CPU_FEATURE_HYPV) &&
-                        supervisor(dc))
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, UA2005_HTRAP_MASK);
-                    else
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, V8_TRAP_MASK);
-                    tcg_gen_addi_tl(cpu_dst, cpu_dst, TT_TRAP);
-                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_dst);
-                    gen_helper_raise_exception(cpu_env, cpu_tmp32);
+                mask = ((dc->def->features & CPU_FEATURE_HYPV) && supervisor(dc)
+                        ? UA2005_HTRAP_MASK : V8_TRAP_MASK);
+
+                /* Don't use the normal temporaries, as they may well have
+                   gone out of scope with the branch above.  While we're
+                   doing that we might as well pre-truncate to 32-bit.  */
+                trap = tcg_temp_new_i32();
+
+                rs1 = GET_FIELD_SP(insn, 14, 18);
+                if (IS_IMM) {
+                    rs2 = GET_FIELD_SP(insn, 0, 6);
+                    if (rs1 == 0) {
+                        tcg_gen_movi_i32(trap, (rs2 & mask) + TT_TRAP);
+                        /* Signal that the trap value is fully constant.  */
+                        mask = 0;
+                    } else {
+                        TCGv t1 = tcg_temp_new();
+                        gen_movl_reg_TN(rs1, t1);
+                        tcg_gen_trunc_tl_i32(trap, t1);
+                        tcg_temp_free(t1);
+                        tcg_gen_addi_i32(trap, trap, rs2);
+                    }
+                } else {
+                    TCGv t1 = tcg_temp_new();
+                    TCGv t2 = tcg_temp_new();
+                    rs2 = GET_FIELD_SP(insn, 0, 4);
+                    gen_movl_reg_TN(rs1, t1);
+                    gen_movl_reg_TN(rs2, t2);
+                    tcg_gen_add_tl(t1, t1, t2);
+                    tcg_gen_trunc_tl_i32(trap, t1);
+                    tcg_temp_free(t1);
+                    tcg_temp_free(t2);
+                }
+                if (mask != 0) {
+                    tcg_gen_andi_i32(trap, trap, mask);
+                    tcg_gen_addi_i32(trap, trap, TT_TRAP);
+                }
+
+                gen_helper_raise_exception(cpu_env, trap);
+                tcg_temp_free_i32(trap);
 
+                if (cond != 8) {
                     gen_set_label(l1);
+                    gen_op_next_insn();
+                    tcg_gen_exit_tb(0);
                 }
-                gen_op_next_insn();
-                tcg_gen_exit_tb(0);
                 dc->is_br = 1;
                 goto jmp_insn;
             } else if (xop == 0x28) {
commit c28ae41ecd3bec70f1db8545e0800b9023891057
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:03 2012 -0700

    target-sparc: Move sdivx and udivx out of line
    
    The branches around the exception are maintaining an otherwise
    unnecessary use of local temps for the cpu destination.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index e16b7b3..214d01d 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -714,6 +714,7 @@ trap_state* cpu_tsptr(CPUSPARCState* env);
 void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env, target_ulong addr,
                                        int is_write, int is_user,
                                        uintptr_t retaddr);
+void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr);
 
 #define TB_FLAG_FPU_ENABLED (1 << 4)
 #define TB_FLAG_AM_ENABLED (1 << 5)
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 65e1740..4555d2b 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -75,6 +75,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
+        cpu_restore_state2(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -113,6 +114,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
+        cpu_restore_state2(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -139,3 +141,29 @@ target_ulong helper_sdiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b)
 {
     return helper_sdiv_common(env, a, b, 1);
 }
+
+#ifdef TARGET_SPARC64
+int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b)
+{
+    if (b == 0) {
+        /* Raise divide by zero trap.  */
+        cpu_restore_state2(env, GETPC());
+        helper_raise_exception(env, TT_DIV_ZERO);
+    } else if (b == -1) {
+        /* Avoid overflow trap with i386 divide insn.  */
+        return -a;
+    } else {
+        return a / b;
+    }
+}
+
+uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
+{
+    if (b == 0) {
+        /* Raise divide by zero trap.  */
+        cpu_restore_state2(env, GETPC());
+        helper_raise_exception(env, TT_DIV_ZERO);
+    }
+    return a / b;
+}
+#endif
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index e3c7fdd..827df67 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -38,6 +38,10 @@ DEF_HELPER_3(udiv, tl, env, tl, tl)
 DEF_HELPER_3(udiv_cc, tl, env, tl, tl)
 DEF_HELPER_3(sdiv, tl, env, tl, tl)
 DEF_HELPER_3(sdiv_cc, tl, env, tl, tl)
+#ifdef TARGET_SPARC64
+DEF_HELPER_3(sdivx, s64, env, s64, s64)
+DEF_HELPER_3(udivx, i64, env, i64, i64)
+#endif
 DEF_HELPER_3(ldqf, void, env, tl, int)
 DEF_HELPER_3(stqf, void, env, tl, int)
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 9bec7a9..2ca9a5c 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -2390,9 +2390,8 @@ void cpu_unassigned_access(CPUSPARCState *env, target_phys_addr_t addr,
 #endif
 #endif
 
-#if !defined(CONFIG_USER_ONLY)
 /* XXX: make it generic ? */
-static void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
+void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
 {
     TranslationBlock *tb;
 
@@ -2407,6 +2406,7 @@ static void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
     }
 }
 
+#if !defined(CONFIG_USER_ONLY)
 void do_unaligned_access(CPUSPARCState *env, target_ulong addr, int is_write,
                          int is_user, uintptr_t retaddr)
 {
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index c1a1dc4..1628cf3 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -768,44 +768,6 @@ static inline void gen_op_smul(TCGv dst, TCGv src1, TCGv src2)
     gen_op_multiply(dst, src1, src2, 1);
 }
 
-#ifdef TARGET_SPARC64
-static inline void gen_trap_ifdivzero_tl(TCGv divisor)
-{
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_NE, divisor, 0, l1);
-    r_const = tcg_const_i32(TT_DIV_ZERO);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-}
-
-static inline void gen_op_sdivx(TCGv dst, TCGv src1, TCGv src2)
-{
-    int l1, l2;
-    TCGv r_temp1, r_temp2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    r_temp1 = tcg_temp_local_new();
-    r_temp2 = tcg_temp_local_new();
-    tcg_gen_mov_tl(r_temp1, src1);
-    tcg_gen_mov_tl(r_temp2, src2);
-    gen_trap_ifdivzero_tl(r_temp2);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp1, INT64_MIN, l1);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp2, -1, l1);
-    tcg_gen_movi_i64(dst, INT64_MIN);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_div_i64(dst, r_temp1, r_temp2);
-    gen_set_label(l2);
-    tcg_temp_free(r_temp1);
-    tcg_temp_free(r_temp2);
-}
-#endif
-
 // 1
 static inline void gen_op_eval_ba(TCGv dst)
 {
@@ -3591,17 +3553,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         break;
 #ifdef TARGET_SPARC64
                     case 0xd: /* V9 udivx */
-                        {
-                            TCGv r_temp1, r_temp2;
-                            r_temp1 = tcg_temp_local_new();
-                            r_temp2 = tcg_temp_local_new();
-                            tcg_gen_mov_tl(r_temp1, cpu_src1);
-                            tcg_gen_mov_tl(r_temp2, cpu_src2);
-                            gen_trap_ifdivzero_tl(r_temp2);
-                            tcg_gen_divu_i64(cpu_dst, r_temp1, r_temp2);
-                            tcg_temp_free(r_temp1);
-                            tcg_temp_free(r_temp2);
-                        }
+                        gen_helper_udivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
                         break;
 #endif
                     case 0xe: /* udiv */
@@ -4102,7 +4054,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             break;
                         }
                     case 0x2d: /* V9 sdivx */
-                        gen_op_sdivx(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_sdivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         break;
                     case 0x2e: /* V9 popc */
commit 61316742e2bc7b5b5257198f2248c42a9d238c84
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:02 2012 -0700

    target-sparc: Use movcond in gen_generic_branch
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 9aef8e5..c1a1dc4 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1107,19 +1107,15 @@ static inline void gen_branch_a(DisasContext *dc, target_ulong pc1,
 
 static inline void gen_generic_branch(DisasContext *dc)
 {
-    int l1, l2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
+    TCGv npc0 = tcg_const_tl(dc->jump_pc[0]);
+    TCGv npc1 = tcg_const_tl(dc->jump_pc[1]);
+    TCGv zero = tcg_const_tl(0);
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cond, 0, l1);
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, zero, npc0, npc1);
 
-    tcg_gen_movi_tl(cpu_npc, dc->jump_pc[0]);
-    tcg_gen_br(l2);
-
-    gen_set_label(l1);
-    tcg_gen_movi_tl(cpu_npc, dc->jump_pc[1]);
-    gen_set_label(l2);
+    tcg_temp_free(npc0);
+    tcg_temp_free(npc1);
+    tcg_temp_free(zero);
 }
 
 /* call this function before using the condition register as it may
commit c33f80f52a4621e6ed2d021adb08c7de8bec09d8
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:01 2012 -0700

    target-sparc: Use DisasCompare and movcond in MOVR
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 6c9be29..9aef8e5 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -4118,27 +4118,24 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     case 0x2f: /* V9 movr */
                         {
                             int cond = GET_FIELD_SP(insn, 10, 12);
-                            int l1;
-
-                            cpu_src1 = get_src1(insn, cpu_src1);
-
-                            l1 = gen_new_label();
+                            DisasCompare cmp;
 
-                            tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond],
-                                              cpu_src1, 0, l1);
-                            if (IS_IMM) {       /* immediate */
-                                TCGv r_const;
+                            gen_compare_reg(&cmp, cond, cpu_src1);
 
+                            /* The get_src2 above loaded the normal 13-bit
+                               immediate field, not the 10-bit field we have
+                               in movr.  But it did handle the reg case.  */
+                            if (IS_IMM) {
                                 simm = GET_FIELD_SPs(insn, 0, 9);
-                                r_const = tcg_const_tl(simm);
-                                gen_movl_TN_reg(rd, r_const);
-                                tcg_temp_free(r_const);
-                            } else {
-                                rs2 = GET_FIELD_SP(insn, 0, 4);
-                                gen_movl_reg_TN(rs2, cpu_tmp0);
-                                gen_movl_TN_reg(rd, cpu_tmp0);
+                                tcg_gen_movi_tl(cpu_src2, simm);
                             }
-                            gen_set_label(l1);
+
+                            gen_movl_reg_TN(rd, cpu_dst);
+                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                                               cmp.c1, cmp.c2,
+                                               cpu_src2, cpu_dst);
+                            free_compare(&cmp);
+                            gen_movl_TN_reg(rd, cpu_dst);
                             break;
                         }
 #endif
commit f52879b4410aa511e7c2baccc6a2fe37096b471e
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:55:00 2012 -0700

    target-sparc: Use DisasCompare and movcond in MOVCC
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index a7cd677..6c9be29 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -4075,38 +4075,34 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         {
                             int cc = GET_FIELD_SP(insn, 11, 12);
                             int cond = GET_FIELD_SP(insn, 14, 17);
-                            TCGv r_cond;
-                            int l1;
+                            DisasCompare cmp;
 
-                            r_cond = tcg_temp_new();
                             if (insn & (1 << 18)) {
-                                if (cc == 0)
-                                    gen_cond(r_cond, 0, cond, dc);
-                                else if (cc == 2)
-                                    gen_cond(r_cond, 1, cond, dc);
-                                else
+                                if (cc == 0) {
+                                    gen_compare(&cmp, 0, cond, dc);
+                                } else if (cc == 2) {
+                                    gen_compare(&cmp, 1, cond, dc);
+                                } else {
                                     goto illegal_insn;
+                                }
                             } else {
-                                gen_fcond(r_cond, cc, cond);
+                                gen_fcompare(&cmp, cc, cond);
                             }
 
-                            l1 = gen_new_label();
-
-                            tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
-                            if (IS_IMM) {       /* immediate */
-                                TCGv r_const;
-
+                            /* The get_src2 above loaded the normal 13-bit
+                               immediate field, not the 11-bit field we have
+                               in movcc.  But it did handle the reg case.  */
+                            if (IS_IMM) {
                                 simm = GET_FIELD_SPs(insn, 0, 10);
-                                r_const = tcg_const_tl(simm);
-                                gen_movl_TN_reg(rd, r_const);
-                                tcg_temp_free(r_const);
-                            } else {
-                                rs2 = GET_FIELD_SP(insn, 0, 4);
-                                gen_movl_reg_TN(rs2, cpu_tmp0);
-                                gen_movl_TN_reg(rd, cpu_tmp0);
+                                tcg_gen_movi_tl(cpu_src2, simm);
                             }
-                            gen_set_label(l1);
-                            tcg_temp_free(r_cond);
+
+                            gen_movl_reg_TN(rd, cpu_dst);
+                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                                               cmp.c1, cmp.c2,
+                                               cpu_src2, cpu_dst);
+                            free_compare(&cmp);
+                            gen_movl_TN_reg(rd, cpu_dst);
                             break;
                         }
                     case 0x2d: /* V9 sdivx */
commit 7e480893cc3eb9f307199517f0e1d5c8e72a030f
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:59 2012 -0700

    target-sparc: Use DisasCompare and movcond in FMOVR, FMOVCC
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index fa9a057..a7cd677 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2281,6 +2281,57 @@ static inline TCGv get_src2(unsigned int insn, TCGv def)
 }
 
 #ifdef TARGET_SPARC64
+static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    TCGv_i32 c32, zero, dst, s1, s2;
+
+    /* We have two choices here: extend the 32 bit data and use movcond_i64,
+       or fold the comparison down to 32 bits and use movcond_i32.  Choose
+       the latter.  */
+    c32 = tcg_temp_new_i32();
+    if (cmp->is_bool) {
+        tcg_gen_trunc_i64_i32(c32, cmp->c1);
+    } else {
+        TCGv_i64 c64 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cmp->cond, c64, cmp->c1, cmp->c2);
+        tcg_gen_trunc_i64_i32(c32, c64);
+        tcg_temp_free_i64(c64);
+    }
+
+    s1 = gen_load_fpr_F(dc, rs);
+    s2 = gen_load_fpr_F(dc, rd);
+    dst = gen_dest_fpr_F();
+    zero = tcg_const_i32(0);
+
+    tcg_gen_movcond_i32(TCG_COND_NE, dst, c32, zero, s1, s2);
+
+    tcg_temp_free_i32(c32);
+    tcg_temp_free_i32(zero);
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    TCGv_i64 dst = gen_dest_fpr_D();
+    tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2,
+                        gen_load_fpr_D(dc, rs),
+                        gen_load_fpr_D(dc, rd));
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    int qd = QFPREG(rd);
+    int qs = QFPREG(rs);
+
+    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, cmp->c2,
+                        cpu_fpr[qs / 2], cpu_fpr[qd / 2]);
+    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, cmp->c2,
+                        cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]);
+
+    gen_update_fprs_dirty(qd);
+}
+
 static inline void gen_load_trap_state_at_tl(TCGv_ptr r_tsptr, TCGv_ptr cpu_env)
 {
     TCGv_i32 r_tl = tcg_temp_new_i32();
@@ -3163,168 +3214,86 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
                 switch (xop) {
 #ifdef TARGET_SPARC64
-#define FMOVSCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
-                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVDCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
-                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVQCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
+#define FMOVCC(fcc, sz)                                                 \
+                    do {                                                \
+                        DisasCompare cmp;                               \
                         cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        gen_move_Q(rd, rs2);                            \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
+                        gen_fcompare(&cmp, fcc, cond);                  \
+                        gen_fmov##sz(dc, &cmp, rd, rs2);                \
+                        free_compare(&cmp);                             \
+                    } while (0)
+
                     case 0x001: /* V9 fmovscc %fcc0 */
-                        FMOVSCC(0);
+                        FMOVCC(0, s);
                         break;
                     case 0x002: /* V9 fmovdcc %fcc0 */
-                        FMOVDCC(0);
+                        FMOVCC(0, d);
                         break;
                     case 0x003: /* V9 fmovqcc %fcc0 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(0);
+                        FMOVCC(0, q);
                         break;
                     case 0x041: /* V9 fmovscc %fcc1 */
-                        FMOVSCC(1);
+                        FMOVCC(1, s);
                         break;
                     case 0x042: /* V9 fmovdcc %fcc1 */
-                        FMOVDCC(1);
+                        FMOVCC(1, d);
                         break;
                     case 0x043: /* V9 fmovqcc %fcc1 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(1);
+                        FMOVCC(1, q);
                         break;
                     case 0x081: /* V9 fmovscc %fcc2 */
-                        FMOVSCC(2);
+                        FMOVCC(2, s);
                         break;
                     case 0x082: /* V9 fmovdcc %fcc2 */
-                        FMOVDCC(2);
+                        FMOVCC(2, d);
                         break;
                     case 0x083: /* V9 fmovqcc %fcc2 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(2);
+                        FMOVCC(2, q);
                         break;
                     case 0x0c1: /* V9 fmovscc %fcc3 */
-                        FMOVSCC(3);
+                        FMOVCC(3, s);
                         break;
                     case 0x0c2: /* V9 fmovdcc %fcc3 */
-                        FMOVDCC(3);
+                        FMOVCC(3, d);
                         break;
                     case 0x0c3: /* V9 fmovqcc %fcc3 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(3);
+                        FMOVCC(3, q);
                         break;
-#undef FMOVSCC
-#undef FMOVDCC
-#undef FMOVQCC
-#define FMOVSCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
-                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVDCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
+#undef FMOVCC
+#define FMOVCC(xcc, sz)                                                 \
+                    do {                                                \
+                        DisasCompare cmp;                               \
                         cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
-                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
-                        gen_update_fprs_dirty(DFPREG(rd));              \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVQCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        gen_move_Q(rd, rs2);                            \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
+                        gen_compare(&cmp, xcc, cond, dc);               \
+                        gen_fmov##sz(dc, &cmp, rd, rs2);                \
+                        free_compare(&cmp);                             \
+                    } while (0)
 
                     case 0x101: /* V9 fmovscc %icc */
-                        FMOVSCC(0);
+                        FMOVCC(0, s);
                         break;
                     case 0x102: /* V9 fmovdcc %icc */
-                        FMOVDCC(0);
+                        FMOVCC(0, d);
                         break;
                     case 0x103: /* V9 fmovqcc %icc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(0);
+                        FMOVCC(0, q);
                         break;
                     case 0x181: /* V9 fmovscc %xcc */
-                        FMOVSCC(1);
+                        FMOVCC(1, s);
                         break;
                     case 0x182: /* V9 fmovdcc %xcc */
-                        FMOVDCC(1);
+                        FMOVCC(1, d);
                         break;
                     case 0x183: /* V9 fmovqcc %xcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(1);
+                        FMOVCC(1, q);
                         break;
-#undef FMOVSCC
-#undef FMOVDCC
-#undef FMOVQCC
+#undef FMOVCC
 #endif
                     case 0x51: /* fcmps, V9 %fcc */
                         cpu_src1_32 = gen_load_fpr_F(dc, rs1);
commit 3a49e7598b78bca019e35c42590914faab2a817a
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:58 2012 -0700

    target-sparc: Use DisasCompare in Tcc
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index d61a9a0..fa9a057 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2613,25 +2613,28 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_helper_raise_exception(cpu_env, cpu_tmp32);
 
                 } else if (cond != 0) {
-                    TCGv r_cond = tcg_temp_new();
+                    DisasCompare cmp;
                     int l1;
 #ifdef TARGET_SPARC64
                     /* V9 icc/xcc */
                     int cc = GET_FIELD_SP(insn, 11, 12);
 
                     save_state(dc);
-                    if (cc == 0)
-                        gen_cond(r_cond, 0, cond, dc);
-                    else if (cc == 2)
-                        gen_cond(r_cond, 1, cond, dc);
-                    else
+                    if (cc == 0) {
+                        gen_compare(&cmp, 0, cond, dc);
+                    } else if (cc == 2) {
+                        gen_compare(&cmp, 1, cond, dc);
+                    } else {
                         goto illegal_insn;
+                    }
 #else
                     save_state(dc);
-                    gen_cond(r_cond, 0, cond, dc);
+                    gen_compare(&cmp, 0, cond, dc);
 #endif
                     l1 = gen_new_label();
-                    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
+                    tcg_gen_brcond_tl(tcg_invert_cond(cmp.cond),
+                                      cmp.c1, cmp.c2, l1);
+                    free_compare(&cmp);
 
                     if ((dc->def->features & CPU_FEATURE_HYPV) &&
                         supervisor(dc))
@@ -2643,7 +2646,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_helper_raise_exception(cpu_env, cpu_tmp32);
 
                     gen_set_label(l1);
-                    tcg_temp_free(r_cond);
                 }
                 gen_op_next_insn();
                 tcg_gen_exit_tb(0);
commit 416fcaea1ef7e9a71ccb0a968de594618ed7675e
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:57 2012 -0700

    target-sparc: Introduce DisasCompare and functions to generate it
    
    For the moment gen_cond et al retain their existing interface,
    using setcond to turn a (potential) comparison back into a boolean.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index d759da2..d61a9a0 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -86,6 +86,13 @@ typedef struct DisasContext {
     int n_t32;
 } DisasContext;
 
+typedef struct {
+    TCGCond cond;
+    bool is_bool;
+    bool g1, g2;
+    TCGv c1, c2;
+} DisasCompare;
+
 // This function uses non-native bit order
 #define GET_FIELD(X, FROM, TO)                                  \
     ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1))
@@ -1166,10 +1173,28 @@ static inline void gen_op_next_insn(void)
     tcg_gen_addi_tl(cpu_npc, cpu_npc, 4);
 }
 
-static inline void gen_cond(TCGv r_dst, unsigned int cc, unsigned int cond,
-                            DisasContext *dc)
+static void free_compare(DisasCompare *cmp)
+{
+    if (!cmp->g1) {
+        tcg_temp_free(cmp->c1);
+    }
+    if (!cmp->g2) {
+        tcg_temp_free(cmp->c2);
+    }
+}
+
+static void gen_compare(DisasCompare *cmp, unsigned int cc, unsigned int cond,
+                        DisasContext *dc)
 {
     TCGv_i32 r_src;
+    TCGv r_dst;
+
+    /* For now we still generate a straight boolean result.  */
+    cmp->cond = TCG_COND_NE;
+    cmp->is_bool = true;
+    cmp->g1 = cmp->g2 = false;
+    cmp->c1 = r_dst = tcg_temp_new();
+    cmp->c2 = tcg_const_tl(0);
 
 #ifdef TARGET_SPARC64
     if (cc)
@@ -1239,9 +1264,17 @@ static inline void gen_cond(TCGv r_dst, unsigned int cc, unsigned int cond,
     }
 }
 
-static inline void gen_fcond(TCGv r_dst, unsigned int cc, unsigned int cond)
+static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond)
 {
     unsigned int offset;
+    TCGv r_dst;
+
+    /* For now we still generate a straight boolean result.  */
+    cmp->cond = TCG_COND_NE;
+    cmp->is_bool = true;
+    cmp->g1 = cmp->g2 = false;
+    cmp->c1 = r_dst = tcg_temp_new();
+    cmp->c2 = tcg_const_tl(0);
 
     switch (cc) {
     default:
@@ -1311,6 +1344,37 @@ static inline void gen_fcond(TCGv r_dst, unsigned int cc, unsigned int cond)
     }
 }
 
+static void gen_cond(TCGv r_dst, unsigned int cc, unsigned int cond,
+                     DisasContext *dc)
+{
+    DisasCompare cmp;
+    gen_compare(&cmp, cc, cond, dc);
+
+    /* The interface is to return a boolean in r_dst.  */
+    if (cmp.is_bool) {
+        tcg_gen_mov_tl(r_dst, cmp.c1);
+    } else {
+        tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+    }
+
+    free_compare(&cmp);
+}
+
+static void gen_fcond(TCGv r_dst, unsigned int cc, unsigned int cond)
+{
+    DisasCompare cmp;
+    gen_fcompare(&cmp, cc, cond);
+
+    /* The interface is to return a boolean in r_dst.  */
+    if (cmp.is_bool) {
+        tcg_gen_mov_tl(r_dst, cmp.c1);
+    } else {
+        tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+    }
+
+    free_compare(&cmp);
+}
+
 #ifdef TARGET_SPARC64
 // Inverted logic
 static const int gen_tcg_cond_reg[8] = {
@@ -1324,15 +1388,25 @@ static const int gen_tcg_cond_reg[8] = {
     TCG_COND_LT,
 };
 
+static void gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src)
+{
+    cmp->cond = tcg_invert_cond(gen_tcg_cond_reg[cond]);
+    cmp->is_bool = false;
+    cmp->g1 = true;
+    cmp->g2 = false;
+    cmp->c1 = r_src;
+    cmp->c2 = tcg_const_tl(0);
+}
+
 static inline void gen_cond_reg(TCGv r_dst, int cond, TCGv r_src)
 {
-    int l1;
+    DisasCompare cmp;
+    gen_compare_reg(&cmp, cond, r_src);
 
-    l1 = gen_new_label();
-    tcg_gen_movi_tl(r_dst, 0);
-    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], r_src, 0, l1);
-    tcg_gen_movi_tl(r_dst, 1);
-    gen_set_label(l1);
+    /* The interface is to return a boolean in r_dst.  */
+    tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+
+    free_compare(&cmp);
 }
 #endif
 
commit 2e655fe768d9873d07be3be264c042c35720694b
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:56 2012 -0700

    target-sparc: Tidy gen_generic_branch interface
    
    The arguments passed are always the same.
    Pass down just DisasContext instead.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4cc7887..d759da2 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1098,21 +1098,20 @@ static inline void gen_branch_a(DisasContext *dc, target_ulong pc1,
     gen_goto_tb(dc, 1, pc2 + 4, pc2 + 8);
 }
 
-static inline void gen_generic_branch(target_ulong npc1, target_ulong npc2,
-                                      TCGv r_cond)
+static inline void gen_generic_branch(DisasContext *dc)
 {
     int l1, l2;
 
     l1 = gen_new_label();
     l2 = gen_new_label();
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cond, 0, l1);
 
-    tcg_gen_movi_tl(cpu_npc, npc1);
+    tcg_gen_movi_tl(cpu_npc, dc->jump_pc[0]);
     tcg_gen_br(l2);
 
     gen_set_label(l1);
-    tcg_gen_movi_tl(cpu_npc, npc2);
+    tcg_gen_movi_tl(cpu_npc, dc->jump_pc[1]);
     gen_set_label(l2);
 }
 
@@ -1121,7 +1120,7 @@ static inline void gen_generic_branch(target_ulong npc1, target_ulong npc2,
 static inline void flush_cond(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
+        gen_generic_branch(dc);
         dc->npc = DYNAMIC_PC;
     }
 }
@@ -1129,7 +1128,7 @@ static inline void flush_cond(DisasContext *dc)
 static inline void save_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
+        gen_generic_branch(dc);
         dc->npc = DYNAMIC_PC;
     } else if (dc->npc != DYNAMIC_PC) {
         tcg_gen_movi_tl(cpu_npc, dc->npc);
@@ -1150,7 +1149,7 @@ static inline void save_state(DisasContext *dc)
 static inline void gen_mov_pc_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
+        gen_generic_branch(dc);
         tcg_gen_mov_tl(cpu_pc, cpu_npc);
         dc->pc = DYNAMIC_PC;
     } else if (dc->npc == DYNAMIC_PC) {
commit 934da7ee0840f582c7720d0ce4507456d9955feb
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:55 2012 -0700

    target-sparc: Tidy save_npc interface
    
    Use the cpu_cond global register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index d9e1b01..4cc7887 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1126,10 +1126,10 @@ static inline void flush_cond(DisasContext *dc)
     }
 }
 
-static inline void save_npc(DisasContext *dc, TCGv cond)
+static inline void save_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
         dc->npc = DYNAMIC_PC;
     } else if (dc->npc != DYNAMIC_PC) {
         tcg_gen_movi_tl(cpu_npc, dc->npc);
@@ -1144,7 +1144,7 @@ static inline void save_state(DisasContext *dc)
         dc->cc_op = CC_OP_FLAGS;
         gen_helper_compute_psr(cpu_env);
     }
-    save_npc(dc, cpu_cond);
+    save_npc(dc);
 }
 
 static inline void gen_mov_pc_npc(DisasContext *dc)
@@ -5372,7 +5372,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
         } else {
             if (dc->pc != DYNAMIC_PC)
                 tcg_gen_movi_tl(cpu_pc, dc->pc);
-            save_npc(dc, cpu_cond);
+            save_npc(dc);
             tcg_gen_exit_tb(0);
         }
     }
commit 13a6dd00433005831c2bc671b0e085cb5b04b424
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:54 2012 -0700

    target-sparc: Tidy gen_mov_pc_npc interface
    
    Use the cpu_cond global register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index eb95260..d9e1b01 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1147,10 +1147,10 @@ static inline void save_state(DisasContext *dc)
     save_npc(dc, cpu_cond);
 }
 
-static inline void gen_mov_pc_npc(DisasContext *dc, TCGv cond)
+static inline void gen_mov_pc_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
         tcg_gen_mov_tl(cpu_pc, cpu_npc);
         dc->pc = DYNAMIC_PC;
     } else if (dc->npc == DYNAMIC_PC) {
@@ -2499,7 +2499,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             gen_movl_TN_reg(15, r_const);
             tcg_temp_free(r_const);
             target += dc->pc;
-            gen_mov_pc_npc(dc, cpu_cond);
+            gen_mov_pc_npc(dc);
 #ifdef TARGET_SPARC64
             if (unlikely(AM_CHECK(dc))) {
                 target &= 0xffffffffULL;
@@ -4573,7 +4573,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         tcg_gen_mov_tl(cpu_dst, cpu_src1);
                 }
                 gen_helper_restore(cpu_env);
-                gen_mov_pc_npc(dc, cpu_cond);
+                gen_mov_pc_npc(dc);
                 r_const = tcg_const_i32(3);
                 gen_helper_check_align(cpu_env, cpu_dst, r_const);
                 tcg_temp_free_i32(r_const);
@@ -4603,7 +4603,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         r_pc = tcg_const_tl(dc->pc);
                         gen_movl_TN_reg(rd, r_pc);
                         tcg_temp_free(r_pc);
-                        gen_mov_pc_npc(dc, cpu_cond);
+                        gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
                         gen_helper_check_align(cpu_env, cpu_dst, r_const);
                         tcg_temp_free_i32(r_const);
@@ -4619,7 +4619,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 
                         if (!supervisor(dc))
                             goto priv_insn;
-                        gen_mov_pc_npc(dc, cpu_cond);
+                        gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
                         gen_helper_check_align(cpu_env, cpu_dst, r_const);
                         tcg_temp_free_i32(r_const);
commit 66442b07ae37a23c727ecd8c30a25054a7f21886
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:53 2012 -0700

    target-sparc: Tidy save_state interface
    
    Use the cpu_cond global register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 9787664..eb95260 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1136,7 +1136,7 @@ static inline void save_npc(DisasContext *dc, TCGv cond)
     }
 }
 
-static inline void save_state(DisasContext *dc, TCGv cond)
+static inline void save_state(DisasContext *dc)
 {
     tcg_gen_movi_tl(cpu_pc, dc->pc);
     /* flush pending conditional evaluations before exposing cpu state */
@@ -1144,7 +1144,7 @@ static inline void save_state(DisasContext *dc, TCGv cond)
         dc->cc_op = CC_OP_FLAGS;
         gen_helper_compute_psr(cpu_env);
     }
-    save_npc(dc, cond);
+    save_npc(dc, cpu_cond);
 }
 
 static inline void gen_mov_pc_npc(DisasContext *dc, TCGv cond)
@@ -1621,7 +1621,7 @@ static int gen_trap_ifnofpu(DisasContext *dc)
     if (!dc->fpu_enabled) {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_NFPU_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -2529,7 +2529,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 
                 cond = GET_FIELD(insn, 3, 6);
                 if (cond == 0x8) { /* Trap Always */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     if ((dc->def->features & CPU_FEATURE_HYPV) &&
                         supervisor(dc))
                         tcg_gen_andi_tl(cpu_dst, cpu_dst, UA2005_HTRAP_MASK);
@@ -2546,7 +2546,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     /* V9 icc/xcc */
                     int cc = GET_FIELD_SP(insn, 11, 12);
 
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     if (cc == 0)
                         gen_cond(r_cond, 0, cond, dc);
                     else if (cc == 2)
@@ -2554,7 +2554,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     else
                         goto illegal_insn;
 #else
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_cond(r_cond, 0, cond, dc);
 #endif
                     l1 = gen_new_label();
@@ -2854,7 +2854,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 break;
             } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
 #ifdef TARGET_SPARC64
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 gen_helper_flushw(cpu_env);
 #else
                 if (!supervisor(dc))
@@ -2871,7 +2871,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
                 xop = GET_FIELD(insn, 18, 26);
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
                 case 0x1: /* fmovs */
                     cpu_src1_32 = gen_load_fpr_F(dc, rs2);
@@ -3046,7 +3046,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
                 xop = GET_FIELD(insn, 18, 26);
-                save_state(dc, cpu_cond);
+                save_state(dc);
 #ifdef TARGET_SPARC64
                 if ((xop & 0x11f) == 0x005) { // V9 fmovsr
                     int l1;
@@ -3607,14 +3607,14 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         dc->cc_op = CC_OP_TSUB;
                         break;
                     case 0x22: /* taddcctv */
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_op_tadd_ccTV(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADDTV);
                         dc->cc_op = CC_OP_TADDTV;
                         break;
                     case 0x23: /* tsubcctv */
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_op_tsub_ccTV(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUBTV);
@@ -3691,7 +3691,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             case 0x6: /* V9 wrfprs */
                                 tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
                                 tcg_gen_trunc_tl_i32(cpu_fprs, cpu_dst);
-                                save_state(dc, cpu_cond);
+                                save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
                                 dc->is_br = 1;
@@ -3818,7 +3818,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             gen_helper_wrpsr(cpu_env, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                             dc->cc_op = CC_OP_FLAGS;
-                            save_state(dc, cpu_cond);
+                            save_state(dc);
                             gen_op_next_insn();
                             tcg_gen_exit_tb(0);
                             dc->is_br = 1;
@@ -3898,7 +3898,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                     TCGv r_tmp = tcg_temp_local_new();
 
                                     tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc, cpu_cond);
+                                    save_state(dc);
                                     gen_helper_wrpstate(cpu_env, r_tmp);
                                     tcg_temp_free(r_tmp);
                                     dc->npc = DYNAMIC_PC;
@@ -3909,7 +3909,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                                     TCGv r_tmp = tcg_temp_local_new();
 
                                     tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc, cpu_cond);
+                                    save_state(dc);
                                     tcg_gen_trunc_tl_i32(cpu_tmp32, r_tmp);
                                     tcg_temp_free(r_tmp);
                                     tcg_gen_st_i32(cpu_tmp32, cpu_env,
@@ -3991,7 +3991,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                             switch (rd) {
                             case 0: // hpstate
                                 // XXX gen_op_wrhpstate();
-                                save_state(dc, cpu_cond);
+                                save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
                                 dc->is_br = 1;
@@ -4559,7 +4559,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             } else if (xop == 0x39) { /* V9 return */
                 TCGv_i32 r_const;
 
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 cpu_src1 = get_src1(insn, cpu_src1);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
@@ -4635,12 +4635,12 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     /* nop */
                     break;
                 case 0x3c:      /* save */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_helper_save(cpu_env);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x3d:      /* restore */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_helper_restore(cpu_env);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -4723,7 +4723,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     else {
                         TCGv_i32 r_const;
 
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         r_const = tcg_const_i32(7);
                         /* XXX remove alignment check */
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
@@ -4774,7 +4774,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 4, 0);
                     break;
                 case 0x11:      /* lduba, load unsigned byte alternate */
@@ -4784,7 +4784,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 1, 0);
                     break;
                 case 0x12:      /* lduha, load unsigned halfword alternate */
@@ -4794,7 +4794,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 2, 0);
                     break;
                 case 0x13:      /* ldda, load double word alternate */
@@ -4806,7 +4806,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
                     if (rd & 1)
                         goto illegal_insn;
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldda_asi(cpu_val, cpu_addr, insn, rd);
                     goto skip_move;
                 case 0x19:      /* ldsba, load signed byte alternate */
@@ -4816,7 +4816,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 1, 1);
                     break;
                 case 0x1a:      /* ldsha, load signed halfword alternate */
@@ -4826,7 +4826,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 2, 1);
                     break;
                 case 0x1d:      /* ldstuba -- XXX: should be atomically */
@@ -4836,7 +4836,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldstub_asi(cpu_val, cpu_addr, insn);
                     break;
                 case 0x1f:      /* swapa, swap reg with alt. memory. Also
@@ -4848,7 +4848,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_movl_reg_TN(rd, cpu_val);
                     gen_swap_asi(cpu_val, cpu_addr, insn);
                     break;
@@ -4870,11 +4870,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     tcg_gen_qemu_ld64(cpu_val, cpu_addr, dc->mem_idx);
                     break;
                 case 0x18: /* V9 ldswa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 4, 1);
                     break;
                 case 0x1b: /* V9 ldxa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 8, 0);
                     break;
                 case 0x2d: /* V9 prefetch, no effect */
@@ -4883,7 +4883,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 4, rd);
                     gen_update_fprs_dirty(rd);
                     goto skip_move;
@@ -4891,7 +4891,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 8, DFPREG(rd));
                     gen_update_fprs_dirty(DFPREG(rd));
                     goto skip_move;
@@ -4902,7 +4902,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 16, QFPREG(rd));
                     gen_update_fprs_dirty(QFPREG(rd));
                     goto skip_move;
@@ -4918,7 +4918,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
@@ -4989,7 +4989,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     else {
                         TCGv_i32 r_const;
 
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_address_mask(dc, cpu_addr);
                         r_const = tcg_const_i32(7);
                         /* XXX remove alignment check */
@@ -5008,7 +5008,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 4);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5019,7 +5019,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 1);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5030,7 +5030,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 2);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5044,7 +5044,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     if (rd & 1)
                         goto illegal_insn;
                     else {
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_stda_asi(cpu_val, cpu_addr, insn, rd);
                     }
                     break;
@@ -5055,7 +5055,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     tcg_gen_qemu_st64(cpu_val, cpu_addr, dc->mem_idx);
                     break;
                 case 0x1e: /* V9 stxa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 8);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5067,7 +5067,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
                     gen_address_mask(dc, cpu_addr);
@@ -5124,7 +5124,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     goto illegal_insn;
                 }
             } else if (xop > 0x33 && xop < 0x3f) {
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
 #ifdef TARGET_SPARC64
                 case 0x34: /* V9 stfa */
@@ -5194,7 +5194,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_ILL_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5205,7 +5205,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_UNIMP_FLUSH);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5217,7 +5217,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_PRIV_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5226,13 +5226,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     goto egress;
 #endif
  nfpu_insn:
-    save_state(dc, cpu_cond);
+    save_state(dc);
     gen_op_fpexception_im(FSR_FTT_UNIMPFPOP);
     dc->is_br = 1;
     goto egress;
 #if !defined(CONFIG_USER_ONLY) && !defined(TARGET_SPARC64)
  nfq_insn:
-    save_state(dc, cpu_cond);
+    save_state(dc);
     gen_op_fpexception_im(FSR_FTT_SEQ_ERROR);
     dc->is_br = 1;
     goto egress;
@@ -5242,7 +5242,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     {
         TCGv r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_NCP_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free(r_const);
@@ -5308,7 +5308,7 @@ static inline void gen_intermediate_code_internal(TranslationBlock * tb,
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     if (dc->pc != pc_start)
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                     gen_helper_debug(cpu_env);
                     tcg_gen_exit_tb(0);
                     dc->is_br = 1;
commit 5b12f1e8a1aad8030c3ec5790cb5f8f370a1df1f
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:52 2012 -0700

    target-sparc: Tidy gen_trap_ifnofpu interface
    
    We always pass cpu_cond to the cond parameter.  Use that global
    register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 5b2d5ea..9787664 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1615,13 +1615,13 @@ static inline void gen_op_fpexception_im(int fsr_flags)
     tcg_temp_free_i32(r_const);
 }
 
-static int gen_trap_ifnofpu(DisasContext *dc, TCGv r_cond)
+static int gen_trap_ifnofpu(DisasContext *dc)
 {
 #if !defined(CONFIG_USER_ONLY)
     if (!dc->fpu_enabled) {
         TCGv_i32 r_const;
 
-        save_state(dc, r_cond);
+        save_state(dc, cpu_cond);
         r_const = tcg_const_i32(TT_NFPU_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -2439,8 +2439,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             case 0x5:           /* V9 FBPcc */
                 {
                     int cc = GET_FIELD_SP(insn, 20, 21);
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     target = GET_FIELD_SP(insn, 0, 18);
                     target = sign_extend(target, 19);
                     target <<= 2;
@@ -2463,8 +2464,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 }
             case 0x6:           /* FBN+x */
                 {
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     target = GET_FIELD(insn, 10, 31);
                     target = sign_extend(target, 22);
                     target <<= 2;
@@ -2643,8 +2645,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0xf: /* V9 membar */
                     break; /* no effect */
                 case 0x13: /* Graphics Status */
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     gen_movl_TN_reg(rd, cpu_gsr);
                     break;
                 case 0x16: /* Softint */
@@ -2861,8 +2864,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 break;
 #endif
             } else if (xop == 0x34) {   /* FPU Operations */
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 gen_op_clear_ieee_excp_and_FTT();
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
@@ -3035,8 +3039,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #ifdef TARGET_SPARC64
                 int cond;
 #endif
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 gen_op_clear_ieee_excp_and_FTT();
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
@@ -3699,8 +3704,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
                                 break;
                             case 0x13: /* Graphics Status */
-                                if (gen_trap_ifnofpu(dc, cpu_cond))
+                                if (gen_trap_ifnofpu(dc)) {
                                     goto jmp_insn;
+                                }
                                 tcg_gen_xor_tl(cpu_gsr, cpu_src1, cpu_src2);
                                 break;
                             case 0x14: /* Softint set */
@@ -4105,8 +4111,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 int opf = GET_FIELD_SP(insn, 5, 13);
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
 
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
@@ -4873,7 +4880,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x2d: /* V9 prefetch, no effect */
                     goto skip_move;
                 case 0x30: /* V9 ldfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     save_state(dc, cpu_cond);
@@ -4881,7 +4888,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_update_fprs_dirty(rd);
                     goto skip_move;
                 case 0x33: /* V9 lddfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     save_state(dc, cpu_cond);
@@ -4892,7 +4899,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     goto skip_move;
                 case 0x32: /* V9 ldqfa */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     save_state(dc, cpu_cond);
@@ -4908,8 +4915,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 save_state(dc, cpu_cond);
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
@@ -5056,8 +5064,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     goto illegal_insn;
                 }
             } else if (xop > 0x23 && xop < 0x28) {
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 save_state(dc, cpu_cond);
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
@@ -5100,8 +5109,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #else
                     if (!supervisor(dc))
                         goto priv_insn;
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     goto nfq_insn;
 #endif
 #endif
@@ -5118,7 +5128,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 switch (xop) {
 #ifdef TARGET_SPARC64
                 case 0x34: /* V9 stfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     gen_stf_asi(cpu_addr, insn, 4, rd);
@@ -5128,7 +5138,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         TCGv_i32 r_const;
 
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                        if (gen_trap_ifnofpu(dc)) {
                             goto jmp_insn;
                         }
                         r_const = tcg_const_i32(7);
@@ -5138,7 +5148,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     }
                     break;
                 case 0x37: /* V9 stdfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     gen_stf_asi(cpu_addr, insn, 8, DFPREG(rd));
commit dee8913cbf82a03fed661ac6c461491045dac7f5
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:51 2012 -0700

    target-sparc: Tidy flush_cond interface
    
    We always pass cpu_cond to the cond parameter.  Use that global
    register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 079a040..5b2d5ea 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1118,10 +1118,10 @@ static inline void gen_generic_branch(target_ulong npc1, target_ulong npc2,
 
 /* call this function before using the condition register as it may
    have been set for a jump */
-static inline void flush_cond(DisasContext *dc, TCGv cond)
+static inline void flush_cond(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cpu_cond);
         dc->npc = DYNAMIC_PC;
     }
 }
@@ -1367,7 +1367,7 @@ static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, cpu_cond);
+        flush_cond(dc);
         gen_cond(cpu_cond, cc, cond, dc);
         if (a) {
             gen_branch_a(dc, target, dc->npc, cpu_cond);
@@ -1416,7 +1416,7 @@ static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, cpu_cond);
+        flush_cond(dc);
         gen_fcond(cpu_cond, cc, cond);
         if (a) {
             gen_branch_a(dc, target, dc->npc, cpu_cond);
@@ -1445,7 +1445,7 @@ static void do_branch_reg(DisasContext *dc, int32_t offset, uint32_t insn,
     if (unlikely(AM_CHECK(dc))) {
         target &= 0xffffffffULL;
     }
-    flush_cond(dc, cpu_cond);
+    flush_cond(dc);
     gen_cond_reg(cpu_cond, cond, r_reg);
     if (a) {
         gen_branch_a(dc, target, dc->npc, cpu_cond);
commit d4a288ef9c2b432307961429bdcacb2416ad8a99
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:50 2012 -0700

    target-sparc: Tidy do_branch interfaces
    
    We always pass cpu_cond to the r_cond parameter.  Use that global
    register directly instead of passing it down.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index e5ebedf..079a040 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1337,8 +1337,7 @@ static inline void gen_cond_reg(TCGv r_dst, int cond, TCGv r_src)
 }
 #endif
 
-static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
-                      TCGv r_cond)
+static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
 {
     unsigned int cond = GET_FIELD(insn, 3, 6), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1368,10 +1367,10 @@ static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, r_cond);
-        gen_cond(r_cond, cc, cond, dc);
+        flush_cond(dc, cpu_cond);
+        gen_cond(cpu_cond, cc, cond, dc);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, r_cond);
+            gen_branch_a(dc, target, dc->npc, cpu_cond);
             dc->is_br = 1;
         } else {
             dc->pc = dc->npc;
@@ -1387,8 +1386,7 @@ static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
     }
 }
 
-static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
-                      TCGv r_cond)
+static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
 {
     unsigned int cond = GET_FIELD(insn, 3, 6), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1418,10 +1416,10 @@ static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, r_cond);
-        gen_fcond(r_cond, cc, cond);
+        flush_cond(dc, cpu_cond);
+        gen_fcond(cpu_cond, cc, cond);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, r_cond);
+            gen_branch_a(dc, target, dc->npc, cpu_cond);
             dc->is_br = 1;
         } else {
             dc->pc = dc->npc;
@@ -1439,7 +1437,7 @@ static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
 
 #ifdef TARGET_SPARC64
 static void do_branch_reg(DisasContext *dc, int32_t offset, uint32_t insn,
-                          TCGv r_cond, TCGv r_reg)
+                          TCGv r_reg)
 {
     unsigned int cond = GET_FIELD_SP(insn, 25, 27), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1447,10 +1445,10 @@ static void do_branch_reg(DisasContext *dc, int32_t offset, uint32_t insn,
     if (unlikely(AM_CHECK(dc))) {
         target &= 0xffffffffULL;
     }
-    flush_cond(dc, r_cond);
-    gen_cond_reg(r_cond, cond, r_reg);
+    flush_cond(dc, cpu_cond);
+    gen_cond_reg(cpu_cond, cond, r_reg);
     if (a) {
-        gen_branch_a(dc, target, dc->npc, r_cond);
+        gen_branch_a(dc, target, dc->npc, cpu_cond);
         dc->is_br = 1;
     } else {
         dc->pc = dc->npc;
@@ -2421,9 +2419,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     target <<= 2;
                     cc = GET_FIELD_SP(insn, 20, 21);
                     if (cc == 0)
-                        do_branch(dc, target, insn, 0, cpu_cond);
+                        do_branch(dc, target, insn, 0);
                     else if (cc == 2)
-                        do_branch(dc, target, insn, 1, cpu_cond);
+                        do_branch(dc, target, insn, 1);
                     else
                         goto illegal_insn;
                     goto jmp_insn;
@@ -2435,7 +2433,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     target = sign_extend(target, 16);
                     target <<= 2;
                     cpu_src1 = get_src1(insn, cpu_src1);
-                    do_branch_reg(dc, target, insn, cpu_cond, cpu_src1);
+                    do_branch_reg(dc, target, insn, cpu_src1);
                     goto jmp_insn;
                 }
             case 0x5:           /* V9 FBPcc */
@@ -2446,7 +2444,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     target = GET_FIELD_SP(insn, 0, 18);
                     target = sign_extend(target, 19);
                     target <<= 2;
-                    do_fbranch(dc, target, insn, cc, cpu_cond);
+                    do_fbranch(dc, target, insn, cc);
                     goto jmp_insn;
                 }
 #else
@@ -2460,7 +2458,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     target = GET_FIELD(insn, 10, 31);
                     target = sign_extend(target, 22);
                     target <<= 2;
-                    do_branch(dc, target, insn, 0, cpu_cond);
+                    do_branch(dc, target, insn, 0);
                     goto jmp_insn;
                 }
             case 0x6:           /* FBN+x */
@@ -2470,7 +2468,7 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     target = GET_FIELD(insn, 10, 31);
                     target = sign_extend(target, 22);
                     target <<= 2;
-                    do_fbranch(dc, target, insn, 0, cpu_cond);
+                    do_fbranch(dc, target, insn, 0);
                     goto jmp_insn;
                 }
             case 0x4:           /* SETHI */
commit b884fc5ecaacd241829f225282356d59eb186645
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:49 2012 -0700

    target-sparc: Make CPU_LOG_INT useful by default
    
    No need for ifdefs when the log mask does just as well.
    No need to print pc/npc when we're dumping the whole cpu state.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/int32_helper.c b/target-sparc/int32_helper.c
index 5e33d50..9ac5aac 100644
--- a/target-sparc/int32_helper.c
+++ b/target-sparc/int32_helper.c
@@ -21,7 +21,7 @@
 #include "trace.h"
 #include "sysemu.h"
 
-//#define DEBUG_PCALL
+#define DEBUG_PCALL
 
 #ifdef DEBUG_PCALL
 static const char * const excp_names[0x80] = {
@@ -78,10 +78,7 @@ void do_interrupt(CPUSPARCState *env)
             }
         }
 
-        qemu_log("%6d: %s (v=%02x) pc=%08x npc=%08x SP=%08x\n",
-                count, name, intno,
-                env->pc,
-                env->npc, env->regwptr[6]);
+        qemu_log("%6d: %s (v=%02x)\n", count, name, intno);
         log_cpu_state(env, 0);
 #if 0
         {
diff --git a/target-sparc/int64_helper.c b/target-sparc/int64_helper.c
index 5e3eff7..5d0bc6c 100644
--- a/target-sparc/int64_helper.c
+++ b/target-sparc/int64_helper.c
@@ -21,7 +21,7 @@
 #include "helper.h"
 #include "trace.h"
 
-//#define DEBUG_PCALL
+#define DEBUG_PCALL
 
 #ifdef DEBUG_PCALL
 static const char * const excp_names[0x80] = {
@@ -84,11 +84,7 @@ void do_interrupt(CPUSPARCState *env)
             }
         }
 
-        qemu_log("%6d: %s (v=%04x) pc=%016" PRIx64 " npc=%016" PRIx64
-                " SP=%016" PRIx64 "\n",
-                count, name, intno,
-                env->pc,
-                env->npc, env->regwptr[6]);
+        qemu_log("%6d: %s (v=%04x)\n", count, name, intno);
         log_cpu_state(env, 0);
 #if 0
         {
commit 76a23ca099084ce72f5c30a3c5dc18e331384c59
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Oct 5 16:54:48 2012 -0700

    target-sparc: Tidy cpu_dump_state
    
    We don't really need to be told that %g are general registers, etc.
    Issue a trailing newline to separate blocks.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index eb9f0e7..882d306 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -792,7 +792,6 @@ void cpu_dump_state(CPUSPARCState *env, FILE *f, fprintf_function cpu_fprintf,
 
     cpu_fprintf(f, "pc: " TARGET_FMT_lx "  npc: " TARGET_FMT_lx "\n", env->pc,
                 env->npc);
-    cpu_fprintf(f, "General Registers:\n");
 
     for (i = 0; i < 8; i++) {
         if (i % REGS_PER_LINE == 0) {
@@ -803,7 +802,6 @@ void cpu_dump_state(CPUSPARCState *env, FILE *f, fprintf_function cpu_fprintf,
             cpu_fprintf(f, "\n");
         }
     }
-    cpu_fprintf(f, "\nCurrent Register Window:\n");
     for (x = 0; x < 3; x++) {
         for (i = 0; i < 8; i++) {
             if (i % REGS_PER_LINE == 0) {
@@ -817,10 +815,10 @@ void cpu_dump_state(CPUSPARCState *env, FILE *f, fprintf_function cpu_fprintf,
             }
         }
     }
-    cpu_fprintf(f, "\nFloating Point Registers:\n");
+
     for (i = 0; i < TARGET_DPREGS; i++) {
         if ((i & 3) == 0) {
-            cpu_fprintf(f, "%%f%02d:", i * 2);
+            cpu_fprintf(f, "%%f%02d: ", i * 2);
         }
         cpu_fprintf(f, " %016" PRIx64, env->fpr[i].ll);
         if ((i & 3) == 3) {
@@ -850,6 +848,7 @@ void cpu_dump_state(CPUSPARCState *env, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf(f, "fsr: " TARGET_FMT_lx " y: " TARGET_FMT_lx "\n",
                 env->fsr, env->y);
 #endif
+    cpu_fprintf(f, "\n");
 }
 
 static void sparc_cpu_initfn(Object *obj)
commit d838201111894b17ca9123d5d2a74ab4cf6376d6
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Oct 4 20:29:02 2012 +0200

    tcg: Remove redundant pointer from TCGContext
    
    The pointer entry 'temps' always refers to the array entry 'static_temps'.
    Removing the pointer and renaming 'static_temps' to 'temps' reduces the
    size of TCGContext (4 or 8 byte) and allows better code generation.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 78ef50b..32cd0c6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -242,7 +242,6 @@ void tcg_context_init(TCGContext *s)
     int *sorted_args;
 
     memset(s, 0, sizeof(*s));
-    s->temps = s->static_temps;
     s->nb_globals = 0;
     
     /* Count total number of arguments and allocate the corresponding
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 562f0ad..7bafe0e 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -367,7 +367,6 @@ struct TCGContext {
     TCGPool *pool_first, *pool_current, *pool_first_large;
     TCGLabel *labels;
     int nb_labels;
-    TCGTemp *temps; /* globals first, temps after */
     int nb_globals;
     int nb_temps;
     /* index of free temps, -1 if none */
@@ -393,7 +392,7 @@ struct TCGContext {
     int frame_reg;
 
     uint8_t *code_ptr;
-    TCGTemp static_temps[TCG_MAX_TEMPS];
+    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
 
     TCGHelperInfo *helpers;
     int nb_helpers;
commit 046dbab95f33e007428190610d638d2fcaf37fdf
Merge: 048d361... 1273d9c...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Oct 6 18:54:46 2012 +0200

    Merge branch 'target-arm.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm
    
    * 'target-arm.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm:
      target-arm: Drop unused DECODE_CPREG_CRN macro
      target-arm: use deposit instead of hardcoded version
      target-arm: mark a few integer helpers const and pure
      target-arm: convert sar, shl and shr helpers to TCG
      target-arm: convert add_cc and sub_cc helpers to TCG
      target-arm: use globals for CC flags
      target-arm: Reinstate display of VFP registers in cpu_dump_state
      cpu_dump_state: move DUMP_FPU and DUMP_CCOP flags from x86-only to generic

commit 048d3612a51b3da45081091b734f93428925ebf8
Merge: 6b2f90f... 02cd521...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Oct 6 18:54:14 2012 +0200

    Merge branch 'trivial-patches' of git://github.com/stefanha/qemu
    
    * 'trivial-patches' of git://github.com/stefanha/qemu:
      versatilepb: Use symbolic indices for ARM PIC
      qdev: kill bogus comment
      qemu-barrier: Fix compiler version check for future gcc versions
      hw: Add missing 'static' attribute for QEMUMachine
      cleanup useless return sentence
      qemu-sockets: Fix compiler warning (regression for MinGW)
      vnc: Fix spelling (hellmen -> hellman) in comment
      slirp: Fix spelling in comment (enought -> enough, insure -> ensure)
      tcg/arm: Use tcg_out_mov_reg rather than inline equivalent code
      cpu: Add missing 'static' attribute to qemu_global_mutex
      configure: Support empty target list (--target-list=)
      hw: Fix return value check for bdrv_read, bdrv_write

commit 6b2f90fbbd31d594238098f46ef63ee307a12f55
Merge: 1d31fca... ef8beb0...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Oct 6 18:51:36 2012 +0200

    Merge branch 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf
    
    * 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf: (35 commits)
      PPC: KVM: Fix BAT put
      PPC: e500: Only expose even TLB sizes in initial TLB
      ppc/pseries: Reset VPA registration on CPU reset
      pseries: Don't test for MSR_PR for hypercalls under KVM
      PPC: e500: calculate initrd_base like dt_base
      PPC: e500: increase DTC_LOAD_PAD
      device tree: simplify dumpdtb code
      fdt: move dumpdtb interpretation code to device_tree.c
      target-ppc: Remove unused power_mode field from cpu state
      pseries: Set hash table size based on RAM size
      pseries: Remove unnecessary locking from PAPR hash table hcalls
      ppc405_uc: Fix buffer overflow
      target-ppc: KVM: Fix some kernel version edge cases for kvmppc_reset_htab()
      pseries: Fix semantics of RTAS int-on, int-off and set-xive functions
      pseries: Rework implementation of TCE bypass
      pseries: Remove never used flags field from spapr vio devices
      pseries: Remove XICS irq type enum type
      pseries: Remove C bitfields from xics code
      pseries: Small cleanup to H_CEDE implementation
      pseries: Fix XICS reset
      ...

commit 1d31fca470648ec66afd8743491bfb5846306341
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sat Oct 6 12:46:15 2012 +0200

    qemu-barrier: Fix compilation on i386 hosts
    
    Commit 610b823ef66b993660f1ab1447a769f190e4f3b3 uses QEMU_GNUC_PREREQ
    on i386 hosts.
    
    That macro is defined in qemu-common.h which is not always included
    before qemu-barrier.h, so compilation on i386 hosts was broken.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/qemu-barrier.h b/qemu-barrier.h
index 16f0942..1de914e 100644
--- a/qemu-barrier.h
+++ b/qemu-barrier.h
@@ -6,6 +6,8 @@
 
 #if defined(__i386__)
 
+#include "qemu-common.h"        /* QEMU_GNUC_PREREQ */
+
 /*
  * Because of the strongly ordered x86 storage model, wmb() and rmb() are nops
  * on x86(well, a compiler barrier only).  Well, at least as long as
commit d1e321b82a021c2e86d204af870356bc72b22546
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:21:41 2012 -0700

    tcg: Add tcg_high_cond
    
    The table that was recently added for hppa is generally usable.
    And with the renumbering of the TCG_COND constants it's not too
    difficult to compute rather than have a table.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index b933430..de500ae 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -814,19 +814,6 @@ static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
     tcg_out32(s, op);
 }
 
-static TCGCond const tcg_high_cond[] = {
-    [TCG_COND_EQ] = TCG_COND_EQ,
-    [TCG_COND_NE] = TCG_COND_NE,
-    [TCG_COND_LT] = TCG_COND_LT,
-    [TCG_COND_LE] = TCG_COND_LT,
-    [TCG_COND_GT] = TCG_COND_GT,
-    [TCG_COND_GE] = TCG_COND_GT,
-    [TCG_COND_LTU] = TCG_COND_LTU,
-    [TCG_COND_LEU] = TCG_COND_LTU,
-    [TCG_COND_GTU] = TCG_COND_GTU,
-    [TCG_COND_GEU] = TCG_COND_GTU
-};
-
 static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
                             TCGArg bl, int blconst, TCGArg bh, int bhconst,
                             int label_index)
@@ -841,7 +828,7 @@ static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
         tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
         break;
     default:
-        tcg_out_brcond(s, tcg_high_cond[cond], ah, bh, bhconst, label_index);
+        tcg_out_brcond(s, tcg_high_cond(cond), ah, bh, bhconst, label_index);
         tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_brcond(s, tcg_unsigned_cond(cond),
                        al, bl, blconst, label_index);
@@ -894,7 +881,7 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
         tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
         tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
-        tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond[cond]),
+        tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond(cond)),
                        TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
         break;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 75f0239..562f0ad 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -302,16 +302,33 @@ static inline TCGCond tcg_swap_cond(TCGCond c)
     return c & 6 ? (TCGCond)(c ^ 9) : c;
 }
 
+/* Create an "unsigned" version of a "signed" comparison.  */
 static inline TCGCond tcg_unsigned_cond(TCGCond c)
 {
     return c & 2 ? (TCGCond)(c ^ 6) : c;
 }
 
+/* Must a comparison be considered unsigned?  */
 static inline bool is_unsigned_cond(TCGCond c)
 {
     return (c & 4) != 0;
 }
 
+/* Create a "high" version of a double-word comparison.
+   This removes equality from a LTE or GTE comparison.  */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GE:
+    case TCG_COND_LE:
+    case TCG_COND_GEU:
+    case TCG_COND_LEU:
+        return (TCGCond)(c ^ 8);
+    default:
+        return c;
+    }
+}
+
 #define TEMP_VAL_DEAD  0
 #define TEMP_VAL_REG   1
 #define TEMP_VAL_MEM   2
commit 0aed257f08444feb6269d0c302b35a8fb10fcb3f
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:21:40 2012 -0700

    tcg: Add TCG_COND_NEVER, TCG_COND_ALWAYS
    
    There are several cases that can be handled more easily inside both
    translators and code generators if we have out-of-band values
    for conditions.  It's easy enough to handle ALWAYS and NEVER in
    the natural way inside the tcg middle-end.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 1e61864..fbbbefe 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -335,7 +335,7 @@ enum arm_cond_code_e {
     COND_AL = 0xe,
 };
 
-static const uint8_t tcg_cond_to_arm_cond[10] = {
+static const uint8_t tcg_cond_to_arm_cond[] = {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_LT,
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 44974c4..b933430 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -732,7 +732,7 @@ static void tcg_out_branch(TCGContext *s, int label_index, int nul)
     }
 }
 
-static const uint8_t tcg_cond_to_cmp_cond[10] =
+static const uint8_t tcg_cond_to_cmp_cond[] =
 {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_EQ | COND_FALSE,
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bb2306d..4952c05 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -338,7 +338,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-static const uint8_t tcg_cond_to_jcc[10] = {
+static const uint8_t tcg_cond_to_jcc[] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
     [TCG_COND_LT] = JCC_JL,
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 35532a1..edb2b0e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -321,6 +321,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                 return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
             case TCG_COND_GTU:
                 return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+            default:
+                break;
             }
             break;
         case 64:
@@ -345,6 +347,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                 return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
             case TCG_COND_GTU:
                 return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+            default:
+                break;
             }
             break;
         }
@@ -362,6 +366,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
         case TCG_COND_LEU:
         case TCG_COND_EQ:
             return 1;
+        default:
+            break;
         }
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 56baedd..60b7b92 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -437,7 +437,7 @@ enum {
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 6e9b363..5403fc1 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -418,7 +418,7 @@ enum {
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index c0ef6ba..fd9286f 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -268,7 +268,7 @@ static const int tcg_target_call_oarg_regs[] = {
 #define S390_CC_ALWAYS  15
 
 /* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
-static const uint8_t tcg_cond_to_s390_cond[10] = {
+static const uint8_t tcg_cond_to_s390_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
@@ -284,7 +284,7 @@ static const uint8_t tcg_cond_to_s390_cond[10] = {
 /* Condition codes that result from a LOAD AND TEST.  Here, we have no
    unsigned instruction variation, however since the test is vs zero we
    can re-map the outcomes appropriately.  */
-static const uint8_t tcg_cond_to_ltr_cond[10] = {
+static const uint8_t tcg_cond_to_ltr_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index e82fab2..0c32baa 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -512,7 +512,7 @@ static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 }
 #endif
 
-static const uint8_t tcg_cond_to_bcond[10] = {
+static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_EQ] = COND_E,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_L,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index bd93fe4..5518458 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -646,29 +646,49 @@ static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
                                       TCGv_i32 arg2, int label_index)
 {
-    tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
                                        int32_t arg2, int label_index)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_brcond_i32(cond, arg1, t0, label_index);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                                         TCGv_i32 arg1, int32_t arg2)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_setcond_i32(cond, ret, arg1, t0);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_setcond_i32(cond, ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -964,17 +984,27 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
-                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
-                      TCGV_HIGH(arg2), cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+                          TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                          TCGV_HIGH(arg2), cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
-                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+    } else {
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                         TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    }
     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
 }
 
@@ -1273,13 +1303,23 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i64(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i64(ret, 0);
+    } else {
+        tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1397,9 +1437,13 @@ static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
                                        int64_t arg2, int label_index)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_brcond_i64(cond, arg1, t0, label_index);
-    tcg_temp_free_i64(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c069e44..78ef50b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -864,6 +864,8 @@ static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val)
 
 static const char * const cond_name[] =
 {
+    [TCG_COND_NEVER] = "never",
+    [TCG_COND_ALWAYS] = "always",
     [TCG_COND_EQ] = "eq",
     [TCG_COND_NE] = "ne",
     [TCG_COND_LT] = "lt",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6ff2ab5..75f0239 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -266,18 +266,28 @@ typedef int TCGv_i64;
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1))
 
+/* Conditions.  Note that these are laid out for easy manipulation by
+   the functions below:
+     bit 0 is used for inverting;
+     bit 1 is signed,
+     bit 2 is unsigned,
+     bit 3 is used with bit 0 for swapping the operands.  */
 typedef enum {
-    TCG_COND_EQ,
-    TCG_COND_NE,
-    TCG_COND_LT,
-    TCG_COND_GE,
-    TCG_COND_LE,
-    TCG_COND_GT,
+    /* non-signed */
+    TCG_COND_NEVER  = 0 | 0 | 0 | 0,
+    TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+    TCG_COND_EQ     = 8 | 0 | 0 | 0,
+    TCG_COND_NE     = 8 | 0 | 0 | 1,
+    /* signed */
+    TCG_COND_LT     = 0 | 0 | 2 | 0,
+    TCG_COND_GE     = 0 | 0 | 2 | 1,
+    TCG_COND_LE     = 8 | 0 | 2 | 0,
+    TCG_COND_GT     = 8 | 0 | 2 | 1,
     /* unsigned */
-    TCG_COND_LTU,
-    TCG_COND_GEU,
-    TCG_COND_LEU,
-    TCG_COND_GTU,
+    TCG_COND_LTU    = 0 | 4 | 0 | 0,
+    TCG_COND_GEU    = 0 | 4 | 0 | 1,
+    TCG_COND_LEU    = 8 | 4 | 0 | 0,
+    TCG_COND_GTU    = 8 | 4 | 0 | 1,
 } TCGCond;
 
 /* Invert the sense of the comparison.  */
@@ -289,18 +299,17 @@ static inline TCGCond tcg_invert_cond(TCGCond c)
 /* Swap the operands in a comparison.  */
 static inline TCGCond tcg_swap_cond(TCGCond c)
 {
-    int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
-    return (TCGCond)(c ^ mask);
+    return c & 6 ? (TCGCond)(c ^ 9) : c;
 }
 
 static inline TCGCond tcg_unsigned_cond(TCGCond c)
 {
-    return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+    return c & 2 ? (TCGCond)(c ^ 6) : c;
 }
 
 static inline bool is_unsigned_cond(TCGCond c)
 {
-    return c >= TCG_COND_LTU;
+    return (c & 4) != 0;
 }
 
 #define TEMP_VAL_DEAD  0
commit bcc66562ad185e9c2a667b00426f625e2489bda9
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:21:39 2012 -0700

    tcg: Add is_unsigned_cond
    
    Before we rearrange the TCG_COND enumeration, add a predicate for
    the (single) use of comparisons vs TCGCond.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 4b43059..c0ef6ba 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -1113,7 +1113,7 @@ static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                     TCGArg c2, int c2const)
 {
-    bool is_unsigned = (c > TCG_COND_GT);
+    bool is_unsigned = is_unsigned_cond(c);
     if (c2const) {
         if (c2 == 0) {
             if (type == TCG_TYPE_I32) {
diff --git a/tcg/tcg.h b/tcg/tcg.h
index af7464a..6ff2ab5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -298,6 +298,11 @@ static inline TCGCond tcg_unsigned_cond(TCGCond c)
     return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
 }
 
+static inline bool is_unsigned_cond(TCGCond c)
+{
+    return c >= TCG_COND_LTU;
+}
+
 #define TEMP_VAL_DEAD  0
 #define TEMP_VAL_REG   1
 #define TEMP_VAL_MEM   2
commit 457ebdcd91aa7f0df094e12a8c46235f0c7d4f9b
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:58:09 2012 +0200

    vga: cleanup after isa_vga_init() and pci_vga_init() conversion
    
    Now that all machines call isa_vga_init() or pci_vga_init(), some unused
    code can be removed.
    
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 9a0a565..a101329 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -28,7 +28,6 @@
  */
 #include "hw.h"
 #include "pci.h"
-#include "vga-pci.h"
 #include "console.h"
 #include "vga_int.h"
 #include "loader.h"
@@ -2970,11 +2969,6 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev)
      return 0;
 }
 
-DeviceState *pci_cirrus_vga_init(PCIBus *bus)
-{
-    return &pci_create_simple(bus, -1, "cirrus-vga")->qdev;
-}
-
 static void cirrus_vga_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/pc.h b/hw/pc.h
index 77c38d6..9923d96 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -176,19 +176,6 @@ enum vga_retrace_method {
 
 extern enum vga_retrace_method vga_retrace_method;
 
-static inline DeviceState *isa_std_vga_init(ISABus *bus)
-{
-    ISADevice *dev;
-
-    dev = isa_try_create(bus, "isa-vga");
-    if (!dev) {
-        fprintf(stderr, "Warning: isa-vga not available\n");
-        return NULL;
-    }
-    qdev_init_nofail(&dev->qdev);
-    return &dev->qdev;
-}
-
 int isa_vga_mm_init(target_phys_addr_t vram_base,
                     target_phys_addr_t ctrl_base, int it_shift,
                     MemoryRegion *address_space);
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 992ffd9..996d47f 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -24,7 +24,6 @@
 #include "hw.h"
 #include "console.h"
 #include "pci.h"
-#include "vga-pci.h"
 #include "vga_int.h"
 #include "pixel_ops.h"
 #include "qemu-timer.h"
@@ -70,11 +69,6 @@ static int pci_std_vga_initfn(PCIDevice *dev)
      return 0;
 }
 
-DeviceState *pci_std_vga_init(PCIBus *bus)
-{
-    return &pci_create_simple(bus, -1, "VGA")->qdev;
-}
-
 static Property vga_pci_properties[] = {
     DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16),
     DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/vga-pci.h b/hw/vga-pci.h
deleted file mode 100644
index d111cdc..0000000
--- a/hw/vga-pci.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef VGA_PCI_H
-#define VGA_PCI_H
-
-#include "qemu-common.h"
-
-/* vga-pci.c */
-DeviceState *pci_std_vga_init(PCIBus *bus);
-
-/* cirrus_vga.c */
-DeviceState *pci_cirrus_vga_init(PCIBus *bus);
-
-#endif
diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index e815a04..6f7074e 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -25,7 +25,6 @@
 #include "loader.h"
 #include "console.h"
 #include "pci.h"
-#include "vmware_vga.h"
 
 #undef VERBOSE
 #define HW_RECT_ACCEL
diff --git a/hw/vmware_vga.h b/hw/vmware_vga.h
deleted file mode 100644
index 000fbdd..0000000
--- a/hw/vmware_vga.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef QEMU_VMWARE_VGA_H
-#define QEMU_VMWARE_VGA_H
-
-#include "qemu-common.h"
-
-/* vmware_vga.c */
-static inline DeviceState *pci_vmsvga_init(PCIBus *bus)
-{
-    PCIDevice *dev;
-
-    dev = pci_create_simple(bus, -1, "vmware-svga");
-    return &dev->qdev;
-}
-
-#endif
diff --git a/sysemu.h b/sysemu.h
index 1093046..0c39a3a 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -105,10 +105,7 @@ typedef enum {
 } VGAInterfaceType;
 
 extern int vga_interface_type;
-#define cirrus_vga_enabled (vga_interface_type == VGA_CIRRUS)
-#define std_vga_enabled (vga_interface_type == VGA_STD)
 #define xenfb_enabled (vga_interface_type == VGA_XENFB)
-#define vmsvga_enabled (vga_interface_type == VGA_VMWARE)
 #define qxl_enabled (vga_interface_type == VGA_QXL)
 
 extern int graphic_width;
commit f642dfce199eecda5a558ce94c17a34707d63ccd
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 17:02:29 2012 +0200

    mips/r4k: use the new isa_vga_init() function
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/mips_r4k.c b/hw/mips_r4k.c
index 2115f7f..967a76e 100644
--- a/hw/mips_r4k.c
+++ b/hw/mips_r4k.c
@@ -281,7 +281,7 @@ void mips_r4k_init (ram_addr_t ram_size,
         }
     }
 
-    isa_std_vga_init(isa_bus);
+    isa_vga_init(isa_bus);
 
     if (nd_table[0].used)
         isa_ne2000_init(isa_bus, 0x300, 9, &nd_table[0]);
commit 16094b75b38dcb5582810bd4149fa8336a939dc8
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:47:45 2012 +0200

    pc: use the new pci_vga_init() and isa_vga_init() functions
    
    The CONFIG_SPICE is now tested in vl.c and thus not needed anymore.
    
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/pc.c b/hw/pc.c
index 78805f6..6c0722d 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -27,7 +27,6 @@
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
-#include "vmware_vga.h"
 #include "monitor.h"
 #include "fw_cfg.h"
 #include "hpet_emul.h"
@@ -51,7 +50,6 @@
 #include "exec-memory.h"
 #include "arch_init.h"
 #include "bitmap.h"
-#include "vga-pci.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -992,34 +990,13 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 {
     DeviceState *dev = NULL;
 
-    if (cirrus_vga_enabled) {
-        if (pci_bus) {
-            dev = pci_cirrus_vga_init(pci_bus);
-        } else {
-            dev = &isa_create_simple(isa_bus, "isa-cirrus-vga")->qdev;
-        }
-    } else if (vmsvga_enabled) {
-        if (pci_bus) {
-            dev = pci_vmsvga_init(pci_bus);
-        } else {
-            fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __FUNCTION__);
-        }
-#ifdef CONFIG_SPICE
-    } else if (qxl_enabled) {
-        if (pci_bus) {
-            dev = &pci_create_simple(pci_bus, -1, "qxl-vga")->qdev;
-        } else {
-            fprintf(stderr, "%s: qxl: no PCI bus\n", __FUNCTION__);
-        }
-#endif
-    } else if (std_vga_enabled) {
-        if (pci_bus) {
-            dev = pci_std_vga_init(pci_bus);
-        } else {
-            dev = isa_std_vga_init(isa_bus);
-        }
+    if (pci_bus) {
+        PCIDevice *pcidev = pci_vga_init(pci_bus);
+        dev = pcidev ? &pcidev->qdev : NULL;
+    } else if (isa_bus) {
+        ISADevice *isadev = isa_vga_init(isa_bus);
+        dev = isadev ? &isadev->qdev : NULL;
     }
-
     return dev;
 }
 
commit f2898771435701df33145cacabeb42c6aa3a9a16
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:23:54 2012 +0200

    sun/sun4u: use the new pci_vga_init() function
    
    As a bonus it allows new vga card types (including none).
    
    Acked-by: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/sun4u.c b/hw/sun4u.c
index cca090f..137a7c6 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -39,7 +39,6 @@
 #include "elf.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 //#define DEBUG_IRQ
 //#define DEBUG_EBUS
@@ -821,7 +820,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
     ivec_irqs = qemu_allocate_irqs(cpu_set_ivec_irq, env, IVEC_MAX);
     pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE, ivec_irqs, &pci_bus2,
                            &pci_bus3, &pbm_irqs);
-    pci_std_vga_init(pci_bus);
+    pci_vga_init(pci_bus);
 
     // XXX Should be pci_bus3
     isa_bus = pci_ebus_init(pci_bus, -1, pbm_irqs);
commit 1ddcae82a2509668b94a13e84921bdcafddcdfff
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:40:45 2012 +0200

    ppc/pSeries: use the new pci_vga_init() function
    
    Keep the switch statement to prevent unsupported vga cards from being
    selected.
    
    Cc: Alexander Graf <agraf at suse.de>
    Cc: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/spapr.c b/hw/spapr.c
index 80735d6..8b0c390 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -46,7 +46,6 @@
 #include "kvm.h"
 #include "kvm_ppc.h"
 #include "pci.h"
-#include "vga-pci.h"
 
 #include "exec-memory.h"
 #include "hw/usb.h"
@@ -589,11 +588,9 @@ static void spapr_cpu_reset(void *opaque)
 static int spapr_vga_init(PCIBus *pci_bus)
 {
     switch (vga_interface_type) {
-    case VGA_STD:
-        pci_std_vga_init(pci_bus);
-        return 1;
     case VGA_NONE:
-        return 0;
+    case VGA_STD:
+        return pci_vga_init(pci_bus) != NULL;
     default:
         fprintf(stderr, "This vga model is not supported,"
                 "currently it only supports -vga std\n");
commit 95fa01fab0082483c1849f1439303db4183c8dd4
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:22:44 2012 +0200

    ppc/prep: use the new pci_vga_init() function
    
    As a bonus it allows new vga card types (including none).
    
    Acked-by: Andreas Färber <andreas.faerber at web.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 1fa7609..1544430 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -39,7 +39,6 @@
 #include "blockdev.h"
 #include "arch_init.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 //#define HARD_DEBUG_PPC_IO
 //#define DEBUG_PPC_IO
@@ -611,7 +610,7 @@ static void ppc_prep_init (ram_addr_t ram_size,
     memory_region_add_subregion(sysmem, 0x80000000, PPC_io_memory);
 
     /* init basic PC hardware */
-    pci_std_vga_init(pci_bus);
+    pci_vga_init(pci_bus);
 
     if (serial_hds[0])
         serial_isa_init(isa_bus, 0, serial_hds[0]);
commit 3e20ad3a9b674abc3464504d59bba46304316136
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:21:20 2012 +0200

    ppc/oldworld: use the new pci_vga_init() function
    
    As a bonus it allows new vga card types (including none).
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 8267eb4..2c4a478 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -43,7 +43,6 @@
 #include "kvm_ppc.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -250,7 +249,7 @@ static void ppc_heathrow_init (ram_addr_t ram_size,
     pci_bus = pci_grackle_init(0xfec00000, pic,
                                get_system_memory(),
                                get_system_io());
-    pci_std_vga_init(pci_bus);
+    pci_vga_init(pci_bus);
 
     escc_mem = escc_init(0, pic[0x0f], pic[0x10], serial_hds[0],
                                serial_hds[1], ESCC_CLOCK, 4);
commit e7a2e96df0692e19f253476027a565dc7c11da76
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:19:38 2012 +0200

    ppc/newworld: use the new pci_vga_init() function
    
    As a bonus it allows new vga card types (including none).
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index 84af948..b8d3c9c 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -67,7 +67,6 @@
 #include "hw/usb.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -330,7 +329,7 @@ static void ppc_core99_init (ram_addr_t ram_size,
         machine_arch = ARCH_MAC99;
     }
     /* init basic PC hardware */
-    pci_std_vga_init(pci_bus);
+    pci_vga_init(pci_bus);
 
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
commit 606f90cc22094bab743c84d40a11f6ded603ad34
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:16:28 2012 +0200

    alpha: use the new pci_vga_init() function
    
    This removes the fallback to std-vga when the requested card is not
    available, as availability of the requested vga device is now tested in
    vl.c, which returns an error message to the user.
    
    Acked-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
index 9eb939f..5ea04c7 100644
--- a/hw/alpha_dp264.c
+++ b/hw/alpha_dp264.c
@@ -77,7 +77,7 @@ static void clipper_init(ram_addr_t ram_size,
     isa_create_simple(isa_bus, "i8042");
 
     /* VGA setup.  Don't bother loading the bios.  */
-    alpha_pci_vga_setup(pci_bus);
+    pci_vga_init(pci_bus);
 
     /* Serial code setup.  */
     for (i = 0; i < MAX_SERIAL_PORTS; ++i) {
diff --git a/hw/alpha_pci.c b/hw/alpha_pci.c
index 0352e72..8079a46 100644
--- a/hw/alpha_pci.c
+++ b/hw/alpha_pci.c
@@ -10,8 +10,6 @@
 #include "alpha_sys.h"
 #include "qemu-log.h"
 #include "sysemu.h"
-#include "vmware_vga.h"
-#include "vga-pci.h"
 
 
 /* PCI IO reads/writes, to byte-word addressable memory.  */
@@ -109,25 +107,3 @@ const MemoryRegionOps alpha_pci_iack_ops = {
         .max_access_size = 4,
     },
 };
-
-void alpha_pci_vga_setup(PCIBus *pci_bus)
-{
-    switch (vga_interface_type) {
-#ifdef CONFIG_SPICE
-    case VGA_QXL:
-        pci_create_simple(pci_bus, -1, "qxl-vga");
-        return;
-#endif
-    case VGA_CIRRUS:
-        pci_cirrus_vga_init(pci_bus);
-        return;
-    case VGA_VMWARE:
-        pci_vmsvga_init(pci_bus);
-        return;
-    }
-    /* If VGA is enabled at all, and one of the above didn't work, then
-       fallback to Standard VGA.  */
-    if (vga_interface_type != VGA_NONE) {
-        pci_std_vga_init(pci_bus);
-    }
-}
diff --git a/hw/alpha_sys.h b/hw/alpha_sys.h
index de40f8b..7604d09 100644
--- a/hw/alpha_sys.h
+++ b/hw/alpha_sys.h
@@ -19,6 +19,4 @@ extern const MemoryRegionOps alpha_pci_bw_io_ops;
 extern const MemoryRegionOps alpha_pci_conf1_ops;
 extern const MemoryRegionOps alpha_pci_iack_ops;
 
-void alpha_pci_vga_setup(PCIBus *pci_bus);
-
 #endif
commit 9c59864d16d720184e723c0c29c505c34e94fed5
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 11:53:12 2012 +0200

    mips/malta: use the new pci_vga_init() function
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index c39dee5..632b466 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -33,7 +33,6 @@
 #include "mips.h"
 #include "mips_cpudevs.h"
 #include "pci.h"
-#include "vmware_vga.h"
 #include "qemu-char.h"
 #include "sysemu.h"
 #include "arch_init.h"
@@ -48,7 +47,6 @@
 #include "blockdev.h"
 #include "exec-memory.h"
 #include "sysbus.h"             /* SysBusDevice */
-#include "vga-pci.h"
 
 //#define DEBUG_BOARD_INIT
 
@@ -987,13 +985,7 @@ void mips_malta_init (ram_addr_t ram_size,
     network_init();
 
     /* Optional PCI video card */
-    if (cirrus_vga_enabled) {
-        pci_cirrus_vga_init(pci_bus);
-    } else if (vmsvga_enabled) {
-        pci_vmsvga_init(pci_bus);
-    } else if (std_vga_enabled) {
-        pci_std_vga_init(pci_bus);
-    }
+    pci_vga_init(pci_bus);
 }
 
 static int mips_malta_sysbus_device_init(SysBusDevice *sysbusdev)
commit 14e7a6456e1dba2c6499b3b05637fee9f553e6ce
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 16:58:57 2012 +0200

    isa: add a isa_vga_init() function
    
    This function creates an ISA VGA device according to the value of
    vga_interface_type. It returns an ISADevice (and not a DeviceState).
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/isa-bus.c b/hw/isa-bus.c
index 47c93d3..214f194 100644
--- a/hw/isa-bus.c
+++ b/hw/isa-bus.c
@@ -19,6 +19,7 @@
 #include "hw.h"
 #include "monitor.h"
 #include "sysbus.h"
+#include "sysemu.h"
 #include "isa.h"
 #include "exec-memory.h"
 
@@ -166,6 +167,25 @@ ISADevice *isa_create_simple(ISABus *bus, const char *name)
     return dev;
 }
 
+ISADevice *isa_vga_init(ISABus *bus)
+{
+    switch (vga_interface_type) {
+    case VGA_CIRRUS:
+        return isa_create_simple(bus, "isa-cirrus-vga");
+    case VGA_QXL:
+        fprintf(stderr, "%s: qxl: no PCI bus\n", __func__);
+        return NULL;
+    case VGA_STD:
+        return isa_create_simple(bus, "isa-vga");
+    case VGA_VMWARE:
+        fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __func__);
+        return NULL;
+    case VGA_NONE:
+    default:
+        return NULL;
+    }
+}
+
 static void isabus_dev_print(Monitor *mon, DeviceState *dev, int indent)
 {
     ISADevice *d = ISA_DEVICE(dev);
diff --git a/hw/isa.h b/hw/isa.h
index dc97052..8fb498a 100644
--- a/hw/isa.h
+++ b/hw/isa.h
@@ -47,6 +47,8 @@ ISADevice *isa_create(ISABus *bus, const char *name);
 ISADevice *isa_try_create(ISABus *bus, const char *name);
 ISADevice *isa_create_simple(ISABus *bus, const char *name);
 
+ISADevice *isa_vga_init(ISABus *bus);
+
 /**
  * isa_register_ioport: Install an I/O port region on the ISA bus.
  *
commit 129d42fb8496de5c1896160fddab949784d4dea4
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 11:49:24 2012 +0200

    pci: add a pci_vga_init() function
    
    This function creates a PCI VGA device according to the value of
    vga_interface_type. It returns a PCIDevice (and not a DeviceState).
    
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/pci.c b/hw/pci.c
index de4b448..2ca6ff6 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1474,6 +1474,24 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
     return res;
 }
 
+PCIDevice *pci_vga_init(PCIBus *bus)
+{
+    switch (vga_interface_type) {
+    case VGA_CIRRUS:
+        return pci_create_simple(bus, -1, "cirrus-vga");
+    case VGA_QXL:
+        return pci_create_simple(bus, -1, "qxl-vga");
+    case VGA_STD:
+        return pci_create_simple(bus, -1, "VGA");
+    case VGA_VMWARE:
+        return pci_create_simple(bus, -1, "vmware-svga");
+    case VGA_NONE:
+    default: /* Other non-PCI types. Checking for unsupported types is already
+                done in vl.c. */
+        return NULL;
+    }
+}
+
 /* Whether a given bus number is in range of the secondary
  * bus of the given bridge device. */
 static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 4b6ab3d..d50d26c 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -334,6 +334,9 @@ PCIDevice *pci_nic_init(NICInfo *nd, const char *default_model,
                         const char *default_devaddr);
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
                                const char *default_devaddr);
+
+PCIDevice *pci_vga_init(PCIBus *bus);
+
 int pci_bus_num(PCIBus *s);
 void pci_for_each_device(PCIBus *bus, int bus_num,
                          void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
commit 3605ded557eb2a6d06a942b952c8fac4c676b125
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Sep 10 01:01:44 2012 +0200

    vl.c: default to std if cirrus is not available
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/vl.c b/vl.c
index 17a33a0..02dade1 100644
--- a/vl.c
+++ b/vl.c
@@ -3617,8 +3617,12 @@ int main(int argc, char **argv, char **envp)
         exit(1);
 
     /* If no default VGA is requested, the default is "none".  */
-    if (default_vga && cirrus_vga_available()) {
-        vga_model = "cirrus";
+    if (default_vga) {
+        if (cirrus_vga_available()) {
+            vga_model = "cirrus";
+        } else if (vga_available()) {
+            vga_model = "std";
+        }
     }
     select_vgahw(vga_model);
 
commit 879049a39724f91224f10c8d80b3cf70c9dc1c5e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 12:01:06 2012 +0200

    vl.c: check for qxl availability
    
    Check for qxl availability in vl.c. This will allow removing the #ifdef
    CONFIG_SPICE .. #endif blocks later in this series.
    
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Cc: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/vl.c b/vl.c
index bf9cdf8..17a33a0 100644
--- a/vl.c
+++ b/vl.c
@@ -1714,6 +1714,11 @@ static bool vmware_vga_available(void)
     return object_class_by_name("vmware-svga");
 }
 
+static bool qxl_vga_available(void)
+{
+    return object_class_by_name("qxl-vga");
+}
+
 static void select_vgahw (const char *p)
 {
     const char *opts;
@@ -1743,7 +1748,12 @@ static void select_vgahw (const char *p)
     } else if (strstart(p, "xenfb", &opts)) {
         vga_interface_type = VGA_XENFB;
     } else if (strstart(p, "qxl", &opts)) {
-        vga_interface_type = VGA_QXL;
+        if (qxl_vga_available()) {
+            vga_interface_type = VGA_QXL;
+        } else {
+            fprintf(stderr, "Error: QXL VGA not available\n");
+            exit(0);
+        }
     } else if (!strstart(p, "none", &opts)) {
     invalid_vga:
         fprintf(stderr, "Unknown vga type: %s\n", p);
commit 36b7f27d2195a09dbfb7f08b2323d807c1ad90b0
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 16:01:20 2012 +0200

    vl.c: convert *vga_enabled functions to QOM
    
    And get rid of qdev_exists().
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/qdev.c b/hw/qdev.c
index 3b5ce33..adfc4a7 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -52,11 +52,6 @@ const char *qdev_fw_name(DeviceState *dev)
     return object_get_typename(OBJECT(dev));
 }
 
-bool qdev_exists(const char *name)
-{
-    return !!object_class_by_name(name);
-}
-
 static void qdev_property_add_legacy(DeviceState *dev, Property *prop,
                                      Error **errp);
 
diff --git a/hw/qdev.h b/hw/qdev.h
index d699194..c6ac636 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -153,7 +153,6 @@ typedef struct GlobalProperty {
 
 DeviceState *qdev_create(BusState *bus, const char *name);
 DeviceState *qdev_try_create(BusState *bus, const char *name);
-bool qdev_exists(const char *name);
 int qdev_device_help(QemuOpts *opts);
 DeviceState *qdev_device_add(QemuOpts *opts);
 int qdev_init(DeviceState *dev) QEMU_WARN_UNUSED_RESULT;
diff --git a/vl.c b/vl.c
index 8d305ca..bf9cdf8 100644
--- a/vl.c
+++ b/vl.c
@@ -1700,17 +1700,18 @@ static const QEMUOption qemu_options[] = {
 
 static bool vga_available(void)
 {
-    return qdev_exists("VGA") || qdev_exists("isa-vga");
+    return object_class_by_name("VGA") || object_class_by_name("isa-vga");
 }
 
 static bool cirrus_vga_available(void)
 {
-    return qdev_exists("cirrus-vga") || qdev_exists("isa-cirrus-vga");
+    return object_class_by_name("cirrus-vga")
+           || object_class_by_name("isa-cirrus-vga");
 }
 
 static bool vmware_vga_available(void)
 {
-    return qdev_exists("vmware-svga");
+    return object_class_by_name("vmware-svga");
 }
 
 static void select_vgahw (const char *p)
commit 6405c86412719a0ccd1a09b81baf0b78e8a4ae6b
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 16:40:56 2012 +0200

    vga: rename isa_vga_init() to isa_std_vga_init()
    
    This better explains what this function is about. Adjust all callers.
    
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/mips_r4k.c b/hw/mips_r4k.c
index 967a76e..2115f7f 100644
--- a/hw/mips_r4k.c
+++ b/hw/mips_r4k.c
@@ -281,7 +281,7 @@ void mips_r4k_init (ram_addr_t ram_size,
         }
     }
 
-    isa_vga_init(isa_bus);
+    isa_std_vga_init(isa_bus);
 
     if (nd_table[0].used)
         isa_ne2000_init(isa_bus, 0x300, 9, &nd_table[0]);
diff --git a/hw/pc.c b/hw/pc.c
index 33fee62..78805f6 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1016,7 +1016,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
         if (pci_bus) {
             dev = pci_std_vga_init(pci_bus);
         } else {
-            dev = isa_vga_init(isa_bus);
+            dev = isa_std_vga_init(isa_bus);
         }
     }
 
diff --git a/hw/pc.h b/hw/pc.h
index e4db071..77c38d6 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -176,7 +176,7 @@ enum vga_retrace_method {
 
 extern enum vga_retrace_method vga_retrace_method;
 
-static inline DeviceState *isa_vga_init(ISABus *bus)
+static inline DeviceState *isa_std_vga_init(ISABus *bus)
 {
     ISADevice *dev;
 
commit a1e472119aa2efa88a2a24b1aa50e45ea8cc8f31
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Sep 8 11:38:41 2012 +0200

    vga: rename pci_vga_init() into pci_std_vga_init()
    
    This better explains what this function is about. Adjust all callers.
    
    Cc: Alexander Graf <agraf at suse.de>
    Cc: Andreas Färber <andreas.faerber at web.de>
    Cc: David Gibson <david at gibson.dropbear.id.au>
    Cc: Anthony Liguori <aliguori at us.ibm.com>
    Acked-by: Richard Henderson <rth at twiddle.net>
    Acked-by: Blue Swirl <blauwirbel at gmail.com>
    Acked-by: Andreas Färber <andreas.faerber at web.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/alpha_pci.c b/hw/alpha_pci.c
index ea546f8..0352e72 100644
--- a/hw/alpha_pci.c
+++ b/hw/alpha_pci.c
@@ -128,6 +128,6 @@ void alpha_pci_vga_setup(PCIBus *pci_bus)
     /* If VGA is enabled at all, and one of the above didn't work, then
        fallback to Standard VGA.  */
     if (vga_interface_type != VGA_NONE) {
-        pci_vga_init(pci_bus);
+        pci_std_vga_init(pci_bus);
     }
 }
diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index ad23f26..c39dee5 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -992,7 +992,7 @@ void mips_malta_init (ram_addr_t ram_size,
     } else if (vmsvga_enabled) {
         pci_vmsvga_init(pci_bus);
     } else if (std_vga_enabled) {
-        pci_vga_init(pci_bus);
+        pci_std_vga_init(pci_bus);
     }
 }
 
diff --git a/hw/pc.c b/hw/pc.c
index 7e7e0e2..33fee62 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1014,7 +1014,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
 #endif
     } else if (std_vga_enabled) {
         if (pci_bus) {
-            dev = pci_vga_init(pci_bus);
+            dev = pci_std_vga_init(pci_bus);
         } else {
             dev = isa_vga_init(isa_bus);
         }
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index e95cfe8..84af948 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -330,7 +330,7 @@ static void ppc_core99_init (ram_addr_t ram_size,
         machine_arch = ARCH_MAC99;
     }
     /* init basic PC hardware */
-    pci_vga_init(pci_bus);
+    pci_std_vga_init(pci_bus);
 
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 1dcd8a6..8267eb4 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -250,7 +250,7 @@ static void ppc_heathrow_init (ram_addr_t ram_size,
     pci_bus = pci_grackle_init(0xfec00000, pic,
                                get_system_memory(),
                                get_system_io());
-    pci_vga_init(pci_bus);
+    pci_std_vga_init(pci_bus);
 
     escc_mem = escc_init(0, pic[0x0f], pic[0x10], serial_hds[0],
                                serial_hds[1], ESCC_CLOCK, 4);
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 592b7b2..1fa7609 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -611,7 +611,7 @@ static void ppc_prep_init (ram_addr_t ram_size,
     memory_region_add_subregion(sysmem, 0x80000000, PPC_io_memory);
 
     /* init basic PC hardware */
-    pci_vga_init(pci_bus);
+    pci_std_vga_init(pci_bus);
 
     if (serial_hds[0])
         serial_isa_init(isa_bus, 0, serial_hds[0]);
diff --git a/hw/spapr.c b/hw/spapr.c
index c34b767..80735d6 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -590,7 +590,7 @@ static int spapr_vga_init(PCIBus *pci_bus)
 {
     switch (vga_interface_type) {
     case VGA_STD:
-        pci_vga_init(pci_bus);
+        pci_std_vga_init(pci_bus);
         return 1;
     case VGA_NONE:
         return 0;
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 07cd042..cca090f 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -821,7 +821,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
     ivec_irqs = qemu_allocate_irqs(cpu_set_ivec_irq, env, IVEC_MAX);
     pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE, ivec_irqs, &pci_bus2,
                            &pci_bus3, &pbm_irqs);
-    pci_vga_init(pci_bus);
+    pci_std_vga_init(pci_bus);
 
     // XXX Should be pci_bus3
     isa_bus = pci_ebus_init(pci_bus, -1, pbm_irqs);
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 9abbada..992ffd9 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -47,7 +47,7 @@ static const VMStateDescription vmstate_vga_pci = {
     }
 };
 
-static int pci_vga_initfn(PCIDevice *dev)
+static int pci_std_vga_initfn(PCIDevice *dev)
 {
      PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
      VGACommonState *s = &d->vga;
@@ -70,7 +70,7 @@ static int pci_vga_initfn(PCIDevice *dev)
      return 0;
 }
 
-DeviceState *pci_vga_init(PCIBus *bus)
+DeviceState *pci_std_vga_init(PCIBus *bus)
 {
     return &pci_create_simple(bus, -1, "VGA")->qdev;
 }
@@ -86,7 +86,7 @@ static void vga_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->no_hotplug = 1;
-    k->init = pci_vga_initfn;
+    k->init = pci_std_vga_initfn;
     k->romfile = "vgabios-stdvga.bin";
     k->vendor_id = PCI_VENDOR_ID_QEMU;
     k->device_id = PCI_DEVICE_ID_QEMU_VGA;
diff --git a/hw/vga-pci.h b/hw/vga-pci.h
index 49abf13..d111cdc 100644
--- a/hw/vga-pci.h
+++ b/hw/vga-pci.h
@@ -4,7 +4,7 @@
 #include "qemu-common.h"
 
 /* vga-pci.c */
-DeviceState *pci_vga_init(PCIBus *bus);
+DeviceState *pci_std_vga_init(PCIBus *bus);
 
 /* cirrus_vga.c */
 DeviceState *pci_cirrus_vga_init(PCIBus *bus);
commit 626cd050e2c9094c1b005bc39cab637f8cbe3755
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Oct 1 21:00:43 2012 +0200

    tcg: remove obsolete jmp op
    
    The TCG jmp operation doesn't really make sense in the QEMU context, it
    is unused, it is not implemented by some targets, and it is wrongly
    implemented by some others.
    
    This patch simply removes it.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Acked-by: Blue Swirl <blauwirbel at gmail.com>
    Acked-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/README b/tcg/README
index 27846f1..aa86992 100644
--- a/tcg/README
+++ b/tcg/README
@@ -88,8 +88,7 @@ supported.
 
 * Branches:
 
-Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
-explicit address. Conditional branches can only jump to labels.
+Use the instruction 'br' to jump to a label.
 
 3.3) Code Optimizations
 
@@ -129,10 +128,6 @@ call function 'ptr' (pointer type)
 
 ********* Jumps/Labels
 
-* jmp t0
-
-Absolute jump to address t0 (pointer type).
-
 * set_label $label
 
 Define label 'label' at the current program point.
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 2bad0a2..1e61864 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1530,12 +1530,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         else
             tcg_out_callr(s, COND_AL, args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0])
-            tcg_out_goto(s, COND_AL, args[0]);
-        else
-            tcg_out_bx(s, COND_AL, args[0]);
-        break;
     case INDEX_op_br:
         tcg_out_goto_label(s, COND_AL, args[0]);
         break;
@@ -1769,7 +1763,6 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 2c79c10..44974c4 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -1353,11 +1353,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         break;
 
-    case INDEX_op_jmp:
-        fprintf(stderr, "unimplemented jmp\n");
-        tcg_abort();
-        break;
-
     case INDEX_op_br:
         tcg_out_branch(s, args[0], 1);
         break;
@@ -1592,7 +1587,6 @@ static const TCGTargetOpDef hppa_op_defs[] = {
     { INDEX_op_goto_tb, { } },
 
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "r" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 0e218c8..bb2306d 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1513,14 +1513,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_jmp(s, args[0]);
-        } else {
-            /* jmp *reg */
-            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
-        }
-        break;
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
@@ -1848,7 +1840,6 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index dc9c12c..705712f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1916,9 +1916,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_goto_tb:
         tcg_out_goto_tb(s, args[0]);
         break;
-    case INDEX_op_jmp:
-        tcg_out_jmp(s, args[0]);
-        break;
 
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -2156,7 +2153,6 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_call, { "r" } },
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_jmp, { "r" } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 34e3e7f..7e4013e 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1322,10 +1322,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, args[0], 0);
         tcg_out_nop(s);
         break;
-    case INDEX_op_jmp:
-        tcg_out_opc_reg(s, OPC_JR, 0, args[0], 0);
-        tcg_out_nop(s);
-        break;
     case INDEX_op_br:
         tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]);
         break;
@@ -1577,7 +1573,6 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "C" } },
-    { INDEX_op_jmp, { "r" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 90c275d..56baedd 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1417,15 +1417,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_call:
         tcg_out_call (s, args[0], const_args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_b (s, 0, args[0]);
-        }
-        else {
-            tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
-            tcg_out32 (s, BCCTR | BO_ALWAYS);
-        }
-        break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
         break;
@@ -1904,7 +1895,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 19944bc..6e9b363 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1245,15 +1245,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_call:
         tcg_out_call (s, args[0], const_args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_b (s, 0, args[0]);
-        }
-        else {
-            tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
-            tcg_out32 (s, BCCTR | BO_ALWAYS);
-        }
-        break;
     case INDEX_op_movi_i32:
         tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]);
         break;
@@ -1588,7 +1579,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 3b90605..4b43059 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -2037,11 +2037,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-    case INDEX_op_jmp:
-        /* This one is obsolete and never emitted.  */
-        tcg_abort();
-        break;
-
     default:
         fprintf(stderr,"unimplemented opc 0x%x\n",opc);
         tcg_abort();
@@ -2052,7 +2047,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 8fd7f86..e82fab2 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1097,7 +1097,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         /* delay slot */
         tcg_out_nop(s);
         break;
-    case INDEX_op_jmp:
     case INDEX_op_br:
         tcg_out_branch_i32(s, COND_A, args[0]);
         tcg_out_nop(s);
@@ -1367,7 +1366,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index dbb0e39..04cb7ca 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -38,7 +38,6 @@ DEF(discard, 1, 0, 0, 0)
 
 DEF(set_label, 0, 0, 1, TCG_OPF_BB_END)
 DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
 #define IMPL(X) (X ? 0 : TCG_OPF_NOT_PRESENT)
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index d272a90..e930740 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -69,7 +69,6 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
     { INDEX_op_exit_tb, { NULL } },
     { INDEX_op_goto_tb, { NULL } },
     { INDEX_op_call, { RI } },
-    { INDEX_op_jmp, { RI } },
     { INDEX_op_br, { NULL } },
 
     { INDEX_op_mov_i32, { R, R } },
@@ -583,9 +582,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_call:
         tcg_out_ri(s, const_args[0], args[0]);
         break;
-    case INDEX_op_jmp:
-        TODO();
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
diff --git a/tci.c b/tci.c
index a4f7b78..98f5f71 100644
--- a/tci.c
+++ b/tci.c
@@ -505,7 +505,6 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
             tci_write_reg(TCG_REG_R0, tmp64);
 #endif
             break;
-        case INDEX_op_jmp:
         case INDEX_op_br:
             label = tci_read_label(&tb_ptr);
             assert(tb_ptr == old_code_ptr + op_size);
commit f783cb22409c6537b3cab7e78e527f62b4237d1e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Oct 1 21:00:42 2012 +0200

    target-xtensa: de-optimize EXTUI
    
    Now that "and" with 0xff, 0xffff and 0xffffffff and "shr" with 0 shift
    are optimized in tcg/tcg-op.h there is no need to do it in
    target-xtensa/translate.c.
    
    Acked-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index b9acd70..82e8ccc 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -1829,26 +1829,8 @@ static void disas_xtensa_insn(DisasContext *dc)
                 int maskimm = (1 << (OP2 + 1)) - 1;
 
                 TCGv_i32 tmp = tcg_temp_new_i32();
-
-                if (shiftimm) {
-                    tcg_gen_shri_i32(tmp, cpu_R[RRR_T], shiftimm);
-                } else {
-                    tcg_gen_mov_i32(tmp, cpu_R[RRR_T]);
-                }
-
-                switch (maskimm) {
-                case 0xff:
-                    tcg_gen_ext8u_i32(cpu_R[RRR_R], tmp);
-                    break;
-
-                case 0xffff:
-                    tcg_gen_ext16u_i32(cpu_R[RRR_R], tmp);
-                    break;
-
-                default:
-                    tcg_gen_andi_i32(cpu_R[RRR_R], tmp, maskimm);
-                    break;
-                }
+                tcg_gen_shri_i32(tmp, cpu_R[RRR_T], shiftimm);
+                tcg_gen_andi_i32(cpu_R[RRR_R], tmp, maskimm);
                 tcg_temp_free(tmp);
             }
             break;
commit c9159fe9aa9abe24115ea4d16127179e9cb07e22
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Oct 5 19:39:33 2012 +0200

    Remove libhw
    
    The entries for libhw* are no longer needed in .gitignore.
    
    There is also no longer a difference between common-obj-y and
    hw-obj-y, so one of those two macros is sufficient.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/.gitignore b/.gitignore
index 3ef77d0..bd6ba1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,9 +12,6 @@ trace-dtrace.dtrace
 *-linux-user
 *-bsd-user
 libdis*
-libhw
-libhw32
-libhw64
 libuser
 linux-headers/asm
 qapi-generated
diff --git a/Makefile b/Makefile
index 1cebe3a..a9c22bf 100644
--- a/Makefile
+++ b/Makefile
@@ -214,7 +214,7 @@ $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
 
 qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
 
-QEMULIBS=libhw libuser libdis libdis-user
+QEMULIBS=libuser libdis libdis-user
 
 clean:
 # avoid old build problems by removing potentially incorrect old files
diff --git a/Makefile.hw b/Makefile.hw
deleted file mode 100644
index 86f0bf4..0000000
--- a/Makefile.hw
+++ /dev/null
@@ -1,22 +0,0 @@
-# Makefile for qemu target independent devices.
-
-include ../config-host.mak
-include ../config-all-devices.mak
-include $(SRC_PATH)/rules.mak
-
-.PHONY: all
-
-$(call set-vpath, $(SRC_PATH))
-
-QEMU_CFLAGS+=-I..
-QEMU_CFLAGS += -I$(SRC_PATH)/include
-
-include $(SRC_PATH)/Makefile.objs
-
-all: $(hw-obj-y)
-# Dummy command so that make thinks it has done something
-	@true
-
-clean:
-	rm -f $(addsuffix *.o, $(sort $(dir $(hw-obj-y))))
-	rm -f $(addsuffix *.d, $(sort $(dir $(hw-obj-y))))
diff --git a/Makefile.objs b/Makefile.objs
index b1f3e22..74b3542 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -90,10 +90,13 @@ common-obj-y += hw/
 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
 
+common-obj-y += dma-helpers.o
 common-obj-y += iov.o acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
+common-obj-y += qtest.o
+common-obj-y += vl.o
 
 common-obj-$(CONFIG_SLIRP) += slirp/
 
@@ -116,11 +119,6 @@ user-obj-y += $(trace-obj-y)
 user-obj-y += qom/
 
 ######################################################################
-# libhw
-
-hw-obj-y = vl.o dma-helpers.o qtest.o hw/
-
-######################################################################
 # libdis
 # NOTE: the disassembler code is only needed for debugging
 
@@ -240,7 +238,6 @@ vl.o: QEMU_CFLAGS+=$(SDL_CFLAGS)
 QEMU_CFLAGS+=$(GLIB_CFLAGS)
 
 nested-vars += \
-	hw-obj-y \
 	qga-obj-y \
 	block-obj-y \
 	qom-obj-y \
diff --git a/Makefile.target b/Makefile.target
index 4449444..3822bc5 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -154,7 +154,6 @@ all-obj-y += $(addprefix ../, $(universal-obj-y))
 ifdef CONFIG_SOFTMMU
 all-obj-y += $(addprefix ../, $(common-obj-y))
 all-obj-y += $(addprefix ../libdis/, $(libdis-y))
-all-obj-y += $(addprefix $(HWDIR)/, $(hw-obj-y))
 all-obj-y += $(addprefix ../, $(trace-obj-y))
 else
 all-obj-y += $(addprefix ../libuser/, $(user-obj-y))
diff --git a/configure b/configure
index e58846d..73a5f3d 100755
--- a/configure
+++ b/configure
@@ -3914,8 +3914,6 @@ fi
 if test "$target_softmmu" = "yes" ; then
   echo "CONFIG_SOFTMMU=y" >> $config_target_mak
   echo "LIBS+=$libs_softmmu $target_libs_softmmu" >> $config_target_mak
-  echo "HWDIR=../libhw" >> $config_target_mak
-  echo "subdir-$target: subdir-libhw" >> $config_host_mak
   if test "$smartcard_nss" = "yes" ; then
     echo "subdir-$target: subdir-libcacard" >> $config_host_mak
   fi
@@ -4157,9 +4155,6 @@ for rom in seabios vgabios ; do
     echo "LD=$ld" >> $config_mak
 done
 
-d=libhw
-symlink "$source_path/Makefile.hw" "$d/Makefile"
-
 d=libuser
 symlink "$source_path/Makefile.user" "$d/Makefile"
 
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index 972df24..1e9b595 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,9 +1,9 @@
-hw-obj-y  = virtio-9p.o
-hw-obj-y += virtio-9p-local.o virtio-9p-xattr.o
-hw-obj-y += virtio-9p-xattr-user.o virtio-9p-posix-acl.o
-hw-obj-y += virtio-9p-coth.o cofs.o codir.o cofile.o
-hw-obj-y += coxattr.o virtio-9p-synth.o
-hw-obj-$(CONFIG_OPEN_BY_HANDLE) +=  virtio-9p-handle.o
-hw-obj-y += virtio-9p-proxy.o
+common-obj-y  = virtio-9p.o
+common-obj-y += virtio-9p-local.o virtio-9p-xattr.o
+common-obj-y += virtio-9p-xattr-user.o virtio-9p-posix-acl.o
+common-obj-y += virtio-9p-coth.o cofs.o codir.o cofile.o
+common-obj-y += coxattr.o virtio-9p-synth.o
+common-obj-$(CONFIG_OPEN_BY_HANDLE) +=  virtio-9p-handle.o
+common-obj-y += virtio-9p-proxy.o
 
 obj-y += virtio-9p-device.o
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index ecdbe44..b59c749 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -1,140 +1,140 @@
-hw-obj-y = usb/ ide/
-hw-obj-y += loader.o
-hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
-hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
-hw-obj-y += fw_cfg.o
-hw-obj-$(CONFIG_PCI) += pci.o pci_bridge.o pci_bridge_dev.o
-hw-obj-$(CONFIG_PCI) += msix.o msi.o
-hw-obj-$(CONFIG_PCI) += shpc.o
-hw-obj-$(CONFIG_PCI) += slotid_cap.o
-hw-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
-hw-obj-$(CONFIG_PCI) += ioh3420.o xio3130_upstream.o xio3130_downstream.o
-hw-obj-y += watchdog.o
-hw-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
-hw-obj-$(CONFIG_ECC) += ecc.o
-hw-obj-$(CONFIG_NAND) += nand.o
-hw-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
-hw-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
-
-hw-obj-$(CONFIG_M48T59) += m48t59.o
-hw-obj-$(CONFIG_ESCC) += escc.o
-hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
-
-hw-obj-$(CONFIG_SERIAL) += serial.o
-hw-obj-$(CONFIG_PARALLEL) += parallel.o
-hw-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
-hw-obj-$(CONFIG_PCSPK) += pcspk.o
-hw-obj-$(CONFIG_PCKBD) += pckbd.o
-hw-obj-$(CONFIG_FDC) += fdc.o
-hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
-hw-obj-$(CONFIG_APM) += pm_smbus.o apm.o
-hw-obj-$(CONFIG_DMA) += dma.o
-hw-obj-$(CONFIG_I82374) += i82374.o
-hw-obj-$(CONFIG_HPET) += hpet.o
-hw-obj-$(CONFIG_APPLESMC) += applesmc.o
-hw-obj-$(CONFIG_SMARTCARD) += ccid-card-passthru.o
-hw-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
-hw-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
+common-obj-y = usb/ ide/
+common-obj-y += loader.o
+common-obj-$(CONFIG_VIRTIO) += virtio-console.o
+common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
+common-obj-y += fw_cfg.o
+common-obj-$(CONFIG_PCI) += pci.o pci_bridge.o pci_bridge_dev.o
+common-obj-$(CONFIG_PCI) += msix.o msi.o
+common-obj-$(CONFIG_PCI) += shpc.o
+common-obj-$(CONFIG_PCI) += slotid_cap.o
+common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
+common-obj-$(CONFIG_PCI) += ioh3420.o xio3130_upstream.o xio3130_downstream.o
+common-obj-y += watchdog.o
+common-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
+common-obj-$(CONFIG_ECC) += ecc.o
+common-obj-$(CONFIG_NAND) += nand.o
+common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
+common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
+
+common-obj-$(CONFIG_M48T59) += m48t59.o
+common-obj-$(CONFIG_ESCC) += escc.o
+common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
+
+common-obj-$(CONFIG_SERIAL) += serial.o
+common-obj-$(CONFIG_PARALLEL) += parallel.o
+common-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
+common-obj-$(CONFIG_PCSPK) += pcspk.o
+common-obj-$(CONFIG_PCKBD) += pckbd.o
+common-obj-$(CONFIG_FDC) += fdc.o
+common-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
+common-obj-$(CONFIG_APM) += pm_smbus.o apm.o
+common-obj-$(CONFIG_DMA) += dma.o
+common-obj-$(CONFIG_I82374) += i82374.o
+common-obj-$(CONFIG_HPET) += hpet.o
+common-obj-$(CONFIG_APPLESMC) += applesmc.o
+common-obj-$(CONFIG_SMARTCARD) += ccid-card-passthru.o
+common-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
+common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
 
 # PPC devices
-hw-obj-$(CONFIG_PREP_PCI) += prep_pci.o
-hw-obj-$(CONFIG_I82378) += i82378.o
+common-obj-$(CONFIG_PREP_PCI) += prep_pci.o
+common-obj-$(CONFIG_I82378) += i82378.o
 # Mac shared devices
-hw-obj-$(CONFIG_MACIO) += macio.o
-hw-obj-$(CONFIG_CUDA) += cuda.o
-hw-obj-$(CONFIG_ADB) += adb.o
-hw-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
-hw-obj-$(CONFIG_MAC_DBDMA) += mac_dbdma.o
+common-obj-$(CONFIG_MACIO) += macio.o
+common-obj-$(CONFIG_CUDA) += cuda.o
+common-obj-$(CONFIG_ADB) += adb.o
+common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
+common-obj-$(CONFIG_MAC_DBDMA) += mac_dbdma.o
 # OldWorld PowerMac
-hw-obj-$(CONFIG_HEATHROW_PIC) += heathrow_pic.o
-hw-obj-$(CONFIG_GRACKLE_PCI) += grackle_pci.o
+common-obj-$(CONFIG_HEATHROW_PIC) += heathrow_pic.o
+common-obj-$(CONFIG_GRACKLE_PCI) += grackle_pci.o
 # NewWorld PowerMac
-hw-obj-$(CONFIG_UNIN_PCI) += unin_pci.o
-hw-obj-$(CONFIG_DEC_PCI) += dec_pci.o
+common-obj-$(CONFIG_UNIN_PCI) += unin_pci.o
+common-obj-$(CONFIG_DEC_PCI) += dec_pci.o
 # PowerPC E500 boards
-hw-obj-$(CONFIG_PPCE500_PCI) += ppce500_pci.o
+common-obj-$(CONFIG_PPCE500_PCI) += ppce500_pci.o
 
 # MIPS devices
-hw-obj-$(CONFIG_PIIX4) += piix4.o
-hw-obj-$(CONFIG_G364FB) += g364fb.o
-hw-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
+common-obj-$(CONFIG_PIIX4) += piix4.o
+common-obj-$(CONFIG_G364FB) += g364fb.o
+common-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
 
 # Xilinx devices
-hw-obj-$(CONFIG_XILINX) += xilinx_intc.o
-hw-obj-$(CONFIG_XILINX) += xilinx_timer.o
-hw-obj-$(CONFIG_XILINX) += xilinx_uartlite.o
-hw-obj-$(CONFIG_XILINX_AXI) += xilinx_axidma.o
-hw-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o
-hw-obj-$(CONFIG_XILINX_AXI) += stream.o
+common-obj-$(CONFIG_XILINX) += xilinx_intc.o
+common-obj-$(CONFIG_XILINX) += xilinx_timer.o
+common-obj-$(CONFIG_XILINX) += xilinx_uartlite.o
+common-obj-$(CONFIG_XILINX_AXI) += xilinx_axidma.o
+common-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o
+common-obj-$(CONFIG_XILINX_AXI) += stream.o
 
 # PKUnity SoC devices
-hw-obj-$(CONFIG_PUV3) += puv3_intc.o
-hw-obj-$(CONFIG_PUV3) += puv3_ost.o
-hw-obj-$(CONFIG_PUV3) += puv3_gpio.o
-hw-obj-$(CONFIG_PUV3) += puv3_pm.o
-hw-obj-$(CONFIG_PUV3) += puv3_dma.o
+common-obj-$(CONFIG_PUV3) += puv3_intc.o
+common-obj-$(CONFIG_PUV3) += puv3_ost.o
+common-obj-$(CONFIG_PUV3) += puv3_gpio.o
+common-obj-$(CONFIG_PUV3) += puv3_pm.o
+common-obj-$(CONFIG_PUV3) += puv3_dma.o
 
 # ARM devices
-hw-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
-hw-obj-$(CONFIG_PL011) += pl011.o
-hw-obj-$(CONFIG_PL022) += pl022.o
-hw-obj-$(CONFIG_PL031) += pl031.o
-hw-obj-$(CONFIG_PL041) += pl041.o lm4549.o
-hw-obj-$(CONFIG_PL050) += pl050.o
-hw-obj-$(CONFIG_PL061) += pl061.o
-hw-obj-$(CONFIG_PL080) += pl080.o
-hw-obj-$(CONFIG_PL110) += pl110.o
-hw-obj-$(CONFIG_PL181) += pl181.o
-hw-obj-$(CONFIG_PL190) += pl190.o
-hw-obj-$(CONFIG_PL310) += arm_l2x0.o
-hw-obj-$(CONFIG_VERSATILE_PCI) += versatile_pci.o
-hw-obj-$(CONFIG_VERSATILE_I2C) += versatile_i2c.o
-hw-obj-$(CONFIG_CADENCE) += cadence_uart.o
-hw-obj-$(CONFIG_CADENCE) += cadence_ttc.o
-hw-obj-$(CONFIG_CADENCE) += cadence_gem.o
-hw-obj-$(CONFIG_XGMAC) += xgmac.o
+common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
+common-obj-$(CONFIG_PL011) += pl011.o
+common-obj-$(CONFIG_PL022) += pl022.o
+common-obj-$(CONFIG_PL031) += pl031.o
+common-obj-$(CONFIG_PL041) += pl041.o lm4549.o
+common-obj-$(CONFIG_PL050) += pl050.o
+common-obj-$(CONFIG_PL061) += pl061.o
+common-obj-$(CONFIG_PL080) += pl080.o
+common-obj-$(CONFIG_PL110) += pl110.o
+common-obj-$(CONFIG_PL181) += pl181.o
+common-obj-$(CONFIG_PL190) += pl190.o
+common-obj-$(CONFIG_PL310) += arm_l2x0.o
+common-obj-$(CONFIG_VERSATILE_PCI) += versatile_pci.o
+common-obj-$(CONFIG_VERSATILE_I2C) += versatile_i2c.o
+common-obj-$(CONFIG_CADENCE) += cadence_uart.o
+common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
+common-obj-$(CONFIG_CADENCE) += cadence_gem.o
+common-obj-$(CONFIG_XGMAC) += xgmac.o
 
 # PCI watchdog devices
-hw-obj-$(CONFIG_PCI) += wdt_i6300esb.o
+common-obj-$(CONFIG_PCI) += wdt_i6300esb.o
 
-hw-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
+common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
 
 # PCI network cards
-hw-obj-$(CONFIG_NE2000_PCI) += ne2000.o
-hw-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
-hw-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
-hw-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
-hw-obj-$(CONFIG_E1000_PCI) += e1000.o
-hw-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
-
-hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
-hw-obj-$(CONFIG_LAN9118) += lan9118.o
-hw-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o
-hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
+common-obj-$(CONFIG_NE2000_PCI) += ne2000.o
+common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
+common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
+common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
+common-obj-$(CONFIG_E1000_PCI) += e1000.o
+common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
+
+common-obj-$(CONFIG_SMC91C111) += smc91c111.o
+common-obj-$(CONFIG_LAN9118) += lan9118.o
+common-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o
+common-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
 
 # SCSI layer
-hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
-hw-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
-hw-obj-$(CONFIG_ESP) += esp.o
-hw-obj-$(CONFIG_ESP_PCI) += esp-pci.o
+common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+common-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
+common-obj-$(CONFIG_ESP) += esp.o
+common-obj-$(CONFIG_ESP_PCI) += esp-pci.o
 
-hw-obj-y += sysbus.o isa-bus.o
-hw-obj-y += qdev-addr.o
+common-obj-y += sysbus.o isa-bus.o
+common-obj-y += qdev-addr.o
 
 # VGA
-hw-obj-$(CONFIG_VGA_PCI) += vga-pci.o
-hw-obj-$(CONFIG_VGA_ISA) += vga-isa.o
-hw-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o
-hw-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o
-hw-obj-$(CONFIG_VMMOUSE) += vmmouse.o
-hw-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
+common-obj-$(CONFIG_VGA_PCI) += vga-pci.o
+common-obj-$(CONFIG_VGA_ISA) += vga-isa.o
+common-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o
+common-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o
+common-obj-$(CONFIG_VMMOUSE) += vmmouse.o
+common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
 
-hw-obj-$(CONFIG_RC4030) += rc4030.o
-hw-obj-$(CONFIG_DP8393X) += dp8393x.o
-hw-obj-$(CONFIG_DS1225Y) += ds1225y.o
-hw-obj-$(CONFIG_MIPSNET) += mipsnet.o
+common-obj-$(CONFIG_RC4030) += rc4030.o
+common-obj-$(CONFIG_DP8393X) += dp8393x.o
+common-obj-$(CONFIG_DS1225Y) += ds1225y.o
+common-obj-$(CONFIG_MIPSNET) += mipsnet.o
 
-hw-obj-y += null-machine.o
+common-obj-y += null-machine.o
 
 # Sound
 sound-obj-y =
@@ -148,9 +148,9 @@ sound-obj-$(CONFIG_HDA) += intel-hda.o hda-audio.o
 
 $(obj)/adlib.o $(obj)/fmopl.o: QEMU_CFLAGS += -DBUILD_Y8950=0
 
-hw-obj-$(CONFIG_SOUND) += $(sound-obj-y)
+common-obj-$(CONFIG_SOUND) += $(sound-obj-y)
 
-hw-obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
+common-obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
 
 common-obj-y += usb/
 common-obj-y += irq.o
diff --git a/hw/ide/Makefile.objs b/hw/ide/Makefile.objs
index cf718dd..5c8c22a 100644
--- a/hw/ide/Makefile.objs
+++ b/hw/ide/Makefile.objs
@@ -1,10 +1,10 @@
-hw-obj-$(CONFIG_IDE_CORE) += core.o atapi.o
-hw-obj-$(CONFIG_IDE_QDEV) += qdev.o
-hw-obj-$(CONFIG_IDE_PCI) += pci.o
-hw-obj-$(CONFIG_IDE_ISA) += isa.o
-hw-obj-$(CONFIG_IDE_PIIX) += piix.o
-hw-obj-$(CONFIG_IDE_CMD646) += cmd646.o
-hw-obj-$(CONFIG_IDE_MACIO) += macio.o
-hw-obj-$(CONFIG_IDE_VIA) += via.o
-hw-obj-$(CONFIG_AHCI) += ahci.o
-hw-obj-$(CONFIG_AHCI) += ich.o
+common-obj-$(CONFIG_IDE_CORE) += core.o atapi.o
+common-obj-$(CONFIG_IDE_QDEV) += qdev.o
+common-obj-$(CONFIG_IDE_PCI) += pci.o
+common-obj-$(CONFIG_IDE_ISA) += isa.o
+common-obj-$(CONFIG_IDE_PIIX) += piix.o
+common-obj-$(CONFIG_IDE_CMD646) += cmd646.o
+common-obj-$(CONFIG_IDE_MACIO) += macio.o
+common-obj-$(CONFIG_IDE_VIA) += via.o
+common-obj-$(CONFIG_AHCI) += ahci.o
+common-obj-$(CONFIG_AHCI) += ich.o
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index 4225136..6425c1f 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -1,11 +1,11 @@
-hw-obj-$(CONFIG_USB_UHCI) += hcd-uhci.o
-hw-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o
-hw-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o
-hw-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o
-hw-obj-y += libhw.o
+common-obj-$(CONFIG_USB_UHCI) += hcd-uhci.o
+common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o
+common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o
+common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o
+common-obj-y += libhw.o
 
-hw-obj-$(CONFIG_SMARTCARD) += dev-smartcard-reader.o
-hw-obj-$(CONFIG_USB_REDIR) += redirect.o
+common-obj-$(CONFIG_SMARTCARD) += dev-smartcard-reader.o
+common-obj-$(CONFIG_USB_REDIR) += redirect.o
 
 common-obj-y += core.o bus.o desc.o dev-hub.o
 common-obj-y += host-$(HOST_USB).o dev-bluetooth.o
commit 1273d9ca09e91bb290d10f704055f6abec363dd6
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 5 15:04:45 2012 +0100

    target-arm: Drop unused DECODE_CPREG_CRN macro
    
    This macro snuck through code review despite being unused; drop it.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7fac94f..ff4de10 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -423,8 +423,6 @@ void armv7m_nvic_complete_irq(void *opaque, int irq);
     (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
      ((crm) << 7) | ((opc1) << 3) | (opc2))
 
-#define DECODE_CPREG_CRN(enc) (((enc) >> 7) & 0xf)
-
 /* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a
  * special-behaviour cp reg and bits [15..8] indicate what behaviour
  * it has. Otherwise it is a simple cp reg, where CONST indicates that
commit d593c48ef91b6fee609d9c87958fe76dd2b36852
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 5 15:04:45 2012 +0100

    target-arm: use deposit instead of hardcoded version
    
    Use the deposit op instead of a hardcoded bit field insertion. It
    allows the host to emit the corresponding instruction if available.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 92ceacd..c6840b7 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -277,15 +277,6 @@ static void gen_sbfx(TCGv var, int shift, int width)
     }
 }
 
-/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
-static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
-{
-    tcg_gen_andi_i32(val, val, mask);
-    tcg_gen_shli_i32(val, val, shift);
-    tcg_gen_andi_i32(base, base, ~(mask << shift));
-    tcg_gen_or_i32(dest, base, val);
-}
-
 /* Return (b << 32) + a. Mark inputs as dead */
 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
 {
@@ -2660,12 +2651,12 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                         switch (size) {
                         case 0:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 1:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 2:
@@ -4021,7 +4012,8 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     }
                     if (size != 2) {
                         tmp2 = neon_load_reg(rd, pass);
-                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
+                        tcg_gen_deposit_i32(tmp, tmp2, tmp,
+                                            shift, size ? 16 : 8);
                         tcg_temp_free_i32(tmp2);
                     }
                     neon_store_reg(rd, pass, tmp);
@@ -7625,7 +7617,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         }
                         if (i != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
                             tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
@@ -8736,7 +8728,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         imm = imm + 1 - shift;
                         if (imm != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
                             tcg_temp_free_i32(tmp2);
                         }
                         break;
commit 00e3ab2db059d243e210052937fc4f4169aa839c
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 5 15:04:45 2012 +0100

    target-arm: mark a few integer helpers const and pure
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 794e2b1..8b9adf1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -1,8 +1,8 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(clz, i32, i32)
-DEF_HELPER_1(sxtb16, i32, i32)
-DEF_HELPER_1(uxtb16, i32, i32)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 DEF_HELPER_3(add_setq, i32, env, i32, i32)
 DEF_HELPER_3(add_saturate, i32, env, i32, i32)
@@ -10,10 +10,10 @@ DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
 DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
 DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
 DEF_HELPER_2(double_saturate, i32, env, s32)
-DEF_HELPER_2(sdiv, s32, s32, s32)
-DEF_HELPER_2(udiv, i32, i32, i32)
-DEF_HELPER_1(rbit, i32, i32)
-DEF_HELPER_1(abs, i32, i32)
+DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_CONST | TCG_CALL_PURE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(udiv, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
+DEF_HELPER_FLAGS_1(rbit, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(abs, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
@@ -45,11 +45,12 @@ DEF_HELPER_3(usat, i32, env, i32, i32)
 DEF_HELPER_3(ssat16, i32, env, i32, i32)
 DEF_HELPER_3(usat16, i32, env, i32, i32)
 
-DEF_HELPER_2(usad8, i32, i32, i32)
+DEF_HELPER_FLAGS_2(usad8, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
-DEF_HELPER_3(sel_flags, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_CONST | TCG_CALL_PURE,
+                   i32, i32, i32, i32)
 DEF_HELPER_2(exception, void, env, i32)
 DEF_HELPER_1(wfi, void, env)
 
commit 365af80e47bf150506345cdfbcbf32120f18cdf6
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 5 15:04:44 2012 +0100

    target-arm: convert sar, shl and shr helpers to TCG
    
    Now that the movcond TCG op is available, it's possible to replace
    shl and shr helpers by TCG code. The code generated by TCG is slightly
    longer than the code generated by GCC for the helper but is still worth
    it as this avoids all the consequences of using a helper: globals saved
    back to memory, no possible optimization, call overhead, etc.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 7151e28..794e2b1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -145,9 +145,6 @@ DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(adc_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
-DEF_HELPER_3(shl, i32, env, i32, i32)
-DEF_HELPER_3(shr, i32, env, i32, i32)
-DEF_HELPER_3(sar, i32, env, i32, i32)
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
 DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6095f24..aef592a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -355,30 +355,6 @@ uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 
 /* Similarly for variable shift instructions.  */
 
-uint32_t HELPER(shl)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return x << shift;
-}
-
-uint32_t HELPER(shr)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return (uint32_t)x >> shift;
-}
-
-uint32_t HELPER(sar)(CPUARMState *env, uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        shift = 31;
-    return (int32_t)x >> shift;
-}
-
 uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index da3246a..92ceacd 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -440,6 +440,37 @@ static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_mov_i32(dest, cpu_NF);
 }
 
+#define GEN_SHIFT(name)                                               \
+static void gen_##name(TCGv dest, TCGv t0, TCGv t1)                   \
+{                                                                     \
+    TCGv tmp1, tmp2, tmp3;                                            \
+    tmp1 = tcg_temp_new_i32();                                        \
+    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
+    tmp2 = tcg_const_i32(0);                                          \
+    tmp3 = tcg_const_i32(0x1f);                                       \
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
+    tcg_temp_free_i32(tmp3);                                          \
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
+    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
+    tcg_temp_free_i32(tmp2);                                          \
+    tcg_temp_free_i32(tmp1);                                          \
+}
+GEN_SHIFT(shl)
+GEN_SHIFT(shr)
+#undef GEN_SHIFT
+
+static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp1, tmp2;
+    tmp1 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(tmp1, t1, 0xff);
+    tmp2 = tcg_const_i32(0x1f);
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
+    tcg_temp_free_i32(tmp2);
+    tcg_gen_sar_i32(dest, t0, tmp1);
+    tcg_temp_free_i32(tmp1);
+}
+
 /* FIXME:  Implement this natively.  */
 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
 
@@ -516,9 +547,15 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop,
         }
     } else {
         switch (shiftop) {
-        case 0: gen_helper_shl(var, cpu_env, var, shift); break;
-        case 1: gen_helper_shr(var, cpu_env, var, shift); break;
-        case 2: gen_helper_sar(var, cpu_env, var, shift); break;
+        case 0:
+            gen_shl(var, var, shift);
+            break;
+        case 1:
+            gen_shr(var, var, shift);
+            break;
+        case 2:
+            gen_sar(var, var, shift);
+            break;
         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
@@ -9161,7 +9198,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x2: /* lsl */
             if (s->condexec_mask) {
-                gen_helper_shl(tmp2, cpu_env, tmp2, tmp);
+                gen_shl(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9169,7 +9206,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x3: /* lsr */
             if (s->condexec_mask) {
-                gen_helper_shr(tmp2, cpu_env, tmp2, tmp);
+                gen_shr(tmp2, tmp2, tmp);
             } else {
                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
@@ -9177,7 +9214,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
         case 0x4: /* asr */
             if (s->condexec_mask) {
-                gen_helper_sar(tmp2, cpu_env, tmp2, tmp);
+                gen_sar(tmp2, tmp2, tmp);
             } else {
                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
commit 72485ec4f63d86c428f9223fc966bd7d2cc8100c
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 5 15:04:44 2012 +0100

    target-arm: convert add_cc and sub_cc helpers to TCG
    
    Now that the setcond TCG op is available, it's possible to replace
    add_cc and sub_cc helpers by TCG code. The code generated by TCG is
    actually very close to the one generated by GCC for the helper, and
    this avoids all the consequences of using a helper: globals saved back
    to memory, no possible optimization, call overhead, etc.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.h b/target-arm/helper.h
index afdb2b5..7151e28 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -142,9 +142,7 @@ DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
 DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_3(add_cc, i32, env, i32, i32)
 DEF_HELPER_3(adc_cc, i32, env, i32, i32)
-DEF_HELPER_3(sub_cc, i32, env, i32, i32)
 DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_3(shl, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index f13fc3a..6095f24 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -323,16 +323,6 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER (add_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a + b;
-    env->NF = env->ZF = result;
-    env->CF = result < a;
-    env->VF = (a ^ b ^ -1) & (a ^ result);
-    return result;
-}
-
 uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
@@ -348,16 +338,6 @@ uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
     return result;
 }
 
-uint32_t HELPER(sub_cc)(CPUARMState *env, uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a - b;
-    env->NF = env->ZF = result;
-    env->CF = a >= b;
-    env->VF = (a ^ b) & (a ^ result);
-    return result;
-}
-
 uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1055931..da3246a 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -410,6 +410,36 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
     tcg_gen_subi_i32(dest, dest, 1);
 }
 
+/* dest = T0 + T1. Compute C, N, V and Z flags */
+static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_add_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_LTU, cpu_CF, cpu_NF, t0);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+/* dest = T0 - T1. Compute C, N, V and Z flags */
+static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_sub_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
 /* FIXME:  Implement this natively.  */
 #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
 
@@ -6970,11 +7000,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 if (IS_USER(s)) {
                     goto illegal_op;
                 }
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 gen_exception_return(s, tmp);
             } else {
                 if (set_cc) {
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 } else {
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 }
@@ -6983,7 +7013,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x03:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
+                gen_sub_CC(tmp, tmp2, tmp);
             } else {
                 tcg_gen_sub_i32(tmp, tmp2, tmp);
             }
@@ -6991,7 +7021,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x04:
             if (set_cc) {
-                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             } else {
                 tcg_gen_add_i32(tmp, tmp, tmp2);
             }
@@ -7037,13 +7067,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0x0a:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
@@ -7830,7 +7860,7 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
         break;
     case 8: /* add */
         if (conds)
-            gen_helper_add_cc(t0, cpu_env, t0, t1);
+            gen_add_CC(t0, t0, t1);
         else
             tcg_gen_add_i32(t0, t0, t1);
         break;
@@ -7848,13 +7878,13 @@ gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCG
         break;
     case 13: /* sub */
         if (conds)
-            gen_helper_sub_cc(t0, cpu_env, t0, t1);
+            gen_sub_CC(t0, t0, t1);
         else
             tcg_gen_sub_i32(t0, t0, t1);
         break;
     case 14: /* rsb */
         if (conds)
-            gen_helper_sub_cc(t0, cpu_env, t1, t0);
+            gen_sub_CC(t0, t1, t0);
         else
             tcg_gen_sub_i32(t0, t1, t0);
         break;
@@ -8982,12 +9012,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
             } else {
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
@@ -9018,7 +9048,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp);
                 tcg_temp_free_i32(tmp2);
                 break;
@@ -9026,7 +9056,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9034,7 +9064,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9070,7 +9100,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
                 tmp2 = load_reg(s, rm);
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp);
                 break;
@@ -9183,14 +9213,14 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             if (s->condexec_mask)
                 tcg_gen_neg_i32(tmp, tmp2);
             else
-                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             break;
         case 0xa: /* cmp */
-            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
+            gen_sub_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xb: /* cmn */
-            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
+            gen_add_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xc: /* orr */
commit 66c374de8a949062bfb1792f7e685a168c61c416
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 5 15:04:44 2012 +0100

    target-arm: use globals for CC flags
    
    Use globals for CC flags instead of loading/storing them each time they are
    accessed. This allows some optimizations to be performed by the TCG
    optimization passes.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index bb53e35..1055931 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -85,6 +85,7 @@ static TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 static TCGv_i32 cpu_exclusive_addr;
 static TCGv_i32 cpu_exclusive_val;
 static TCGv_i32 cpu_exclusive_high;
@@ -115,6 +116,11 @@ void arm_translate_init(void)
                                           offsetof(CPUARMState, regs[i]),
                                           regnames[i]);
     }
+    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
+    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
+    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
+    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
+
     cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
@@ -369,53 +375,39 @@ static void gen_add16(TCGv t0, TCGv t1)
     tcg_temp_free_i32(t1);
 }
 
-#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, CF))
-
 /* Set CF to the top bit of var.  */
 static void gen_set_CF_bit31(TCGv var)
 {
-    TCGv tmp = tcg_temp_new_i32();
-    tcg_gen_shri_i32(tmp, var, 31);
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_shri_i32(cpu_CF, var, 31);
 }
 
 /* Set N and Z flags from var.  */
 static inline void gen_logic_CC(TCGv var)
 {
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, NF));
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, ZF));
+    tcg_gen_mov_i32(cpu_NF, var);
+    tcg_gen_mov_i32(cpu_ZF, var);
 }
 
 /* T0 += T1 + CF.  */
 static void gen_adc(TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(t0, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(t0, t0, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(t0, t0, cpu_CF);
 }
 
 /* dest = T0 + T1 + CF. */
 static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
 }
 
 /* dest = T0 - T1 + CF - 1.  */
 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_sub_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
     tcg_gen_subi_i32(dest, dest, 1);
-    tcg_temp_free_i32(tmp);
 }
 
 /* FIXME:  Implement this natively.  */
@@ -423,16 +415,14 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 
 static void shifter_out_im(TCGv var, int shift)
 {
-    TCGv tmp = tcg_temp_new_i32();
     if (shift == 0) {
-        tcg_gen_andi_i32(tmp, var, 1);
+        tcg_gen_andi_i32(cpu_CF, var, 1);
     } else {
-        tcg_gen_shri_i32(tmp, var, shift);
-        if (shift != 31)
-            tcg_gen_andi_i32(tmp, tmp, 1);
+        tcg_gen_shri_i32(cpu_CF, var, shift);
+        if (shift != 31) {
+            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
+        }
     }
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
 }
 
 /* Shift by immediate.  Includes special handling for shift == 0.  */
@@ -449,8 +439,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
     case 1: /* LSR */
         if (shift == 0) {
             if (flags) {
-                tcg_gen_shri_i32(var, var, 31);
-                gen_set_CF(var);
+                tcg_gen_shri_i32(cpu_CF, var, 31);
             }
             tcg_gen_movi_i32(var, 0);
         } else {
@@ -474,11 +463,11 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
                 shifter_out_im(var, shift - 1);
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
-            TCGv tmp = load_cpu_field(CF);
+            TCGv tmp = tcg_temp_new_i32();
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, tmp, 31);
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
@@ -603,99 +592,75 @@ static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 static void gen_test_cc(int cc, int label)
 {
     TCGv tmp;
-    TCGv tmp2;
     int inv;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 1: /* ne: !Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         break;
     case 2: /* cs: C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
         break;
     case 3: /* cc: !C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
         break;
     case 4: /* mi: N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
         break;
     case 5: /* pl: !N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
         break;
     case 6: /* vs: V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
         break;
     case 7: /* vc: !V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
         break;
     case 8: /* hi: C && !Z */
         inv = gen_new_label();
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         gen_set_label(inv);
         break;
     case 9: /* ls: !C || Z */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 12: /* gt: !Z && N == V */
         inv = gen_new_label();
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         gen_set_label(inv);
         break;
     case 13: /* le: Z || N != V */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
-    tcg_temp_free_i32(tmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
commit f2617cfc237a7f7e72bd3791ed1f3d35f496bb22
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 5 15:04:44 2012 +0100

    target-arm: Reinstate display of VFP registers in cpu_dump_state
    
    Reinstate the display of VFP registers in cpu_dump_state(), if
    the CPU has them (this code had been #if 0'd out for a long time).
    We drop the attempt to display the values as floating point, since
    this makes assumptions about the host 'float' and 'double' formats
    and is not done by eg the i386 cpu_dump_state().
    This display is gated on the CPU_DUMP_FPU flag, as for x86.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5fded49..bb53e35 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9970,19 +9970,6 @@ void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
                     int flags)
 {
     int i;
-#if 0
-    union {
-        uint32_t i;
-        float s;
-    } s0, s1;
-    CPU_DoubleU d;
-    /* ??? This assumes float64 and double have the same layout.
-       Oh well, it's only debug dumps.  */
-    union {
-        float64 f64;
-        double d;
-    } d0;
-#endif
     uint32_t psr;
 
     for(i=0;i<16;i++) {
@@ -10002,20 +9989,23 @@ void cpu_dump_state(CPUARMState *env, FILE *f, fprintf_function cpu_fprintf,
                 psr & CPSR_T ? 'T' : 'A',
                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
 
-#if 0
-    for (i = 0; i < 16; i++) {
-        d.d = env->vfp.regs[i];
-        s0.i = d.l.lower;
-        s1.i = d.l.upper;
-        d0.f64 = d.d;
-        cpu_fprintf(f, "s%02d=%08x(%8g) s%02d=%08x(%8g) d%02d=%08x%08x(%8g)\n",
-                    i * 2, (int)s0.i, s0.s,
-                    i * 2 + 1, (int)s1.i, s1.s,
-                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
-                    d0.d);
+    if (flags & CPU_DUMP_FPU) {
+        int numvfpregs = 0;
+        if (arm_feature(env, ARM_FEATURE_VFP)) {
+            numvfpregs += 16;
+        }
+        if (arm_feature(env, ARM_FEATURE_VFP3)) {
+            numvfpregs += 16;
+        }
+        for (i = 0; i < numvfpregs; i++) {
+            uint64_t v = float64_val(env->vfp.regs[i]);
+            cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
+                        i * 2, (uint32_t)v,
+                        i * 2 + 1, (uint32_t)(v >> 32),
+                        i, v);
+        }
+        cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
     }
-    cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
-#endif
 }
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
commit 6fd2a026fbf46ed5927a57e3e043a2039a0651d0
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 5 15:04:43 2012 +0100

    cpu_dump_state: move DUMP_FPU and DUMP_CCOP flags from x86-only to generic
    
    Move the DUMP_FPU and DUMP_CCOP flags for cpu_dump_state() from being
    x86-specific flags to being generic ones. This allows us to drop some
    TARGET_I386 ifdefs in various places, and means that we can (potentially)
    be more consistent across architectures about which monitor commands or
    debug abort printouts include FPU register contents and info about
    QEMU's condition-code optimisations.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/cpu-all.h b/cpu-all.h
index 74d3681..2b99682 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -356,6 +356,9 @@ CPUArchState *cpu_copy(CPUArchState *env);
 CPUArchState *qemu_get_cpu(int cpu);
 
 #define CPU_DUMP_CODE 0x00010000
+#define CPU_DUMP_FPU 0x00020000 /* dump FPU register state, not just integer */
+/* dump info about TCG QEMU's condition code optimization state */
+#define CPU_DUMP_CCOP 0x00040000
 
 void cpu_dump_state(CPUArchState *env, FILE *f, fprintf_function cpu_fprintf,
                     int flags);
diff --git a/cpu-exec.c b/cpu-exec.c
index 134b3c4..252da86 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -552,7 +552,7 @@ int cpu_exec(CPUArchState *env)
 #if defined(TARGET_I386)
                     env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
                         | (DF & DF_MASK);
-                    log_cpu_state(env, X86_DUMP_CCOP);
+                    log_cpu_state(env, CPU_DUMP_CCOP);
                     env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_M68K)
                     cpu_m68k_flush_flags(env, env->cc_op);
diff --git a/cpus.c b/cpus.c
index 4b726ef..5ba87fe 100644
--- a/cpus.c
+++ b/cpus.c
@@ -395,11 +395,7 @@ void hw_error(const char *fmt, ...)
     fprintf(stderr, "\n");
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         fprintf(stderr, "CPU #%d:\n", env->cpu_index);
-#ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
-#else
-        cpu_dump_state(env, stderr, fprintf, 0);
-#endif
+        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
     }
     va_end(ap);
     abort();
diff --git a/exec.c b/exec.c
index 1114a09..7899042 100644
--- a/exec.c
+++ b/exec.c
@@ -1742,20 +1742,12 @@ void cpu_abort(CPUArchState *env, const char *fmt, ...)
     fprintf(stderr, "qemu: fatal: ");
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
-#ifdef TARGET_I386
-    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
-#else
-    cpu_dump_state(env, stderr, fprintf, 0);
-#endif
+    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
     if (qemu_log_enabled()) {
         qemu_log("qemu: fatal: ");
         qemu_log_vprintf(fmt, ap2);
         qemu_log("\n");
-#ifdef TARGET_I386
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
-#else
-        log_cpu_state(env, 0);
-#endif
+        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
         qemu_log_flush();
         qemu_log_close();
     }
diff --git a/monitor.c b/monitor.c
index a0e3ffb..131b325 100644
--- a/monitor.c
+++ b/monitor.c
@@ -898,13 +898,7 @@ static void do_info_registers(Monitor *mon)
 {
     CPUArchState *env;
     env = mon_get_cpu();
-#ifdef TARGET_I386
-    cpu_dump_state(env, (FILE *)mon, monitor_fprintf,
-                   X86_DUMP_FPU);
-#else
-    cpu_dump_state(env, (FILE *)mon, monitor_fprintf,
-                   0);
-#endif
+    cpu_dump_state(env, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
 }
 
 static void do_info_jit(Monitor *mon)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index bb1e44e..f3708e6 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1759,7 +1759,7 @@ static void x86_cpu_reset(CPUState *s)
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
         qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
     }
 
     xcc->parent_reset(s);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e4a7d5b..871c270 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -995,10 +995,6 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
 void cpu_smm_update(CPUX86State *env);
 uint64_t cpu_get_tsc(CPUX86State *env);
 
-/* used to debug */
-#define X86_DUMP_FPU  0x0001 /* dump FPU state too */
-#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
-
 #define TARGET_PAGE_BITS 12
 
 #ifdef TARGET_X86_64
diff --git a/target-i386/helper.c b/target-i386/helper.c
index c635667..2ee7c6d 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -284,7 +284,7 @@ void cpu_dump_state(CPUX86State *env, FILE *f, fprintf_function cpu_fprintf,
         cpu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n",
                     env->dr[6], env->dr[7]);
     }
-    if (flags & X86_DUMP_CCOP) {
+    if (flags & CPU_DUMP_CCOP) {
         if ((unsigned)env->cc_op < CC_OP_NB)
             snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
         else
@@ -303,7 +303,7 @@ void cpu_dump_state(CPUX86State *env, FILE *f, fprintf_function cpu_fprintf,
         }
     }
     cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
-    if (flags & X86_DUMP_FPU) {
+    if (flags & CPU_DUMP_FPU) {
         int fptag;
         fptag = 0;
         for(i = 0; i < 8; i++) {
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 5fff8d5..ff93374 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -31,7 +31,7 @@
 #ifdef DEBUG_PCALL
 # define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
 # define LOG_PCALL_STATE(env)                                  \
-    log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+    log_cpu_state_mask(CPU_LOG_PCALL, (env), CPU_DUMP_CCOP)
 #else
 # define LOG_PCALL(...) do { } while (0)
 # define LOG_PCALL_STATE(env) do { } while (0)
@@ -1177,7 +1177,7 @@ static void do_interrupt_all(CPUX86State *env, int intno, int is_int,
                 qemu_log(" EAX=" TARGET_FMT_lx, EAX);
             }
             qemu_log("\n");
-            log_cpu_state(env, X86_DUMP_CCOP);
+            log_cpu_state(env, CPU_DUMP_CCOP);
 #if 0
             {
                 int i;
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 8b04eb2..eea2fe9 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -47,7 +47,7 @@ void do_smm_enter(CPUX86State *env)
     int i, offset;
 
     qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+    log_cpu_state_mask(CPU_LOG_INT, env, CPU_DUMP_CCOP);
 
     env->hflags |= HF_SMM_MASK;
     cpu_smm_update(env);
@@ -295,7 +295,7 @@ void helper_rsm(CPUX86State *env)
     cpu_smm_update(env);
 
     qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+    log_cpu_state_mask(CPU_LOG_INT, env, CPU_DUMP_CCOP);
 }
 
 #endif /* !CONFIG_USER_ONLY */
commit 02cd521f6e207bd8843a7c1309f6d065cf01c6ee
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Oct 4 22:49:43 2012 +0200

    versatilepb: Use symbolic indices for ARM PIC
    
    It is more readable, and all other code does it like that, too.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 7a92034..b3f8077 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -211,7 +211,8 @@ static void versatile_init(ram_addr_t ram_size,
 
     cpu_pic = arm_pic_init_cpu(cpu);
     dev = sysbus_create_varargs("pl190", 0x10140000,
-                                cpu_pic[0], cpu_pic[1], NULL);
+                                cpu_pic[ARM_PIC_CPU_IRQ],
+                                cpu_pic[ARM_PIC_CPU_FIQ], NULL);
     for (n = 0; n < 32; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
commit c36b7de64dfb106ee0384d2634fd4037218ffbca
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Oct 4 16:15:21 2012 -0300

    qdev: kill bogus comment
    
    When the DeviceInfo code was removed, the comment describing
    qdev_subclass_init() was left in the code by mistake. Remove it.
    
    Cc: qemu-trivial at nongnu.org
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/hw/qdev.c b/hw/qdev.c
index b5a52ac..a7270a5 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -34,7 +34,6 @@ int qdev_hotplug = 0;
 static bool qdev_hot_added = false;
 static bool qdev_hot_removed = false;
 
-/* Register a new device type.  */
 const VMStateDescription *qdev_get_vmsd(DeviceState *dev)
 {
     DeviceClass *dc = DEVICE_GET_CLASS(dev);
commit 8e7e2b14e7c86cdceb337cf7f5c4c351ab350775
Author: Stefan Weil <sw at weilnetz.de>
Date:   Wed Oct 3 23:11:02 2012 +0200

    qemu-barrier: Fix compiler version check for future gcc versions
    
    The current check will give a wrong result for gcc-5.x with x < 4.
    Using QEMU_GNUC_PREREQ is simpler and fixes that issue.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/qemu-barrier.h b/qemu-barrier.h
index 7e11197..16f0942 100644
--- a/qemu-barrier.h
+++ b/qemu-barrier.h
@@ -19,7 +19,7 @@
  * mfence on 32 bit as well, e.g. if built with -march=pentium-m.
  * However, on i386, there seem to be known bugs as recently as 4.3.
  * */
-#if defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if QEMU_GNUC_PREREQ(4, 4)
 #define smp_mb() __sync_synchronize()
 #else
 #define smp_mb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
commit da665c99f1b5d3247f7f9f8a24f3eb1fda94e1e4
Author: Stefan Weil <sw at weilnetz.de>
Date:   Wed Oct 3 11:19:39 2012 +0200

    hw: Add missing 'static' attribute for QEMUMachine
    
    It was missing for leon3 and mips_fulong2e.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/hw/leon3.c b/hw/leon3.c
index 878d3aa..7a9729d 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -210,7 +210,7 @@ static void leon3_generic_hw_init(ram_addr_t  ram_size,
     }
 }
 
-QEMUMachine leon3_generic_machine = {
+static QEMUMachine leon3_generic_machine = {
     .name     = "leon3_generic",
     .desc     = "Leon-3 generic",
     .init     = leon3_generic_hw_init,
diff --git a/hw/mips_fulong2e.c b/hw/mips_fulong2e.c
index 38e4b86..d4a8672 100644
--- a/hw/mips_fulong2e.c
+++ b/hw/mips_fulong2e.c
@@ -392,7 +392,7 @@ static void mips_fulong2e_init(ram_addr_t ram_size, const char *boot_device,
     network_init();
 }
 
-QEMUMachine mips_fulong2e_machine = {
+static QEMUMachine mips_fulong2e_machine = {
     .name = "fulong2e",
     .desc = "Fulong 2e mini pc",
     .init = mips_fulong2e_init,
commit 4d5b97da35ecbfdad6fe1cbe7fdd7d71ef735602
Author: Amos Kong <akong at redhat.com>
Date:   Sat Sep 29 13:36:21 2012 +0800

    cleanup useless return sentence
    
    This patch removes redundant return statements at the end of void functions.
    
    Reported-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Amos Kong <akong at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/block/qed-table.c b/block/qed-table.c
index ce07b05..de845ec 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -103,7 +103,6 @@ static void qed_write_table_cb(void *opaque, int ret)
 out:
     qemu_vfree(write_table_cb->table);
     gencb_complete(&write_table_cb->gencb, ret);
-    return;
 }
 
 /**
diff --git a/blockdev.c b/blockdev.c
index 5f18dfa..99828ad 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -830,7 +830,6 @@ exit:
     QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
         g_free(states);
     }
-    return;
 }
 
 
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 4b52540..8b9cdc9 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -505,7 +505,6 @@ static void virtfs_reset(V9fsPDU *pdu)
         error_report("9pfs:%s: One or more uncluncked fids "
                      "found during reset", __func__);
     }
-    return;
 }
 
 #define P9_QID_TYPE_DIR         0x80
@@ -934,7 +933,6 @@ static void v9fs_version(void *opaque)
 out:
     complete_pdu(s, pdu, offset);
     v9fs_string_free(&version);
-    return;
 }
 
 static void v9fs_attach(void *opaque)
@@ -1314,7 +1312,6 @@ out_nofid:
         g_free(wnames);
         g_free(qids);
     }
-    return;
 }
 
 static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
@@ -2257,7 +2254,6 @@ static void v9fs_flush(void *opaque)
         free_pdu(pdu->s, cancel_pdu);
     }
     complete_pdu(s, pdu, 7);
-    return;
 }
 
 static void v9fs_link(void *opaque)
@@ -2763,7 +2759,6 @@ out:
     put_fid(pdu, fidp);
 out_nofid:
     complete_pdu(s, pdu, retval);
-    return;
 }
 
 static void v9fs_mknod(void *opaque)
diff --git a/hw/blizzard.c b/hw/blizzard.c
index d1c9d81..06e19b3 100644
--- a/hw/blizzard.c
+++ b/hw/blizzard.c
@@ -878,8 +878,6 @@ void s1d13745_write_block(void *opaque, int dc,
         len -= 2;
         buf += 2;
     }
-
-    return;
 }
 
 static void blizzard_update_display(void *opaque)
diff --git a/hw/bt-hci.c b/hw/bt-hci.c
index a3a7fb4..f19299a 100644
--- a/hw/bt-hci.c
+++ b/hw/bt-hci.c
@@ -786,7 +786,6 @@ static void bt_hci_lmp_connection_request(struct bt_link_s *link)
     memcpy(&params.dev_class, &link->host->class, sizeof(params.dev_class));
     params.link_type	= ACL_LINK;
     bt_hci_event(hci, EVT_CONN_REQUEST, &params, EVT_CONN_REQUEST_SIZE);
-    return;
 }
 
 static void bt_hci_conn_accept_timeout(void *opaque)
diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c
index 2bdc615..eec0fe3 100644
--- a/hw/device-hotplug.c
+++ b/hw/device-hotplug.c
@@ -89,5 +89,4 @@ err:
     if (dinfo) {
         drive_put_ref(dinfo);
     }
-    return;
 }
diff --git a/hw/exynos4210_combiner.c b/hw/exynos4210_combiner.c
index 80af22c..60b33c7 100644
--- a/hw/exynos4210_combiner.c
+++ b/hw/exynos4210_combiner.c
@@ -347,8 +347,6 @@ static void exynos4210_combiner_write(void *opaque, target_phys_addr_t offset,
                 TARGET_FMT_plx "\n", offset);
         break;
     }
-
-    return;
 }
 
 /* Get combiner group and bit from irq number */
@@ -380,8 +378,6 @@ static void exynos4210_combiner_handler(void *opaque, int irq, int level)
     }
 
     exynos4210_combiner_update(s, group_n);
-
-    return;
 }
 
 static void exynos4210_combiner_reset(DeviceState *d)
diff --git a/hw/exynos4210_gic.c b/hw/exynos4210_gic.c
index 7d03dd9..4fea098 100644
--- a/hw/exynos4210_gic.c
+++ b/hw/exynos4210_gic.c
@@ -193,8 +193,6 @@ static void exynos4210_irq_handler(void *opaque, int irq, int level)
 
     /* Bypass */
     qemu_set_irq(s->board_irqs[irq], level);
-
-    return;
 }
 
 /*
@@ -410,8 +408,6 @@ static void exynos4210_irq_gate_handler(void *opaque, int irq, int level)
     }
 
     qemu_irq_lower(s->out);
-
-    return;
 }
 
 static void exynos4210_irq_gate_reset(DeviceState *d)
diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index 7a22b1f..6f94ce2 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -574,8 +574,6 @@ static void exynos4210_gfrc_event(void *opaque)
     exynos4210_gfrc_set_count(&s->g_timer, distance);
 
     exynos4210_gfrc_start(&s->g_timer);
-
-    return;
 }
 
 /*
diff --git a/hw/fdc.c b/hw/fdc.c
index 43b0f20..25a49e3 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1286,8 +1286,6 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
     fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
-
-    return;
 }
 
 /* Prepare a transfer of deleted data */
diff --git a/hw/framebuffer.c b/hw/framebuffer.c
index 85a00a5..27fa6f5 100644
--- a/hw/framebuffer.c
+++ b/hw/framebuffer.c
@@ -107,5 +107,4 @@ void framebuffer_update_display(
                               DIRTY_MEMORY_VGA);
     *first_row = first;
     *last_row = last;
-    return;
 }
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 848cb31..f228725 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -89,7 +89,6 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
 done:
     bdrv_acct_done(s->bs, &s->acct);
     io->dma_end(opaque);
-    return;
 }
 
 static void pmac_ide_transfer_cb(void *opaque, int ret)
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 59f1aa4..5c4ccb8 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -149,7 +149,6 @@ static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
     s->intrstatus = val;
 
     ivshmem_update_irq(s, val);
-    return;
 }
 
 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
@@ -510,8 +509,6 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
         ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
     }
-
-    return;
 }
 
 /* Select the MSI-X vectors used by device.
@@ -536,7 +533,6 @@ static void ivshmem_reset(DeviceState *d)
 
     s->intrstatus = 0;
     ivshmem_use_msix(s);
-    return;
 }
 
 static uint64_t ivshmem_get_size(IVShmemState * s) {
diff --git a/hw/megasas.c b/hw/megasas.c
index c728aea..0e57740 100644
--- a/hw/megasas.c
+++ b/hw/megasas.c
@@ -652,7 +652,6 @@ static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size)
         }
     }
     cmd->iov_size = 0;
-    return;
 }
 
 static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
diff --git a/hw/msix.c b/hw/msix.c
index d812094..b623cb5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -366,7 +366,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
     g_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
-    return;
 }
 
 void msix_uninit_exclusive_bar(PCIDevice *dev)
diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
index 855890d..3b437da 100644
--- a/hw/pflash_cfi01.c
+++ b/hw/pflash_cfi01.c
@@ -442,7 +442,6 @@ static void pflash_write(pflash_t *pfl, target_phys_addr_t offset,
     pfl->bypass = 0;
     pfl->wcycle = 0;
     pfl->cmd = 0;
-    return;
 }
 
 
diff --git a/hw/pflash_cfi02.c b/hw/pflash_cfi02.c
index 8cb1549..39337ec 100644
--- a/hw/pflash_cfi02.c
+++ b/hw/pflash_cfi02.c
@@ -474,7 +474,6 @@ static void pflash_write (pflash_t *pfl, target_phys_addr_t offset,
  do_bypass:
     pfl->wcycle = 2;
     pfl->cmd = 0;
-    return;
 }
 
 
diff --git a/hw/pxa2xx_keypad.c b/hw/pxa2xx_keypad.c
index 59db025..1a997c9 100644
--- a/hw/pxa2xx_keypad.c
+++ b/hw/pxa2xx_keypad.c
@@ -172,7 +172,6 @@ static void pxa27x_keyboard_event (PXA2xxKeyPadState *kp, int keycode)
         kp->kpc |= KPC_MI;
         qemu_irq_raise(kp->irq);
     }
-    return;
 }
 
 static uint64_t pxa2xx_keypad_read(void *opaque, target_phys_addr_t offset,
diff --git a/hw/sb16.c b/hw/sb16.c
index c81455d..523ab0d 100644
--- a/hw/sb16.c
+++ b/hw/sb16.c
@@ -822,7 +822,6 @@ static void complete (SB16State *s)
 
     ldebug ("\n");
     s->cmd = -1;
-    return;
 }
 
 static void legacy_reset (SB16State *s)
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 99bb02e..1b0afa6 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -1437,7 +1437,6 @@ invalid_param_len:
 
 invalid_field:
     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
-    return;
 }
 
 static inline bool check_lba_range(SCSIDiskState *s,
@@ -1535,7 +1534,6 @@ static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
 
 invalid_param_len:
     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
-    return;
 }
 
 static void scsi_disk_emulate_write_data(SCSIRequest *req)
diff --git a/hw/tcx.c b/hw/tcx.c
index 93994d6..2db2db1 100644
--- a/hw/tcx.c
+++ b/hw/tcx.c
@@ -472,7 +472,6 @@ static void tcx_dac_writel(void *opaque, target_phys_addr_t addr, uint64_t val,
     default:
         break;
     }
-    return;
 }
 
 static const MemoryRegionOps tcx_dac_ops = {
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 5a0057a..4389380 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -576,7 +576,6 @@ bad_target:
      */
     usb_uas_queue_response(uas, req->tag, UAS_RC_INVALID_INFO_UNIT, 0);
     g_free(req);
-    return;
 }
 
 static void usb_uas_task(UASDevice *uas, uas_ui *ui)
@@ -640,7 +639,6 @@ bad_target:
 
 incorrect_lun:
     usb_uas_queue_response(uas, tag, UAS_RC_INCORRECT_LUN, 0);
-    return;
 }
 
 static int usb_uas_handle_data(USBDevice *dev, USBPacket *p)
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index c36184a..59c7055 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1470,8 +1470,6 @@ static void ohci_port_set_status(OHCIState *ohci, int portnum, uint32_t val)
 
     if (old_state != port->ctrl)
         ohci_set_interrupt(ohci, OHCI_INTR_RHSC);
-
-    return;
 }
 
 static uint64_t ohci_mem_read(void *opaque,
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e79a872..3a41b06 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -807,7 +807,6 @@ static void xhci_events_update(XHCIState *xhci, int v)
         DPRINTF("xhci_events_update(): event ring no longer full\n");
         intr->er_full = 0;
     }
-    return;
 }
 
 static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
diff --git a/hw/xen_domainbuild.c b/hw/xen_domainbuild.c
index a6a12e5..db14974 100644
--- a/hw/xen_domainbuild.c
+++ b/hw/xen_domainbuild.c
@@ -153,7 +153,6 @@ static void xen_domain_poll(void *opaque)
 
 quit:
     qemu_system_shutdown_request();
-    return;
 }
 
 static int xen_domain_watcher(void)
diff --git a/libcacard/vcard.c b/libcacard/vcard.c
index b02556e..539177b 100644
--- a/libcacard/vcard.c
+++ b/libcacard/vcard.c
@@ -200,7 +200,6 @@ vcard_free(VCard *vcard)
     }
     vcard_buffer_response_delete(vcard->vcard_buffer_response);
     g_free(vcard);
-    return;
 }
 
 void
diff --git a/libcacard/vcard_emul_nss.c b/libcacard/vcard_emul_nss.c
index 802cae3..b861c5a 100644
--- a/libcacard/vcard_emul_nss.c
+++ b/libcacard/vcard_emul_nss.c
@@ -168,7 +168,6 @@ vcard_emul_delete_key(VCardKey *key)
     if (key->slot) {
         PK11_FreeSlot(key->slot);
     }
-    return;
 }
 
 /*
@@ -418,7 +417,6 @@ vcard_emul_reset(VCard *card, VCardPower power)
     /* TODO: we may also need to send insertion/removal events? */
     slot = vcard_emul_card_get_slot(card);
     PK11_Logout(slot); /* NOTE: ignoring SECStatus return value */
-    return;
 }
 
 
@@ -535,7 +533,6 @@ vcard_emul_get_atr(VCard *card, unsigned char *atr, int *atr_len)
 
     memcpy(atr, nss_atr, len);
     *atr_len = len;
-    return;
 }
 
 /*
diff --git a/libcacard/vreader.c b/libcacard/vreader.c
index ec126df..96d2407 100644
--- a/libcacard/vreader.c
+++ b/libcacard/vreader.c
@@ -93,7 +93,6 @@ vreader_free(VReader *reader)
         reader->reader_private_free(reader->reader_private);
     }
     g_free(reader);
-    return;
 }
 
 static VCard *
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7869147..15bc4e8 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -2762,7 +2762,6 @@ static void setup_frame(int sig, struct target_sigaction * ka,
 give_sigsegv:
     unlock_user_struct(frame, frame_addr, 1);
     force_sig(TARGET_SIGSEGV/*, current*/);
-    return;
 }
 
 long do_sigreturn(CPUMIPSState *regs)
@@ -2871,7 +2870,6 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
 give_sigsegv:
     unlock_user_struct(frame, frame_addr, 1);
     force_sig(TARGET_SIGSEGV/*, current*/);
-    return;
 }
 
 long do_rt_sigreturn(CPUMIPSState *env)
diff --git a/os-posix.c b/os-posix.c
index eabccb8..178e7ca 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -194,7 +194,6 @@ void os_parse_cmd_args(int index, const char *optarg)
         break;
 #endif
     }
-    return;
 }
 
 static void change_process_uid(void)
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 0f59490..2b1ed2f 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -266,7 +266,6 @@ static void wait_for_connect(void *opaque)
         s->callback(s->fd, s->opaque);
     }
     g_free(s);
-    return;
 }
 
 static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index ce90421..b0eec74 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -988,8 +988,6 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
 {
     error_set(err, QERR_UNSUPPORTED);
-
-    return;
 }
 #endif
 
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 54bc546..5bd8fb2 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -180,8 +180,6 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
 {
     error_set(err, QERR_UNSUPPORTED);
-
-    return;
 }
 
 typedef enum {
diff --git a/savevm.c b/savevm.c
index c7fe283..31fd2e0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2201,7 +2201,6 @@ void qmp_xen_save_devices_state(const char *filename, Error **errp)
  the_end:
     if (saved_vm_running)
         vm_start();
-    return;
 }
 
 int load_vmstate(const char *name)
diff --git a/slirp/ip_input.c b/slirp/ip_input.c
index ce24faf..6f4cff8 100644
--- a/slirp/ip_input.c
+++ b/slirp/ip_input.c
@@ -213,7 +213,6 @@ ip_input(struct mbuf *m)
 	return;
 bad:
 	m_free(m);
-	return;
 }
 
 #define iptofrag(P) ((struct ipasfrag *)(((char*)(P)) - sizeof(struct qlink)))
diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c
index 942aaf4..6440eae 100644
--- a/slirp/tcp_input.c
+++ b/slirp/tcp_input.c
@@ -1281,8 +1281,6 @@ drop:
 	 * Drop space held by incoming segment and return.
 	 */
 	m_free(m);
-
-	return;
 }
 
 static void
diff --git a/slirp/udp.c b/slirp/udp.c
index ced5096..9286cb7 100644
--- a/slirp/udp.c
+++ b/slirp/udp.c
@@ -231,7 +231,6 @@ udp_input(register struct mbuf *m, int iphlen)
 	return;
 bad:
 	m_free(m);
-	return;
 }
 
 int udp_output2(struct socket *so, struct mbuf *m,
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 36f4f2f..c3cdafa 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1932,8 +1932,6 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s)
         }
         ILLEGAL;
     }
-
-    return;
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 8fba770..9a0ad30 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -619,7 +619,6 @@ void start_auth_sasl(VncState *vs)
 
  authabort:
     vnc_client_error(vs);
-    return;
 }
 
 
diff --git a/vl.c b/vl.c
index 8d305ca..29e05a3 100644
--- a/vl.c
+++ b/vl.c
@@ -1023,7 +1023,6 @@ static void numa_add(const char *optarg)
         }
         nb_numa_nodes++;
     }
-    return;
 }
 
 static void smp_parse(const char *optarg)
diff --git a/xen-all.c b/xen-all.c
index e6308be..bcb7ef7 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -1092,7 +1092,6 @@ static void xen_read_physmap(XenIOState *state)
         QLIST_INSERT_HEAD(&state->physmap, physmap, list);
     }
     free(entries);
-    return;
 }
 
 int xen_hvm_init(void)
commit b8994faf2a8d6fc791669bb432bdb3a7a1711013
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Oct 1 14:22:08 2012 +0200

    rtc: implement century byte
    
    Implement the century byte in the RTC emulation, and test that it works.
    This leads to some annoying compatibility code because we need to treat
    a value of 2000 for the base_year property as "use the century byte
    properly" (which would be a value of 0).
    
    The century byte will now be always-zero, rather than always-20,
    for the MIPS Magnum machine whose base_year is 1980.  Commit 42fc73a
    (Support epoch of 1980 in RTC emulation for MIPS Magnum, 2009-01-24)
    correctly said:
    
        With an epoch of 1980 and a year of 2009, one could argue that [the
        century byte] should hold either 0, 1, 19 or 20.  NT 3.50 on MIPS
        does not read the century byte.
    
    so I picked the simplest and most sensible implementation which is to
    return 0 for 1980-2079, 1 for 2080-2179 and so on.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index a7d20d5..332a77d 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -519,7 +519,9 @@ static void rtc_get_time(RTCState *s, struct tm *tm)
     tm->tm_wday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_WEEK]) - 1;
     tm->tm_mday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_MONTH]);
     tm->tm_mon = rtc_from_bcd(s, s->cmos_data[RTC_MONTH]) - 1;
-    tm->tm_year = rtc_from_bcd(s, s->cmos_data[RTC_YEAR]) + s->base_year - 1900;
+    tm->tm_year =
+        rtc_from_bcd(s, s->cmos_data[RTC_YEAR]) + s->base_year +
+        rtc_from_bcd(s, s->cmos_data[RTC_CENTURY]) * 100 - 1900;
 }
 
 static void rtc_set_time(RTCState *s)
@@ -552,10 +554,9 @@ static void rtc_set_cmos(RTCState *s, const struct tm *tm)
     s->cmos_data[RTC_DAY_OF_WEEK] = rtc_to_bcd(s, tm->tm_wday + 1);
     s->cmos_data[RTC_DAY_OF_MONTH] = rtc_to_bcd(s, tm->tm_mday);
     s->cmos_data[RTC_MONTH] = rtc_to_bcd(s, tm->tm_mon + 1);
-    year = (tm->tm_year - s->base_year) % 100;
-    if (year < 0)
-        year += 100;
-    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, year);
+    year = tm->tm_year + 1900 - s->base_year;
+    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, year % 100);
+    s->cmos_data[RTC_CENTURY] = rtc_to_bcd(s, year / 100);
 }
 
 static void rtc_update_time(RTCState *s)
@@ -673,7 +674,6 @@ static void rtc_set_date_from_host(ISADevice *dev)
 {
     RTCState *s = DO_UPCAST(RTCState, dev, dev);
     struct tm tm;
-    int val;
 
     qemu_get_timedate(&tm, 0);
 
@@ -683,9 +683,6 @@ static void rtc_set_date_from_host(ISADevice *dev)
 
     /* set the CMOS date */
     rtc_set_cmos(s, &tm);
-
-    val = rtc_to_bcd(s, (tm.tm_year / 100) + 19);
-    rtc_set_memory(dev, RTC_CENTURY, val);
 }
 
 static int rtc_post_load(void *opaque, int version_id)
@@ -810,6 +807,18 @@ static int rtc_initfn(ISADevice *dev)
     s->cmos_data[RTC_REG_C] = 0x00;
     s->cmos_data[RTC_REG_D] = 0x80;
 
+    /* This is for historical reasons.  The default base year qdev property
+     * was set to 2000 for most machine types before the century byte was
+     * implemented.
+     *
+     * This if statement means that the century byte will be always 0
+     * (at least until 2079...) for base_year = 1980, but will be set
+     * correctly for base_year = 2000.
+     */
+    if (s->base_year == 2000) {
+        s->base_year = 0;
+    }
+
     rtc_set_date_from_host(dev);
 
 #ifdef TARGET_I386
diff --git a/tests/rtc-test.c b/tests/rtc-test.c
index 2b9aa63..7fdc94a 100644
--- a/tests/rtc-test.c
+++ b/tests/rtc-test.c
@@ -179,12 +179,13 @@ static void check_time(int wiggle)
 
 static int wiggle = 2;
 
-static void set_year(void)
+static void set_year_20xx(void)
 {
     /* Set BCD mode */
     cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_DM);
     cmos_write(RTC_REG_A, 0x76);
     cmos_write(RTC_YEAR, 0x11);
+    cmos_write(RTC_CENTURY, 0x20);
     cmos_write(RTC_MONTH, 0x02);
     cmos_write(RTC_DAY_OF_MONTH, 0x02);
     cmos_write(RTC_HOURS, 0x02);
@@ -198,6 +199,7 @@ static void set_year(void)
     g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
 
     /* Set a date in 2080 to ensure there is no year-2038 overflow.  */
     cmos_write(RTC_REG_A, 0x76);
@@ -210,6 +212,7 @@ static void set_year(void)
     g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x80);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
 
     cmos_write(RTC_REG_A, 0x76);
     cmos_write(RTC_YEAR, 0x11);
@@ -221,6 +224,30 @@ static void set_year(void)
     g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
     g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
+}
+
+static void set_year_1980(void)
+{
+    /* Set BCD mode */
+    cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_DM);
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x80);
+    cmos_write(RTC_CENTURY, 0x19);
+    cmos_write(RTC_MONTH, 0x02);
+    cmos_write(RTC_DAY_OF_MONTH, 0x02);
+    cmos_write(RTC_HOURS, 0x02);
+    cmos_write(RTC_MINUTES, 0x04);
+    cmos_write(RTC_SECONDS, 0x58);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x80);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x19);
 }
 
 static void bcd_check_time(void)
@@ -313,7 +340,8 @@ int main(int argc, char **argv)
     qtest_add_func("/rtc/bcd/check-time", bcd_check_time);
     qtest_add_func("/rtc/dec/check-time", dec_check_time);
     qtest_add_func("/rtc/alarm-time", alarm_time);
-    qtest_add_func("/rtc/set-year", set_year);
+    qtest_add_func("/rtc/set-year/20xx", set_year_20xx);
+    qtest_add_func("/rtc/set-year/1980", set_year_1980);
     qtest_add_func("/rtc/fuzz-registers", fuzz_registers);
     ret = g_test_run();
 
commit e67edb943f0c812530aaae2491da56f9542f928b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Oct 1 14:22:07 2012 +0200

    rtc: map CMOS index 0x37 to 0x32 on read and writes
    
    QEMU's attempt to implement the century byte covers two possible places
    for the byte.  A common one on modern chipsets is 0x32, but QEMU also
    stores the value in 0x37 (apparently for IBM PS/2 compatibility---it's
    only been 25 years).  To simplify the implementation of the century
    byte, store it only at 0x32 but remap transparently 0x37 to 0x32 when
    reading and writing from CMOS.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index d63554f..a7d20d5 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -399,6 +399,10 @@ static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
             s->cmos_data[s->cmos_index] = data;
             check_update_timer(s);
             break;
+	case RTC_IBM_PS2_CENTURY_BYTE:
+            s->cmos_index = RTC_CENTURY;
+            /* fall through */
+        case RTC_CENTURY:
         case RTC_SECONDS:
         case RTC_MINUTES:
         case RTC_HOURS:
@@ -598,6 +602,10 @@ static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
         return 0xff;
     } else {
         switch(s->cmos_index) {
+	case RTC_IBM_PS2_CENTURY_BYTE:
+            s->cmos_index = RTC_CENTURY;
+            /* fall through */
+        case RTC_CENTURY:
         case RTC_SECONDS:
         case RTC_MINUTES:
         case RTC_HOURS:
@@ -661,10 +669,6 @@ void rtc_set_memory(ISADevice *dev, int addr, int val)
         s->cmos_data[addr] = val;
 }
 
-/* PC cmos mappings */
-#define REG_IBM_CENTURY_BYTE        0x32
-#define REG_IBM_PS2_CENTURY_BYTE    0x37
-
 static void rtc_set_date_from_host(ISADevice *dev)
 {
     RTCState *s = DO_UPCAST(RTCState, dev, dev);
@@ -681,8 +685,7 @@ static void rtc_set_date_from_host(ISADevice *dev)
     rtc_set_cmos(s, &tm);
 
     val = rtc_to_bcd(s, (tm.tm_year / 100) + 19);
-    rtc_set_memory(dev, REG_IBM_CENTURY_BYTE, val);
-    rtc_set_memory(dev, REG_IBM_PS2_CENTURY_BYTE, val);
+    rtc_set_memory(dev, RTC_CENTURY, val);
 }
 
 static int rtc_post_load(void *opaque, int version_id)
diff --git a/hw/mc146818rtc_regs.h b/hw/mc146818rtc_regs.h
index fc10076..ccdee42 100644
--- a/hw/mc146818rtc_regs.h
+++ b/hw/mc146818rtc_regs.h
@@ -44,6 +44,10 @@
 #define RTC_REG_C               12
 #define RTC_REG_D               13
 
+/* PC cmos mappings */
+#define RTC_CENTURY              0x32
+#define RTC_IBM_PS2_CENTURY_BYTE 0x37
+
 #define REG_A_UIP 0x80
 
 #define REG_B_SET  0x80
commit b6db4aca20e9af4f62c9c9e08b9b9672a6ed3390
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Oct 1 14:22:06 2012 +0200

    rtc: fix overflow in mktimegm
    
    When setting a date in 1980, Linux is actually disregarding the century
    byte and setting the year to 2080.  This causes a year-2038 overflow
    in mktimegm.  Fix this by doing the days-to-seconds computation in
    64-bit math.
    
    Reported-by: Lucas Meneghel Rodrigues <lookkas at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/cutils.c b/cutils.c
index 8ef648f..8edd8fa 100644
--- a/cutils.c
+++ b/cutils.c
@@ -115,7 +115,7 @@ time_t mktimegm(struct tm *tm)
         m += 12;
         y--;
     }
-    t = 86400 * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
+    t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
                  y / 400 - 719469);
     t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
     return t;
diff --git a/tests/rtc-test.c b/tests/rtc-test.c
index f23ac3a..2b9aa63 100644
--- a/tests/rtc-test.c
+++ b/tests/rtc-test.c
@@ -179,6 +179,50 @@ static void check_time(int wiggle)
 
 static int wiggle = 2;
 
+static void set_year(void)
+{
+    /* Set BCD mode */
+    cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_DM);
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x11);
+    cmos_write(RTC_MONTH, 0x02);
+    cmos_write(RTC_DAY_OF_MONTH, 0x02);
+    cmos_write(RTC_HOURS, 0x02);
+    cmos_write(RTC_MINUTES, 0x04);
+    cmos_write(RTC_SECONDS, 0x58);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+
+    /* Set a date in 2080 to ensure there is no year-2038 overflow.  */
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x80);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x80);
+
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x11);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+}
+
 static void bcd_check_time(void)
 {
     /* Set BCD mode */
@@ -269,6 +313,7 @@ int main(int argc, char **argv)
     qtest_add_func("/rtc/bcd/check-time", bcd_check_time);
     qtest_add_func("/rtc/dec/check-time", dec_check_time);
     qtest_add_func("/rtc/alarm-time", alarm_time);
+    qtest_add_func("/rtc/set-year", set_year);
     qtest_add_func("/rtc/fuzz-registers", fuzz_registers);
     ret = g_test_run();
 
commit e0fea6b1e4df2067a51e08e67a17cb98a547287c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Oct 1 14:18:07 2012 +0200

    qtest: implement QTEST_STOP
    
    It is quite difficult to debug qtest test cases without extra wrapper
    scripts for QEMU or similar.  This patch adds a simple environment
    variable-based trigger that sends a STOP signal to the QEMU instance
    under test, before attempting to connect to its QMP session.
    
    This will block execution of the testcase and give time to attach a
    debugger to the stopped QEMU process.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 02d0392..71b84c1 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -85,6 +85,22 @@ static int socket_accept(int sock)
     return ret;
 }
 
+/* Return the PID read from s->pid_file, or -1 if it cannot be opened/read. */
+static pid_t qtest_qemu_pid(QTestState *s)
+{
+    char buffer[1024];
+    pid_t pid = -1;
+    FILE *f = fopen(s->pid_file, "r");
+
+    if (f) {
+        if (fgets(buffer, sizeof(buffer), f)) {
+            pid = atoi(buffer);
+        }
+        fclose(f); /* kept inside the guard: fclose(NULL) is undefined */
+    }
+    return pid;
+}
+
 QTestState *qtest_init(const char *extra_args)
 {
     QTestState *s;
@@ -136,25 +152,21 @@ QTestState *qtest_init(const char *extra_args)
     qtest_qmp(s, "");
     qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
 
+    if (getenv("QTEST_STOP")) {
+        kill(qtest_qemu_pid(s), SIGSTOP);
+    }
+
     return s;
 }
 
 void qtest_quit(QTestState *s)
 {
-    FILE *f;
-    char buffer[1024];
-
-    f = fopen(s->pid_file, "r");
-    if (f) {
-        if (fgets(buffer, sizeof(buffer), f)) {
-            pid_t pid = atoi(buffer);
-            int status = 0;
-
-            kill(pid, SIGTERM);
-            waitpid(pid, &status, 0);
-        }
+    int status;
 
-        fclose(f);
+    pid_t pid = qtest_qemu_pid(s);
+    if (pid != -1) {
+        kill(pid, SIGTERM);
+        waitpid(pid, &status, 0);
     }
 
     unlink(s->pid_file);
commit 610b823ef66b993660f1ab1447a769f190e4f3b3
Author: Stefan Weil <sw at weilnetz.de>
Date:   Wed Oct 3 23:11:02 2012 +0200

    qemu-barrier: Fix compiler version check for future gcc versions
    
    The current check will give a wrong result for gcc-5.x with x < 4.
    Using QEMU_GNUC_PREREQ is simpler and fixes that issue.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qemu-barrier.h b/qemu-barrier.h
index 7e11197..16f0942 100644
--- a/qemu-barrier.h
+++ b/qemu-barrier.h
@@ -19,7 +19,7 @@
  * mfence on 32 bit as well, e.g. if built with -march=pentium-m.
  * However, on i386, there seem to be known bugs as recently as 4.3.
  * */
-#if defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if QEMU_GNUC_PREREQ(4, 4)
 #define smp_mb() __sync_synchronize()
 #else
 #define smp_mb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
commit 9b9e3ec1b47f615f635055924e24705cc3f45b20
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:10:03 2012 +0200

    doc: update HACKING wrt strncpy/pstrcpy
    
    Reword the section on strncpy: its NUL-filling is important
    in some cases.  Mention that pstrcpy's signature is different.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/HACKING b/HACKING
index 471cf1d..dddd617 100644
--- a/HACKING
+++ b/HACKING
@@ -91,10 +91,11 @@ emulators.
 
 4. String manipulation
 
-Do not use the strncpy function.  According to the man page, it does
-*not* guarantee a NULL-terminated buffer, which makes it extremely dangerous
-to use.  Instead, use functionally equivalent function:
-void pstrcpy(char *buf, int buf_size, const char *str)
+Do not use the strncpy function.  As mentioned in the man page, it does *not*
+guarantee a NUL-terminated buffer, which makes it extremely dangerous to use.
+It also zeros trailing destination bytes out to the specified length.  Instead,
+use this similar function when possible, but note its different signature:
+void pstrcpy(char *dest, int dest_buf_size, const char *src)
 
 Don't use strcat because it can't check for buffer overflows, but:
 char *pstrcat(char *buf, int buf_size, const char *s)
commit 9310b9be14f73d4c1e98bfa315fe84326ad9e8e7
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:10:02 2012 +0200

    hw/r2d: add comment: this strncpy use is ok
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/r2d.c b/hw/r2d.c
index 0f16e81..1bc191f 100644
--- a/hw/r2d.c
+++ b/hw/r2d.c
@@ -332,6 +332,8 @@ static void r2d_init(ram_addr_t ram_size,
     }
 
     if (kernel_cmdline) {
+        /* I see no evidence that this .kernel_cmdline buffer requires
+           NUL-termination, so using strncpy should be ok. */
         strncpy(boot_params.kernel_cmdline, kernel_cmdline,
                 sizeof(boot_params.kernel_cmdline));
     }
commit 00ea188125f6ee33e6beaff5da878fa9478e6a0d
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:10:01 2012 +0200

    qcow2: mark this file's sole strncpy use as justified
    
    Acked-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/block/qcow2.c b/block/qcow2.c
index aa5e603..c1ff31f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1096,6 +1096,7 @@ int qcow2_update_header(BlockDriverState *bs)
             goto fail;
         }
 
+        /* Using strncpy is ok here, since buf is not NUL-terminated. */
         strncpy(buf, bs->backing_file, buflen);
 
         header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
commit 3cda346269784c234c7a296ff6851f36a1a9189d
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:10:00 2012 +0200

    acpi: remove strzcpy (strncpy-identical) function; just use strncpy
    
    Adjust all uses s/strzcpy/strncpy/ and mark these uses
    of strncpy as "ok".
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/acpi.c b/hw/acpi.c
index f7950be..f4aca49 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -61,18 +61,6 @@ static int acpi_checksum(const uint8_t *data, int len)
     return (-sum) & 0xff;
 }
 
-/* like strncpy() but zero-fills the tail of destination */
-static void strzcpy(char *dst, const char *src, size_t size)
-{
-    size_t len = strlen(src);
-    if (len >= size) {
-        len = size;
-    } else {
-      memset(dst + len, 0, size - len);
-    }
-    memcpy(dst, src, len);
-}
-
 /* XXX fixme: this function uses obsolete argument parsing interface */
 int acpi_table_add(const char *t)
 {
@@ -157,7 +145,8 @@ int acpi_table_add(const char *t)
     hdr._length = cpu_to_le16(len);
 
     if (get_param_value(buf, sizeof(buf), "sig", t)) {
-        strzcpy(hdr.sig, buf, sizeof(hdr.sig));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.sig, buf, sizeof(hdr.sig));
         ++changed;
     }
 
@@ -187,12 +176,14 @@ int acpi_table_add(const char *t)
     }
 
     if (get_param_value(buf, sizeof(buf), "oem_id", t)) {
-        strzcpy(hdr.oem_id, buf, sizeof(hdr.oem_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.oem_id, buf, sizeof(hdr.oem_id));
         ++changed;
     }
 
     if (get_param_value(buf, sizeof(buf), "oem_table_id", t)) {
-        strzcpy(hdr.oem_table_id, buf, sizeof(hdr.oem_table_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.oem_table_id, buf, sizeof(hdr.oem_table_id));
         ++changed;
     }
 
@@ -207,7 +198,8 @@ int acpi_table_add(const char *t)
     }
 
     if (get_param_value(buf, sizeof(buf), "asl_compiler_id", t)) {
-        strzcpy(hdr.asl_compiler_id, buf, sizeof(hdr.asl_compiler_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.asl_compiler_id, buf, sizeof(hdr.asl_compiler_id));
         ++changed;
     }
 
commit 2e679780ae86c6ca8bc81efe0a376a0b99b09b8f
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:59 2012 +0200

    libcacard/vcard_emul_nss: use pstrcpy in place of strncpy
    
    Replace strncpy+NUL-terminate use with use of pstrcpy.
    This requires linking with cutils.o (or else vssclient doesn't link),
    so add that in the Makefile.
    
    Acked-by: Alon Levy <alevy at redhat.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/libcacard/Makefile b/libcacard/Makefile
index 63990b7..487f434 100644
--- a/libcacard/Makefile
+++ b/libcacard/Makefile
@@ -14,6 +14,9 @@ QEMU_CFLAGS+=-I../
 
 libcacard.lib-y=$(patsubst %.o,%.lo,$(libcacard-y))
 
+vscclient: $(libcacard-y) $(QEMU_OBJS) vscclient.o cutils.o
+	$(call quiet-command,$(CC) -o $@ $^ $(libcacard_libs) $(LIBS),"  LINK  $@")
+
 clean:
 	rm -f *.o */*.o *.d */*.d *.a */*.a *~ */*~ vscclient *.lo */*.lo .libs/* */.libs/* *.la */*.la *.pc
 	rm -Rf .libs */.libs
diff --git a/libcacard/vcard_emul_nss.c b/libcacard/vcard_emul_nss.c
index 802cae3..e1cae5b 100644
--- a/libcacard/vcard_emul_nss.c
+++ b/libcacard/vcard_emul_nss.c
@@ -1169,8 +1169,7 @@ vcard_emul_options(const char *args)
             NEXT_TOKEN(vname)
             NEXT_TOKEN(type_params)
             type_params_length = MIN(type_params_length, sizeof(type_str)-1);
-            strncpy(type_str, type_params, type_params_length);
-            type_str[type_params_length] = 0;
+            pstrcpy(type_str, type_params_length + 1, type_params);
             type = vcard_emul_type_from_string(type_str);
 
             NEXT_TOKEN(type_params)
commit 1ab516ed9b6ba00bafc5ca37604f8af4680323ca
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:58 2012 +0200

    qemu-ga: prefer pstrcpy: consistently NUL-terminate ifreq.ifr_name
    
    NUL-termination of the .ifr_name field is not required, but is fine
    (and preferable to using strncpy and leaving the reader to wonder),
    since the first thing the linux kernel does is to clear the last byte.
    Besides, using pstrcpy here makes this setting of ifr_name consistent
    with the other code (e.g., net/tap-linux.c) that does the same thing.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index ce90421..b9f357c 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -828,7 +828,7 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
             }
 
             memset(&ifr, 0, sizeof(ifr));
-            strncpy(ifr.ifr_name,  info->value->name, IF_NAMESIZE);
+            pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->value->name);
             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
                 snprintf(err_msg, sizeof(err_msg),
                          "failed to get MAC address of %s: %s",
commit 9d055d8ac83cfd590263e8862ff683f705dfdf56
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:57 2012 +0200

    vscsi: avoid unwarranted strncpy
    
    Don't use strncpy when the source string is known to fit
    in the destination buffer.  Use equivalent memcpy.
    We could even use strcpy, here, but some static analyzers
    warn about that, so don't add new uses.
    
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 3cf5844..e3d4b23 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -737,7 +737,7 @@ static int vscsi_send_adapter_info(VSCSIState *s, vscsi_req *req)
 #endif
     memset(&info, 0, sizeof(info));
     strcpy(info.srp_version, SRP_VERSION);
-    strncpy(info.partition_name, "qemu", sizeof("qemu"));
+    memcpy(info.partition_name, "qemu", sizeof("qemu"));
     info.partition_number = cpu_to_be32(0);
     info.mad_version = cpu_to_be32(1);
     info.os_type = cpu_to_be32(2);
commit 9238c2099d37748a4e2cbbe709ed1ebffa6f3c8b
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:56 2012 +0200

    virtio-9p: avoid unwarranted uses of strncpy
    
    In all of these cases, the uses of strncpy were unnecessary, since
    at each point of use we know that the NUL-terminated source bytes
    fit in the destination buffer.  Use memcpy in place of strncpy.
    
    Acked-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/9pfs/virtio-9p-posix-acl.c b/hw/9pfs/virtio-9p-posix-acl.c
index a1948e3..c064017 100644
--- a/hw/9pfs/virtio-9p-posix-acl.c
+++ b/hw/9pfs/virtio-9p-posix-acl.c
@@ -44,7 +44,8 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
         return -1;
     }
 
-    strncpy(value, ACL_ACCESS, len);
+    /* len includes the trailing NUL */
+    memcpy(value, ACL_ACCESS, len);
     return 0;
 }
 
@@ -95,7 +96,8 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
         return -1;
     }
 
-    strncpy(value, ACL_DEFAULT, len);
+    /* len includes the trailing NUL */
+    memcpy(value, ACL_DEFAULT, len);
     return 0;
 }
 
diff --git a/hw/9pfs/virtio-9p-xattr-user.c b/hw/9pfs/virtio-9p-xattr-user.c
index 5044a3e..5bb6020 100644
--- a/hw/9pfs/virtio-9p-xattr-user.c
+++ b/hw/9pfs/virtio-9p-xattr-user.c
@@ -61,7 +61,8 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
         return -1;
     }
 
-    strncpy(value, name, name_size);
+    /* name_size includes the trailing NUL. */
+    memcpy(value, name, name_size);
     return name_size;
 }
 
diff --git a/hw/9pfs/virtio-9p-xattr.c b/hw/9pfs/virtio-9p-xattr.c
index 7f08f6e..a839606 100644
--- a/hw/9pfs/virtio-9p-xattr.c
+++ b/hw/9pfs/virtio-9p-xattr.c
@@ -53,7 +53,8 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
         return -1;
     }
 
-    strncpy(value, name, name_size);
+    /* no need for strncpy: name_size is strlen(name)+1 */
+    memcpy(value, name, name_size);
     return name_size;
 }
 
commit e5fda03839e3c61b01d6c60de5625501d01c69d0
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:55 2012 +0200

    bt: replace fragile snprintf use and unwarranted strncpy
    
    In bt_hci_name_req a failed snprintf could return len larger than
    sizeof(params.name), which means the following memset call would
    have a "length" value of (size_t)-1, -2, etc...  Sounds scary.
    But currently, one can deduce that there is no problem:
    strlen(slave->lmp_name) is guaranteed to be smaller than
    CHANGE_LOCAL_NAME_CP_SIZE, which is the same as sizeof(params.name),
    so this cannot happen.  Regardless, there is no justification for
    using snprintf+memset.  Use pstrcpy instead.
    
    Also, in bt_hci_event_complete_read_local_name, use pstrcpy in place
    of unwarranted strncpy.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/bt-hci.c b/hw/bt-hci.c
index a3a7fb4..47f9a4e 100644
--- a/hw/bt-hci.c
+++ b/hw/bt-hci.c
@@ -943,7 +943,6 @@ static int bt_hci_name_req(struct bt_hci_s *hci, bdaddr_t *bdaddr)
 {
     struct bt_device_s *slave;
     evt_remote_name_req_complete params;
-    int len;
 
     for (slave = hci->device.net->slave; slave; slave = slave->next)
         if (slave->page_scan && !bacmp(&slave->bd_addr, bdaddr))
@@ -955,9 +954,7 @@ static int bt_hci_name_req(struct bt_hci_s *hci, bdaddr_t *bdaddr)
 
     params.status       = HCI_SUCCESS;
     bacpy(&params.bdaddr, &slave->bd_addr);
-    len = snprintf(params.name, sizeof(params.name),
-                    "%s", slave->lmp_name ?: "");
-    memset(params.name + len, 0, sizeof(params.name) - len);
+    pstrcpy(params.name, sizeof(params.name), slave->lmp_name ?: "");
     bt_hci_event(hci, EVT_REMOTE_NAME_REQ_COMPLETE,
                     &params, EVT_REMOTE_NAME_REQ_COMPLETE_SIZE);
 
@@ -1388,7 +1385,7 @@ static inline void bt_hci_event_complete_read_local_name(struct bt_hci_s *hci)
     params.status = HCI_SUCCESS;
     memset(params.name, 0, sizeof(params.name));
     if (hci->device.lmp_name)
-        strncpy(params.name, hci->device.lmp_name, sizeof(params.name));
+        pstrcpy(params.name, sizeof(params.name), hci->device.lmp_name);
 
     bt_hci_event_complete(hci, &params, READ_LOCAL_NAME_RP_SIZE);
 }
commit 5847d9e1399d3497be8eeca6f3a20a18a40b114b
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:54 2012 +0200

    ui/vnc: simplify and avoid strncpy
    
    Don't bother with strncpy.  There's no need for its zero-fill.
    Use g_strndup in place of g_malloc+strncpy+NUL-terminate.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 8fba770..bfdcb46 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -432,9 +432,7 @@ static int protocol_client_auth_sasl_start_len(VncState *vs, uint8_t *data, size
 
 static int protocol_client_auth_sasl_mechname(VncState *vs, uint8_t *data, size_t len)
 {
-    char *mechname = g_malloc(len + 1);
-    strncpy(mechname, (char*)data, len);
-    mechname[len] = '\0';
+    char *mechname = g_strndup((const char *) data, len);
     VNC_DEBUG("Got client mechname '%s' check against '%s'\n",
               mechname, vs->sasl.mechlist);
 
commit 900cfbcac6fa689b5fc8d53b60c3ed39047b8a33
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:53 2012 +0200

    linux-user: remove two unchecked uses of strdup
    
    Remove two uses of strdup (use g_path_get_basename instead),
    and add a comment that this strncpy use is ok.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 819fdd5..1d8bcb4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2442,7 +2442,7 @@ static void fill_prstatus(struct target_elf_prstatus *prstatus,
 
 static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts)
 {
-    char *filename, *base_filename;
+    char *base_filename;
     unsigned int i, len;
 
     (void) memset(psinfo, 0, sizeof (*psinfo));
@@ -2464,13 +2464,15 @@ static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts)
     psinfo->pr_uid = getuid();
     psinfo->pr_gid = getgid();
 
-    filename = strdup(ts->bprm->filename);
-    base_filename = strdup(basename(filename));
+    base_filename = g_path_get_basename(ts->bprm->filename);
+    /*
+     * Using strncpy here is fine: at max-length,
+     * this field is not NUL-terminated.
+     */
     (void) strncpy(psinfo->pr_fname, base_filename,
                    sizeof(psinfo->pr_fname));
-    free(base_filename);
-    free(filename);
 
+    g_free(base_filename);
     bswap_psinfo(psinfo);
     return (0);
 }
commit ae2150680190e510dcbcdfdbfb3a54369c75367f
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:52 2012 +0200

    ppc: avoid buffer overrun: use pstrcpy, not strncpy
    
    A terminal NUL is required by caller's use of strchr.
    It's better not to use strncpy at all, since there is no need
    to zero out hundreds of trailing bytes for each iteration.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index a31d278..7f6e4e0 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -795,7 +795,7 @@ static int read_cpuinfo(const char *field, char *value, int len)
             break;
         }
         if (!strncmp(line, field, field_len)) {
-            strncpy(value, line, len);
+            pstrcpy(value, len, line);
             ret = 0;
             break;
         }
commit 3eadc68ebd174f5bad51fe6e0bbcf6d6651c784c
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:51 2012 +0200

    os-posix: avoid buffer overrun
    
    os_set_proc_name: Use pstrcpy, in place of strncpy and the
    ineffectual preceding assignment: name[sizeof(name) - 1] = 0;
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/os-posix.c b/os-posix.c
index eabccb8..f855abb 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -148,8 +148,7 @@ void os_set_proc_name(const char *s)
     char name[16];
     if (!s)
         return;
-    name[sizeof(name) - 1] = 0;
-    strncpy(name, s, sizeof(name));
+    pstrcpy(name, sizeof(name), s);
     /* Could rewrite argv[0] too, but that's a bit more complicated.
        This simple way is enough for `top'. */
     if (prctl(PR_SET_NAME, name)) {
commit 1044dc1118d9a90e2aa324047bea9c91c889e28f
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:50 2012 +0200

    lm32: avoid buffer overrun
    
    Actually do what the comment says, using pstrcpy to NUL-terminate:
    strncpy does not always do that.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/lm32_hwsetup.h b/hw/lm32_hwsetup.h
index 8fc285e..70dc61f 100644
--- a/hw/lm32_hwsetup.h
+++ b/hw/lm32_hwsetup.h
@@ -96,7 +96,7 @@ static inline void hwsetup_add_tag(HWSetup *hw, enum hwsetup_tag t)
 
 static inline void hwsetup_add_str(HWSetup *hw, const char *str)
 {
-    strncpy(hw->ptr, str, 31); /* make sure last byte is zero */
+    pstrcpy(hw->ptr, 32, str);
     hw->ptr += 32;
 }
 
commit a79b5f8b80890b402fdb0733b0a073695a7875b5
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:49 2012 +0200

    hw/9pfs: avoid buffer overrun
    
    v9fs_add_dir_node and qemu_v9fs_synth_add_file used strncpy
    to form node->name, which requires NUL-termination, but
    strncpy does not ensure NUL-termination.
    Use pstrcpy, which does.
    
    Acked-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c
index 92e0b09..e95a856 100644
--- a/hw/9pfs/virtio-9p-synth.c
+++ b/hw/9pfs/virtio-9p-synth.c
@@ -58,7 +58,7 @@ static V9fsSynthNode *v9fs_add_dir_node(V9fsSynthNode *parent, int mode,
         node->attr->read  = NULL;
     }
     node->private = node;
-    strncpy(node->name, name, sizeof(node->name));
+    pstrcpy(node->name, sizeof(node->name), name);
     QLIST_INSERT_HEAD_RCU(&parent->child, node, sibling);
     return node;
 }
@@ -132,7 +132,7 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode,
     node->attr->write  = write;
     node->attr->mode   = mode;
     node->private      = arg;
-    strncpy(node->name, name, sizeof(node->name));
+    pstrcpy(node->name, sizeof(node->name), name);
     QLIST_INSERT_HEAD_RCU(&parent->child, node, sibling);
     ret = 0;
 err_out:
commit d66f8e7bd3de4a2ecf0680c635f870f2138425b8
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:48 2012 +0200

    vmdk: relative_path: use pstrcpy in place of strncpy
    
    Avoid strncpy+manual-NUL-terminate.  Use pstrcpy instead.
    
    Acked-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/block/vmdk.c b/block/vmdk.c
index f2e861b..1a80e5a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1408,8 +1408,7 @@ static int relative_path(char *dest, int dest_size,
         return -1;
     }
     if (path_is_absolute(target)) {
-        dest[dest_size - 1] = '\0';
-        strncpy(dest, target, dest_size - 1);
+        pstrcpy(dest, dest_size, target);
         return 0;
     }
     while (base[i] == target[i]) {
commit 3178e2755ec5a7fb1afe583fb6ac2622c2c42184
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:47 2012 +0200

    sheepdog: avoid a few buffer overruns
    
    * parse_vdiname: Use pstrcpy, not strncpy, when the destination
    buffer must be NUL-terminated.
    * sd_open: Likewise, avoid buffer overrun.
    * do_sd_create: Likewise.  Leave the preceding memset, since
    pstrcpy does not NUL-fill, and filename needs that.
    * sd_snapshot_create: Add a comment/question.
    * find_vdi_name: Remove a useless memset.
    * sd_snapshot_goto: Remove a useless memset.
    Use pstrcpy to NUL-terminate, because find_vdi_name requires
    that its vdi arg (filename parameter) be NUL-terminated.
    It seems ok not to NUL-fill the buffer.
    Do the same for snapid: remove useless memset-0 (instead,
    zero tag[0]).  Use pstrcpy, not strncpy.
    * sd_snapshot_list: Use pstrcpy, not strncpy to write
    into the ->name member.  Each must be NUL-terminated.
    
    Acked-by: Kevin Wolf <kwolf at redhat.com>
    Acked-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 4742f8a..f35ff5b 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -866,14 +866,14 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
         s->port = 0;
     }
 
-    strncpy(vdi, p, SD_MAX_VDI_LEN);
+    pstrcpy(vdi, SD_MAX_VDI_LEN, p);
 
     p = strchr(vdi, ':');
     if (p) {
         *p++ = '\0';
         *snapid = strtoul(p, NULL, 10);
         if (*snapid == 0) {
-            strncpy(tag, p, SD_MAX_VDI_TAG_LEN);
+            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
         }
     } else {
         *snapid = CURRENT_VDI_ID; /* search current vdi */
@@ -900,7 +900,10 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
         return fd;
     }
 
-    memset(buf, 0, sizeof(buf));
+    /* This pair of strncpy calls ensures that the buffer is zero-filled,
+     * which is desirable since we'll soon be sending those bytes, and
+     * don't want the send_req to read uninitialized data.
+     */
     strncpy(buf, filename, SD_MAX_VDI_LEN);
     strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
 
@@ -1149,7 +1152,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
     s->max_dirty_data_idx = 0;
 
     bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
-    strncpy(s->name, vdi, sizeof(s->name));
+    pstrcpy(s->name, sizeof(s->name), vdi);
     qemu_co_mutex_init(&s->lock);
     g_free(buf);
     return 0;
@@ -1177,8 +1180,11 @@ static int do_sd_create(char *filename, int64_t vdi_size,
         return fd;
     }
 
+    /* FIXME: would it be better to fail (e.g., return -EIO) when filename
+     * does not fit in buf?  For now, just truncate and avoid buffer overrun.
+     */
     memset(buf, 0, sizeof(buf));
-    strncpy(buf, filename, SD_MAX_VDI_LEN);
+    pstrcpy(buf, sizeof(buf), filename);
 
     memset(&hdr, 0, sizeof(hdr));
     hdr.opcode = SD_OP_NEW_VDI;
@@ -1752,6 +1758,9 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
     s->inode.vm_state_size = sn_info->vm_state_size;
     s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+    /* It appears that inode.tag does not require a NUL terminator,
+     * which means this use of strncpy is ok.
+     */
     strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
     /* we don't need to update entire object */
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
@@ -1811,13 +1820,13 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 
     memcpy(old_s, s, sizeof(BDRVSheepdogState));
 
-    memset(vdi, 0, sizeof(vdi));
-    strncpy(vdi, s->name, sizeof(vdi));
+    pstrcpy(vdi, sizeof(vdi), s->name);
 
-    memset(tag, 0, sizeof(tag));
     snapid = strtoul(snapshot_id, NULL, 10);
-    if (!snapid) {
-        strncpy(tag, s->name, sizeof(tag));
+    if (snapid) {
+        tag[0] = 0;
+    } else {
+        pstrcpy(tag, sizeof(tag), s->name);
     }
 
     ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1);
@@ -1946,8 +1955,9 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 
             snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
                      inode.snap_id);
-            strncpy(sn_tab[found].name, inode.tag,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)));
+            pstrcpy(sn_tab[found].name,
+                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
+                    inode.tag);
             found++;
         }
     }
commit c2cba3d9314f972dfaf724d0ec2d018eb54c95f1
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:46 2012 +0200

    block: avoid buffer overrun by using pstrcpy, not strncpy
    
    Also, use PATH_MAX, rather than the arbitrary 1024.
    Using PATH_MAX is more consistent with other filename-related
    variables in this file, like backing_filename and tmp_filename.
    
    Acked-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/block.c b/block.c
index c108a76..e95f613 100644
--- a/block.c
+++ b/block.c
@@ -1506,7 +1506,7 @@ int bdrv_commit(BlockDriverState *bs)
     int n, ro, open_flags;
     int ret = 0;
     uint8_t *buf;
-    char filename[1024];
+    char filename[PATH_MAX];
 
     if (!drv)
         return -ENOMEDIUM;
@@ -1520,7 +1520,8 @@ int bdrv_commit(BlockDriverState *bs)
     }
 
     ro = bs->backing_hd->read_only;
-    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
+    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
+    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
     open_flags =  bs->backing_hd->open_flags;
 
     if (ro) {
commit bfad67399bcca8c1afbbc93593d365044d92f7c6
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:45 2012 +0200

    sparc: use g_strdup in place of unchecked strdup
    
    This avoids a NULL-deref upon strdup failure.
    Also update matching free to g_free.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index f7c004c..eb9f0e7 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -643,7 +643,7 @@ static int cpu_sparc_find_by_name(sparc_def_t *cpu_def, const char *cpu_model)
 {
     unsigned int i;
     const sparc_def_t *def = NULL;
-    char *s = strdup(cpu_model);
+    char *s = g_strdup(cpu_model);
     char *featurestr, *name = strtok(s, ",");
     uint32_t plus_features = 0;
     uint32_t minus_features = 0;
@@ -735,7 +735,7 @@ static int cpu_sparc_find_by_name(sparc_def_t *cpu_def, const char *cpu_model)
 #ifdef DEBUG_FEATURES
     print_features(stderr, fprintf, cpu_def->features, NULL);
 #endif
-    free(s);
+    g_free(s);
     return 0;
 
  error:
commit a5cf8262e4eb9c4646434e2c6211ef8608db3233
Author: Jim Meyering <meyering at redhat.com>
Date:   Thu Oct 4 13:09:44 2012 +0200

    scsi, pci, qdev, isa-bus, sysbus: don't let *_get_fw_dev_path return NULL
    
    Use g_strdup rather than strdup, because the sole caller
    (qdev_get_fw_dev_path_helper) assumes it gets non-NULL, and dereferences
    it.  Besides, in that caller, the allocated buffer is already freed with
    g_free, so it's better to allocate with a matching g_strdup.
    
    In one case, (scsi-bus.c) it was trivial, so I replaced an snprintf+
    g_strdup combination with an equivalent g_strdup_printf use.
    
    Signed-off-by: Jim Meyering <meyering at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 5ea9b8f..f2e4ea4 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -60,7 +60,7 @@ static char *idebus_get_fw_dev_path(DeviceState *dev)
     snprintf(path, sizeof(path), "%s@%d", qdev_fw_name(dev),
              ((IDEBus*)dev->parent_bus)->bus_id);
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 static int ide_qdev_init(DeviceState *qdev)
diff --git a/hw/isa-bus.c b/hw/isa-bus.c
index f9b2373..47c93d3 100644
--- a/hw/isa-bus.c
+++ b/hw/isa-bus.c
@@ -236,7 +236,7 @@ static char *isabus_get_fw_dev_path(DeviceState *dev)
         snprintf(path + off, sizeof(path) - off, "@%04x", d->ioport_id);
     }
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 MemoryRegion *isa_address_space(ISADevice *dev)
diff --git a/hw/pci.c b/hw/pci.c
index f855cf3..de4b448 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1962,7 +1962,7 @@ static char *pcibus_get_fw_dev_path(DeviceState *dev)
                    PCI_SLOT(d->devfn));
     if (PCI_FUNC(d->devfn))
         snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn));
-    return strdup(path);
+    return g_strdup(path);
 }
 
 static char *pcibus_get_dev_path(DeviceState *dev)
diff --git a/hw/qdev.c b/hw/qdev.c
index b5a52ac..3b5ce33 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -520,7 +520,7 @@ char* qdev_get_fw_dev_path(DeviceState *dev)
 
     path[l-1] = '\0';
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 char *qdev_get_dev_path(DeviceState *dev)
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 058d3b2..dfb2631 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -1723,12 +1723,8 @@ static char *scsibus_get_dev_path(DeviceState *dev)
 static char *scsibus_get_fw_dev_path(DeviceState *dev)
 {
     SCSIDevice *d = SCSI_DEVICE(dev);
-    char path[100];
-
-    snprintf(path, sizeof(path), "channel@%x/%s@%x,%x", d->channel,
-             qdev_fw_name(dev), d->id, d->lun);
-
-    return strdup(path);
+    return g_strdup_printf("channel@%x/%s@%x,%x", d->channel,
+                           qdev_fw_name(dev), d->id, d->lun);
 }
 
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun)
diff --git a/hw/sysbus.c b/hw/sysbus.c
index 9d8b1ea..c173840 100644
--- a/hw/sysbus.c
+++ b/hw/sysbus.c
@@ -211,7 +211,7 @@ static char *sysbus_get_fw_dev_path(DeviceState *dev)
         snprintf(path + off, sizeof(path) - off, "@i%04x", s->pio[0]);
     }
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 void sysbus_add_memory(SysBusDevice *dev, target_phys_addr_t addr,
commit 58455eb9f2865d54c8bf208805f7a4c217b5c00d
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Sep 28 19:07:39 2012 +0200

    qemu-sockets: Fix compiler warning (regression for MinGW)
    
    setsockopt needs a type cast for MinGW. That type cast is missing in
    a recent commit which results in a compiler warning.
    
    As with other socket-related functions which have the same problem,
    we add a 'qemu_setsockopt' macro which provides that type cast where
    needed and use the new macro to avoid the warning.
    
    A 'qemu_getsockopt' is also added and can be used for future
    modifications.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/qemu-common.h b/qemu-common.h
index 15d9e4e..b54612b 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -223,11 +223,19 @@ int qemu_pipe(int pipefd[2]);
 #endif
 
 #ifdef _WIN32
-/* MinGW needs a type cast for the 'buf' argument. */
+/* MinGW needs type casts for the 'buf' and 'optval' arguments. */
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+    getsockopt(sockfd, level, optname, (void *)optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+    setsockopt(sockfd, level, optname, (const void *)optval, optlen)
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags)
 #define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
     sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen)
 #else
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+    getsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+    setsockopt(sockfd, level, optname, optval, optlen)
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
 #define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
     sendto(sockfd, buf, len, flags, destaddr, addrlen)
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 1f14e8b..0f59490 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -282,7 +282,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
                 inet_strfamily(addr->ai_family), strerror(errno));
         return -1;
     }
-    setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+    qemu_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
     if (connect_state != NULL) {
         socket_set_nonblock(sock);
     }
commit d69eba24269c0d3d376b7de4095dcca376a3beea
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Sep 28 18:11:49 2012 +0200

    vnc: Fix spelling (hellmen -> hellman) in comment
    
    The algorithm was named after Martin E. Hellman.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/ui/vnc-tls.c b/ui/vnc-tls.c
index 3aaa939..a7f7d07 100644
--- a/ui/vnc-tls.c
+++ b/ui/vnc-tls.c
@@ -49,7 +49,7 @@ static int vnc_tls_initialize(void)
     if (gnutls_global_init () < 0)
         return 0;
 
-    /* XXX ought to re-generate diffie-hellmen params periodically */
+    /* XXX ought to re-generate diffie-hellman params periodically */
     if (gnutls_dh_params_init (&dh_params) < 0)
         return 0;
     if (gnutls_dh_params_generate2 (dh_params, DH_BITS) < 0)
commit 30daca5f77588a75bf9bcd604e21c5506402fabb
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 27 20:57:38 2012 +0200

    slirp: Fix spelling in comment (enought -> enough, insure -> ensure)
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/slirp/ip_icmp.h b/slirp/ip_icmp.h
index 1a1af91..be4426b 100644
--- a/slirp/ip_icmp.h
+++ b/slirp/ip_icmp.h
@@ -92,8 +92,8 @@ struct icmp {
 
 /*
  * Lower bounds on packet lengths for various types.
- * For the error advice packets must first insure that the
- * packet is large enought to contain the returned ip header.
+ * For the error advice packets must first ensure that the
+ * packet is large enough to contain the returned ip header.
  * Only then can we do the check to see if 64 bits of packet
  * data have been returned, since we need to check the returned
  * ip header length.
commit f97713ff191ab855ca5c03458f8dab0c9bf58443
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Thu Sep 27 14:55:43 2012 +0100

    tcg/arm: Use tcg_out_mov_reg rather than inline equivalent code
    
    Use the recently introduced tcg_out_mov_reg() function rather than
    the equivalent inline code.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 2bad0a2..5e8dbdd 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1197,20 +1197,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     case 1:
     case 2:
     default:
-        if (data_reg != TCG_REG_R0) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
-        }
+        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
         break;
     case 3:
-        if (data_reg != TCG_REG_R0) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
-        }
-        if (data_reg2 != TCG_REG_R1) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
-        }
+        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
+        tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
         break;
     }
 
commit b2532d88c9eb4f4734f62d5075585ee8c1516605
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Sep 27 07:41:42 2012 +0200

    cpu: Add missing 'static' attribute to qemu_global_mutex
    
    Contrary to its name, 'qemu_global_mutex' is only used locally
    in cpus.c.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/cpus.c b/cpus.c
index 4b726ef..1d5d490 100644
--- a/cpus.c
+++ b/cpus.c
@@ -613,7 +613,7 @@ static void qemu_tcg_init_cpu_signals(void)
 }
 #endif /* _WIN32 */
 
-QemuMutex qemu_global_mutex;
+static QemuMutex qemu_global_mutex;
 static QemuCond qemu_io_proceeded_cond;
 static bool iothread_requesting_mutex;
 
commit afb63ebd0a9599312c27ecceb839a399740e00ef
Author: Stefan Weil <sw at weilnetz.de>
Date:   Wed Sep 26 22:04:38 2012 +0200

    configure: Support empty target list (--target-list=)
    
    Specifying an empty target list with --target-list= is shorter
    than specifying --disable-user --disable-system.
    
    Both variants should give the same result: no targets at all.
    
    This modification implements that feature.
    
    It uses a trick which works with POSIX compliant shells to test whether
    target_list is undefined (=> default targets) or empty (=> no targets).
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/configure b/configure
index e58846d..e51a793 100755
--- a/configure
+++ b/configure
@@ -125,7 +125,8 @@ cc_i386=i386-pc-linux-gnu-gcc
 libs_qga=""
 debug_info="yes"
 
-target_list=""
+# Don't accept a target_list environment variable.
+unset target_list
 
 # Default value for a variable defining feature "foo".
 #  * foo="no"  feature will only be used if --enable-foo arg is given
@@ -1288,7 +1289,7 @@ if ! "$python" -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_
   exit 1
 fi
 
-if test -z "$target_list" ; then
+if test -z "${target_list+xxx}" ; then
     target_list="$default_target_list"
 else
     target_list=`echo "$target_list" | sed -e 's/,/ /g'`
commit 7a608f562ebd91e811ed0b725e528c894e4f19c4
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sun Sep 23 08:51:01 2012 +0200

    hw: Fix return value check for bdrv_read, bdrv_write
    
    Those functions return -errno in case of an error.
    The old code would typically only detect EPERM (1) errors.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at gmail.com>

diff --git a/hw/nand.c b/hw/nand.c
index e9501ae..01f3ada 100644
--- a/hw/nand.c
+++ b/hw/nand.c
@@ -654,7 +654,7 @@ static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s)
         sector = SECTOR(s->addr);
         off = (s->addr & PAGE_MASK) + s->offset;
         soff = SECTOR_OFFSET(s->addr);
-        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS) == -1) {
+        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
             return;
         }
@@ -666,21 +666,23 @@ static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s)
                             MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE));
         }
 
-        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS) == -1)
+        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
+        }
     } else {
         off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
         sector = off >> 9;
         soff = off & 0x1ff;
-        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) == -1) {
+        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
             return;
         }
 
         mem_and(iobuf + soff, s->io, s->iolen);
 
-        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) == -1)
+        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
+        }
     }
     s->offset = 0;
 }
@@ -704,31 +706,37 @@ static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s)
         i = SECTOR(addr);
         page = SECTOR(addr + (ADDR_SHIFT + s->erase_shift));
         for (; i < page; i ++)
-            if (bdrv_write(s->bdrv, i, iobuf, 1) == -1)
+            if (bdrv_write(s->bdrv, i, iobuf, 1) < 0) {
                 printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
+            }
     } else {
         addr = PAGE_START(addr);
         page = addr >> 9;
-        if (bdrv_read(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_read(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
+        }
         memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
-        if (bdrv_write(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_write(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
+        }
 
         memset(iobuf, 0xff, 0x200);
         i = (addr & ~0x1ff) + 0x200;
         for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
                         i < addr; i += 0x200)
-            if (bdrv_write(s->bdrv, i >> 9, iobuf, 1) == -1)
+            if (bdrv_write(s->bdrv, i >> 9, iobuf, 1) < 0) {
                 printf("%s: write error in sector %" PRIu64 "\n",
                        __func__, i >> 9);
+            }
 
         page = i >> 9;
-        if (bdrv_read(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_read(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
+        }
         memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
-        if (bdrv_write(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_write(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
+        }
     }
 }
 
@@ -740,18 +748,20 @@ static void glue(nand_blk_load_, PAGE_SIZE)(NANDFlashState *s,
 
     if (s->bdrv) {
         if (s->mem_oob) {
-            if (bdrv_read(s->bdrv, SECTOR(addr), s->io, PAGE_SECTORS) == -1)
+            if (bdrv_read(s->bdrv, SECTOR(addr), s->io, PAGE_SECTORS) < 0) {
                 printf("%s: read error in sector %" PRIu64 "\n",
                                 __func__, SECTOR(addr));
+            }
             memcpy(s->io + SECTOR_OFFSET(s->addr) + PAGE_SIZE,
                             s->storage + (PAGE(s->addr) << OOB_SHIFT),
                             OOB_SIZE);
             s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset;
         } else {
             if (bdrv_read(s->bdrv, PAGE_START(addr) >> 9,
-                                    s->io, (PAGE_SECTORS + 2)) == -1)
+                                    s->io, (PAGE_SECTORS + 2)) < 0) {
                 printf("%s: read error in sector %" PRIu64 "\n",
                                 __func__, PAGE_START(addr) >> 9);
+            }
             s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset;
         }
     } else {
diff --git a/hw/onenand.c b/hw/onenand.c
index db6af68..0f7b755 100644
--- a/hw/onenand.c
+++ b/hw/onenand.c
@@ -351,7 +351,7 @@ static inline int onenand_erase(OneNANDState *s, int sec, int num)
     for (; num > 0; num--, sec++) {
         if (s->bdrv_cur) {
             int erasesec = s->secs_cur + (sec >> 5);
-            if (bdrv_write(s->bdrv_cur, sec, blankbuf, 1)) {
+            if (bdrv_write(s->bdrv_cur, sec, blankbuf, 1) < 0) {
                 goto fail;
             }
             if (bdrv_read(s->bdrv_cur, erasesec, tmpbuf, 1) < 0) {
diff --git a/hw/sd.c b/hw/sd.c
index ec26407..297580a 100644
--- a/hw/sd.c
+++ b/hw/sd.c
@@ -1407,7 +1407,7 @@ static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len)
 
     DPRINTF("sd_blk_read: addr = 0x%08llx, len = %d\n",
             (unsigned long long) addr, len);
-    if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+    if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
         fprintf(stderr, "sd_blk_read: read error on host side\n");
         return;
     }
@@ -1415,7 +1415,7 @@ static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len)
     if (end > (addr & ~511) + 512) {
         memcpy(sd->data, sd->buf + (addr & 511), 512 - (addr & 511));
 
-        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) {
+        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_read: read error on host side\n");
             return;
         }
@@ -1429,29 +1429,31 @@ static void sd_blk_write(SDState *sd, uint64_t addr, uint32_t len)
     uint64_t end = addr + len;
 
     if ((addr & 511) || len < 512)
-        if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+        if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: read error on host side\n");
             return;
         }
 
     if (end > (addr & ~511) + 512) {
         memcpy(sd->buf + (addr & 511), sd->data, 512 - (addr & 511));
-        if (bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+        if (bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
             return;
         }
 
-        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) {
+        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: read error on host side\n");
             return;
         }
         memcpy(sd->buf, sd->data + 512 - (addr & 511), end & 511);
-        if (bdrv_write(sd->bdrv, end >> 9, sd->buf, 1) == -1)
+        if (bdrv_write(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
+        }
     } else {
         memcpy(sd->buf + (addr & 511), sd->data, len);
-        if (!sd->bdrv || bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1)
+        if (!sd->bdrv || bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
+        }
     }
 }
 
commit ef8beb0e94c75984e016e855164361c36e15396c
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Oct 5 04:34:40 2012 +0200

    PPC: KVM: Fix BAT put
    
    In the sregs API, upper and lower 32bit segments of the BAT registers
    are swapped when doing a set. Since we need to support old kernels out
    there, don't bother to fix it in the kernel, but instead work around
    the problem in QEMU by swapping on put.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 1975323..93c5bb7 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -493,10 +493,11 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
 
         /* Sync BATs */
         for (i = 0; i < 8; i++) {
-            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[1][i] << 32)
-                | env->DBAT[0][i];
-            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[1][i] << 32)
-                | env->IBAT[0][i];
+            /* Beware. We have to swap upper and lower bits here */
+            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+                | env->DBAT[1][i];
+            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+                | env->IBAT[1][i];
         }
 
         ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
commit a14c74928ba1fdaada515717f4d3c3fa3275d6f7
Merge: 05d4f2f... 8aba7dc...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Oct 4 19:56:26 2012 -0500

    Merge remote-tracking branch 'sstabellini/xen-2012-10-03' into staging
    
    * sstabellini/xen-2012-10-03:
      xen: Set the vram dirty when an error occur.
      exec, memory: Call to xen_modified_memory.
      exec: Introduce helper to set dirty flags.
      xen: Introduce xen_modified_memory.
      QMP, Introduce xen-set-global-dirty-log command.
      qemu/xen: Add 64 bits big bar support on qemu
      xen: Fix, no unplug of pt device by platform device.

commit 05d4f2f2ca6053bfea2e97cf324901ca18e49c76
Merge: 97f3461... 90f0b71...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Oct 4 19:53:50 2012 -0500

    Merge remote-tracking branch 'kwolf/for-anthony' into staging
    
    * kwolf/for-anthony: (30 commits)
      qemu-iotests: add tests for streaming error handling
      qemu-iotests: map underscore to dash in QMP argument names
      blkdebug: process all set_state rules in the old state
      stream: add on-error argument
      block: introduce block job error
      iostatus: reorganize io error code
      iostatus: change is_read to a bool
      iostatus: move BlockdevOnError declaration to QAPI
      iostatus: rename BlockErrorAction, BlockQMPEventAction
      qemu-iotests: add test for pausing a streaming operation
      qmp: add block-job-pause and block-job-resume
      block: add support for job pause/resume
      qmp: add 'busy' member to BlockJobInfo
      block: add block_job_query
      block: move job APIs to separate files
      block: fix documentation of block_job_cancel_sync
      qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE
      qemu-iotests: add initial tests for live block commit
      QAPI: add command for live block commit, 'block-commit'
      block: helper function, to find the base image of a chain
      ...

commit 97f3461555b1f5d8d4e6cfc839efe215098d786d
Merge: 6929cf1... 8bde9b6...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Oct 4 19:52:09 2012 -0500

    Merge remote-tracking branch 'qmp/queue/qmp' into staging
    
    * qmp/queue/qmp:
      block: live snapshot documentation tweaks
      input: index_from_key(): drop unused code
      qmp: qmp_send_key(): accept key codes in hex
      input: qmp_send_key(): simplify
      hmp: dump-guest-memory: hardcode protocol argument to "file:"
      qmp: dump-guest-memory: don't spin if non-blocking fd would block
      qmp: dump-guest-memory: improve schema doc (again)
      qapi: convert add_client
      monitor: add Error * argument to monitor_get_fd
      pci-assign: use monitor_handle_fd_param
      qapi: add "unix" to the set of reserved words
      qapi: do not protect enum values from namespace pollution
      Add qemu-ga-client script
      Support settimeout in QEMUMonitorProtocol
      Make negotiation optional in QEMUMonitorProtocol

commit 6929cf11bb8ec5ff9f9e76c870c513204854a1eb
Merge: 938406d... ad3005a...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Oct 4 19:50:33 2012 -0500

    Merge remote-tracking branch 'mst/tags/for_anthony' into staging
    
    * mst/tags/for_anthony:
      virtio-serial-bus: let chardev know the exact number of bytes requested
      virtio: Introduce virtqueue_get_avail_bytes()
      virtio: use unsigned int for counting bytes in vq
      iov: add const annotation
      virtio-net: fix used len for tx
      virtio: don't mark unaccessed memory as dirty

commit 938406dfb11d8a40d9228b3596d49a583d7218ff
Merge: 4be403c... 39c138c...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Oct 4 19:49:15 2012 -0500

    Merge remote-tracking branch 'kraxel/usb.66' into staging
    
    * kraxel/usb.66:
      usb: Fix usb_packet_map() in the presence of IOMMUs
      usb-redir: Adjust pkg-config check for usbredirparser .pc file rename (v2)
      ehci: Fix interrupt packet MULT handling
      xhci: create a memory region for each port
      xhci: route string & usb hub support
      xhci: tweak limits
      compat: turn off msi/msix on xhci for old machine types
      add pc-1.3 machine type
    
    Conflicts:
    	hw/pc_piix.c
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 4be403c8158e1b6be743f0fef004310cea4e3975
Author: Avi Kivity <avi at redhat.com>
Date:   Thu Oct 4 12:36:04 2012 +0200

    Make target_phys_addr_t 64 bits unconditionally
    
    The hassle and compile time overhead of maintaining both 32-bit and 64-bit
    capable source isn't worth the tiny performance advantage which is seen on
    a minority of configurations.  Switch to compiling libhw only once, with
    target_phys_addr_t unconditionally typedefed to uint64_t.
    
    Signed-off-by: Avi Kivity <avi at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/.gitignore b/.gitignore
index 824c0d2..3ef77d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ trace-dtrace.dtrace
 *-linux-user
 *-bsd-user
 libdis*
+libhw
 libhw32
 libhw64
 libuser
diff --git a/Makefile b/Makefile
index 0464297..1cebe3a 100644
--- a/Makefile
+++ b/Makefile
@@ -214,7 +214,7 @@ $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
 
 qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
 
-QEMULIBS=libhw32 libhw64 libuser libdis libdis-user
+QEMULIBS=libhw libuser libdis libdis-user
 
 clean:
 # avoid old build problems by removing potentially incorrect old files
diff --git a/Makefile.hw b/Makefile.hw
index 59f5b48..86f0bf4 100644
--- a/Makefile.hw
+++ b/Makefile.hw
@@ -2,7 +2,6 @@
 
 include ../config-host.mak
 include ../config-all-devices.mak
-include config.mak
 include $(SRC_PATH)/rules.mak
 
 .PHONY: all
diff --git a/Makefile.target b/Makefile.target
index d9d54b8..4449444 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -4,9 +4,6 @@ include ../config-host.mak
 include config-devices.mak
 include config-target.mak
 include $(SRC_PATH)/rules.mak
-ifneq ($(HWDIR),)
-include $(HWDIR)/config.mak
-endif
 
 $(call set-vpath, $(SRC_PATH))
 ifdef CONFIG_LINUX
diff --git a/configure b/configure
index 8f99b7b..65bd876 100755
--- a/configure
+++ b/configure
@@ -3694,7 +3694,6 @@ TARGET_ABI_DIR=""
 
 case "$target_arch2" in
   i386)
-    target_phys_bits=64
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
@@ -3702,7 +3701,6 @@ case "$target_arch2" in
     target_long_alignment=8
   ;;
   alpha)
-    target_phys_bits=64
     target_long_alignment=8
     target_nptl="yes"
   ;;
@@ -3711,22 +3709,18 @@ case "$target_arch2" in
     bflt="yes"
     target_nptl="yes"
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
-    target_phys_bits=64
     target_llong_alignment=4
     target_libs_softmmu="$fdt_libs"
   ;;
   cris)
     target_nptl="yes"
-    target_phys_bits=32
   ;;
   lm32)
-    target_phys_bits=32
     target_libs_softmmu="$opengl_libs"
   ;;
   m68k)
     bflt="yes"
     gdb_xml_files="cf-core.xml cf-fp.xml"
-    target_phys_bits=32
     target_int_alignment=2
     target_long_alignment=2
     target_llong_alignment=2
@@ -3735,36 +3729,30 @@ case "$target_arch2" in
     TARGET_ARCH=microblaze
     bflt="yes"
     target_nptl="yes"
-    target_phys_bits=32
     target_libs_softmmu="$fdt_libs"
   ;;
   mips|mipsel)
     TARGET_ARCH=mips
     echo "TARGET_ABI_MIPSO32=y" >> $config_target_mak
     target_nptl="yes"
-    target_phys_bits=64
   ;;
   mipsn32|mipsn32el)
     TARGET_ARCH=mipsn32
     TARGET_BASE_ARCH=mips
     echo "TARGET_ABI_MIPSN32=y" >> $config_target_mak
-    target_phys_bits=64
   ;;
   mips64|mips64el)
     TARGET_ARCH=mips64
     TARGET_BASE_ARCH=mips
     echo "TARGET_ABI_MIPSN64=y" >> $config_target_mak
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   or32)
     TARGET_ARCH=openrisc
     TARGET_BASE_ARCH=openrisc
-    target_phys_bits=32
   ;;
   ppc)
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_nptl="yes"
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3772,7 +3760,6 @@ case "$target_arch2" in
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_nptl="yes"
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3780,7 +3767,6 @@ case "$target_arch2" in
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_long_alignment=8
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3790,21 +3776,17 @@ case "$target_arch2" in
     TARGET_ABI_DIR=ppc
     echo "TARGET_ABI32=y" >> $config_target_mak
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_libs_softmmu="$fdt_libs"
   ;;
   sh4|sh4eb)
     TARGET_ARCH=sh4
     bflt="yes"
     target_nptl="yes"
-    target_phys_bits=32
   ;;
   sparc)
-    target_phys_bits=64
   ;;
   sparc64)
     TARGET_BASE_ARCH=sparc
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   sparc32plus)
@@ -3812,11 +3794,9 @@ case "$target_arch2" in
     TARGET_BASE_ARCH=sparc
     TARGET_ABI_DIR=sparc
     echo "TARGET_ABI32=y" >> $config_target_mak
-    target_phys_bits=64
   ;;
   s390x)
     target_nptl="yes"
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   unicore32)
@@ -3824,7 +3804,6 @@ case "$target_arch2" in
   ;;
   xtensa|xtensaeb)
     TARGET_ARCH=xtensa
-    target_phys_bits=32
   ;;
   *)
     echo "Unsupported target CPU"
@@ -3859,7 +3838,6 @@ echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak
 case "$target_arch2" in
   i386|x86_64)
     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
-      target_phys_bits=64
       echo "CONFIG_XEN=y" >> $config_target_mak
       if test "$xen_pci_passthrough" = yes; then
         echo "CONFIG_XEN_PCI_PASSTHROUGH=y" >> "$config_target_mak"
@@ -3899,11 +3877,10 @@ if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
 if test "$target_softmmu" = "yes" ; then
-  echo "TARGET_PHYS_ADDR_BITS=$target_phys_bits" >> $config_target_mak
   echo "CONFIG_SOFTMMU=y" >> $config_target_mak
   echo "LIBS+=$libs_softmmu $target_libs_softmmu" >> $config_target_mak
-  echo "HWDIR=../libhw$target_phys_bits" >> $config_target_mak
-  echo "subdir-$target: subdir-libhw$target_phys_bits" >> $config_host_mak
+  echo "HWDIR=../libhw" >> $config_target_mak
+  echo "subdir-$target: subdir-libhw" >> $config_host_mak
   if test "$smartcard_nss" = "yes" ; then
     echo "subdir-$target: subdir-libcacard" >> $config_host_mak
   fi
@@ -4145,11 +4122,8 @@ for rom in seabios vgabios ; do
     echo "LD=$ld" >> $config_mak
 done
 
-for hwlib in 32 64; do
-  d=libhw$hwlib
-  symlink "$source_path/Makefile.hw" "$d/Makefile"
-  echo "QEMU_CFLAGS+=-DTARGET_PHYS_ADDR_BITS=$hwlib" > $d/config.mak
-done
+d=libhw
+symlink "$source_path/Makefile.hw" "$d/Makefile"
 
 d=libuser
 symlink "$source_path/Makefile.user" "$d/Makefile"
diff --git a/cpu-common.h b/cpu-common.h
index 85548de..c0d27af 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -21,7 +21,7 @@ enum device_endian {
 };
 
 /* address in the RAM (different from a physical address) */
-#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64
+#if defined(CONFIG_XEN_BACKEND)
 typedef uint64_t ram_addr_t;
 #  define RAM_ADDR_MAX UINT64_MAX
 #  define RAM_ADDR_FMT "%" PRIx64
diff --git a/dma.h b/dma.h
index f35c4b6..1a33603 100644
--- a/dma.h
+++ b/dma.h
@@ -31,7 +31,7 @@ struct QEMUSGList {
     DMAContext *dma;
 };
 
-#if defined(TARGET_PHYS_ADDR_BITS)
+#ifndef CONFIG_USER_ONLY
 
 /*
  * When an IOMMU is present, bus addresses become distinct from
diff --git a/hw/hw.h b/hw/hw.h
index e5cb9bf..16101de 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -4,7 +4,7 @@
 
 #include "qemu-common.h"
 
-#if defined(TARGET_PHYS_ADDR_BITS) && !defined(NEED_CPU_H)
+#if !defined(CONFIG_USER_ONLY) && !defined(NEED_CPU_H)
 #include "cpu-common.h"
 #endif
 
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 127e818..d8e1b23 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -210,13 +210,7 @@ static target_phys_addr_t intel_hda_addr(uint32_t lbase, uint32_t ubase)
 {
     target_phys_addr_t addr;
 
-#if TARGET_PHYS_ADDR_BITS == 32
-    addr = lbase;
-#else
-    addr = ubase;
-    addr <<= 32;
-    addr |= lbase;
-#endif
+    addr = ((uint64_t)ubase << 32) | lbase;
     return addr;
 }
 
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 844f1b8..b7c82ee 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -774,11 +774,7 @@ static void rtl8139_write_buffer(RTL8139State *s, const void *buf, int size)
 #define MIN_BUF_SIZE 60
 static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
 {
-#if TARGET_PHYS_ADDR_BITS > 32
-    return low | ((target_phys_addr_t)high << 32);
-#else
-    return low;
-#endif
+    return low | ((uint64_t)high << 32);
 }
 
 /* Workaround for buggy guest driver such as linux who allocates rx
diff --git a/monitor.c b/monitor.c
index 67064e2..7beac9a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3259,11 +3259,7 @@ static int64_t expr_unary(Monitor *mon)
         break;
     default:
         errno = 0;
-#if TARGET_PHYS_ADDR_BITS > 32
         n = strtoull(pch, &p, 0);
-#else
-        n = strtoul(pch, &p, 0);
-#endif
         if (errno == ERANGE) {
             expr_error(mon, "number too large");
         }
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index d2664ac..532b114 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1032,12 +1032,10 @@ static int ppcemb_tlb_check(CPUPPCState *env, ppcemb_tlb_t *tlb,
         return -1;
     }
     *raddrp = (tlb->RPN & mask) | (address & ~mask);
-#if (TARGET_PHYS_ADDR_BITS >= 36)
     if (ext) {
         /* Extend the physical address to 36 bits */
-        *raddrp |= (target_phys_addr_t)(tlb->RPN & 0xF) << 32;
+        *raddrp |= (uint64_t)(tlb->RPN & 0xF) << 32;
     }
-#endif
 
     return 0;
 }
diff --git a/targphys.h b/targphys.h
index bd4938f..08cade9 100644
--- a/targphys.h
+++ b/targphys.h
@@ -3,25 +3,10 @@
 #ifndef TARGPHYS_H
 #define TARGPHYS_H
 
-#ifdef TARGET_PHYS_ADDR_BITS
+#define TARGET_PHYS_ADDR_BITS 64
 /* target_phys_addr_t is the type of a physical address (its size can
    be different from 'target_ulong').  */
 
-#if TARGET_PHYS_ADDR_BITS == 32
-typedef uint32_t target_phys_addr_t;
-#define TARGET_PHYS_ADDR_MAX UINT32_MAX
-#define TARGET_FMT_plx "%08x"
-/* Format strings for printing target_phys_addr_t types.
- * These are recommended over the less flexible TARGET_FMT_plx,
- * which is retained for the benefit of existing code.
- */
-#define TARGET_PRIdPHYS PRId32
-#define TARGET_PRIiPHYS PRIi32
-#define TARGET_PRIoPHYS PRIo32
-#define TARGET_PRIuPHYS PRIu32
-#define TARGET_PRIxPHYS PRIx32
-#define TARGET_PRIXPHYS PRIX32
-#elif TARGET_PHYS_ADDR_BITS == 64
 typedef uint64_t target_phys_addr_t;
 #define TARGET_PHYS_ADDR_MAX UINT64_MAX
 #define TARGET_FMT_plx "%016" PRIx64
@@ -31,7 +16,5 @@ typedef uint64_t target_phys_addr_t;
 #define TARGET_PRIuPHYS PRIu64
 #define TARGET_PRIxPHYS PRIx64
 #define TARGET_PRIXPHYS PRIX64
-#endif
-#endif
 
 #endif
commit fb37c3029c5a695e367baaacc6baf17640cc63cc
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Oct 4 18:52:21 2012 +0200

    PPC: e500: Only expose even TLB sizes in initial TLB
    
    When booting our e500 machine, we automatically generate a big TLB entry
    in TLB1 that covers all of the code we need to run in there until the guest
    can handle its TLB on its own.
    
    However, e500v2 can only handle MAS1.0 sizes, whereas we keep our TLB
    information in MAS2.0 layout, which means we have twice as many TLB sizes
    to choose from. That also means we can run into a situation where we try
    to add a TLB size that could not fit into the MAS1.0 size bits.
    
    Fix it by making sure we always have the lower bit set to 0. That way we
    are always guaranteed to have MAS1.0 compatible TLB size information.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index feb712e..d23f9b2 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -362,6 +362,10 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env)
        the device tree top */
     dt_end = bi->dt_base + bi->dt_size;
     ps = booke206_page_size_to_tlb(dt_end) + 1;
+    if (ps & 1) {
+        /* e500v2 can only do even TLB size bits */
+        ps++;
+    }
     size = (ps << MAS1_TSIZE_SHIFT);
     tlb->mas1 = MAS1_VALID | size;
     tlb->mas2 = 0;
commit 2b15811c8cd4dbe8dcde32320936fe74e51e4279
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Tue Sep 25 17:12:21 2012 +0000

    ppc/pseries: Reset VPA registration on CPU reset
    
    The ppc specific CPU state contains several variables which track the
    VPA, SLB shadow and dispatch trace log.  These are structures shared
    between OS and hypervisor that are used on the pseries machine to track
    various per-CPU quantities.
    
    The address of these structures needs to be registered by the guest on each
    boot, however currently this registration is not cleared when we reset the
    cpu.  This patch corrects this bug.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index fba2b42..a972287 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10423,6 +10423,14 @@ static void ppc_cpu_reset(CPUState *s)
     env->pending_interrupts = 0;
     env->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
+
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+    env->vpa = 0;
+    env->slb_shadow = 0;
+    env->dispatch_trace_log = 0;
+    env->dtl_size = 0;
+#endif /* TARGET_PPC64 */
+
     /* Flush all TLBs */
     tlb_flush(env, 1);
 }
commit efcb9383b974114e5f682e531346006f8f2466c0
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Tue Sep 25 17:12:20 2012 +0000

    pseries: Don't test for MSR_PR for hypercalls under KVM
    
    PAPR hypercalls should only be invoked from the guest kernel, not guest
    user programs, that is, with MSR[PR]=0.  Currently we check this in
    spapr_hypercall, returning H_PRIVILEGE if MSR[PR]=1.
    
    However, under KVM the state of MSR[PR] is already checked by the host
    kernel before passing the hypercall to qemu, making this check redundant.
    Worse, however, we don't generally synchronize KVM and qemu state on the
    hypercall path, meaning that qemu could incorrectly reject a hypercall
    because it has a stale MSR value.
    
    This patch fixes the problem by moving the privilege test exclusively to
    the TCG hypercall path.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    CC: qemu-stable at nongnu.org
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index a8bd3c1..ab227a0 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -573,7 +573,12 @@ static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
 
 static void emulate_spapr_hypercall(CPUPPCState *env)
 {
-    env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    if (msr_pr) {
+        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
+        env->gpr[3] = H_PRIVILEGE;
+    } else {
+        env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    }
 }
 
 static void spapr_reset_htab(sPAPREnvironment *spapr)
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 826ca67..194d9c2 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -681,11 +681,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                              target_ulong *args)
 {
-    if (msr_pr) {
-        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
-        return H_PRIVILEGE;
-    }
-
     if ((opcode <= MAX_HCALL_OPCODE)
         && ((opcode & 0x3) == 0)) {
         spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
commit 7e7ec2d290ca5b1bdd555da9852dc5ee60232fe5
Author: Scott Wood <scottwood at freescale.com>
Date:   Wed Aug 22 14:55:41 2012 +0000

    PPC: e500: calculate initrd_base like dt_base
    
    While investigating dtb pad issues, I noticed that initrd_base wasn't taking
    loadaddr into account the way dt_base was.  This seems wrong.
    
    Signed-off-by: Scott Wood <scottwood at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index fc3fde0..feb712e 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -540,7 +540,8 @@ void ppce500_init(PPCE500Params *params)
 
     /* Load initrd. */
     if (params->initrd_filename) {
-        initrd_base = (kernel_size + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK;
+        initrd_base = (loadaddr + kernel_size + INITRD_LOAD_PAD) &
+            ~INITRD_PAD_MASK;
         initrd_size = load_image_targphys(params->initrd_filename, initrd_base,
                                           ram_size - initrd_base);
 
commit 9dd5eba1bc69bccbd83885d157d84e2514799a22
Author: Scott Wood <scottwood at freescale.com>
Date:   Wed Aug 22 14:55:40 2012 +0000

    PPC: e500: increase DTC_LOAD_PAD
    
    An allowance of 5 MiB for BSS is not enough for Linux kernels with certain
    debug options enabled (not sure exactly which one caused it, but I'd guess
    lockdep).  The kernel I ran into this with had a BSS of around 6.4 MB.
    
    Unfortunately, uImage does not give us enough information to determine the
    actual BSS size.  Increase the allowance to 18 MiB to give us plenty of
    room.  Eventually this should be more intelligent, possibly packing
    initrd+dtb at the end of guest RAM.
    
    Signed-off-by: Scott Wood <scottwood at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 5bab340..fc3fde0 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -36,7 +36,7 @@
 
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define UIMAGE_LOAD_BASE           0
-#define DTC_LOAD_PAD               0x500000
+#define DTC_LOAD_PAD               0x1800000
 #define DTC_PAD_MASK               0xFFFFF
 #define INITRD_LOAD_PAD            0x2000000
 #define INITRD_PAD_MASK            0xFFFFFF
commit 6641b77254d3c191ecee5a87947425f623ac9ca0
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Sep 23 23:27:37 2012 +0200

    device tree: simplify dumpdtb code
    
    As per Peter's suggestion, we can use glib to write out a buffer in whole to
    a file, simplifying the code dramatically.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/device_tree.c b/device_tree.c
index 69ca953..a923613 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -314,14 +314,7 @@ void qemu_devtree_dumpdtb(void *fdt, int size)
         const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
         if (dumpdtb) {
             /* Dump the dtb to a file and quit */
-            FILE *f = fopen(dumpdtb, "wb");
-            size_t len;
-            len = fwrite(fdt, size, 1, f);
-            fclose(f);
-            if (len != size) {
-                exit(1);
-            }
-            exit(0);
+            exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
         }
     }
 
commit 711934334eb3895a89c555c1f57eb3d84ddb2906
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Sep 23 08:37:59 2012 +0200

    fdt: move dumpdtb interpretation code to device_tree.c
    
    The dumpdtb code can be useful in more places than just for e500. Move it
    to a generic place.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/device_tree.c b/device_tree.c
index d7a9b6b..69ca953 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -304,3 +304,25 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
     g_free(dupname);
     return retval;
 }
+
+void qemu_devtree_dumpdtb(void *fdt, int size)
+{
+    QemuOpts *machine_opts;
+
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
+        if (dumpdtb) {
+            /* Dump the dtb to a file and quit */
+            FILE *f = fopen(dumpdtb, "wb");
+            size_t len;
+            len = fwrite(fdt, size, 1, f);
+            fclose(f);
+            if (len != size) {
+                exit(1);
+            }
+            exit(0);
+        }
+    }
+
+}
diff --git a/device_tree.h b/device_tree.h
index f7a3e6c..f0b3f35 100644
--- a/device_tree.h
+++ b/device_tree.h
@@ -49,4 +49,6 @@ int qemu_devtree_add_subnode(void *fdt, const char *name);
                              sizeof(qdt_tmp));                                \
     } while (0)
 
+void qemu_devtree_dumpdtb(void *fdt, int size);
+
 #endif /* __DEVICE_TREE_H__ */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 6f0de6d..5bab340 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -139,12 +139,10 @@ static int ppce500_load_device_tree(CPUPPCState *env,
             0x0, 0x10000,
         };
     QemuOpts *machine_opts;
-    const char *dumpdtb = NULL;
     const char *dtb_file = NULL;
 
     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
     if (machine_opts) {
-        dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
         dtb_file = qemu_opt_get(machine_opts, "dtb");
         toplevel_compat = qemu_opt_get(machine_opts, "dt_compatible");
     }
@@ -334,18 +332,7 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     }
 
 done:
-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        FILE *f = fopen(dumpdtb, "wb");
-        size_t len;
-        len = fwrite(fdt, fdt_size, 1, f);
-        fclose(f);
-        if (len != fdt_size) {
-            exit(1);
-        }
-        exit(0);
-    }
-
+    qemu_devtree_dumpdtb(fdt, fdt_size);
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     if (ret < 0) {
         goto out;
commit 011aba24ed73000e126dd1bb90a6bea2afd91649
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Thu Sep 20 17:42:28 2012 +0000

    target-ppc: Remove unused power_mode field from cpu state
    
    CPUPPCState includes a variable 'power_mode' which is used nowhere.  This
    patch removes it.  This includes saving a dummy zero in its place during
    vmsave, to avoid breaking the save format.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index ca2fc21..faf4404 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1079,7 +1079,6 @@ struct CPUPPCState {
     int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
 
     /* Power management */
-    int power_mode;
     int (*check_pow)(CPUPPCState *env);
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index d6c2ee4..21ce757 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -82,7 +82,7 @@ void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_betls(f, &env->hflags);
     qemu_put_betls(f, &env->hflags_nmsr);
     qemu_put_sbe32s(f, &env->mmu_idx);
-    qemu_put_sbe32s(f, &env->power_mode);
+    qemu_put_sbe32(f, 0);
 }
 
 int cpu_load(QEMUFile *f, void *opaque, int version_id)
@@ -167,7 +167,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     qemu_get_betls(f, &env->hflags);
     qemu_get_betls(f, &env->hflags_nmsr);
     qemu_get_sbe32s(f, &env->mmu_idx);
-    qemu_get_sbe32s(f, &env->power_mode);
+    qemu_get_sbe32(f); /* Discard unused power_mode */
 
     return 0;
 }
commit 382be75df77142cf6bdc7f5852738029eeb9e23a
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Thu Sep 20 17:42:27 2012 +0000

    pseries: Set hash table size based on RAM size
    
    Currently the pseries machine code always attempts to set the size of the
    guest's hash page table to 16MB.  However, because of the way the POWER
    MMU works, a suitable hash page table size should really depend on memory
    size.  16MB will be excessive for guests with <1GB of RAM, and may not be
    enough for guests with >2GB of RAM (depending on guest page size and
    other factors).
    
    The usual given rule of thumb is that the hash table should be 1/64 of
    the size of memory, but in fact the Linux guests we are aiming at don't
    really need that much.  This patch, therefore, changes the hash table
    allocation code to aim for 1/128 of the size of RAM (rounding up).  When
    using KVM, this size may still be adjusted by the host kernel if it is
    unable to allocate a suitable (contiguous) table.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index 1177efa..a8bd3c1 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -725,9 +725,16 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
     load_limit = spapr->fdt_addr - FW_OVERHEAD;
 
-    /* For now, always aim for a 16MB hash table */
-    /* FIXME: we should change this default based on RAM size */
-    spapr->htab_shift = 24;
+    /* We aim for a hash table of size 1/128 the size of RAM.  The
+     * normal rule of thumb is 1/64 the size of RAM, but that's much
+     * more than needed for the Linux guests we support. */
+    spapr->htab_shift = 18; /* Minimum architected size */
+    while (spapr->htab_shift <= 46) {
+        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
+            break;
+        }
+        spapr->htab_shift++;
+    }
 
     /* init CPUs */
     if (cpu_model == NULL) {
commit 35f9304d925a5423c51bd2c83a81fa3cc2b6e680
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Thu Sep 20 17:42:30 2012 +0000

    pseries: Remove unnecessary locking from PAPR hash table hcalls
    
    In the paravirtualized environment provided by PAPR, there is a standard
    locking scheme so that hypercalls updating the hash page table from
    different guest threads don't corrupt the hash table state.  We implement
    this HVLOCK bit in our page table hypercalls.  However, it is not necessary
    in our case, since the hypercalls all run in the qemu environment under the
    big qemu lock.
    
    Therefore, this patch removes the locking code.  This has the additional
    advantage of freeing up a hash PTE bit which will be useful for migration
    support.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 2df94d1..826ca67 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -39,22 +39,6 @@
 #define HPTE_V_1TB_SEG          0x4000000000000000ULL
 #define HPTE_V_VRMA_MASK        0x4001ffffff000000ULL
 
-#define HPTE_V_HVLOCK           0x40ULL
-
-static inline int lock_hpte(void *hpte, target_ulong bits)
-{
-    uint64_t pteh;
-
-    pteh = ldq_p(hpte);
-
-    /* We're protected by qemu's global lock here */
-    if (pteh & bits) {
-        return 0;
-    }
-    stq_p(hpte, pteh | HPTE_V_HVLOCK);
-    return 1;
-}
-
 static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
                                      target_ulong pte_index)
 {
@@ -151,8 +135,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
             if (i == 8) {
                 return H_PTEG_FULL;
             }
-            if (((ldq_p(hpte) & HPTE_V_VALID) == 0) &&
-                lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+            if ((ldq_p(hpte) & HPTE_V_VALID) == 0) {
                 break;
             }
             hpte += HASH_PTE_SIZE_64;
@@ -160,7 +143,7 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
     } else {
         i = 0;
         hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-        if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+        if (ldq_p(hpte) & HPTE_V_VALID) {
             return H_PTEG_FULL;
         }
     }
@@ -168,7 +151,6 @@ static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr,
     /* eieio();  FIXME: need some sort of barrier for smp? */
     stq_p(hpte, pteh);
 
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
     args[0] = pte_index + i;
     return H_SUCCESS;
 }
@@ -193,11 +175,6 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex,
     }
 
     hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }
 
     v = ldq_p(hpte);
     r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
@@ -205,16 +182,13 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex,
     if ((v & HPTE_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
         ((flags & H_ANDCOND) && (v & avpn) != 0)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
         return REMOVE_NOT_FOUND;
     }
-    *vp = v & ~HPTE_V_HVLOCK;
+    *vp = v;
     *rp = r;
     stq_p(hpte, 0);
     rb = compute_tlbie_rb(v, r, ptex);
     ppc_tlb_invalidate_one(env, rb);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
     return REMOVE_SUCCESS;
 }
 
@@ -324,19 +298,12 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr,
     }
 
     hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }
 
     v = ldq_p(hpte);
     r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
 
     if ((v & HPTE_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
         return H_NOT_FOUND;
     }
 
@@ -350,8 +317,7 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr,
     ppc_tlb_invalidate_one(env, rb);
     stq_p(hpte + (HASH_PTE_SIZE_64/2), r);
     /* Don't need a memory barrier, due to qemu's global lock */
-    stq_p(hpte, v & ~HPTE_V_HVLOCK);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
+    stq_p(hpte, v);
     return H_SUCCESS;
 }
 
commit 5a1972c8472fafd519a68b689fdcaf33ec857945
Author: Stefan Weil <sw at weilnetz.de>
Date:   Fri Aug 31 22:21:21 2012 +0200

    ppc405_uc: Fix buffer overflow
    
    Report from smatch:
    
    ppc405_uc.c:209 dcr_read_pob(12) error: buffer overflow 'pob->besr' 2 <= 2
    ppc405_uc.c:232 dcr_write_pob(12) error: buffer overflow 'pob->besr' 2 <= 2
    
    The old code reads and writes besr[POB0_BESR1 - POB0_BESR0] or besr[2]
    which is one too much.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c
index 89e5013..b52ab2f 100644
--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@@ -191,7 +191,8 @@ enum {
 typedef struct ppc4xx_pob_t ppc4xx_pob_t;
 struct ppc4xx_pob_t {
     uint32_t bear;
-    uint32_t besr[2];
+    uint32_t besr0;
+    uint32_t besr1;
 };
 
 static uint32_t dcr_read_pob (void *opaque, int dcrn)
@@ -205,8 +206,10 @@ static uint32_t dcr_read_pob (void *opaque, int dcrn)
         ret = pob->bear;
         break;
     case POB0_BESR0:
+        ret = pob->besr0;
+        break;
     case POB0_BESR1:
-        ret = pob->besr[dcrn - POB0_BESR0];
+        ret = pob->besr1;
         break;
     default:
         /* Avoid gcc warning */
@@ -227,9 +230,12 @@ static void dcr_write_pob (void *opaque, int dcrn, uint32_t val)
         /* Read only */
         break;
     case POB0_BESR0:
+        /* Write-clear */
+        pob->besr0 &= ~val;
+        break;
     case POB0_BESR1:
         /* Write-clear */
-        pob->besr[dcrn - POB0_BESR0] &= ~val;
+        pob->besr1 &= ~val;
         break;
     }
 }
@@ -241,8 +247,8 @@ static void ppc4xx_pob_reset (void *opaque)
     pob = opaque;
     /* No error */
     pob->bear = 0x00000000;
-    pob->besr[0] = 0x0000000;
-    pob->besr[1] = 0x0000000;
+    pob->besr0 = 0x0000000;
+    pob->besr1 = 0x0000000;
 }
 
 static void ppc4xx_pob_init(CPUPPCState *env)
commit ace9a2cbac9b2949aadc176ffa83a1d2a0873e2c
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 19 21:08:42 2012 +0000

    target-ppc: KVM: Fix some kernel version edge cases for kvmppc_reset_htab()
    
    The kvmppc_reset_htab() function invokes the KVM_PPC_ALLOCATE_HTAB vm ioctl
    to request KVM to allocate and reset a hash page table for the guest - it
    returns the size of hash table allocated, or 0 to indicate that qemu needs
    to allocate the hash table itself.  In practice qemu needs to allocate the
    htab for full emulation and with Book3sPR KVM, but the kernel has to
    allocate it for Book3sHV KVM (the hash table needs to be physically
    contiguous in that case).
    
    Unfortunately, the logic in this function is incorrect for some existing
    kernels.  Specifically:
      * at least some PR KVM versions advertise the relevant capability but
    don't actually implement the ioctl(), returning ENOTTY.
      * For old kernels which don't have the capability, we currently return 0.
    This is correct for PR KVM, where we need to allocate the htab, but not for
    HV KVM - kernels of this era always allocate a 16MB hash table per guest.
    
    This patch corrects both of these edge cases.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 546c116..1975323 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1127,18 +1127,38 @@ int kvmppc_reset_htab(int shift_hint)
 {
     uint32_t shift = shift_hint;
 
-    if (kvm_enabled() &&
-        kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
+    if (!kvm_enabled()) {
+        /* Full emulation, tell caller to allocate htab itself */
+        return 0;
+    }
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
         int ret;
         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
-        if (ret < 0) {
+        if (ret == -ENOTTY) {
+            /* At least some versions of PR KVM advertise the
+             * capability, but don't implement the ioctl().  Oops.
+             * Return 0 so that we allocate the htab in qemu, as is
+             * correct for PR. */
+            return 0;
+        } else if (ret < 0) {
             return ret;
         }
         return shift;
     }
 
-    /* For now.. */
-    return 0;
+    /* We have a kernel that predates the htab reset calls.  For PR
+     * KVM, we need to allocate the htab ourselves, for an HV KVM of
+     * this era, it has allocated a 16MB fixed size hash table
+     * already.  Kernels of this era have the GET_PVINFO capability
+     * only on PR, so we use this hack to determine the right
+     * answer */
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* PR - tell caller to allocate htab */
+        return 0;
+    } else {
+        /* HV - assume 16MB kernel allocated htab */
+        return 24;
+    }
 }
 
 static inline uint32_t mfpvr(void)
commit 3fe719f467530b7c8ac0797881ff4b66d1357c18
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:21 2012 +0000

    pseries: Fix semantics of RTAS int-on, int-off and set-xive functions
    
    Currently the ibm,int-on and ibm,int-off RTAS functions are implemented as
    no-ops.  This is because when implemented as specified in PAPR they caused
    Linux (which calls both int-on/off and set-xive) to end up with interrupts
    masked when they should not be.  Since Linux's set-xive calls make the
    int-on/off calls redundant, making them nops worked around the problem.
    
    In fact, the problem was caused because there was a subtle bug in set-xive,
    PAPR specifies that as well as updating the current priority, it also needs
    to update the saved priority used by int-on/off.  With this bug fixed the
    problem goes away.  This patch implements this more correct fix.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index 75c8cca..ce88aa7 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -270,13 +270,14 @@ static void write_xive_lsi(struct ics_state *ics, int srcno)
 }
 
 static void ics_write_xive(struct ics_state *ics, int nr, int server,
-                           uint8_t priority)
+                           uint8_t priority, uint8_t saved_priority)
 {
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
     irq->server = server;
     irq->priority = priority;
+    irq->saved_priority = saved_priority;
 
     if (irq->lsi) {
         write_xive_lsi(ics, srcno);
@@ -405,7 +406,7 @@ static void rtas_set_xive(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    ics_write_xive(ics, nr, server, priority);
+    ics_write_xive(ics, nr, server, priority, priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
@@ -453,14 +454,8 @@ static void rtas_int_off(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    irq->saved_priority = irq->priority;
-    ics_write_xive_msi(xics, nr, irq->server, 0xff);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
+                   ics->irqs[nr - ics->offset].priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
@@ -484,13 +479,9 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
+                   ics->irqs[nr - ics->offset].saved_priority,
+                   ics->irqs[nr - ics->offset].saved_priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
commit 53724ee565565f69560dbe17553bede8c0169379
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:20 2012 +0000

    pseries: Rework implementation of TCE bypass
    
    On the pseries machine the IOMMU (aka TCE tables) is always active for all
    PCI and VIO devices.  Mostly to simplify the SLOF firmware, we implement an
    extension which allows the IOMMU to be temporarily disabled for certain
    devices.
    
    Currently this is implemented by setting the device's DMAContext pointer to
    NULL (thus reverting to qemu's default no-IOMMU DMA behaviour), then
    replacing it when bypass mode is disabled.
    
    This approach causes a bunch of complications though.  It complexifies the
    management of the DMAContext lifetimes, it's problematic for savevm/loadvm,
    and it means that while bypass is active we have nowhere to store the
    device's LIOBN (Logical IO Bus Number, used to identify DMA address
    spaces).  At present we regenerate the LIOBN from other address information
    but this restricts how we can allocate LIOBNs.
    
    This patch gives up on this approach, replacing it with the much simpler
    one of having a 'bypass' boolean flag in the TCE state structure.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.h b/hw/spapr.h
index 51a966b..e984e3f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -339,6 +339,7 @@ void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
 void spapr_tce_reset(DMAContext *dma);
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 216aa06..38034c0 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -42,6 +42,7 @@ struct sPAPRTCETable {
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
+    bool bypass;
     int fd;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
@@ -78,6 +79,12 @@ static int spapr_tce_translate(DMAContext *dma,
             DMA_ADDR_FMT "\n", tcet->liobn, addr);
 #endif
 
+    if (tcet->bypass) {
+        *paddr = addr;
+        *len = (target_phys_addr_t)-1;
+        return 0;
+    }
+
     /* Check if we are in bound */
     if (addr >= tcet->window_size) {
 #ifdef DEBUG_TCE
@@ -162,15 +169,21 @@ void spapr_tce_free(DMAContext *dma)
     }
 }
 
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+    tcet->bypass = bypass;
+}
+
 void spapr_tce_reset(DMAContext *dma)
 {
-    if (dma) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-        size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-            * sizeof(sPAPRTCE);
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
+        * sizeof(sPAPRTCE);
 
-        memset(tcet->table, 0, table_size);
-    }
+    tcet->bypass = false;
+    memset(tcet->table, 0, table_size);
 }
 
 static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 752836e..848806d 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -316,14 +316,9 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-
     if (dev->dma) {
-        spapr_tce_free(dev->dma);
+        spapr_tce_reset(dev->dma);
     }
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
-
     free_crq(dev);
 }
 
@@ -346,16 +341,14 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         rtas_st(rets, 0, -3);
         return;
     }
-    if (enable) {
-        spapr_tce_free(dev->dma);
-        dev->dma = NULL;
-    } else {
-        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 
-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (!dev->dma) {
+        rtas_st(rets, 0, -3);
+        return;
     }
 
+    spapr_tce_set_bypass(dev->dma, !!enable);
+
     rtas_st(rets, 0, 0);
 }
 
@@ -421,7 +414,6 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 {
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn;
     char *id;
 
     if (dev->reg != -1) {
@@ -463,8 +455,10 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         return -1;
     }
 
-    liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    }
 
     return pc->init(dev);
 }
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index acef65e..cc85d26 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -131,7 +131,6 @@ void spapr_vscsi_create(VIOsPAPRBus *bus);
 
 VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus);
 
-int spapr_tce_set_bypass(uint32_t unit, uint32_t enable);
 void spapr_vio_quiesce(void);
 
 #endif /* _HW_SPAPR_VIO_H */
commit 490d4a2b6edafb27cd688ded7fdb1290453d71b0
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:19 2012 +0000

    pseries: Remove never used flags field from spapr vio devices
    
    The general device state structure for PAPR VIO emulated devices includes a
    'flags' field which was never used.  This patch removes it.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index ea6aa43..acef65e 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -60,7 +60,6 @@ typedef struct VIOsPAPRDeviceClass {
 struct VIOsPAPRDevice {
     DeviceState qdev;
     uint32_t reg;
-    uint32_t flags;
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
commit ff9d2afa618acd81d926c9c213b4ff5f7163db1d
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:18 2012 +0000

    pseries: Remove XICS irq type enum type
    
    Currently the XICS interrupt controller emulation uses a custom enum to
    specify whether a given interrupt is level-sensitive or message-triggered.
    This enum makes life awkward for saving the state, and isn't particularly
    useful since there are only two possibilities.  This patch replaces the
    enum with a simple bool.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index 0a0e9cd..1177efa 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -89,7 +89,7 @@
 
 sPAPREnvironment *spapr;
 
-int spapr_allocate_irq(int hint, enum xics_irq_type type)
+int spapr_allocate_irq(int hint, bool lsi)
 {
     int irq;
 
@@ -105,13 +105,13 @@ int spapr_allocate_irq(int hint, enum xics_irq_type type)
         return 0;
     }
 
-    xics_set_irq_type(spapr->icp, irq, type);
+    xics_set_irq_type(spapr->icp, irq, lsi);
 
     return irq;
 }
 
 /* Allocate block of consequtive IRQs, returns a number of the first */
-int spapr_allocate_irq_block(int num, enum xics_irq_type type)
+int spapr_allocate_irq_block(int num, bool lsi)
 {
     int first = -1;
     int i;
@@ -119,7 +119,7 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type)
     for (i = 0; i < num; ++i) {
         int irq;
 
-        irq = spapr_allocate_irq(0, type);
+        irq = spapr_allocate_irq(0, lsi);
         if (!irq) {
             return -1;
         }
diff --git a/hw/spapr.h b/hw/spapr.h
index f9a7b0f..51a966b 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -291,17 +291,17 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                              target_ulong *args);
 
-int spapr_allocate_irq(int hint, enum xics_irq_type type);
-int spapr_allocate_irq_block(int num, enum xics_irq_type type);
+int spapr_allocate_irq(int hint, bool lsi);
+int spapr_allocate_irq_block(int num, bool lsi);
 
 static inline int spapr_allocate_msi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_MSI);
+    return spapr_allocate_irq(hint, false);
 }
 
 static inline int spapr_allocate_lsi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_LSI);
+    return spapr_allocate_irq(hint, true);
 }
 
 static inline uint32_t rtas_ld(target_ulong phys, int n)
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 203155e..b628f89 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -351,7 +351,7 @@ static void rtas_ibm_change_msi(sPAPREnvironment *spapr,
 
     /* There is no cached config, allocate MSIs */
     if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, XICS_MSI);
+        irq = spapr_allocate_irq_block(req_num, true);
         if (irq < 0) {
             fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
             rtas_st(rets, 0, -1); /* Hardware error */
diff --git a/hw/xics.c b/hw/xics.c
index 648af25..75c8cca 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -170,7 +170,7 @@ struct ics_irq_state {
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
     uint8_t status;
-    enum xics_irq_type type;
+    bool lsi;
 };
 
 struct ics_state {
@@ -244,7 +244,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
     struct ics_state *ics = (struct ics_state *)opaque;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -278,7 +278,7 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server,
     irq->server = server;
     irq->priority = priority;
 
-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -301,7 +301,7 @@ static void ics_resend(struct ics_state *ics)
         struct ics_irq_state *irq = ics->irqs + i;
 
         /* FIXME: filter by server#? */
-        if (irq->type == XICS_LSI) {
+        if (irq->lsi) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -314,7 +314,7 @@ static void ics_eoi(struct ics_state *ics, int nr)
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
         irq->status &= ~XICS_STATUS_SENT;
     }
 }
@@ -333,14 +333,12 @@ qemu_irq xics_get_qirq(struct icp_state *icp, int irq)
     return icp->ics->qirqs[irq - icp->ics->offset];
 }
 
-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type)
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi)
 {
     assert((irq >= icp->ics->offset)
            && (irq < (icp->ics->offset + icp->ics->nr_irqs)));
-    assert((type == XICS_MSI) || (type == XICS_LSI));
 
-    icp->ics->irqs[irq - icp->ics->offset].type = type;
+    icp->ics->irqs[irq - icp->ics->offset].lsi = lsi;
 }
 
 static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr,
diff --git a/hw/xics.h b/hw/xics.h
index 99b96ac..6817268 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -31,14 +31,8 @@
 
 struct icp_state;
 
-enum xics_irq_type {
-    XICS_MSI,        /* Message-signalled (edge) interrupt */
-    XICS_LSI,        /* Level-signalled interrupt */
-};
-
 qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type);
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
 
 struct icp_state *xics_system_init(int nr_irqs);
 
commit 98ca8c023825fc6dd99e6cea1956d84ed8cadb3a
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:17 2012 +0000

    pseries: Remove C bitfields from xics code
    
    The XICS interrupt controller emulation uses some C bitfield variables in
    its internal state structure.  This makes life awkward for saving the state
    because we don't have easy VMSTATE helpers for bitfields.
    
    This patch removes the bitfields, instead using explicit bit masking in a
    single status variable.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index a8a08ce..648af25 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -165,11 +165,12 @@ struct ics_irq_state {
     int server;
     uint8_t priority;
     uint8_t saved_priority;
+#define XICS_STATUS_ASSERTED           0x1
+#define XICS_STATUS_SENT               0x2
+#define XICS_STATUS_REJECTED           0x4
+#define XICS_STATUS_MASKED_PENDING     0x8
+    uint8_t status;
     enum xics_irq_type type;
-    int asserted:1;
-    int sent:1;
-    int rejected:1;
-    int masked_pending:1;
 };
 
 struct ics_state {
@@ -191,8 +192,8 @@ static void resend_msi(struct ics_state *ics, int srcno)
     struct ics_irq_state *irq = ics->irqs + srcno;
 
     /* FIXME: filter by server#? */
-    if (irq->rejected) {
-        irq->rejected = 0;
+    if (irq->status & XICS_STATUS_REJECTED) {
+        irq->status &= ~XICS_STATUS_REJECTED;
         if (irq->priority != 0xff) {
             icp_irq(ics->icp, irq->server, srcno + ics->offset,
                     irq->priority);
@@ -204,8 +205,10 @@ static void resend_lsi(struct ics_state *ics, int srcno)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if ((irq->priority != 0xff) && irq->asserted && !irq->sent) {
-        irq->sent = 1;
+    if ((irq->priority != 0xff)
+        && (irq->status & XICS_STATUS_ASSERTED)
+        && !(irq->status & XICS_STATUS_SENT)) {
+        irq->status |= XICS_STATUS_SENT;
         icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
     }
 }
@@ -216,7 +219,7 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val)
 
     if (val) {
         if (irq->priority == 0xff) {
-            irq->masked_pending = 1;
+            irq->status |= XICS_STATUS_MASKED_PENDING;
             /* masked pending */ ;
         } else  {
             icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
@@ -228,7 +231,11 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    irq->asserted = val;
+    if (val) {
+        irq->status |= XICS_STATUS_ASSERTED;
+    } else {
+        irq->status &= ~XICS_STATUS_ASSERTED;
+    }
     resend_lsi(ics, srcno);
 }
 
@@ -248,11 +255,12 @@ static void write_xive_msi(struct ics_state *ics, int srcno)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (!irq->masked_pending || (irq->priority == 0xff)) {
+    if (!(irq->status & XICS_STATUS_MASKED_PENDING)
+        || (irq->priority == 0xff)) {
         return;
     }
 
-    irq->masked_pending = 0;
+    irq->status &= ~XICS_STATUS_MASKED_PENDING;
     icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
 }
 
@@ -281,8 +289,8 @@ static void ics_reject(struct ics_state *ics, int nr)
 {
     struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
 
-    irq->rejected = 1; /* Irrelevant but harmless for LSI */
-    irq->sent = 0; /* Irrelevant but harmless for MSI */
+    irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */
+    irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }
 
 static void ics_resend(struct ics_state *ics)
@@ -307,7 +315,7 @@ static void ics_eoi(struct ics_state *ics, int nr)
     struct ics_irq_state *irq = ics->irqs + srcno;
 
     if (irq->type == XICS_LSI) {
-        irq->sent = 0;
+        irq->status &= ~XICS_STATUS_SENT;
     }
 }
 
@@ -506,10 +514,7 @@ static void xics_reset(void *opaque)
     for (i = 0; i < ics->nr_irqs; i++) {
         /* Reset everything *except* the type */
         ics->irqs[i].server = 0;
-        ics->irqs[i].asserted = 0;
-        ics->irqs[i].sent = 0;
-        ics->irqs[i].rejected = 0;
-        ics->irqs[i].masked_pending = 0;
+        ics->irqs[i].status = 0;
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
     }
commit 1dd088946cf464a994bc93945a360aef049493af
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:16 2012 +0000

    pseries: Small cleanup to H_CEDE implementation
    
    The H_CEDE hypercall implementation for the pseries machine doesn't trigger
    quite the right path in the main cpu exec loop.  We should set exit_request
    to pop up one extra level and recheck state, and we should set the
    exception_index to EXCP_HLT (H_CEDE is roughly equivalent to the hlt
    instruction on x86).
    
    In practice, this doesn't really matter except for KVM, and KVM implements
    H_CEDE internally so we never hit this code path.  But we might as well
    get it right, just in case it matters some day.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index abd847f..2df94d1 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -544,6 +544,8 @@ static target_ulong h_cede(CPUPPCState *env, sPAPREnvironment *spapr,
     hreg_compute_hflags(env);
     if (!cpu_has_work(env)) {
         env->halted = 1;
+        env->exception_index = EXCP_HLT;
+        env->exit_request = 1;
     }
     return H_SUCCESS;
 }
commit 256b408abea2cfe18d8c0278e5b46213509db271
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:15 2012 +0000

    pseries: Fix XICS reset
    
    The XICS interrupt controller used on the pseries machine currently has no
    reset handler.  We can get away with this under some circumstances, but
    it's not correct, and can cause failures if the XICS happens to be in the
    wrong state at the time of reset.
    
    This patch adds a hook to properly reset the XICS state.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xics.c b/hw/xics.c
index b674771..a8a08ce 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -489,11 +489,36 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
     rtas_st(rets, 0, 0); /* Success */
 }
 
+static void xics_reset(void *opaque)
+{
+    struct icp_state *icp = (struct icp_state *)opaque;
+    struct ics_state *ics = icp->ics;
+    int i;
+
+    for (i = 0; i < icp->nr_servers; i++) {
+        icp->ss[i].xirr = 0;
+        icp->ss[i].pending_priority = 0;
+        icp->ss[i].mfrr = 0xff;
+        /* Make all outputs are deasserted */
+        qemu_set_irq(icp->ss[i].output, 0);
+    }
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        /* Reset everything *except* the type */
+        ics->irqs[i].server = 0;
+        ics->irqs[i].asserted = 0;
+        ics->irqs[i].sent = 0;
+        ics->irqs[i].rejected = 0;
+        ics->irqs[i].masked_pending = 0;
+        ics->irqs[i].priority = 0xff;
+        ics->irqs[i].saved_priority = 0xff;
+    }
+}
+
 struct icp_state *xics_system_init(int nr_irqs)
 {
     CPUPPCState *env;
     int max_server_num;
-    int i;
     struct icp_state *icp;
     struct ics_state *ics;
 
@@ -508,10 +533,6 @@ struct icp_state *xics_system_init(int nr_irqs)
     icp->nr_servers = max_server_num + 1;
     icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
 
-    for (i = 0; i < icp->nr_servers; i++) {
-        icp->ss[i].mfrr = 0xff;
-    }
-
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         struct icp_server_state *ss = &icp->ss[env->cpu_index];
 
@@ -539,11 +560,6 @@ struct icp_state *xics_system_init(int nr_irqs)
     icp->ics = ics;
     ics->icp = icp;
 
-    for (i = 0; i < nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
-    }
-
     ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
 
     spapr_register_hypercall(H_CPPR, h_cppr);
@@ -556,5 +572,7 @@ struct icp_state *xics_system_init(int nr_irqs)
     spapr_rtas_register("ibm,int-off", rtas_int_off);
     spapr_rtas_register("ibm,int-on", rtas_int_on);
 
+    qemu_register_reset(xics_reset, icp);
+
     return icp;
 }
commit eddeed26ac83392053aef823a341f643ea8e3d2f
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:14 2012 +0000

    pseries: Reset emulated PCI TCE tables on system reset
    
    The emulated PCI host bridge on the pseries machine incorporates an IOMMU
    (PAPR TCE table).  Currently the mappings in this IOMMU are not cleared
    when we reset the system.  This patch fixes this bug.  To do this it adds
    a new reset function to the IOMMU emulation code.  The VIO devices already
    reset their TCE tables, but they do so by destroying and re-creating their
    DMA context.  This doesn't work for the PCI host bridge, because the
    infrastructure for PCI IOMMUs has already copied/cached the DMA context
    pointer into the subordinate PCI device structures.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.h b/hw/spapr.h
index f1fb646..f9a7b0f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -338,6 +338,7 @@ typedef struct sPAPRTCE {
 void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
+void spapr_tce_reset(DMAContext *dma);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 53b7317..216aa06 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -162,6 +162,17 @@ void spapr_tce_free(DMAContext *dma)
     }
 }
 
+void spapr_tce_reset(DMAContext *dma)
+{
+    if (dma) {
+        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+        size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
+            * sizeof(sPAPRTCE);
+
+        memset(tcet->table, 0, table_size);
+    }
+}
+
 static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                 target_ulong tce)
 {
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 661c05b..203155e 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -595,6 +595,15 @@ static int spapr_phb_init(SysBusDevice *s)
     return 0;
 }
 
+static void spapr_phb_reset(DeviceState *qdev)
+{
+    SysBusDevice *s = sysbus_from_qdev(qdev);
+    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+
+    /* Reset the IOMMU state */
+    spapr_tce_reset(sphb->dma);
+}
+
 static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("buid", sPAPRPHBState, buid, 0),
     DEFINE_PROP_STRING("busname", sPAPRPHBState, busname),
@@ -613,6 +622,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
 
     sdc->init = spapr_phb_init;
     dc->props = spapr_phb_properties;
+    dc->reset = spapr_phb_reset;
 }
 
 static const TypeInfo spapr_phb_info = {
commit 4dd96f244f62d5e4b493c1f4071c0d4a4a57474d
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:13 2012 +0000

    pseries: Clear TCE and signal state when resetting PAPR VIO devices
    
    When we reset the system, the reset method for VIO bus devices resets
    the state of their request queue (if present) as it should.  However
    it was not resetting the state of their TCE table (DMA translation) if
    present.  It was also not resetting the state of the per-device signal
    mask set with H_VIO_SIGNAL.  This patch corrects both bugs, and also
    removes some small code duplication in the reset paths.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 7ca4452..752836e 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -324,9 +324,7 @@ static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
     }
     dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
 
-    dev->crq.qladdr = 0;
-    dev->crq.qsize = 0;
-    dev->crq.qnext = 0;
+    free_crq(dev);
 }
 
 static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
@@ -409,9 +407,10 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
     VIOsPAPRDevice *dev = DO_UPCAST(VIOsPAPRDevice, qdev, qdev);
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
 
-    if (dev->crq.qsize) {
-        free_crq(dev);
-    }
+    /* Shut down the request queue and TCEs if necessary */
+    spapr_vio_quiesce_one(dev);
+
+    dev->signal_state = 0;
 
     if (pc->reset) {
         pc->reset(dev);
commit 7f763a5d994bbddb50705d2e50decdf52937521f
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:12 2012 +0000

    pseries: Add support for new KVM hash table control call
    
    This adds support for the new "reset htab" ioctl which allows qemu
    to properly clean up the MMU hash table when the guest is reset. With
    the corresponding kernel support, reset of a guest now works properly.
    
    This also paves the way for indicating a different size hash table
    to the kernel and for the kernel to be able to impose limits on
    the requested size.
    
    Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index 68542e8..0a0e9cd 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -85,6 +85,8 @@
 
 #define PHANDLE_XICP            0x00001111
 
+#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
+
 sPAPREnvironment *spapr;
 
 int spapr_allocate_irq(int hint, enum xics_irq_type type)
@@ -134,12 +136,13 @@ int spapr_allocate_irq_block(int num, enum xics_irq_type type)
     return first;
 }
 
-static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
+static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
     int ret = 0, offset;
     CPUPPCState *env;
     char cpu_model[32];
     int smt = kvmppc_smt_threads();
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
     assert(spapr->cpu_model);
 
@@ -163,8 +166,16 @@ static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
             return offset;
         }
 
-        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
-                          sizeof(associativity));
+        if (nb_numa_nodes > 1) {
+            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                              sizeof(associativity));
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
+                          pft_size_prop, sizeof(pft_size_prop));
         if (ret < 0) {
             return ret;
         }
@@ -206,45 +217,36 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
     return (p - prop) * sizeof(uint32_t);
 }
 
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+
 static void *spapr_create_fdt_skel(const char *cpu_model,
-                                   target_phys_addr_t rma_size,
                                    target_phys_addr_t initrd_base,
                                    target_phys_addr_t initrd_size,
                                    target_phys_addr_t kernel_size,
                                    const char *boot_device,
-                                   const char *kernel_cmdline,
-                                   long hash_shift)
+                                   const char *kernel_cmdline)
 {
     void *fdt;
     CPUPPCState *env;
-    uint64_t mem_reg_property[2];
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
         "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
     char qemu_hypertas_prop[] = "hcall-memop1";
+    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
-    int i;
     char *modelname;
-    int smt = kvmppc_smt_threads();
+    int i, smt = kvmppc_smt_threads();
     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
-    char mem_name[32];
-    target_phys_addr_t node0_size, mem_start;
-
-#define _FDT(exp) \
-    do { \
-        int ret = (exp);                                           \
-        if (ret < 0) {                                             \
-            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
-                    #exp, fdt_strerror(ret));                      \
-            exit(1);                                               \
-        }                                                          \
-    } while (0)
 
     fdt = g_malloc0(FDT_MAX_SIZE);
     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
@@ -289,55 +291,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 
     _FDT((fdt_end_node(fdt)));
 
-    /* memory node(s) */
-    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
-    if (rma_size > node0_size) {
-        rma_size = node0_size;
-    }
-
-    /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(rma_size);
-    _FDT((fdt_begin_node(fdt, "memory@0")));
-    _FDT((fdt_property_string(fdt, "device_type", "memory")));
-    _FDT((fdt_property(fdt, "reg", mem_reg_property,
-        sizeof(mem_reg_property))));
-    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-        sizeof(associativity))));
-    _FDT((fdt_end_node(fdt)));
-
-    /* RAM: Node 0 */
-    if (node0_size > rma_size) {
-        mem_reg_property[0] = cpu_to_be64(rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
-
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-                           sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-                           sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
-        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-            sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-            sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-        mem_start += node_mem[i];
-    }
-
     /* cpus */
     _FDT((fdt_begin_node(fdt, "cpus")));
 
@@ -389,8 +342,6 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
-        _FDT((fdt_property(fdt, "ibm,pft-size",
-                           pft_size_prop, sizeof(pft_size_prop))));
         _FDT((fdt_property_string(fdt, "status", "okay")));
         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
 
@@ -489,6 +440,68 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
     return fdt;
 }
 
+static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+{
+    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0)};
+    char mem_name[32];
+    target_phys_addr_t node0_size, mem_start;
+    uint64_t mem_reg_property[2];
+    int i, off;
+
+    /* memory node(s) */
+    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+    if (spapr->rma_size > node0_size) {
+        spapr->rma_size = node0_size;
+    }
+
+    /* RMA */
+    mem_reg_property[0] = 0;
+    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
+    off = fdt_add_subnode(fdt, 0, "memory@0");
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                      sizeof(associativity))));
+
+    /* RAM: Node 0 */
+    if (node0_size > spapr->rma_size) {
+        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
+        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
+
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+    }
+
+    /* RAM: Node 1 and beyond */
+    mem_start = node0_size;
+    for (i = 1; i < nb_numa_nodes; i++) {
+        mem_reg_property[0] = cpu_to_be64(mem_start);
+        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
+        associativity[3] = associativity[4] = cpu_to_be32(i);
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+        mem_start += node_mem[i];
+    }
+
+    return 0;
+}
+
 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                                target_phys_addr_t fdt_addr,
                                target_phys_addr_t rtas_addr,
@@ -503,6 +516,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
     /* open out the base tree into a temp buffer for the final tweaks */
     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
 
+    ret = spapr_populate_memory(spapr, fdt);
+    if (ret < 0) {
+        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
+        exit(1);
+    }
+
     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
     if (ret < 0) {
         fprintf(stderr, "couldn't setup vio devices in fdt\n");
@@ -525,11 +544,9 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
     }
 
     /* Advertise NUMA via ibm,associativity */
-    if (nb_numa_nodes > 1) {
-        ret = spapr_set_associativity(fdt, spapr);
-        if (ret < 0) {
-            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
-        }
+    ret = spapr_fixup_cpu_dt(fdt, spapr);
+    if (ret < 0) {
+        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
     }
 
     if (!spapr->has_graphics) {
@@ -559,10 +576,39 @@ static void emulate_spapr_hypercall(CPUPPCState *env)
     env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
 }
 
+static void spapr_reset_htab(sPAPREnvironment *spapr)
+{
+    long shift;
+
+    /* allocate hash page table.  For now we always make this 16mb,
+     * later we should probably make it scale to the size of guest
+     * RAM */
+
+    shift = kvmppc_reset_htab(spapr->htab_shift);
+
+    if (shift > 0) {
+        /* Kernel handles htab, we don't need to allocate one */
+        spapr->htab_shift = shift;
+    } else {
+        if (!spapr->htab) {
+            /* Allocate an htab if we don't yet have one */
+            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+        }
+
+        /* And clear it */
+        memset(spapr->htab, 0, HTAB_SIZE(spapr));
+    }
+
+    /* Update the RMA size if necessary */
+    if (spapr->vrma_adjust) {
+        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
+    }
+}
+
 static void ppc_spapr_reset(void)
 {
-    /* flush out the hash table */
-    memset(spapr->htab, 0, spapr->htab_size);
+    /* Reset the hash table & recalc the RMA */
+    spapr_reset_htab(spapr);
 
     qemu_devices_reset();
 
@@ -591,6 +637,12 @@ static void spapr_cpu_reset(void *opaque)
     env->halted = 1;
 
     env->spr[SPR_HIOR] = 0;
+
+    env->external_htab = spapr->htab;
+    env->htab_base = -1;
+    env->htab_mask = HTAB_SIZE(spapr) - 1;
+    env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
+        (spapr->htab_shift - 18);
 }
 
 /* Returns whether we want to use VGA or not */
@@ -624,11 +676,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     int i;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
-    target_phys_addr_t rma_alloc_size, rma_size;
+    target_phys_addr_t rma_alloc_size;
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
     long load_limit, rtas_limit, fw_size;
-    long pteg_shift = 17;
     char *filename;
 
     msi_supported = true;
@@ -645,20 +696,39 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         hw_error("qemu: Unable to create RMA\n");
         exit(1);
     }
+
     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
-        rma_size = rma_alloc_size;
+        spapr->rma_size = rma_alloc_size;
     } else {
-        rma_size = ram_size;
+        spapr->rma_size = ram_size;
+
+        /* With KVM, we don't actually know whether KVM supports an
+         * unbounded RMA (PR KVM) or is limited by the hash table size
+         * (HV KVM using VRMA), so we always assume the latter
+         *
+         * In that case, we also limit the initial allocations for RTAS
+         * etc... to 256M since we have no way to know what the VRMA size
+         * is going to be as it depends on the size of the hash table
+         * isn't determined yet.
+         */
+        if (kvm_enabled()) {
+            spapr->vrma_adjust = 1;
+            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
+        }
     }
 
     /* We place the device tree and RTAS just below either the top of the RMA,
      * or just below 2GB, whichever is lowere, so that it can be
      * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(rma_size, 0x80000000);
+    rtas_limit = MIN(spapr->rma_size, 0x80000000);
     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
     load_limit = spapr->fdt_addr - FW_OVERHEAD;
 
+    /* For now, always aim for a 16MB hash table */
+    /* FIXME: we should change this default based on RAM size */
+    spapr->htab_shift = 24;
+
     /* init CPUs */
     if (cpu_model == NULL) {
         cpu_model = kvm_enabled() ? "host" : "POWER7";
@@ -696,25 +766,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         memory_region_add_subregion(sysmem, nonrma_base, ram);
     }
 
-    /* allocate hash page table.  For now we always make this 16mb,
-     * later we should probably make it scale to the size of guest
-     * RAM.  FIXME: setting the htab information in the CPU env really
-     * belongs at CPU reset time, but we can get away with it for now
-     * because the PAPR guest is not permitted to write SDR1 so in
-     * fact these settings will never change during the run */
-    spapr->htab_size = 1ULL << (pteg_shift + 7);
-    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->external_htab = spapr->htab;
-        env->htab_base = -1;
-        env->htab_mask = spapr->htab_size - 1;
-
-        /* Tell KVM that we're in PAPR mode */
-        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
-                             ((pteg_shift + 7) - 18);
-    }
-
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
                                            rtas_limit - spapr->rtas_addr);
@@ -787,7 +838,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         }
     }
 
-    if (rma_size < (MIN_RMA_SLOF << 20)) {
+    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
         exit(1);
@@ -839,11 +890,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     spapr->entry_point = 0x100;
 
     /* Prepare the device tree */
-    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
+    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
                                             initrd_base, initrd_size,
                                             kernel_size,
-                                            boot_device, kernel_cmdline,
-                                            pteg_shift + 7);
+                                            boot_device, kernel_cmdline);
     assert(spapr->fdt_skel != NULL);
 }
 
diff --git a/hw/spapr.h b/hw/spapr.h
index ac34a17..f1fb646 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -15,7 +15,9 @@ typedef struct sPAPREnvironment {
 
     target_phys_addr_t ram_limit;
     void *htab;
-    long htab_size;
+    long htab_shift;
+    target_phys_addr_t rma_size;
+    int vrma_adjust;
     target_phys_addr_t fdt_addr, rtas_addr;
     long rtas_size;
     void *fdt_skel;
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 1a7489b..546c116 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1010,6 +1010,7 @@ int kvmppc_smt_threads(void)
     return cap_ppc_smt ? cap_ppc_smt : 1;
 }
 
+#ifdef TARGET_PPC64
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 {
     void *rma;
@@ -1053,6 +1054,16 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
     return size;
 }
 
+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
+{
+    if (cap_ppc_rma >= 2) {
+        return current_size;
+    }
+    return MIN(current_size,
+               getrampagesize() << (hash_shift - 7));
+}
+#endif
+
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
 {
     struct kvm_create_spapr_tce args = {
@@ -1112,6 +1123,24 @@ int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
     return 0;
 }
 
+int kvmppc_reset_htab(int shift_hint)
+{
+    uint32_t shift = shift_hint;
+
+    if (kvm_enabled() &&
+        kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
+        int ret;
+        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
+        if (ret < 0) {
+            return ret;
+        }
+        return shift;
+    }
+
+    /* For now.. */
+    return 0;
+}
+
 static inline uint32_t mfpvr(void)
 {
     uint32_t pvr;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index e2f8703..baad6eb 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -27,6 +27,8 @@ int kvmppc_smt_threads(void);
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
+int kvmppc_reset_htab(int shift_hint);
+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
 const ppc_def_t *kvmppc_host_cpu_def(void);
 int kvmppc_fixup_cpu(CPUPPCState *env);
@@ -94,6 +96,23 @@ static inline int kvmppc_remove_spapr_tce(void *table, int pfd,
 {
     return -1;
 }
+
+static inline int kvmppc_reset_htab(int shift_hint)
+{
+    return -1;
+}
+
+static inline uint64_t kvmppc_rma_size(uint64_t current_size,
+                                       unsigned int hash_shift)
+{
+    return ram_size;
+}
+
+static inline int kvmppc_update_sdr1(CPUPPCState *env)
+{
+    return 0;
+}
+
 #endif /* !CONFIG_USER_ONLY */
 
 static inline const ppc_def_t *kvmppc_host_cpu_def(void)
commit c8787ad477f3be5a971b877dcb1bae5752c5796a
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:11 2012 +0000

    pseries: Use new method to correct reset sequence
    
    A number of things need to occur during reset of the PAPR
    paravirtualized platform in a specific order.  For example, the hash
    table needs to be cleared before the CPUs are reset, so that they
    initialize their register state correctly, and the CPUs need to have
    their main reset called before we set up the entry point state on the
    boot cpu.  We also need to have the main qdev reset happen before the
    creation and installation of the device tree for the new boot, because
    we need the state of the devices settled to correctly construct the
    device tree.
    
    We currently do the pseries once-per-reset initializations from a
    reset handler.  However we can't adequately control when this handler
    is called during the reset - in particular we can't guarantee it
    happens after all the qdev resets (since qdevs might be registered
    after the machine init function has executed).
    
    This patch uses the new QEMUMachine reset method to fix this
    problem, ensuring the various order dependent reset steps happen in
    the correct order.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index d88525a..68542e8 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -559,13 +559,13 @@ static void emulate_spapr_hypercall(CPUPPCState *env)
     env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
 }
 
-static void spapr_reset(void *opaque)
+static void ppc_spapr_reset(void)
 {
-    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
-
     /* flush out the hash table */
     memset(spapr->htab, 0, spapr->htab_size);
 
+    qemu_devices_reset();
+
     /* Load the fdt */
     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                        spapr->rtas_size);
@@ -845,14 +845,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
                                             boot_device, kernel_cmdline,
                                             pteg_shift + 7);
     assert(spapr->fdt_skel != NULL);
-
-    qemu_register_reset(spapr_reset, spapr);
 }
 
 static QEMUMachine spapr_machine = {
     .name = "pseries",
     .desc = "pSeries Logical Partition (PAPR compliant)",
     .init = ppc_spapr_init,
+    .reset = ppc_spapr_reset,
     .max_cpus = MAX_CPUS,
     .no_parallel = 1,
     .use_scsi = 1,
commit 048706d971c1830d7813052ca027ae00c519e894
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:10 2012 +0000

    pseries: Fix and cleanup CPU initialization and reset
    
    The current pseries machine init function iterates over the CPUs at several
    points, doing various bits of initialization.  This is messy; these can
    and should be merged into a single iteration doing all the necessary per
    cpu initialization.  Worse, some of these initializations were setting up
    state which should be set on every reset, not just at machine init time.
    A few of the initializations simply weren't necessary at all.
    
    This patch, therefore, moves those things that need to be to the
    per-cpu reset handler, and combines the remainder into two loops over
    the cpus (which also creates them).  The second loop is for setting up
    hash table information, and will be removed in a subsequent patch also
    making other fixes to the hash table setup.
    
    This exposes a bug in our start-cpu RTAS routine (called by the guest to
    start up CPUs other than CPU0) under kvm.  Previously, this function did
    not make a call to ensure that its changes to the new cpu's state were
    pushed into KVM in-kernel state.  We sort-of got away with this because
    some of the initializations had already placed the secondary CPUs into the
    right starting state for the sorts of Linux guests we've been running.
    
    Nonetheless the start-cpu RTAS call's behaviour was not correct and could
    easily have been broken by guest changes.  This patch also fixes it.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr.c b/hw/spapr.c
index c34b767..d88525a 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -581,8 +581,16 @@ static void spapr_reset(void *opaque)
 static void spapr_cpu_reset(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
 
     cpu_reset(CPU(cpu));
+
+    /* All CPUs start halted.  CPU0 is unhalted from the machine level
+     * reset code and the rest are explicitly started up by the guest
+     * using an RTAS call */
+    env->halted = 1;
+
+    env->spr[SPR_HIOR] = 0;
 }
 
 /* Returns whether we want to use VGA or not */
@@ -665,11 +673,16 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
         /* Set time-base frequency to 512 MHz */
         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
-        qemu_register_reset(spapr_cpu_reset, cpu);
 
-        env->hreset_vector = 0x60;
+        /* PAPR always has exception vectors in RAM not ROM */
         env->hreset_excp_prefix = 0;
-        env->gpr[3] = env->cpu_index;
+
+        /* Tell KVM that we're in PAPR mode */
+        if (kvm_enabled()) {
+            kvmppc_set_papr(env);
+        }
+
+        qemu_register_reset(spapr_cpu_reset, cpu);
     }
 
     /* allocate RAM */
@@ -685,7 +698,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
     /* allocate hash page table.  For now we always make this 16mb,
      * later we should probably make it scale to the size of guest
-     * RAM */
+     * RAM.  FIXME: setting the htab information in the CPU env really
+     * belongs at CPU reset time, but we can get away with it for now
+     * because the PAPR guest is not permitted to write SDR1 so in
+     * fact these settings will never change during the run */
     spapr->htab_size = 1ULL << (pteg_shift + 7);
     spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
 
@@ -697,11 +713,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         /* Tell KVM that we're in PAPR mode */
         env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
                              ((pteg_shift + 7) - 18);
-        env->spr[SPR_HIOR] = 0;
-
-        if (kvm_enabled()) {
-            kvmppc_set_papr(env);
-        }
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
@@ -827,11 +838,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
     spapr->entry_point = 0x100;
 
-    /* SLOF will startup the secondary CPUs using RTAS */
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->halted = 1;
-    }
-
     /* Prepare the device tree */
     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
                                             initrd_base, initrd_size,
diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index ae18595..b808f80 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -184,6 +184,11 @@ static void rtas_start_cpu(sPAPREnvironment *spapr,
             return;
         }
 
+        /* This will make sure qemu state is up to date with kvm, and
+         * mark it dirty so our changes get flushed back before the
+         * new cpu enters */
+        kvm_cpu_synchronize_state(env);
+
         env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME);
         env->nip = start;
         env->gpr[3] = r3;
commit f1af19d767073a0926ce12c19b1f06c4933bca35
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 12 16:57:09 2012 +0000

    ppc: Make kvm_arch_put_registers() put *all* the registers
    
    At least when invoked with high enough 'level' arguments,
    kvm_arch_put_registers() is supposed to copy essentially all the cpu state
    as encoded in qemu's internal structures into the kvm state.  Currently
    the ppc version does not do this - it never calls KVM_SET_SREGS, for
    example, and therefore never sets the SDR1 and various other important
    though rarely changed registers.
    
    Instead, the code paths which need to set these registers need to
    explicitly make (conditional) kvm calls which transfer the changes to kvm.
    This breaks the usual model of handling state updates in qemu, where code
    just changes the internal model and has it flushed out to kvm automatically
    at some later point.
    
    This patch fixes this for Book S ppc CPUs by adding a suitable call to
    KVM_SET_SREGS and also to KVM_SET_ONE_REG to set the HIOR (the only register
    that is set with that call so far).  This lets us remove the hacks to
    explicitly set these registers from the kvmppc_set_papr() function.
    
    The problem still exists for Book E CPUs (which use a different version of
    the kvm_sregs structure).  But fixing that has some complications of its
    own so can be left to another day.
    
    Likewise, there is still some ugly code for setting the PVR through special
    calls to SET_SREGS which is left in for now.  The PVR needs to be set
    especially early because it can affect what other features are available
    on the CPU, so I need to do more thinking to see if it can be integrated
    into the normal paths or not.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index a31d278..1a7489b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -60,6 +60,7 @@ static int cap_booke_sregs;
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_hior;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -86,6 +87,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -469,6 +471,53 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
         env->tlb_dirty = false;
     }
 
+    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
+        struct kvm_sregs sregs;
+
+        sregs.pvr = env->spr[SPR_PVR];
+
+        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+        /* Sync SLB */
+#ifdef TARGET_PPC64
+        for (i = 0; i < 64; i++) {
+            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+        }
+#endif
+
+        /* Sync SRs */
+        for (i = 0; i < 16; i++) {
+            sregs.u.s.ppc32.sr[i] = env->sr[i];
+        }
+
+        /* Sync BATs */
+        for (i = 0; i < 8; i++) {
+            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[1][i] << 32)
+                | env->DBAT[0][i];
+            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[1][i] << 32)
+                | env->IBAT[0][i];
+        }
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
+        uint64_t hior = env->spr[SPR_HIOR];
+        struct kvm_one_reg reg = {
+            .id = KVM_REG_PPC_HIOR,
+            .addr = (uintptr_t) &hior,
+        };
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }
 
@@ -946,52 +995,14 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 void kvmppc_set_papr(CPUPPCState *env)
 {
     struct kvm_enable_cap cap = {};
-    struct kvm_one_reg reg = {};
-    struct kvm_sregs sregs = {};
     int ret;
-    uint64_t hior = env->spr[SPR_HIOR];
 
     cap.cap = KVM_CAP_PPC_PAPR;
     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
 
     if (ret) {
-        goto fail;
-    }
-
-    /*
-     * XXX We set HIOR here. It really should be a qdev property of
-     *     the CPU node, but we don't have CPUs converted to qdev yet.
-     *
-     *     Once we have qdev CPUs, move HIOR to a qdev property and
-     *     remove this chunk.
-     */
-    reg.id = KVM_REG_PPC_HIOR;
-    reg.addr = (uintptr_t)&hior;
-    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
-                        "kernel with support for HV KVM but no PAPR PR \n"
-                        "KVM in which case things will work. If they don't \n"
-                        "please update your host kernel!\n");
-    }
-
-    /* Set SDR1 so kernel space finds the HTAB */
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
+        cpu_abort(env, "This KVM version does not support PAPR\n");
     }
-
-    return;
-
-fail:
-    cpu_abort(env, "This KVM version does not support PAPR\n");
 }
 
 int kvmppc_smt_threads(void)
commit 89243b3b0183471a373d1457e7f93811773b64a7
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 08:47:13 2012 +0000

    target-ppc: get rid of the HANDLE_NAN{1, 2, 3} macros
    
    We can finally get rid of the ugly HANDLE_NAN{1,2,3} macros.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 6f9beff..f39b4f6 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -287,23 +287,6 @@ target_ulong helper_602_mfrom(target_ulong arg)
     for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
 #endif
 
-/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
- * execute the following block.  */
-#define DO_HANDLE_NAN(result, x)                        \
-    if (float32_is_any_nan(x)) {                        \
-        CPU_FloatU __f;                                 \
-        __f.f = x;                                      \
-        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
-        result = __f.f;                                 \
-    } else
-
-#define HANDLE_NAN1(result, x)                  \
-    DO_HANDLE_NAN(result, x)
-#define HANDLE_NAN2(result, x, y)                       \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
-#define HANDLE_NAN3(result, x, y, z)                                    \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)
-
 /* Saturating arithmetic helpers.  */
 #define SATCVT(from, to, from_type, to_type, min, max)          \
     static inline to_type cvt##from##to(from_type x, int *sat)  \
@@ -1413,10 +1396,6 @@ VUPK(lsh, s32, s16, UPKLO)
 #undef UPKHI
 #undef UPKLO
 
-#undef DO_HANDLE_NAN
-#undef HANDLE_NAN1
-#undef HANDLE_NAN2
-#undef HANDLE_NAN3
 #undef VECTOR_FOR_INORDER_I
 #undef HI_IDX
 #undef LO_IDX
commit 2f93c23fe71420e5095f2fae1877fe747ad9f876
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 08:47:12 2012 +0000

    target-ppc: use the softfloat float32_muladd function
    
    Use the new softfloat float32_muladd() function to implement the vmaddfp
    and vnmsubfp instructions. As a bonus we can get rid of the call to the
    HANDLE_NAN3 macro, as the NaN handling is directly done at the softfloat
    level.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 6141243..6f9beff 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -418,6 +418,20 @@ VARITHFP(minfp, float32_min)
 VARITHFP(maxfp, float32_max)
 #undef VARITHFP
 
+#define VARITHFPFMA(suffix, type)                                       \
+    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
+                           ppc_avr_t *b, ppc_avr_t *c)                  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
+                                     type, &env->vec_status);           \
+        }                                                               \
+    }
+VARITHFPFMA(maddfp, 0);
+VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
+#undef VARITHFPFMA
+
 #define VARITHSAT_CASE(type, op, cvt, element)                          \
     {                                                                   \
         type result = (type)a->element[i] op (type)b->element[i];       \
@@ -649,27 +663,6 @@ VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
-void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-                    ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation in higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_add(t, bf, &env->vec_status);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
 {
@@ -909,28 +902,6 @@ VMUL(uh, u16, u32)
 #undef VMUL_DO
 #undef VMUL
 
-void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
-                     ppc_avr_t *b, ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation is higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_sub(t, bf, &env->vec_status);
-            t = float64_chs(t);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {
commit db1babb8dbb6d18433a51f1b4c3d186ea7057a6f
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 08:47:11 2012 +0000

    target-ppc: use the softfloat min/max functions
    
    Use the new softfloat float32_min() and float32_max() to implement the
    vminfp and vmaxfp instructions. As a bonus we can get rid of the call to
    the HANDLE_NAN2 macro, as the NaN handling is directly done at the
    softfloat level.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 5b2a3c8..6141243 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -414,6 +414,8 @@ VARITH(uwm, u32)
     }
 VARITHFP(addfp, float32_add)
 VARITHFP(subfp, float32_sub)
+VARITHFP(minfp, float32_min)
+VARITHFP(maxfp, float32_max)
 #undef VARITHFP
 
 #define VARITHSAT_CASE(type, op, cvt, element)                          \
@@ -728,27 +730,6 @@ VMINMAX(uw, u32)
 #undef VMINMAX_DO
 #undef VMINMAX
 
-#define VMINMAXFP(suffix, rT, rF)                                       \
-    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
-                          ppc_avr_t *b)                                 \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                if (float32_lt_quiet(a->f[i], b->f[i],                  \
-                                     &env->vec_status)) {               \
-                    r->f[i] = rT->f[i];                                 \
-                } else {                                                \
-                    r->f[i] = rF->f[i];                                 \
-                }                                                       \
-            }                                                           \
-        }                                                               \
-    }
-VMINMAXFP(minfp, a, b)
-VMINMAXFP(maxfp, b, a)
-#undef VMINMAXFP
-
 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 {
     int i;
commit ef9bd1507b60edba0619e16f1179b158eac2c3ac
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Sep 11 08:47:10 2012 +0000

    target-ppc: simplify NaN propagation for vector functions
    
    Commit e024e881bb1a8b5085026589360d26ed97acdd64 provided a pickNaN()
    function for PowerPC, implementing the correct NaN propagation rules.
    Therefore there is no need to test the operands manually, we can rely
    on the softfloat code to do that.
    
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f638b2a..5b2a3c8 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -409,9 +409,7 @@ VARITH(uwm, u32)
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
-            }                                                           \
+            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
         }                                                               \
     }
 VARITHFP(addfp, float32_add)
@@ -1039,9 +1037,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
     }
 }
 
@@ -1054,9 +1050,7 @@ void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
                                                                 \
         set_float_rounding_mode(rounding, &s);                  \
         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
-            HANDLE_NAN1(r->f[i], b->f[i]) {                     \
-                r->f[i] = float32_round_to_int (b->f[i], &s);   \
-            }                                                   \
+            r->f[i] = float32_round_to_int (b->f[i], &s);       \
         }                                                       \
     }
 VRFI(n, float_round_nearest_even)
@@ -1089,11 +1083,9 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            float32 t = float32_sqrt(b->f[i], &env->vec_status);
+        float32 t = float32_sqrt(b->f[i], &env->vec_status);
 
-            r->f[i] = float32_div(float32_one, t, &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, t, &env->vec_status);
     }
 }
 
@@ -1109,9 +1101,7 @@ void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_exp2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
     }
 }
 
@@ -1120,9 +1110,7 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_log2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_log2(b->f[i], &env->vec_status);
     }
 }
 
commit 794d00bfc7c38cfc559fbec137ecd0117b40e77b
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Aug 22 05:48:48 2012 +0000

    MAINTAINERS: Document virtex_ml507 machine
    
    Place it in alphabetical order, there is a separate section for sharing
    ppc4xx devices now.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index dca8753..78d4ff2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -403,6 +403,12 @@ L: qemu-ppc at nongnu.org
 S: Supported
 F: hw/spapr*
 
+virtex_ml507
+M: Edgar E. Iglesias <edgar.iglesias at gmail.com>
+L: qemu-ppc at nongnu.org
+S: Odd Fixes
+F: hw/virtex_ml507.c
+
 SH4 Machines
 ------------
 R2D
commit aaade8d7d4d2a3d9b7fb99b198369b1b35a75450
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Aug 22 05:48:47 2012 +0000

    MAINTAINERS: Document Bamboo machine and ppc4xx devices
    
    Place it in alphabetical order and add new Devices section ppc4xx to
    share file rules with 405 and virtex_ml507.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d4ccfc..dca8753 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -352,6 +352,12 @@ L: qemu-ppc at nongnu.org
 S: Odd Fixes
 F: hw/ppc405_boards.c
 
+Bamboo
+M: Alexander Graf <agraf at suse.de>
+L: qemu-ppc at nongnu.org
+S: Odd Fixes
+F: hw/ppc440_bamboo.c
+
 e500
 M: Alexander Graf <agraf at suse.de>
 M: Scott Wood <scottwood at freescale.com>
@@ -480,6 +486,12 @@ S: Supported
 F: hw/pci*
 F: hw/piix*
 
+ppc4xx
+M: Alexander Graf <agraf at suse.de>
+L: qemu-ppc at nongnu.org
+S: Odd Fixes
+F: hw/ppc4xx*.[hc]
+
 ppce500
 M: Alexander Graf <agraf at suse.de>
 M: Scott Wood <scottwood at freescale.com>
commit 9b9fe13540d2375a109c1d17b53d240a592e442d
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Aug 22 05:48:46 2012 +0000

    MAINTAINERS: Downgrade ppc405 to Odd Fixes
    
    As requested by Alex.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index cfb7394..9d4ccfc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -349,7 +349,7 @@ PowerPC Machines
 405
 M: Alexander Graf <agraf at suse.de>
 L: qemu-ppc at nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc405_boards.c
 
 e500
commit 98cded3a05de6a3295e89cc149ff267f1619bdc4
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Aug 22 05:48:45 2012 +0000

    MAINTAINERS: Document e500 machines and devices
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Cc: Alexander Graf <agraf at suse.de>
    Cc: Scott Wood <scottwood at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index af7c7db..cfb7394 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -352,6 +352,22 @@ L: qemu-ppc at nongnu.org
 S: Maintained
 F: hw/ppc405_boards.c
 
+e500
+M: Alexander Graf <agraf at suse.de>
+M: Scott Wood <scottwood at freescale.com>
+L: qemu-ppc at nongnu.org
+S: Supported
+F: hw/ppc/e500.[hc]
+F: hw/ppc/e500plat.c
+
+mpc8544ds
+M: Alexander Graf <agraf at suse.de>
+M: Scott Wood <scottwood at freescale.com>
+L: qemu-ppc at nongnu.org
+S: Supported
+F: hw/ppc/mpc8544ds.c
+F: hw/mpc8544_guts.c
+
 New World
 M: Alexander Graf <agraf at suse.de>
 L: qemu-ppc at nongnu.org
@@ -464,6 +480,13 @@ S: Supported
 F: hw/pci*
 F: hw/piix*
 
+ppce500
+M: Alexander Graf <agraf at suse.de>
+M: Scott Wood <scottwood at freescale.com>
+L: qemu-ppc at nongnu.org
+S: Supported
+F: hw/ppce500_*
+
 SCSI
 M: Paolo Bonzini <pbonzini at redhat.com>
 S: Supported
commit 8a269ca4167b43cbf1acaf51929f3f5919edcd1e
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Aug 22 05:48:44 2012 +0000

    MAINTAINERS: Document sPAPR (pSeries) machine
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index 29aac4f..af7c7db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -374,6 +374,13 @@ S: Odd Fixes
 F: hw/ppc_prep.c
 F: hw/prep_pci.[hc]
 
+sPAPR
+M: David Gibson <david at gibson.dropbear.id.au>
+M: Alexander Graf <agraf at suse.de>
+L: qemu-ppc at nongnu.org
+S: Supported
+F: hw/spapr*
+
 SH4 Machines
 ------------
 R2D
commit 8aba7dc02d5660df7e7d8651304b3079908358be
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:49:40 2012 +0000

    xen: Set the vram dirty when an error occurs.
    
    If the call to xc_hvm_track_dirty_vram() fails, then we set dirtybit on all the
    video ram. This case happens during migration.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Acked-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>

diff --git a/xen-all.c b/xen-all.c
index b11542c..e6308be 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -507,7 +507,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
                                  bitmap);
     if (rc < 0) {
         if (rc != -ENODATA) {
-            fprintf(stderr, "xen: track_dirty_vram failed (0x" TARGET_FMT_plx
+            memory_region_set_dirty(framebuffer, 0, size);
+            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
                     ", 0x" TARGET_FMT_plx "): %s\n",
                     start_addr, start_addr + size, strerror(-rc));
         }
commit e226939de5814527a21396903b08c3d0ed989558
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:49:22 2012 +0000

    exec, memory: Call to xen_modified_memory.
    
    This patch adds some calls to xen_modified_memory to notify Xen about dirty bits
    during migration.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Reviewed-by: Avi Kivity <avi at redhat.com>

diff --git a/exec-obsolete.h b/exec-obsolete.h
index c099256..286e2f7 100644
--- a/exec-obsolete.h
+++ b/exec-obsolete.h
@@ -24,6 +24,7 @@
 #endif
 
 #ifndef CONFIG_USER_ONLY
+#include "hw/xen.h"
 
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr);
@@ -111,6 +112,7 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
         cpu_physical_memory_set_dirty_flags(addr, dirty_flags);
     }
+    xen_modified_memory(addr, length);
 }
 
 static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
diff --git a/exec.c b/exec.c
index 366684c..1114a09 100644
--- a/exec.c
+++ b/exec.c
@@ -3427,6 +3427,7 @@ static void invalidate_and_set_dirty(target_phys_addr_t addr,
         /* set dirty bit */
         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
     }
+    xen_modified_memory(addr, length);
 }
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
commit 51d7a9eb2b64e787c90bea1027308087eac22065
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:49:05 2012 +0000

    exec: Introduce helper to set dirty flags.
    
    This new helper/hook is used in the next patch to add an extra call in a single
    place.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Reviewed-by: Avi Kivity <avi at redhat.com>

diff --git a/exec.c b/exec.c
index bb6aa4a..366684c 100644
--- a/exec.c
+++ b/exec.c
@@ -3417,6 +3417,18 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 }
 
 #else
+
+static void invalidate_and_set_dirty(target_phys_addr_t addr,
+                                     target_phys_addr_t length)
+{
+    if (!cpu_physical_memory_is_dirty(addr)) {
+        /* invalidate code */
+        tb_invalidate_phys_page_range(addr, addr + length, 0);
+        /* set dirty bit */
+        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
+    }
+}
+
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
 {
@@ -3462,13 +3474,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
                 memcpy(ptr, buf, l);
-                if (!cpu_physical_memory_is_dirty(addr1)) {
-                    /* invalidate code */
-                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
-                    /* set dirty bit */
-                    cpu_physical_memory_set_dirty_flags(
-                        addr1, (0xff & ~CODE_DIRTY_FLAG));
-                }
+                invalidate_and_set_dirty(addr1, l);
                 qemu_put_ram_ptr(ptr);
             }
         } else {
@@ -3534,13 +3540,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr,
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
-            if (!cpu_physical_memory_is_dirty(addr1)) {
-                /* invalidate code */
-                tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
-                /* set dirty bit */
-                cpu_physical_memory_set_dirty_flags(
-                    addr1, (0xff & ~CODE_DIRTY_FLAG));
-            }
+            invalidate_and_set_dirty(addr1, l);
             qemu_put_ram_ptr(ptr);
         }
         len -= l;
@@ -3666,13 +3666,7 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                 l = TARGET_PAGE_SIZE;
                 if (l > access_len)
                     l = access_len;
-                if (!cpu_physical_memory_is_dirty(addr1)) {
-                    /* invalidate code */
-                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
-                    /* set dirty bit */
-                    cpu_physical_memory_set_dirty_flags(
-                        addr1, (0xff & ~CODE_DIRTY_FLAG));
-                }
+                invalidate_and_set_dirty(addr1, l);
                 addr1 += l;
                 access_len -= l;
             }
@@ -3978,13 +3972,7 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
             stl_p(ptr, val);
             break;
         }
-        if (!cpu_physical_memory_is_dirty(addr1)) {
-            /* invalidate code */
-            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
-            /* set dirty bit */
-            cpu_physical_memory_set_dirty_flags(addr1,
-                (0xff & ~CODE_DIRTY_FLAG));
-        }
+        invalidate_and_set_dirty(addr1, 4);
     }
 }
 
@@ -4051,13 +4039,7 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
             stw_p(ptr, val);
             break;
         }
-        if (!cpu_physical_memory_is_dirty(addr1)) {
-            /* invalidate code */
-            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
-            /* set dirty bit */
-            cpu_physical_memory_set_dirty_flags(addr1,
-                (0xff & ~CODE_DIRTY_FLAG));
-        }
+        invalidate_and_set_dirty(addr1, 2);
     }
 }
 
commit 910b38e4dc4c37683c8b821e75a7f4cf095e4b21
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:48:45 2012 +0000

    xen: Introduce xen_modified_memory.
    
    This function is to be used during live migration. Every write access to the
    guest memory should call this function so the Xen tools know which pages are
    dirty.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Acked-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>

diff --git a/hw/xen.h b/hw/xen.h
index e5926b7..d14e92d 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -48,6 +48,7 @@ void xenstore_store_pv_console_info(int i, struct CharDriverState *chr);
 struct MemoryRegion;
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
                    struct MemoryRegion *mr);
+void xen_modified_memory(ram_addr_t start, ram_addr_t length);
 #endif
 
 struct MemoryRegion;
diff --git a/xen-all.c b/xen-all.c
index f75ae9f..b11542c 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -1228,3 +1228,24 @@ void xen_shutdown_fatal_error(const char *fmt, ...)
     /* destroy the domain */
     qemu_system_shutdown_request();
 }
+
+void xen_modified_memory(ram_addr_t start, ram_addr_t length)
+{
+    if (unlikely(xen_in_migration)) {
+        int rc;
+        ram_addr_t start_pfn, nb_pages;
+
+        if (length == 0) {
+            length = TARGET_PAGE_SIZE;
+        }
+        start_pfn = start >> TARGET_PAGE_BITS;
+        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
+            - start_pfn;
+        rc = xc_hvm_modified_memory(xen_xc, xen_domid, start_pfn, nb_pages);
+        if (rc) {
+            fprintf(stderr,
+                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
+                    __func__, start, nb_pages, rc, strerror(-rc));
+        }
+    }
+}
diff --git a/xen-stub.c b/xen-stub.c
index 5e66ba8..9214392 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -59,3 +59,7 @@ void xen_register_framebuffer(MemoryRegion *mr)
 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
 {
 }
+
+void xen_modified_memory(ram_addr_t start, ram_addr_t length)
+{
+}
commit 39f42439d0629d3921629dc4b38e68df8f2f7b83
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:48:19 2012 +0000

    QMP, Introduce xen-set-global-dirty-log command.
    
    This command is used during a migration of a guest under Xen. It calls
    memory_global_dirty_log_start or memory_global_dirty_log_stop according to the
    argument passed to the command.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index 14e4419..4a4a850 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1956,6 +1956,19 @@
 { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} }
 
 ##
+# @xen-set-global-dirty-log
+#
+# Enable or disable the global dirty log mode.
+#
+# @enable: true to enable, false to disable.
+#
+# Returns: nothing
+#
+# Since: 1.3
+##
+{ 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } }
+
+##
 # @device_del:
 #
 # Remove a device from a guest
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddb..662b7cf 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -493,6 +493,30 @@ Example:
 EQMP
 
     {
+        .name       = "xen-set-global-dirty-log",
+        .args_type  = "enable:b",
+        .mhandler.cmd_new = qmp_marshal_input_xen_set_global_dirty_log,
+    },
+
+SQMP
+xen-set-global-dirty-log
+-------
+
+Enable or disable the global dirty log mode.
+
+Arguments:
+
+- "enable": Enable it or disable it.
+
+Example:
+
+-> { "execute": "xen-set-global-dirty-log",
+     "arguments": { "enable": true } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "migrate",
         .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
         .mhandler.cmd_new = qmp_marshal_input_migrate,
diff --git a/xen-all.c b/xen-all.c
index f76b051..f75ae9f 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -14,6 +14,7 @@
 #include "hw/pc.h"
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
+#include "qmp-commands.h"
 
 #include "range.h"
 #include "xen-mapcache.h"
@@ -36,6 +37,7 @@
 
 static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
 static MemoryRegion *framebuffer;
+static bool xen_in_migration;
 
 /* Compatibility with older version */
 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x0003020a
@@ -552,10 +554,14 @@ static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
 
 static void xen_log_global_start(MemoryListener *listener)
 {
+    if (xen_enabled()) {
+        xen_in_migration = true;
+    }
 }
 
 static void xen_log_global_stop(MemoryListener *listener)
 {
+    xen_in_migration = false;
 }
 
 static void xen_eventfd_add(MemoryListener *listener,
@@ -588,6 +594,15 @@ static MemoryListener xen_memory_listener = {
     .priority = 10,
 };
 
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
+{
+    if (enable) {
+        memory_global_dirty_log_start();
+    } else {
+        memory_global_dirty_log_stop();
+    }
+}
+
 /* VCPU Operations, MMIO, IO ring ... */
 
 static void xen_reset_vcpu(void *opaque)
diff --git a/xen-stub.c b/xen-stub.c
index 8ff2b79..5e66ba8 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -11,6 +11,7 @@
 #include "qemu-common.h"
 #include "hw/xen.h"
 #include "memory.h"
+#include "qmp-commands.h"
 
 void xenstore_store_pv_console_info(int i, CharDriverState *chr)
 {
@@ -54,3 +55,7 @@ int xen_init(void)
 void xen_register_framebuffer(MemoryRegion *mr)
 {
 }
+
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
+{
+}
commit aabc8530c7ba2be89e21463f051056ad7c255e6e
Author: Xudong Hao <xudong.hao at intel.com>
Date:   Wed Oct 3 13:46:23 2012 +0000

    qemu/xen: Add 64 bits big bar support on qemu
    
    Currently it is assumed that PCI device BAR accesses are below 4G. If a
    device has a BAR larger than 4G, it must access memory addresses above 4G.
    This patch enables 64-bit big BAR support in qemu.
    
    Signed-off-by: Xudong Hao <xudong.hao at intel.com>
    Signed-off-by: Xiantao Zhang <xiantao.zhang at intel.com>
    Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>

diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 307119a..838bcea 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -410,14 +410,17 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s)
             if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
                 type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
             }
+            if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) {
+                type |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+            }
         }
 
         memory_region_init_io(&s->bar[i], &ops, &s->dev,
                               "xen-pci-pt-bar", r->size);
         pci_register_bar(&s->dev, i, type, &s->bar[i]);
 
-        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
-                   " base_addr=0x%08"PRIx64" type: %#x)\n",
+        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%lx"PRIx64
+                   " base_addr=0x%lx"PRIx64" type: %#x)\n",
                    i, r->size, r->base_addr, type);
     }
 
diff --git a/hw/xen_pt_config_init.c b/hw/xen_pt_config_init.c
index e524a40..0a5f82c 100644
--- a/hw/xen_pt_config_init.c
+++ b/hw/xen_pt_config_init.c
@@ -342,6 +342,23 @@ static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
 #define XEN_PT_BAR_IO_RO_MASK     0x00000003  /* BAR ReadOnly mask(I/O) */
 #define XEN_PT_BAR_IO_EMU_MASK    0xFFFFFFFC  /* BAR emul mask(I/O) */
 
+static bool is_64bit_bar(PCIIORegion *r)
+{
+    return !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
+}
+
+static uint64_t xen_pt_get_bar_size(PCIIORegion *r)
+{
+    if (is_64bit_bar(r)) {
+        uint64_t size64;
+        size64 = (r + 1)->size;
+        size64 <<= 32;
+        size64 += r->size;
+        return size64;
+    }
+    return r->size;
+}
+
 static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s,
                                          XenPTRegInfo *reg)
 {
@@ -366,7 +383,7 @@ static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s,
 
     /* check unused BAR */
     r = &d->io_regions[index];
-    if (r->size == 0) {
+    if (!xen_pt_get_bar_size(r)) {
         return XEN_PT_BAR_FLAG_UNUSED;
     }
 
@@ -481,7 +498,12 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
     switch (s->bases[index].bar_flag) {
     case XEN_PT_BAR_FLAG_MEM:
         bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK;
-        bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1);
+        if (!r_size) {
+            /* low 32 bits mask for 64 bit bars */
+            bar_ro_mask = XEN_PT_BAR_ALLF;
+        } else {
+            bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1);
+        }
         break;
     case XEN_PT_BAR_FLAG_IO:
         bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK;
@@ -489,7 +511,7 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
         break;
     case XEN_PT_BAR_FLAG_UPPER:
         bar_emu_mask = XEN_PT_BAR_ALLF;
-        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        bar_ro_mask = r_size ? r_size - 1 : 0;
         break;
     default:
         break;
@@ -501,22 +523,13 @@ static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
 
     /* check whether we need to update the virtual region address or not */
     switch (s->bases[index].bar_flag) {
+    case XEN_PT_BAR_FLAG_UPPER:
     case XEN_PT_BAR_FLAG_MEM:
         /* nothing to do */
         break;
     case XEN_PT_BAR_FLAG_IO:
         /* nothing to do */
         break;
-    case XEN_PT_BAR_FLAG_UPPER:
-        if (cfg_entry->data) {
-            if (cfg_entry->data != (XEN_PT_BAR_ALLF & ~bar_ro_mask)) {
-                XEN_PT_WARN(d, "Guest attempt to set high MMIO Base Address. "
-                            "Ignore mapping. "
-                            "(offset: 0x%02x, high address: 0x%08x)\n",
-                            reg->offset, cfg_entry->data);
-            }
-        }
-        break;
     default:
         break;
     }
commit bd4982a6c6f1b133aff38873bc3b580af15cd334
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Wed Oct 3 13:45:24 2012 +0000

    xen: Fix, no unplug of pt device by platform device.
    
    The Xen platform device will unplug any NICs if requested by the guest (PVonHVM)
    including a NIC that would have been passed through. This patch makes sure that a
    passthrough device will not be unplugged.
    
    Reported-by: "Zhang, Yang Z" <yang.z.zhang at intel.com>
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>

diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index 0d6c2ff..956dbfe 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -85,8 +85,10 @@ static void log_writeb(PCIXenPlatformState *s, char val)
 
 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
 {
+    /* We have to ignore passthrough devices */
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
-            PCI_CLASS_NETWORK_ETHERNET) {
+            PCI_CLASS_NETWORK_ETHERNET
+            && strcmp(d->name, "xen-pci-passthrough") != 0) {
         qdev_free(&d->qdev);
     }
 }
@@ -98,8 +100,10 @@ static void pci_unplug_nics(PCIBus *bus)
 
 static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
 {
+    /* We have to ignore passthrough devices */
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
-            PCI_CLASS_STORAGE_IDE) {
+            PCI_CLASS_STORAGE_IDE
+            && strcmp(d->name, "xen-pci-passthrough") != 0) {
         qdev_unplug(&(d->qdev), NULL);
     }
 }
commit e744c06fca438dc08271e626034e632a270c91c8
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Sep 28 16:17:03 2012 +0100

    fpu/softfloat.c: Return correctly signed values from uint64_to_float32
    
    The uint64_to_float32() conversion function was incorrectly always
    returning numbers with the sign bit set (ie negative numbers). Correct
    this so we return positive numbers instead.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 01a28ca..8413146 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1238,7 +1238,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
     if ( a == 0 ) return float32_zero;
     shiftCount = countLeadingZeros64( a ) - 40;
     if ( 0 <= shiftCount ) {
-        return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
+        return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
     }
     else {
         shiftCount += 7;
@@ -1248,7 +1248,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
         else {
             a <<= shiftCount;
         }
-        return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
+        return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
     }
 }
 
commit 4be8eeacb9f2b01ad98747ad88e54a7feabc664c
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Mon Sep 24 17:28:35 2012 +0100

    fpu/softfloat.c: Remove pointless shift of always-zero value
    
    In float16_to_float32, when returning an infinity, just pass zero
    as the mantissa argument to packFloat32(), rather than shifting
    a value which we know must be zero.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b29256a..01a28ca 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -3007,7 +3007,7 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
         if (aSig) {
             return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
-        return packFloat32(aSign, 0xff, aSig << 13);
+        return packFloat32(aSign, 0xff, 0);
     }
     if (aExp == 0) {
         int8 shiftCount;
commit 0f41dc182c7e4e2ae160641552101a2a199f7c4c
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Oct 1 13:40:15 2012 -0500

    vfio_pci: fix build on 32-bit systems
    
    We cannot cast directly from pointer to uint64.
    
    Cc: Alex Williamson <alex.williamson at redhat.com>
    Cc: Alex Barcelo <abarcelo at ac.upc.edu>
    Reported-by: Alex Barcelo <abarcelo at ac.upc.edu>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index a24558a..a1eeced 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -768,7 +768,7 @@ static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
     struct vfio_iommu_type1_dma_map map = {
         .argsz = sizeof(map),
         .flags = VFIO_DMA_MAP_FLAG_READ,
-        .vaddr = (__u64)vaddr,
+        .vaddr = (__u64)(intptr_t)vaddr,
         .iova = iova,
         .size = size,
     };
commit 92e1fb5ed1a26d09ee754845d5691d473410c1dc
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Wed Sep 26 11:19:39 2012 -0600

    vfio: Enable vfio-pci and mark supported
    
    Enabled for all softmmu guests supporting PCI on Linux hosts.  Note
    that currently only x86 hosts have the kernel side VFIO IOMMU support
    for this.  PPC (g3beige) is the only non-x86 guest known to work.
    ARM (versatile) hangs in firmware, others untested.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 25733fc..29aac4f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -474,6 +474,11 @@ M: Gerd Hoffmann <kraxel at redhat.com>
 S: Maintained
 F: hw/usb*
 
+VFIO
+M: Alex Williamson <alex.williamson at redhat.com>
+S: Supported
+F: hw/vfio*
+
 vhost
 M: Michael S. Tsirkin <mst at redhat.com>
 S: Supported
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 6dfebd2..ecdbe44 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -198,7 +198,8 @@ obj-$(CONFIG_VGA) += vga.o
 obj-$(CONFIG_SOFTMMU) += device-hotplug.o
 obj-$(CONFIG_XEN) += xen_domainbuild.o xen_machine_pv.o
 
-# Inter-VM PCI shared memory
+# Inter-VM PCI shared memory & VFIO PCI device assignment
 ifeq ($(CONFIG_PCI), y)
 obj-$(CONFIG_KVM) += ivshmem.o
+obj-$(CONFIG_LINUX) += vfio_pci.o
 endif
commit 65501a745dbaf28284e099f724aa5fc478eae0e7
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Wed Sep 26 11:19:32 2012 -0600

    vfio: vfio-pci device assignment driver
    
    This adds the core of the QEMU VFIO-based PCI device assignment driver.
    To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1,
    and CONFIG_VFIO_PCI in your host Linux kernel config.  Load the vfio-pci
    module.  To assign device 0000:05:00.0 to a guest, do the following:
    
    for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do
        vendor=$(cat /sys/bus/pci/devices/$dev/vendor)
        device=$(cat /sys/bus/pci/devices/$dev/device)
        if [ -e /sys/bus/pci/devices/$dev/driver ]; then
            echo $dev > /sys/bus/pci/devices/$dev/driver/unbind
        fi
        echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id
    done
    
    See Documentation/vfio.txt in the Linux kernel tree for further
    description of IOMMU groups and VFIO.
    
    Then launch qemu including the option:
    
    -device vfio-pci,host=0000:05:00.0
    
    Legacy PCI interrupts (INTx) currently make use of a kludge where we
    trap BAR accesses and assume the access is in response to an interrupt,
    therefore de-asserting and unmasking the interrupt.  It's not quite as
    targeted as using the EOI for this, but it's self-contained and seems
    to work across all architectures.  The side-effect is a significant
    performance slow-down for devices in INTx mode.  Some devices, like
    graphics cards, don't really use their interrupt, so this can be turned
    off with the x-intx=off option, which disables INTx altogether.  This
    should be considered an experimental option until we refine this code.
    Both MSI and MSI-X are supported and avoid these issues.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
new file mode 100644
index 0000000..a24558a
--- /dev/null
+++ b/hw/vfio_pci.c
@@ -0,0 +1,1864 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ *  Adapted for KVM by Qumranet.
+ *  Copyright (c) 2007, Neocleus, Alex Novik (alex at neocleus.com)
+ *  Copyright (c) 2007, Neocleus, Guy Zana (guy at neocleus.com)
+ *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah at qumranet.com)
+ *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah at redhat.com)
+ *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli at il.ibm.com)
+ */
+
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/vfio.h>
+
+#include "config.h"
+#include "event_notifier.h"
+#include "exec-memory.h"
+#include "kvm.h"
+#include "memory.h"
+#include "msi.h"
+#include "msix.h"
+#include "qemu-error.h"
+#include "range.h"
+#include "vfio_pci_int.h"
+
+/* #define DEBUG_VFIO */
+#ifdef DEBUG_VFIO
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define MSIX_CAP_LENGTH 12
+
+static QLIST_HEAD(, VFIOContainer)
+    container_list = QLIST_HEAD_INITIALIZER(container_list);
+
+static QLIST_HEAD(, VFIOGroup)
+    group_list = QLIST_HEAD_INITIALIZER(group_list);
+
+static void vfio_disable_interrupts(VFIODevice *vdev);
+static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
+static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
+
+/*
+ * Common VFIO interrupt disable
+ *
+ * Sends a DATA_NONE | ACTION_TRIGGER request with a count of zero for IRQ
+ * index @index and then records that no interrupt mode is configured.
+ * The ioctl result is not checked.
+ */
+static void vfio_disable_irqindex(VFIODevice *vdev, int index)
+{
+    struct vfio_irq_set req = { .argsz = sizeof(req) };
+
+    req.flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE;
+    req.index = index;
+    req.start = 0;
+    req.count = 0;
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &req);
+
+    vdev->interrupt = VFIO_INT_NONE;
+}
+
+/*
+ * INTx
+ */
+/*
+ * Send an ACTION_UNMASK request (DATA_NONE, start 0, count 1) for the INTx
+ * IRQ index to VFIO.  The ioctl result is not checked.
+ */
+static void vfio_unmask_intx(VFIODevice *vdev)
+{
+    struct vfio_irq_set unmask = { .argsz = sizeof(unmask) };
+
+    unmask.flags = VFIO_IRQ_SET_ACTION_UNMASK | VFIO_IRQ_SET_DATA_NONE;
+    unmask.index = VFIO_PCI_INTX_IRQ_INDEX;
+    unmask.start = 0;
+    unmask.count = 1;
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &unmask);
+}
+
+/*
+ * fd handler for the INTx eventfd; @opaque is the VFIODevice.  Once the
+ * notifier is found to have fired, INTx is marked pending and the emulated
+ * interrupt pin is raised.
+ */
+static void vfio_intx_interrupt(void *opaque)
+{
+    VFIODevice *vdev = opaque;
+
+    if (event_notifier_test_and_clear(&vdev->intx.interrupt)) {
+        DPRINTF("%s(%04x:%02x:%02x.%x) Pin %c\n", __func__,
+                vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                vdev->host.function, 'A' + vdev->intx.pin);
+
+        vdev->intx.pending = true;
+        qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
+    }
+}
+
+/*
+ * Treat the current access as an end-of-interrupt: if an INTx is pending,
+ * clear the pending flag, lower the emulated pin and unmask the interrupt
+ * in VFIO.  Nothing happens when no INTx is pending.
+ */
+static void vfio_eoi(VFIODevice *vdev)
+{
+    if (vdev->intx.pending) {
+        DPRINTF("%s(%04x:%02x:%02x.%x) EOI\n", __func__,
+                vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                vdev->host.function);
+
+        vdev->intx.pending = false;
+        qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+        vfio_unmask_intx(vdev);
+    }
+}
+/*
+ * VFIO_DEVICE_SET_IRQS argument carrying exactly one eventfd: the
+ * vfio_irq_set header followed by a single int32_t fd.  QEMU_PACKED
+ * presumably keeps the fd directly adjacent to the header - confirm
+ * against the macro definition.
+ */
+typedef struct QEMU_PACKED VFIOIRQSetFD {
+    struct vfio_irq_set irq_set;
+    int32_t fd;
+} VFIOIRQSetFD;
+
+/*
+ * Switch the device to legacy INTx signalling.
+ *
+ * Does nothing and returns 0 when INTx has been disabled for this device
+ * or the device reports no interrupt pin.  Otherwise the current interrupt
+ * mode is torn down, an eventfd is created and handed to VFIO as the INTx
+ * trigger, and BAR mmaps are disabled so that BAR accesses can be trapped.
+ *
+ * Returns 0 on success, the event_notifier_init() error, or -errno from the
+ * VFIO_DEVICE_SET_IRQS ioctl.  When the ioctl fails, the fd handler and the
+ * event notifier created here are released again before returning.
+ */
+static int vfio_enable_intx(VFIODevice *vdev)
+{
+    VFIOIRQSetFD irq_set_fd = {
+        .irq_set = {
+            .argsz = sizeof(irq_set_fd),
+            .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
+            .index = VFIO_PCI_INTX_IRQ_INDEX,
+            .start = 0,
+            .count = 1,
+        },
+    };
+    uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
+    int ret;
+
+    if (vdev->intx.disabled || !pin) {
+        return 0;
+    }
+
+    vfio_disable_interrupts(vdev);
+
+    vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
+    ret = event_notifier_init(&vdev->intx.interrupt, 0);
+    if (ret) {
+        error_report("vfio: Error: event_notifier_init failed\n");
+        return ret;
+    }
+
+    irq_set_fd.fd = event_notifier_get_fd(&vdev->intx.interrupt);
+    qemu_set_fd_handler(irq_set_fd.fd, vfio_intx_interrupt, NULL, vdev);
+
+    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd)) {
+        /* Capture errno before any further call has a chance to change it */
+        ret = -errno;
+        error_report("vfio: Error: Failed to setup INTx fd: %m\n");
+
+        /* Don't leave a handler and eventfd behind for a dead setup */
+        qemu_set_fd_handler(irq_set_fd.fd, NULL, NULL, vdev);
+        event_notifier_cleanup(&vdev->intx.interrupt);
+        return ret;
+    }
+
+    /*
+     * Disable mmaps so we can trap on BAR accesses.  We interpret any
+     * access as a response to an interrupt and unmask the physical
+     * device.  The device will re-assert if the interrupt is still
+     * pending.  We'll likely retrigger on the host multiple times per
+     * guest interrupt, but without EOI notification it's better than
+     * nothing.  Acceleration paths through KVM will avoid this.
+     */
+    vfio_mmap_set_enabled(vdev, false);
+
+    vdev->interrupt = VFIO_INT_INTx;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    return 0;
+}
+/*
+ * Leave INTx mode: disable the INTx IRQ index in VFIO, clear the pending
+ * flag and lower the emulated pin, re-enable BAR mmaps (vfio_enable_intx
+ * turns them off), then unhook and clean up the eventfd notifier.
+ */
+static void vfio_disable_intx(VFIODevice *vdev)
+{
+    int fd;
+
+    vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
+    vdev->intx.pending = false;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+    vfio_mmap_set_enabled(vdev, true);
+
+    fd = event_notifier_get_fd(&vdev->intx.interrupt);
+    qemu_set_fd_handler(fd, NULL, NULL, vdev);
+    event_notifier_cleanup(&vdev->intx.interrupt);
+
+    vdev->interrupt = VFIO_INT_NONE;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+}
+
+/*
+ * MSI/X
+ */
+/*
+ * Userspace fd handler for an MSI or MSI-X vector's eventfd; @opaque is the
+ * VFIOMSIVector.  The vector number is the entry's position within
+ * vdev->msi_vectors.  A fired notifier is forwarded to the guest through
+ * msix_notify() or msi_notify() according to the current interrupt mode.
+ */
+static void vfio_msi_interrupt(void *opaque)
+{
+    VFIOMSIVector *vec = opaque;
+    VFIODevice *vdev = vec->vdev;
+    int idx = vec - vdev->msi_vectors;
+
+    if (!event_notifier_test_and_clear(&vec->interrupt)) {
+        return;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, idx);
+
+    switch (vdev->interrupt) {
+    case VFIO_INT_MSIX:
+        msix_notify(&vdev->pdev, idx);
+        break;
+    case VFIO_INT_MSI:
+        msi_notify(&vdev->pdev, idx);
+        break;
+    default:
+        error_report("vfio: MSI interrupt receieved, but not enabled?\n");
+        break;
+    }
+}
+
+/*
+ * Hand VFIO one eventfd per vector for the MSI-X (@msix true) or MSI
+ * (@msix false) IRQ index, covering vectors 0 .. nr_vectors - 1.  Vectors
+ * not marked in use are passed as fd -1.  On success (ioctl returned 0)
+ * the device's interrupt mode is updated.  Returns the ioctl result.
+ */
+static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
+{
+    int argsz = sizeof(struct vfio_irq_set) +
+                (vdev->nr_vectors * sizeof(int32_t));
+    struct vfio_irq_set *irq_set = g_malloc0(argsz);
+    int32_t *fds = (int32_t *)&irq_set->data;
+    int ret, i;
+
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    if (msix) {
+        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+    } else {
+        irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+    }
+    irq_set->start = 0;
+    irq_set->count = vdev->nr_vectors;
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        VFIOMSIVector *vec = &vdev->msi_vectors[i];
+
+        fds[i] = vec->use ? event_notifier_get_fd(&vec->interrupt) : -1;
+    }
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+    g_free(irq_set);
+
+    if (ret == 0) {
+        vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+    }
+
+    return ret;
+}
+/*
+ * MSI-X vector-use notifier (installed with msix_set_vector_notifiers()).
+ * Creates an eventfd for vector @nr, tries to route it through the KVM
+ * irqchip using @msg and falls back to a userspace fd handler, then hands
+ * the eventfd to VFIO.  Always returns 0; failures are only reported.
+ * NOTE(review): a failed event_notifier_init() is reported but the
+ * notifier is still used afterwards - confirm this is intended.
+ */
+static int vfio_msix_vector_use(PCIDevice *pdev,
+                                unsigned int nr, MSIMessage msg)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOMSIVector *vector;
+    int ret;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d used\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, nr);
+
+    if (vdev->interrupt != VFIO_INT_MSIX) {
+        vfio_disable_interrupts(vdev);
+    }
+
+    if (!vdev->msi_vectors) {
+        vdev->msi_vectors = g_malloc0(vdev->msix->entries *
+                                      sizeof(VFIOMSIVector));
+    }
+
+    vector = &vdev->msi_vectors[nr];
+    vector->vdev = vdev;
+    vector->use = true;
+
+    msix_vector_use(pdev, nr);
+
+    if (event_notifier_init(&vector->interrupt, 0)) {
+        error_report("vfio: Error: event_notifier_init failed\n");
+    }
+
+    /*
+     * Attempt to enable route through KVM irqchip,
+     * default to userspace handling if unavailable.
+     */
+    vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+    if (vector->virq < 0 ||
+        kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
+                                       vector->virq) < 0) {
+        if (vector->virq >= 0) {
+            kvm_irqchip_release_virq(kvm_state, vector->virq);
+            vector->virq = -1;
+        }
+        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                            vfio_msi_interrupt, NULL, vector);
+    }
+
+    /*
+     * We don't want to have the host allocate all possible MSI vectors
+     * for a device if they're not in use, so we shutdown and incrementally
+     * increase them as needed.
+     */
+    if (vdev->nr_vectors < nr + 1) {
+        int i;
+
+        vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
+        vdev->nr_vectors = nr + 1;
+        ret = vfio_enable_vectors(vdev, true);
+        if (ret) {
+            error_report("vfio: failed to enable vectors, %d\n", ret);
+        }
+
+        /* We don't know if we've missed interrupts in the interim... */
+        for (i = 0; i < vdev->msix->entries; i++) {
+            if (vdev->msi_vectors[i].use) {
+                msix_notify(&vdev->pdev, i);
+            }
+        }
+    } else {
+        VFIOIRQSetFD irq_set_fd = {
+            .irq_set = {
+                .argsz = sizeof(irq_set_fd),
+                .flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                         VFIO_IRQ_SET_ACTION_TRIGGER,
+                .index = VFIO_PCI_MSIX_IRQ_INDEX,
+                .start = nr,
+                .count = 1,
+            },
+            .fd = event_notifier_get_fd(&vector->interrupt),
+        };
+        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+        if (ret) {
+            error_report("vfio: failed to modify vector, %d\n", ret);
+        }
+
+        /*
+         * If we were connected to the hardware PBA we could skip this,
+         * until then, a spurious interrupt is better than starvation.
+         */
+        msix_notify(&vdev->pdev, nr);
+    }
+
+    return 0;
+}
+/*
+ * MSI-X vector-release notifier (installed with
+ * msix_set_vector_notifiers()).  Passes fd -1 to VFIO for vector @nr,
+ * undoes either the userspace fd handler or the KVM irqfd/virq route,
+ * cleans up the eventfd and marks the vector unused.  The ioctl result is
+ * not checked.
+ */
+static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+    VFIOIRQSetFD irq_set_fd = {
+        .irq_set = {
+            .argsz = sizeof(irq_set_fd),
+            .flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER,
+            .index = VFIO_PCI_MSIX_IRQ_INDEX,
+            .start = nr,
+            .count = 1,
+        },
+        .fd = -1,
+    };
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, nr);
+
+    /*
+     * XXX What's the right thing to do here?  This turns off the interrupt
+     * completely, but do we really just want to switch the interrupt to
+     * bouncing through userspace and let msix.c drop it?  Not sure.
+     */
+    msix_vector_unuse(pdev, nr);
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+
+    if (vector->virq < 0) { /* userspace-handled vector */
+        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                            NULL, NULL, NULL);
+    } else { /* vector routed through the KVM irqchip */
+        kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
+                                          vector->virq);
+        kvm_irqchip_release_virq(kvm_state, vector->virq);
+        vector->virq = -1;
+    }
+
+    event_notifier_cleanup(&vector->interrupt);
+    vector->use = false;
+}
+
+/*
+ * TODO This should move to msi.c
+ *
+ * Build the MSIMessage for MSI vector @vector from the emulated MSI
+ * capability: the address is read as 64 or 32 bits depending on the 64-bit
+ * flag, and the data word has @vector added to it.
+ */
+static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector)
+{
+    uint8_t *cap = pdev->config + pdev->msi_cap;
+    bool msi64bit = pci_get_word(cap + PCI_MSI_FLAGS) & PCI_MSI_FLAGS_64BIT;
+    MSIMessage msg;
+
+    msg.address = msi64bit ? pci_get_quad(cap + PCI_MSI_ADDRESS_LO)
+                           : pci_get_long(cap + PCI_MSI_ADDRESS_LO);
+
+    if (msi64bit) {
+        msg.data = pci_get_word(cap + PCI_MSI_DATA_64);
+    } else {
+        msg.data = pci_get_word(cap + PCI_MSI_DATA_32);
+    }
+    msg.data += vector;
+
+    return msg;
+}
+
+/*
+ * So should this (move to msi.c)
+ *
+ * Store the low three bits of @size into the queue-size field
+ * (PCI_MSI_FLAGS_QSIZE) of the emulated MSI flags word; the other flag
+ * bits are preserved.
+ *
+ * pci_get_word()/pci_set_word() are used without extra byte swapping, the
+ * same way msi_get_msg() consumes pci_get_word() results directly.  The
+ * previous le16_to_cpu()/cpu_to_le16() pair around them swapped the value
+ * a second time, which corrupts the field on big-endian hosts.
+ */
+static void msi_set_qsize(PCIDevice *pdev, uint8_t size)
+{
+    uint8_t *config = pdev->config + pdev->msi_cap;
+    uint16_t flags;
+
+    flags = pci_get_word(config + PCI_MSI_FLAGS);
+    flags &= ~PCI_MSI_FLAGS_QSIZE;
+    flags |= (size & 0x7) << 4;
+    pci_set_word(config + PCI_MSI_FLAGS, flags);
+}
+
+/*
+ * Switch the device to MSI.
+ *
+ * Tears down the current interrupt mode, allocates one VFIOMSIVector per
+ * vector reported by msi_nr_vectors_allocated(), gives each an eventfd that
+ * is routed through the KVM irqchip when possible (userspace fd handler
+ * otherwise), and passes the eventfds to VFIO.
+ *
+ * If VFIO answers with a positive count different from the number asked
+ * for, everything is torn down and the setup is retried with that count.
+ * On any other failure the function returns with nr_vectors == 0 and
+ * msi_vectors == NULL.
+ */
+static void vfio_enable_msi(VFIODevice *vdev)
+{
+    int ret, i;
+
+    vfio_disable_interrupts(vdev);
+
+    vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
+retry:
+    vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector));
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        MSIMessage msg;
+        VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+        vector->vdev = vdev;
+        vector->use = true;
+
+        if (event_notifier_init(&vector->interrupt, 0)) {
+            error_report("vfio: Error: event_notifier_init failed\n");
+        }
+
+        msg = msi_get_msg(&vdev->pdev, i);
+
+        /*
+         * Attempt to enable route through KVM irqchip,
+         * default to userspace handling if unavailable.
+         */
+        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+        if (vector->virq < 0 ||
+            kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
+                                           vector->virq) < 0) {
+            /*
+             * A route without an irqfd is useless; drop it so that teardown
+             * (which keys off virq >= 0) removes the fd handler instead of
+             * an irqfd notifier that was never attached.  This matches
+             * vfio_msix_vector_use().
+             */
+            if (vector->virq >= 0) {
+                kvm_irqchip_release_virq(kvm_state, vector->virq);
+                vector->virq = -1;
+            }
+            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                vfio_msi_interrupt, NULL, vector);
+        }
+    }
+
+    ret = vfio_enable_vectors(vdev, false);
+    if (ret) {
+        if (ret < 0) {
+            error_report("vfio: Error: Failed to setup MSI fds: %m\n");
+        } else if (ret != vdev->nr_vectors) {
+            error_report("vfio: Error: Failed to enable %d "
+                         "MSI vectors, retry with %d\n", vdev->nr_vectors, ret);
+        }
+
+        for (i = 0; i < vdev->nr_vectors; i++) {
+            VFIOMSIVector *vector = &vdev->msi_vectors[i];
+            if (vector->virq >= 0) {
+                kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
+                                                  vector->virq);
+                kvm_irqchip_release_virq(kvm_state, vector->virq);
+                vector->virq = -1;
+            } else {
+                qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                    NULL, NULL, NULL);
+            }
+            event_notifier_cleanup(&vector->interrupt);
+        }
+
+        g_free(vdev->msi_vectors);
+        /* Other code tests msi_vectors for NULL; don't leave it dangling */
+        vdev->msi_vectors = NULL;
+
+        if (ret > 0 && ret != vdev->nr_vectors) {
+            vdev->nr_vectors = ret;
+            goto retry;
+        }
+        vdev->nr_vectors = 0;
+
+        return;
+    }
+
+    /*
+     * NOTE(review): msi_set_qsize() stores its argument directly into the
+     * queue-size field; confirm that a vector count is the intended value.
+     */
+    msi_set_qsize(&vdev->pdev, vdev->nr_vectors);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, vdev->nr_vectors);
+}
+/*
+ * Leave MSI (@msix false) or MSI-X (@msix true) mode: disable the IRQ index
+ * in VFIO, undo the KVM route or userspace fd handler of every vector in
+ * use, clean up its eventfd, free the vector array and finally fall back
+ * to INTx via vfio_enable_intx().
+ */
+static void vfio_disable_msi_x(VFIODevice *vdev, bool msix)
+{
+    int i;
+
+    vfio_disable_irqindex(vdev, msix ? VFIO_PCI_MSIX_IRQ_INDEX :
+                                       VFIO_PCI_MSI_IRQ_INDEX);
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+        if (!vector->use) {
+            continue;
+        }
+
+        if (vector->virq >= 0) {
+            kvm_irqchip_remove_irqfd_notifier(kvm_state,
+                                              &vector->interrupt, vector->virq);
+            kvm_irqchip_release_virq(kvm_state, vector->virq);
+            vector->virq = -1;
+        } else {
+            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                NULL, NULL, NULL);
+        }
+
+        if (msix) {
+            msix_vector_unuse(&vdev->pdev, i);
+        }
+
+        event_notifier_cleanup(&vector->interrupt);
+    }
+
+    g_free(vdev->msi_vectors);
+    vdev->msi_vectors = NULL;
+    vdev->nr_vectors = 0;
+
+    if (!msix) {
+        msi_set_qsize(&vdev->pdev, 0); /* Actually still means 1 vector */
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, msix ? "x" : "");
+
+    vfio_enable_intx(vdev);
+}
+
+/*
+ * IO Port/MMIO - Beware of the endians, VFIO is always little endian
+ *
+ * vfio_bar_write: write callback for a trapped BAR; @opaque is the VFIOBAR.
+ * @data is converted to little endian and written with pwrite() at
+ * bar->fd_offset + @addr.  Sizes other than 1, 2 or 4 bytes end up in
+ * hw_error().  A failed write is only reported.  Every write is also
+ * treated as a potential INTx EOI.
+ */
+static void vfio_bar_write(void *opaque, target_phys_addr_t addr,
+                           uint64_t data, unsigned size)
+{
+    VFIOBAR *bar = opaque;
+    union {
+        uint8_t byte;
+        uint16_t word;
+        uint32_t dword;
+        uint64_t qword;
+    } buf;
+
+    switch (size) {
+    case 1:
+        buf.byte = data;
+        break;
+    case 2:
+        buf.word = cpu_to_le16(data);
+        break;
+    case 4:
+        buf.dword = cpu_to_le32(data);
+        break;
+    default:
+        hw_error("vfio: unsupported write size, %d bytes\n", size);
+        break;
+    }
+
+    if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
+        error_report("%s(,0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d) failed: %m\n",
+                     __func__, addr, data, size);
+    }
+
+    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d)\n",
+            __func__, bar->nr, addr, data, size);
+
+    /*
+     * A read or write to a BAR always signals an INTx EOI.  This will
+     * do nothing if not pending (including not in INTx mode).  We assume
+     * that a BAR access is in response to an interrupt and that BAR
+     * accesses will service the interrupt.  Unfortunately, we don't know
+     * which access will service the interrupt, so we're potentially
+     * getting quite a few host interrupts per guest interrupt.
+     */
+    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+}
+/*
+ * Read callback for a trapped BAR; @opaque is the VFIOBAR.  Reads @size
+ * bytes with pread() at bar->fd_offset + @addr and converts the result
+ * from little endian.  Returns all ones if the read fails; sizes other
+ * than 1, 2 or 4 bytes end up in hw_error().  Every successful read is
+ * also treated as a potential INTx EOI.
+ */
+static uint64_t vfio_bar_read(void *opaque,
+                              target_phys_addr_t addr, unsigned size)
+{
+    VFIOBAR *bar = opaque;
+    union {
+        uint8_t byte;
+        uint16_t word;
+        uint32_t dword;
+        uint64_t qword;
+    } buf;
+    uint64_t data = 0;
+
+    if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
+        error_report("%s(,0x%"TARGET_PRIxPHYS", %d) failed: %m\n",
+                     __func__, addr, size);
+        return (uint64_t)-1;
+    }
+
+    switch (size) {
+    case 1:
+        data = buf.byte;
+        break;
+    case 2:
+        data = le16_to_cpu(buf.word);
+        break;
+    case 4:
+        data = le32_to_cpu(buf.dword);
+        break;
+    default:
+        hw_error("vfio: unsupported read size, %d bytes\n", size);
+        break;
+    }
+
+    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", %d) = 0x%"PRIx64"\n",
+            __func__, bar->nr, addr, size, data);
+
+    /* Same as write above */
+    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+
+    return data;
+}
+/* Callbacks for trapped BAR accesses; the region is little endian. */
+static const MemoryRegionOps vfio_bar_ops = {
+    .read = vfio_bar_read,
+    .write = vfio_bar_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * PCI config space
+ */
+/*
+ * Read @len bytes of PCI config space at @addr.
+ *
+ * The ROM BAR and the MSI/MSI-X capabilities come from QEMU's emulated
+ * config space; everything else is read from the device through VFIO and
+ * converted from little endian.  The multifunction bit of the header type
+ * is always taken from QEMU's view of the device.
+ *
+ * Returns the value read; if the VFIO read fails the error is reported and
+ * the negated errno is returned (as an unsigned value).
+ */
+static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    uint32_t val = 0;
+
+    /*
+     * We only need QEMU PCI config support for the ROM BAR, the MSI and MSIX
+     * capabilities, and the multifunction bit below.  We let VFIO handle
+     * virtualizing everything else.  Performance is not a concern here.
+     */
+    if (ranges_overlap(addr, len, PCI_ROM_ADDRESS, 4) ||
+        (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
+         ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) ||
+        (pdev->cap_present & QEMU_PCI_CAP_MSI &&
+         ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size))) {
+
+        val = pci_default_read_config(pdev, addr, len);
+    } else {
+        if (pread(vdev->fd, &val, len, vdev->config_offset + addr) != len) {
+            /* Keep errno safe from whatever error_report() does to it */
+            int err = errno;
+
+            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m\n",
+                         __func__, vdev->host.domain, vdev->host.bus,
+                         vdev->host.slot, vdev->host.function, addr, len);
+            return -err;
+        }
+        val = le32_to_cpu(val);
+    }
+
+    /* Multifunction bit is virtualized in QEMU */
+    if (unlikely(ranges_overlap(addr, len, PCI_HEADER_TYPE, 1))) {
+        uint32_t mask = PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+        /*
+         * Move the bit to wherever the header type byte sits inside val.
+         * The overlap test above guarantees
+         * addr <= PCI_HEADER_TYPE < addr + len, so this also covers reads
+         * that do not start on a 4-byte boundary.
+         */
+        mask <<= (PCI_HEADER_TYPE - addr) * 8;
+
+        if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+            val |= mask;
+        } else {
+            val &= ~mask;
+        }
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, addr, len, val);
+
+    return val;
+}
+/*
+ * Write @len bytes of @val to PCI config space at @addr.  The write always
+ * goes to the device through VFIO first (a failure is only reported).
+ * Writes to the standard header are mirrored into QEMU's emulation; writes
+ * that touch the MSI or MSI-X capability are mirrored too and used to
+ * detect the guest enabling or disabling those interrupt modes.
+ */
+static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
+                                  uint32_t val, int len)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    uint32_t val_le = cpu_to_le32(val);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, addr, val, len);
+
+    /* Write everything to VFIO, let it filter out what we can't write */
+    if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) {
+        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m\n",
+                     __func__, vdev->host.domain, vdev->host.bus,
+                     vdev->host.slot, vdev->host.function, addr, val, len);
+    }
+
+    /* Write standard header bits to emulation */
+    if (addr < PCI_CONFIG_HEADER_SIZE) {
+        pci_default_write_config(pdev, addr, val, len);
+        return;
+    }
+
+    /* MSI/MSI-X Enabling/Disabling */
+    if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
+        ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
+        int is_enabled, was_enabled = msi_enabled(pdev);
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        is_enabled = msi_enabled(pdev);
+
+        if (!was_enabled && is_enabled) {
+            vfio_enable_msi(vdev);
+        } else if (was_enabled && !is_enabled) {
+            vfio_disable_msi_x(vdev, false);
+        }
+    }
+
+    if (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
+        ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) {
+        int is_enabled, was_enabled = msix_enabled(pdev);
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        is_enabled = msix_enabled(pdev);
+
+        if (!was_enabled && is_enabled) {
+            /* vfio_msix_vector_use handles this automatically */
+        } else if (was_enabled && !is_enabled) {
+            vfio_disable_msi_x(vdev, true);
+        }
+    }
+}
+
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+/*
+ * Map @size bytes of process memory at @vaddr to I/O virtual address @iova
+ * in @container.  The mapping is always readable by the device and also
+ * writable unless @readonly is set.
+ *
+ * Returns 0 on success or -errno from the VFIO_IOMMU_MAP_DMA ioctl.
+ */
+static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
+                        ram_addr_t size, void *vaddr, bool readonly)
+{
+    struct vfio_iommu_type1_dma_map map = {
+        .argsz = sizeof(map),
+        .flags = VFIO_DMA_MAP_FLAG_READ,
+        /* Go through uintptr_t so 32-bit hosts don't widen a pointer directly */
+        .vaddr = (__u64)(uintptr_t)vaddr,
+        .iova = iova,
+        .size = size,
+    };
+
+    if (!readonly) {
+        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
+    }
+
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
+        /* Save errno: the debug print below may overwrite it */
+        int err = errno;
+
+        DPRINTF("VFIO_MAP_DMA: %d\n", -err);
+        return -err;
+    }
+
+    return 0;
+}
+
+/*
+ * Remove the mapping of @size bytes at I/O virtual address @iova from
+ * @container.
+ *
+ * Returns 0 on success or -errno from the VFIO_IOMMU_UNMAP_DMA ioctl.
+ */
+static int vfio_dma_unmap(VFIOContainer *container,
+                          target_phys_addr_t iova, ram_addr_t size)
+{
+    struct vfio_iommu_type1_dma_unmap unmap = {
+        .argsz = sizeof(unmap),
+        .flags = 0,
+        .iova = iova,
+        .size = size,
+    };
+
+    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+        /* Save errno: the debug print below may overwrite it */
+        int err = errno;
+
+        DPRINTF("VFIO_UNMAP_DMA: %d\n", -err);
+        return -err;
+    }
+
+    return 0;
+}
+
+static void vfio_listener_dummy1(MemoryListener *listener)
+{
+    /*
+     * We don't do batching (begin/commit) or care about logging.
+     * Used for the .begin, .commit, .log_global_start and .log_global_stop
+     * hooks of vfio_memory_listener.
+     */
+}
+
+static void vfio_listener_dummy2(MemoryListener *listener,
+                                 MemoryRegionSection *section)
+{
+    /*
+     * We don't do logging or care about nops.
+     * Used for the .region_nop, .log_start, .log_stop and .log_sync hooks
+     * of vfio_memory_listener.
+     */
+}
+
+static void vfio_listener_dummy3(MemoryListener *listener,
+                                 MemoryRegionSection *section,
+                                 bool match_data, uint64_t data,
+                                 EventNotifier *e)
+{
+    /*
+     * We don't care about eventfds.
+     * Used for the .eventfd_add and .eventfd_del hooks of
+     * vfio_memory_listener.
+     */
+}
+
+/* Tell the listener callbacks to ignore any section not backed by RAM. */
+static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+{
+    bool is_ram = memory_region_is_ram(section->mr);
+
+    return !is_ram;
+}
+/*
+ * region_add hook: DMA-map the page-aligned part of a RAM section into the
+ * container that owns @listener.  Non-RAM sections are skipped, sections
+ * whose address-space and region offsets disagree below page granularity
+ * are rejected, and a mapping failure is only reported.
+ */
+static void vfio_listener_region_add(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.listener);
+    target_phys_addr_t iova, end;
+    void *vaddr;
+    int ret;
+
+    if (vfio_listener_skipped_section(section)) {
+        DPRINTF("vfio: SKIPPING region_add %"TARGET_PRIxPHYS" - %"PRIx64"\n",
+                section->offset_within_address_space,
+                section->offset_within_address_space + section->size - 1);
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region\n", __func__);
+        return;
+    }
+
+    /* Round the start up and the end down to page boundaries */
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = (section->offset_within_address_space + section->size) &
+          TARGET_PAGE_MASK;
+
+    if (iova >= end) {
+        return;
+    }
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    DPRINTF("vfio: region_add %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS" [%p]\n",
+            iova, end - 1, vaddr);
+
+    ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
+    if (ret) {
+        error_report("vfio_dma_map(%p, 0x%"TARGET_PRIxPHYS", "
+                     "0x%"TARGET_PRIxPHYS", %p) = %d (%m)\n",
+                     container, iova, end - iova, vaddr, ret);
+    }
+}
+/*
+ * region_del hook: undo the DMA mapping for the page-aligned part of a RAM
+ * section in the container that owns @listener.  Applies the same skip and
+ * alignment checks as region_add; an unmap failure is only reported.
+ */
+static void vfio_listener_region_del(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.listener);
+    target_phys_addr_t iova, end;
+    int ret;
+
+    if (vfio_listener_skipped_section(section)) {
+        DPRINTF("vfio: SKIPPING region_del %"TARGET_PRIxPHYS" - %"PRIx64"\n",
+                section->offset_within_address_space,
+                section->offset_within_address_space + section->size - 1);
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region\n", __func__);
+        return;
+    }
+
+    /* Round the start up and the end down to page boundaries */
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = (section->offset_within_address_space + section->size) &
+          TARGET_PAGE_MASK;
+
+    if (iova >= end) {
+        return;
+    }
+
+    DPRINTF("vfio: region_del %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS"\n",
+            iova, end - 1);
+
+    ret = vfio_dma_unmap(container, iova, end - iova);
+    if (ret) {
+        error_report("vfio_dma_unmap(%p, 0x%"TARGET_PRIxPHYS", "
+                     "0x%"TARGET_PRIxPHYS") = %d (%m)\n",
+                     container, iova, end - iova, ret);
+    }
+}
+/*
+ * Memory listener definition: only region_add and region_del do real
+ * work, every other hook points at one of the empty dummy callbacks.
+ */
+static MemoryListener vfio_memory_listener = {
+    .begin = vfio_listener_dummy1,
+    .commit = vfio_listener_dummy1,
+    .region_add = vfio_listener_region_add,
+    .region_del = vfio_listener_region_del,
+    .region_nop = vfio_listener_dummy2,
+    .log_start = vfio_listener_dummy2,
+    .log_stop = vfio_listener_dummy2,
+    .log_sync = vfio_listener_dummy2,
+    .log_global_start = vfio_listener_dummy1,
+    .log_global_stop = vfio_listener_dummy1,
+    .eventfd_add = vfio_listener_dummy3,
+    .eventfd_del = vfio_listener_dummy3,
+};
+
+/* Unregister the memory listener embedded in @container. */
+static void vfio_listener_release(VFIOContainer *container)
+{
+    MemoryListener *listener = &container->iommu_data.listener;
+
+    memory_listener_unregister(listener);
+}
+
+/*
+ * Interrupt setup
+ */
+/*
+ * Tear down whichever interrupt mode (INTx, MSI or MSI-X) the device is
+ * currently using; does nothing for any other mode value.
+ */
+static void vfio_disable_interrupts(VFIODevice *vdev)
+{
+    if (vdev->interrupt == VFIO_INT_INTx) {
+        vfio_disable_intx(vdev);
+    } else if (vdev->interrupt == VFIO_INT_MSI) {
+        vfio_disable_msi_x(vdev, false);
+    } else if (vdev->interrupt == VFIO_INT_MSIX) {
+        vfio_disable_msi_x(vdev, true);
+    }
+}
+/*
+ * Register the MSI capability found at config offset @pos with QEMU's MSI
+ * emulation.  The control word is read from the device through VFIO to
+ * learn 64-bit support, per-vector masking support and the vector count.
+ * Returns 0 on success or when msi_supported is false, -errno if the read
+ * fails, or the negative msi_init() result.
+ */
+static int vfio_setup_msi(VFIODevice *vdev, int pos)
+{
+    uint16_t ctrl;
+    bool msi_64bit, msi_maskbit;
+    int ret, entries;
+
+    /*
+     * TODO: don't peek into msi_supported, let msi_init fail and
+     * check for ENOTSUP
+     */
+    if (!msi_supported) {
+        return 0;
+    }
+
+    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
+              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+        return -errno;
+    }
+    ctrl = le16_to_cpu(ctrl);
+
+    msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
+    msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
+    entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+    DPRINTF("%04x:%02x:%02x.%x PCI MSI CAP @0x%x\n", vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function, pos);
+
+    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
+    if (ret < 0) {
+        error_report("vfio: msi_init failed\n");
+        return ret;
+    }
+    /* Capability length in bytes: base plus optional mask and 64-bit parts */
+    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
+
+    return 0;
+}
+
+/*
+ * We don't have any control over how pci_add_capability() inserts
+ * capabilities into the chain.  In order to setup MSI-X we need a
+ * MemoryRegion for the BAR.  In order to setup the BAR and not
+ * attempt to mmap the MSI-X table area, which VFIO won't allow, we
+ * need to first look for where the MSI-X table lives.  So we
+ * unfortunately split MSI-X setup across two functions.
+ *
+ * This first half reads the MSI-X control word and the table/PBA
+ * registers from the device through VFIO and stores the decoded BAR
+ * indexes, offsets and entry count in a newly allocated vdev->msix.
+ * Returns 0 on success or when the device has no MSI-X capability, and
+ * -errno when one of the reads does not return the full size.
+ */
+static int vfio_early_setup_msix(VFIODevice *vdev)
+{
+    uint8_t pos;
+    uint16_t ctrl;
+    uint32_t table, pba;
+
+    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
+    if (!pos) {
+        return 0;
+    }
+
+    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
+              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+        return -errno;
+    }
+
+    if (pread(vdev->fd, &table, sizeof(table),
+              vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
+        return -errno;
+    }
+
+    if (pread(vdev->fd, &pba, sizeof(pba),
+              vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
+        return -errno;
+    }
+
+    ctrl = le16_to_cpu(ctrl);
+    table = le32_to_cpu(table);
+    pba = le32_to_cpu(pba);
+
+    vdev->msix = g_malloc0(sizeof(*(vdev->msix)));
+    vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
+
+    DPRINTF("%04x:%02x:%02x.%x "
+            "PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d\n",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, pos, vdev->msix->table_bar,
+            vdev->msix->table_offset, vdev->msix->entries);
+
+    return 0;
+}
+
+/*
+ * Second stage of MSI-X setup: register the capability at @pos using the
+ * layout cached in vdev->msix and hook the per-vector use/release
+ * notifiers.  Does nothing (returns 0) when msi_supported is not set.
+ * Returns 0 on success, otherwise the error from msix_init() or
+ * msix_set_vector_notifiers().
+ */
+static int vfio_setup_msix(VFIODevice *vdev, int pos)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    MemoryRegion *table_mem, *pba_mem;
+    int ret;
+
+    /*
+     * TODO: don't peek into msi_supported, let msix_init fail and
+     * check for ENOTSUP
+     */
+    if (!msi_supported) {
+        return 0;
+    }
+
+    table_mem = &vdev->bars[vdev->msix->table_bar].mem;
+    pba_mem = &vdev->bars[vdev->msix->pba_bar].mem;
+
+    ret = msix_init(pdev, vdev->msix->entries,
+                    table_mem, vdev->msix->table_bar,
+                    vdev->msix->table_offset,
+                    pba_mem, vdev->msix->pba_bar,
+                    vdev->msix->pba_offset, pos);
+    if (ret < 0) {
+        error_report("vfio: msix_init failed\n");
+        return ret;
+    }
+
+    ret = msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
+                                    vfio_msix_vector_release);
+    if (!ret) {
+        return 0;
+    }
+
+    /* Undo msix_init() so the caller sees an all-or-nothing result */
+    error_report("vfio: msix_set_vector_notifiers failed %d\n", ret);
+    msix_uninit(pdev, table_mem, pba_mem);
+    return ret;
+}
+
+/*
+ * Tear down MSI and, when MSI-X info was cached for the device, MSI-X:
+ * drop the vector notifiers if both are installed and release the
+ * MSI-X state bound to the table and PBA BARs.
+ */
+static void vfio_teardown_msi(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    msi_uninit(pdev);
+
+    if (!vdev->msix) {
+        return;
+    }
+
+    /* FIXME: Why can't unset just silently do nothing?? */
+    if (pdev->msix_vector_use_notifier &&
+        pdev->msix_vector_release_notifier) {
+        msix_unset_vector_notifiers(pdev);
+    }
+
+    msix_uninit(pdev, &vdev->bars[vdev->msix->table_bar].mem,
+                &vdev->bars[vdev->msix->pba_bar].mem);
+}
+
+/*
+ * Resource setup
+ */
+/*
+ * Enable or disable the direct-mapped subregions of every implemented
+ * BAR, including the extra MSI-X subregion of the BAR that holds the
+ * vector table.
+ */
+static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled)
+{
+    int nr;
+
+    for (nr = 0; nr < PCI_ROM_SLOT; nr++) {
+        VFIOBAR *bar = &vdev->bars[nr];
+
+        if (bar->size) {
+            memory_region_set_enabled(&bar->mmap_mem, enabled);
+
+            if (vdev->msix && vdev->msix->table_bar == nr) {
+                memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
+            }
+        }
+    }
+}
+
+/*
+ * Undo vfio_map_bar() for BAR @nr: detach and unmap the direct-mapped
+ * subregion(s), then destroy the slow-path region.  BARs with a zero
+ * size are left alone.
+ */
+static void vfio_unmap_bar(VFIODevice *vdev, int nr)
+{
+    VFIOBAR *bar = &vdev->bars[nr];
+    bool has_msix_table = vdev->msix && vdev->msix->table_bar == nr;
+
+    if (bar->size == 0) {
+        return;
+    }
+
+    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
+    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));
+
+    if (has_msix_table) {
+        VFIOMSIXInfo *msix = vdev->msix;
+
+        /* The BAR holding the MSI-X table carries a second mapping */
+        memory_region_del_subregion(&bar->mem, &msix->mmap_mem);
+        munmap(msix->mmap, memory_region_size(&msix->mmap_mem));
+    }
+
+    memory_region_destroy(&bar->mem);
+}
+
+/*
+ * Try to mmap @size bytes of @bar starting at @offset and expose the
+ * mapping as @submem, a subregion of @mem at @offset.  When the region
+ * does not allow mmap, @size is zero, or mmap() fails, @submem becomes a
+ * zero sized region instead so that cleanup can treat all BARs alike.
+ *
+ * *@map receives the mapping address; it is set to NULL if mmap() fails
+ * and left untouched if no mapping is attempted.
+ *
+ * Returns 0, or -errno if mmap() was attempted and failed.
+ */
+static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
+                         void **map, size_t size, off_t offset,
+                         const char *name)
+{
+    bool mapped = false;
+    int ret = 0;
+
+    if (size && (bar->flags & VFIO_REGION_INFO_FLAG_MMAP)) {
+        int prot = 0;
+
+        /* Mirror the access rights VFIO reports for the region */
+        prot |= (bar->flags & VFIO_REGION_INFO_FLAG_READ) ? PROT_READ : 0;
+        prot |= (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) ? PROT_WRITE : 0;
+
+        *map = mmap(NULL, size, prot, MAP_SHARED,
+                    bar->fd, bar->fd_offset + offset);
+        if (*map != MAP_FAILED) {
+            mapped = true;
+        } else {
+            *map = NULL;
+            ret = -errno;
+        }
+    }
+
+    if (mapped) {
+        memory_region_init_ram_ptr(submem, name, size, *map);
+    } else {
+        /* Create a zero sized sub-region to make cleanup easy. */
+        memory_region_init(submem, name, 0);
+    }
+
+    memory_region_add_subregion(mem, offset, submem);
+
+    return ret;
+}
+
+/*
+ * Register BAR @nr of the device with the emulated PCI device.
+ *
+ * Every BAR gets a "slow" I/O region backed by vfio_bar_ops.  On top of
+ * that we try to layer a direct mmap of the region.  If the BAR hosts the
+ * MSI-X vector table, the mmap is split into the part below the table
+ * and the part above it, since the table pages themselves cannot be
+ * mapped.
+ *
+ * Sizes are kept in size_t, matching VFIOBAR.size and the size parameter
+ * of vfio_mmap_bar(), so that large BARs are not truncated to 32 bits.
+ */
+static void vfio_map_bar(VFIODevice *vdev, int nr)
+{
+    VFIOBAR *bar = &vdev->bars[nr];
+    size_t size = bar->size;
+    char name[64];
+    uint32_t pci_bar;
+    uint8_t type;
+    int ret;
+
+    /* Skip both unimplemented BARs and the upper half of 64bit BARS. */
+    if (!size) {
+        return;
+    }
+
+    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function, nr);
+
+    /* Determine what type of BAR this is for registration */
+    ret = pread(vdev->fd, &pci_bar, sizeof(pci_bar),
+                vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
+    if (ret != sizeof(pci_bar)) {
+        error_report("vfio: Failed to read BAR %d (%m)\n", nr);
+        return;
+    }
+
+    /* Keep only the type bits (I/O vs memory, width, prefetch) */
+    pci_bar = le32_to_cpu(pci_bar);
+    type = pci_bar & (pci_bar & PCI_BASE_ADDRESS_SPACE_IO ?
+           ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK);
+
+    /* A "slow" read/write mapping underlies all BARs */
+    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
+    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);
+
+    /*
+     * We can't mmap areas overlapping the MSIX vector table, so we
+     * potentially insert a direct-mapped subregion before and after it.
+     */
+    if (vdev->msix && vdev->msix->table_bar == nr) {
+        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
+    }
+
+    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
+    if (vfio_mmap_bar(bar, &bar->mem,
+                      &bar->mmap_mem, &bar->mmap, size, 0, name)) {
+        error_report("%s unsupported. Performance may be slow\n", name);
+    }
+
+    if (vdev->msix && vdev->msix->table_bar == nr) {
+        size_t start;
+
+        /* First page boundary at or after the end of the vector table */
+        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
+                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+
+        size = start < bar->size ? bar->size - start : 0;
+        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
+        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
+        if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
+                          &vdev->msix->mmap, size, start, name)) {
+            error_report("%s unsupported. Performance may be slow\n", name);
+        }
+    }
+}
+
+/* Map every BAR slot below the ROM slot; empty BARs are skipped inside. */
+static void vfio_map_bars(VFIODevice *vdev)
+{
+    int nr = 0;
+
+    while (nr < PCI_ROM_SLOT) {
+        vfio_map_bar(vdev, nr);
+        nr++;
+    }
+}
+
+/* Unmap every BAR slot below the ROM slot; empty BARs are skipped inside. */
+static void vfio_unmap_bars(VFIODevice *vdev)
+{
+    int nr = 0;
+
+    while (nr < PCI_ROM_SLOT) {
+        vfio_unmap_bar(vdev, nr);
+        nr++;
+    }
+}
+
+/*
+ * General setup
+ */
+/*
+ * Upper bound for the size of the capability at @pos: the distance to
+ * the closest capability located after it in config space, or to offset
+ * 0xff when no capability follows.
+ */
+static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
+{
+    uint8_t limit = 0xff;
+    uint8_t cur = pdev->config[PCI_CAPABILITY_LIST];
+
+    /* Walk the chain; byte 1 of each capability is the next pointer */
+    while (cur) {
+        if (cur > pos && cur < limit) {
+            limit = cur;
+        }
+        cur = pdev->config[cur + 1];
+    }
+
+    return limit - pos;
+}
+
+/*
+ * Re-register the capability at @pos, and every capability chained after
+ * it, with the emulated device.
+ *
+ * pci_add_capability() always inserts at the head of the chain, so to end
+ * up with the same order as the physical device we recurse to the tail
+ * first and add capabilities while unwinding.  Everything we need from
+ * the cached config space (ID, next pointer, size bound) is read before
+ * recursing because the cache is rewritten on the way back.
+ *
+ * Returns 0 on success or the error of the first capability that fails.
+ */
+static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint8_t cap_id = pdev->config[pos];
+    uint8_t next = pdev->config[pos + 1];
+    /*
+     * We do not track the exact size of each capability; the gap to the
+     * next one is good enough since QEMU handles few config accesses.
+     */
+    uint8_t size = vfio_std_cap_max_size(pdev, pos);
+    int ret;
+
+    if (!next) {
+        /* Tail of the chain reached: begin the rebuild */
+        pdev->config[PCI_CAPABILITY_LIST] = 0;
+    } else {
+        ret = vfio_add_std_cap(vdev, next);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (cap_id == PCI_CAP_ID_MSI) {
+        ret = vfio_setup_msi(vdev, pos);
+    } else if (cap_id == PCI_CAP_ID_MSIX) {
+        ret = vfio_setup_msix(vdev, pos);
+    } else {
+        ret = pci_add_capability(pdev, cap_id, pos, size);
+    }
+
+    if (ret >= 0) {
+        return 0;
+    }
+
+    error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
+                 "0x%x[0x%x]@0x%x: %d\n", vdev->host.domain,
+                 vdev->host.bus, vdev->host.slot, vdev->host.function,
+                 cap_id, size, pos, ret);
+    return ret;
+}
+
+/*
+ * Rebuild the standard capability chain of the emulated device from the
+ * cached copy of the physical device's config space.  Returns 0 when the
+ * device has no capability list, otherwise the result of
+ * vfio_add_std_cap() for the head of the list.
+ */
+static int vfio_add_capabilities(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    if ((pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) &&
+        pdev->config[PCI_CAPABILITY_LIST]) {
+        return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    }
+
+    return 0; /* Nothing to add */
+}
+
+/*
+ * Copy the device's option ROM out of the VFIO ROM region into a RAM
+ * backed MemoryRegion and register it as the ROM BAR.
+ *
+ * Nothing is done (and 0 is returned) when a romfile was given, the ROM
+ * BAR is disabled, or the device reports no ROM.  The buffer is prefilled
+ * with 0xff so a short read leaves the remainder looking like erased
+ * ROM.
+ *
+ * Returns 0 on success, -errno if reading the ROM fails.  errno is
+ * captured immediately after the failing pread() so that the calls made
+ * while cleaning up cannot change the value we return.
+ */
+static int vfio_load_rom(VFIODevice *vdev)
+{
+    uint64_t size = vdev->rom_size;
+    char name[32];
+    off_t off = 0, voff = vdev->rom_offset;
+    ssize_t bytes;
+    void *ptr;
+
+    /* If loading ROM from file, pci handles it */
+    if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
+        return 0;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function);
+    memory_region_init_ram(&vdev->pdev.rom, name, size);
+    ptr = memory_region_get_ram_ptr(&vdev->pdev.rom);
+    memset(ptr, 0xff, size);
+
+    while (size) {
+        bytes = pread(vdev->fd, ptr + off, size, voff + off);
+        if (bytes == 0) {
+            break; /* expect that we could get back less than the ROM BAR */
+        } else if (bytes > 0) {
+            off += bytes;
+            size -= bytes;
+        } else {
+            int err = errno;
+
+            if (err == EINTR || err == EAGAIN) {
+                continue;
+            }
+            /* %m below still sees the pread() errno: nothing ran since */
+            error_report("vfio: Error reading device ROM: %m\n");
+            memory_region_destroy(&vdev->pdev.rom);
+            return -err;
+        }
+    }
+
+    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
+    vdev->pdev.has_rom = true;
+    return 0;
+}
+
+/*
+ * Attach @group to a VFIO container.
+ *
+ * An existing container is reused if the kernel accepts the group into
+ * it.  Otherwise a new container is opened from /dev/vfio/vfio, the
+ * group is added, the type1 IOMMU model is selected and a memory
+ * listener is registered for it.
+ *
+ * Returns 0 on success (including when the group already has a
+ * container), -errno for failing system calls, or -EINVAL for an API
+ * version mismatch or when no supported IOMMU model is available.
+ * errno is saved right after each failing call because error_report(),
+ * g_free() and close() may overwrite it before we return.
+ */
+static int vfio_connect_container(VFIOGroup *group)
+{
+    VFIOContainer *container;
+    int ret, fd;
+
+    if (group->container) {
+        return 0;
+    }
+
+    /* First try to share a container that already exists */
+    QLIST_FOREACH(container, &container_list, next) {
+        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+            group->container = container;
+            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+            return 0;
+        }
+    }
+
+    fd = qemu_open("/dev/vfio/vfio", O_RDWR);
+    if (fd < 0) {
+        ret = -errno;
+        error_report("vfio: failed to open /dev/vfio/vfio: %m\n");
+        return ret;
+    }
+
+    ret = ioctl(fd, VFIO_GET_API_VERSION);
+    if (ret != VFIO_API_VERSION) {
+        error_report("vfio: supported vfio version: %d, "
+                     "reported version: %d\n", VFIO_API_VERSION, ret);
+        close(fd);
+        return -EINVAL;
+    }
+
+    container = g_malloc0(sizeof(*container));
+    container->fd = fd;
+
+    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            ret = -errno;
+            error_report("vfio: failed to set group container: %m\n");
+            g_free(container);
+            close(fd);
+            return ret;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+        if (ret) {
+            ret = -errno;
+            error_report("vfio: failed to set iommu for container: %m\n");
+            g_free(container);
+            close(fd);
+            return ret;
+        }
+
+        container->iommu_data.listener = vfio_memory_listener;
+        container->iommu_data.release = vfio_listener_release;
+
+        memory_listener_register(&container->iommu_data.listener,
+                                 get_system_memory());
+    } else {
+        error_report("vfio: No available IOMMU models\n");
+        g_free(container);
+        close(fd);
+        return -EINVAL;
+    }
+
+    QLIST_INIT(&container->group_list);
+    QLIST_INSERT_HEAD(&container_list, container, next);
+
+    group->container = container;
+    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
+    return 0;
+}
+
+/*
+ * Detach @group from its container.  When the container is left without
+ * any group, its IOMMU release hook is run (if set), it is removed from
+ * the container list, its fd is closed and it is freed.
+ */
+static void vfio_disconnect_container(VFIOGroup *group)
+{
+    VFIOContainer *container = group->container;
+
+    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+        error_report("vfio: error disconnecting group %d from container\n",
+                     group->groupid);
+    }
+
+    QLIST_REMOVE(group, container_next);
+    group->container = NULL;
+
+    if (!QLIST_EMPTY(&container->group_list)) {
+        return; /* Other groups still use this container */
+    }
+
+    if (container->iommu_data.release) {
+        container->iommu_data.release(container);
+    }
+    QLIST_REMOVE(container, next);
+    DPRINTF("vfio_disconnect_container: close container->fd\n");
+    close(container->fd);
+    g_free(container);
+}
+
+/*
+ * Look up the VFIOGroup for @groupid, creating it on first use.
+ *
+ * A new group is opened from /dev/vfio/<groupid>, checked for
+ * viability, connected to a container and added to the global group
+ * list.  Returns the group, or NULL after reporting an error.
+ */
+static VFIOGroup *vfio_get_group(int groupid)
+{
+    struct vfio_group_status status = { .argsz = sizeof(status) };
+    VFIOGroup *group;
+    char path[32];
+
+    QLIST_FOREACH(group, &group_list, next) {
+        if (group->groupid == groupid) {
+            return group;
+        }
+    }
+
+    group = g_malloc0(sizeof(*group));
+
+    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+    group->fd = qemu_open(path, O_RDWR);
+    if (group->fd < 0) {
+        error_report("vfio: error opening %s: %m\n", path);
+        goto free_group;
+    }
+
+    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
+        error_report("vfio: error getting group status: %m\n");
+        goto close_group;
+    }
+
+    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+        error_report("vfio: error, group %d is not viable, please ensure "
+                     "all devices within the iommu_group are bound to their "
+                     "vfio bus driver.\n", groupid);
+        goto close_group;
+    }
+
+    group->groupid = groupid;
+    QLIST_INIT(&group->device_list);
+
+    if (vfio_connect_container(group)) {
+        error_report("vfio: failed to setup container for group %d\n", groupid);
+        goto close_group;
+    }
+
+    QLIST_INSERT_HEAD(&group_list, group, next);
+
+    return group;
+
+close_group:
+    close(group->fd);
+free_group:
+    g_free(group);
+    return NULL;
+}
+
+/*
+ * Release @group once no device uses it any more: disconnect it from its
+ * container, unlink it from the group list, close its fd and free it.
+ * A group that still has devices attached is left untouched.
+ */
+static void vfio_put_group(VFIOGroup *group)
+{
+    if (QLIST_EMPTY(&group->device_list)) {
+        vfio_disconnect_container(group);
+        QLIST_REMOVE(group, next);
+        DPRINTF("vfio_put_group: close group->fd\n");
+        close(group->fd);
+        g_free(group);
+    }
+}
+
+/*
+ * Obtain the VFIO device fd for @name from @group and fill in @vdev.
+ *
+ * On success @vdev is linked into the group's device list and holds the
+ * device fd plus the size, offset and flags of each BAR region and the
+ * size and offset of the ROM and config space regions.
+ *
+ * Returns 0 on success and a negative value on failure.  On any failure
+ * after the fd was obtained, @vdev is unlinked from the group and the fd
+ * is closed.  The sanity checks that do not come from a failing ioctl
+ * set -EINVAL explicitly; otherwise ret would still hold 0 from the
+ * preceding successful ioctl and the failure would be reported to the
+ * caller as success.
+ */
+static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
+{
+    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
+    struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+    int ret, i;
+
+    ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+    if (ret < 0) {
+        error_report("vfio: error getting device %s from group %d: %m\n",
+                     name, group->groupid);
+        error_report("Verify all devices in group %d are bound to vfio-pci "
+                     "or pci-stub and not already in use\n", group->groupid);
+        return ret;
+    }
+
+    vdev->fd = ret;
+    vdev->group = group;
+    QLIST_INSERT_HEAD(&group->device_list, vdev, next);
+
+    /* Sanity check device */
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info);
+    if (ret) {
+        error_report("vfio: error getting device info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
+            dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
+
+    if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
+        error_report("vfio: Um, this isn't a PCI device\n");
+        ret = -EINVAL;
+        goto error;
+    }
+
+    vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
+    if (!vdev->reset_works) {
+        error_report("Warning, device %s does not support reset\n", name);
+    }
+
+    if (dev_info.num_regions != VFIO_PCI_NUM_REGIONS) {
+        error_report("vfio: unexpected number of io regions %u\n",
+                     dev_info.num_regions);
+        ret = -EINVAL;
+        goto error;
+    }
+
+    if (dev_info.num_irqs != VFIO_PCI_NUM_IRQS) {
+        error_report("vfio: unexpected number of irqs %u\n", dev_info.num_irqs);
+        ret = -EINVAL;
+        goto error;
+    }
+
+    /* Record where each BAR lives inside the device fd */
+    for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
+        reg_info.index = i;
+
+        ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+        if (ret) {
+            error_report("vfio: Error getting region %d info: %m\n", i);
+            goto error;
+        }
+
+        DPRINTF("Device %s region %d:\n", name, i);
+        DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+                (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+                (unsigned long)reg_info.flags);
+
+        vdev->bars[i].flags = reg_info.flags;
+        vdev->bars[i].size = reg_info.size;
+        vdev->bars[i].fd_offset = reg_info.offset;
+        vdev->bars[i].fd = vdev->fd;
+        vdev->bars[i].nr = i;
+    }
+
+    reg_info.index = VFIO_PCI_ROM_REGION_INDEX;
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+    if (ret) {
+        error_report("vfio: Error getting ROM info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s ROM:\n", name);
+    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+            (unsigned long)reg_info.flags);
+
+    vdev->rom_size = reg_info.size;
+    vdev->rom_offset = reg_info.offset;
+
+    reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+    if (ret) {
+        error_report("vfio: Error getting config info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s config:\n", name);
+    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+            (unsigned long)reg_info.flags);
+
+    vdev->config_size = reg_info.size;
+    vdev->config_offset = reg_info.offset;
+
+error:
+    /* Reached on success too; only undo the setup when ret is set */
+    if (ret) {
+        QLIST_REMOVE(vdev, next);
+        vdev->group = NULL;
+        close(vdev->fd);
+    }
+    return ret;
+}
+
+/*
+ * Release what vfio_get_device() set up for @vdev: unlink it from its
+ * group, close the device fd and drop the cached MSI-X info, if any.
+ */
+static void vfio_put_device(VFIODevice *vdev)
+{
+    QLIST_REMOVE(vdev, next);
+    vdev->group = NULL;
+
+    DPRINTF("vfio_put_device: close vdev->fd\n");
+    close(vdev->fd);
+
+    /* g_free() accepts NULL, so no check is needed */
+    g_free(vdev->msix);
+    vdev->msix = NULL;
+}
+
+/*
+ * PCI device init callback for vfio-pci.
+ *
+ * Resolves the host device's IOMMU group through sysfs, acquires the
+ * group and the device from VFIO, snapshots config space, then sets up
+ * the ROM, BARs, capabilities and INTx.
+ *
+ * Returns 0 on success and a negative errno-style value on failure;
+ * everything acquired up to the point of failure is released again.
+ *
+ * Error codes are captured before error_report() runs, since it may
+ * change errno.  Failures that do not set errno (empty or overlong
+ * iommu_group link, unparsable group number) return an explicit code so
+ * that they can never be mistaken for success.
+ */
+static int vfio_initfn(PCIDevice *pdev)
+{
+    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOGroup *group;
+    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+    ssize_t len;
+    struct stat st;
+    int groupid;
+    int ret;
+
+    /* Check that the host device exists */
+    snprintf(path, sizeof(path),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function);
+    if (stat(path, &st) < 0) {
+        ret = -errno;
+        error_report("vfio: error: no such host device: %s\n", path);
+        return ret;
+    }
+
+    strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+
+    /*
+     * readlink() does not NUL terminate.  A result that fills the whole
+     * buffer leaves no room for the terminator (and may be truncated), so
+     * treat it as an error rather than writing past the end.
+     */
+    len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
+    if (len <= 0 || len >= (ssize_t)sizeof(iommu_group_path)) {
+        if (len < 0) {
+            ret = -errno;
+        } else {
+            ret = len ? -ENAMETOOLONG : -ENOENT;
+        }
+        error_report("vfio: error no iommu_group for device\n");
+        return ret;
+    }
+
+    iommu_group_path[len] = 0;
+    group_name = basename(iommu_group_path);
+
+    if (sscanf(group_name, "%d", &groupid) != 1) {
+        /* A matching failure in sscanf() does not set errno */
+        error_report("vfio: error reading %s: %m\n", path);
+        return -EINVAL;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
+
+    group = vfio_get_group(groupid);
+    if (!group) {
+        error_report("vfio: failed to get group %d\n", groupid);
+        return -ENOENT;
+    }
+
+    /* From here on path holds the device name VFIO expects */
+    snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function);
+
+    QLIST_FOREACH(pvdev, &group->device_list, next) {
+        if (pvdev->host.domain == vdev->host.domain &&
+            pvdev->host.bus == vdev->host.bus &&
+            pvdev->host.slot == vdev->host.slot &&
+            pvdev->host.function == vdev->host.function) {
+
+            error_report("vfio: error: device %s is already attached\n", path);
+            vfio_put_group(group);
+            return -EBUSY;
+        }
+    }
+
+    ret = vfio_get_device(group, path, vdev);
+    if (ret) {
+        error_report("vfio: failed to get device %s\n", path);
+        vfio_put_group(group);
+        return ret;
+    }
+
+    /* Get a copy of config space */
+    ret = pread(vdev->fd, vdev->pdev.config,
+                MIN(pci_config_size(&vdev->pdev), vdev->config_size),
+                vdev->config_offset);
+    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+        ret = ret < 0 ? -errno : -EFAULT;
+        error_report("vfio: Failed to read device config space\n");
+        goto out_put;
+    }
+
+    /*
+     * Clear host resource mapping info.  If we choose not to register a
+     * BAR, such as might be the case with the option ROM, we can get
+     * confusing, unwritable, residual addresses from the host here.
+     */
+    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
+    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
+
+    vfio_load_rom(vdev);
+
+    ret = vfio_early_setup_msix(vdev);
+    if (ret) {
+        goto out_put;
+    }
+
+    vfio_map_bars(vdev);
+
+    ret = vfio_add_capabilities(vdev);
+    if (ret) {
+        goto out_teardown;
+    }
+
+    /* Only set up INTx when the device reports an interrupt pin */
+    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
+        if (vdev->intx.intx && strcmp(vdev->intx.intx, "off")) {
+            error_report("vfio: Unknown option x-intx=%s, "
+                         "valid options: \"off\".\n", vdev->intx.intx);
+            ret = -EINVAL;
+            goto out_teardown;
+        }
+
+        if (vdev->intx.intx && !strcmp(vdev->intx.intx, "off")) {
+            vdev->intx.disabled = true;
+        }
+
+        ret = vfio_enable_intx(vdev);
+        if (ret) {
+            goto out_teardown;
+        }
+    }
+
+    return 0;
+
+out_teardown:
+    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    vfio_teardown_msi(vdev);
+    vfio_unmap_bars(vdev);
+out_put:
+    vfio_put_device(vdev);
+    vfio_put_group(group);
+    return ret;
+}
+
+/*
+ * PCI device exit callback: disable interrupts, tear down MSI/MSI-X and
+ * the BAR mappings, then release the device and its group.
+ */
+static void vfio_exitfn(PCIDevice *pdev)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    /* Remember the group now: vfio_put_device() clears vdev->group */
+    VFIOGroup *owner = vdev->group;
+
+    pci_device_set_intx_routing_notifier(pdev, NULL);
+    vfio_disable_interrupts(vdev);
+    vfio_teardown_msi(vdev);
+    vfio_unmap_bars(vdev);
+
+    vfio_put_device(vdev);
+    vfio_put_group(owner);
+}
+
+/*
+ * qdev reset callback: ask VFIO to reset the physical device, provided
+ * the device advertised reset support.  A failing reset is reported but
+ * otherwise ignored.
+ */
+static void vfio_pci_reset(DeviceState *dev)
+{
+    PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+
+    if (vdev->reset_works && ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
+        error_report("vfio: Error unable to reset physical device "
+                     "(%04x:%02x:%02x.%x): %m\n", vdev->host.domain,
+                     vdev->host.bus, vdev->host.slot, vdev->host.function);
+    }
+}
+
+static Property vfio_pci_dev_properties[] = {
+    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
+    DEFINE_PROP_STRING("x-intx", VFIODevice, intx.intx),
+    /*
+     * TODO - support passed fds... is this necessary?
+     * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
+     * DEFINE_PROP_STRING("vfiogroupfd", VFIODevice, vfiogroupfd_name),
+     */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+
+/* Class init: install the vfio-pci device and PCI class callbacks. */
+static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* PCI level hooks */
+    pci_class->init = vfio_initfn;
+    pci_class->exit = vfio_exitfn;
+    pci_class->config_read = vfio_pci_read_config;
+    pci_class->config_write = vfio_pci_write_config;
+
+    /* qdev level hooks */
+    dev_class->props = vfio_pci_dev_properties;
+    dev_class->reset = vfio_pci_reset;
+}
+
+/* Type description of the "vfio-pci" device, a TYPE_PCI_DEVICE subtype. */
+static const TypeInfo vfio_pci_dev_info = {
+    .parent = TYPE_PCI_DEVICE,
+    .name = "vfio-pci",
+    .class_init = vfio_pci_dev_class_init,
+    .instance_size = sizeof(VFIODevice),
+};
+
+/* Register the vfio-pci type; hooked up through type_init() below. */
+static void register_vfio_pci_dev_type(void)
+{
+    type_register_static(&vfio_pci_dev_info);
+}
+
+type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio_pci_int.h b/hw/vfio_pci_int.h
new file mode 100644
index 0000000..3812d8d
--- /dev/null
+++ b/hw/vfio_pci_int.h
@@ -0,0 +1,114 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VFIO_PCI_INT_H
+#define HW_VFIO_PCI_INT_H
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "pci.h"
+#include "event_notifier.h"
+
+typedef struct VFIOBAR {
+    off_t fd_offset; /* offset of BAR within device fd */
+    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
+    MemoryRegion mem; /* slow, read/write access */
+    MemoryRegion mmap_mem; /* direct mapped access */
+    void *mmap; /* address from mmap() for mmap_mem, NULL if mmap failed */
+    size_t size; /* region size reported by VFIO, 0 if BAR is not mapped */
+    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+    uint8_t nr; /* cache the BAR number for debug */
+} VFIOBAR;
+
+typedef struct VFIOINTx {
+    bool pending; /* interrupt pending */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+    uint8_t pin; /* which pin to pull for qemu_set_irq */
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+    PCIINTxRoute route; /* routing info for QEMU bypass */
+    bool disabled; /* set at init when the x-intx property is "off" */
+    char *intx; /* value of the "x-intx" property, only "off" is valid */
+} VFIOINTx;
+
+struct VFIODevice;
+
+typedef struct VFIOMSIVector {
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    struct VFIODevice *vdev; /* back pointer to device */
+    int virq; /* KVM irqchip route for QEMU bypass */
+    bool use; /* presumably set while the vector is in use - TODO confirm */
+} VFIOMSIVector;
+
+enum { /* interrupt types; presumably values of VFIODevice.interrupt */
+    VFIO_INT_NONE = 0,
+    VFIO_INT_INTx = 1,
+    VFIO_INT_MSI  = 2,
+    VFIO_INT_MSIX = 3,
+};
+
+struct VFIOGroup;
+
+typedef struct VFIOContainer {
+    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+    struct {
+        /* enable abstraction to support various iommu backends */
+        union {
+            MemoryListener listener; /* Used by type1 iommu */
+        };
+        void (*release)(struct VFIOContainer *); /* run when last group leaves */
+    } iommu_data;
+    QLIST_HEAD(, VFIOGroup) group_list; /* groups attached to this container */
+    QLIST_ENTRY(VFIOContainer) next; /* link in the list of containers */
+} VFIOContainer;
+
+/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
+typedef struct VFIOMSIXInfo {
+    uint8_t table_bar; /* BAR index holding the vector table */
+    uint8_t pba_bar; /* BAR index holding the pending bit array */
+    uint16_t entries; /* number of vectors (table size field + 1) */
+    uint32_t table_offset; /* offset of the vector table within table_bar */
+    uint32_t pba_offset; /* offset of the PBA within pba_bar */
+    MemoryRegion mmap_mem; /* direct map of table_bar above the table */
+    void *mmap; /* address from mmap() backing mmap_mem */
+} VFIOMSIXInfo;
+
+typedef struct VFIODevice {
+    PCIDevice pdev; /* emulated PCI device, used with DO_UPCAST */
+    int fd; /* device fd from VFIO_GROUP_GET_DEVICE_FD */
+    VFIOINTx intx; /* INTx state and the x-intx option */
+    unsigned int config_size; /* size of config space region in device fd */
+    off_t config_offset; /* Offset of config space region within device fd */
+    unsigned int rom_size; /* size of ROM region in device fd */
+    off_t rom_offset; /* Offset of ROM region within device fd */
+    int msi_cap_size; /* size of the MSI capability, set at MSI setup */
+    VFIOMSIVector *msi_vectors;
+    VFIOMSIXInfo *msix; /* allocated by early MSI-X setup if cap present */
+    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
+    int interrupt; /* Current interrupt type */
+    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
+    PCIHostDeviceAddress host; /* host address from the "host" property */
+    QLIST_ENTRY(VFIODevice) next; /* link in the group's device_list */
+    struct VFIOGroup *group; /* owning group, NULL once released */
+    bool reset_works; /* device reported VFIO_DEVICE_FLAGS_RESET */
+} VFIODevice;
+
+typedef struct VFIOGroup {
+    int fd; /* fd of /dev/vfio/<groupid> */
+    int groupid; /* IOMMU group number */
+    VFIOContainer *container; /* container the group is attached to */
+    QLIST_HEAD(, VFIODevice) device_list; /* devices obtained from group */
+    QLIST_ENTRY(VFIOGroup) next; /* link in the global group list */
+    QLIST_ENTRY(VFIOGroup) container_next; /* link in container group_list */
+} VFIOGroup;
+
+#endif /* HW_VFIO_PCI_INT_H */
commit 883f0b85f0cbb8c31baeadfcbfd144a1ed29d935
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Wed Sep 26 11:19:25 2012 -0600

    Update Linux kernel headers
    
    Based on Linux as of 1a95620.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
new file mode 100644
index 0000000..f787b72
--- /dev/null
+++ b/linux-headers/linux/vfio.h
@@ -0,0 +1,368 @@
+/*
+ * VFIO API definition
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson at redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef VFIO_H
+#define VFIO_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define VFIO_API_VERSION	0
+
+
+/* Kernel & User level defines for VFIO IOCTLs. */
+
+/* Extensions */
+
+#define VFIO_TYPE1_IOMMU		1
+
+/*
+ * The IOCTL interface is designed for extensibility by embedding the
+ * structure length (argsz) and flags into structures passed between
+ * kernel and userspace.  We therefore use the _IO() macro for these
+ * defines to avoid implicitly embedding a size into the ioctl request.
+ * As structure fields are added, argsz will increase to match and flag
+ * bits will be defined to indicate additional fields with valid data.
+ * It's *always* the caller's responsibility to indicate the size of
+ * the structure passed by setting argsz appropriately.
+ */
+
+#define VFIO_TYPE	(';')
+#define VFIO_BASE	100
+
+/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
+
+/**
+ * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
+ *
+ * Report the version of the VFIO API.  This allows us to bump the entire
+ * API version should we later need to add or change features in incompatible
+ * ways.
+ * Return: VFIO_API_VERSION
+ * Availability: Always
+ */
+#define VFIO_GET_API_VERSION		_IO(VFIO_TYPE, VFIO_BASE + 0)
+
+/**
+ * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
+ *
+ * Check whether an extension is supported.
+ * Return: 0 if not supported, 1 (or some other positive integer) if supported.
+ * Availability: Always
+ */
+#define VFIO_CHECK_EXTENSION		_IO(VFIO_TYPE, VFIO_BASE + 1)
+
+/**
+ * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
+ *
+ * Set the iommu to the given type.  The type must be supported by an
+ * iommu driver as verified by calling CHECK_EXTENSION using the same
+ * type.  A group must be set to this file descriptor before this
+ * ioctl is available.  The IOMMU interfaces enabled by this call are
+ * specific to the value set.
+ * Return: 0 on success, -errno on failure
+ * Availability: When VFIO group attached
+ */
+#define VFIO_SET_IOMMU			_IO(VFIO_TYPE, VFIO_BASE + 2)
+
+/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
+
+/**
+ * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
+ *						struct vfio_group_status)
+ *
+ * Retrieve information about the group.  Fills in provided
+ * struct vfio_group_status.  Caller sets argsz.
+ * Return: 0 on success, -errno on failure.
+ * Availability: Always
+ */
+struct vfio_group_status {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_GROUP_FLAGS_VIABLE		(1 << 0)
+#define VFIO_GROUP_FLAGS_CONTAINER_SET	(1 << 1)
+};
+#define VFIO_GROUP_GET_STATUS		_IO(VFIO_TYPE, VFIO_BASE + 3)
+
+/**
+ * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
+ *
+ * Set the container for the VFIO group to the open VFIO file
+ * descriptor provided.  Groups may only belong to a single
+ * container.  Containers may, at their discretion, support multiple
+ * groups.  Only when a container is set are all of the interfaces
+ * of the VFIO file descriptor and the VFIO group file descriptor
+ * available to the user.
+ * Return: 0 on success, -errno on failure.
+ * Availability: Always
+ */
+#define VFIO_GROUP_SET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 4)
+
+/**
+ * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
+ *
+ * Remove the group from the attached container.  This is the
+ * opposite of the SET_CONTAINER call and returns the group to
+ * an initial state.  All device file descriptors must be released
+ * prior to calling this interface.  When removing the last group
+ * from a container, the IOMMU will be disabled and all state lost,
+ * effectively also returning the VFIO file descriptor to an initial
+ * state.
+ * Return: 0 on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_UNSET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 5)
+
+/**
+ * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
+ *
+ * Return a new file descriptor for the device object described by
+ * the provided string.  The string should match a device listed in
+ * the devices subdirectory of the IOMMU group sysfs entry.  The
+ * group containing the device must already be added to this context.
+ * Return: new file descriptor on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_GET_DEVICE_FD	_IO(VFIO_TYPE, VFIO_BASE + 6)
+
+/* --------------- IOCTLs for DEVICE file descriptors --------------- */
+
+/**
+ * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
+ *						struct vfio_device_info)
+ *
+ * Retrieve information about the device.  Fills in provided
+ * struct vfio_device_info.  Caller sets argsz.
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
+#define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
+	__u32	num_regions;	/* Max region index + 1 */
+	__u32	num_irqs;	/* Max IRQ index + 1 */
+};
+#define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
+
+/**
+ * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
+ *				       struct vfio_region_info)
+ *
+ * Retrieve information about a device region.  Caller provides
+ * struct vfio_region_info with index value set.  Caller sets argsz.
+ * Implementation of region mapping is bus driver specific.  This is
+ * intended to describe MMIO, I/O port, as well as bus specific
+ * regions (ex. PCI config space).  Zero sized regions may be used
+ * to describe unimplemented regions (ex. unimplemented PCI BARs).
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_region_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_REGION_INFO_FLAG_READ	(1 << 0) /* Region supports read */
+#define VFIO_REGION_INFO_FLAG_WRITE	(1 << 1) /* Region supports write */
+#define VFIO_REGION_INFO_FLAG_MMAP	(1 << 2) /* Region supports mmap */
+	__u32	index;		/* Region index */
+	__u32	resv;		/* Reserved for alignment */
+	__u64	size;		/* Region size (bytes) */
+	__u64	offset;		/* Region offset from start of device fd */
+};
+#define VFIO_DEVICE_GET_REGION_INFO	_IO(VFIO_TYPE, VFIO_BASE + 8)
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ *				    struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ.  Caller provides
+ * struct vfio_irq_info with index value set.  Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific.  Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks.  Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flag indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts.  This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are setup as a set and new subindexes cannot be enabled without first
+ * disabling the entire index.  This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront.  In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
+ */
+struct vfio_irq_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_INFO_EVENTFD		(1 << 0)
+#define VFIO_IRQ_INFO_MASKABLE		(1 << 1)
+#define VFIO_IRQ_INFO_AUTOMASKED	(1 << 2)
+#define VFIO_IRQ_INFO_NORESIZE		(1 << 3)
+	__u32	index;		/* IRQ index */
+	__u32	count;		/* Number of IRQs within this index */
+};
+#define VFIO_DEVICE_GET_IRQ_INFO	_IO(VFIO_TYPE, VFIO_BASE + 9)
+
+/**
+ * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
+ *
+ * Set signaling, masking, and unmasking of interrupts.  Caller provides
+ * struct vfio_irq_set with all fields set.  'start' and 'count' indicate
+ * the range of subindexes being specified.
+ *
+ * The DATA flags specify the type of data provided.  If DATA_NONE, the
+ * operation performs the specified action immediately on the specified
+ * interrupt(s).  For example, to unmask AUTOMASKED interrupt [0,0]:
+ * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
+ *
+ * DATA_BOOL allows sparse support for the same on arrays of interrupts.
+ * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
+ * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
+ * data = {1,0,1}
+ *
+ * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
+ * A value of -1 can be used to either de-assign interrupts if already
+ * assigned or skip un-assigned interrupts.  For example, to set an eventfd
+ * to be triggered for interrupts [0,0] and [0,2]:
+ * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
+ * data = {fd1, -1, fd2}
+ * If index [0,1] is previously set, two count = 1 ioctl calls would be
+ * required to set [0,0] and [0,2] without changing [0,1].
+ *
+ * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
+ * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
+ * from userspace (ie. simulate hardware triggering).
+ *
+ * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
+ * enables the interrupt index for the device.  Individual subindex interrupts
+ * can be disabled using the -1 value for DATA_EVENTFD or the index can be
+ * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
+ *
+ * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
+ * ACTION_TRIGGER specifies kernel->user signaling.
+ */
+struct vfio_irq_set {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_SET_DATA_NONE		(1 << 0) /* Data not present */
+#define VFIO_IRQ_SET_DATA_BOOL		(1 << 1) /* Data is bool (u8) */
+#define VFIO_IRQ_SET_DATA_EVENTFD	(1 << 2) /* Data is eventfd (s32) */
+#define VFIO_IRQ_SET_ACTION_MASK	(1 << 3) /* Mask interrupt */
+#define VFIO_IRQ_SET_ACTION_UNMASK	(1 << 4) /* Unmask interrupt */
+#define VFIO_IRQ_SET_ACTION_TRIGGER	(1 << 5) /* Trigger interrupt */
+	__u32	index;
+	__u32	start;
+	__u32	count;
+	__u8	data[];
+};
+#define VFIO_DEVICE_SET_IRQS		_IO(VFIO_TYPE, VFIO_BASE + 10)
+
+#define VFIO_IRQ_SET_DATA_TYPE_MASK	(VFIO_IRQ_SET_DATA_NONE | \
+					 VFIO_IRQ_SET_DATA_BOOL | \
+					 VFIO_IRQ_SET_DATA_EVENTFD)
+#define VFIO_IRQ_SET_ACTION_TYPE_MASK	(VFIO_IRQ_SET_ACTION_MASK | \
+					 VFIO_IRQ_SET_ACTION_UNMASK | \
+					 VFIO_IRQ_SET_ACTION_TRIGGER)
+/**
+ * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
+ *
+ * Reset a device.
+ */
+#define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
+
+/*
+ * The VFIO-PCI bus driver makes use of the following fixed region and
+ * IRQ index mapping.  Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_PCI_BAR0_REGION_INDEX,
+	VFIO_PCI_BAR1_REGION_INDEX,
+	VFIO_PCI_BAR2_REGION_INDEX,
+	VFIO_PCI_BAR3_REGION_INDEX,
+	VFIO_PCI_BAR4_REGION_INDEX,
+	VFIO_PCI_BAR5_REGION_INDEX,
+	VFIO_PCI_ROM_REGION_INDEX,
+	VFIO_PCI_CONFIG_REGION_INDEX,
+	VFIO_PCI_NUM_REGIONS
+};
+
+enum {
+	VFIO_PCI_INTX_IRQ_INDEX,
+	VFIO_PCI_MSI_IRQ_INDEX,
+	VFIO_PCI_MSIX_IRQ_INDEX,
+	VFIO_PCI_NUM_IRQS
+};
+
+/* -------- API for Type1 VFIO IOMMU -------- */
+
+/**
+ * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_type1_info)
+ *
+ * Retrieve information about the IOMMU object. Fills in provided
+ * struct vfio_iommu_type1_info. Caller sets argsz.
+ *
+ * XXX Should we do these by CHECK_EXTENSION too?
+ */
+struct vfio_iommu_type1_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
+	__u64	iova_pgsizes;		/* Bitmap of supported page sizes */
+};
+
+#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_iommu_type1_dma_map)
+ *
+ * Map process virtual addresses to IO virtual addresses using the provided
+ * struct vfio_iommu_type1_dma_map. Caller sets argsz. READ and/or WRITE required.
+ */
+struct vfio_iommu_type1_dma_map {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
+#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
+	__u64	vaddr;				/* Process virtual address */
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/**
+ * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_iommu_type1_dma_unmap)
+ *
+ * Unmap IO virtual addresses using the provided
+ * struct vfio_iommu_type1_dma_unmap. Caller sets argsz.
+ */
+struct vfio_iommu_type1_dma_unmap {
+	__u32	argsz;
+	__u32	flags;
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+#endif /* VFIO_H */
commit df8c1b0207689a4bcc2396bf5327d5ea135a45bb
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Wed Sep 26 11:19:18 2012 -0600

    Update kernel header script to include vfio
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 53a6f87..67be2ef 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -57,7 +57,7 @@ done
 
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
-for header in kvm.h kvm_para.h vhost.h virtio_config.h virtio_ring.h; do
+for header in kvm.h kvm_para.h vfio.h vhost.h virtio_config.h virtio_ring.h; do
     cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
 rm -rf "$output/linux-headers/asm-generic"
commit a9321a4d49d65d29c2926a51aedc5b91a01f3591
Author: H. Peter Anvin <hpa at linux.intel.com>
Date:   Wed Sep 26 13:18:43 2012 -0700

    x86: Implement SMEP and SMAP
    
    This patch implements Supervisor Mode Execution Prevention (SMEP) and
    Supervisor Mode Access Prevention (SMAP) for x86.  The purpose of the
    patch, obviously, is to help kernel developers debug the support for
    those features.
    
    A fair bit of the code relates to the handling of CPUID features.  The
    CPUID code probably would get greatly simplified if all the feature
    bit words were unified into a single vector object, but in the
    interest of producing a minimal patch for SMEP/SMAP, and because I had
    very limited time for this project, I followed the existing style.
    
    [ v2: don't change the definition of the qemu64 CPU shorthand, since
      that breaks loading old snapshots.  Per Anthony Liguori this can be
      fixed once the CPU feature set is snapshot.
    
      Change the coding style slightly to conform to checkpatch.pl. ]
    
    Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 07892f9..9422003 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -353,6 +353,16 @@ void helper_sti(CPUX86State *env)
     env->eflags |= IF_MASK;
 }
 
+void helper_clac(CPUX86State *env)
+{
+    env->eflags &= ~AC_MASK;
+}
+
+void helper_stac(CPUX86State *env)
+{
+    env->eflags |= AC_MASK;
+}
+
 #if 0
 /* vm86plus instructions */
 void helper_cli_vm(CPUX86State *env)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index c4e6163..bb1e44e 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -104,6 +104,13 @@ static const char *svm_feature_name[] = {
     NULL, NULL, NULL, NULL,
 };
 
+static const char *cpuid_7_0_ebx_feature_name[] = {
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, "smep",
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, "smap", NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
 /* collects per-function cpuid data
  */
 typedef struct model_features_t {
@@ -219,14 +226,17 @@ static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
                                     uint32_t *ext2_features,
                                     uint32_t *ext3_features,
                                     uint32_t *kvm_features,
-                                    uint32_t *svm_features)
+                                    uint32_t *svm_features,
+                                    uint32_t *cpuid_7_0_ebx_features)
 {
     if (!lookup_feature(features, flagname, NULL, feature_name) &&
         !lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
         !lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
         !lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
         !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
-        !lookup_feature(svm_features, flagname, NULL, svm_feature_name))
+        !lookup_feature(svm_features, flagname, NULL, svm_feature_name) &&
+        !lookup_feature(cpuid_7_0_ebx_features, flagname, NULL,
+                        cpuid_7_0_ebx_feature_name))
             fprintf(stderr, "CPU feature %s not found\n", flagname);
 }
 
@@ -287,6 +297,7 @@ typedef struct x86_def_t {
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
 
 /* maintains list of cpu model definitions
  */
@@ -1097,10 +1108,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
     uint32_t plus_features = 0, plus_ext_features = 0;
     uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
     uint32_t plus_kvm_features = 0, plus_svm_features = 0;
+    uint32_t plus_7_0_ebx_features = 0;
     /* Features to be removed */
     uint32_t minus_features = 0, minus_ext_features = 0;
     uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
     uint32_t minus_kvm_features = 0, minus_svm_features = 0;
+    uint32_t minus_7_0_ebx_features = 0;
     uint32_t numvalue;
 
     for (def = x86_defs; def; def = def->next)
@@ -1127,8 +1140,8 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
 #endif
 
     add_flagname_to_bitmaps("hypervisor", &plus_features,
-        &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
-        &plus_kvm_features, &plus_svm_features);
+            &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
+            &plus_kvm_features, &plus_svm_features,  &plus_7_0_ebx_features);
 
     featurestr = strtok(NULL, ",");
 
@@ -1138,12 +1151,12 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
             add_flagname_to_bitmaps(featurestr + 1, &plus_features,
                             &plus_ext_features, &plus_ext2_features,
                             &plus_ext3_features, &plus_kvm_features,
-                            &plus_svm_features);
+                            &plus_svm_features, &plus_7_0_ebx_features);
         } else if (featurestr[0] == '-') {
             add_flagname_to_bitmaps(featurestr + 1, &minus_features,
                             &minus_ext_features, &minus_ext2_features,
                             &minus_ext3_features, &minus_kvm_features,
-                            &minus_svm_features);
+                            &minus_svm_features, &minus_7_0_ebx_features);
         } else if ((val = strchr(featurestr, '='))) {
             *val = 0; val++;
             if (!strcmp(featurestr, "family")) {
@@ -1249,16 +1262,21 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
     x86_cpu_def->ext3_features |= plus_ext3_features;
     x86_cpu_def->kvm_features |= plus_kvm_features;
     x86_cpu_def->svm_features |= plus_svm_features;
+    x86_cpu_def->cpuid_7_0_ebx_features |= plus_7_0_ebx_features;
     x86_cpu_def->features &= ~minus_features;
     x86_cpu_def->ext_features &= ~minus_ext_features;
     x86_cpu_def->ext2_features &= ~minus_ext2_features;
     x86_cpu_def->ext3_features &= ~minus_ext3_features;
     x86_cpu_def->kvm_features &= ~minus_kvm_features;
     x86_cpu_def->svm_features &= ~minus_svm_features;
+    x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_7_0_ebx_features;
     if (check_cpuid) {
         if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
             goto error;
     }
+    if (x86_cpu_def->cpuid_7_0_ebx_features && x86_cpu_def->level < 7) {
+        x86_cpu_def->level = 7;
+    }
     g_free(s);
     return 0;
 
@@ -1374,7 +1392,7 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
     env->cpuid_kvm_features = def->kvm_features;
     env->cpuid_svm_features = def->svm_features;
     env->cpuid_ext4_features = def->ext4_features;
-    env->cpuid_7_0_ebx = def->cpuid_7_0_ebx_features;
+    env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features;
     env->cpuid_xlevel2 = def->xlevel2;
     object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
                             "tsc-frequency", &error);
@@ -1562,7 +1580,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         /* Structured Extended Feature Flags Enumeration Leaf */
         if (count == 0) {
             *eax = 0; /* Maximum ECX value for sub-leaves */
-            *ebx = env->cpuid_7_0_ebx; /* Feature flags */
+            *ebx = env->cpuid_7_0_ebx_features; /* Feature flags */
             *ecx = 0; /* Reserved */
             *edx = 0; /* Reserved */
         } else {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 4995084..e4a7d5b 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -123,8 +123,8 @@
 
 /* hidden flags - used internally by qemu to represent additional cpu
    states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
-   redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
-   position to ease oring with eflags. */
+   redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK
+   bit positions to ease oring with eflags. */
 /* current cpl */
 #define HF_CPL_SHIFT         0
 /* true if soft mmu is being used */
@@ -147,10 +147,12 @@
 #define HF_CS64_SHIFT       15 /* only used on x86_64: 64 bit code segment  */
 #define HF_RF_SHIFT         16 /* must be same as eflags */
 #define HF_VM_SHIFT         17 /* must be same as eflags */
+#define HF_AC_SHIFT         18 /* must be same as eflags */
 #define HF_SMM_SHIFT        19 /* CPU in SMM mode */
 #define HF_SVME_SHIFT       20 /* SVME enabled (copy of EFER.SVME) */
 #define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
 #define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
+#define HF_SMAP_SHIFT       23 /* CR4.SMAP */
 
 #define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
 #define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
@@ -168,10 +170,12 @@
 #define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
 #define HF_RF_MASK           (1 << HF_RF_SHIFT)
 #define HF_VM_MASK           (1 << HF_VM_SHIFT)
+#define HF_AC_MASK           (1 << HF_AC_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 #define HF_SVME_MASK         (1 << HF_SVME_SHIFT)
 #define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
 
 /* hflags2 */
 
@@ -210,6 +214,13 @@
 #define CR4_OSFXSR_SHIFT 9
 #define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1 << 10)
+#define CR4_VMXE_MASK   (1 << 13)
+#define CR4_SMXE_MASK   (1 << 14)
+#define CR4_FSGSBASE_MASK (1 << 16)
+#define CR4_PCIDE_MASK  (1 << 17)
+#define CR4_OSXSAVE_MASK (1 << 18)
+#define CR4_SMEP_MASK   (1 << 20)
+#define CR4_SMAP_MASK   (1 << 21)
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -474,6 +485,9 @@
 #define CPUID_SVM_PAUSEFILTER  (1 << 10)
 #define CPUID_SVM_PFTHRESHOLD  (1 << 12)
 
+#define CPUID_7_0_EBX_SMEP     (1 << 7)
+#define CPUID_7_0_EBX_SMAP     (1 << 20)
+
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
 #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
 #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
@@ -649,7 +663,7 @@ typedef struct {
 #define CPU_NB_REGS CPU_NB_REGS32
 #endif
 
-#define NB_MMU_MODES 2
+#define NB_MMU_MODES 3
 
 typedef enum TPRAccess {
     TPR_ACCESS_READ,
@@ -779,7 +793,7 @@ typedef struct CPUX86State {
     uint32_t cpuid_xlevel2;
     uint32_t cpuid_ext4_features;
     /* Flags from CPUID[EAX=7,ECX=0].EBX */
-    uint32_t cpuid_7_0_ebx;
+    uint32_t cpuid_7_0_ebx_features;
 
     /* MTRRs */
     uint64_t mtrr_fixed[11];
@@ -1018,10 +1032,15 @@ static inline CPUX86State *cpu_init(const char *cpu_model)
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
-#define MMU_USER_IDX 1
+#define MMU_MODE2_SUFFIX _ksmap /* Kernel with SMAP override */
+#define MMU_KERNEL_IDX  0
+#define MMU_USER_IDX    1
+#define MMU_KSMAP_IDX   2
 static inline int cpu_mmu_index (CPUX86State *env)
 {
-    return (env->hflags & HF_CPL_MASK) == 3 ? 1 : 0;
+    return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
+        ((env->hflags & HF_SMAP_MASK) && (env->eflags & AC_MASK))
+        ? MMU_KSMAP_IDX : MMU_KERNEL_IDX;
 }
 
 #undef EAX
@@ -1107,7 +1126,7 @@ static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc,
     *cs_base = env->segs[R_CS].base;
     *pc = *cs_base + env->eip;
     *flags = env->hflags |
-        (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
+        (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
 }
 
 void do_cpu_init(X86CPU *cpu);
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 8a5da3d..c635667 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -443,17 +443,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
 #if defined(DEBUG_MMU)
     printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
 #endif
-    if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
-        (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+    if ((new_cr4 ^ env->cr[4]) &
+        (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
+         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
         tlb_flush(env, 1);
     }
     /* SSE handling */
-    if (!(env->cpuid_features & CPUID_SSE))
+    if (!(env->cpuid_features & CPUID_SSE)) {
         new_cr4 &= ~CR4_OSFXSR_MASK;
-    if (new_cr4 & CR4_OSFXSR_MASK)
+    }
+    env->hflags &= ~HF_OSFXSR_MASK;
+    if (new_cr4 & CR4_OSFXSR_MASK) {
         env->hflags |= HF_OSFXSR_MASK;
-    else
-        env->hflags &= ~HF_OSFXSR_MASK;
+    }
+
+    if (!(env->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)) {
+        new_cr4 &= ~CR4_SMAP_MASK;
+    }
+    env->hflags &= ~HF_SMAP_MASK;
+    if (new_cr4 & CR4_SMAP_MASK) {
+        env->hflags |= HF_SMAP_MASK;
+    }
 
     env->cr[4] = new_cr4;
 }
@@ -591,17 +601,38 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
             /* 2 MB page */
             page_size = 2048 * 1024;
             ptep ^= PG_NX_MASK;
-            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+            if ((ptep & PG_NX_MASK) && is_write1 == 2) {
                 goto do_fault_protect;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            }
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pde & PG_DIRTY_MASK);
             if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@@ -635,15 +666,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
             ptep ^= PG_NX_MASK;
             if ((ptep & PG_NX_MASK) && is_write1 == 2)
                 goto do_fault_protect;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pte & PG_DIRTY_MASK);
             if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@@ -670,15 +721,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
         /* if PSE bit is set, then we use a 4MB page */
         if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
             page_size = 4096 * 1024;
-            if (is_user) {
-                if (!(pde & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(pde & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(pde & PG_RW_MASK))
+                }
+                if (is_write && !(pde & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (pde & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (pde & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(pde & PG_RW_MASK))
+                    is_write && !(pde & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pde & PG_DIRTY_MASK);
             if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@@ -707,15 +778,35 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
             }
             /* combine pde and pte user and rw protections */
             ptep = pte & pde;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pte & PG_DIRTY_MASK);
             if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@@ -762,8 +853,9 @@ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
     if (is_user)
         error_code |= PG_ERROR_U_MASK;
     if (is_write1 == 2 &&
-        (env->efer & MSR_EFER_NXE) &&
-        (env->cr[4] & CR4_PAE_MASK))
+        (((env->efer & MSR_EFER_NXE) &&
+          (env->cr[4] & CR4_PAE_MASK)) ||
+         (env->cr[4] & CR4_SMEP_MASK)))
         error_code |= PG_ERROR_I_D_MASK;
     if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
         /* cr2 is not modified in case of exceptions */
diff --git a/target-i386/helper.h b/target-i386/helper.h
index ab6af63..93850ce 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -67,6 +67,8 @@ DEF_HELPER_3(raise_interrupt, void, env, int, int)
 DEF_HELPER_2(raise_exception, void, env, int)
 DEF_HELPER_1(cli, void, env)
 DEF_HELPER_1(sti, void, env)
+DEF_HELPER_1(clac, void, env)
+DEF_HELPER_1(stac, void, env)
 DEF_HELPER_1(set_inhibit_irq, void, env)
 DEF_HELPER_1(reset_inhibit_irq, void, env)
 DEF_HELPER_3(boundw, void, env, tl, int)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 323869d..0a7e4e3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -107,6 +107,7 @@ typedef struct DisasContext {
     int cpuid_ext_features;
     int cpuid_ext2_features;
     int cpuid_ext3_features;
+    int cpuid_7_0_ebx_features;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -6556,7 +6557,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             }
             gen_pop_update(s);
             s->cc_op = CC_OP_EFLAGS;
-            /* abort translation because TF flag may change */
+            /* abort translation because TF/AC flag may change */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
         }
@@ -7206,6 +7207,24 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
                     break;
+                case 2: /* clac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_clac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                    break;
+                case 3: /* stac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_stac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                    break;
                 default:
                     goto illegal_op;
                 }
@@ -7901,15 +7920,13 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        if (dc->cpl == 3)
-            dc->mem_index = 2 * 4;
-        else
-            dc->mem_index = 1 * 4;
+        dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
     }
     dc->cpuid_features = env->cpuid_features;
     dc->cpuid_ext_features = env->cpuid_ext_features;
     dc->cpuid_ext2_features = env->cpuid_ext2_features;
     dc->cpuid_ext3_features = env->cpuid_ext3_features;
+    dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
commit 4a19e505df659dd25a77fb790399744f3e1f971c
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Sep 6 10:05:39 2012 +0000

    i386: -cpu help: remove reference to specific CPUID leaves/registers
    
    The -cpu configuration interface is based on a list of feature names or
    properties, on a single namespace, so there's no need to mention on
    which CPUID leaf/register each flag is located.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-by: Don Slutz <Don at CloudSwitch.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 7577381..c4e6163 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1312,13 +1312,13 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
     }
     (*cpu_fprintf)(f, "\nRecognized CPUID flags:\n");
     listflags(buf, sizeof(buf), (uint32_t)~0, feature_name, 1);
-    (*cpu_fprintf)(f, "  f_edx: %s\n", buf);
+    (*cpu_fprintf)(f, "  %s\n", buf);
     listflags(buf, sizeof(buf), (uint32_t)~0, ext_feature_name, 1);
-    (*cpu_fprintf)(f, "  f_ecx: %s\n", buf);
+    (*cpu_fprintf)(f, "  %s\n", buf);
     listflags(buf, sizeof(buf), (uint32_t)~0, ext2_feature_name, 1);
-    (*cpu_fprintf)(f, "  extf_edx: %s\n", buf);
+    (*cpu_fprintf)(f, "  %s\n", buf);
     listflags(buf, sizeof(buf), (uint32_t)~0, ext3_feature_name, 1);
-    (*cpu_fprintf)(f, "  extf_ecx: %s\n", buf);
+    (*cpu_fprintf)(f, "  %s\n", buf);
 }
 
 CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
commit 3b671a40cab2404bc63e57db8cd3afa4ec70bfab
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Sep 6 10:05:38 2012 +0000

    i386: cpu: eliminate duplicate feature names
    
    Instead of having duplicate feature names on the ext2_feature array for
    the AMD feature bit aliases, we keep the feature names only on the
    feature_name[] array, and copy the corresponding bits to
    cpuid_ext2_features in case the CPU vendor is AMD.
    
    This will:
    
    - Make sure we don't set the feature bit aliases on Intel CPUs;
    - Make it easier to convert feature bits to CPU properties, as now we
      have a single bit on the x86_def_t struct for each CPU feature.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-by: Don Slutz <Don at CloudSwitch.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 7cad3b5..7577381 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -61,15 +61,19 @@ static const char *ext_feature_name[] = {
     "tsc-deadline", "aes", "xsave", "osxsave",
     "avx", NULL, NULL, "hypervisor",
 };
+/* Feature names that are already defined on feature_name[] but are set on
+ * CPUID[8000_0001].EDX on AMD CPUs don't have their names on
+ * ext2_feature_name[]. They are copied automatically to cpuid_ext2_features
+ * if and only if CPU vendor is AMD.
+ */
 static const char *ext2_feature_name[] = {
-    "fpu", "vme", "de", "pse",
-    "tsc", "msr", "pae", "mce",
-    "cx8" /* AMD CMPXCHG8B */, "apic", NULL, "syscall",
-    "mtrr", "pge", "mca", "cmov",
-    "pat", "pse36", NULL, NULL /* Linux mp */,
-    "nx|xd", NULL, "mmxext", "mmx",
-    "fxsr", "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
-    NULL, "lm|i64", "3dnowext", "3dnow",
+    NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */,
+    NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */,
+    NULL /* cx8 */ /* AMD CMPXCHG8B */, NULL /* apic */, NULL, "syscall",
+    NULL /* mtrr */, NULL /* pge */, NULL /* mca */, NULL /* cmov */,
+    NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */,
+    "nx|xd", NULL, "mmxext", NULL /* mmx */,
+    NULL /* fxsr */, "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
 };
 static const char *ext3_feature_name[] = {
     "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */,
@@ -1374,6 +1378,17 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
     env->cpuid_xlevel2 = def->xlevel2;
     object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
                             "tsc-frequency", &error);
+
+    /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on
+     * CPUID[1].EDX.
+     */
+    if (env->cpuid_vendor1 == CPUID_VENDOR_AMD_1 &&
+            env->cpuid_vendor2 == CPUID_VENDOR_AMD_2 &&
+            env->cpuid_vendor3 == CPUID_VENDOR_AMD_3) {
+        env->cpuid_ext2_features &= ~CPUID_EXT2_AMD_ALIASES;
+        env->cpuid_ext2_features |= (def->features & CPUID_EXT2_AMD_ALIASES);
+    }
+
     if (!kvm_enabled()) {
         env->cpuid_features &= TCG_FEATURES;
         env->cpuid_ext_features &= TCG_EXT_FEATURES;
commit 60032ac04c675cf8950497f9d06e681b2dc7085c
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Sep 6 10:05:37 2012 +0000

    i386: cpu: replace EXT2_FEATURE_MASK with CPUID_EXT2_AMD_ALIASES
    
    Both constants have the same value, but CPUID_EXT2_AMD_ALIASES is
    defined without using magic numbers.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-by: Don Slutz <Don at CloudSwitch.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index fd4fe28..7cad3b5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -258,7 +258,6 @@ typedef struct x86_def_t {
           CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_PGE | CPUID_CMOV | \
           CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
           CPUID_PAE | CPUID_SEP | CPUID_APIC)
-#define EXT2_FEATURE_MASK 0x0183F3FF
 
 #define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
@@ -276,7 +275,7 @@ typedef struct x86_def_t {
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
           CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
-#define TCG_EXT2_FEATURES ((TCG_FEATURES & EXT2_FEATURE_MASK) | \
+#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
           /* missing:
@@ -305,7 +304,7 @@ static x86_def_t builtin_x86_defs[] = {
             CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
             CPUID_PSE36,
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
@@ -325,7 +324,7 @@ static x86_def_t builtin_x86_defs[] = {
             CPUID_PSE36 | CPUID_VME | CPUID_HT,
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_CX16 |
             CPUID_EXT_POPCNT,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
             CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_MMXEXT |
             CPUID_EXT2_FFXSR | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP,
@@ -373,7 +372,7 @@ static x86_def_t builtin_x86_defs[] = {
         /* Missing: CPUID_EXT_POPCNT, CPUID_EXT_MONITOR */
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_CX16,
         /* Missing: CPUID_EXT2_PDPE1GB, CPUID_EXT2_RDTSCP */
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         /* Missing: CPUID_EXT3_LAHF_LM, CPUID_EXT3_CMP_LEG, CPUID_EXT3_EXTAPIC,
                     CPUID_EXT3_CR8LEG, CPUID_EXT3_ABM, CPUID_EXT3_SSE4A,
@@ -402,7 +401,7 @@ static x86_def_t builtin_x86_defs[] = {
         .features = PPRO_FEATURES |
             CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36,
         .ext_features = CPUID_EXT_SSE3,
-        .ext2_features = PPRO_FEATURES & EXT2_FEATURE_MASK,
+        .ext2_features = PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES,
         .ext3_features = 0,
         .xlevel = 0x80000008,
         .model_id = "Common 32-bit KVM processor"
@@ -467,8 +466,10 @@ static x86_def_t builtin_x86_defs[] = {
         .family = 6,
         .model = 2,
         .stepping = 3,
-        .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
+        .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR |
+            CPUID_MCA,
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
+            CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
         .xlevel = 0x80000008,
     },
     {
@@ -484,7 +485,8 @@ static x86_def_t builtin_x86_defs[] = {
             /* Some CPUs got no CPUID_SEP */
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 |
             CPUID_EXT_DSCPL | CPUID_EXT_EST | CPUID_EXT_TM2 | CPUID_EXT_XTPR,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) | CPUID_EXT2_NX,
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
+            CPUID_EXT2_NX,
         .ext3_features = CPUID_EXT3_LAHF_LM,
         .xlevel = 0x8000000A,
         .model_id = "Intel(R) Atom(TM) CPU N270   @ 1.60GHz",
commit 8fad4b44a0f71cd404f95f109657c0ccbf11f8f9
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Sep 6 10:05:36 2012 +0000

    i386: kvm: use a #define for the set of alias feature bits
    
    Instead of using a hardcoded hex constant, define CPUID_EXT2_AMD_ALIASES
    as the set of CPUID[8000_0001].EDX bits that on AMD are the same as the
    bits of CPUID[1].EDX.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-By: Igor Mammedov <imammedo at redhat.com>
    Reviewed-by: Don Slutz <Don at CloudSwitch.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index d7ea2f9..4995084 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -409,6 +409,7 @@
 #define CPUID_EXT_HYPERVISOR  (1 << 31)
 
 #define CPUID_EXT2_FPU     (1 << 0)
+#define CPUID_EXT2_VME     (1 << 1)
 #define CPUID_EXT2_DE      (1 << 2)
 #define CPUID_EXT2_PSE     (1 << 3)
 #define CPUID_EXT2_TSC     (1 << 4)
@@ -436,6 +437,17 @@
 #define CPUID_EXT2_3DNOWEXT (1 << 30)
 #define CPUID_EXT2_3DNOW   (1 << 31)
 
+/* CPUID[8000_0001].EDX bits that are aliase of CPUID[1].EDX bits on AMD CPUs */
+#define CPUID_EXT2_AMD_ALIASES (CPUID_EXT2_FPU | CPUID_EXT2_VME | \
+                                CPUID_EXT2_DE | CPUID_EXT2_PSE | \
+                                CPUID_EXT2_TSC | CPUID_EXT2_MSR | \
+                                CPUID_EXT2_PAE | CPUID_EXT2_MCE | \
+                                CPUID_EXT2_CX8 | CPUID_EXT2_APIC | \
+                                CPUID_EXT2_MTRR | CPUID_EXT2_PGE | \
+                                CPUID_EXT2_MCA | CPUID_EXT2_CMOV | \
+                                CPUID_EXT2_PAT | CPUID_EXT2_PSE36 | \
+                                CPUID_EXT2_MMX | CPUID_EXT2_FXSR)
+
 #define CPUID_EXT3_LAHF_LM (1 << 0)
 #define CPUID_EXT3_CMP_LEG (1 << 1)
 #define CPUID_EXT3_SVM     (1 << 2)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index acb9369..5b18383 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -165,7 +165,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                      * so add missing bits according to the AMD spec:
                      */
                     cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
-                    ret |= cpuid_1_edx & 0x183f3ff;
+                    ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
                     break;
                 }
                 break;
commit b1f4679392a03f2b26a37bfa52e95d6cc4f73d82
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Sep 6 10:05:35 2012 +0000

    i386: kvm: bit 10 of CPUID[8000_0001].EDX is reserved
    
    Bit 10 of CPUID[8000_0001].EDX is not defined as an alias of
    CPUID[1].EDX[10], so do not duplicate it on
    kvm_arch_get_supported_cpuid().
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-By: Igor Mammedov <imammedo at redhat.com>
    Reviewed-by: Don Slutz <Don at CloudSwitch.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6790180..acb9369 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -165,7 +165,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                      * so add missing bits according to the AMD spec:
                      */
                     cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
-                    ret |= cpuid_1_edx & 0x183f7ff;
+                    ret |= cpuid_1_edx & 0x183f3ff;
                     break;
                 }
                 break;
commit 90f0b71153c6a85d03967244b9889f892841d835
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:23:02 2012 +0200

    qemu-iotests: add tests for streaming error handling
    
    Add a test for each of report/ignore/stop.  The tests use blkdebug
    to generate an error in the middle of a script.  The error is
    recoverable (once = "on") so that we can test resuming a job after
    stopping for an error.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dfacdf1..dd4ef11 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -195,6 +195,226 @@ class TestSmallerBackingFile(ImageStreamingTestCase):
         self.assert_no_active_streams()
         self.vm.shutdown()
 
+class TestErrors(ImageStreamingTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    # this should match STREAM_BUFFER_SIZE/512 in block/stream.c
+    STREAM_BUFFER_SIZE = 512 * 1024
+
+    def create_blkdebug_file(self, name, event, errno):
+        file = open(name, 'w')
+        file.write('''
+[inject-error]
+state = "1"
+event = "%s"
+errno = "%d"
+immediately = "off"
+once = "on"
+sector = "%d"
+
+[set-state]
+state = "1"
+event = "%s"
+new_state = "2"
+
+[set-state]
+state = "2"
+event = "%s"
+new_state = "1"
+''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event))
+        file.close()
+
+class TestEIO(TestErrors):
+    def setUp(self):
+        self.blkdebug_file = backing_img + ".blkdebug"
+        self.create_image(backing_img, TestErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5)
+        qemu_img('create', '-f', iotests.imgfmt,
+                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+                       % (self.blkdebug_file, backing_img),
+                 test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_report(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_ignore(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='ignore')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_stop(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='stop')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(result, 'return[0]/io-status', 'failed')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    self.assert_qmp(result, 'return[0]/io-status', 'ok')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp_absent(event, 'data/error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_enospc(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+class TestENOSPC(TestErrors):
+    def setUp(self):
+        self.blkdebug_file = backing_img + ".blkdebug"
+        self.create_image(backing_img, TestErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28)
+        qemu_img('create', '-f', iotests.imgfmt,
+                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+                       % (self.blkdebug_file, backing_img),
+                 test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_enospc(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(result, 'return[0]/io-status', 'nospace')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    self.assert_qmp(result, 'return[0]/io-status', 'ok')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp_absent(event, 'data/error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
 
 class TestStreamStop(ImageStreamingTestCase):
     image_len = 8 * 1024 * 1024 * 1024 # GB
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 594c16f..fa16b5c 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-........
+.............
 ----------------------------------------------------------------------
-Ran 8 tests
+Ran 13 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index a94ea75..3c60b2d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -138,6 +138,13 @@ class QMPTestCase(unittest.TestCase):
                     self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d)))
         return d
 
+    def assert_qmp_absent(self, d, path):
+        try:
+            result = self.dictpath(d, path)
+        except AssertionError:
+            return
+        self.fail('path "%s" has value "%s"' % (path, str(result)))
+
     def assert_qmp(self, d, path, value):
         '''Assert that the value for a specific path in a QMP dict matches'''
         result = self.dictpath(d, path)
commit 4f45056841abced5d57485edf0ff1d2ffc042cb1
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:23:01 2012 +0200

    qemu-iotests: map underscore to dash in QMP argument names
    
    iotests.py provides a convenience function that uses Python keyword
    arguments to represent QMP command arguments.  However, almost all
    QMP commands use dashes for argument names (the sole exception is
    block_set_io_throttle), and dashes are not allowed in a keyword
    argument name.  Hence provide automatic conversion of underscores
    to dashes.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e05b1d6..a94ea75 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -19,6 +19,7 @@
 import os
 import re
 import subprocess
+import string
 import unittest
 import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP'))
 import qmp
@@ -96,9 +97,14 @@ class VM(object):
             os.remove(self._qemu_log_path)
             self._popen = None
 
+    underscore_to_dash = string.maketrans('_', '-')
     def qmp(self, cmd, **args):
         '''Invoke a QMP command and return the result dict'''
-        return self._qmp.cmd(cmd, args=args)
+        qmp_args = dict()
+        for k in args.keys():
+            qmp_args[k.translate(self.underscore_to_dash)] = args[k]
+
+        return self._qmp.cmd(cmd, args=qmp_args)
 
     def get_qmp_events(self, wait=False):
         '''Poll for queued QMP events and return a list of dicts'''
commit 8f96b5be92fbd74798b97b1dc1ff5fbbe249ed11
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:23:00 2012 +0200

    blkdebug: process all set_state rules in the old state
    
    Currently it is impossible to write a blkdebug script that ping-pongs
    between two states, because the second set-state rule will use the
    state that is set in the first.  If you have
    
        [set-state]
        event = "..."
        state = "1"
        new_state = "2"
    
        [set-state]
        event = "..."
        state = "2"
        new_state = "1"
    
    for example the state will remain locked at 1.  This can be fixed
    by first processing all rules, and then setting the state.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 59dcea0..1206d52 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -28,6 +28,7 @@
 
 typedef struct BDRVBlkdebugState {
     int state;
+    int new_state;
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
 } BDRVBlkdebugState;
@@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs)
 }
 
 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    int old_state, bool injected)
+    bool injected)
 {
     BDRVBlkdebugState *s = bs->opaque;
 
     /* Only process rules for the current state */
-    if (rule->state && rule->state != old_state) {
+    if (rule->state && rule->state != s->state) {
         return injected;
     }
 
@@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
         break;
 
     case ACTION_SET_STATE:
-        s->state = rule->options.set_state.new_state;
+        s->new_state = rule->options.set_state.new_state;
         break;
     }
     return injected;
@@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
     BDRVBlkdebugState *s = bs->opaque;
     struct BlkdebugRule *rule;
-    int old_state = s->state;
     bool injected;
 
     assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
 
     injected = false;
+    s->new_state = s->state;
     QLIST_FOREACH(rule, &s->rules[event], next) {
-        injected = process_rule(bs, rule, old_state, injected);
+        injected = process_rule(bs, rule, injected);
     }
+    s->state = s->new_state;
 }
 
 static int64_t blkdebug_getlength(BlockDriverState *bs)
commit 1d809098aa9518cda41c2cf6e660d3d602614907
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:59 2012 +0200

    stream: add on-error argument
    
    This patch adds support for error management to streaming.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/stream.c b/block/stream.c
index 57e4be7..7926652 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -31,6 +31,7 @@ typedef struct StreamBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *base;
+    BlockdevOnError on_error;
     char backing_file_id[1024];
 } StreamBlockJob;
 
@@ -78,6 +79,7 @@ static void coroutine_fn stream_run(void *opaque)
     BlockDriverState *bs = s->common.bs;
     BlockDriverState *base = s->base;
     int64_t sector_num, end;
+    int error = 0;
     int ret = 0;
     int n = 0;
     void *buf;
@@ -142,7 +144,19 @@ wait:
             ret = stream_populate(bs, sector_num, n, buf);
         }
         if (ret < 0) {
-            break;
+            BlockErrorAction action =
+                block_job_error_action(&s->common, s->common.bs, s->on_error,
+                                       true, -ret);
+            if (action == BDRV_ACTION_STOP) {
+                n = 0;
+                continue;
+            }
+            if (error == 0) {
+                error = ret;
+            }
+            if (action == BDRV_ACTION_REPORT) {
+                break;
+            }
         }
         ret = 0;
 
@@ -154,6 +168,9 @@ wait:
         bdrv_disable_copy_on_read(bs);
     }
 
+    /* Do not remove the backing file if an error occurred but was ignored.  */
+    ret = error;
+
     if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
         const char *base_id = NULL, *base_fmt = NULL;
         if (base) {
@@ -189,11 +206,19 @@ static BlockJobType stream_job_type = {
 
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
                   const char *base_id, int64_t speed,
+                  BlockdevOnError on_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     StreamBlockJob *s;
 
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-error");
+        return;
+    }
+
     s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
@@ -204,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
         pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
     }
 
+    s->on_error = on_error;
     s->common.co = qemu_coroutine_create(stream_run);
     trace_stream_start(bs, base, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block_int.h b/block_int.h
index 785d43d..f4bae04 100644
--- a/block_int.h
+++ b/block_int.h
@@ -299,6 +299,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
  * @base_id: The file name that will be written to @bs as the new
  * backing file if the job completes.  Ignored if @base is %NULL.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -310,7 +311,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
  * @base_id in the written image and to @base in the live BlockDriverState.
  */
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *base_id, int64_t speed,
+                  const char *base_id, int64_t speed, BlockdevOnError on_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
diff --git a/blockdev.c b/blockdev.c
index d52a830..5f18dfa 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1096,13 +1096,18 @@ static void block_job_cb(void *opaque, int ret)
 }
 
 void qmp_block_stream(const char *device, bool has_base,
-                      const char *base, bool has_speed,
-                      int64_t speed, Error **errp)
+                      const char *base, bool has_speed, int64_t speed,
+                      bool has_on_error, BlockdevOnError on_error,
+                      Error **errp)
 {
     BlockDriverState *bs;
     BlockDriverState *base_bs = NULL;
     Error *local_err = NULL;
 
+    if (!has_on_error) {
+        on_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+
     bs = bdrv_find(device);
     if (!bs) {
         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1118,7 +1123,7 @@ void qmp_block_stream(const char *device, bool has_base,
     }
 
     stream_start(bs, base_bs, base, has_speed ? speed : 0,
-                 block_job_cb, bs, &local_err);
+                 on_error, block_job_cb, bs, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         return;
diff --git a/hmp.c b/hmp.c
index 55601f7..df789b2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,7 +930,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
     qmp_block_stream(device, base != NULL, base,
-                     qdict_haskey(qdict, "speed"), speed, &error);
+                     qdict_haskey(qdict, "speed"), speed,
+                     BLOCKDEV_ON_ERROR_REPORT, true, &error);
 
     hmp_handle_error(mon, &error);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 14e7a0d..768b4c7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1869,13 +1869,18 @@
 #
 # @speed:  #optional the maximum speed, in bytes per second
 #
+# @on-error: #optional the action to take on an error (default report).
+#            'stop' and 'enospc' can only be used if the block device
+#            supports io-status (see BlockInfo).  Since 1.3.
+#
 # Returns: Nothing on success
 #          If @device does not exist, DeviceNotFound
 #
 # Since: 1.1
 ##
-{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str',
-                                       '*speed': 'int' } }
+{ 'command': 'block-stream',
+  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError' } }
 
 ##
 # @block-job-set-speed:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 71d7c25..ea93b1d 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -787,7 +787,7 @@ EQMP
 
     {
         .name       = "block-stream",
-        .args_type  = "device:B,base:s?,speed:o?",
+        .args_type  = "device:B,base:s?,speed:o?,on-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
commit 32c81a4a6ecc3f50efc9c270a269e4d3d8a9fbd5
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:58 2012 +0200

    block: introduce block job error
    
    The following behaviors are possible:
    
    'report': The behavior is the same as in 1.1.  An I/O error, whether
    during a read or a write, will complete the job immediately with an
    error code.
    
    'ignore': An I/O error, whether during a read or a write, will be
    ignored.  For streaming, the job will complete with an error and the
    backing file will be left in place.  For mirroring, the sector will be
    marked again as dirty and re-examined later.
    
    'stop': The job will be paused and the job iostatus will be set to
    failed or nospace, while the VM will keep running.  This can only be
    specified if the block device has rerror=stop and werror=stop or enospc.
    
    'enospc': Behaves as 'stop' for ENOSPC errors, 'report' for others.
    
    In all cases, even for 'report', the I/O error is reported as a QMP
    event BLOCK_JOB_ERROR, with the same arguments as BLOCK_IO_ERROR.
    
    It is possible that while stopping the VM a BLOCK_IO_ERROR event will be
    reported and will clobber the event from BLOCK_JOB_ERROR, or vice versa.
    This is not really avoidable since stopping the VM completes all pending
    I/O requests.  In fact, it is already possible now that a series of
    BLOCK_IO_ERROR events are reported with rerror=stop, because vm_stop
    calls bdrv_drain_all and this can generate further errors.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 4491020..987c575 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -96,6 +96,28 @@ Example:
                "speed": 0 },
      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
 
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored, the job may fail later
+    "report": error will be reported and the job canceled
+    "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
 DEVICE_TRAY_MOVED
 -----------------
 
diff --git a/block.c b/block.c
index 8b0ba67..c108a76 100644
--- a/block.c
+++ b/block.c
@@ -1387,8 +1387,9 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
     }
 }
 
-static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                                      BlockErrorAction action, bool is_read)
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read)
 {
     QObject *data;
     const char *action_str;
@@ -1411,7 +1412,7 @@ static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               bdrv->device_name,
                               action_str,
                               is_read ? "read" : "write");
-    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
+    monitor_protocol_event(ev, data);
 
     qobject_decref(data);
 }
@@ -2513,7 +2514,7 @@ void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                        bool is_read, int error)
 {
     assert(error >= 0);
-    bdrv_emit_qmp_error_event(bs, action, is_read);
+    bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
     if (action == BDRV_ACTION_STOP) {
         vm_stop(RUN_STATE_IO_ERROR);
         bdrv_iostatus_set_err(bs, error);
diff --git a/block_int.h b/block_int.h
index 615aafc..785d43d 100644
--- a/block_int.h
+++ b/block_int.h
@@ -31,6 +31,7 @@
 #include "qemu-timer.h"
 #include "qapi-types.h"
 #include "qerror.h"
+#include "monitor.h"
 
 #define BLOCK_FLAG_ENCRYPT          1
 #define BLOCK_FLAG_COMPAT6          4
@@ -286,6 +287,9 @@ void bdrv_set_io_limits(BlockDriverState *bs,
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read);
 
 /**
  * stream_start:
diff --git a/blockjob.c b/blockjob.c
index 8219f73..f55f55a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -112,6 +112,7 @@ bool block_job_is_paused(BlockJob *job)
 void block_job_resume(BlockJob *job)
 {
     job->paused = false;
+    block_job_iostatus_reset(job);
     if (job->co && !job->busy) {
         qemu_coroutine_enter(job->co, NULL);
     }
@@ -128,6 +129,11 @@ bool block_job_is_cancelled(BlockJob *job)
     return job->cancelled;
 }
 
+void block_job_iostatus_reset(BlockJob *job)
+{
+    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
 struct BlockCancelData {
     BlockJob *job;
     BlockDriverCompletionFunc *cb;
@@ -189,12 +195,55 @@ void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
 BlockJobInfo *block_job_query(BlockJob *job)
 {
     BlockJobInfo *info = g_new0(BlockJobInfo, 1);
-    info->type   = g_strdup(job->job_type->job_type);
-    info->device = g_strdup(bdrv_get_device_name(job->bs));
-    info->len    = job->len;
-    info->busy   = job->busy;
-    info->paused = job->paused;
-    info->offset = job->offset;
-    info->speed  = job->speed;
+    info->type      = g_strdup(job->job_type->job_type);
+    info->device    = g_strdup(bdrv_get_device_name(job->bs));
+    info->len       = job->len;
+    info->busy      = job->busy;
+    info->paused    = job->paused;
+    info->offset    = job->offset;
+    info->speed     = job->speed;
+    info->io_status = job->iostatus;
     return info;
 }
+
+static void block_job_iostatus_set_err(BlockJob *job, int error)
+{
+    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
+}
+
+
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error)
+{
+    BlockErrorAction action;
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_STOP:
+        action = BDRV_ACTION_STOP;
+        break;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        action = BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        action = BDRV_ACTION_IGNORE;
+        break;
+    default:
+        abort();
+    }
+    bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read);
+    if (action == BDRV_ACTION_STOP) {
+        block_job_pause(job);
+        block_job_iostatus_set_err(job, error);
+        if (bs != job->bs) {
+            bdrv_iostatus_set_err(bs, error);
+        }
+    }
+    return action;
+}
diff --git a/blockjob.h b/blockjob.h
index ece5afa..930cc3c 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -82,6 +82,9 @@ struct BlockJob {
      */
     bool busy;
 
+    /** Status that is published by the query-block-jobs QMP API */
+    BlockDeviceIoStatus iostatus;
+
     /** Offset that is published by the query-block-jobs QMP API */
     int64_t offset;
 
@@ -215,4 +218,26 @@ bool block_job_is_paused(BlockJob *job);
  */
 int block_job_cancel_sync(BlockJob *job);
 
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly pause the job.  Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error);
 #endif
diff --git a/monitor.c b/monitor.c
index 67064e2..d4bd5fe 100644
--- a/monitor.c
+++ b/monitor.c
@@ -450,6 +450,7 @@ static const char *monitor_event_names[] = {
     [QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED",
     [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED",
     [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED",
+    [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR",
     [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED",
     [QEVENT_SUSPEND] = "SUSPEND",
     [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK",
diff --git a/monitor.h b/monitor.h
index 64c1561..43040af 100644
--- a/monitor.h
+++ b/monitor.h
@@ -38,6 +38,7 @@ typedef enum MonitorEvent {
     QEVENT_SPICE_DISCONNECTED,
     QEVENT_BLOCK_JOB_COMPLETED,
     QEVENT_BLOCK_JOB_CANCELLED,
+    QEVENT_BLOCK_JOB_ERROR,
     QEVENT_DEVICE_TRAY_MOVED,
     QEVENT_SUSPEND,
     QEVENT_SUSPEND_DISK,
diff --git a/qapi-schema.json b/qapi-schema.json
index a726413..14e7a0d 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1131,11 +1131,14 @@
 #
 # @speed: the rate limit, bytes per second
 #
+# @io-status: the status of the job (since 1.3)
+#
 # Since: 1.1
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+           'io-status': 'BlockDeviceIoStatus'} }
 
 ##
 # @query-block-jobs:
@@ -1958,6 +1961,8 @@
 # operation.  It is an error to call this command if no operation is in
 # progress.  Resuming an already running job is not an error.
 #
+# This command also clears the error status of the job.
+#
 # @device: the device name
 #
 # Returns: Nothing on success
commit 3e1caa5f76a9104a0d574b0f28b3dafe986a8408
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:57 2012 +0200

    iostatus: reorganize io error code
    
    Move the common part of IDE/SCSI/virtio error handling to the block
    layer.  The new function bdrv_error_action subsumes all three of
    bdrv_emit_qmp_error_event, vm_stop, bdrv_iostatus_set_err.
    
    The same scheme will be used for errors in block jobs.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 0bae046..8b0ba67 100644
--- a/block.c
+++ b/block.c
@@ -29,6 +29,7 @@
 #include "blockjob.h"
 #include "module.h"
 #include "qjson.h"
+#include "sysemu.h"
 #include "qemu-coroutine.h"
 #include "qmp-commands.h"
 #include "qemu-timer.h"
@@ -1386,8 +1387,8 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
     }
 }
 
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockErrorAction action, bool is_read)
+static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                                      BlockErrorAction action, bool is_read)
 {
     QObject *data;
     const char *action_str;
@@ -2486,6 +2487,39 @@ BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
     return is_read ? bs->on_read_error : bs->on_write_error;
 }
 
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_STOP:
+        return BDRV_ACTION_STOP;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        return BDRV_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        return BDRV_ACTION_IGNORE;
+    default:
+        abort();
+    }
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error)
+{
+    assert(error >= 0);
+    bdrv_emit_qmp_error_event(bs, action, is_read);
+    if (action == BDRV_ACTION_STOP) {
+        vm_stop(RUN_STATE_IO_ERROR);
+        bdrv_iostatus_set_err(bs, error);
+    }
+}
+
 int bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
@@ -4226,14 +4260,10 @@ void bdrv_iostatus_reset(BlockDriverState *bs)
     }
 }
 
-/* XXX: Today this is set by device models because it makes the implementation
-   quite simple. However, the block layer knows about the error, so it's
-   possible to implement this without device models being involved */
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
 {
-    if (bdrv_iostatus_is_enabled(bs) &&
-        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        assert(error >= 0);
+    assert(bdrv_iostatus_is_enabled(bs));
+    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
     }
diff --git a/block.h b/block.h
index 47dd905..e2d89d7 100644
--- a/block.h
+++ b/block.h
@@ -108,8 +108,6 @@ void bdrv_iostatus_reset(BlockDriverState *bs);
 void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockErrorAction action, bool is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -282,6 +280,9 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                        BlockdevOnError on_write_error);
 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index c03db4a..d683a8c 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -557,31 +557,21 @@ void ide_dma_error(IDEState *s)
 static int ide_handle_rw_error(IDEState *s, int error, int op)
 {
     bool is_read = (op & BM_STATUS_RETRY_READ) != 0;
-    BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
+    BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error);
 
-    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
-            || action == BLOCKDEV_ON_ERROR_STOP) {
+    if (action == BDRV_ACTION_STOP) {
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
         s->bus->error_status = op;
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->bs, error);
-    } else {
+    } else if (action == BDRV_ACTION_REPORT) {
         if (op & BM_STATUS_DMA_RETRY) {
             dma_buf_commit(s);
             ide_dma_error(s);
         } else {
             ide_rw_error(s);
         }
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
     }
-
-    return 1;
+    bdrv_error_action(s->bs, action, is_read, error);
+    return action != BDRV_ACTION_IGNORE;
 }
 
 void ide_dma_cb(void *opaque, int ret)
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 2dd99a9..99bb02e 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -388,21 +388,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
     bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
+    BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error);
 
-    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
-            || action == BLOCKDEV_ON_ERROR_STOP) {
-
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->qdev.conf.bs, error);
-        scsi_req_retry(&r->req);
-    } else {
+    if (action == BDRV_ACTION_REPORT) {
         switch (error) {
         case ENOMEDIUM:
             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
@@ -417,9 +405,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
             break;
         }
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read);
     }
-    return 1;
+    bdrv_error_action(s->qdev.conf.bs, action, is_read, error);
+    if (action == BDRV_ACTION_STOP) {
+        scsi_req_retry(&r->req);
+    }
+    return action != BDRV_ACTION_IGNORE;
 }
 
 static void scsi_write_complete(void * opaque, int ret)
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 1ac2483..e25cc96 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -66,29 +66,20 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     bool is_read)
 {
-    BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
+    BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error);
     VirtIOBlock *s = req->dev;
 
-    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
-            || action == BLOCKDEV_ON_ERROR_STOP) {
+    if (action == BDRV_ACTION_STOP) {
         req->next = s->rq;
         s->rq = req;
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->bs, error);
-    } else {
+    } else if (action == BDRV_ACTION_REPORT) {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
         bdrv_acct_done(s->bs, &req->acct);
         g_free(req);
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
     }
 
-    return 1;
+    bdrv_error_action(s->bs, action, is_read, error);
+    return action != BDRV_ACTION_IGNORE;
 }
 
 static void virtio_blk_rw_complete(void *opaque, int ret)
diff --git a/qemu-tool.c b/qemu-tool.c
index 18205ba..f2f9813 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -19,6 +19,7 @@
 #include "qemu-log.h"
 #include "migration.h"
 #include "main-loop.h"
+#include "sysemu.h"
 #include "qemu_socket.h"
 #include "slirp/libslirp.h"
 
@@ -37,6 +38,11 @@ const char *qemu_get_vm_name(void)
 
 Monitor *cur_mon;
 
+void vm_stop(RunState state)
+{
+    abort();
+}
+
 int monitor_cur_is_qmp(void)
 {
     return 0;
commit 1ceee0d5cc841fc9ca8e72b81450b598ab307f14
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:56 2012 +0200

    iostatus: change is_read to a bool
    
    Do this while we are touching this part of the code, before introducing
    more uses of "int is_read".
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 1c3ebd7..0bae046 100644
--- a/block.c
+++ b/block.c
@@ -1387,7 +1387,7 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
 }
 
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockErrorAction action, int is_read)
+                               BlockErrorAction action, bool is_read)
 {
     QObject *data;
     const char *action_str;
@@ -2481,7 +2481,7 @@ void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
     bs->on_write_error = on_write_error;
 }
 
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read)
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
 {
     return is_read ? bs->on_read_error : bs->on_write_error;
 }
diff --git a/block.h b/block.h
index ee81129..47dd905 100644
--- a/block.h
+++ b/block.h
@@ -109,7 +109,7 @@ void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockErrorAction action, int is_read);
+                               BlockErrorAction action, bool is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -281,7 +281,7 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 
 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                        BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/blockdev.c b/blockdev.c
index 6330715..d52a830 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -238,7 +238,7 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
     qemu_bh_schedule(s->bh);
 }
 
-static int parse_block_error_action(const char *buf, int is_read)
+static int parse_block_error_action(const char *buf, bool is_read)
 {
     if (!strcmp(buf, "ignore")) {
         return BLOCKDEV_ON_ERROR_IGNORE;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 2620e87..c03db4a 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -556,7 +556,7 @@ void ide_dma_error(IDEState *s)
 
 static int ide_handle_rw_error(IDEState *s, int error, int op)
 {
-    int is_read = (op & BM_STATUS_RETRY_READ);
+    bool is_read = (op & BM_STATUS_RETRY_READ) != 0;
     BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
 
     if (action == BLOCKDEV_ON_ERROR_IGNORE) {
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 88c0942..644533f 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -188,7 +188,7 @@ static void bmdma_restart_bh(void *opaque)
 {
     BMDMAState *bm = opaque;
     IDEBus *bus = bm->bus;
-    int is_read;
+    bool is_read;
     int error_status;
 
     qemu_bh_delete(bm->bh);
@@ -198,7 +198,7 @@ static void bmdma_restart_bh(void *opaque)
         return;
     }
 
-    is_read = !!(bus->error_status & BM_STATUS_RETRY_READ);
+    is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0;
 
     /* The error status must be cleared before resubmitting the request: The
      * request may fail again, and this case can only be distinguished if the
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index c295326..2dd99a9 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -386,7 +386,7 @@ static void scsi_read_data(SCSIRequest *req)
  */
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
-    int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
+    bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index f178fa8..1ac2483 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
 }
 
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
-    int is_read)
+    bool is_read)
 {
     BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
     VirtIOBlock *s = req->dev;
@@ -98,7 +98,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
     trace_virtio_blk_rw_complete(req, ret);
 
     if (ret) {
-        int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+        bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
         if (virtio_blk_handle_rw_error(req, -ret, is_read))
             return;
     }
commit 92aa5c6d77ac29574c1717bcf57827fa1e586f31
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:55 2012 +0200

    iostatus: move BlockdevOnError declaration to QAPI
    
    This will let block-stream reuse the enum.  Places that used the enums
    are renamed accordingly.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 7b45082..1c3ebd7 100644
--- a/block.c
+++ b/block.c
@@ -4209,9 +4209,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs)
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
 {
     return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
-            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
-            bs->on_read_error == BLOCK_ERR_STOP_ANY));
+           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
 }
 
 void bdrv_iostatus_disable(BlockDriverState *bs)
diff --git a/block.h b/block.h
index 038621f..ee81129 100644
--- a/block.h
+++ b/block.h
@@ -91,11 +91,6 @@ typedef struct BlockDevOps {
 #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)
 
 typedef enum {
-    BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
-    BLOCK_ERR_STOP_ANY
-} BlockdevOnError;
-
-typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockErrorAction;
 
diff --git a/block/commit.c b/block/commit.c
index cabb470..733c914 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -34,7 +34,7 @@ typedef struct CommitBlockJob {
     BlockDriverState *active;
     BlockDriverState *top;
     BlockDriverState *base;
-    BlockErrorAction on_error;
+    BlockdevOnError on_error;
     int base_flags;
     int orig_overlay_flags;
 } CommitBlockJob;
@@ -126,9 +126,9 @@ wait:
             bytes_written += n * BDRV_SECTOR_SIZE;
         }
         if (ret < 0) {
-            if (s->on_error == BLOCK_ERR_STOP_ANY    ||
-                s->on_error == BLOCK_ERR_REPORT      ||
-                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
+            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
+                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
+                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
                 goto exit_free_buf;
             } else {
                 n = 0;
@@ -182,7 +182,7 @@ static BlockJobType commit_job_type = {
 
 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                   BlockDriverState *top, int64_t speed,
-                  BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     CommitBlockJob *s;
@@ -192,8 +192,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
     BlockDriverState *overlay_bs;
     Error *local_err = NULL;
 
-    if ((on_error == BLOCK_ERR_STOP_ANY ||
-         on_error == BLOCK_ERR_STOP_ENOSPC) &&
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
         error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
         return;
diff --git a/block_int.h b/block_int.h
index b98c770..615aafc 100644
--- a/block_int.h
+++ b/block_int.h
@@ -323,7 +323,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
  */
 void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockDriverState *top, int64_t speed,
-                 BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                 BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp);
 
 #endif /* BLOCK_INT_H */
diff --git a/blockdev.c b/blockdev.c
index f097e57..6330715 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -241,13 +241,13 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
 static int parse_block_error_action(const char *buf, int is_read)
 {
     if (!strcmp(buf, "ignore")) {
-        return BLOCK_ERR_IGNORE;
+        return BLOCKDEV_ON_ERROR_IGNORE;
     } else if (!is_read && !strcmp(buf, "enospc")) {
-        return BLOCK_ERR_STOP_ENOSPC;
+        return BLOCKDEV_ON_ERROR_ENOSPC;
     } else if (!strcmp(buf, "stop")) {
-        return BLOCK_ERR_STOP_ANY;
+        return BLOCKDEV_ON_ERROR_STOP;
     } else if (!strcmp(buf, "report")) {
-        return BLOCK_ERR_REPORT;
+        return BLOCKDEV_ON_ERROR_REPORT;
     } else {
         error_report("'%s' invalid %s error action",
                      buf, is_read ? "read" : "write");
@@ -433,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
         return NULL;
     }
 
-    on_write_error = BLOCK_ERR_STOP_ENOSPC;
+    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
             error_report("werror is not supported by this bus type");
@@ -446,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
         }
     }
 
-    on_read_error = BLOCK_ERR_REPORT;
+    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
     if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
             error_report("rerror is not supported by this bus type");
@@ -1143,7 +1143,7 @@ void qmp_block_commit(const char *device,
     /* This will be part of the QMP command, if/when the
      * BlockdevOnError change for blkmirror makes it in
      */
-    BlockErrorAction on_error = BLOCK_ERR_REPORT;
+    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
 
     /* drain all i/o before commits */
     bdrv_drain_all();
diff --git a/hw/fdc.c b/hw/fdc.c
index 08830c1..43b0f20 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl)
         drive->fdctrl = fdctrl;
 
         if (drive->bs) {
-            if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+            if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
                 error_report("fdc doesn't support drive option werror");
                 return -1;
             }
-            if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) {
+            if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
                 error_report("fdc doesn't support drive option rerror");
                 return -1;
             }
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 57b9fa4..2620e87 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -559,13 +559,13 @@ static int ide_handle_rw_error(IDEState *s, int error, int op)
     int is_read = (op & BM_STATUS_RETRY_READ);
     BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
         s->bus->error_status = op;
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index fef83a3..c295326 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -390,13 +390,13 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
 
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
         vm_stop(RUN_STATE_IO_ERROR);
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index a5eb663..d904534 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s)
         return -1;
     }
 
-    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
     }
-    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) {
+    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
         error_report("Device doesn't support drive option rerror");
         return -1;
     }
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 01e537d..f178fa8 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -69,13 +69,13 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
     VirtIOBlock *s = req->dev;
 
-    if (action == BLOCK_ERR_IGNORE) {
+    if (action == BLOCKDEV_ON_ERROR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
         return 0;
     }
 
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC)
+            || action == BLOCKDEV_ON_ERROR_STOP) {
         req->next = s->rq;
         s->rq = req;
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
diff --git a/qapi-schema.json b/qapi-schema.json
index 0f2b1a0..a726413 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1088,6 +1088,29 @@
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
 ##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
 # @BlockJobInfo:
 #
 # Information about a long-running block device operation.
commit ff06f5f351c3b19d5cdcb8bcb9f9cc9a01cac066
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:54 2012 +0200

    iostatus: rename BlockErrorAction, BlockQMPEventAction
    
    We want to remove knowledge of BLOCK_ERR_STOP_ENOSPC from drivers;
    drivers should only be told whether to stop/report/ignore the error.
    On the other hand, we want to keep using the nicer BlockErrorAction
    name in the drivers.  So rename the enums, while leaving aside the
    names of the enum values for now.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 8202f27..7b45082 100644
--- a/block.c
+++ b/block.c
@@ -1387,7 +1387,7 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
 }
 
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read)
+                               BlockErrorAction action, int is_read)
 {
     QObject *data;
     const char *action_str;
@@ -2474,14 +2474,14 @@ void bdrv_set_io_limits(BlockDriverState *bs,
     bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error)
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error)
 {
     bs->on_read_error = on_read_error;
     bs->on_write_error = on_write_error;
 }
 
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read)
 {
     return is_read ? bs->on_read_error : bs->on_write_error;
 }
diff --git a/block.h b/block.h
index bd002d5..038621f 100644
--- a/block.h
+++ b/block.h
@@ -93,11 +93,11 @@ typedef struct BlockDevOps {
 typedef enum {
     BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
     BLOCK_ERR_STOP_ANY
-} BlockErrorAction;
+} BlockdevOnError;
 
 typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockQMPEventAction;
+} BlockErrorAction;
 
 typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
 
@@ -114,7 +114,7 @@ void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read);
+                               BlockErrorAction action, int is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -284,9 +284,9 @@ int bdrv_has_zero_init(BlockDriverState *bs);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                       int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error);
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/block_int.h b/block_int.h
index 61dc73b..b98c770 100644
--- a/block_int.h
+++ b/block_int.h
@@ -262,7 +262,7 @@ struct BlockDriverState {
 
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
-    BlockErrorAction on_read_error, on_write_error;
+    BlockdevOnError on_read_error, on_write_error;
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d6fb69c..57b9fa4 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -557,7 +557,7 @@ void ide_dma_error(IDEState *s)
 static int ide_handle_rw_error(IDEState *s, int error, int op)
 {
     int is_read = (op & BM_STATUS_RETRY_READ);
-    BlockErrorAction action = bdrv_get_on_error(s->bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(s->bs, is_read);
 
     if (action == BLOCK_ERR_IGNORE) {
         bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 95e9158..fef83a3 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -388,7 +388,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
     int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
 
     if (action == BLOCK_ERR_IGNORE) {
         bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 6f6d172..01e537d 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     int is_read)
 {
-    BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read);
+    BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read);
     VirtIOBlock *s = req->dev;
 
     if (action == BLOCK_ERR_IGNORE) {
commit 0c81734765c9af1705f8e531b9431d63ee8ffd3d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:52 2012 +0200

    qemu-iotests: add test for pausing a streaming operation
    
    This test checks that a paused streaming job does not advance its offset.
    
    Sometimes the new test fails; the map is different between the source
    and the destination of the streaming because qemu-io does not always
    pack adjacent clusters that have the same allocated/unallocated state.
    However, this also happens with the existing test_stream testcase, and
    is better fixed in qemu-io.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 55b16f8..dfacdf1 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -18,6 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+import time
 import os
 import iotests
 from iotests import qemu_img, qemu_io
@@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase):
                          qemu_io('-c', 'map', test_img),
                          'image file map does not match backing file after streaming')
 
+    def test_stream_pause(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+        self.assertEqual(qemu_io('-c', 'map', backing_img),
+                         qemu_io('-c', 'map', test_img),
+                         'image file map does not match backing file after streaming')
+
     def test_stream_partial(self):
         self.assert_no_active_streams()
 
@@ -173,8 +211,6 @@ class TestStreamStop(ImageStreamingTestCase):
         os.remove(backing_img)
 
     def test_stream_stop(self):
-        import time
-
         self.assert_no_active_streams()
 
         result = self.vm.qmp('block-stream', device='drive0')
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 2f7d390..594c16f 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-.......
+........
 ----------------------------------------------------------------------
-Ran 7 tests
+Ran 8 tests
 
 OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 4b54fa6..66d2ba9 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -36,7 +36,7 @@
 027 rw auto quick
 028 rw backing auto
 029 rw auto quick
-030 rw auto
+030 rw auto backing
 031 rw auto quick
 032 rw auto
 033 rw auto
commit 6e37fb811ac86739e5ed30dba3a8e4848bd21b56
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:51 2012 +0200

    qmp: add block-job-pause and block-job-resume
    
    Add QMP commands matching the functionality.
    
    Paused jobs cannot be canceled without first resuming them.  This
    ensures that I/O errors are never missed by management.  However, an
    optional force argument can be specified to cancel a paused job anyway.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 612dd71..f097e57 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1213,15 +1213,20 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
     block_job_set_speed(job, speed, errp);
 }
 
-void qmp_block_job_cancel(const char *device, Error **errp)
+void qmp_block_job_cancel(const char *device,
+                          bool has_force, bool force, Error **errp)
 {
     BlockJob *job = find_block_job(device);
 
+    if (!has_force) {
+        force = false;
+    }
+
     if (!job) {
         error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
-    if (job->paused) {
+    if (job->paused && !force) {
         error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
         return;
     }
@@ -1230,6 +1235,32 @@ void qmp_block_job_cancel(const char *device, Error **errp)
     block_job_cancel(job);
 }
 
+void qmp_block_job_pause(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_pause(job);
+    block_job_pause(job);
+}
+
+void qmp_block_job_resume(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_resume(job);
+    block_job_resume(job);
+}
+
 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
 {
     BlockJobInfoList **prev = opaque;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed67e99..27d90a2 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -99,9 +99,10 @@ ETEXI
 
     {
         .name       = "block_job_cancel",
-        .args_type  = "device:B",
-        .params     = "device",
-        .help       = "stop an active background block operation",
+        .args_type  = "force:-f,device:B",
+        .params     = "[-f] device",
+        .help       = "stop an active background block operation (use -f"
+                      "\n\t\t\t if the operation is currently paused)",
         .mhandler.cmd = hmp_block_job_cancel,
     },
 
@@ -112,6 +113,34 @@ Stop an active block streaming operation.
 ETEXI
 
     {
+        .name       = "block_job_pause",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "pause an active background block operation",
+        .mhandler.cmd = hmp_block_job_pause,
+    },
+
+STEXI
+@item block_job_pause
+@findex block_job_pause
+Pause an active block streaming operation.
+ETEXI
+
+    {
+        .name       = "block_job_resume",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "resume a paused background block operation",
+        .mhandler.cmd = hmp_block_job_resume,
+    },
+
+STEXI
+@item block_job_resume
+@findex block_job_resume
+Resume a paused block streaming operation.
+ETEXI
+
+    {
         .name       = "eject",
         .args_type  = "force:-f,device:B",
         .params     = "[-f] device",
diff --git a/hmp.c b/hmp.c
index ba6fbd3..55601f7 100644
--- a/hmp.c
+++ b/hmp.c
@@ -950,8 +950,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
 {
     Error *error = NULL;
     const char *device = qdict_get_str(qdict, "device");
+    bool force = qdict_get_try_bool(qdict, "force", 0);
 
-    qmp_block_job_cancel(device, &error);
+    qmp_block_job_cancel(device, true, force, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_pause(device, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_resume(device, &error);
 
     hmp_handle_error(mon, &error);
 }
diff --git a/hmp.h b/hmp.h
index 48b9c59..71ea384 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index 86a6c7f..0f2b1a0 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1893,12 +1893,56 @@
 #
 # @device: the device name
 #
+# @force: #optional whether to allow cancellation of a paused job (default
+#         false).  Since 1.3.
+#
 # Returns: Nothing on success
 #          If no background operation is active on this device, DeviceNotActive
 #
 # Since: 1.1
 ##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress.  Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress.  Resuming an already running job is not an error.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
 
 ##
 # @ObjectTypeInfo:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index a55a3f5..71d7c25 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -805,10 +805,20 @@ EQMP
 
     {
         .name       = "block-job-cancel",
-        .args_type  = "device:B",
+        .args_type  = "device:B,force:b?",
         .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
     },
     {
+        .name       = "block-job-pause",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_pause,
+    },
+    {
+        .name       = "block-job-resume",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_resume,
+    },
+    {
         .name       = "transaction",
         .args_type  = "actions:q",
         .mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/trace-events b/trace-events
index 29771a7..42b66f1 100644
--- a/trace-events
+++ b/trace-events
@@ -79,6 +79,8 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
 block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
commit 8acc72a4d20910d522516dab31272fe66da8da28
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:50 2012 +0200

    block: add support for job pause/resume
    
    Job pausing reuses the existing support for cancellable sleeps.  A pause
    happens at the next sleeping point and lasts until the coroutine is
    re-entered explicitly.  Cancellation was already doing a forced resume,
    so implement it explicitly in terms of resume.
    
    Paused jobs cannot be canceled without first resuming them.  This ensures
    that I/O errors are never missed by management.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 9a98ce9..612dd71 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1221,6 +1221,10 @@ void qmp_block_job_cancel(const char *device, Error **errp)
         error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
+    if (job->paused) {
+        error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
+        return;
+    }
 
     trace_qmp_block_job_cancel(job);
     block_job_cancel(job);
diff --git a/blockjob.c b/blockjob.c
index 64c9d2d..8219f73 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -99,14 +99,30 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     job->speed = speed;
 }
 
-void block_job_cancel(BlockJob *job)
+void block_job_pause(BlockJob *job)
 {
-    job->cancelled = true;
+    job->paused = true;
+}
+
+bool block_job_is_paused(BlockJob *job)
+{
+    return job->paused;
+}
+
+void block_job_resume(BlockJob *job)
+{
+    job->paused = false;
     if (job->co && !job->busy) {
         qemu_coroutine_enter(job->co, NULL);
     }
 }
 
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    block_job_resume(job);
+}
+
 bool block_job_is_cancelled(BlockJob *job)
 {
     return job->cancelled;
@@ -154,12 +170,20 @@ int block_job_cancel_sync(BlockJob *job)
 
 void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
 {
+    assert(job->busy);
+
     /* Check cancellation *before* setting busy = false, too!  */
-    if (!block_job_is_cancelled(job)) {
-        job->busy = false;
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    if (block_job_is_paused(job)) {
+        qemu_coroutine_yield();
+    } else {
         co_sleep_ns(clock, ns);
-        job->busy = true;
     }
+    job->busy = true;
 }
 
 BlockJobInfo *block_job_query(BlockJob *job)
@@ -169,6 +193,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->device = g_strdup(bdrv_get_device_name(job->bs));
     info->len    = job->len;
     info->busy   = job->busy;
+    info->paused = job->paused;
     info->offset = job->offset;
     info->speed  = job->speed;
     return info;
diff --git a/blockjob.h b/blockjob.h
index f3d8d58..ece5afa 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -70,6 +70,12 @@ struct BlockJob {
     bool cancelled;
 
     /**
+     * Set to true if the job is either paused, or will pause itself
+     * as soon as possible (if busy == true).
+     */
+    bool paused;
+
+    /**
      * Set to false by the job while it is in a quiescent state, where
      * no I/O is pending and the job has yielded on any condition
      * that is not detected by #qemu_aio_wait, such as a timer.
@@ -171,6 +177,31 @@ bool block_job_is_cancelled(BlockJob *job);
 BlockJobInfo *block_job_query(BlockJob *job);
 
 /**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
  * block_job_cancel_sync:
  * @job: The job to be canceled.
  *
diff --git a/qapi-schema.json b/qapi-schema.json
index 6fc6eda..86a6c7f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1101,6 +1101,9 @@
 # @busy: false if the job is known to be in a quiescent state, with
 #        no pending I/O.  Since 1.3.
 #
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
 # @offset: the current progress value
 #
 # @speed: the rate limit, bytes per second
@@ -1109,7 +1112,7 @@
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'busy': 'bool', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int'} }
 
 ##
 # @query-block-jobs:
diff --git a/qerror.h b/qerror.h
index 485c773..c91708c 100644
--- a/qerror.h
+++ b/qerror.h
@@ -51,6 +51,9 @@ void assert_no_error(Error *err);
 #define QERR_BLOCK_JOB_NOT_ACTIVE \
     ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
 
+#define QERR_BLOCK_JOB_PAUSED \
+    ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
commit 8d65883fff22e00d70f5880a26b7a1248c59a2d8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:49 2012 +0200

    qmp: add 'busy' member to BlockJobInfo
    
    Because pausing a job is asynchronous, we need to know whether the
    pause has actually taken effect.  This is described by the "busy" field
    of BlockJob; copy it to BlockJobInfo.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockjob.c b/blockjob.c
index dea63f8..64c9d2d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -168,6 +168,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->type   = g_strdup(job->job_type->job_type);
     info->device = g_strdup(bdrv_get_device_name(job->bs));
     info->len    = job->len;
+    info->busy   = job->busy;
     info->offset = job->offset;
     info->speed  = job->speed;
     return info;
diff --git a/qapi-schema.json b/qapi-schema.json
index 5816545..6fc6eda 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1098,6 +1098,9 @@
 #
 # @len: the maximum progress value
 #
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
 # @offset: the current progress value
 #
 # @speed: the rate limit, bytes per second
@@ -1106,7 +1109,7 @@
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'speed': 'int'} }
 
 ##
 # @query-block-jobs:
commit 30e628b709fcd30db298878e435e3bc93919c48c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:48 2012 +0200

    block: add block_job_query
    
    Extract it out of the implementation of info block-jobs.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index d3f91c0..9a98ce9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1232,19 +1232,8 @@ static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
     BlockJob *job = bs->job;
 
     if (job) {
-        BlockJobInfoList *elem;
-        BlockJobInfo *info = g_new(BlockJobInfo, 1);
-        *info = (BlockJobInfo){
-            .type   = g_strdup(job->job_type->job_type),
-            .device = g_strdup(bdrv_get_device_name(bs)),
-            .len    = job->len,
-            .offset = job->offset,
-            .speed  = job->speed,
-        };
-
-        elem = g_new0(BlockJobInfoList, 1);
-        elem->value = info;
-
+        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        elem->value = block_job_query(bs->job);
         (*prev)->next = elem;
         *prev = elem;
     }
diff --git a/blockjob.c b/blockjob.c
index 9737a43..dea63f8 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -161,3 +161,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
         job->busy = true;
     }
 }
+
+BlockJobInfo *block_job_query(BlockJob *job)
+{
+    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    info->type   = g_strdup(job->job_type->job_type);
+    info->device = g_strdup(bdrv_get_device_name(job->bs));
+    info->len    = job->len;
+    info->offset = job->offset;
+    info->speed  = job->speed;
+    return info;
+}
diff --git a/blockjob.h b/blockjob.h
index 753f5bc..f3d8d58 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -163,6 +163,14 @@ void block_job_cancel(BlockJob *job);
 bool block_job_is_cancelled(BlockJob *job);
 
 /**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
  * block_job_cancel_sync:
  * @job: The job to be canceled.
  *
commit 2f0c9fe64c6a2887047b7eab05cd85b2643234c8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:47 2012 +0200

    block: move job APIs to separate files
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/Makefile.objs b/Makefile.objs
index 7c1c682..b1f3e22 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o
+block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
+block-obj-y += qemu-progress.o qemu-sockets.o uri.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -59,7 +60,7 @@ endif
 # suppress *all* target specific code in case of system emulation, i.e. a
 # single QEMU executable should support all CPUs and machines.
 
-common-obj-y = $(block-obj-y) blockdev.o
+common-obj-y = $(block-obj-y) blockdev.o block/
 common-obj-y += net.o net/
 common-obj-y += qom/
 common-obj-y += readline.o console.o cursor.o
diff --git a/block.c b/block.c
index d7a6d14..8202f27 100644
--- a/block.c
+++ b/block.c
@@ -26,6 +26,7 @@
 #include "trace.h"
 #include "monitor.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "module.h"
 #include "qjson.h"
 #include "qemu-coroutine.h"
@@ -4406,130 +4407,3 @@ out:
 
     return ret;
 }
-
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp)
-{
-    BlockJob *job;
-
-    if (bs->job || bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
-        return NULL;
-    }
-    bdrv_set_in_use(bs, 1);
-
-    job = g_malloc0(job_type->instance_size);
-    job->job_type      = job_type;
-    job->bs            = bs;
-    job->cb            = cb;
-    job->opaque        = opaque;
-    job->busy          = true;
-    bs->job = job;
-
-    /* Only set speed when necessary to avoid NotSupported error */
-    if (speed != 0) {
-        Error *local_err = NULL;
-
-        block_job_set_speed(job, speed, &local_err);
-        if (error_is_set(&local_err)) {
-            bs->job = NULL;
-            g_free(job);
-            bdrv_set_in_use(bs, 0);
-            error_propagate(errp, local_err);
-            return NULL;
-        }
-    }
-    return job;
-}
-
-void block_job_complete(BlockJob *job, int ret)
-{
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-    job->cb(job->opaque, ret);
-    bs->job = NULL;
-    g_free(job);
-    bdrv_set_in_use(bs, 0);
-}
-
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    Error *local_err = NULL;
-
-    if (!job->job_type->set_speed) {
-        error_set(errp, QERR_NOT_SUPPORTED);
-        return;
-    }
-    job->job_type->set_speed(job, speed, &local_err);
-    if (error_is_set(&local_err)) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    job->speed = speed;
-}
-
-void block_job_cancel(BlockJob *job)
-{
-    job->cancelled = true;
-    if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co, NULL);
-    }
-}
-
-bool block_job_is_cancelled(BlockJob *job)
-{
-    return job->cancelled;
-}
-
-struct BlockCancelData {
-    BlockJob *job;
-    BlockDriverCompletionFunc *cb;
-    void *opaque;
-    bool cancelled;
-    int ret;
-};
-
-static void block_job_cancel_cb(void *opaque, int ret)
-{
-    struct BlockCancelData *data = opaque;
-
-    data->cancelled = block_job_is_cancelled(data->job);
-    data->ret = ret;
-    data->cb(data->opaque, ret);
-}
-
-int block_job_cancel_sync(BlockJob *job)
-{
-    struct BlockCancelData data;
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-
-    /* Set up our own callback to store the result and chain to
-     * the original callback.
-     */
-    data.job = job;
-    data.cb = job->cb;
-    data.opaque = job->opaque;
-    data.ret = -EINPROGRESS;
-    job->cb = block_job_cancel_cb;
-    job->opaque = &data;
-    block_job_cancel(job);
-    while (data.ret == -EINPROGRESS) {
-        qemu_aio_wait();
-    }
-    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
-}
-
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
-{
-    /* Check cancellation *before* setting busy = false, too!  */
-    if (!block_job_is_cancelled(job)) {
-        job->busy = false;
-        co_sleep_ns(clock, ns);
-        job->busy = true;
-    }
-}
diff --git a/block.h b/block.h
index e9249c4..bd002d5 100644
--- a/block.h
+++ b/block.h
@@ -6,9 +6,11 @@
 #include "qemu-option.h"
 #include "qemu-coroutine.h"
 #include "qobject.h"
+#include "qapi-types.h"
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 81fd43c..554f429 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,11 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-obj-y += stream.o
-block-obj-y += commit.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+
+common-obj-y += stream.o
+common-obj-y += commit.o
diff --git a/block/commit.c b/block/commit.c
index 624ec5f..cabb470 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -14,6 +14,7 @@
 
 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"
 
 enum {
diff --git a/block/stream.c b/block/stream.c
index c4f87dd..57e4be7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -13,6 +13,7 @@
 
 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"
 
 enum {
diff --git a/block_int.h b/block_int.h
index 6b6b3ab..61dc73b 100644
--- a/block_int.h
+++ b/block_int.h
@@ -67,73 +67,6 @@ typedef struct BlockIOBaseValue {
     uint64_t ios[2];
 } BlockIOBaseValue;
 
-typedef struct BlockJob BlockJob;
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
-    /** Derived BlockJob struct size */
-    size_t instance_size;
-
-    /** String describing the operation, part of query-block-jobs QMP API */
-    const char *job_type;
-
-    /** Optional callback for job types that support setting a speed limit */
-    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
-    /** The job type, including the job vtable.  */
-    const BlockJobType *job_type;
-
-    /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
-
-    /**
-     * The coroutine that executes the job.  If not NULL, it is
-     * reentered when busy is false and the job is cancelled.
-     */
-    Coroutine *co;
-
-    /**
-     * Set to true if the job should cancel itself.  The flag must
-     * always be tested just before toggling the busy flag from false
-     * to true.  After a job has been cancelled, it should only yield
-     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
-     */
-    bool cancelled;
-
-    /**
-     * Set to false by the job while it is in a quiescent state, where
-     * no I/O is pending and the job has yielded on any condition
-     * that is not detected by #qemu_aio_wait, such as a timer.
-     */
-    bool busy;
-
-    /** Offset that is published by the query-block-jobs QMP API */
-    int64_t offset;
-
-    /** Length that is published by the query-block-jobs QMP API */
-    int64_t len;
-
-    /** Speed that was set with @block_job_set_speed.  */
-    int64_t speed;
-
-    /** The completion function that will be called when the job completes.  */
-    BlockDriverCompletionFunc *cb;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-};
-
 struct BlockDriver {
     const char *format_name;
     int instance_size;
@@ -355,90 +288,6 @@ int is_windows_drive(const char *filename);
 #endif
 
 /**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it.  The job
- * will call @cb asynchronously when the job completes.  Note that
- * @bs may have been closed at the time the @cb it is called.  If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds.  Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_complete:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_complete(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_cancel_sync:
- * @job: The job to be canceled.
- *
- * Synchronously cancel the job.  The completion callback is called
- * before the function returns.  The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
-
-/**
  * stream_start:
  * @bs: Block device to operate on.
  * @base: Block device that will become the new base, or %NULL to
diff --git a/blockdev.c b/blockdev.c
index d824612..d3f91c0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -9,6 +9,7 @@
 
 #include "blockdev.h"
 #include "hw/block-common.h"
+#include "blockjob.h"
 #include "monitor.h"
 #include "qerror.h"
 #include "qemu-option.h"
diff --git a/blockjob.c b/blockjob.c
new file mode 100644
index 0000000..9737a43
--- /dev/null
+++ b/blockjob.c
@@ -0,0 +1,163 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor.h"
+#include "block.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qjson.h"
+#include "qemu-coroutine.h"
+#include "qmp-commands.h"
+#include "qemu-timer.h"
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp)
+{
+    BlockJob *job;
+
+    if (bs->job || bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+        return NULL;
+    }
+    bdrv_set_in_use(bs, 1);
+
+    job = g_malloc0(job_type->instance_size);
+    job->job_type      = job_type;
+    job->bs            = bs;
+    job->cb            = cb;
+    job->opaque        = opaque;
+    job->busy          = true;
+    bs->job = job;
+
+    /* Only set speed when necessary to avoid NotSupported error */
+    if (speed != 0) {
+        Error *local_err = NULL;
+
+        block_job_set_speed(job, speed, &local_err);
+        if (error_is_set(&local_err)) {
+            bs->job = NULL;
+            g_free(job);
+            bdrv_set_in_use(bs, 0);
+            error_propagate(errp, local_err);
+            return NULL;
+        }
+    }
+    return job;
+}
+
+void block_job_complete(BlockJob *job, int ret)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    job->cb(job->opaque, ret);
+    bs->job = NULL;
+    g_free(job);
+    bdrv_set_in_use(bs, 0);
+}
+
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!job->job_type->set_speed) {
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+    job->job_type->set_speed(job, speed, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    job->speed = speed;
+}
+
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    if (job->co && !job->busy) {
+        qemu_coroutine_enter(job->co, NULL);
+    }
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+    return job->cancelled;
+}
+
+struct BlockCancelData {
+    BlockJob *job;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+    bool cancelled;
+    int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+    struct BlockCancelData *data = opaque;
+
+    data->cancelled = block_job_is_cancelled(data->job);
+    data->ret = ret;
+    data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+    struct BlockCancelData data;
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+
+    /* Set up our own callback to store the result and chain to
+     * the original callback.
+     */
+    data.job = job;
+    data.cb = job->cb;
+    data.opaque = job->opaque;
+    data.ret = -EINPROGRESS;
+    job->cb = block_job_cancel_cb;
+    job->opaque = &data;
+    block_job_cancel(job);
+    while (data.ret == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+}
+
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
+{
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (!block_job_is_cancelled(job)) {
+        job->busy = false;
+        co_sleep_ns(clock, ns);
+        job->busy = true;
+    }
+}
diff --git a/blockjob.h b/blockjob.h
new file mode 100644
index 0000000..753f5bc
--- /dev/null
+++ b/blockjob.h
@@ -0,0 +1,179 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+    /** Derived BlockJob struct size */
+    size_t instance_size;
+
+    /** String describing the operation, part of query-block-jobs QMP API */
+    const char *job_type;
+
+    /** Optional callback for job types that support setting a speed limit */
+    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+    /** The job type, including the job vtable.  */
+    const BlockJobType *job_type;
+
+    /** The block device on which the job is operating.  */
+    BlockDriverState *bs;
+
+    /**
+     * The coroutine that executes the job.  If not NULL, it is
+     * reentered when busy is false and the job is cancelled.
+     */
+    Coroutine *co;
+
+    /**
+     * Set to true if the job should cancel itself.  The flag must
+     * always be tested just before toggling the busy flag from false
+     * to true.  After a job has been cancelled, it should only yield
+     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+     */
+    bool cancelled;
+
+    /**
+     * Set to false by the job while it is in a quiescent state, where
+     * no I/O is pending and the job has yielded on any condition
+     * that is not detected by #qemu_aio_wait, such as a timer.
+     */
+    bool busy;
+
+    /** Offset that is published by the query-block-jobs QMP API */
+    int64_t offset;
+
+    /** Length that is published by the query-block-jobs QMP API */
+    int64_t len;
+
+    /** Speed that was set with @block_job_set_speed.  */
+    int64_t speed;
+
+    /** The completion function that will be called when the job completes.  */
+    BlockDriverCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it.  The job
+ * will call @cb asynchronously when the job completes.  Note that
+ * @bs may have been closed at the time the @cb it is called.  If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds.  Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+#endif
commit 7e03a9342fff50f2a6a4086906fa66e6c6d4351d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:46 2012 +0200

    block: fix documentation of block_job_cancel_sync
    
    Do this in a separate commit before we move the functions to
    blockjob.h.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block_int.h b/block_int.h
index 56164a7..6b6b3ab 100644
--- a/block_int.h
+++ b/block_int.h
@@ -425,15 +425,13 @@ void block_job_cancel(BlockJob *job);
 bool block_job_is_cancelled(BlockJob *job);
 
 /**
- * block_job_cancel:
+ * block_job_cancel_sync:
  * @job: The job to be canceled.
  *
- * Asynchronously cancel the job and wait for it to reach a quiescent
- * state.  Note that the completion callback will still be called
- * asynchronously, hence it is *not* valid to call #bdrv_delete
- * immediately after #block_job_cancel_sync.  Users of block jobs
- * will usually protect the BlockDriverState objects with a reference
- * count, should this be a concern.
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
  *
  * Returns the return value from the job if the job actually completed
  * during the call, or -ECANCELED if it was canceled.
commit 7ef1507045d35bd4d220cf3bfe0e80e7ac101d00
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Sep 28 17:22:44 2012 +0200

    qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE
    
    The DeviceNotActive text is not a particularly good match, so add
    a separate text while keeping the same class.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index cea22e4..d824612 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1205,7 +1205,7 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
     BlockJob *job = find_block_job(device);
 
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
 
@@ -1217,7 +1217,7 @@ void qmp_block_job_cancel(const char *device, Error **errp)
     BlockJob *job = find_block_job(device);
 
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
 
diff --git a/qerror.h b/qerror.h
index d0a76a4..485c773 100644
--- a/qerror.h
+++ b/qerror.h
@@ -48,6 +48,9 @@ void assert_no_error(Error *err);
 #define QERR_BASE_NOT_FOUND \
     ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
 
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+    ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
commit 747051cd97c384e70eec0ceb905f08e630b6a1c4
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:17 2012 -0400

    qemu-iotests: add initial tests for live block commit
    
    Derived from the streaming test cases (030), this adds the
    following 9 tests:
    
    1. For the following image chain, commit [mid] into [backing],
       and use qemu-io to verify [backing] has its original data, as
       well as the data from [mid]
    
               [backing] <-- [mid] <-- [test]
    
    2. Verifies that 'block-commit' with the 'speed' parameter sets the
       speed parameter, as reported by 'query-block-jobs'
    
    3. Verifies that a bogus 'device' parameter to 'block-commit'
       results in error
    
    4-9: Appropriate error values returned for the following argument errors:
        * top == base
        * top is nonexistent
        * base is nonexistent
        * top == active layer (this is currently not supported)
        * top and base arguments are reversed
        * top argument is omitted
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
new file mode 100755
index 0000000..258e7ea
--- /dev/null
+++ b/tests/qemu-iotests/040
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+#
+# Tests for image block commit.
+#
+# Copyright (C) 2012 IBM, Corp.
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Test for live block commit
+# Derived from Image Streaming Test 030
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+import struct
+
+backing_img = os.path.join(iotests.test_dir, 'backing.img')
+mid_img = os.path.join(iotests.test_dir, 'mid.img')
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+class ImageCommitTestCase(iotests.QMPTestCase):
+    '''Abstract base class for image commit test cases'''
+
+    def assert_no_active_commit(self):
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return', [])
+
+    def cancel_and_wait(self, drive='drive0'):
+        '''Cancel a block job and wait for it to finish'''
+        result = self.vm.qmp('block-job-cancel', device=drive)
+        self.assert_qmp(result, 'return', {})
+
+        cancelled = False
+        while not cancelled:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_CANCELLED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', drive)
+                    cancelled = True
+
+        self.assert_no_active_commit()
+
+    def create_image(self, name, size):
+        file = open(name, 'w')
+        i = 0
+        while i < size:
+            sector = struct.pack('>l504xl', i / 512, i / 512)
+            file.write(sector)
+            i = i + 512
+        file.close()
+
+
+class TestSingleDrive(ImageCommitTestCase):
+    image_len = 1 * 1024 * 1024
+    test_len = 1 * 1024 * 256
+
+    def setUp(self):
+        self.create_image(backing_img, TestSingleDrive.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
+        qemu_io('-c', 'write -P 0xef 524288 524288', mid_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_commit(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_commit()
+        self.vm.shutdown()
+
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_top_same_base(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same')
+
+    def test_top_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image file badfile not found')
+
+    def test_base_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
+
+    def test_top_is_active(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+
+    def test_top_and_base_reversed(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img})
+
+    def test_top_omitted(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing")
+
+
+class TestSetSpeed(ImageCommitTestCase):
+    image_len = 80 * 1024 * 1024 # MB
+
+    def setUp(self):
+        qemu_img('create', backing_img, str(TestSetSpeed.image_len))
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_set_speed(self):
+        self.assert_no_active_commit()
+
+        result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024)
+        self.assert_qmp(result, 'return', {})
+
+        # Ensure the speed we set was accepted
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 1024 * 1024)
+
+        self.cancel_and_wait()
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
new file mode 100644
index 0000000..dae404e
--- /dev/null
+++ b/tests/qemu-iotests/040.out
@@ -0,0 +1,5 @@
+.........
+----------------------------------------------------------------------
+Ran 9 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index ebb5ca4..4b54fa6 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -46,3 +46,4 @@
 037 rw auto backing
 038 rw auto backing
 039 rw auto
+040 rw auto
commit ed61fc10e8c8d2d1287f7edae92e44f5c97c540d
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:16 2012 -0400

    QAPI: add command for live block commit, 'block-commit'
    
    The command for live block commit is added, which has the following
    arguments:
    
    device: the block device to perform the commit on (mandatory)
    base:   the base image to commit into; optional (if not specified,
            it is the underlying original image)
    top:    the top image of the commit - all data from inside top down
            to base will be committed into base (mandatory for now; see
            note, below)
    
    speed:  maximum speed, in bytes/sec
    
    Note: Eventually this command will support merging down the active layer,
          but that code is not yet complete.  If the active layer is passed
          in as top, then an error will be returned.  Once merging down the
          active layer is supported, the 'top' argument may become optional,
          and default to the active layer.
    
    This is done as a block job, so upon completion a BLOCK_JOB_COMPLETED
    event will be emitted.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 2878058..4491020 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -50,7 +50,8 @@ Emitted when a block job has been cancelled.
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -73,7 +74,8 @@ Emitted when a block job has completed.
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
diff --git a/blockdev.c b/blockdev.c
index f910ac5..cea22e4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1131,6 +1131,64 @@ void qmp_block_stream(const char *device, bool has_base,
     trace_qmp_block_stream(bs, bs->job);
 }
 
+void qmp_block_commit(const char *device,
+                      bool has_base, const char *base, const char *top,
+                      bool has_speed, int64_t speed,
+                      Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *base_bs, *top_bs;
+    Error *local_err = NULL;
+    /* This will be part of the QMP command, if/when the
+     * BlockdevOnError change for blkmirror makes it in
+     */
+    BlockErrorAction on_error = BLOCK_ERR_REPORT;
+
+    /* drain all i/o before commits */
+    bdrv_drain_all();
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+    if (base && has_base) {
+        base_bs = bdrv_find_backing_image(bs, base);
+    } else {
+        base_bs = bdrv_find_base(bs);
+    }
+
+    if (base_bs == NULL) {
+        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+        return;
+    }
+
+    /* default top_bs is the active layer */
+    top_bs = bs;
+
+    if (top) {
+        if (strcmp(bs->filename, top) != 0) {
+            top_bs = bdrv_find_backing_image(bs, top);
+        }
+    }
+
+    if (top_bs == NULL) {
+        error_setg(errp, "Top image file %s not found", top ? top : "NULL");
+        return;
+    }
+
+    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
 static BlockJob *find_block_job(const char *device)
 {
     BlockDriverState *bs;
diff --git a/qapi-schema.json b/qapi-schema.json
index 14e4419..5816545 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1468,6 +1468,40 @@
   'returns': 'str' }
 
 ##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device:  the name of the device
+#
+# @base:   #optional The file name of the backing image to write data into.
+#                    If not specified, this is the deepest backing image
+#
+# @top:              The file name of the backing image within the image chain,
+#                    which contains the topmost data to be committed down.
+#                    Note, the active layer as 'top' is currently unsupported.
+#
+#                    If top == base, that is an error.
+#
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If commit or stream is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image commit is not supported by this device, NotSupported
+#          If @base or @top is invalid, a generic error is returned
+#          If @top is the active layer, or omitted, a generic error is returned
+#          If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+            '*speed': 'int' } }
+
 # @migrate_cancel
 #
 # Cancel the current executing migration process.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddb..a55a3f5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -792,6 +792,12 @@ EQMP
     },
 
     {
+        .name       = "block-commit",
+        .args_type  = "device:B,base:s?,top:s,speed:o?",
+        .mhandler.cmd_new = qmp_marshal_input_block_commit,
+    },
+
+    {
         .name       = "block-job-set-speed",
         .args_type  = "device:B,speed:o",
         .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed,
commit 79fac5680d3680c9fb43d14a8d4e39ced25530f8
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:15 2012 -0400

    block: helper function, to find the base image of a chain
    
    This is a simple helper function that returns the base image
    of a given image chain.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 54209a5..d7a6d14 100644
--- a/block.c
+++ b/block.c
@@ -3117,6 +3117,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
 }
 
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+    BlockDriverState *curr_bs = NULL;
+
+    if (!bs) {
+        return NULL;
+    }
+
+    curr_bs = bs;
+
+    while (curr_bs->backing_hd) {
+        curr_bs = curr_bs->backing_hd;
+    }
+    return curr_bs;
+}
+
 #define NB_SUFFIXES 4
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
diff --git a/block.h b/block.h
index 8c9b424..e9249c4 100644
--- a/block.h
+++ b/block.h
@@ -207,6 +207,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                            BlockDriverState *base);
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                     BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
 
 
 typedef struct BdrvCheckResult {
commit 9abf2dbaf6ae990c498ec0245986bddcd6b013ea
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:14 2012 -0400

    blockdev: rename block_stream_cb to a generic block_job_cb
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 0267fa3..f910ac5 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1070,12 +1070,12 @@ static QObject *qobject_from_block_job(BlockJob *job)
                               job->speed);
 }
 
-static void block_stream_cb(void *opaque, int ret)
+static void block_job_cb(void *opaque, int ret)
 {
     BlockDriverState *bs = opaque;
     QObject *obj;
 
-    trace_block_stream_cb(bs, bs->job, ret);
+    trace_block_job_cb(bs, bs->job, ret);
 
     assert(bs->job);
     obj = qobject_from_block_job(bs->job);
@@ -1117,7 +1117,7 @@ void qmp_block_stream(const char *device, bool has_base,
     }
 
     stream_start(bs, base_bs, base, has_speed ? speed : 0,
-                 block_stream_cb, bs, &local_err);
+                 block_job_cb, bs, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         return;
diff --git a/trace-events b/trace-events
index dbc3007..29771a7 100644
--- a/trace-events
+++ b/trace-events
@@ -79,7 +79,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
 # hw/virtio-blk.c
commit 747ff602636fd83daae7ee4b1dd6e8d257a89fea
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:13 2012 -0400

    block: add live block commit functionality
    
    This adds the live commit coroutine.  This iteration focuses on the
    commit only below the active layer, and not the active layer itself.
    
    The behaviour is similar to block streaming; the sectors are walked
    through, and anything that exists above 'base' is committed back down
    into base.  At the end, intermediate images are deleted, and the
    chain stitched together.  Images are restored to their original open
    flags upon completion.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/Makefile.objs b/block/Makefile.objs
index a1ae67f..81fd43c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,6 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 block-obj-y += stream.o
+block-obj-y += commit.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/commit.c b/block/commit.c
new file mode 100644
index 0000000..624ec5f
--- /dev/null
+++ b/block/commit.c
@@ -0,0 +1,267 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Jeff Cody   <jcody at redhat.com>
+ *  Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CommitBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *active;
+    BlockDriverState *top;
+    BlockDriverState *base;
+    BlockErrorAction on_error;
+    int base_flags;
+    int orig_overlay_flags;
+} CommitBlockJob;
+
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+                                        BlockDriverState *base,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    int ret = 0;
+
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_write(base, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    BlockDriverState *active = s->active;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
+    BlockDriverState *overlay_bs = NULL;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n = 0;
+    void *buf;
+    int bytes_written = 0;
+    int64_t base_len;
+
+    ret = s->common.len = bdrv_getlength(top);
+
+
+    if (s->common.len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    ret = base_len = bdrv_getlength(base);
+    if (base_len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    if (base_len < s->common.len) {
+        ret = bdrv_truncate(base, s->common.len);
+        if (ret) {
+            goto exit_restore_reopen;
+        }
+    }
+
+    overlay_bs = bdrv_find_overlay(active, top);
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+        uint64_t delay_ns = 0;
+        bool copy;
+
+wait:
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+        /* Copy if allocated above the base */
+        ret = bdrv_co_is_allocated_above(top, base, sector_num,
+                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                         &n);
+        copy = (ret == 1);
+        trace_commit_one_iteration(s, sector_num, n, ret);
+        if (copy) {
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    goto wait;
+                }
+            }
+            ret = commit_populate(top, base, sector_num, n, buf);
+            bytes_written += n * BDRV_SECTOR_SIZE;
+        }
+        if (ret < 0) {
+            if (s->on_error == BLOCK_ERR_STOP_ANY    ||
+                s->on_error == BLOCK_ERR_REPORT      ||
+                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
+                goto exit_free_buf;
+            } else {
+                n = 0;
+                continue;
+            }
+        }
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    ret = 0;
+
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
+    qemu_vfree(buf);
+
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_complete(&s->common, ret);
+}
+
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = {
+    .instance_size = sizeof(CommitBlockJob),
+    .job_type      = "commit",
+    .set_speed     = commit_set_speed,
+};
+
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                  BlockDriverState *top, int64_t speed,
+                  BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    CommitBlockJob *s;
+    BlockReopenQueue *reopen_queue = NULL;
+    int orig_overlay_flags;
+    int orig_base_flags;
+    BlockDriverState *overlay_bs;
+    Error *local_err = NULL;
+
+    if ((on_error == BLOCK_ERR_STOP_ANY ||
+         on_error == BLOCK_ERR_STOP_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
+        return;
+    }
+
+    if (top == base) {
+        error_setg(errp, "Invalid files for merge: top and base are the same");
+        return;
+    }
+
+    /* top and base may be valid, but let's make sure that base is reachable
+     * from top */
+    if (bdrv_find_backing_image(top, base->filename) != base) {
+        error_setg(errp,
+                   "Base (%s) is not reachable from top (%s)",
+                   base->filename, top->filename);
+        return;
+    }
+
+    overlay_bs = bdrv_find_overlay(bs, top);
+
+    if (overlay_bs == NULL) {
+        error_setg(errp, "Could not find overlay image for %s:", top->filename);
+        return;
+    }
+
+    orig_base_flags    = bdrv_get_flags(base);
+    orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+    /* convert base & overlay_bs to r/w, if necessary */
+    if (!(orig_base_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+                                         orig_base_flags | BDRV_O_RDWR);
+    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
+    if (reopen_queue) {
+        bdrv_reopen_multiple(reopen_queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+
+    s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->base   = base;
+    s->top    = top;
+    s->active = bs;
+
+    s->base_flags          = orig_base_flags;
+    s->orig_overlay_flags  = orig_overlay_flags;
+
+    s->on_error = on_error;
+    s->common.co = qemu_coroutine_create(commit_run);
+
+    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block_int.h b/block_int.h
index ac4245c..56164a7 100644
--- a/block_int.h
+++ b/block_int.h
@@ -463,4 +463,20 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                 BlockDriverState *top, int64_t speed,
+                 BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                 void *opaque, Error **errp);
+
 #endif /* BLOCK_INT_H */
diff --git a/trace-events b/trace-events
index f5b5097..dbc3007 100644
--- a/trace-events
+++ b/trace-events
@@ -74,6 +74,8 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c
 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
commit 6ebdcee2d8e9e4b41ffe4e49039927550848b926
Author: Jeff Cody <jcody at redhat.com>
Date:   Thu Sep 27 13:29:12 2012 -0400

    block: add support functions for live commit, to find and delete images.
    
    Add bdrv_find_overlay(), and bdrv_drop_intermediate().
    
    bdrv_find_overlay():  given 'bs' and the active (topmost) BDS of an image chain,
                        find the image that is the immediate top of 'bs'
    
    bdrv_drop_intermediate():
                        Given 3 BDS (active, top, base), drop images above
                        base up to and including top, and set base to be the
                        backing file of top's overlay node.
    
                        E.g., this converts:
    
                        bottom <- base <- intermediate <- top <- active
    
                        to
    
                        bottom <- base <- active
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 751ebdc..54209a5 100644
--- a/block.c
+++ b/block.c
@@ -1724,6 +1724,149 @@ int bdrv_change_backing_file(BlockDriverState *bs,
     return ret;
 }
 
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs)
+{
+    BlockDriverState *overlay = NULL;
+    BlockDriverState *intermediate;
+
+    assert(active != NULL);
+    assert(bs != NULL);
+
+    /* if bs is the same as active, then by definition it has no overlay
+     */
+    if (active == bs) {
+        return NULL;
+    }
+
+    intermediate = active;
+    while (intermediate->backing_hd) {
+        if (intermediate->backing_hd == bs) {
+            overlay = intermediate;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+
+    return overlay;
+}
+
+typedef struct BlkIntermediateStates {
+    BlockDriverState *bs;
+    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in that overlay can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ *  if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base)
+{
+    BlockDriverState *intermediate;
+    BlockDriverState *base_bs = NULL;
+    BlockDriverState *new_top_bs = NULL;
+    BlkIntermediateStates *intermediate_state, *next;
+    int ret = -EIO;
+
+    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+    QSIMPLEQ_INIT(&states_to_delete);
+
+    if (!top->drv || !base->drv) {
+        goto exit;
+    }
+
+    new_top_bs = bdrv_find_overlay(active, top);
+
+    if (new_top_bs == NULL) {
+        /* we could not find the image above 'top', this is an error */
+        goto exit;
+    }
+
+    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+     * to do, no intermediate images */
+    if (new_top_bs->backing_hd == base) {
+        ret = 0;
+        goto exit;
+    }
+
+    intermediate = top;
+
+    /* now we will go down through the list, and add each BDS we find
+     * into our deletion queue, until we hit the 'base'
+     */
+    while (intermediate) {
+        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+        intermediate_state->bs = intermediate;
+        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+        if (intermediate->backing_hd == base) {
+            base_bs = intermediate->backing_hd;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+    if (base_bs == NULL) {
+        /* something went wrong, we did not end at the base. safely
+         * unravel everything, and exit with error */
+        goto exit;
+    }
+
+    /* success - we can delete the intermediate states, and link top->base */
+    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+                                   base_bs->drv ? base_bs->drv->format_name : "");
+    if (ret) {
+        goto exit;
+    }
+    new_top_bs->backing_hd = base_bs;
+
+
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        /* so that bdrv_close() does not recursively close the chain */
+        intermediate_state->bs->backing_hd = NULL;
+        bdrv_delete(intermediate_state->bs);
+    }
+    ret = 0;
+
+exit:
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        g_free(intermediate_state);
+    }
+    return ret;
+}
+
+
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                    size_t size)
 {
diff --git a/block.h b/block.h
index b1095d8..8c9b424 100644
--- a/block.h
+++ b/block.h
@@ -203,6 +203,10 @@ int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs);
 
 
 typedef struct BdrvCheckResult {
commit 8d6d89cb63c57569864ecdeb84d3a1c2ebd031cc
Author: Bharata B Rao <bharata at linux.vnet.ibm.com>
Date:   Thu Sep 27 19:30:32 2012 +0530

    block: Support GlusterFS as a QEMU block backend.
    
    This patch adds gluster as the new block backend in QEMU. This gives
    QEMU the ability to boot VM images from gluster volumes. It's already
    possible to boot from VM images on gluster volumes using a FUSE mount, but
    this patchset provides the ability to boot VM images from gluster volumes
    by bypassing the FUSE layer in gluster. This is made possible by
    using libgfapi routines to perform IO on gluster volumes directly.
    
    VM Image on gluster volume is specified like this:
    
    file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
    
    'gluster' is the protocol.
    
    'transport' specifies the transport type used to connect to gluster
    management daemon (glusterd). Valid transport types are
    tcp, unix and rdma. If a transport type isn't specified, then tcp
    type is assumed.
    
    'server' specifies the server where the volume file specification for
    the given volume resides. This can be either hostname, ipv4 address
    or ipv6 address. ipv6 address needs to be within square brackets [ ].
    If transport type is 'unix', then 'server' field should not be specified.
    The 'socket' field needs to be populated with the path to unix domain
    socket.
    
    'port' is the port number on which glusterd is listening. This is optional
    and if not specified, QEMU will send 0, which makes gluster use the
    default port. If the transport type is unix, then 'port' should not be
    specified.
    
    'volname' is the name of the gluster volume which contains the VM image.
    
    'image' is the path to the actual VM image that resides on gluster volume.
    
    Examples:
    
    file=gluster://1.2.3.4/testvol/a.img
    file=gluster+tcp://1.2.3.4/testvol/a.img
    file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
    file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
    file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
    file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
    file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
    file=gluster+rdma://1.2.3.4:24007/testvol/a.img
    
    Signed-off-by: Bharata B Rao <bharata at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d3..a1ae67f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..3588d73
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,624 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata at linux.vnet.ibm.com>
+ *
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
+ *
+ * Copyright (C) 2010-2011 Christian Brunner <chb at muc.de>,
+ *                         Josh Durgin <josh.durgin at dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <glusterfs/api/glfs.h>
+#include "block_int.h"
+#include "qemu_socket.h"
+#include "uri.h"
+
+typedef struct GlusterAIOCB {
+    BlockDriverAIOCB common;
+    int64_t size;
+    int ret;
+    bool *finished;
+    QEMUBH *bh;
+} GlusterAIOCB;
+
+typedef struct BDRVGlusterState {
+    struct glfs *glfs;
+    int fds[2];
+    struct glfs_fd *fd;
+    int qemu_aio_count;
+    int event_reader_pos;
+    GlusterAIOCB *event_acb;
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ  0
+#define GLUSTER_FD_WRITE 1
+
+typedef struct GlusterConf {
+    char *server;
+    int port;
+    char *volname;
+    char *image;
+    char *transport;
+} GlusterConf;
+
+static void qemu_gluster_gconf_free(GlusterConf *gconf)
+{
+    g_free(gconf->server);
+    g_free(gconf->volname);
+    g_free(gconf->image);
+    g_free(gconf->transport);
+    g_free(gconf);
+}
+
+static int parse_volume_options(GlusterConf *gconf, char *path)
+{
+    char *p, *q;
+
+    if (!path) {
+        return -EINVAL;
+    }
+
+    /* volume */
+    p = q = path + strspn(path, "/");
+    p += strcspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->volname = g_strndup(q, p - q);
+
+    /* image */
+    p += strspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->image = g_strdup(p);
+    return 0;
+}
+
+/*
+ * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ *
+ * 'gluster' is the protocol.
+ *
+ * 'transport' specifies the transport type used to connect to gluster
+ * management daemon (glusterd). Valid transport types are
+ * tcp, unix and rdma. If a transport type isn't specified, then tcp
+ * type is assumed.
+ *
+ * 'server' specifies the server where the volume file specification for
+ * the given volume resides. This can be either hostname, ipv4 address
+ * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * If transport type is 'unix', then 'server' field should not be specified.
+ * The 'socket' field needs to be populated with the path to unix domain
+ * socket.
+ *
+ * 'port' is the port number on which glusterd is listening. This is optional
+ * and if not specified, QEMU will send 0, which makes gluster use the
+ * default port. If the transport type is unix, then 'port' should not be
+ * specified.
+ *
+ * 'volname' is the name of the gluster volume which contains the VM image.
+ *
+ * 'image' is the path to the actual VM image that resides on gluster volume.
+ *
+ * Examples:
+ *
+ * file=gluster://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
+ * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
+ * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
+ */
+static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    bool is_unix = false;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "gluster")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+unix")) {
+        gconf->transport = g_strdup("unix");
+        is_unix = true;
+    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
+        gconf->transport = g_strdup("rdma");
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = parse_volume_options(gconf, uri->path);
+    if (ret < 0) {
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (is_unix) {
+        if (uri->server || uri->port) {
+            ret = -EINVAL;
+            goto out;
+        }
+        if (strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        gconf->server = g_strdup(qp->p[0].value);
+    } else {
+        gconf->server = g_strdup(uri->server);
+        gconf->port = uri->port;
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
+/* Parse @filename into @gconf and connect to the gluster volume it names.
+ * Returns the glfs handle, or NULL on failure (callers then read errno). */
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+{
+    struct glfs *glfs = NULL;
+    int ret;
+
+    ret = qemu_gluster_parseuri(gconf, filename);
+    if (ret < 0) {
+        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+            "volname/image[?socket=...]");
+        errno = -ret;
+        goto out;
+    }
+
+    glfs = glfs_new(gconf->volname);
+    if (!glfs) {
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+            gconf->port);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* TODO: use GF_LOG_ERROR instead of 4 once libgfapi exports GF_LOG_*. */
+    ret = glfs_set_logging(glfs, "-", 4);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = glfs_init(glfs);
+    if (ret) {
+        /* error_report() appends the newline; server may be NULL here */
+        error_report("Gluster connection failed for server=%s port=%d "
+             "volume=%s image=%s transport=%s",
+             gconf->server ? gconf->server : "NULL", gconf->port,
+             gconf->volname, gconf->image, gconf->transport);
+        goto out;
+    }
+    return glfs;
+
+out:
+    if (glfs) {
+        int old_errno = errno; /* keep the failure's errno across glfs_fini() */
+        glfs_fini(glfs);
+        errno = old_errno;
+    }
+    return NULL;
+}
+
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+{
+    int ret;
+    bool *finished = acb->finished;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+
+    if (!acb->ret || acb->ret == acb->size) {
+        ret = 0; /* Success */
+    } else if (acb->ret < 0) {
+        ret = acb->ret; /* Read/Write failed */
+    } else {
+        ret = -EIO; /* Partial read/write - fail it */
+    }
+
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    cb(opaque, ret);
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_acb;
+
+        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_acb)) {
+                s->event_reader_pos = 0;
+                qemu_gluster_complete_aio(s->event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+
+    return (s->qemu_aio_count > 0);
+}
+
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int open_flags = O_BINARY;
+    int ret = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    s->glfs = qemu_gluster_init(gconf, filename);
+    if (!s->glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        open_flags |= O_DIRECT;
+    }
+
+    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    if (!s->fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (!ret) {
+        return ret;
+    }
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+    return ret;
+}
+
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+    int ret = 0;
+    int64_t total_size = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    glfs = qemu_gluster_init(gconf, filename);
+    if (!glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    fd = glfs_creat(glfs, gconf->image,
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+    if (!fd) {
+        ret = -errno;
+    } else {
+        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+            ret = -errno;
+        }
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
+    }
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (glfs) {
+        glfs_fini(glfs);
+    }
+    return ret;
+}
+
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+    bool finished = false;
+
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static AIOPool gluster_aio_pool = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+    .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVGlusterState *s = bs->opaque;
+    int retval;
+
+    acb->ret = ret;
+    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+    if (retval != sizeof(acb)) {
+        /*
+         * Gluster AIO callback thread failed to notify the waiting
+         * QEMU thread about IO completion.
+         *
+         * Complete this IO request and make the disk inaccessible for
+         * subsequent reads and writes.
+         */
+        error_report("Gluster failed to notify QEMU about IO completion");
+
+        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_release(acb);
+        s->qemu_aio_count--;
+        close(s->fds[GLUSTER_FD_READ]);
+        close(s->fds[GLUSTER_FD_WRITE]);
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+            NULL);
+        bs->drv = NULL; /* Make the disk inaccessible */
+        qemu_mutex_unlock_iothread();
+    }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+    size_t size;
+    off_t offset;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = size;
+    acb->ret = 0;
+    acb->finished = NULL;
+
+    if (write) {
+        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    } else {
+        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;
+    s->qemu_aio_count++;
+
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int64_t ret;
+
+    ret = glfs_lseek(s->fd, 0, SEEK_END);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return ret;
+    }
+}
+
+static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat st;
+    int ret;
+
+    ret = glfs_fstat(s->fd, &st);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return st.st_blocks * 512;
+    }
+}
+
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    glfs_fini(s->glfs);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
+};
+
+static BlockDriver bdrv_gluster = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_tcp = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+tcp",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_unix = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+unix",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_rdma = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+rdma",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void)
+{
+    bdrv_register(&bdrv_gluster_rdma);
+    bdrv_register(&bdrv_gluster_unix);
+    bdrv_register(&bdrv_gluster_tcp);
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);
commit eb100396b9d2658e8bf9cdc14bfcb16a9152f464
Author: Bharata B Rao <bharata at linux.vnet.ibm.com>
Date:   Mon Sep 24 14:42:45 2012 +0530

    configure: Add a config option for GlusterFS as block backend
    
    GlusterFS support in QEMU depends on libgfapi, libgfrpc and
    libgfxdr provided by GlusterFS.
    
    Signed-off-by: Bharata B Rao <bharata at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/configure b/configure
index 8f99b7b..72d4b0d 100755
--- a/configure
+++ b/configure
@@ -219,6 +219,7 @@ want_tools="yes"
 libiscsi=""
 coroutine=""
 seccomp=""
+glusterfs=""
 
 # parse CC options first
 for opt do
@@ -856,6 +857,10 @@ for opt do
   ;;
   --disable-seccomp) seccomp="no"
   ;;
+  --disable-glusterfs) glusterfs="no"
+  ;;
+  --enable-glusterfs) glusterfs="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -1128,6 +1133,8 @@ echo "  --disable-seccomp        disable seccomp support"
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
 echo "                           gthread, ucontext, sigaltstack, windows"
+echo "  --enable-glusterfs       enable GlusterFS backend"
+echo "  --disable-glusterfs      disable GlusterFS backend"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2303,6 +2310,29 @@ EOF
   fi
 fi
 
+##########################################
+# glusterfs probe
+if test "$glusterfs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+    (void) glfs_new("volume");
+    return 0;
+}
+EOF
+  glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+  if compile_prog "" "$glusterfs_libs" ; then
+    glusterfs=yes
+    libs_tools="$glusterfs_libs $libs_tools"
+    libs_softmmu="$glusterfs_libs $libs_softmmu"
+  else
+    if test "$glusterfs" = "yes" ; then
+      feature_not_found "GlusterFS backend support"
+    fi
+    glusterfs=no
+  fi
+fi
+
 #
 # Check for xxxat() functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
@@ -3170,6 +3200,7 @@ echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine_backend"
+echo "GlusterFS support $glusterfs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then
   echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi
 
+if test "$glusterfs" = "yes" ; then
+  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
commit 2db2bfc0ccac5fd68dbf0ceb70fbc372c5d8a8c7
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Sep 27 19:27:43 2012 +0530

    aio: Another fix to the walking_handlers logic
    
    The AIO dispatch loop will call QLIST_REMOVE and g_free even if there
    are other pending calls to qemu_aio_wait outside the current one.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Bharata B Rao <bharata at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/aio.c b/aio.c
index 99b8b72..c738a4e 100644
--- a/aio.c
+++ b/aio.c
@@ -159,14 +159,14 @@ bool qemu_aio_wait(void)
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        walking_handlers++;
-
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
         node = QLIST_FIRST(&aio_handlers);
         while (node) {
             AioHandler *tmp;
 
+            walking_handlers++;
+
             if (!node->deleted &&
                 FD_ISSET(node->fd, &rdfds) &&
                 node->io_read) {
@@ -181,13 +181,13 @@ bool qemu_aio_wait(void)
             tmp = node;
             node = QLIST_NEXT(node, node);
 
-            if (tmp->deleted) {
+            walking_handlers--;
+
+            if (!walking_handlers && tmp->deleted) {
                 QLIST_REMOVE(tmp, node);
                 g_free(tmp);
             }
         }
-
-        walking_handlers--;
     }
 
     return true;
commit ca0defb95c69f88a905d64adbe253e7bb8fde14a
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Sep 24 14:42:02 2012 +0530

    qemu: URI parsing library
    
    Add a new URI parsing library to QEMU. The code has been borrowed from
    libxml2 and libvirt.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Bharata B Rao <bharata at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/Makefile.objs b/Makefile.objs
index 4412757..7c1c682 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/uri.c b/uri.c
new file mode 100644
index 0000000..dd922de
--- /dev/null
+++ b/uri.c
@@ -0,0 +1,2249 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel at veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones at redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha    = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ *            "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ *           "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
+    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
+    ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
+ */
+
+#define IS_UNWISE(p)                                                    \
+      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
+       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
+       ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ *            "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+        ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ *    authority     = server | reg_name
+ *    reg_name      = 1*( unreserved | escaped | "$" | "," |
+ *                        ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path          = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ *									*
+ *                         RFC 3986 parser				*
+ *									*
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
+                      ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p)							\
+       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
+        ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *                     / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p)						\
+      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
+       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
+       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
+       ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p)						\
+      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
+       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
+       ((*(p) == '@')))
+
+/*
+ *    reserved      = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p)						\
+      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
+       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ *    pct-encoded   = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p)						\
+     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p)							\
+     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
+      ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+    const char *cur;
+
+    if (str == NULL)
+	return(-1);
+
+    cur = *str;
+    if (!ISA_ALPHA(cur))
+	return(2);
+    cur++;
+    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+    if (uri != NULL) {
+	if (uri->scheme != NULL) g_free(uri->scheme);
+	uri->scheme = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the fragment part of a URI
+ *
+ * fragment      = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ *       in the fragment identifier but this is used very broadly for
+ *       xpointer scheme selection, so we are allowing it here to not break
+ *       for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           (*cur == '[') || (*cur == ']') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+        if (uri->fragment != NULL)
+            g_free(uri->fragment);
+	if (uri->cleanup & 2)
+	    uri->fragment = g_strndup(*str, cur - *str);
+	else
+	    uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *( pchar / "/" / "?" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+	if (uri->query != NULL)
+	    g_free (uri->query);
+	uri->query = g_strndup (*str, cur - *str);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a port part and fill in the port field
+ * of the @uri structure
+ *
+ * port          = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+    const char *cur = *str;
+
+    if (ISA_DIGIT(cur)) {
+	if (uri != NULL)
+	    uri->port = 0;
+	while (ISA_DIGIT(cur)) {
+	    if (uri != NULL)
+		uri->port = uri->port * 10 + (*cur - '0');
+	    cur++;
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a user information part and fill in the user field
+ * of the @uri structure; succeeds only when a trailing "@" is found
+ *
+ * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+    const char *cur;
+
+    cur = *str;
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+           ISA_SUB_DELIM(cur) || (*cur == ':'))
+	NEXT(cur);
+    if (*cur == '@') {
+	if (uri != NULL) {
+	    if (uri->user != NULL) g_free(uri->user);
+	    if (uri->cleanup & 2)
+		uri->user = g_strndup(*str, cur - *str);
+	    else
+		uri->user = uri_string_unescape(*str, cur - *str, NULL);
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str:  in/out cursor; advanced past the octet only on success
+ *
+ *    dec-octet     = DIGIT                 ; 0-9
+ *                  / %x31-39 DIGIT         ; 10-99
+ *                  / "1" 2DIGIT            ; 100-199
+ *                  / "2" %x30-34 DIGIT     ; 200-249
+ *                  / "25" %x30-35          ; 250-255
+ *
+ * Skip a dec-octet (decimal 0-255, no leading zero on multi-digit values).
+ *
+ * Returns 0 if found and skipped, 1 otherwise (@str is left untouched)
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+    const char *cur = *str;
+
+    if (!(ISA_DIGIT(cur)))
+        return(1);
+    if (!ISA_DIGIT(cur+1))
+	cur++;
+    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+	cur += 2;
+    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) == '5') &&
+	     (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
+	cur += 3;
+    else
+        return(1);
+    *str = cur;
+    return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host          = IP-literal / IPv4address / reg-name
+ * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
+ * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name      = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    const char *host;
+
+    host = cur;
+    /*
+     * IPv6 and future addressing schemes are enclosed between brackets
+     */
+    if (*cur == '[') {
+        cur++;
+	while ((*cur != ']') && (*cur != 0))
+	    cur++;
+	if (*cur != ']')
+	    return(1);
+	cur++;
+	goto found;
+    }
+    /*
+     * try to parse an IPv4
+     */
+    if (ISA_DIGIT(cur)) {
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+	cur++;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	goto found;
+not_ipv4:
+        cur = *str;
+    }
+    /*
+     * then this should be a hostname which can be empty
+     */
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+        NEXT(cur);
+found:
+    if (uri != NULL) {
+	if (uri->authority != NULL) g_free(uri->authority);
+	uri->authority = NULL;
+	if (uri->server != NULL) g_free(uri->server);
+	if (cur != host) {
+	    if (uri->cleanup & 2)
+		uri->server = g_strndup(host, cur - host);
+	    else
+		uri->server = uri_string_unescape(host, cur - host, NULL);
+	} else
+	    uri->server = NULL;
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority     = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+    /*
+     * try to parse an userinfo and check for the trailing @
+     */
+    ret = rfc3986_parse_user_info(uri, &cur);
+    if ((ret != 0) || (*cur != '@'))
+        cur = *str;
+    else
+        cur++;
+    ret = rfc3986_parse_host(uri, &cur);
+    if (ret != 0) return(ret);
+    if (*cur == ':') {
+        cur++;
+        ret = rfc3986_parse_port(uri, &cur);
+	if (ret != 0) return(ret);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str:  the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Skip over one path segment, advancing @str; no URI structure is
+ * involved. The scan stops before the first @forbid character.
+ *
+ * segment       = *pchar
+ * segment-nz    = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *               ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+    const char *cur;
+
+    cur = *str;
+    if (!ISA_PCHAR(cur)) {
+        if (empty)
+	    return(0);
+	return(1);
+    }
+    while (ISA_PCHAR(cur) && (*cur != forbid))
+        NEXT(cur);
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty  = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (*str != cur) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if (*cur != '/')
+        return(1);
+    cur++;
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret == 0) {
+	while (*cur == '/') {
+	    cur++;
+	    ret = rfc3986_parse_segment(&cur, 0, 1);
+	    if (ret != 0) return(ret);
+	}
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a path whose first segment contains no colon and fill in the
+ * path field of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, ':', 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part     = "//" authority path-abempty
+ *                / path-absolute
+ *                / path-rootless
+ *                / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if ((*cur == '/') && (*(cur + 1) == '/')) {
+        cur += 2;
+	ret = rfc3986_parse_authority(uri, &cur);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &cur);
+	if (ret != 0) return(ret);
+	*str = cur;
+	return(0);
+    } else if (*cur == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &cur);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(cur)) {
+        ret = rfc3986_parse_path_rootless(uri, &cur);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+    int ret;
+
+    if ((*str == '/') && (*(str + 1) == '/')) {
+        str += 2;
+	ret = rfc3986_parse_authority(uri, &str);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (*str == '/') {
+	ret = rfc3986_parse_path_absolute(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(str)) {
+        ret = rfc3986_parse_path_no_scheme(uri, &str);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+    int ret;
+
+    ret = rfc3986_parse_scheme(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str != ':') {
+	return(1);
+    }
+    str++;
+    ret = rfc3986_parse_hier_part(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+    int ret;
+
+    if (str == NULL)
+	return(-1);
+    uri_clean(uri);
+
+    /*
+     * Try first to parse absolute refs, then fallback to relative if
+     * it fails.
+     */
+    ret = rfc3986_parse(uri, str);
+    if (ret != 0) {
+	uri_clean(uri);
+        ret = rfc3986_parse_relative_ref(uri, str);
+	if (ret != 0) {
+	    uri_clean(uri);
+	    return(ret);
+	}
+    }
+    return(0);
+}
+
+/**
+ * uri_parse:
+ * @str:  the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+	ret = rfc3986_parse_uri_reference(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/**
+ * uri_parse_into:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+    return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str:  the URI string to analyze
+ * @raw:  if non-zero, unescaping of URI pieces is disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+        if (raw) {
+	    uri->cleanup |= 2;
+	}
+	ret = uri_parse_into(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Generic URI structure functions			*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+    URI *ret;
+
+    ret = (URI *) g_malloc(sizeof(URI));
+    memset(ret, 0, sizeof(URI));
+    return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Double the capacity of the buffer used when saving an URI:
+ * *max is doubled and the buffer is reallocated to hold *max + 1 bytes
+ */
+static char *
+realloc2n(char *ret, int *max) {
+    char *temp;
+    int tmp;
+
+    tmp = *max * 2;
+    temp = g_realloc(ret, (tmp + 1));
+    *max = tmp;
+    return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri:  pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+    char *ret = NULL;
+    char *temp;
+    const char *p;
+    int len;
+    int max;
+
+    if (uri == NULL) return(NULL);
+
+
+    max = 80;
+    ret = g_malloc(max + 1);
+    len = 0;
+
+    if (uri->scheme != NULL) {
+	p = uri->scheme;
+	while (*p != 0) {
+	    if (len >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+		ret = temp;
+	    }
+	    ret[len++] = *p++;
+	}
+	if (len >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = ':';
+    }
+    if (uri->opaque != NULL) {
+	p = uri->opaque;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    } else {
+	if (uri->server != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    if (uri->user != NULL) {
+		p = uri->user;
+		while (*p != 0) {
+		    if (len + 3 >= max) {
+                        temp = realloc2n(ret, &max);
+                        if (temp == NULL) goto mem_error;
+                        ret = temp;
+		    }
+		    if ((IS_UNRESERVED(*(p))) ||
+			((*(p) == ';')) || ((*(p) == ':')) ||
+			((*(p) == '&')) || ((*(p) == '=')) ||
+			((*(p) == '+')) || ((*(p) == '$')) ||
+			((*(p) == ',')))
+			ret[len++] = *p++;
+		    else {
+			int val = *(unsigned char *)p++;
+			int hi = val / 0x10, lo = val % 0x10;
+			ret[len++] = '%';
+			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		    }
+		}
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = '@';
+	    }
+	    p = uri->server;
+	    while (*p != 0) {
+		if (len >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	    if (uri->port > 0) {
+		if (len + 10 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		len += snprintf(&ret[len], max - len, ":%d", uri->port);
+	    }
+	} else if (uri->authority != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    p = uri->authority;
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) ||
+                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+                    ((*(p) == '=')) || ((*(p) == '+')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	} else if (uri->scheme != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	}
+	if (uri->path != NULL) {
+	    p = uri->path;
+	    /*
+	     * the colon in file:///d: should not be escaped or
+	     * Windows accesses fail later.
+	     */
+	    if ((uri->scheme != NULL) &&
+		(p[0] == '/') &&
+		(((p[1] >= 'a') && (p[1] <= 'z')) ||
+		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+		(p[2] == ':') &&
+	        (!strcmp(uri->scheme, "file"))) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+	    }
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+	            ((*(p) == ',')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	}
+	if (uri->query != NULL) {
+	    if (len + 1 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '?';
+	    p = uri->query;
+	    while (*p != 0) {
+		if (len + 1 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	}
+    }
+    if (uri->fragment != NULL) {
+	if (len + 3 >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = '#';
+	p = uri->fragment;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    }
+    if (len >= max) {
+        temp = realloc2n(ret, &max);
+        if (temp == NULL) goto mem_error;
+        ret = temp;
+    }
+    ret[len] = 0;
+    return(ret);
+
+mem_error:
+    g_free(ret);
+    return(NULL);
+}
+
+/**
+ * uri_clean:
+ * @uri:  URI whose components are released (NULL is ignored)
+ *
+ * Free every string owned by the URI struct and reset the pointers
+ */
+static void
+uri_clean(URI *uri) {
+    if (!uri) return;
+    /* g_free() is a no-op on NULL, so the fields need no guards. */
+    g_free(uri->scheme);
+    uri->scheme = NULL;
+    g_free(uri->opaque);
+    uri->opaque = NULL;
+    g_free(uri->authority);
+    uri->authority = NULL;
+    g_free(uri->server);
+    uri->server = NULL;
+    g_free(uri->user);
+    uri->user = NULL;
+    g_free(uri->path);
+    uri->path = NULL;
+    g_free(uri->fragment);
+    uri->fragment = NULL;
+    g_free(uri->query);
+    uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri:  pointer to an URI (NULL is accepted and ignored)
+ *
+ * Release the strings held by the URI via uri_clean(), then the struct itself
+ */
+void
+uri_free(URI *uri) {
+    uri_clean(uri);
+    g_free(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Helper functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path:  pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 on success, or -1 if @path is NULL
+ */
+static int
+normalize_uri_path(char *path) {
+    char *cur, *out;
+
+    if (path == NULL)
+	return(-1);
+
+    /* Skip all initial "/" chars.  We want to get to the beginning of the
+     * first non-empty segment.
+     */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+      return(0);
+
+    /* Keep everything we've seen so far.  */
+    out = cur;
+
+    /*
+     * Analyze each segment in sequence for cases (c) and (d).
+     */
+    while (cur[0] != '\0') {
+	/*
+	 * c) All occurrences of "./", where "." is a complete path segment,
+	 *    are removed from the buffer string.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '/')) {
+	    cur += 2;
+	    /* '//' normalization should be done at this point too */
+	    while (cur[0] == '/')
+		cur++;
+	    continue;
+	}
+
+	/*
+	 * d) If the buffer string ends with "." as a complete path segment,
+	 *    that "." is removed.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '\0'))
+	    break;
+
+	/* Otherwise keep the segment.  */
+	while (cur[0] != '/') {
+            if (cur[0] == '\0')
+              goto done_cd;
+	    (out++)[0] = (cur++)[0];
+	}
+	/* normalize "//": collapse a run of slashes into a single one */
+	while ((cur[0] == '/') && (cur[1] == '/'))
+	    cur++;
+
+        (out++)[0] = (cur++)[0];
+    }
+ done_cd:
+    out[0] = '\0';
+
+    /* Reset to the beginning of the first segment for the next sequence.  */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+	return(0);
+
+    /*
+     * Analyze each segment in sequence for cases (e) and (f).
+     *
+     * e) All occurrences of "<segment>/../", where <segment> is a
+     *    complete path segment not equal to "..", are removed from the
+     *    buffer string.  Removal of these path segments is performed
+     *    iteratively, removing the leftmost matching pattern on each
+     *    iteration, until no matching pattern remains.
+     *
+     * f) If the buffer string ends with "<segment>/..", where <segment>
+     *    is a complete path segment not equal to "..", that
+     *    "<segment>/.." is removed.
+     *
+     * To satisfy the "iterative" clause in (e), we need to collapse the
+     * string every time we find something that needs to be removed.  Thus,
+     * we don't need to keep two pointers into the string: we only need a
+     * "current position" pointer.
+     */
+    while (1) {
+        char *segp, *tmp;
+
+        /* At the beginning of each iteration of this loop, "cur" points to
+         * the first character of the segment we want to examine.
+         */
+
+        /* Find the end of the current segment.  */
+        segp = cur;
+        while ((segp[0] != '/') && (segp[0] != '\0'))
+          ++segp;
+
+        /* If this is the last segment, we're done (we need at least two
+         * segments to meet the criteria for the (e) and (f) cases).
+         */
+        if (segp[0] == '\0')
+          break;
+
+        /* If the first segment is "..", or if the next segment _isn't_ "..",
+         * keep this segment and try the next one.
+         */
+        ++segp;
+        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+            || ((segp[0] != '.') || (segp[1] != '.')
+                || ((segp[2] != '/') && (segp[2] != '\0')))) {
+          cur = segp;
+          continue;
+        }
+
+        /* If we get here, remove this segment and the next one and back up
+         * to the previous segment (if there is one), to implement the
+         * "iteratively" clause.  It's pretty much impossible to back up
+         * while maintaining two pointers into the buffer, so just compact
+         * the whole buffer now.
+         */
+
+        /* If this is the end of the buffer, we're done.  */
+        if (segp[2] == '\0') {
+          cur[0] = '\0';
+          break;
+        }
+        /* Valgrind complained, strcpy(cur, segp + 3); */
+        /* string will overlap, do not use strcpy */
+        tmp = cur;
+        segp += 3;
+        while ((*tmp++ = *segp++) != 0)
+          ;
+
+        /* If there are no previous segments, then keep going from here.  */
+        segp = cur;
+        while ((segp > path) && ((--segp)[0] == '/'))
+          ;
+        if (segp == path)
+          continue;
+
+        /* "segp" is pointing to the end of a previous segment; find its
+         * start.  We need to back up to the previous segment and start
+         * over with that to handle things like "foo/bar/../..".  If we
+         * don't do this, then on the first pass we'll remove the "bar/..",
+         * but be pointing at the second ".." so we won't realize we can also
+         * remove the "foo/..".
+         */
+        cur = segp;
+        while ((cur > path) && (cur[-1] != '/'))
+          --cur;
+    }
+    out[0] = '\0'; /* NOTE(review): "out" dates from the (c)/(d) pass; looks redundant - confirm */
+
+    /*
+     * g) If the resulting buffer string still begins with one or more
+     *    complete path segments of "..", then the reference is
+     *    considered to be in error. Implementations may handle this
+     *    error by retaining these components in the resolved path (i.e.,
+     *    treating them as part of the final URI), by removing them from
+     *    the resolved path (i.e., discarding relative levels above the
+     *    root), or by avoiding traversal of the reference.
+     *
+     * We discard them from the final path.
+     */
+    if (path[0] == '/') {
+      cur = path;
+      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+             && ((cur[3] == '/') || (cur[3] == '\0')))
+	cur += 3;
+
+      if (cur != path) {
+	out = path;
+	while (cur[0] != '\0')
+          (out++)[0] = (cur++)[0];
+	out[0] = 0;
+      }
+    }
+
+    return(0);
+}
+
+static int is_hex(char c) {
+    /* 1 for an ASCII hexadecimal digit of either case, 0 otherwise. */
+    int digit = (c >= '0') && (c <= '9');
+    int lower = (c >= 'a') && (c <= 'f');
+    int upper = (c >= 'A') && (c <= 'F');
+    return digit || lower || upper;
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str:  the string to unescape
+ * @len:  number of input bytes to process (<= 0 means the whole string)
+ * @target:  optional destination buffer; when NULL one is allocated
+ *
+ * Copies @str while turning each %XX sequence (two hex digits) into the
+ * single byte it encodes; no check is made that the string is a URI and
+ * a '%' not followed by two hex digits is copied unchanged.  The result
+ * is never longer than the input.
+ *
+ * Returns the NUL-terminated unescaped string (@target when one was
+ * supplied), or NULL if @str is NULL
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+    const char *src;
+    char *result, *dst;
+    int i, byte;
+
+    if (str == NULL)
+        return(NULL);
+    if (len <= 0)
+        len = strlen(str);
+    if (len < 0)
+        return(NULL);
+
+    /* Write into the caller's buffer when given, else allocate one. */
+    result = (target != NULL) ? target : g_malloc(len + 1);
+    src = str;
+    dst = result;
+    while (len > 0) {
+        if ((len <= 2) || (src[0] != '%') ||
+            !is_hex(src[1]) || !is_hex(src[2])) {
+            /* Plain byte, or a '%' that does not start a valid escape. */
+            *dst++ = *src++;
+            len--;
+            continue;
+        }
+        /* Fold the two hex digits following '%' into one byte. */
+        byte = 0;
+        for (i = 1; i <= 2; i++) {
+            char digit = src[i];
+            if ((digit >= '0') && (digit <= '9'))
+                byte = byte * 16 + (digit - '0');
+            else if ((digit >= 'a') && (digit <= 'f'))
+                byte = byte * 16 + (digit - 'a') + 10;
+            else
+                byte = byte * 16 + (digit - 'A') + 10;
+        }
+        *dst++ = byte;
+        src += 3;
+        len -= 3;
+    }
+    *dst = 0;
+    return(result);
+}
+
+/**
+ * uri_string_escape:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape (may be NULL)
+ *
+ * Percent-escapes ("%XX", upper-case hex) every byte of @str that is not
+ * '@', is not accepted by IS_UNRESERVED() and does not appear in @list.
+ *
+ * Returns a new escaped string, or NULL on a NULL @str or failed resize.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+    static const char hexdigits[] = "0123456789ABCDEF";
+    char *ret, ch;
+    char *temp;
+    const char *in;
+    int len, out;
+
+    if (str == NULL)
+        return(NULL);
+    if (str[0] == 0)
+        return(g_strdup(str));
+    len = strlen(str);
+    if (!(len > 0)) return(NULL);
+
+    len += 20;
+    ret = g_malloc(len);
+    in = str;
+    out = 0;
+    while(*in != 0) {
+        /* Keep room for one "%XX" triplet plus the terminating NUL. */
+        if (len - out <= 3) {
+            temp = realloc2n(ret, &len);
+            /* Same failure handling as uri_to_string(): free and bail. */
+            if (temp == NULL) {
+                g_free(ret);
+                return(NULL);
+            }
+            ret = temp;
+        }
+
+        ch = *in++;
+        if ((ch != '@') && (!IS_UNRESERVED(ch)) &&
+            ((list == NULL) || (!strchr(list, ch)))) {
+            /* Split the byte as unsigned: where char is signed, bytes
+             * >= 0x80 are negative and "ch >> 4" is not the high nibble. */
+            unsigned char val = (unsigned char) ch;
+
+            ret[out++] = '%';
+            ret[out++] = hexdigits[val >> 4];
+            ret[out++] = hexdigits[val & 0xF];
+        } else {
+            ret[out++] = ch;
+        }
+    }
+    ret[out] = 0;
+    return(ret);
+}
+
+/************************************************************************
+ *									*
+ *			Public functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @uri:  the URI instance found in the document
+ * @base:  the base value
+ *
+ * Computes the final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ *         of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+    char *val = NULL;
+    int ret, len, indx, cur, out;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    URI *res = NULL;
+
+    /*
+     * 1) The URI reference is parsed into the potential four components and
+     *    fragment identifier, as described in Section 4.3.
+     *
+     *    NOTE that a completely empty URI is treated by modern browsers
+     *    as a reference to "." rather than as a synonym for the current
+     *    URI.  Should we do that here?
+     */
+    if (uri == NULL)
+	ret = -1;
+    else {
+	if (*uri) {
+	    ref = uri_new();
+	    if (ref == NULL)
+		goto done;
+	    ret = uri_parse_into(ref, uri);
+	}
+	else
+	    ret = 0;
+    }
+    if (ret != 0)
+	goto done;
+    if ((ref != NULL) && (ref->scheme != NULL)) {
+	/*
+	 * The URI is absolute don't modify.
+	 */
+	val = g_strdup(uri);
+	goto done;
+    }
+    if (base == NULL)
+	ret = -1;
+    else {
+	bas = uri_new();
+	if (bas == NULL)
+	    goto done;
+	ret = uri_parse_into(bas, base);
+    }
+    if (ret != 0) {
+	if (ref)
+	    val = uri_to_string(ref);
+	goto done;
+    }
+    if (ref == NULL) {
+	/*
+	 * the base fragment must be ignored
+	 */
+	if (bas->fragment != NULL) {
+	    g_free(bas->fragment);
+	    bas->fragment = NULL;
+	}
+	val = uri_to_string(bas);
+	goto done;
+    }
+
+    /*
+     * 2) If the path component is empty and the scheme, authority, and
+     *    query components are undefined, then it is a reference to the
+     *    current document and we are done.  Otherwise, the reference URI's
+     *    query and fragment components are defined as found (or not found)
+     *    within the URI reference and not inherited from the base URI.
+     *
+     *    NOTE that in modern browsers, the parsing differs from the above
+     *    in the following aspect:  the query component is allowed to be
+     *    defined while still treating this as a reference to the current
+     *    document.
+     */
+    res = uri_new();
+    if (res == NULL)
+	goto done;
+    if ((ref->scheme == NULL) && (ref->path == NULL) &&
+	((ref->authority == NULL) && (ref->server == NULL))) {
+	if (bas->scheme != NULL)
+	    res->scheme = g_strdup(bas->scheme);
+	if (bas->authority != NULL)
+	    res->authority = g_strdup(bas->authority);
+	else if (bas->server != NULL) {
+	    res->server = g_strdup(bas->server);
+	    if (bas->user != NULL)
+		res->user = g_strdup(bas->user);
+	    res->port = bas->port;
+	}
+	if (bas->path != NULL)
+	    res->path = g_strdup(bas->path);
+	if (ref->query != NULL)
+	    res->query = g_strdup (ref->query);
+	else if (bas->query != NULL)
+	    res->query = g_strdup(bas->query);
+	if (ref->fragment != NULL)
+	    res->fragment = g_strdup(ref->fragment);
+	goto step_7;
+    }
+
+    /*
+     * 3) If the scheme component is defined, indicating that the reference
+     *    starts with a scheme name, then the reference is interpreted as an
+     *    absolute URI and we are done.  Otherwise, the reference URI's
+     *    scheme is inherited from the base URI's scheme component.
+     */
+    if (ref->scheme != NULL) {
+	val = uri_to_string(ref);
+	goto done;
+    }
+    if (bas->scheme != NULL)
+	res->scheme = g_strdup(bas->scheme);
+
+    if (ref->query != NULL)
+	res->query = g_strdup(ref->query);
+    if (ref->fragment != NULL)
+	res->fragment = g_strdup(ref->fragment);
+
+    /*
+     * 4) If the authority component is defined, then the reference is a
+     *    network-path and we skip to step 7.  Otherwise, the reference
+     *    URI's authority is inherited from the base URI's authority
+     *    component, which will also be undefined if the URI scheme does not
+     *    use an authority component.
+     */
+    if ((ref->authority != NULL) || (ref->server != NULL)) {
+	if (ref->authority != NULL)
+	    res->authority = g_strdup(ref->authority);
+	else {
+	    res->server = g_strdup(ref->server);
+	    if (ref->user != NULL)
+		res->user = g_strdup(ref->user);
+            res->port = ref->port;
+	}
+	if (ref->path != NULL)
+	    res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+    if (bas->authority != NULL)
+	res->authority = g_strdup(bas->authority);
+    else if (bas->server != NULL) {
+	res->server = g_strdup(bas->server);
+	if (bas->user != NULL)
+	    res->user = g_strdup(bas->user);
+	res->port = bas->port;
+    }
+
+    /*
+     * 5) If the path component begins with a slash character ("/"), then
+     *    the reference is an absolute-path and we skip to step 7.
+     */
+    if ((ref->path != NULL) && (ref->path[0] == '/')) {
+	res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+
+
+    /*
+     * 6) If this step is reached, then we are resolving a relative-path
+     *    reference.  The relative path needs to be merged with the base
+     *    URI's path.  Although there are many ways to do this, we will
+     *    describe a simple method using a separate string buffer.
+     *
+     * Allocate a buffer large enough for the result string.
+     */
+    len = 2; /* extra / and 0 */
+    if (ref->path != NULL)
+	len += strlen(ref->path);
+    if (bas->path != NULL)
+	len += strlen(bas->path);
+    res->path = g_malloc(len);
+    res->path[0] = 0;
+
+    /*
+     * a) All but the last segment of the base URI's path component is
+     *    copied to the buffer.  In other words, any characters after the
+     *    last (right-most) slash character, if any, are excluded.
+     */
+    cur = 0;
+    out = 0;
+    if (bas->path != NULL) {
+	while (bas->path[cur] != 0) {
+	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+		cur++;
+	    if (bas->path[cur] == 0)
+		break;
+
+	    cur++;
+	    while (out < cur) {
+		res->path[out] = bas->path[out];
+		out++;
+	    }
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * b) The reference's path component is appended to the buffer
+     *    string.
+     */
+    if (ref->path != NULL && ref->path[0] != 0) {
+	indx = 0;
+	/*
+	 * Ensure the path includes a '/'
+	 */
+	if ((out == 0) && (bas->server != NULL))
+	    res->path[out++] = '/';
+	while (ref->path[indx] != 0) {
+	    res->path[out++] = ref->path[indx++];
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * Steps c) to h) are really path normalization steps
+     */
+    normalize_uri_path(res->path);
+
+step_7:
+
+    /*
+     * 7) The resulting URI components, including any inherited from the
+     *    base URI, are recombined to give the absolute form of the URI
+     *    reference.
+     */
+    val = uri_to_string(res);
+
+done:
+    if (ref != NULL)
+	uri_free(ref);
+    if (bas != NULL)
+	uri_free(bas);
+    if (res != NULL)
+	uri_free(res);
+    return(val);
+}
+
+/**
+ * uri_resolve_relative:
+ * @uri:  the URI reference under consideration
+ * @base:  the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base.  Some examples of this operation include:
+ *     base = "http://site1.com/docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   pic1.gif
+ *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ *     base = "docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really weird or complicated, it may be
+ *       worthwhile to first convert it into a "nice" one by calling
+ *       uri_resolve (using 'base') before calling this routine,
+ *       since this routine (for reasonable efficiency) assumes URI has
+ *       already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * of error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+    char *val = NULL;
+    int ret;
+    int ix;
+    int pos = 0;
+    int nbslash = 0;
+    int len;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    char *bptr, *uptr, *vptr;
+    int remove_path = 0;
+
+    if ((uri == NULL) || (*uri == 0))
+	return NULL;
+
+    /*
+     * First parse URI into a standard form
+     */
+    ref = uri_new ();
+    if (ref == NULL)
+	return NULL;
+    /* If URI not already in "relative" form */
+    if (uri[0] != '.') {
+	ret = uri_parse_into (ref, uri);
+	if (ret != 0)
+	    goto done;		/* Error in URI, return NULL */
+    } else
+	ref->path = g_strdup(uri);
+
+    /*
+     * Next parse base into the same standard form
+     */
+    if ((base == NULL) || (*base == 0)) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    bas = uri_new ();
+    if (bas == NULL)
+	goto done;
+    if (base[0] != '.') {
+	ret = uri_parse_into (bas, base);
+	if (ret != 0)
+	    goto done;		/* Error in base, return NULL */
+    } else
+	bas->path = g_strdup(base);
+
+    /*
+     * If the scheme / server on the URI differs from the base, just
+     * return the URI (a missing server only matches a missing server)
+     */
+    if ((ref->scheme != NULL) &&
+	((bas->scheme == NULL) || (strcmp (bas->scheme, ref->scheme)) ||
+	 ((bas->server == NULL) != (ref->server == NULL)) ||
+	 ((bas->server != NULL) && (strcmp (bas->server, ref->server))))) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    /* Either path may be NULL here; two missing paths count as equal */
+    if (((bas->path == NULL) && (ref->path == NULL)) ||
+	((bas->path != NULL) && (ref->path != NULL) &&
+	 !strcmp(bas->path, ref->path))) {
+	val = g_strdup("");
+	goto done;
+    }
+    if (bas->path == NULL) {
+	val = g_strdup(ref->path);
+	goto done;
+    }
+    if (ref->path == NULL) {
+        ref->path = (char *) "/";
+	remove_path = 1;
+    }
+
+    /*
+     * At this point (at last!) we can compare the two paths, starting with
+     * the case where either path component may be missing (bug 316224)
+     */
+    if (bas->path == NULL) {
+	if (ref->path != NULL) {
+	    uptr = ref->path;
+	    if (*uptr == '/')
+		uptr++;
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	}
+	goto done;
+    }
+    bptr = bas->path;
+    if (ref->path == NULL) {
+	for (ix = 0; bptr[ix] != 0; ix++) {
+	    if (bptr[ix] == '/')
+		nbslash++;
+	}
+	uptr = NULL;
+	len = 1;	/* this is for a string terminator only */
+    } else {
+    /*
+     * Next we compare the two strings and find where they first differ
+     */
+	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+            pos += 2;
+	if ((*bptr == '.') && (bptr[1] == '/'))
+            bptr += 2;
+	else if ((*bptr == '/') && (ref->path[pos] != '/'))
+	    bptr++;
+	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+	    pos++;
+
+	if (bptr[pos] == ref->path[pos]) {
+	    val = g_strdup("");
+	    goto done;		/* (I can't imagine why anyone would do this) */
+	}
+
+	/*
+	 * In URI, "back up" to the last '/' encountered.  This will be the
+	 * beginning of the "unique" suffix of URI
+	 */
+	ix = pos;
+	if ((ref->path[ix] == '/') && (ix > 0))
+	    ix--;
+	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+	    ix -= 2;
+	for (; ix > 0; ix--) {
+	    if (ref->path[ix] == '/')
+		break;
+	}
+	if (ix == 0) {
+	    uptr = ref->path;
+	} else {
+	    ix++;
+	    uptr = &ref->path[ix];
+	}
+
+	/*
+	 * In base, count the number of '/' from the differing point
+	 */
+	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+	    for (; bptr[ix] != 0; ix++) {
+		if (bptr[ix] == '/')
+		    nbslash++;
+	    }
+	}
+	len = strlen (uptr) + 1;
+    }
+
+    if (nbslash == 0) {
+	if (uptr != NULL)
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	goto done;
+    }
+
+    /*
+     * Allocate just enough space for the returned string: the remainder of
+     * the URI, plus the "../" groups, plus one for the terminator
+     */
+    val = g_malloc (len + 3 * nbslash);
+    vptr = val;
+    /*
+     * Put in as many "../" as needed
+     */
+    for (; nbslash>0; nbslash--) {
+	*vptr++ = '.';
+	*vptr++ = '.';
+	*vptr++ = '/';
+    }
+    /*
+     * Finish up with the end of the URI
+     */
+    if (uptr != NULL) {
+        if ((vptr > val) && (len > 0) &&
+	    (uptr[0] == '/') && (vptr[-1] == '/')) {
+	    memcpy (vptr, uptr + 1, len - 1);
+	    vptr[len - 2] = 0;
+	} else {
+	    memcpy (vptr, uptr, len);
+	    vptr[len - 1] = 0;
+	}
+    } else {
+	vptr[len - 1] = 0;
+    }
+
+    /* escape the freshly-built path */
+    vptr = val;
+	/* exception characters from uri_to_string */
+    val = uri_string_escape(vptr, "/;&=+$,");
+    g_free(vptr);
+
+done:
+    /*
+     * Free the working variables
+     */
+    if (remove_path != 0)
+        ref->path = NULL;
+    if (ref != NULL)
+	uri_free (ref);
+    if (bas != NULL)
+	uri_free (bas);
+
+    return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+    /* A non-positive request still gets room for one parameter. */
+    int capacity = (init_alloc > 0) ? init_alloc : 1;
+    struct QueryParams *set = g_new(QueryParams, 1);
+
+    /* Start empty; "alloc" records how many slots "p" can hold. */
+    set->n = 0;
+    set->alloc = capacity;
+    set->p = g_new(QueryParam, capacity);
+
+    return set;
+}
+
+/* Append a copy of the name/value pair to the set, doubling the
+ * parameter array first when it is full.  Always returns 0.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+               const char *name, const char *value)
+{
+    QueryParam *slot;
+
+    if (ps->n >= ps->alloc) {
+        ps->alloc *= 2;
+        ps->p = g_renew(QueryParam, ps->p, ps->alloc);
+    }
+    slot = &ps->p[ps->n++];
+    slot->name = g_strdup(name);
+    slot->value = g_strdup(value);   /* g_strdup(NULL) yields NULL */
+    slot->ignore = 0;
+    return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+    int left = ps->n;
+    /* Entries own their strings; release them before the array. */
+    while (left-- > 0) {
+        g_free (ps->p[left].name);
+        g_free (ps->p[left].value);
+    }
+    g_free (ps->p);
+    g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+    struct QueryParams *ps;
+    const char *end, *eq;
+
+    ps = query_params_new (0);
+    if (!query || query[0] == '\0') return ps;
+
+    while (*query) {
+        char *name = NULL, *value = NULL;
+
+        /* Find the next separator, or end of the string. */
+        end = strchr (query, '&');
+        if (!end)
+            end = strchr (query, ';');
+        if (!end)
+            end = query + strlen (query);
+
+        /* Find the first '=' character between here and end. */
+        eq = strchr (query, '=');
+        if (eq && eq >= end) eq = NULL;
+
+        /* Empty section (eg. "&&"). */
+        if (end == query)
+            goto next;
+
+        /* A leading '=' means "=value" (or a bare "="), which consistent
+         * with CGI.pm we _ignore_.  Test it before the "name=" case, where
+         * the name length 0 would make uri_string_unescape use strlen().
+         */
+        else if (query == eq)
+            goto next;
+        /* If there is no '=' character, then we have just "name";
+         * the value is left NULL.
+         */
+        else if (!eq) {
+            name = uri_string_unescape (query, end - query, NULL);
+            value = NULL;
+        }
+        /* Or if we have "name=" here: store an empty value directly,
+         * since uri_string_unescape treats len = 0 as "whole string".
+         */
+        else if (eq+1 == end) {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = g_new0(char, 1);
+        }
+        /* Otherwise it's "name=value". */
+        else {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+        }
+
+        /* Append to the parameter set. */
+        query_params_append (ps, name, value);
+        g_free(name);
+        g_free(value);
+
+    next:
+        query = end;
+        if (*query) query ++; /* skip the separator */
+    }
+
+    return ps;
+}
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000..de99b3b
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,113 @@
+/**
+ * Summary: library of generic URI related routines
+ * Description: library of generic URI related routines
+ *              Implements RFC 2396
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * Author: Daniel Veillard
+ **
+ * Copyright (C) 2007 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones at redhat.com>
+ *
+ * Utility functions to help parse and assemble query strings.
+ */
+
+#ifndef QEMU_URI_H
+#define QEMU_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * URI:
+ *
+ * A parsed URI reference. This is a struct containing the various fields
+ * as described in RFC 2396 but separated for further processing.
+ */
+typedef struct URI {
+    char *scheme;	/* the URI scheme */
+    char *opaque;	/* opaque part */
+    char *authority;	/* the authority part */
+    char *server;	/* the server part */
+    char *user;		/* the user part */
+    int port;		/* the port number */
+    char *path;		/* the path string */
+    char *fragment;	/* the fragment identifier */
+    int  cleanup;	/* parsing potentially unclean URI */
+    char *query;	/* the query string (as it appears in the URI) */
+} URI;
+
+URI *uri_new(void);
+char *uri_resolve(const char *URI, const char *base);
+char *uri_resolve_relative(const char *URI, const char *base);
+URI *uri_parse(const char *str);
+URI *uri_parse_raw(const char *str, int raw);
+int uri_parse_into(URI *uri, const char *str);
+char *uri_to_string(URI *uri);
+char *uri_string_escape(const char *str, const char *list);
+char *uri_string_unescape(const char *str, int len, char *target);
+void uri_free(URI *uri);
+
+/* Single web service query parameter 'name=value'. */
+typedef struct QueryParam {
+  char *name;			/* Name (unescaped). */
+  char *value;			/* Value (unescaped). */
+  int ignore;			/* Ignore this field in qparam_get_query */
+} QueryParam;
+
+/* Set of parameters. */
+typedef struct QueryParams {
+  int n;			/* number of parameters used */
+  int alloc;			/* allocated space */
+  QueryParam *p;		/* array of parameters */
+} QueryParams;
+
+struct QueryParams *query_params_new (int init_alloc);
+int query_param_append (QueryParams *ps, const char *name, const char *value);
+extern char *query_param_to_string (const QueryParams *ps);
+extern QueryParams *query_params_parse (const char *query);
+extern void query_params_free (QueryParams *ps);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QEMU_URI_H */
commit 00f78533326c5ba2e62fafada16655aa558a5520
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Sep 24 14:40:56 2012 +0530

    aio: Fix qemu_aio_wait() to maintain correct walking_handlers count
    
    Fix qemu_aio_wait() to ensure that registered aio handlers don't get
    deleted when they are still active. This is ensured by maintaining the
    right count of walking_handlers.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Bharata B Rao <bharata at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/aio.c b/aio.c
index 0a9eb10..99b8b72 100644
--- a/aio.c
+++ b/aio.c
@@ -119,7 +119,7 @@ bool qemu_aio_wait(void)
         return true;
     }
 
-    walking_handlers = 1;
+    walking_handlers++;
 
     FD_ZERO(&rdfds);
     FD_ZERO(&wrfds);
@@ -147,7 +147,7 @@ bool qemu_aio_wait(void)
         }
     }
 
-    walking_handlers = 0;
+    walking_handlers--;
 
     /* No AIO operations?  Get us out of here */
     if (!busy) {
@@ -159,7 +159,7 @@ bool qemu_aio_wait(void)
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        walking_handlers = 1;
+        walking_handlers++;
 
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
@@ -187,7 +187,7 @@ bool qemu_aio_wait(void)
             }
         }
 
-        walking_handlers = 0;
+        walking_handlers--;
     }
 
     return true;
commit 870f5681c9dbafc738082b1fd48e0cc013bf43c7
Author: Jeff Cody <jcody at redhat.com>
Date:   Tue Sep 25 12:29:39 2012 -0400

    block: after creating a live snapshot, make old image read-only
    
    Currently, after a live snapshot of a drive, the image that has
    been 'demoted' to be below the new active layer remains r/w.
    This patch reopens it read-only.
    
    Note that we do not check for error on the reopen(), because we
    will not abort the snapshots if the reopen fails.
    
    This patch depends on the bdrv_reopen() series.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index e5d450f..0267fa3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -805,6 +805,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
         /* This removes our old bs from the bdrv_states, and adds the new bs */
         bdrv_append(states->new_bs, states->old_bs);
+        /* We don't need (or want) to use the transactional
+         * bdrv_reopen_multiple() across all the entries at once, because we
+         * don't want to abort all of them if one of them fails the reopen */
+        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
     }
 
     /* success */
commit 946d58be1533bf843b499df12e1d9f97b28245c8
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Tue Sep 25 15:47:36 2012 +0200

    block-migration: Flush requests in blk_mig_cleanup
    
    When cancelling block migration, all in-flight requests of the block
    migration must be completed before the data can be freed. This was
    visible as failing assertions and segfaults.
    
    Reported-by: Peter Lieven <pl at dlhnet.de>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block-migration.c b/block-migration.c
index 7def8ab..ed93301 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -519,6 +519,8 @@ static void blk_mig_cleanup(void)
     BlkMigDevState *bmds;
     BlkMigBlock *blk;
 
+    bdrv_drain_all();
+
     set_dirty_tracking(0);
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
commit ad3005ad8c70a69705149d3ce6d1e51fb76edb15
Author: Amit Shah <amit.shah at redhat.com>
Date:   Tue Sep 25 00:05:16 2012 +0530

    virtio-serial-bus: let chardev know the exact number of bytes requested
    
    Using the virtqueue_avail_bytes() function had an unnecessarily
    crippling effect on the number of bytes needed by the guest as reported
    to the chardev layer in the can_read() callback.
    
    Using the new virtqueue_get_avail_bytes() function will let us advertise
    the exact number of bytes we can send to the guest.
    
    Signed-off-by: Amit Shah <amit.shah at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 82073f5..d20bd8b 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -287,6 +287,7 @@ ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf,
 size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
 {
     VirtQueue *vq = port->ivq;
+    unsigned int bytes;
 
     if (!virtio_queue_ready(vq) ||
         !(port->vser->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) ||
@@ -296,14 +297,8 @@ size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
     if (use_multiport(port->vser) && !port->guest_connected) {
         return 0;
     }
-
-    if (virtqueue_avail_bytes(vq, 4096, 0)) {
-        return 4096;
-    }
-    if (virtqueue_avail_bytes(vq, 1, 0)) {
-        return 1;
-    }
-    return 0;
+    virtqueue_get_avail_bytes(vq, &bytes, NULL);
+    return bytes;
 }
 
 static void flush_queued_data_bh(void *opaque)
commit 0d8d7690850eb0cf2b2b60933cf47669a6b6f18f
Author: Amit Shah <amit.shah at redhat.com>
Date:   Tue Sep 25 00:05:15 2012 +0530

    virtio: Introduce virtqueue_get_avail_bytes()
    
    The current virtqueue_avail_bytes() is oddly named, and checks if a
    particular number of bytes are available in a vq.  A better API is to
    fetch the number of bytes available in the vq, and let the caller do
    what's interesting with the numbers.
    
    Introduce virtqueue_get_avail_bytes(), which returns the number of bytes
    for buffers marked for both, in as well as out.  virtqueue_avail_bytes()
    is made a wrapper over this new function.
    
    Signed-off-by: Amit Shah <amit.shah at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio.c b/hw/virtio.c
index cfad363..6821092 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -335,7 +335,8 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
     return next;
 }
 
-int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes)
 {
     unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
@@ -380,13 +381,9 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
             }
 
             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
-                if (in_bytes > 0 &&
-                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
-                    return 1;
+                in_total += vring_desc_len(desc_pa, i);
             } else {
-                if (out_bytes > 0 &&
-                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
-                    return 1;
+                out_total += vring_desc_len(desc_pa, i);
             }
         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
 
@@ -395,7 +392,24 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
         else
             total_bufs++;
     }
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+}
 
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes)
+{
+    unsigned int in_total, out_total;
+
+    virtqueue_get_avail_bytes(vq, &in_total, &out_total);
+    if ((in_bytes && in_bytes < in_total)
+        || (out_bytes && out_bytes < out_total)) {
+        return 1;
+    }
     return 0;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 7a4f564..80de375 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -147,7 +147,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
     size_t num_sg, int is_write);
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
-int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes);
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes);
 
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
commit 385ce95d9d060f20870402c8b2b503d0b6ab8af0
Author: Amit Shah <amit.shah at redhat.com>
Date:   Tue Sep 25 00:05:14 2012 +0530

    virtio: use unsigned int for counting bytes in vq
    
    The virtqueue_avail_bytes() function counts bytes in an int.  Use an
    unsigned int instead.
    
    Signed-off-by: Amit Shah <amit.shah at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio.c b/hw/virtio.c
index b5764bb..cfad363 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -338,7 +338,7 @@ static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
 int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
 {
     unsigned int idx;
-    int total_bufs, in_total, out_total;
+    unsigned int total_bufs, in_total, out_total;
 
     idx = vq->last_avail_idx;
 
commit 844b5cea8ea6cbe964670a26d1b34037067569df
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 12:50:32 2012 +0200

    iov: add const annotation
    
    iov_from_buf does not change iov, make it const.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/iov.c b/iov.c
index 60705c7..c6a66f0 100644
--- a/iov.c
+++ b/iov.c
@@ -26,7 +26,7 @@
 # include <sys/socket.h>
 #endif
 
-size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                     size_t offset, const void *buf, size_t bytes)
 {
     size_t done;
diff --git a/iov.h b/iov.h
index 381f37a..a73569f 100644
--- a/iov.h
+++ b/iov.h
@@ -36,7 +36,7 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
  * such "large" value is -1 (sinice size_t is unsigned),
  * so specifying `-1' as `bytes' means 'up to the end of iovec'.
  */
-size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                     size_t offset, const void *buf, size_t bytes);
 size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                   size_t offset, void *buf, size_t bytes);
commit 40bad8f3deba15e2074ff34cfe923c12916b1cc5
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 15:15:43 2012 +0200

    virtio-net: fix used len for tx
    
    There is no out sg for TX, so used buf length for tx
    should always be 0.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 6490743..247d7be 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -690,7 +690,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
-    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
+    virtqueue_push(n->tx_vq, &n->async_tx.elem, 0);
     virtio_notify(&n->vdev, n->tx_vq);
 
     n->async_tx.elem.out_num = n->async_tx.len = 0;
@@ -754,7 +754,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 
         len += ret;
 
-        virtqueue_push(vq, &elem, len);
+        virtqueue_push(vq, &elem, 0);
         virtio_notify(&n->vdev, vq);
 
         if (++num_packets >= n->tx_burst) {
commit 0cea71a207508c2b8f563b2644ac46009832c8f4
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 15:09:30 2012 +0200

    virtio: don't mark unaccessed memory as dirty
    
    offset of accessed buffer is calculated using iov_length, so it
    can exceed accessed len. If that happens
    math in len - offset wraps around, and size becomes wrong.
    As the real value is 0, this is harmless but unnecessary.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio.c b/hw/virtio.c
index 209c763..b5764bb 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -241,7 +241,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                   elem->in_sg[i].iov_len,
                                   1, size);
 
-        offset += elem->in_sg[i].iov_len;
+        offset += size;
     }
 
     for (i = 0; i < elem->out_num; i++)
commit 6f8fd2530e9a530f237240daf1c981fa5df7f978
Merge: d885bdd... 3dc3e7d...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Thu Sep 27 21:48:56 2012 +0200

    Merge branch 'arm-devs.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm
    
    * 'arm-devs.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm:
      Versatile Express: Add modelling of NOR flash
      Versatile Express: Fix NOR flash 0 address and remove flash alias
      hw/armv7m_nvic: Correctly register GIC region when setting up NVIC
      pl190: fix read of VECTADDR

commit d885bdd481fc1c11d3158cc1c4c68bffdb2c26fe
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:53 2012 -0700

    target-s390x: Tidy cpu_dump_state
    
    The blank lines inside the single dump make it difficult for the
    eye to pick out the block.  Worse, with interior newlines, but
    no blank line following, the PSW line appears to belong to the
    next dump block.
    
    Reviewed-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 4cc9225..db464cc 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -79,6 +79,14 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
 {
     int i;
 
+    if (env->cc_op > 3) {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
+                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
+    } else {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
+                    env->psw.mask, env->psw.addr, env->cc_op);
+    }
+
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "R%02d=%016" PRIx64, i, env->regs[i]);
         if ((i % 4) == 3) {
@@ -97,8 +105,6 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
         }
     }
 
-    cpu_fprintf(f, "\n");
-
 #ifndef CONFIG_USER_ONLY
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "C%02d=%016" PRIx64, i, env->cregs[i]);
@@ -110,22 +116,14 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
     }
 #endif
 
-    cpu_fprintf(f, "\n");
-
-    if (env->cc_op > 3) {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
-                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
-    } else {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
-                    env->psw.mask, env->psw.addr, env->cc_op);
-    }
-
 #ifdef DEBUG_INLINE_BRANCHES
     for (i = 0; i < CC_OP_MAX; i++) {
         cpu_fprintf(f, "  %15s = %10ld\t%10ld\n", cc_name(i),
                     inline_branch_miss[i], inline_branch_hit[i]);
     }
 #endif
+
+    cpu_fprintf(f, "\n");
 }
 
 static TCGv_i64 psw_addr;
commit 87a5395bdd75c22e8c9b92c5655810762a7fd5bf
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:52 2012 -0700

    target-s390x: Avoid double CPU_LOG_TB_CPU
    
    This is already handled generically in cpu_exec.
    
    Reviewed-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 6fa76a0..4cc9225 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5220,7 +5220,6 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
         tb->icount = num_insns;
     }
 #if defined(S390X_DEBUG_DISAS)
-    log_cpu_state_mask(CPU_LOG_TB_CPU, env, 0);
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("IN: %s\n", lookup_symbol(pc_start));
         log_target_disas(pc_start, dc.pc - pc_start, 1);
commit 0d404541b24b332f6a822139c6bd889b7e319762
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:51 2012 -0700

    target-s390x: Use CPU_LOG_INT
    
    Three places in the interrupt code did not honor the mask.
    
    Reviewed-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index a5741ec..22256b0 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -511,7 +511,8 @@ static void do_program_interrupt(CPUS390XState *env)
         break;
     }
 
-    qemu_log("%s: code=0x%x ilc=%d\n", __func__, env->int_pgm_code, ilc);
+    qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilc=%d\n",
+                  __func__, env->int_pgm_code, ilc);
 
     lowcore = cpu_physical_memory_map(env->psa, &len, 1);
 
@@ -575,8 +576,8 @@ static void do_ext_interrupt(CPUS390XState *env)
 
 void do_interrupt(CPUS390XState *env)
 {
-    qemu_log("%s: %d at pc=%" PRIx64 "\n", __func__, env->exception_index,
-             env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
+                  __func__, env->exception_index, env->psw.addr);
 
     s390_add_running_cpu(env);
     /* handle external interrupts */
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 2938ac9..e9b3cae 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -53,7 +53,8 @@ void HELPER(exception)(CPUS390XState *env, uint32_t excp)
 #ifndef CONFIG_USER_ONLY
 void program_interrupt(CPUS390XState *env, uint32_t code, int ilc)
 {
-    qemu_log("program interrupt at %#" PRIx64 "\n", env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
+                  env->psw.addr);
 
     if (kvm_enabled()) {
 #ifdef CONFIG_KVM
commit daa47c34a893917d712923b107d33f7b89a3a53b
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:50 2012 -0700

    target-unicore32: Call tcg_gen_debug_insn_start
    
    Acked-by: Guan Xuetao <gxt at mprc.pku.edu.cn>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index b786a6b..36f4f2f 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -1861,6 +1861,10 @@ static void disas_uc32_insn(CPUUniCore32State *env, DisasContext *s)
 {
     unsigned int insn;
 
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
     insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
commit 7193b5f6f52d633531406771b9370d7b591cef88
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:49 2012 -0700

    target-s390x: Call tcg_gen_debug_insn_start
    
    Cc: Alexander Graf <agraf at suse.de>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 3214783..6fa76a0 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -5173,10 +5173,11 @@ static inline void gen_intermediate_code_internal(CPUS390XState *env,
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
-#if defined(S390X_DEBUG_DISAS_VERBOSE)
-        LOG_DISAS("pc " TARGET_FMT_lx "\n",
-                  dc.pc);
-#endif
+
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+            tcg_gen_debug_insn_start(dc.pc);
+        }
+
         disas_s390_insn(env, &dc);
 
         num_insns++;
commit fa547e617c2f499903dccb8f1b9031bfe724e11e
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:48 2012 -0700

    target-m68k: Call tcg_gen_debug_insn_start
    
    Cc: Paul Brook <paul at codesourcery.com>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index fb707f2..451ef74 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2953,6 +2953,10 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s)
 {
     uint16_t insn;
 
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
     insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
commit fdefe51c288866b98e62663fa18c8af1d66bf5f6
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Sep 24 14:55:47 2012 -0700

    Emit debug_insn for CPU_LOG_TB_OP_OPT as well.
    
    For all targets that currently call tcg_gen_debug_insn_start,
    add CPU_LOG_TB_OP_OPT to the condition that gates it.
    
    This is useful for comparing optimization dumps, when the
    pre-optimization dump is merely noise.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 3f9aee1..f707d8d 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -3421,7 +3421,7 @@ static inline void gen_intermediate_code_internal(CPUAlphaState *env,
         insn = cpu_ldl_code(env, ctx.pc);
         num_insns++;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.pc);
         }
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index f4b447a..5fded49 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -9816,7 +9816,7 @@ static inline void gen_intermediate_code_internal(CPUARMState *env,
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 19144b5..755de65 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -3074,8 +3074,9 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
 	int insn_len = 2;
 	int i;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
 		tcg_gen_debug_insn_start(dc->pc);
+        }
 
 	/* Load a halfword onto the instruction register.  */
         dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index eb0cabc..323869d 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -4202,8 +4202,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
+    }
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 5f6dcba..77c2866 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -942,7 +942,7 @@ static const DecoderInfo decinfo[] = {
 
 static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
     }
 
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 9c7d77f..7d864b1 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -1664,8 +1664,9 @@ static inline void decode(DisasContext *dc, uint32_t ir)
 {
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
     dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
diff --git a/target-mips/translate.c b/target-mips/translate.c
index fa79d49..454e5cc 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -12124,8 +12124,9 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
+    }
 
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 325ba09..e2cad3a 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1715,7 +1715,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
             gen_opc_icount[k] = num_insns;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ac915cc..1042268 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9690,8 +9690,9 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env,
         LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
                     ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
                     opc3(ctx.opcode), little_endian ? "little" : "big");
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.nip);
+        }
         ctx.nip += 4;
         table = env->opcodes;
         num_insns++;
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 0fa83ca..9d955eb 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -1924,7 +1924,7 @@ static void decode_opc(DisasContext * ctx)
 {
     uint32_t old_flags = ctx->flags;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
     }
 
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index b95f91c..e5ebedf 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2394,8 +2394,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
     opc = GET_FIELD(insn, 0, 1);
 
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index ba3ffcb..b9acd70 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -2923,7 +2923,7 @@ static void gen_intermediate_code_internal(
             gen_opc_icount[lj] = insn_count;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc.pc);
         }
 
commit 6673f47da21718d07346b0f3725f0dbf0d6d8e45
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Sep 18 22:43:38 2012 +0200

    tci: Fix for AREG0 free mode
    
    Support for helper functions with 5 arguments was missing
    in the code generator and in the interpreter.
    
    There is no need to pass the constant TCG_AREG0 from the
    code generator to the interpreter. Remove that code for
    the INDEX_op_qemu_st* opcodes.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 3f4a24b..d272a90 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -300,7 +300,7 @@ static const int tcg_target_reg_alloc_order[] = {
 #endif
 };
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 
@@ -309,16 +309,18 @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_R1,
     TCG_REG_R2,
     TCG_REG_R3,
-#if TCG_TARGET_REG_BITS == 32
-    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
 #if 0 /* used for TCG_REG_CALL_STACK */
     TCG_REG_R4,
 #endif
     TCG_REG_R5,
+#if TCG_TARGET_REG_BITS == 32
+    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
     TCG_REG_R6,
     TCG_REG_R7,
 #if TCG_TARGET_NB_REGS >= 16
     TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
 #else
 # error Too few input registers available
 #endif
@@ -798,7 +800,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
-        tcg_out_r(s, TCG_AREG0);
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -809,7 +810,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_r(s, TCG_AREG0);
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
diff --git a/tci.c b/tci.c
index ce8a988..a4f7b78 100644
--- a/tci.c
+++ b/tci.c
@@ -36,17 +36,19 @@
         tcg_abort(); \
     } while (0)
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 #if TCG_TARGET_REG_BITS == 32
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong);
 #else
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong);
+                                    tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong);
 #endif
 
 /* TCI can optionally use a global register variable for env. */
@@ -489,14 +491,17 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
                                           tci_read_reg(TCG_REG_R5),
                                           tci_read_reg(TCG_REG_R6),
                                           tci_read_reg(TCG_REG_R7),
-                                          tci_read_reg(TCG_REG_R8));
+                                          tci_read_reg(TCG_REG_R8),
+                                          tci_read_reg(TCG_REG_R9),
+                                          tci_read_reg(TCG_REG_R10));
             tci_write_reg(TCG_REG_R0, tmp64);
             tci_write_reg(TCG_REG_R1, tmp64 >> 32);
 #else
             tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
                                           tci_read_reg(TCG_REG_R1),
                                           tci_read_reg(TCG_REG_R2),
-                                          tci_read_reg(TCG_REG_R3));
+                                          tci_read_reg(TCG_REG_R3),
+                                          tci_read_reg(TCG_REG_R5));
             tci_write_reg(TCG_REG_R0, tmp64);
 #endif
             break;
commit 8bde9b6f8892c15e46e1f37a37ac038313be4d58
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 26 16:34:29 2012 +0200

    block: live snapshot documentation tweaks
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index 28d8815..f4c2185 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1399,7 +1399,7 @@
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
+#        'absolute-paths'.
 ##
 { 'type': 'BlockdevSnapshot',
   'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
@@ -1453,7 +1453,7 @@
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
+#        'absolute-paths'.
 #
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
commit 9d537c9019e6a05713b44900c78447a0cfb06567
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Thu Sep 20 14:47:02 2012 -0300

    input: index_from_key(): drop unused code
    
    The hex key conversion is unused since last commit.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/input.c b/input.c
index 76ade64..25d3973 100644
--- a/input.c
+++ b/input.c
@@ -186,8 +186,7 @@ static const int key_defs[] = {
 
 int index_from_key(const char *key)
 {
-    int i, keycode;
-    char *endp;
+    int i;
 
     for (i = 0; QKeyCode_lookup[i] != NULL; i++) {
         if (!strcmp(key, QKeyCode_lookup[i])) {
@@ -195,17 +194,6 @@ int index_from_key(const char *key)
         }
     }
 
-    if (strstart(key, "0x", NULL)) {
-        keycode = strtoul(key, &endp, 0);
-        if (*endp == '\0' && keycode >= 0x01 && keycode <= 0xff) {
-            for (i = 0; i < Q_KEY_CODE_MAX; i++) {
-                if (keycode == key_defs[i]) {
-                    break;
-                }
-            }
-        }
-    }
-
     /* Return Q_KEY_CODE_MAX if the key is invalid */
     return i;
 }
commit 9f32897768064841fe9a99145c9d15ab6667ffed
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Thu Sep 20 14:19:47 2012 -0300

    qmp: qmp_send_key(): accept key codes in hex
    
    Before the qapi conversion, the sendkey command could be used to
    send key codes in hex directly to the guest. In HMP, this would
    be like:
    
     (qemu) sendkey 0xdc
    
    However, the qapi conversion broke this, as it only supports sending
    QKeyCode values to the guest. That's a regression.
    
    This commit fixes the problem by adding hex value support down
    the QMP interface, qmp_send_key().
    
    In more detail, this commit:
    
     1. Adds the KeyValue union. This can represent a hex value or
        a QKeyCode value
    
     2. *Changes* the QMP send-key command to take a KeyValue argument
        instead of a QKeyCode one
    
     3. Adapt hmp_send_key() to the QMP interface changes
    
    Item 2 is an incompatible change, but as we're in the development phase
    (and this command was merged a few weeks ago) this shouldn't be
    a problem.
    
    Finally, it's not possible to split this commit without breaking the
    build.
    
    Reported-by: Avi Kivity <avi at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/hmp.c b/hmp.c
index 2de3140..3306bcd 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1113,13 +1113,13 @@ void hmp_closefd(Monitor *mon, const QDict *qdict)
 void hmp_send_key(Monitor *mon, const QDict *qdict)
 {
     const char *keys = qdict_get_str(qdict, "keys");
-    QKeyCodeList *keylist, *head = NULL, *tmp = NULL;
+    KeyValueList *keylist, *head = NULL, *tmp = NULL;
     int has_hold_time = qdict_haskey(qdict, "hold-time");
     int hold_time = qdict_get_try_int(qdict, "hold-time", -1);
     Error *err = NULL;
     char keyname_buf[16];
     char *separator;
-    int keyname_len, idx;
+    int keyname_len;
 
     while (1) {
         separator = strchr(keys, '-');
@@ -1133,15 +1133,8 @@ void hmp_send_key(Monitor *mon, const QDict *qdict)
         }
         keyname_buf[keyname_len] = 0;
 
-        idx = index_from_key(keyname_buf);
-        if (idx == Q_KEY_CODE_MAX) {
-            monitor_printf(mon, "invalid parameter: %s\n", keyname_buf);
-            break;
-        }
-
         keylist = g_malloc0(sizeof(*keylist));
-        keylist->value = idx;
-        keylist->next = NULL;
+        keylist->value = g_malloc0(sizeof(*keylist->value));
 
         if (!head) {
             head = keylist;
@@ -1151,17 +1144,39 @@ void hmp_send_key(Monitor *mon, const QDict *qdict)
         }
         tmp = keylist;
 
+        if (strstart(keyname_buf, "0x", NULL)) {
+            char *endp;
+            int value = strtoul(keyname_buf, &endp, 0);
+            if (*endp != '\0') {
+                goto err_out;
+            }
+            keylist->value->kind = KEY_VALUE_KIND_NUMBER;
+            keylist->value->number = value;
+        } else {
+            int idx = index_from_key(keyname_buf);
+            if (idx == Q_KEY_CODE_MAX) {
+                goto err_out;
+            }
+            keylist->value->kind = KEY_VALUE_KIND_QCODE;
+            keylist->value->qcode = idx;
+        }
+
         if (!separator) {
             break;
         }
         keys = separator + 1;
     }
 
-    if (idx != Q_KEY_CODE_MAX) {
-        qmp_send_key(head, has_hold_time, hold_time, &err);
-    }
+    qmp_send_key(head, has_hold_time, hold_time, &err);
     hmp_handle_error(mon, &err);
-    qapi_free_QKeyCodeList(head);
+
+out:
+    qapi_free_KeyValueList(head);
+    return;
+
+err_out:
+    monitor_printf(mon, "invalid parameter: %s\n", keyname_buf);
+    goto out;
 }
 
 void hmp_screen_dump(Monitor *mon, const QDict *qdict)
diff --git a/input.c b/input.c
index 32c6057..76ade64 100644
--- a/input.c
+++ b/input.c
@@ -228,6 +228,23 @@ static int *keycodes;
 static int keycodes_size;
 static QEMUTimer *key_timer;
 
+static int keycode_from_keyvalue(const KeyValue *value)
+{
+    if (value->kind == KEY_VALUE_KIND_QCODE) {
+        return key_defs[value->qcode];
+    } else {
+        assert(value->kind == KEY_VALUE_KIND_NUMBER);
+        return value->number;
+    }
+}
+
+static void free_keycodes(void)
+{
+    g_free(keycodes);
+    keycodes = NULL;
+    keycodes_size = 0;
+}
+
 static void release_keys(void *opaque)
 {
     int i;
@@ -239,16 +256,14 @@ static void release_keys(void *opaque)
         kbd_put_keycode(keycodes[i]| 0x80);
     }
 
-    g_free(keycodes);
-    keycodes = NULL;
-    keycodes_size = 0;
+    free_keycodes();
 }
 
-void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time,
+void qmp_send_key(KeyValueList *keys, bool has_hold_time, int64_t hold_time,
                   Error **errp)
 {
     int keycode;
-    QKeyCodeList *p;
+    KeyValueList *p;
 
     if (!key_timer) {
         key_timer = qemu_new_timer_ns(vm_clock, release_keys, NULL);
@@ -265,7 +280,13 @@ void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time,
 
     for (p = keys; p != NULL; p = p->next) {
         /* key down events */
-        keycode = key_defs[p->value];
+        keycode = keycode_from_keyvalue(p->value);
+        if (keycode < 0x01 || keycode > 0xff) {
+            error_setg(errp, "invalid hex keycode 0x%x\n", keycode);
+            free_keycodes();
+            return;
+        }
+
         if (keycode & 0x80) {
             kbd_put_keycode(0xe0);
         }
diff --git a/qapi-schema.json b/qapi-schema.json
index c6a6767..28d8815 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2621,12 +2621,26 @@
              'lf', 'help', 'meta_l', 'meta_r', 'compose' ] }
 
 ##
+# @KeyValue
+#
+# Represents a keyboard key.
+#
+# Since: 1.3.0
+##
+{ 'union': 'KeyValue',
+  'data': {
+    'number': 'int',
+    'qcode': 'QKeyCode' } }
+
+##
 # @send-key:
 #
 # Send keys to guest.
 #
-# @keys: key sequence. 'keys' is the name of the key. Use a JSON array to
-#        press several keys simultaneously.
+# @keys: An array of @KeyValue elements. All @KeyValues in this array are
+#        simultaneously sent to the guest. A @KeyValue.number value is sent
+#        directly to the guest, while @KeyValue.qcode must be a valid
+#        @QKeyCode value
 #
 # @hold-time: #optional time to delay key up events, milliseconds. Defaults
 #             to 100
@@ -2638,7 +2652,7 @@
 #
 ##
 { 'command': 'send-key',
-  'data': { 'keys': ['QKeyCode'], '*hold-time': 'int' } }
+  'data': { 'keys': ['KeyValue'], '*hold-time': 'int' } }
 
 ##
 # @screendump:
commit 05a3543dbddd03d6be723be4074e2e661b00b851
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Thu Sep 20 13:44:28 2012 -0300

    input: qmp_send_key(): simplify
    
    The current code duplicates the QKeyCodeList keys in order to store
    the key values for the later run of release_keys(). This is a bit
    complicated though, as we have to care about correct ordering and then
    release_keys() will have to index key_defs[] over again.
    
    Switch to an array of integers, which is dynamically allocated and stores
    the already converted key value.
    
    This simplifies the current code and the next commit.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/input.c b/input.c
index c4b0619..32c6057 100644
--- a/input.c
+++ b/input.c
@@ -224,30 +224,31 @@ int index_from_keycode(int code)
     return i;
 }
 
-static QKeyCodeList *keycodes;
+static int *keycodes;
+static int keycodes_size;
 static QEMUTimer *key_timer;
 
 static void release_keys(void *opaque)
 {
-    int keycode;
-    QKeyCodeList *p;
+    int i;
 
-    for (p = keycodes; p != NULL; p = p->next) {
-        keycode = key_defs[p->value];
-        if (keycode & 0x80) {
+    for (i = 0; i < keycodes_size; i++) {
+        if (keycodes[i] & 0x80) {
             kbd_put_keycode(0xe0);
         }
-        kbd_put_keycode(keycode | 0x80);
+        kbd_put_keycode(keycodes[i]| 0x80);
     }
-    qapi_free_QKeyCodeList(keycodes);
+
+    g_free(keycodes);
     keycodes = NULL;
+    keycodes_size = 0;
 }
 
 void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time,
                   Error **errp)
 {
     int keycode;
-    QKeyCodeList *p, *keylist, *head = NULL, *tmp = NULL;
+    QKeyCodeList *p;
 
     if (!key_timer) {
         key_timer = qemu_new_timer_ns(vm_clock, release_keys, NULL);
@@ -257,31 +258,22 @@ void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time,
         qemu_del_timer(key_timer);
         release_keys(NULL);
     }
+
     if (!has_hold_time) {
         hold_time = 100;
     }
 
     for (p = keys; p != NULL; p = p->next) {
-        keylist = g_malloc0(sizeof(*keylist));
-        keylist->value = p->value;
-        keylist->next = NULL;
-
-        if (!head) {
-            head = keylist;
-        }
-        if (tmp) {
-            tmp->next = keylist;
-        }
-        tmp = keylist;
-
         /* key down events */
         keycode = key_defs[p->value];
         if (keycode & 0x80) {
             kbd_put_keycode(0xe0);
         }
         kbd_put_keycode(keycode & 0x7f);
+
+        keycodes = g_realloc(keycodes, sizeof(int) * (keycodes_size + 1));
+        keycodes[keycodes_size++] = keycode;
     }
-    keycodes = head;
 
     /* delayed key up events */
     qemu_mod_timer(key_timer, qemu_get_clock_ns(vm_clock) +
commit 753637695bcf6b80ea96614de5d31161603ad50f
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Sep 21 13:53:00 2012 -0300

    hmp: dump-guest-memory: hardcode protocol argument to "file:"
    
    Today, it's necessary to specify the protocol you want to use
    when dumping the guest memory, for example:
    
     (qemu) dump-guest-memory file:/tmp/guest-memory
    
    This has a few issues:
    
     1. It's cumbersome to type
     2. We lose file path autocompletion
     3. Being able to specify fd:X in HMP makes little sense for humans
    
    Because of these reasons, hardcode the 'protocol' argument to
    'file:' in HMP.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed67e99..0302458 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -914,12 +914,11 @@ ETEXI
 #if defined(CONFIG_HAVE_CORE_DUMP)
     {
         .name       = "dump-guest-memory",
-        .args_type  = "paging:-p,protocol:s,begin:i?,length:i?",
-        .params     = "[-p] protocol [begin] [length]",
+        .args_type  = "paging:-p,filename:F,begin:i?,length:i?",
+        .params     = "[-p] filename [begin] [length]",
         .help       = "dump guest memory to file"
                       "\n\t\t\t begin(optional): the starting physical address"
                       "\n\t\t\t length(optional): the memory size, in bytes",
-        .user_print = monitor_user_noop,
         .mhandler.cmd = hmp_dump_guest_memory,
     },
 
@@ -929,8 +928,7 @@ STEXI
 @findex dump-guest-memory
 Dump guest memory to @var{protocol}. The file can be processed with crash or
 gdb.
-  protocol: destination file(started with "file:") or destination file
-            descriptor (started with "fd:")
+  filename: dump file name
     paging: do paging to get guest's memory mapping
      begin: the starting physical address. It's optional, and should be
             specified with length together.
diff --git a/hmp.c b/hmp.c
index ba6fbd3..2de3140 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1042,11 +1042,12 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
 {
     Error *errp = NULL;
     int paging = qdict_get_try_bool(qdict, "paging", 0);
-    const char *file = qdict_get_str(qdict, "protocol");
+    const char *file = qdict_get_str(qdict, "filename");
     bool has_begin = qdict_haskey(qdict, "begin");
     bool has_length = qdict_haskey(qdict, "length");
     int64_t begin = 0;
     int64_t length = 0;
+    char *prot;
 
     if (has_begin) {
         begin = qdict_get_int(qdict, "begin");
@@ -1055,9 +1056,12 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict)
         length = qdict_get_int(qdict, "length");
     }
 
-    qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length,
+    prot = g_strconcat("file:", file, NULL);
+
+    qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
                           &errp);
     hmp_handle_error(mon, &errp);
+    g_free(prot);
 }
 
 void hmp_netdev_add(Monitor *mon, const QDict *qdict)
commit 2f61652d660ec1ffdadf926401a174c11f5c13a7
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Sep 21 13:17:55 2012 -0300

    qmp: dump-guest-memory: don't spin if non-blocking fd would block
    
    fd_write_vmcore() will indefinitely spin for a non-blocking
    file-descriptor that would block. However, if the fd is non-blocking,
    how does it make sense to spin?
    
    Change this behavior to return an error instead.
    
    Note that this can only happen with an fd provided by a management
    application. The fd opened internally by dump-guest-memory is blocking.
    
    While there, also fix 'writen_size' variable name.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/dump.c b/dump.c
index 1a3c716..6b7c127 100644
--- a/dump.c
+++ b/dump.c
@@ -100,18 +100,11 @@ static void dump_error(DumpState *s, const char *reason)
 static int fd_write_vmcore(void *buf, size_t size, void *opaque)
 {
     DumpState *s = opaque;
-    int fd = s->fd;
-    size_t writen_size;
+    size_t written_size;
 
-    /* The fd may be passed from user, and it can be non-blocked */
-    while (size) {
-        writen_size = qemu_write_full(fd, buf, size);
-        if (writen_size != size && errno != EAGAIN) {
-            return -1;
-        }
-
-        buf += writen_size;
-        size -= writen_size;
+    written_size = qemu_write_full(s->fd, buf, size);
+    if (written_size != size) {
+        return -1;
     }
 
     return 0;
commit d691180e41f58cc3c1f9fa848c2ab89193503160
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Fri Sep 21 13:10:58 2012 -0300

    qmp: dump-guest-memory: improve schema doc (again)
    
     o Add a note about memory allocation with paging=true
     o Fix indentation
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index 191d921..c6a6767 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2007,26 +2007,33 @@
 # supported on i386 and x86_64.
 #
 # @paging: if true, do paging to get guest's memory mapping. This allows
-# using gdb to process the core file. However, setting @paging to false
-# may be desirable because of two reasons:
+#          using gdb to process the core file.
 #
-#   1. The guest may be in a catastrophic state or can have corrupted
-#      memory, which cannot be trusted
-#   2. The guest can be in real-mode even if paging is enabled. For example,
-#      the guest uses ACPI to sleep, and ACPI sleep state goes in real-mode
+#          IMPORTANT: this option can make QEMU allocate several gigabytes
+#                     of RAM. This can happen for a large guest, or a
+#                     malicious guest pretending to be large.
+#
+#          Also, paging=true has the following limitations:
+#
+#             1. The guest may be in a catastrophic state or can have corrupted
+#                memory, which cannot be trusted
+#             2. The guest can be in real-mode even if paging is enabled. For
+#                example, the guest uses ACPI to sleep, and ACPI sleep state
+#                goes in real-mode
 #
 # @protocol: the filename or file descriptor of the vmcore. The supported
-# protocols are:
+#            protocols are:
 #
-#   1. file: the protocol starts with "file:", and the following string is
-#      the file's path.
-#   2. fd: the protocol starts with "fd:", and the following string is the
-#      fd's name.
+#            1. file: the protocol starts with "file:", and the following
+#               string is the file's path.
+#            2. fd: the protocol starts with "fd:", and the following string
+#               is the fd's name.
 #
 # @begin: #optional if specified, the starting physical address.
 #
 # @length: #optional if specified, the memory size, in bytes. If you don't
-# want to dump all guest's memory, please specify the start @begin and @length
+#          want to dump all guest's memory, please specify the start @begin
+#          and @length
 #
 # Returns: nothing on success
 #
@@ -2035,6 +2042,7 @@
 { 'command': 'dump-guest-memory',
   'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
             '*length': 'int' } }
+
 ##
 # @netdev_add:
 #
commit 3dc3e7dd936f2e7f3e6dd4056f81c8961dc8201b
Author: Francesco Lavra <francescolavra.fl at gmail.com>
Date:   Wed Sep 19 05:57:21 2012 +0000

    Versatile Express: Add modelling of NOR flash
    
    This patch adds modelling of the two NOR flash banks found on the
    Versatile Express motherboard. Tested with U-Boot running on an emulated
    Versatile Express, with either A9 or A15 CoreTile.
    
    Signed-off-by: Francesco Lavra <francescolavra.fl at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/vexpress.c b/hw/vexpress.c
index 454c2bb..3596d1e 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -29,8 +29,12 @@
 #include "sysemu.h"
 #include "boards.h"
 #include "exec-memory.h"
+#include "blockdev.h"
+#include "flash.h"
 
 #define VEXPRESS_BOARD_ID 0x8e0
+#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
+#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024)
 
 static struct arm_boot_info vexpress_binfo;
 
@@ -355,6 +359,7 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     qemu_irq pic[64];
     uint32_t proc_id;
     uint32_t sys_id;
+    DriveInfo *dinfo;
     ram_addr_t vram_size, sram_size;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *vram = g_new(MemoryRegion, 1);
@@ -410,8 +415,25 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
 
     sysbus_create_simple("pl111", map[VE_CLCD], pic[14]);
 
-    /* VE_NORFLASH0: not modelled */
-    /* VE_NORFLASH1: not modelled */
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, "vexpress.flash0",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 0.\n");
+        exit(1);
+    }
+
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH1], NULL, "vexpress.flash1",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 1.\n");
+        exit(1);
+    }
 
     sram_size = 0x2000000;
     memory_region_init_ram(sram, "vexpress.sram", sram_size);
commit 661bafb3e14bfffcb0a7c7910534c7944608ca45
Author: Francesco Lavra <francescolavra.fl at gmail.com>
Date:   Wed Sep 19 05:51:58 2012 +0000

    Versatile Express: Fix NOR flash 0 address and remove flash alias
    
    In the A series memory map (implemented in the Cortex A15 CoreTile), the
    first NOR flash bank (flash 0) is mapped to address 0x08000000, while
    address 0x00000000 can be configured as alias to either the first or the
    second flash bank. This patch fixes the definition of flash 0 address,
    and for simplicity removes the alias definition.
    
    Signed-off-by: Francesco Lavra <francescolavra.fl at gmail.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/vexpress.c b/hw/vexpress.c
index b615844..454c2bb 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -62,7 +62,6 @@ enum {
     VE_COMPACTFLASH,
     VE_CLCD,
     VE_NORFLASH0,
-    VE_NORFLASH0ALIAS,
     VE_NORFLASH1,
     VE_SRAM,
     VE_VIDEORAM,
@@ -104,9 +103,8 @@ static target_phys_addr_t motherboard_legacy_map[] = {
 };
 
 static target_phys_addr_t motherboard_aseries_map[] = {
-    /* CS0: 0x00000000 .. 0x0c000000 */
-    [VE_NORFLASH0] = 0x00000000,
-    [VE_NORFLASH0ALIAS] = 0x08000000,
+    /* CS0: 0x08000000 .. 0x0c000000 */
+    [VE_NORFLASH0] = 0x08000000,
     /* CS4: 0x0c000000 .. 0x10000000 */
     [VE_NORFLASH1] = 0x0c000000,
     /* CS5: 0x10000000 .. 0x14000000 */
@@ -413,7 +411,6 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard,
     sysbus_create_simple("pl111", map[VE_CLCD], pic[14]);
 
     /* VE_NORFLASH0: not modelled */
-    /* VE_NORFLASH0ALIAS: not modelled */
     /* VE_NORFLASH1: not modelled */
 
     sram_size = 0x2000000;
commit 9892cae39562d2e6c00ccc5966302c00f23be6d4
Author: Meador Inge <meadori at codesourcery.com>
Date:   Wed Sep 26 16:46:28 2012 +0100

    hw/armv7m_nvic: Correctly register GIC region when setting up NVIC
    
    When setting up the NVIC memory regions the memory range
    0x100..0xcff is aliased to an IO memory region that belongs
    to the ARM GIC.  This aliased region should be added to the
    NVIC memory container, but the actual GIC IO memory region
    was being added instead.  This mixup was causing the wrong
    IO memory access functions to be called when accessing parts
    of the NVIC memory.
    
    Signed-off-by: Meador Inge <meadori at codesourcery.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 6a0832e..5c09116 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -489,7 +489,8 @@ static int armv7m_nvic_init(SysBusDevice *dev)
      */
     memory_region_init_alias(&s->gic_iomem_alias, "nvic-gic", &s->gic.iomem,
                              0x100, 0xc00);
-    memory_region_add_subregion_overlap(&s->container, 0x100, &s->gic.iomem, 1);
+    memory_region_add_subregion_overlap(&s->container, 0x100,
+                                        &s->gic_iomem_alias, 1);
     /* Map the whole thing into system memory at the location required
      * by the v7M architecture.
      */
commit 14c126baf1c38607c5bd988878de85a06cefd8cf
Author: Brendan Fennell <bfennell at skynet.ie>
Date:   Wed Sep 26 16:46:28 2012 +0100

    pl190: fix read of VECTADDR
    
    Reading VECTADDR was causing us to set the current priority to
    the wrong value, the most obvious effect of which was that we
    would return the vector for the wrong interrupt as the result
    of the read.
    
    Signed-off-by: Brendan Fennell <bfennell at skynet.ie>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/pl190.c b/hw/pl190.c
index cb50afb..7332f4d 100644
--- a/hw/pl190.c
+++ b/hw/pl190.c
@@ -117,12 +117,18 @@ static uint64_t pl190_read(void *opaque, target_phys_addr_t offset,
         return s->protected;
     case 12: /* VECTADDR */
         /* Read vector address at the start of an ISR.  Increases the
-           current priority level to that of the current interrupt.  */
-        for (i = 0; i < s->priority; i++)
-          {
-            if ((s->level | s->soft_level) & s->prio_mask[i])
-              break;
-          }
+         * current priority level to that of the current interrupt.
+         *
+         * Since an enabled interrupt X at priority P causes prio_mask[Y]
+         * to have bit X set for all Y > P, this loop will stop with
+         * i == the priority of the highest priority set interrupt.
+         */
+        for (i = 0; i < s->priority; i++) {
+            if ((s->level | s->soft_level) & s->prio_mask[i + 1]) {
+                break;
+            }
+        }
+
         /* Reading this value with no pending interrupts is undefined.
            We return the default address.  */
         if (i == PL190_NUM_PRIO)
commit b224e5e2162a767dd56dbc366f796fbe45ca5baa
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Thu Sep 13 16:52:20 2012 -0300

    qapi: convert add_client
    
    Also fixes a few issues while there:
    
     1. The fd returned by monitor_get_fd() leaks in most error conditions
     2. monitor_get_fd() return value is not checked. Best case we get
        an error that is not correctly reported, worst case one of the
        functions using the fd (with value of -1) will explode
     3. A few error conditions aren't reported
     4. We now "use up" @fdname always.  Before, it was left alone for
        invalid @protocol
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/monitor.c b/monitor.c
index c24235e..c9f460a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -944,45 +944,6 @@ static void do_trace_print_events(Monitor *mon)
     trace_print_events((FILE *)mon, &monitor_fprintf);
 }
 
-static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_data)
-{
-    const char *protocol  = qdict_get_str(qdict, "protocol");
-    const char *fdname = qdict_get_str(qdict, "fdname");
-    CharDriverState *s;
-
-    if (strcmp(protocol, "spice") == 0) {
-        int fd = monitor_get_fd(mon, fdname, NULL);
-        int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
-        int tls = qdict_get_try_bool(qdict, "tls", 0);
-        if (!using_spice) {
-            /* correct one? spice isn't a device ,,, */
-            qerror_report(QERR_DEVICE_NOT_ACTIVE, "spice");
-            return -1;
-        }
-        if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) {
-            close(fd);
-        }
-        return 0;
-#ifdef CONFIG_VNC
-    } else if (strcmp(protocol, "vnc") == 0) {
-	int fd = monitor_get_fd(mon, fdname, NULL);
-        int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
-	vnc_display_add_client(NULL, fd, skipauth);
-	return 0;
-#endif
-    } else if ((s = qemu_chr_find(protocol)) != NULL) {
-	int fd = monitor_get_fd(mon, fdname, NULL);
-	if (qemu_chr_add_client(s, fd) < 0) {
-	    qerror_report(QERR_ADD_CLIENT_FAILED);
-	    return -1;
-	}
-	return 0;
-    }
-
-    qerror_report(QERR_INVALID_PARAMETER, "protocol");
-    return -1;
-}
-
 static int client_migrate_info(Monitor *mon, const QDict *qdict,
                                MonitorCompletion cb, void *opaque)
 {
diff --git a/qapi-schema.json b/qapi-schema.json
index 14e4419..191d921 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -33,6 +33,31 @@
             'MigrationExpected' ] }
 
 ##
+# @add_client
+#
+# Allow client connections for VNC, Spice and socket based
+# character devices to be passed in to QEMU via SCM_RIGHTS.
+#
+# @protocol: protocol name. Valid names are "vnc", "spice" or the
+#            name of a character device (eg. from -chardev id=XXXX)
+#
+# @fdname: file descriptor name previously passed via 'getfd' command
+#
+# @skipauth: #optional whether to skip authentication. Only applies
+#            to "vnc" and "spice" protocols
+#
+# @tls: #optional whether to perform TLS. Only applies to the "spice"
+#       protocol
+#
+# Returns: nothing on success.
+#
+# Since: 0.14.0
+##
+{ 'command': 'add_client',
+  'data': { 'protocol': 'str', 'fdname': 'str', '*skipauth': 'bool',
+            '*tls': 'bool' } }
+
+##
 # @NameInfo:
 #
 # Guest name information.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddb..36e08d9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1231,10 +1231,7 @@ EQMP
     {
         .name       = "add_client",
         .args_type  = "protocol:s,fdname:s,skipauth:b?,tls:b?",
-        .params     = "protocol fdname skipauth tls",
-        .help       = "add a graphics client",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = add_graphics_client,
+        .mhandler.cmd_new = qmp_marshal_input_add_client,
     },
 
 SQMP
diff --git a/qmp.c b/qmp.c
index 8463922..36c54c5 100644
--- a/qmp.c
+++ b/qmp.c
@@ -479,3 +479,46 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
     return arch_query_cpu_definitions(errp);
 }
 
+void qmp_add_client(const char *protocol, const char *fdname,
+                    bool has_skipauth, bool skipauth, bool has_tls, bool tls,
+                    Error **errp)
+{
+    CharDriverState *s;
+    int fd;
+
+    fd = monitor_get_fd(cur_mon, fdname, errp);
+    if (fd < 0) {
+        return;
+    }
+
+    if (strcmp(protocol, "spice") == 0) {
+        if (!using_spice) {
+            error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice");
+            close(fd);
+            return;
+        }
+        skipauth = has_skipauth ? skipauth : false;
+        tls = has_tls ? tls : false;
+        if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) {
+            error_setg(errp, "spice failed to add client");
+            close(fd);
+        }
+        return;
+#ifdef CONFIG_VNC
+    } else if (strcmp(protocol, "vnc") == 0) {
+        skipauth = has_skipauth ? skipauth : false;
+        vnc_display_add_client(NULL, fd, skipauth);
+        return;
+#endif
+    } else if ((s = qemu_chr_find(protocol)) != NULL) {
+        if (qemu_chr_add_client(s, fd) < 0) {
+            error_setg(errp, "failed to add client");
+            close(fd);
+            return;
+        }
+        return;
+    }
+
+    error_setg(errp, "protocol '%s' is invalid", protocol);
+    close(fd);
+}
commit a9940fc4cba811adfb296fe07b247ee707265f90
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Sep 20 16:50:32 2012 +0200

    monitor: add Error * argument to monitor_get_fd
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/dump.c b/dump.c
index 2bf8d8d..1a3c716 100644
--- a/dump.c
+++ b/dump.c
@@ -836,9 +836,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
 
 #if !defined(WIN32)
     if (strstart(file, "fd:", &p)) {
-        fd = monitor_get_fd(cur_mon, p);
+        fd = monitor_get_fd(cur_mon, p, errp);
         if (fd == -1) {
-            error_set(errp, QERR_FD_NOT_FOUND, p);
             return;
         }
     }
diff --git a/migration-fd.c b/migration-fd.c
index 50138ed..7335167 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -75,7 +75,7 @@ static int fd_close(MigrationState *s)
 
 int fd_start_outgoing_migration(MigrationState *s, const char *fdname)
 {
-    s->fd = monitor_get_fd(cur_mon, fdname);
+    s->fd = monitor_get_fd(cur_mon, fdname, NULL);
     if (s->fd == -1) {
         DPRINTF("fd_migration: invalid file descriptor identifier\n");
         goto err_after_get_fd;
diff --git a/monitor.c b/monitor.c
index 67064e2..c24235e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -951,7 +951,7 @@ static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_d
     CharDriverState *s;
 
     if (strcmp(protocol, "spice") == 0) {
-        int fd = monitor_get_fd(mon, fdname);
+        int fd = monitor_get_fd(mon, fdname, NULL);
         int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
         int tls = qdict_get_try_bool(qdict, "tls", 0);
         if (!using_spice) {
@@ -965,13 +965,13 @@ static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_d
         return 0;
 #ifdef CONFIG_VNC
     } else if (strcmp(protocol, "vnc") == 0) {
-	int fd = monitor_get_fd(mon, fdname);
+	int fd = monitor_get_fd(mon, fdname, NULL);
         int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
 	vnc_display_add_client(NULL, fd, skipauth);
 	return 0;
 #endif
     } else if ((s = qemu_chr_find(protocol)) != NULL) {
-	int fd = monitor_get_fd(mon, fdname);
+	int fd = monitor_get_fd(mon, fdname, NULL);
 	if (qemu_chr_add_client(s, fd) < 0) {
 	    qerror_report(QERR_ADD_CLIENT_FAILED);
 	    return -1;
@@ -2118,7 +2118,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
     }
 }
 
-int monitor_get_fd(Monitor *mon, const char *fdname)
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
 {
     mon_fd_t *monfd;
 
@@ -2139,6 +2139,7 @@ int monitor_get_fd(Monitor *mon, const char *fdname)
         return fd;
     }
 
+    error_setg(errp, "File descriptor named '%s' has not been found", fdname);
     return -1;
 }
 
@@ -2410,12 +2411,14 @@ int monitor_fdset_dup_fd_remove(int dup_fd)
 int monitor_handle_fd_param(Monitor *mon, const char *fdname)
 {
     int fd;
+    Error *local_err = NULL;
 
     if (!qemu_isdigit(fdname[0]) && mon) {
 
-        fd = monitor_get_fd(mon, fdname);
+        fd = monitor_get_fd(mon, fdname, &local_err);
         if (fd == -1) {
-            error_report("No file descriptor named %s found", fdname);
+            qerror_report_err(local_err);
+            error_free(local_err);
             return -1;
         }
     } else {
diff --git a/monitor.h b/monitor.h
index 64c1561..e240c3f 100644
--- a/monitor.h
+++ b/monitor.h
@@ -66,7 +66,7 @@ int monitor_read_block_device_key(Monitor *mon, const char *device,
                                   BlockDriverCompletionFunc *completion_cb,
                                   void *opaque);
 
-int monitor_get_fd(Monitor *mon, const char *fdname);
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
 int monitor_handle_fd_param(Monitor *mon, const char *fdname);
 
 void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
commit 9a3a88956c7f32130ad20011f7c3c161fa5876d8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Sep 20 16:50:32 2012 +0200

    pci-assign: use monitor_handle_fd_param
    
    There is no need to open-code the choice between a file descriptor
    number or a named one.  Just use monitor_handle_fd_param, which
    also takes care of printing the error message.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index 05b93d9..7a0998c 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -579,15 +579,9 @@ static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
     snprintf(name, sizeof(name), "%sconfig", dir);
 
     if (pci_dev->configfd_name && *pci_dev->configfd_name) {
-        if (qemu_isdigit(pci_dev->configfd_name[0])) {
-            dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
-        } else {
-            dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
-            if (dev->config_fd < 0) {
-                error_report("%s: (%s) unkown", __func__,
-                             pci_dev->configfd_name);
-                return 1;
-            }
+        dev->config_fd = monitor_handle_fd_param(cur_mon, pci_dev->configfd_name);
+        if (dev->config_fd < 0) {
+            return 1;
         }
     } else {
         dev->config_fd = open(name, O_RDWR);
commit 1057725f6629fc2771a294a92ce8eedb92c86fe8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 16:31:07 2012 +0200

    qapi: add "unix" to the set of reserved words
    
    It is #defined to 1.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/scripts/qapi.py b/scripts/qapi.py
index 057332e..afc5f32 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -156,7 +156,9 @@ def c_var(name, protect=True):
     # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
     # excluding _.*
     gcc_words = set(['asm', 'typeof'])
-    if protect and (name in c89_words | c99_words | c11_words | gcc_words):
+    # namespace pollution:
+    polluted_words = set(['unix'])
+    if protect and (name in c89_words | c99_words | c11_words | gcc_words | polluted_words):
         return "q_" + name
     return name.replace('-', '_').lstrip("*")
 
commit eda50a656f52a5172fa8a95f7b217565b90d413e
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 16:31:06 2012 +0200

    qapi: do not protect enum values from namespace pollution
    
    Enum values are always preceded by the uppercase name of the enum, so
    they do not conflict with reserved words.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 49ef569..1b84834 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -91,9 +91,9 @@ const char *%(name)s_lookup[] = {
 
 def generate_enum_name(name):
     if name.isupper():
-        return c_fun(name)
+        return c_fun(name, False)
     new_name = ''
-    for c in c_fun(name):
+    for c in c_fun(name, False):
         if c.isupper():
             new_name += '_'
         new_name += c
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index e2093e8..a360de7 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -173,7 +173,7 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error **
                 break;
 ''',
                 abbrev = de_camel_case(name).upper(),
-                enum = c_fun(de_camel_case(key)).upper(),
+                enum = c_fun(de_camel_case(key),False).upper(),
                 c_type=members[key],
                 c_name=c_fun(key))
 
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 122b4cb..057332e 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -141,7 +141,7 @@ def camel_case(name):
             new_name += ch.lower()
     return new_name
 
-def c_var(name):
+def c_var(name, protect=True):
     # ANSI X3J11/88-090, 3.1.1
     c89_words = set(['auto', 'break', 'case', 'char', 'const', 'continue',
                      'default', 'do', 'double', 'else', 'enum', 'extern', 'float',
@@ -156,12 +156,12 @@ def c_var(name):
     # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
     # excluding _.*
     gcc_words = set(['asm', 'typeof'])
-    if name in c89_words | c99_words | c11_words | gcc_words:
+    if protect and (name in c89_words | c99_words | c11_words | gcc_words):
         return "q_" + name
     return name.replace('-', '_').lstrip("*")
 
-def c_fun(name):
-    return c_var(name).replace('.', '_')
+def c_fun(name, protect=True):
+    return c_var(name, protect).replace('.', '_')
 
 def c_list_type(name):
     return '%sList' % name
commit f513cbf7503d8db3778df436beaf25f3d8260317
Author: Ryota Ozaki <ozaki.ryota at gmail.com>
Date:   Fri Sep 14 21:44:22 2012 +0900

    Add qemu-ga-client script
    
    This is an easy-to-use QEMU guest agent client written in
    Python. It simply provides commands to call guest agent
    functions like ping, fsfreeze and shutdown. Additionally,
    it provides extra useful commands, e.g., cat, ifconfig and
    reboot, by using guest agent functions.
    
    Examples:
      $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
      $ qemu-ga-client ping
    
      $ qemu-ga-client cat /etc/resolv.conf
      # Generated by NetworkManager
      nameserver 10.0.2.3
    
      $ qemu-ga-client fsfreeze status
      thawed
      $ qemu-ga-client fsfreeze freeze
      2 filesystems frozen
    
    The script communicates with a guest agent by means of
    qmp.QEMUMonitorProtocol. Every command is called with a
    timeout (3 sec.) to avoid blocking. The script always
    calls sync command prior to issuing an actual command
    (except for ping which doesn't need sync).
    
    Signed-off-by: Ryota Ozaki <ozaki.ryota at gmail.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/QMP/qemu-ga-client b/QMP/qemu-ga-client
new file mode 100755
index 0000000..46676c3
--- /dev/null
+++ b/QMP/qemu-ga-client
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+# QEMU Guest Agent Client
+#
+# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota at gmail.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Usage:
+#
+# Start QEMU with:
+#
+# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \
+#   -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
+#
+# Run the script:
+#
+# $ qemu-ga-client --address=/tmp/qga.sock <command> [args...]
+#
+# or
+#
+# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
+# $ qemu-ga-client <command> [args...]
+#
+# For example:
+#
+# $ qemu-ga-client cat /etc/resolv.conf
+# # Generated by NetworkManager
+# nameserver 10.0.2.3
+# $ qemu-ga-client fsfreeze status
+# thawed
+# $ qemu-ga-client fsfreeze freeze
+# 2 filesystems frozen
+#
+# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent
+#
+
+import base64
+import random
+
+import qmp
+
+
+class QemuGuestAgent(qmp.QEMUMonitorProtocol):
+    def __getattr__(self, name):
+        def wrapper(**kwds):
+            return self.command('guest-' + name.replace('_', '-'), **kwds)
+        return wrapper
+
+
+class QemuGuestAgentClient:
+    error = QemuGuestAgent.error
+
+    def __init__(self, address):
+        self.qga = QemuGuestAgent(address)
+        self.qga.connect(negotiate=False)
+
+    def sync(self, timeout=3):
+        # Avoid being blocked forever
+        if not self.ping(timeout):
+            raise EnvironmentError('Agent seems not alive')
+        uid = random.randint(0, (1 << 32) - 1)
+        while True:
+            ret = self.qga.sync(id=uid)
+            if isinstance(ret, int) and int(ret) == uid:
+                break
+
+    def __file_read_all(self, handle):
+        eof = False
+        data = ''
+        while not eof:
+            ret = self.qga.file_read(handle=handle, count=1024)
+            _data = base64.b64decode(ret['buf-b64'])
+            data += _data
+            eof = ret['eof']
+        return data
+
+    def read(self, path):
+        handle = self.qga.file_open(path=path)
+        try:
+            data = self.__file_read_all(handle)
+        finally:
+            self.qga.file_close(handle=handle)
+        return data
+
+    def info(self):
+        info = self.qga.info()
+
+        msgs = []
+        msgs.append('version: ' + info['version'])
+        msgs.append('supported_commands:')
+        enabled = [c['name'] for c in info['supported_commands'] if c['enabled']]
+        msgs.append('\tenabled: ' + ', '.join(enabled))
+        disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']]
+        msgs.append('\tdisabled: ' + ', '.join(disabled))
+
+        return '\n'.join(msgs)
+
+    def __gen_ipv4_netmask(self, prefixlen):
+        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
+        return '.'.join([str(mask >> 24),
+                         str((mask >> 16) & 0xff),
+                         str((mask >> 8) & 0xff),
+                         str(mask & 0xff)])
+
+    def ifconfig(self):
+        nifs = self.qga.network_get_interfaces()
+
+        msgs = []
+        for nif in nifs:
+            msgs.append(nif['name'] + ':')
+            if 'ip-addresses' in nif:
+                for ipaddr in nif['ip-addresses']:
+                    if ipaddr['ip-address-type'] == 'ipv4':
+                        addr = ipaddr['ip-address']
+                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
+                        msgs.append("\tinet %s  netmask %s" % (addr, mask))
+                    elif ipaddr['ip-address-type'] == 'ipv6':
+                        addr = ipaddr['ip-address']
+                        prefix = ipaddr['prefix']
+                        msgs.append("\tinet6 %s  prefixlen %s" % (addr, prefix))
+            if nif['hardware-address'] != '00:00:00:00:00:00':
+                msgs.append("\tether " + nif['hardware-address'])
+
+        return '\n'.join(msgs)
+
+    def ping(self, timeout):
+        self.qga.settimeout(timeout)
+        try:
+            self.qga.ping()
+        except self.qga.timeout:
+            return False
+        return True
+
+    def fsfreeze(self, cmd):
+        if cmd not in ['status', 'freeze', 'thaw']:
+            raise StandardError('Invalid command: ' + cmd)
+
+        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
+
+    def fstrim(self, minimum=0):
+        return getattr(self.qga, 'fstrim')(minimum=minimum)
+
+    def suspend(self, mode):
+        if mode not in ['disk', 'ram', 'hybrid']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            getattr(self.qga, 'suspend' + '_' + mode)()
+            # On error exception will raise
+        except self.qga.timeout:
+            # On success command will timed out
+            return
+
+    def shutdown(self, mode='powerdown'):
+        if mode not in ['powerdown', 'halt', 'reboot']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            self.qga.shutdown(mode=mode)
+        except self.qga.timeout:
+            return
+
+
+def _cmd_cat(client, args):
+    if len(args) != 1:
+        print('Invalid argument')
+        print('Usage: cat <file>')
+        sys.exit(1)
+    print(client.read(args[0]))
+
+
+def _cmd_fsfreeze(client, args):
+    usage = 'Usage: fsfreeze status|freeze|thaw'
+    if len(args) != 1:
+        print('Invalid argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['status', 'freeze', 'thaw']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    cmd = args[0]
+    ret = client.fsfreeze(cmd)
+    if cmd == 'status':
+        print(ret)
+    elif cmd == 'freeze':
+        print("%d filesystems frozen" % ret)
+    else:
+        print("%d filesystems thawed" % ret)
+
+
+def _cmd_fstrim(client, args):
+    if len(args) == 0:
+        minimum = 0
+    else:
+        minimum = int(args[0])
+    print(client.fstrim(minimum))
+
+
+def _cmd_ifconfig(client, args):
+    print(client.ifconfig())
+
+
+def _cmd_info(client, args):
+    print(client.info())
+
+
+def _cmd_ping(client, args):
+    if len(args) == 0:
+        timeout = 3
+    else:
+        timeout = float(args[0])
+    alive = client.ping(timeout)
+    if not alive:
+        print("Not responded in %s sec" % args[0])
+        sys.exit(1)
+
+
+def _cmd_suspend(client, args):
+    usage = 'Usage: suspend disk|ram|hybrid'
+    if len(args) != 1:
+        print('Less argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['disk', 'ram', 'hybrid']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    client.suspend(args[0])
+
+
+def _cmd_shutdown(client, args):
+    client.shutdown()
+_cmd_powerdown = _cmd_shutdown
+
+
+def _cmd_halt(client, args):
+    client.shutdown('halt')
+
+
+def _cmd_reboot(client, args):
+    client.shutdown('reboot')
+
+
+commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
+
+
+def main(address, cmd, args):
+    if not os.path.exists(address):
+        print('%s not found' % address)
+        sys.exit(1)
+
+    if cmd not in commands:
+        print('Invalid command: ' + cmd)
+        print('Available commands: ' + ', '.join(commands))
+        sys.exit(1)
+
+    try:
+        client = QemuGuestAgentClient(address)
+    except QemuGuestAgent.error, e:
+        import errno
+
+        print(e)
+        if e.errno == errno.ECONNREFUSED:
+            print('Hint: qemu is not running?')
+        sys.exit(1)
+
+    if cmd != 'ping':
+        client.sync()
+
+    globals()['_cmd_' + cmd](client, args)
+
+
+if __name__ == '__main__':
+    import sys
+    import os
+    import optparse
+
+    address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None
+
+    usage = "%prog [--address=<unix_path>|<ipv4_address>] <command> [args...]\n"
+    usage += '<command>: ' + ', '.join(commands)
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--address', action='store', type='string',
+                      default=address, help='Specify a ip:port pair or a unix socket path')
+    options, args = parser.parse_args()
+
+    address = options.address
+    if address is None:
+        parser.error('address is not specified')
+        sys.exit(1)
+
+    if len(args) == 0:
+        parser.error('Less argument')
+        sys.exit(1)
+
+    main(address, args[0], args[1:])
commit e37b350aa8766ae36ec9bf40fab665455d4a5530
Author: Ryota Ozaki <ozaki.ryota at gmail.com>
Date:   Fri Sep 14 21:44:21 2012 +0900

    Support settimeout in QEMUMonitorProtocol
    
    This method is used in the following qemu-ga-client script
    to implement non-blocking operations.
    
    Signed-off-by: Ryota Ozaki <ozaki.ryota at gmail.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/QMP/qmp.py b/QMP/qmp.py
index 5a573e1..33c7d36 100644
--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -162,3 +162,8 @@ class QEMUMonitorProtocol:
     def close(self):
         self.__sock.close()
         self.__sockfile.close()
+
+    timeout = socket.timeout
+
+    def settimeout(self, timeout):
+        self.__sock.settimeout(timeout)
commit e9d17b6890ae772f3652c8cacf4e1f72f576f907
Author: Ryota Ozaki <ozaki.ryota at gmail.com>
Date:   Fri Sep 14 21:44:20 2012 +0900

    Make negotiation optional in QEMUMonitorProtocol
    
    This is a preparation for qemu-ga-client which uses
    QEMUMonitorProtocol class. The class tries to
    negotiate capabilities on connect, however, qemu-ga
    doesn't support it and fails.
    
    This change makes the negotiation optional, though
    it's still performed by default for compatibility.
    
    Signed-off-by: Ryota Ozaki <ozaki.ryota at gmail.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/QMP/qmp.py b/QMP/qmp.py
index 36ecc1d..5a573e1 100644
--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -49,7 +49,6 @@ class QEMUMonitorProtocol:
         return socket.socket(family, socket.SOCK_STREAM)
 
     def __negotiate_capabilities(self):
-        self.__sockfile = self.__sock.makefile()
         greeting = self.__json_read()
         if greeting is None or not greeting.has_key('QMP'):
             raise QMPConnectError
@@ -73,7 +72,7 @@ class QEMUMonitorProtocol:
 
     error = socket.error
 
-    def connect(self):
+    def connect(self, negotiate=True):
         """
         Connect to the QMP Monitor and perform capabilities negotiation.
 
@@ -83,7 +82,9 @@ class QEMUMonitorProtocol:
         @raise QMPCapabilitiesError if fails to negotiate capabilities
         """
         self.__sock.connect(self.__address)
-        return self.__negotiate_capabilities()
+        self.__sockfile = self.__sock.makefile()
+        if negotiate:
+            return self.__negotiate_capabilities()
 
     def accept(self):
         """
commit 39c138c8420f51a7da7b35233a8d7400a0b589ac
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Wed Sep 26 12:59:31 2012 +1000

    usb: Fix usb_packet_map() in the presence of IOMMUs
    
    With the IOMMU infrastructure introduced before 1.2, we need to use
    dma_memory_map() to obtain a qemu pointer to memory from an IO bus address.
    However, dma_memory_map() alters the given length to reflect the length
    over which the used DMA translation is valid - which could be either more
    or less than the requested length.
    
    usb_packet_map() does not correctly handle these cases, simply failing if
    dma_memory_map() alters the requested length.  If dma_memory_map()
    increased the length, we just need to use the requested length for the
    qemu_iovec_add().  However, if it decreased the length, it means that a
    single DMA translation is not valid for the whole sglist element, and so
    we need to loop, splitting it up into multiple iovec entries for each
    piece with a DMA translation (in practice >2 pieces is unlikely).
    
    This patch implements the correct behaviour
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index c0de30e..703e2d2 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -28,19 +28,25 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 {
     DMADirection dir = (p->pid == USB_TOKEN_IN) ?
         DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
-    dma_addr_t len;
     void *mem;
     int i;
 
     for (i = 0; i < sgl->nsg; i++) {
-        len = sgl->sg[i].len;
-        mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir);
-        if (!mem) {
-            goto err;
-        }
-        qemu_iovec_add(&p->iov, mem, len);
-        if (len != sgl->sg[i].len) {
-            goto err;
+        dma_addr_t base = sgl->sg[i].base;
+        dma_addr_t len = sgl->sg[i].len;
+
+        while (len) { /* one iovec entry per contiguous DMA translation */
+            dma_addr_t xlen = len;
+            mem = dma_memory_map(sgl->dma, base, &xlen, dir);
+            if (!mem) {
+                goto err;
+            }
+            if (xlen > len) { /* mapping is longer than requested: clamp */
+                xlen = len;
+            }
+            qemu_iovec_add(&p->iov, mem, xlen);
+            len -= xlen;
+            base += xlen; /* next piece starts where this mapping ended */
+        }
     }
     return 0;
commit 8b626aa7841ef79b70066c880b3b6c29496797af
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Tue Sep 25 13:22:21 2012 +0200

    usb-redir: Adjust pkg-config check for usbredirparser .pc file rename (v2)
    
    The usbredir 0.5 release introduced the new API for 64 bit packet ids, but
    it kept the libusbredirparser.pc name as is, meaning that older versions of
    qemu will still have their pkg-config check for usbredirparser fulfilled,
    and build with the usb-redir device. Due to the API change there will be
    some compiler warnings, but the build will succeed, however the usb-redir
    device will be broken on 32 bit machines.
    
    To solve this a new usbredir-0.5.2 release is coming, which renames the
    libusbredirparser.pc file to libusbredirparser-0.5.pc, so that it will no
    longer fulfill the pkg-config check of the qemu-1.2 and older releases,
    stopping the (silent) breakage. This patch adjusts qemu master's configure
    to properly detect the new usbredir release.
    
    Changes in v2:
    -Not only use the new .pc name in the check but also when getting cflags
     and libs!
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/configure b/configure
index 1b86517..4f24062 100755
--- a/configure
+++ b/configure
@@ -2752,10 +2752,10 @@ fi
 
 # check for usbredirparser for usb network redirection support
 if test "$usb_redir" != "no" ; then
-    if $pkg_config --atleast-version=0.5 libusbredirparser >/dev/null 2>&1 ; then
+    if $pkg_config --atleast-version=0.5 libusbredirparser-0.5 >/dev/null 2>&1 ; then
         usb_redir="yes"
-        usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null)
-        usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null)
+        usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5 2>/dev/null)
+        usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5 2>/dev/null)
         QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags"
         libs_softmmu="$libs_softmmu $usb_redir_libs"
     else
commit cae5d3f4b3fbe9b681c0c4046008af424bd1d6a5
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Thu Sep 20 17:38:07 2012 +0200

    ehci: Fix interrupt packet MULT handling
    
    There are several issues with our handling of the MULT epcap field
    of interrupt qhs, which this patch fixes.
    
    1) When we don't execute a transaction because of the transaction counter
    being 0, p->async stays EHCI_ASYNC_NONE, and the next time we process the
    same qtd we hit an assert in ehci_state_fetchqtd because of this. Even though
    I believe that this is caused by 3 below, this patch still removes the assert,
    as that can still happen without 3, when multiple packets are queued for the
    same interrupt ep.
    
    2) We only *check* the transaction counter from ehci_state_execute, any
    packets queued up by fill_queue bypass this check. This is fixed by not calling
    fill_queue for interrupt packets.
    
    3) Some versions of Windows set the MULT field of the qh to 0, which is a
    clear violation of the EHCI spec, but still they do it. This means that we
    will never execute a qtd for these, making interrupt ep-s on USB-2 devices
    not work, and after recent changes, triggering 1).
    
    So far we've stored the transaction counter in our copy of the mult field,
    but with this beginning at 0 already when dealing with these versions of Windows
    this won't work. So this patch adds a transact_ctr field to our qh struct,
    and sets this to the MULT field value on fetchqh. When the MULT field value
    is 0, we set it to 4, assuming that Windows gets away with setting it to 0
    by the actual hardware going horizontal on a 1 -> 0 transition, which will
    give it 4 transactions (MULT goes from 0 - 3).
    
    Note that we cannot stop on detecting the 1 -> 0 transition, as our decrement
    of the transaction counter, and checking for it are done in 2 different places.
    
    Reported-by: Shawn Starr <shawn.starr at rogers.com>
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 6a5da84..8bdb806 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -373,6 +373,7 @@ struct EHCIQueue {
     uint32_t seen;
     uint64_t ts;
     int async;
+    int transact_ctr;
 
     /* cached data from guest - needs to be flushed
      * when guest removes an entry (doorbell, handshake sequence)
@@ -1837,6 +1838,11 @@ static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
     }
     q->qh = qh;
 
+    q->transact_ctr = get_field(q->qh.epcap, QH_EPCAP_MULT);
+    if (q->transact_ctr == 0) { /* Guest bug in some versions of windows */
+        q->transact_ctr = 4;
+    }
+
     if (q->dev == NULL) {
         q->dev = ehci_find_device(q->ehci, devaddr);
     }
@@ -2014,11 +2020,8 @@ static int ehci_state_fetchqtd(EHCIQueue *q)
     } else if (p != NULL) {
         switch (p->async) {
         case EHCI_ASYNC_NONE:
-            /* Should never happen packet should at least be initialized */
-            assert(0);
-            break;
         case EHCI_ASYNC_INITIALIZED:
-            /* Previously nacked packet (likely interrupt ep) */
+            /* Not yet executed (MULT), or previously nacked (int) packet */
             ehci_set_state(q->ehci, q->async, EST_EXECUTE);
             break;
         case EHCI_ASYNC_INFLIGHT:
@@ -2107,15 +2110,12 @@ static int ehci_state_execute(EHCIQueue *q)
 
     // TODO verify enough time remains in the uframe as in 4.4.1.1
     // TODO write back ptr to async list when done or out of time
-    // TODO Windows does not seem to ever set the MULT field
 
-    if (!q->async) {
-        int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT);
-        if (!transactCtr) {
-            ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-            again = 1;
-            goto out;
-        }
+    /* 4.10.3, bottom of page 82, go horizontal on transaction counter == 0 */
+    if (!q->async && q->transact_ctr == 0) {
+        ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
+        again = 1;
+        goto out;
     }
 
     if (q->async) {
@@ -2132,7 +2132,11 @@ static int ehci_state_execute(EHCIQueue *q)
         trace_usb_ehci_packet_action(p->queue, p, "async");
         p->async = EHCI_ASYNC_INFLIGHT;
         ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-        again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1;
+        if (q->async) {
+            again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1;
+        } else {
+            again = 1;
+        }
         goto out;
     }
 
@@ -2152,13 +2156,9 @@ static int ehci_state_executing(EHCIQueue *q)
 
     ehci_execute_complete(q);
 
-    // 4.10.3
-    if (!q->async) {
-        int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT);
-        transactCtr--;
-        set_field(&q->qh.epcap, transactCtr, QH_EPCAP_MULT);
-        // 4.10.3, bottom of page 82, should exit this state when transaction
-        // counter decrements to 0
+    /* 4.10.3 */
+    if (!q->async && q->transact_ctr > 0) {
+        q->transact_ctr--;
     }
 
     /* 4.10.5 */
commit 1d8a4e69eeda7e474d1a6b50951b0b1680f8186e
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 20 13:36:04 2012 +0200

    xhci: create a memory region for each port
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 8c0155b..e79a872 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -285,6 +285,8 @@ typedef enum TRBCCode {
 #define SLOT_CONTEXT_ENTRIES_MASK 0x1f
 #define SLOT_CONTEXT_ENTRIES_SHIFT 27
 
+typedef struct XHCIState XHCIState;
+
 typedef enum EPType {
     ET_INVALID = 0,
     ET_ISO_OUT,
@@ -303,15 +305,15 @@ typedef struct XHCIRing {
 } XHCIRing;
 
 typedef struct XHCIPort {
+    XHCIState *xhci;
     uint32_t portsc;
     uint32_t portnr;
     USBPort  *uport;
     uint32_t speedmask;
+    char name[16];
+    MemoryRegion mem;
 } XHCIPort;
 
-struct XHCIState;
-typedef struct XHCIState XHCIState;
-
 typedef struct XHCITransfer {
     XHCIState *xhci;
     USBPacket packet;
@@ -2430,20 +2432,14 @@ static uint64_t xhci_cap_read(void *ptr, target_phys_addr_t reg, unsigned size)
     return ret;
 }
 
-static uint32_t xhci_port_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_port_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
-    uint32_t port = reg >> 4;
+    XHCIPort *port = ptr;
     uint32_t ret;
 
-    if (port >= xhci->numports) {
-        fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
-        ret = 0;
-        goto out;
-    }
-
-    switch (reg & 0xf) {
+    switch (reg) {
     case 0x00: /* PORTSC */
-        ret = xhci->ports[port].portsc;
+        ret = port->portsc;
         break;
     case 0x04: /* PORTPMSC */
     case 0x08: /* PORTLI */
@@ -2452,30 +2448,25 @@ static uint32_t xhci_port_read(XHCIState *xhci, uint32_t reg)
     case 0x0c: /* reserved */
     default:
         fprintf(stderr, "xhci_port_read (port %d): reg 0x%x unimplemented\n",
-                port, reg);
+                port->portnr, (uint32_t)reg);
         ret = 0;
     }
 
-out:
-    trace_usb_xhci_port_read(port, reg & 0x0f, ret);
+    trace_usb_xhci_port_read(port->portnr, reg, ret);
     return ret;
 }
 
-static void xhci_port_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_port_write(void *ptr, target_phys_addr_t reg,
+                            uint64_t val, unsigned size)
 {
-    uint32_t port = reg >> 4;
+    XHCIPort *port = ptr;
     uint32_t portsc;
 
-    trace_usb_xhci_port_write(port, reg & 0x0f, val);
+    trace_usb_xhci_port_write(port->portnr, reg, val);
 
-    if (port >= xhci->numports) {
-        fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
-        return;
-    }
-
-    switch (reg & 0xf) {
+    switch (reg) {
     case 0x00: /* PORTSC */
-        portsc = xhci->ports[port].portsc;
+        portsc = port->portsc;
         /* write-1-to-clear bits*/
         portsc &= ~(val & (PORTSC_CSC|PORTSC_PEC|PORTSC_WRC|PORTSC_OCC|
                            PORTSC_PRC|PORTSC_PLC|PORTSC_CEC));
@@ -2490,16 +2481,16 @@ static void xhci_port_write(XHCIState *xhci, uint32_t reg, uint32_t val)
         /* write-1-to-start bits */
         if (val & PORTSC_PR) {
             DPRINTF("xhci: port %d reset\n", port);
-            usb_device_reset(xhci->ports[port].uport->dev);
+            usb_device_reset(port->uport->dev);
             portsc |= PORTSC_PRC | PORTSC_PED;
         }
-        xhci->ports[port].portsc = portsc;
+        port->portsc = portsc;
         break;
     case 0x04: /* PORTPMSC */
     case 0x08: /* PORTLI */
     default:
         fprintf(stderr, "xhci_port_write (port %d): reg 0x%x unimplemented\n",
-                port, reg);
+                port->portnr, (uint32_t)reg);
     }
 }
 
@@ -2508,10 +2499,6 @@ static uint64_t xhci_oper_read(void *ptr, target_phys_addr_t reg, unsigned size)
     XHCIState *xhci = ptr;
     uint32_t ret;
 
-    if (reg >= 0x400) {
-        return xhci_port_read(xhci, reg - 0x400);
-    }
-
     switch (reg) {
     case 0x00: /* USBCMD */
         ret = xhci->usbcmd;
@@ -2554,11 +2541,6 @@ static void xhci_oper_write(void *ptr, target_phys_addr_t reg,
 {
     XHCIState *xhci = ptr;
 
-    if (reg >= 0x400) {
-        xhci_port_write(xhci, reg - 0x400, val);
-        return;
-    }
-
     trace_usb_xhci_oper_write(reg, val);
 
     switch (reg) {
@@ -2777,6 +2759,14 @@ static const MemoryRegionOps xhci_oper_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static const MemoryRegionOps xhci_port_ops = {
+    .read = xhci_port_read,
+    .write = xhci_port_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static const MemoryRegionOps xhci_runtime_ops = {
     .read = xhci_runtime_read,
     .write = xhci_runtime_write,
@@ -2850,7 +2840,7 @@ static void xhci_child_detach(USBPort *uport, USBDevice *child)
     }
 }
 
-static USBPortOps xhci_port_ops = {
+static USBPortOps xhci_uport_ops = {
     .attach   = xhci_attach,
     .detach   = xhci_detach,
     .wakeup   = xhci_wakeup,
@@ -2930,6 +2920,7 @@ static void usb_xhci_init(XHCIState *xhci, DeviceState *dev)
                 USB_SPEED_MASK_LOW  |
                 USB_SPEED_MASK_FULL |
                 USB_SPEED_MASK_HIGH;
+            snprintf(port->name, sizeof(port->name), "usb2 port #%d", i+1);
             speedmask |= port->speedmask;
         }
         if (i < xhci->numports_3) {
@@ -2937,16 +2928,17 @@ static void usb_xhci_init(XHCIState *xhci, DeviceState *dev)
             port->portnr = i + 1 + xhci->numports_2;
             port->uport = &xhci->uports[i];
             port->speedmask = USB_SPEED_MASK_SUPER;
+            snprintf(port->name, sizeof(port->name), "usb3 port #%d", i+1);
             speedmask |= port->speedmask;
         }
         usb_register_port(&xhci->bus, &xhci->uports[i], xhci, i,
-                          &xhci_port_ops, speedmask);
+                          &xhci_uport_ops, speedmask);
     }
 }
 
 static int usb_xhci_initfn(struct PCIDevice *dev)
 {
-    int ret;
+    int i, ret;
 
     XHCIState *xhci = DO_UPCAST(XHCIState, pci_dev, dev);
 
@@ -2965,7 +2957,7 @@ static int usb_xhci_initfn(struct PCIDevice *dev)
     memory_region_init_io(&xhci->mem_cap, &xhci_cap_ops, xhci,
                           "capabilities", LEN_CAP);
     memory_region_init_io(&xhci->mem_oper, &xhci_oper_ops, xhci,
-                          "operational", 0x400 + 0x10 * xhci->numports);
+                          "operational", 0x400);
     memory_region_init_io(&xhci->mem_runtime, &xhci_runtime_ops, xhci,
                           "runtime", LEN_RUNTIME);
     memory_region_init_io(&xhci->mem_doorbell, &xhci_doorbell_ops, xhci,
@@ -2976,6 +2968,15 @@ static int usb_xhci_initfn(struct PCIDevice *dev)
     memory_region_add_subregion(&xhci->mem, OFF_RUNTIME,  &xhci->mem_runtime);
     memory_region_add_subregion(&xhci->mem, OFF_DOORBELL, &xhci->mem_doorbell);
 
+    for (i = 0; i < xhci->numports; i++) {
+        XHCIPort *port = &xhci->ports[i];
+        uint32_t offset = OFF_OPER + 0x400 + 0x10 * i;
+        port->xhci = xhci;
+        memory_region_init_io(&port->mem, &xhci_port_ops, port,
+                              port->name, 0x10);
+        memory_region_add_subregion(&xhci->mem, offset, &port->mem);
+    }
+
     pci_register_bar(&xhci->pci_dev, 0,
                      PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &xhci->mem);
commit ccaf87a085c748910efddbcfb5077f6a67cc354a
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Tue Sep 18 17:30:52 2012 +0200

    xhci: route string & usb hub support
    
    Parse route string in slot contexts and
    support devices connected via hub.

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 1414826..8c0155b 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -363,7 +363,7 @@ typedef struct XHCIEPContext {
 typedef struct XHCISlot {
     bool enabled;
     dma_addr_t ctx;
-    unsigned int port;
+    USBPort *uport;
     unsigned int devaddr;
     XHCIEPContext * eps[31];
 } XHCISlot;
@@ -1230,7 +1230,7 @@ static TRBCCode xhci_reset_ep(XHCIState *xhci, unsigned int slotid,
         ep |= 0x80;
     }
 
-    dev = xhci->ports[xhci->slots[slotid-1].port-1].uport->dev;
+    dev = xhci->slots[slotid-1].uport->dev;
     if (!dev) {
         return CC_USB_TRANSACTION_ERROR;
     }
@@ -1412,18 +1412,9 @@ static void xhci_stall_ep(XHCITransfer *xfer)
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer,
                        XHCIEPContext *epctx);
 
-static USBDevice *xhci_find_device(XHCIPort *port, uint8_t addr)
-{
-    if (!(port->portsc & PORTSC_PED)) {
-        return NULL;
-    }
-    return usb_find_device(port->uport, addr);
-}
-
 static int xhci_setup_packet(XHCITransfer *xfer)
 {
     XHCIState *xhci = xfer->xhci;
-    XHCIPort *port;
     USBDevice *dev;
     USBEndpoint *ep;
     int dir;
@@ -1434,13 +1425,12 @@ static int xhci_setup_packet(XHCITransfer *xfer)
         ep = xfer->packet.ep;
         dev = ep->dev;
     } else {
-        port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
-        dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
-        if (!dev) {
-            fprintf(stderr, "xhci: slot %d port %d has no device\n",
-                    xfer->slotid, xhci->slots[xfer->slotid-1].port);
+        if (!xhci->slots[xfer->slotid-1].uport) {
+            fprintf(stderr, "xhci: slot %d has no device\n",
+                    xfer->slotid);
             return -1;
         }
+        dev = xhci->slots[xfer->slotid-1].uport->dev;
         ep = usb_ep_get(dev, dir, xfer->epid >> 1);
     }
 
@@ -1772,7 +1762,7 @@ static TRBCCode xhci_enable_slot(XHCIState *xhci, unsigned int slotid)
     trace_usb_xhci_slot_enable(slotid);
     assert(slotid >= 1 && slotid <= MAXSLOTS);
     xhci->slots[slotid-1].enabled = 1;
-    xhci->slots[slotid-1].port = 0;
+    xhci->slots[slotid-1].uport = NULL;
     memset(xhci->slots[slotid-1].eps, 0, sizeof(XHCIEPContext*)*31);
 
     return CC_SUCCESS;
@@ -1795,17 +1785,42 @@ static TRBCCode xhci_disable_slot(XHCIState *xhci, unsigned int slotid)
     return CC_SUCCESS;
 }
 
+static USBPort *xhci_lookup_uport(XHCIState *xhci, uint32_t *slot_ctx)
+{
+    USBPort *uport;
+    char path[32];
+    int i, pos, port;
+
+    port = (slot_ctx[1]>>16) & 0xFF;
+    port = xhci->ports[port-1].uport->index+1;
+    pos = snprintf(path, sizeof(path), "%d", port);
+    for (i = 0; i < 5; i++) {
+        port = (slot_ctx[0] >> 4*i) & 0x0f;
+        if (!port) {
+            break;
+        }
+        pos += snprintf(path + pos, sizeof(path) - pos, ".%d", port);
+    }
+
+    QTAILQ_FOREACH(uport, &xhci->bus.used, next) {
+        if (strcmp(uport->path, path) == 0) {
+            return uport;
+        }
+    }
+    return NULL;
+}
+
 static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
                                   uint64_t pictx, bool bsr)
 {
     XHCISlot *slot;
+    USBPort *uport;
     USBDevice *dev;
     dma_addr_t ictx, octx, dcbaap;
     uint64_t poctx;
     uint32_t ictl_ctx[2];
     uint32_t slot_ctx[4];
     uint32_t ep0_ctx[5];
-    unsigned int port;
     int i;
     TRBCCode res;
 
@@ -1837,27 +1852,28 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
     DPRINTF("xhci: input ep0 context: %08x %08x %08x %08x %08x\n",
             ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]);
 
-    port = (slot_ctx[1]>>16) & 0xFF;
-    dev = xhci->ports[port-1].uport->dev;
-
-    if (port < 1 || port > xhci->numports) {
-        fprintf(stderr, "xhci: bad port %d\n", port);
+    uport = xhci_lookup_uport(xhci, slot_ctx);
+    if (uport == NULL) {
+        fprintf(stderr, "xhci: port not found\n");
         return CC_TRB_ERROR;
-    } else if (!dev) {
-        fprintf(stderr, "xhci: port %d not connected\n", port);
+    }
+
+    dev = uport->dev;
+    if (!dev) {
+        fprintf(stderr, "xhci: port %s not connected\n", uport->path);
         return CC_USB_TRANSACTION_ERROR;
     }
 
     for (i = 0; i < MAXSLOTS; i++) {
-        if (xhci->slots[i].port == port) {
-            fprintf(stderr, "xhci: port %d already assigned to slot %d\n",
-                    port, i+1);
+        if (xhci->slots[i].uport == uport) {
+            fprintf(stderr, "xhci: port %s already assigned to slot %d\n",
+                    uport->path, i+1);
             return CC_TRB_ERROR;
         }
     }
 
     slot = &xhci->slots[slotid-1];
-    slot->port = port;
+    slot->uport = uport;
     slot->ctx = octx;
 
     if (bsr) {
@@ -2821,9 +2837,17 @@ static void xhci_complete(USBPort *port, USBPacket *packet)
     xhci_kick_ep(xfer->xhci, xfer->slotid, xfer->epid);
 }
 
-static void xhci_child_detach(USBPort *port, USBDevice *child)
+static void xhci_child_detach(USBPort *uport, USBDevice *child)
 {
-    FIXME();
+    USBBus *bus = usb_bus_from_device(child);
+    XHCIState *xhci = container_of(bus, XHCIState, bus);
+    int i;
+
+    for (i = 0; i < MAXSLOTS; i++) {
+        if (xhci->slots[i].uport == uport) {
+            xhci->slots[i].uport = NULL;
+        }
+    }
 }
 
 static USBPortOps xhci_port_ops = {
commit d95e74eaed8b74b0c75ab343e9cb826b1f5c9007
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 20 09:55:49 2012 +0200

    xhci: tweak limits
    
    Set maxports to 15.  This is what the usb3 route string can handle.
    
    Set maxslots to 64.  This is more than the number of root ports we
    can have, but with additional hubs you can end up with more devices.
    
    Set maxintrs (aka msi vectors) to 16.  Should be enough, especially
    considering that vectors are a limited resource.  Linux guests use
    only three at the moment.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e0ca690..1414826 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -37,12 +37,12 @@
 #define FIXME() do { fprintf(stderr, "FIXME %s:%d\n", \
                              __func__, __LINE__); abort(); } while (0)
 
-#define MAXPORTS_2 8
-#define MAXPORTS_3 8
+#define MAXPORTS_2 15
+#define MAXPORTS_3 15
 
 #define MAXPORTS (MAXPORTS_2+MAXPORTS_3)
-#define MAXSLOTS MAXPORTS
-#define MAXINTRS MAXPORTS
+#define MAXSLOTS 64
+#define MAXINTRS 16
 
 #define TD_QUEUE 24
 
commit a2879190ab08b2b75d65b576fad7ff95d7d7d641
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 13 10:53:23 2012 +0200

    compat: turn off msi/msix on xhci for old machine types
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 5a0796b..afd8361 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -358,14 +358,30 @@ static QEMUMachine pc_machine_v1_3 = {
     .is_default = 1,
 };
 
+#define PC_COMPAT_1_2 \
+        {\
+            .driver   = "nec-usb-xhci",\
+            .property = "msi",\
+            .value    = "off",\
+        },{\
+            .driver   = "nec-usb-xhci",\
+            .property = "msix",\
+            .value    = "off",\
+        }
+
 static QEMUMachine pc_machine_v1_2 = {
     .name = "pc-1.2",
     .desc = "Standard PC",
     .init = pc_init_pci,
     .max_cpus = 255,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_2,
+        { /* end of list */ }
+    },
 };
 
 #define PC_COMPAT_1_1 \
+        PC_COMPAT_1_2,\
         {\
             .driver   = "virtio-scsi-pci",\
             .property = "hotplug",\
commit 356d83725675c0140db27b24afed3a2c0c7d9702
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 13 10:50:31 2012 +0200

    add pc-1.3 machine type
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 88ff041..5a0796b 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -349,8 +349,8 @@ static void pc_xen_hvm_init(ram_addr_t ram_size,
 }
 #endif
 
-static QEMUMachine pc_machine_v1_2 = {
-    .name = "pc-1.2",
+static QEMUMachine pc_machine_v1_3 = {
+    .name = "pc-1.3",
     .alias = "pc",
     .desc = "Standard PC",
     .init = pc_init_pci,
@@ -358,6 +358,13 @@ static QEMUMachine pc_machine_v1_2 = {
     .is_default = 1,
 };
 
+static QEMUMachine pc_machine_v1_2 = {
+    .name = "pc-1.2",
+    .desc = "Standard PC",
+    .init = pc_init_pci,
+    .max_cpus = 255,
+};
+
 #define PC_COMPAT_1_1 \
         {\
             .driver   = "virtio-scsi-pci",\
@@ -655,6 +662,7 @@ static QEMUMachine xenfv_machine = {
 
 static void pc_machine_init(void)
 {
+    qemu_register_machine(&pc_machine_v1_3);
     qemu_register_machine(&pc_machine_v1_2);
     qemu_register_machine(&pc_machine_v1_1);
     qemu_register_machine(&pc_machine_v1_0);


More information about the Spice-commits mailing list