[Spice-commits] 103 commits - .gitignore Makefile Makefile.objs QMP/qemu-ga-client QMP/qmp-events.txt block-migration.c block.c block/backup.c block/blkverify.c block/commit.c block/cow.c block/iscsi.c block/mirror.c block/qapi.c block/qcow.c block/qcow2.c block/qed.c block/raw-posix.c block/raw-win32.c block/raw_bsd.c block/sheepdog.c block/snapshot.c block/stream.c block/vdi.c block/vmdk.c block/vvfat.c blockdev-nbd.c blockdev.c blockjob.c configure gdb-xml/aarch64-core.xml hmp.c hw/block hw/bt hw/display hw/i386 hw/net hw/pci hw/usb include/block include/exec include/hw include/net include/qapi include/qemu linux-user/aarch64 linux-user/cpu-uname.c linux-user/elfload.c linux-user/main.c linux-user/qemu.h linux-user/signal.c linux-user/syscall.c linux-user/syscall_defs.h nbd.c net/hub.c net/net.c net/tap-bsd.c qapi-schema.json qemu-img-cmds.hx qemu-img.c qemu-img.texi qemu-io-cmds.c qemu-io.c qemu-options.hx qga/Makefile.objs qga/commands-win32.c qga/main.c qga/vss-win32 qga/vss-win32.c qga/vss-win32.h qmp-commands.hx rules.mak scripts/checkpatch.pl scripts/extract-vsssdk-headers scripts/qapi-types.py scripts/qapi.py target-arm/Makefile.objs target-arm/cpu-qom.h target-arm/cpu.c target-arm/cpu.h target-arm/cpu64.c target-arm/gdbstub64.c target-arm/helper.c target-arm/machine.c target-arm/translate-a64.c target-arm/translate.c target-arm/translate.h tests/Makefile tests/qemu-iotests tests/test-aio.c tests/test-throttle.c trace-events ui/spice-core.c util/Makefile.objs util/error.c util/throttle.c vl.c xen-all.c xen-stub.c

Gerd Hoffmann kraxel at kemper.freedesktop.org
Thu Sep 12 01:04:19 PDT 2013


 .gitignore                         |    1 
 Makefile                           |    3 
 Makefile.objs                      |    2 
 QMP/qemu-ga-client                 |    4 
 QMP/qmp-events.txt                 |   22 +
 block-migration.c                  |    4 
 block.c                            |  556 +++++++++++++-------------------
 block/backup.c                     |    6 
 block/blkverify.c                  |    4 
 block/commit.c                     |    6 
 block/cow.c                        |   93 +++--
 block/iscsi.c                      |   16 
 block/mirror.c                     |    6 
 block/qapi.c                       |   50 ++
 block/qcow.c                       |   15 
 block/qcow2.c                      |   26 +
 block/qed.c                        |   41 +-
 block/raw-posix.c                  |   24 -
 block/raw-win32.c                  |   36 +-
 block/raw_bsd.c                    |   10 
 block/sheepdog.c                   |   20 -
 block/snapshot.c                   |    2 
 block/stream.c                     |   12 
 block/vdi.c                        |   17 -
 block/vmdk.c                       |   33 +
 block/vvfat.c                      |   21 -
 blockdev-nbd.c                     |   10 
 blockdev.c                         |  242 ++++++++------
 blockjob.c                         |    1 
 configure                          |  103 +++++-
 gdb-xml/aarch64-core.xml           |   46 ++
 hmp.c                              |   38 ++
 hw/block/dataplane/virtio-blk.c    |    9 
 hw/block/xen_disk.c                |   13 
 hw/bt/core.c                       |   23 +
 hw/bt/hci.c                        |   48 ++
 hw/display/pl110.c                 |   18 -
 hw/display/qxl-render.c            |   15 
 hw/display/qxl.c                   |    5 
 hw/i386/pc_piix.c                  |   18 -
 hw/i386/pc_q35.c                   |   20 +
 hw/net/e1000.c                     |  131 +++++++
 hw/net/ne2000.c                    |    2 
 hw/net/vmxnet3.h                   |    2 
 hw/pci/pci.c                       |    2 
 hw/usb/Makefile.objs               |    3 
 hw/usb/bus.c                       |    4 
 hw/usb/dev-bluetooth.c             |   10 
 hw/usb/hcd-ehci.c                  |    7 
 hw/usb/hcd-ehci.h                  |    1 
 include/block/block.h              |   38 +-
 include/block/block_int.h          |   35 --
 include/exec/user/abitypes.h       |    4 
 include/hw/bt.h                    |    3 
 include/hw/i386/pc.h               |    8 
 include/hw/usb.h                   |    3 
 include/hw/xen/xen.h               |    4 
 include/net/net.h                  |    2 
 include/qapi/error.h               |   13 
 include/qemu/throttle.h            |  110 ++++++
 linux-user/aarch64/syscall.h       |    9 
 linux-user/aarch64/syscall_nr.h    |  323 +++++++++++++++++++
 linux-user/aarch64/target_cpu.h    |   35 ++
 linux-user/aarch64/target_signal.h |   29 +
 linux-user/aarch64/termbits.h      |  220 +++++++++++++
 linux-user/cpu-uname.c             |    3 
 linux-user/elfload.c               |   15 
 linux-user/main.c                  |  100 +++++
 linux-user/qemu.h                  |    5 
 linux-user/signal.c                |  260 +++++++++++++++
 linux-user/syscall.c               |   67 +++
 linux-user/syscall_defs.h          |   28 +
 nbd.c                              |    5 
 net/hub.c                          |    2 
 net/net.c                          |   14 
 net/tap-bsd.c                      |   11 
 qapi-schema.json                   |   69 +++-
 qemu-img-cmds.hx                   |   10 
 qemu-img.c                         |  317 +++++++++++++++---
 qemu-img.texi                      |   70 ++++
 qemu-io-cmds.c                     |    4 
 qemu-io.c                          |    6 
 qemu-options.hx                    |    6 
 qga/Makefile.objs                  |    3 
 qga/commands-win32.c               |   82 ++++
 qga/main.c                         |   10 
 qga/vss-win32.c                    |  166 +++++++++
 qga/vss-win32.h                    |   27 +
 qga/vss-win32/Makefile.objs        |   23 +
 qga/vss-win32/install.cpp          |  458 +++++++++++++++++++++++++++
 qga/vss-win32/provider.cpp         |  523 ++++++++++++++++++++++++++++++
 qga/vss-win32/qga-vss.def          |   13 
 qga/vss-win32/qga-vss.idl          |   20 +
 qga/vss-win32/qga-vss.tlb          |binary
 qga/vss-win32/requester.cpp        |  507 +++++++++++++++++++++++++++++
 qga/vss-win32/requester.h          |   42 ++
 qga/vss-win32/vss-common.h         |  129 +++++++
 qmp-commands.hx                    |   32 +
 rules.mak                          |    9 
 scripts/checkpatch.pl              |   34 +-
 scripts/extract-vsssdk-headers     |   35 ++
 scripts/qapi-types.py              |    5 
 scripts/qapi.py                    |   12 
 target-arm/Makefile.objs           |    1 
 target-arm/cpu-qom.h               |   19 +
 target-arm/cpu.c                   |   23 +
 target-arm/cpu.h                   |  170 +++++++---
 target-arm/cpu64.c                 |  118 ++++++
 target-arm/gdbstub64.c             |   73 ++++
 target-arm/helper.c                |   36 ++
 target-arm/machine.c               |    8 
 target-arm/translate-a64.c         |  139 ++++++++
 target-arm/translate.c             |  450 ++++++++++++++++----------
 target-arm/translate.h             |   49 ++
 tests/Makefile                     |    2 
 tests/qemu-iotests/026.out         |   28 -
 tests/qemu-iotests/026.out.nocache |  626 +++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/039.out         |    4 
 tests/qemu-iotests/063             |   97 +++++
 tests/qemu-iotests/063.out         |   10 
 tests/qemu-iotests/check           |  240 +++++++-------
 tests/qemu-iotests/common          |  422 ++++++++++++------------
 tests/qemu-iotests/common.config   |    6 
 tests/qemu-iotests/common.filter   |   42 +-
 tests/qemu-iotests/common.pattern  |    4 
 tests/qemu-iotests/common.rc       |   92 ++---
 tests/qemu-iotests/group           |    1 
 tests/test-aio.c                   |   11 
 tests/test-throttle.c              |  481 ++++++++++++++++++++++++++++
 trace-events                       |    2 
 ui/spice-core.c                    |   28 -
 util/Makefile.objs                 |    1 
 util/error.c                       |   35 ++
 util/throttle.c                    |  396 +++++++++++++++++++++++
 vl.c                               |   82 ----
 xen-all.c                          |    7 
 xen-stub.c                         |    2 
 137 files changed, 7937 insertions(+), 1527 deletions(-)

New commits:
commit 2d1fe1873a984d1c2c89ffa3d12949cafc718551
Merge: 6f52e51 6a49fa9
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:46:52 2013 -0500

    Merge remote-tracking branch 'pmaydell/tags/pull-target-arm-20130910' into staging
    
    ARM queue:
     * aarch64 preparation patchset (excluding the defconfigs, so this
       doesn't actually enable the new targets yet)
     * minor bugfixes and cleanups
     * disable "-cpu any" in system emulation mode
     * fix ARMv7M stack alignment on reset
    
    # gpg: Signature made Tue 10 Sep 2013 01:46:11 PM CDT using RSA key ID 14360CDE
    # gpg: Can't check signature: public key not found
    
    # By Alexander Graf (13) and others
    # Via Peter Maydell
    * pmaydell/tags/pull-target-arm-20130910: (28 commits)
      configure: Add handling code for AArch64 targets
      linux-user: Add AArch64 support
      linux-user: Allow targets to specify a minimum uname release
      linux-user: Add AArch64 termbits.h definitions
      linux-user: Implement cpu_set_tls() and cpu_clone_regs() for AArch64
      linux-user: Make sure NWFPE code is 32 bit ARM only
      linux-user: Add signal handling for AArch64
      linux-user: Fix up AArch64 syscall handlers
      linux-user: Add syscall number definitions for AArch64
      linux-user: Add cpu loop for AArch64
      linux-user: Don't treat AArch64 cpu names specially
      target-arm: Add AArch64 gdbstub support
      target-arm: Add AArch64 translation stub
      target-arm: Prepare translation for AArch64 code
      target-arm: Disable 32 bit CPUs in 64 bit linux-user builds
      target-arm: Add new AArch64CPUInfo base class and subclasses
      target-arm: Pass DisasContext* to gen_set_pc_im()
      target-arm: Fix target_ulong/uint32_t confusions
      target-arm: Export cpu_env
      target-arm: Extract the disas struct to a header file
      ...
    
    Message-id: 1378839142-7726-1-git-send-email-peter.maydell at linaro.org
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit 6f52e51bb7706562634e5dd2755a1e9b8a5037cc
Merge: d985bd4 02dc4bf
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:46:44 2013 -0500

    Merge remote-tracking branch 'luiz/queue/qmp' into staging
    
    # By Cole Robinson
    # Via Luiz Capitulino
    * luiz/queue/qmp:
      qapi-types.py: Fix enum struct sizes on i686
    
    Message-id: 1378822364-13887-1-git-send-email-lcapitulino at redhat.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit d985bd4d55555a06c4239eadba4e367880e938ba
Merge: a640f07 c58c7b9
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:46:26 2013 -0500

    Merge remote-tracking branch 'spice/spice.v73' into staging
    
    # By Gerd Hoffmann (2) and Christophe Fergeau (1)
    # Via Gerd Hoffmann
    * spice/spice.v73:
      qxl: fix local renderer
      qxl: trace io port name
      spice-core: Use g_strdup_printf instead of snprintf
    
    Message-id: 1378807572-27902-1-git-send-email-kraxel at redhat.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit a640f07c0d03bfa3031af1fc0a32b0d779917d17
Merge: f69f0bc adbecc8
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:46:21 2013 -0500

    Merge remote-tracking branch 'kraxel/usb.89' into staging
    
    # By Gerd Hoffmann (2) and Miroslav Rezanina (2)
    # Via Gerd Hoffmann
    * kraxel/usb.89:
      ehci: save device pointer in EHCIState
      Remove dev-bluetooth.c dependency from vl.c
      Preparation for usb-bt-dongle conditional build
      usb: sanity check setup_index+setup_len in post_load
    
    Message-id: 1378806073-25197-1-git-send-email-kraxel at redhat.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit f69f0bcac951f3c3089246695874b84ea8967936
Merge: 97fdb94 e2682db
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:46:08 2013 -0500

    Merge remote-tracking branch 'mdroth/qga-pull-2013-9-9' into staging
    
    # By Tomoki Sekiyama (10) and Paul Burton (1)
    # Via Michael Roth
    * mdroth/qga-pull-2013-9-9:
      QMP/qemu-ga-client: Make timeout longer for guest-fsfreeze-freeze command
      qemu-ga: Install Windows VSS provider on `qemu-ga -s install'
      qemu-ga: Call Windows VSS requester in fsfreeze command handler
      qemu-ga: Add Windows VSS provider and requester as DLL
      error: Add error_set_win32 and error_setg_win32
      qemu-ga: Add configure options to specify path to Windows/VSS SDK
      Add a script to extract VSS SDK headers on POSIX system
      checkpatch.pl: Check .cpp files
      Add c++ keywords to QAPI helper script
      configure: Support configuring C++ compiler
      mips_malta: support up to 2GiB RAM
    
    Message-id: 1378755701-2051-1-git-send-email-mdroth at linux.vnet.ibm.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit 97fdb9410bb5398fd33f51a37e637d697ace9f73
Merge: 964737e 254c128
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:45:57 2013 -0500

    Merge remote-tracking branch 'sstabellini/xen-2013-09-09' into staging
    
    # By Anthony PERARD
    # Via Stefano Stabellini
    * sstabellini/xen-2013-09-09:
      pc_q35: Initialize Xen.
      pc: Initializing ram_memory under Xen.
    
    Message-id: alpine.DEB.2.02.1309091718030.6397 at kaball.uk.xensource.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit 964737ea195de1560f3bcf55b8b6d4f7d0d4a619
Merge: ce2b694 8f94b07
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:45:37 2013 -0500

    Merge remote-tracking branch 'stefanha/block' into staging
    
    # By Paolo Bonzini (21) and others
    # Via Stefan Hajnoczi
    * stefanha/block: (42 commits)
      qemu-iotests: Fixed test case 026
      qemu-iotests: Whitespace cleanup
      dataplane: Fix startup race.
      block: look for zero blocks in bs->file
      block: add default get_block_status implementation for protocols
      raw-posix: report unwritten extents as zero
      raw-posix: return get_block_status data and flags
      docs, qapi: document qemu-img map
      qemu-img: add a "map" subcommand
      block: return BDRV_BLOCK_ZERO past end of backing file
      block: use bdrv_has_zero_init to return BDRV_BLOCK_ZERO
      block: return get_block_status data and flags for formats
      block: define get_block_status return value
      block: introduce bdrv_get_block_status API
      block: make bdrv_has_zero_init return false for copy-on-write-images
      qemu-img: always probe the input image for allocated sectors
      block: expect errors from bdrv_co_is_allocated
      block: remove bdrv_is_allocated_above/bdrv_co_is_allocated_above distinction
      block: do not use ->total_sectors in bdrv_co_is_allocated
      block: make bdrv_co_is_allocated static
      ...
    
    Message-id: 1378481953-23099-1-git-send-email-stefanha at redhat.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit ce2b69417caae3731fb50f67854afa006f624a2d
Merge: df71316 45d883d
Author: Anthony Liguori <anthony at codemonkey.ws>
Date:   Wed Sep 11 14:45:18 2013 -0500

    Merge remote-tracking branch 'stefanha/net' into staging
    
    # By Brad Smith (2) and others
    # Via Stefan Hajnoczi
    * stefanha/net:
      ne2000: mark I/O as LITTLE_ENDIAN
      vmxnet3: Eliminate __packed redefined warning
      e1000: add interrupt mitigation support
      net: Rename send_queue to incoming_queue
      tap: Use numbered tap/tun devices on all *BSD OS's
    
    Message-id: 1378481624-20964-1-git-send-email-stefanha at redhat.com
    Signed-off-by: Anthony Liguori <anthony at codemonkey.ws>

commit 6a49fa95c98cd155f7aaf48e5c6fa6bb6adea862
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:22 2013 +0100

    configure: Add handling code for AArch64 targets
    
    Add the necessary code to configure to handle AArch64 as a target
    CPU (we already have some code for supporting it as host). Note
    that this doesn't enable the AArch64 targets yet.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-23-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-12-git-send-email-john.rigby at linaro.org
    [PMM:
     * don't need to set TARGET_ABI_DIR to aarch64 as that is the default
     * don't build nwfpe -- this is 32 bit legacy only
     * rewrite commit message
     * add aarch64 to the list of "fdt required" targets
    ]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/configure b/configure
index e989609..18adc76 100755
--- a/configure
+++ b/configure
@@ -2522,7 +2522,7 @@ fi
 fdt_required=no
 for target in $target_list; do
   case $target in
-    arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
+    aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu)
       fdt_required=yes
     ;;
   esac
@@ -4272,6 +4272,11 @@ case "$target_name" in
     bflt="yes"
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
   ;;
+  aarch64)
+    TARGET_BASE_ARCH=arm
+    bflt="yes"
+    gdb_xml_files="aarch64-core.xml"
+  ;;
   cris)
   ;;
   lm32)
commit 99033caee6e9b339c89a368b5ed1f73ef17924a9
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:21 2013 +0100

    linux-user: Add AArch64 support
    
    This patch adds support for AArch64 in all the small corners of
    linux-user (primarily in image loading and startup code).
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-22-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-11-git-send-email-john.rigby at linaro.org
    [PMM:
     * removed some unnecessary #defines from syscall.h
     * catch attempts to use a 32 bit only cpu with aarch64-linux-user
     * termios stuff moved into its own patch
     * we specify our minimum uname version here now
    ]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
new file mode 100644
index 0000000..aef419e
--- /dev/null
+++ b/linux-user/aarch64/syscall.h
@@ -0,0 +1,9 @@
+struct target_pt_regs {
+    uint64_t        regs[31];
+    uint64_t        sp;
+    uint64_t        pc;
+    uint64_t        pstate;
+};
+
+#define UNAME_MACHINE "aarch64"
+#define UNAME_MINIMUM_RELEASE "3.8.0"
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 72d9270..8dd424d 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -269,16 +269,26 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
 
 #define ELF_START_MMAP 0x80000000
 
-#define elf_check_arch(x) ( (x) == EM_ARM )
+#define elf_check_arch(x) ((x) == ELF_MACHINE)
 
+#define ELF_ARCH        ELF_MACHINE
+
+#ifdef TARGET_AARCH64
+#define ELF_CLASS       ELFCLASS64
+#else
 #define ELF_CLASS       ELFCLASS32
-#define ELF_ARCH        EM_ARM
+#endif
 
 static inline void init_thread(struct target_pt_regs *regs,
                                struct image_info *infop)
 {
     abi_long stack = infop->start_stack;
     memset(regs, 0, sizeof(*regs));
+
+#ifdef TARGET_AARCH64
+    regs->pc = infop->entry & ~0x3ULL;
+    regs->sp = stack;
+#else
     regs->ARM_cpsr = 0x10;
     if (infop->entry & 1)
         regs->ARM_cpsr |= CPSR_T;
@@ -292,6 +302,7 @@ static inline void init_thread(struct target_pt_regs *regs,
     /* For uClinux PIC binaries.  */
     /* XXX: Linux does this only on ARM with no MMU (do we care ?) */
     regs->ARM_r10 = infop->start_data;
+#endif
 }
 
 #define ELF_NREG    18
diff --git a/linux-user/main.c b/linux-user/main.c
index 8838305..01e3cd4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3968,6 +3968,22 @@ int main(int argc, char **argv, char **envp)
     cpu_x86_load_seg(env, R_FS, 0);
     cpu_x86_load_seg(env, R_GS, 0);
 #endif
+#elif defined(TARGET_AARCH64)
+    {
+        int i;
+
+        if (!(arm_feature(env, ARM_FEATURE_AARCH64))) {
+            fprintf(stderr,
+                    "The selected ARM CPU does not support 64 bit mode\n");
+            exit(1);
+        }
+
+        for (i = 0; i < 31; i++) {
+            env->xregs[i] = regs->regs[i];
+        }
+        env->pc = regs->pc;
+        env->xregs[31] = regs->sp;
+    }
 #elif defined(TARGET_ARM)
     {
         int i;
commit 4a24a758101ff726c9bd3b867e12d5580c793af0
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:20 2013 +0100

    linux-user: Allow targets to specify a minimum uname release
    
    For newer target architectures, glibc can be picky about the kernel
    version: for example, it will not run on an aarch64 system unless
    the kernel reports itself as at least 3.8.0. Accommodate this by
    enhancing the existing support for faking the kernel version so
    that each target can optionally specify a minimum version: if
    the user doesn't force a specific fake version then we will override
    with the minimum required version only if the real host kernel
    version is insufficient.
    
    Use this facility to let aarch64 report a minimum of 3.8.0.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-21-git-send-email-peter.maydell at linaro.org

diff --git a/linux-user/main.c b/linux-user/main.c
index b6e434a..8838305 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3676,6 +3676,8 @@ int main(int argc, char **argv, char **envp)
     /* Scan interp_prefix dir for replacement files. */
     init_paths(interp_prefix);
 
+    init_qemu_uname_release();
+
     if (cpu_model == NULL) {
 #if defined(TARGET_I386)
 #ifdef TARGET_X86_64
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 4df4fcb..6ffe5a2 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -197,6 +197,7 @@ extern THREAD CPUState *thread_cpu;
 void cpu_loop(CPUArchState *env);
 char *target_strerror(int err);
 int get_osversion(void);
+void init_qemu_uname_release(void);
 void fork_start(void);
 void fork_end(int child);
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ea04db1..c62d875 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -4863,12 +4863,35 @@ int host_to_target_waitstatus(int status)
     return status;
 }
 
+static int relstr_to_int(const char *s)
+{
+    /* Convert a uname release string like "2.6.18" to an integer
+     * of the form 0x020612. (Beware that 0x020612 is *not* 2.6.12.)
+     */
+    int i, n, tmp;
+
+    tmp = 0;
+    for (i = 0; i < 3; i++) {
+        n = 0;
+        while (*s >= '0' && *s <= '9') {
+            n *= 10;
+            n += *s - '0';
+            s++;
+        }
+        tmp = (tmp << 8) + n;
+        if (*s == '.') {
+            s++;
+        }
+    }
+    return tmp;
+}
+
 int get_osversion(void)
 {
     static int osversion;
     struct new_utsname buf;
     const char *s;
-    int i, n, tmp;
+
     if (osversion)
         return osversion;
     if (qemu_uname_release && *qemu_uname_release) {
@@ -4878,22 +4901,33 @@ int get_osversion(void)
             return 0;
         s = buf.release;
     }
-    tmp = 0;
-    for (i = 0; i < 3; i++) {
-        n = 0;
-        while (*s >= '0' && *s <= '9') {
-            n *= 10;
-            n += *s - '0';
-            s++;
-        }
-        tmp = (tmp << 8) + n;
-        if (*s == '.')
-            s++;
-    }
-    osversion = tmp;
+    osversion = relstr_to_int(s);
     return osversion;
 }
 
+void init_qemu_uname_release(void)
+{
+    /* Initialize qemu_uname_release for later use.
+     * If the host kernel is too old and the user hasn't asked for
+     * a specific fake version number, we might want to fake a minimum
+     * target kernel version.
+     */
+#ifdef UNAME_MINIMUM_RELEASE
+    struct new_utsname buf;
+
+    if (qemu_uname_release && *qemu_uname_release) {
+        return;
+    }
+
+    if (sys_uname(&buf)) {
+        return;
+    }
+
+    if (relstr_to_int(buf.release) < relstr_to_int(UNAME_MINIMUM_RELEASE)) {
+        qemu_uname_release = UNAME_MINIMUM_RELEASE;
+    }
+#endif
+}
 
 static int open_self_maps(void *cpu_env, int fd)
 {
commit af89c7dba52c509bdb72714139aadbe21a133f6e
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:19 2013 +0100

    linux-user: Add AArch64 termbits.h definitions
    
    Add the AArch64 termbits.h with all the target's termios related
    constants and structures.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-20-git-send-email-peter.maydell at linaro.org
    [PMM: split out from another patch]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/aarch64/termbits.h b/linux-user/aarch64/termbits.h
new file mode 100644
index 0000000..b64ba97
--- /dev/null
+++ b/linux-user/aarch64/termbits.h
@@ -0,0 +1,220 @@
+/* from asm/termbits.h */
+/* NOTE: exactly the same as i386 */
+
+#define TARGET_NCCS 19
+
+struct target_termios {
+    unsigned int c_iflag;               /* input mode flags */
+    unsigned int c_oflag;               /* output mode flags */
+    unsigned int c_cflag;               /* control mode flags */
+    unsigned int c_lflag;               /* local mode flags */
+    unsigned char c_line;                    /* line discipline */
+    unsigned char c_cc[TARGET_NCCS];                /* control characters */
+};
+
+/* c_iflag bits */
+#define TARGET_IGNBRK  0000001
+#define TARGET_BRKINT  0000002
+#define TARGET_IGNPAR  0000004
+#define TARGET_PARMRK  0000010
+#define TARGET_INPCK   0000020
+#define TARGET_ISTRIP  0000040
+#define TARGET_INLCR   0000100
+#define TARGET_IGNCR   0000200
+#define TARGET_ICRNL   0000400
+#define TARGET_IUCLC   0001000
+#define TARGET_IXON    0002000
+#define TARGET_IXANY   0004000
+#define TARGET_IXOFF   0010000
+#define TARGET_IMAXBEL 0020000
+#define TARGET_IUTF8   0040000
+
+/* c_oflag bits */
+#define TARGET_OPOST   0000001
+#define TARGET_OLCUC   0000002
+#define TARGET_ONLCR   0000004
+#define TARGET_OCRNL   0000010
+#define TARGET_ONOCR   0000020
+#define TARGET_ONLRET  0000040
+#define TARGET_OFILL   0000100
+#define TARGET_OFDEL   0000200
+#define TARGET_NLDLY   0000400
+#define   TARGET_NL0   0000000
+#define   TARGET_NL1   0000400
+#define TARGET_CRDLY   0003000
+#define   TARGET_CR0   0000000
+#define   TARGET_CR1   0001000
+#define   TARGET_CR2   0002000
+#define   TARGET_CR3   0003000
+#define TARGET_TABDLY  0014000
+#define   TARGET_TAB0  0000000
+#define   TARGET_TAB1  0004000
+#define   TARGET_TAB2  0010000
+#define   TARGET_TAB3  0014000
+#define   TARGET_XTABS 0014000
+#define TARGET_BSDLY   0020000
+#define   TARGET_BS0   0000000
+#define   TARGET_BS1   0020000
+#define TARGET_VTDLY   0040000
+#define   TARGET_VT0   0000000
+#define   TARGET_VT1   0040000
+#define TARGET_FFDLY   0100000
+#define   TARGET_FF0   0000000
+#define   TARGET_FF1   0100000
+
+/* c_cflag bit meaning */
+#define TARGET_CBAUD   0010017
+#define  TARGET_B0     0000000         /* hang up */
+#define  TARGET_B50    0000001
+#define  TARGET_B75    0000002
+#define  TARGET_B110   0000003
+#define  TARGET_B134   0000004
+#define  TARGET_B150   0000005
+#define  TARGET_B200   0000006
+#define  TARGET_B300   0000007
+#define  TARGET_B600   0000010
+#define  TARGET_B1200  0000011
+#define  TARGET_B1800  0000012
+#define  TARGET_B2400  0000013
+#define  TARGET_B4800  0000014
+#define  TARGET_B9600  0000015
+#define  TARGET_B19200 0000016
+#define  TARGET_B38400 0000017
+#define TARGET_EXTA B19200
+#define TARGET_EXTB B38400
+#define TARGET_CSIZE   0000060
+#define   TARGET_CS5   0000000
+#define   TARGET_CS6   0000020
+#define   TARGET_CS7   0000040
+#define   TARGET_CS8   0000060
+#define TARGET_CSTOPB  0000100
+#define TARGET_CREAD   0000200
+#define TARGET_PARENB  0000400
+#define TARGET_PARODD  0001000
+#define TARGET_HUPCL   0002000
+#define TARGET_CLOCAL  0004000
+#define TARGET_CBAUDEX 0010000
+#define  TARGET_B57600  0010001
+#define  TARGET_B115200 0010002
+#define  TARGET_B230400 0010003
+#define  TARGET_B460800 0010004
+#define TARGET_CIBAUD    002003600000  /* input baud rate (not used) */
+#define TARGET_CMSPAR    010000000000  /* mark or space (stick) parity */
+#define TARGET_CRTSCTS   020000000000  /* flow control */
+
+/* c_lflag bits */
+#define TARGET_ISIG    0000001
+#define TARGET_ICANON  0000002
+#define TARGET_XCASE   0000004
+#define TARGET_ECHO    0000010
+#define TARGET_ECHOE   0000020
+#define TARGET_ECHOK   0000040
+#define TARGET_ECHONL  0000100
+#define TARGET_NOFLSH  0000200
+#define TARGET_TOSTOP  0000400
+#define TARGET_ECHOCTL 0001000
+#define TARGET_ECHOPRT 0002000
+#define TARGET_ECHOKE  0004000
+#define TARGET_FLUSHO  0010000
+#define TARGET_PENDIN  0040000
+#define TARGET_IEXTEN  0100000
+
+/* c_cc character offsets */
+#define TARGET_VINTR    0
+#define TARGET_VQUIT    1
+#define TARGET_VERASE   2
+#define TARGET_VKILL    3
+#define TARGET_VEOF     4
+#define TARGET_VTIME    5
+#define TARGET_VMIN     6
+#define TARGET_VSWTC    7
+#define TARGET_VSTART   8
+#define TARGET_VSTOP    9
+#define TARGET_VSUSP    10
+#define TARGET_VEOL     11
+#define TARGET_VREPRINT 12
+#define TARGET_VDISCARD 13
+#define TARGET_VWERASE  14
+#define TARGET_VLNEXT   15
+#define TARGET_VEOL2    16
+
+/* ioctls */
+
+#define TARGET_TCGETS           0x5401
+#define TARGET_TCSETS           0x5402
+#define TARGET_TCSETSW          0x5403
+#define TARGET_TCSETSF          0x5404
+#define TARGET_TCGETA           0x5405
+#define TARGET_TCSETA           0x5406
+#define TARGET_TCSETAW          0x5407
+#define TARGET_TCSETAF          0x5408
+#define TARGET_TCSBRK           0x5409
+#define TARGET_TCXONC           0x540A
+#define TARGET_TCFLSH           0x540B
+
+#define TARGET_TIOCEXCL         0x540C
+#define TARGET_TIOCNXCL         0x540D
+#define TARGET_TIOCSCTTY        0x540E
+#define TARGET_TIOCGPGRP        0x540F
+#define TARGET_TIOCSPGRP        0x5410
+#define TARGET_TIOCOUTQ         0x5411
+#define TARGET_TIOCSTI          0x5412
+#define TARGET_TIOCGWINSZ       0x5413
+#define TARGET_TIOCSWINSZ       0x5414
+#define TARGET_TIOCMGET         0x5415
+#define TARGET_TIOCMBIS         0x5416
+#define TARGET_TIOCMBIC         0x5417
+#define TARGET_TIOCMSET         0x5418
+#define TARGET_TIOCGSOFTCAR     0x5419
+#define TARGET_TIOCSSOFTCAR     0x541A
+#define TARGET_FIONREAD         0x541B
+#define TARGET_TIOCINQ          TARGET_FIONREAD
+#define TARGET_TIOCLINUX        0x541C
+#define TARGET_TIOCCONS         0x541D
+#define TARGET_TIOCGSERIAL      0x541E
+#define TARGET_TIOCSSERIAL      0x541F
+#define TARGET_TIOCPKT          0x5420
+#define TARGET_FIONBIO          0x5421
+#define TARGET_TIOCNOTTY        0x5422
+#define TARGET_TIOCSETD         0x5423
+#define TARGET_TIOCGETD         0x5424
+#define TARGET_TCSBRKP          0x5425 /* Needed for POSIX tcsendbreak() */
+#define TARGET_TIOCTTYGSTRUCT   0x5426 /* For debugging only */
+#define TARGET_TIOCSBRK         0x5427 /* BSD compatibility */
+#define TARGET_TIOCCBRK         0x5428 /* BSD compatibility */
+#define TARGET_TIOCGSID         0x5429 /* Return the session ID of FD */
+#define TARGET_TIOCGPTN         TARGET_IOR('T', 0x30, unsigned int)
+        /* Get Pty Number (of pty-mux device) */
+#define TARGET_TIOCSPTLCK       TARGET_IOW('T', 0x31, int)
+        /* Lock/unlock Pty */
+
+#define TARGET_FIONCLEX         0x5450  /* these numbers need to be adjusted. */
+#define TARGET_FIOCLEX          0x5451
+#define TARGET_FIOASYNC         0x5452
+#define TARGET_TIOCSERCONFIG    0x5453
+#define TARGET_TIOCSERGWILD     0x5454
+#define TARGET_TIOCSERSWILD     0x5455
+#define TARGET_TIOCGLCKTRMIOS   0x5456
+#define TARGET_TIOCSLCKTRMIOS   0x5457
+#define TARGET_TIOCSERGSTRUCT   0x5458 /* For debugging only */
+#define TARGET_TIOCSERGETLSR    0x5459 /* Get line status register */
+#define TARGET_TIOCSERGETMULTI  0x545A /* Get multiport config  */
+#define TARGET_TIOCSERSETMULTI  0x545B /* Set multiport config */
+
+#define TARGET_TIOCMIWAIT      0x545C
+        /* wait for a change on serial input line(s) */
+#define TARGET_TIOCGICOUNT     0x545D
+        /* read serial port inline interrupt counts */
+#define TARGET_TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TARGET_TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+
+/* Used for packet mode */
+#define TARGET_TIOCPKT_DATA              0
+#define TARGET_TIOCPKT_FLUSHREAD         1
+#define TARGET_TIOCPKT_FLUSHWRITE        2
+#define TARGET_TIOCPKT_STOP              4
+#define TARGET_TIOCPKT_START             8
+#define TARGET_TIOCPKT_NOSTOP           16
+#define TARGET_TIOCPKT_DOSTOP           32
+
+#define TARGET_TIOCSER_TEMT    0x01 /* Transmitter physically empty */
commit e2cea499cc2e8da5b2d5753625d2c57685193783
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:18 2013 +0100

    linux-user: Implement cpu_set_tls() and cpu_clone_regs() for AArch64
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-19-git-send-email-peter.maydell at linaro.org
    [PMM: pulled out from another patch; don't use is_a64() here;
     moved to linux-user from target-arm]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
new file mode 100644
index 0000000..6f5539b
--- /dev/null
+++ b/linux-user/aarch64/target_cpu.h
@@ -0,0 +1,35 @@
+/*
+ * ARM AArch64 specific CPU ABI and functions for linux-user
+ *
+ * Copyright (c) 2013 Alexander Graf <agraf at suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef TARGET_CPU_H
+#define TARGET_CPU_H
+
+static inline void cpu_clone_regs(CPUARMState *env, target_ulong newsp)
+{
+    if (newsp) {
+        env->xregs[31] = newsp;
+    }
+    env->xregs[0] = 0;
+}
+
+static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
+{
+    env->sr.tpidr_el0 = newtls;
+}
+
+#endif
commit 848d72cdd894e3a883118fd0f1ede14ff66bfa21
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:17 2013 +0100

    linux-user: Make sure NWFPE code is 32 bit ARM only
    
    On ARM, linux-user emulation includes NWFPE support for emulating the
    ancient FPA floating point coprocessor. This has long since been
    superseded by VFP and is only required for legacy binaries. The
    AArch64 linux-user target doesn't compile in NWFPE support, so make
    sure the relevant code is protected by suitable ifdefs.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-18-git-send-email-peter.maydell at linaro.org

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 4a16e8f..4df4fcb 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -74,7 +74,7 @@ struct vm86_saved_state {
 };
 #endif
 
-#ifdef TARGET_ARM
+#if defined(TARGET_ARM) && defined(TARGET_ABI32)
 /* FPU emulator */
 #include "nwfpe/fpa11.h"
 #endif
@@ -98,8 +98,10 @@ struct emulated_sigtable {
 typedef struct TaskState {
     pid_t ts_tid;     /* tid (or pid) of this task */
 #ifdef TARGET_ARM
+# ifdef TARGET_ABI32
     /* FPA state */
     FPA11 fpa;
+# endif
     int swi_errno;
 #endif
 #ifdef TARGET_UNICORE32
commit 1744aea182d0fe20e190d037ccf225cbe05e96ae
Author: Andreas Schwab <schwab at suse.de>
Date:   Tue Sep 3 20:12:16 2013 +0100

    linux-user: Add signal handling for AArch64
    
    This patch adds signal handling for AArch64. The code is based on the
    respective source in the Linux kernel.
    
    Signed-off-by: Andreas Schwab <schwab at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-17-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-10-git-send-email-john.rigby at linaro.org
    [PMM: fixed style nits: tabs, long lines;
     pulled target_signal.h in from a later patch; it fits better here]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h
new file mode 100644
index 0000000..e8c677d
--- /dev/null
+++ b/linux-user/aarch64/target_signal.h
@@ -0,0 +1,29 @@
+#ifndef TARGET_SIGNAL_H
+#define TARGET_SIGNAL_H
+
+#include "cpu.h"
+
+/* this struct defines a stack used during syscall handling */
+
+typedef struct target_sigaltstack {
+    abi_ulong ss_sp;
+    abi_int ss_flags;
+    abi_ulong ss_size;
+} target_stack_t;
+
+
+/*
+ * sigaltstack controls
+ */
+#define TARGET_SS_ONSTACK 1
+#define TARGET_SS_DISABLE 2
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_SIGSTKSZ 8192
+
+static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
+{
+   return state->xregs[31];
+}
+
+#endif /* TARGET_SIGNAL_H */
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 23d65da..7751c47 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -1092,6 +1092,266 @@ badframe:
 	return 0;
 }
 
+#elif defined(TARGET_AARCH64)
+
+struct target_sigcontext {
+    uint64_t fault_address;
+    /* AArch64 registers */
+    uint64_t regs[31];
+    uint64_t sp;
+    uint64_t pc;
+    uint64_t pstate;
+    /* 4K reserved for FP/SIMD state and future expansion */
+    char __reserved[4096] __attribute__((__aligned__(16)));
+};
+
+struct target_ucontext {
+    abi_ulong tuc_flags;
+    abi_ulong tuc_link;
+    target_stack_t tuc_stack;
+    target_sigset_t tuc_sigmask;
+    /* glibc uses a 1024-bit sigset_t */
+    char __unused[1024 / 8 - sizeof(target_sigset_t)];
+    /* last for future expansion */
+    struct target_sigcontext tuc_mcontext;
+};
+
+/*
+ * Header to be used at the beginning of structures extending the user
+ * context. Such structures must be placed after the rt_sigframe on the stack
+ * and be 16-byte aligned. The last structure must be a dummy one with the
+ * magic and size set to 0.
+ */
+struct target_aarch64_ctx {
+    uint32_t magic;
+    uint32_t size;
+};
+
+#define TARGET_FPSIMD_MAGIC 0x46508001
+
+struct target_fpsimd_context {
+    struct target_aarch64_ctx head;
+    uint32_t fpsr;
+    uint32_t fpcr;
+    uint64_t vregs[32 * 2]; /* really uint128_t vregs[32] */
+};
+
+/*
+ * Auxiliary context saved in the sigcontext.__reserved array. Not exported to
+ * user space as it will change with the addition of new context. User space
+ * should check the magic/size information.
+ */
+struct target_aux_context {
+    struct target_fpsimd_context fpsimd;
+    /* additional context to be added before "end" */
+    struct target_aarch64_ctx end;
+};
+
+struct target_rt_sigframe {
+    struct target_siginfo info;
+    struct target_ucontext uc;
+    uint64_t fp;
+    uint64_t lr;
+    uint32_t tramp[2];
+};
+
+static int target_setup_sigframe(struct target_rt_sigframe *sf,
+                                 CPUARMState *env, target_sigset_t *set)
+{
+    int i;
+    struct target_aux_context *aux =
+        (struct target_aux_context *)sf->uc.tuc_mcontext.__reserved;
+
+    /* set up the stack frame for unwinding */
+    __put_user(env->xregs[29], &sf->fp);
+    __put_user(env->xregs[30], &sf->lr);
+
+    for (i = 0; i < 31; i++) {
+        __put_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]);
+    }
+    __put_user(env->xregs[31], &sf->uc.tuc_mcontext.sp);
+    __put_user(env->pc, &sf->uc.tuc_mcontext.pc);
+    __put_user(env->pstate, &sf->uc.tuc_mcontext.pstate);
+
+    __put_user(/*current->thread.fault_address*/ 0,
+            &sf->uc.tuc_mcontext.fault_address);
+
+    for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+        __put_user(set->sig[i], &sf->uc.tuc_sigmask.sig[i]);
+    }
+
+    for (i = 0; i < 32; i++) {
+#ifdef TARGET_WORDS_BIGENDIAN
+        __put_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2 + 1]);
+        __put_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2]);
+#else
+        __put_user(env->vfp.regs[i * 2], &aux->fpsimd.vregs[i * 2]);
+        __put_user(env->vfp.regs[i * 2 + 1], &aux->fpsimd.vregs[i * 2 + 1]);
+#endif
+    }
+    __put_user(/*env->fpsr*/0, &aux->fpsimd.fpsr);
+    __put_user(/*env->fpcr*/0, &aux->fpsimd.fpcr);
+    __put_user(TARGET_FPSIMD_MAGIC, &aux->fpsimd.head.magic);
+    __put_user(sizeof(struct target_fpsimd_context),
+            &aux->fpsimd.head.size);
+
+    /* set the "end" magic */
+    __put_user(0, &aux->end.magic);
+    __put_user(0, &aux->end.size);
+
+    return 0;
+}
+
+static int target_restore_sigframe(CPUARMState *env,
+                                   struct target_rt_sigframe *sf)
+{
+    sigset_t set;
+    int i;
+    struct target_aux_context *aux =
+        (struct target_aux_context *)sf->uc.tuc_mcontext.__reserved;
+    uint32_t magic, size;
+
+    target_to_host_sigset(&set, &sf->uc.tuc_sigmask);
+    sigprocmask(SIG_SETMASK, &set, NULL);
+
+    for (i = 0; i < 31; i++) {
+        __get_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]);
+    }
+
+    __get_user(env->xregs[31], &sf->uc.tuc_mcontext.sp);
+    __get_user(env->pc, &sf->uc.tuc_mcontext.pc);
+    __get_user(env->pstate, &sf->uc.tuc_mcontext.pstate);
+
+    __get_user(magic, &aux->fpsimd.head.magic);
+    __get_user(size, &aux->fpsimd.head.size);
+
+    if (magic != TARGET_FPSIMD_MAGIC
+        || size != sizeof(struct target_fpsimd_context)) {
+        return 1;
+    }
+
+    for (i = 0; i < 32 * 2; i++) {
+        __get_user(env->vfp.regs[i], &aux->fpsimd.vregs[i]);
+    }
+
+    return 0;
+}
+
+static abi_ulong get_sigframe(struct target_sigaction *ka, CPUARMState *env)
+{
+    abi_ulong sp;
+
+    sp = env->xregs[31];
+
+    /*
+     * This is the X/Open sanctioned signal stack switching.
+     */
+    if ((ka->sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) {
+        sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+    }
+
+    sp = (sp - sizeof(struct target_rt_sigframe)) & ~15;
+
+    return sp;
+}
+
+static void target_setup_frame(int usig, struct target_sigaction *ka,
+                               target_siginfo_t *info, target_sigset_t *set,
+                               CPUARMState *env)
+{
+    struct target_rt_sigframe *frame;
+    abi_ulong frame_addr;
+
+    frame_addr = get_sigframe(ka, env);
+    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+        goto give_sigsegv;
+    }
+
+    __put_user(0, &frame->uc.tuc_flags);
+    __put_user(0, &frame->uc.tuc_link);
+
+    __put_user(target_sigaltstack_used.ss_sp,
+                      &frame->uc.tuc_stack.ss_sp);
+    __put_user(sas_ss_flags(env->xregs[31]),
+                      &frame->uc.tuc_stack.ss_flags);
+    __put_user(target_sigaltstack_used.ss_size,
+                      &frame->uc.tuc_stack.ss_size);
+    target_setup_sigframe(frame, env, set);
+    /* mov x8,#__NR_rt_sigreturn; svc #0 */
+    __put_user(0xd2801168, &frame->tramp[0]);
+    __put_user(0xd4000001, &frame->tramp[1]);
+    env->xregs[0] = usig;
+    env->xregs[31] = frame_addr;
+    env->xregs[29] = env->xregs[31] + offsetof(struct target_rt_sigframe, fp);
+    env->pc = ka->_sa_handler;
+    env->xregs[30] = env->xregs[31] +
+        offsetof(struct target_rt_sigframe, tramp);
+    if (info) {
+        if (copy_siginfo_to_user(&frame->info, info)) {
+            goto give_sigsegv;
+        }
+        env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info);
+        env->xregs[2] = frame_addr + offsetof(struct target_rt_sigframe, uc);
+    }
+
+    unlock_user_struct(frame, frame_addr, 1);
+    return;
+
+ give_sigsegv:
+    unlock_user_struct(frame, frame_addr, 1);
+    force_sig(TARGET_SIGSEGV);
+}
+
+static void setup_rt_frame(int sig, struct target_sigaction *ka,
+                           target_siginfo_t *info, target_sigset_t *set,
+                           CPUARMState *env)
+{
+    target_setup_frame(sig, ka, info, set, env);
+}
+
+static void setup_frame(int sig, struct target_sigaction *ka,
+                        target_sigset_t *set, CPUARMState *env)
+{
+    target_setup_frame(sig, ka, 0, set, env);
+}
+
+long do_rt_sigreturn(CPUARMState *env)
+{
+    struct target_rt_sigframe *frame;
+    abi_ulong frame_addr = env->xregs[31];
+
+    if (frame_addr & 15) {
+        goto badframe;
+    }
+
+    if  (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        goto badframe;
+    }
+
+    if (target_restore_sigframe(env, frame)) {
+        goto badframe;
+    }
+
+    if (do_sigaltstack(frame_addr +
+            offsetof(struct target_rt_sigframe, uc.tuc_stack),
+            0, get_sp_from_cpustate(env)) == -EFAULT) {
+        goto badframe;
+    }
+
+    unlock_user_struct(frame, frame_addr, 0);
+    return env->xregs[0];
+
+ badframe:
+    unlock_user_struct(frame, frame_addr, 0);
+    force_sig(TARGET_SIGSEGV);
+    return 0;
+}
+
+long do_sigreturn(CPUARMState *env)
+{
+    return do_rt_sigreturn(env);
+}
+
 #elif defined(TARGET_ARM)
 
 struct target_sigcontext {
commit 09701199f6d87ebfe609776156108c6dec812cde
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:15 2013 +0100

    linux-user: Fix up AArch64 syscall handlers
    
    Some syscall handlers have special code for ARM enabled that we don't
    need on AArch64. Exclude AArch64 in those cases. In other places we
    can share struct definitions with other targets or have to provide our
    own.
    
    With this patch applied, most syscall definitions in linux-user should
    be sound for AArch64.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-16-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-9-git-send-email-john.rigby at linaro.org
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ecead51..ea04db1 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -4737,7 +4737,7 @@ static inline abi_long host_to_target_stat64(void *cpu_env,
                                              abi_ulong target_addr,
                                              struct stat *host_st)
 {
-#ifdef TARGET_ARM
+#if defined(TARGET_ARM) && defined(TARGET_ABI32)
     if (((CPUARMState *)cpu_env)->eabi) {
         struct target_eabi_stat64 *target_st;
 
@@ -6381,7 +6381,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_mmap
     case TARGET_NR_mmap:
-#if (defined(TARGET_I386) && defined(TARGET_ABI32)) || defined(TARGET_ARM) || \
+#if (defined(TARGET_I386) && defined(TARGET_ABI32)) || \
+    (defined(TARGET_ARM) && defined(TARGET_ABI32)) || \
     defined(TARGET_M68K) || defined(TARGET_CRIS) || defined(TARGET_MICROBLAZE) \
     || defined(TARGET_S390X)
         {
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 086fbff..2ebe356 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -1137,7 +1137,8 @@ struct target_winsize {
 #define TARGET_MAP_UNINITIALIZED 0x4000000	/* for anonymous mmap, memory could be uninitialized */
 #endif
 
-#if (defined(TARGET_I386) && defined(TARGET_ABI32)) || defined(TARGET_ARM) \
+#if (defined(TARGET_I386) && defined(TARGET_ABI32)) \
+    || (defined(TARGET_ARM) && defined(TARGET_ABI32)) \
     || defined(TARGET_CRIS) || defined(TARGET_UNICORE32)
 struct target_stat {
 	unsigned short st_dev;
@@ -1835,6 +1836,28 @@ struct target_stat {
     abi_long       st_blocks;
     abi_ulong  __unused[3];
 };
+#elif defined(TARGET_AARCH64)
+struct target_stat {
+    abi_ulong  st_dev;
+    abi_ulong  st_ino;
+    unsigned int st_mode;
+    unsigned int st_nlink;
+    unsigned int   st_uid;
+    unsigned int   st_gid;
+    abi_ulong  st_rdev;
+    abi_ulong  _pad1;
+    abi_long  st_size;
+    int        st_blksize;
+    int        __pad2;
+    abi_long   st_blocks;
+    abi_long  target_st_atime;
+    abi_ulong  target_st_atime_nsec;
+    abi_long  target_st_mtime;
+    abi_ulong  target_st_mtime_nsec;
+    abi_long  target_st_ctime;
+    abi_ulong  target_st_ctime_nsec;
+    unsigned int __unused[2];
+};
 #elif defined(TARGET_OPENRISC)
 
 /* These are the asm-generic versions of the stat and stat64 structures */
@@ -1943,7 +1966,8 @@ struct target_statfs64 {
 	uint32_t	f_spare[6];
 };
 #elif (defined(TARGET_PPC64) || defined(TARGET_X86_64) || \
-       defined(TARGET_SPARC64)) && !defined(TARGET_ABI32)
+       defined(TARGET_SPARC64) || defined(TARGET_AARCH64)) && \
+       !defined(TARGET_ABI32)
 struct target_statfs {
 	abi_long f_type;
 	abi_long f_bsize;
commit c7907301e7df9623bc5216934e30125ce66cfaea
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:14 2013 +0100

    linux-user: Add syscall number definitions for AArch64
    
    The AArch64 syscall definitions are all publicly available in the Linux
    kernel. Let's add them to our linux-user emulation target, so that we
    can easily handle AArch64 syscalls.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-15-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-8-git-send-email-john.rigby at linaro.org
    [PMM: changes relating to cpu_loop() removed as they are superseded
     by an earlier patch]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/aarch64/syscall_nr.h b/linux-user/aarch64/syscall_nr.h
new file mode 100644
index 0000000..743255d
--- /dev/null
+++ b/linux-user/aarch64/syscall_nr.h
@@ -0,0 +1,323 @@
+/*
+ * This file contains the system call numbers.
+ */
+
+#define TARGET_NR_io_setup 0
+#define TARGET_NR_io_destroy 1
+#define TARGET_NR_io_submit 2
+#define TARGET_NR_io_cancel 3
+#define TARGET_NR_io_getevents 4
+#define TARGET_NR_setxattr 5
+#define TARGET_NR_lsetxattr 6
+#define TARGET_NR_fsetxattr 7
+#define TARGET_NR_getxattr 8
+#define TARGET_NR_lgetxattr 9
+#define TARGET_NR_fgetxattr 10
+#define TARGET_NR_listxattr 11
+#define TARGET_NR_llistxattr 12
+#define TARGET_NR_flistxattr 13
+#define TARGET_NR_removexattr 14
+#define TARGET_NR_lremovexattr 15
+#define TARGET_NR_fremovexattr 16
+#define TARGET_NR_getcwd 17
+#define TARGET_NR_lookup_dcookie 18
+#define TARGET_NR_eventfd2 19
+#define TARGET_NR_epoll_create1 20
+#define TARGET_NR_epoll_ctl 21
+#define TARGET_NR_epoll_pwait 22
+#define TARGET_NR_dup 23
+#define TARGET_NR_dup3 24
+#define TARGET_NR_fcntl 25
+#define TARGET_NR_inotify_init1 26
+#define TARGET_NR_inotify_add_watch 27
+#define TARGET_NR_inotify_rm_watch 28
+#define TARGET_NR_ioctl 29
+#define TARGET_NR_ioprio_set 30
+#define TARGET_NR_ioprio_get 31
+#define TARGET_NR_flock 32
+#define TARGET_NR_mknodat 33
+#define TARGET_NR_mkdirat 34
+#define TARGET_NR_unlinkat 35
+#define TARGET_NR_symlinkat 36
+#define TARGET_NR_linkat 37
+#define TARGET_NR_renameat 38
+#define TARGET_NR_umount2 39
+#define TARGET_NR_mount 40
+#define TARGET_NR_pivot_root 41
+#define TARGET_NR_nfsservctl 42
+#define TARGET_NR_statfs 43
+#define TARGET_NR_fstatfs 44
+#define TARGET_NR_truncate 45
+#define TARGET_NR_ftruncate 46
+#define TARGET_NR_fallocate 47
+#define TARGET_NR_faccessat 48
+#define TARGET_NR_chdir 49
+#define TARGET_NR_fchdir 50
+#define TARGET_NR_chroot 51
+#define TARGET_NR_fchmod 52
+#define TARGET_NR_fchmodat 53
+#define TARGET_NR_fchownat 54
+#define TARGET_NR_fchown 55
+#define TARGET_NR_openat 56
+#define TARGET_NR_close 57
+#define TARGET_NR_vhangup 58
+#define TARGET_NR_pipe2 59
+#define TARGET_NR_quotactl 60
+#define TARGET_NR_getdents64 61
+#define TARGET_NR_lseek 62
+#define TARGET_NR_read 63
+#define TARGET_NR_write 64
+#define TARGET_NR_readv 65
+#define TARGET_NR_writev 66
+#define TARGET_NR_pread64 67
+#define TARGET_NR_pwrite64 68
+#define TARGET_NR_preadv 69
+#define TARGET_NR_pwritev 70
+#define TARGET_NR_sendfile 71
+#define TARGET_NR_pselect6 72
+#define TARGET_NR_ppoll 73
+#define TARGET_NR_signalfd4 74
+#define TARGET_NR_vmsplice 75
+#define TARGET_NR_splice 76
+#define TARGET_NR_tee 77
+#define TARGET_NR_readlinkat 78
+#define TARGET_NR_fstatat64 79
+#define TARGET_NR_fstat 80
+#define TARGET_NR_sync 81
+#define TARGET_NR_fsync 82
+#define TARGET_NR_fdatasync 83
+#define TARGET_NR_sync_file_range2 84
+/* #define TARGET_NR_sync_file_range 84 */
+#define TARGET_NR_timerfd_create 85
+#define TARGET_NR_timerfd_settime 86
+#define TARGET_NR_timerfd_gettime 87
+#define TARGET_NR_utimensat 88
+#define TARGET_NR_acct 89
+#define TARGET_NR_capget 90
+#define TARGET_NR_capset 91
+#define TARGET_NR_personality 92
+#define TARGET_NR_exit 93
+#define TARGET_NR_exit_group 94
+#define TARGET_NR_waitid 95
+#define TARGET_NR_set_tid_address 96
+#define TARGET_NR_unshare 97
+#define TARGET_NR_futex 98
+#define TARGET_NR_set_robust_list 99
+#define TARGET_NR_get_robust_list 100
+#define TARGET_NR_nanosleep 101
+#define TARGET_NR_getitimer 102
+#define TARGET_NR_setitimer 103
+#define TARGET_NR_kexec_load 104
+#define TARGET_NR_init_module 105
+#define TARGET_NR_delete_module 106
+#define TARGET_NR_timer_create 107
+#define TARGET_NR_timer_gettime 108
+#define TARGET_NR_timer_getoverrun 109
+#define TARGET_NR_timer_settime 110
+#define TARGET_NR_timer_delete 111
+#define TARGET_NR_clock_settime 112
+#define TARGET_NR_clock_gettime 113
+#define TARGET_NR_clock_getres 114
+#define TARGET_NR_clock_nanosleep 115
+#define TARGET_NR_syslog 116
+#define TARGET_NR_ptrace 117
+#define TARGET_NR_sched_setparam 118
+#define TARGET_NR_sched_setscheduler 119
+#define TARGET_NR_sched_getscheduler 120
+#define TARGET_NR_sched_getparam 121
+#define TARGET_NR_sched_setaffinity 122
+#define TARGET_NR_sched_getaffinity 123
+#define TARGET_NR_sched_yield 124
+#define TARGET_NR_sched_get_priority_max 125
+#define TARGET_NR_sched_get_priority_min 126
+#define TARGET_NR_sched_rr_get_interval 127
+#define TARGET_NR_restart_syscall 128
+#define TARGET_NR_kill 129
+#define TARGET_NR_tkill 130
+#define TARGET_NR_tgkill 131
+#define TARGET_NR_sigaltstack 132
+#define TARGET_NR_rt_sigsuspend 133
+#define TARGET_NR_rt_sigaction 134
+#define TARGET_NR_rt_sigprocmask 135
+#define TARGET_NR_rt_sigpending 136
+#define TARGET_NR_rt_sigtimedwait 137
+#define TARGET_NR_rt_sigqueueinfo 138
+#define TARGET_NR_rt_sigreturn 139
+#define TARGET_NR_setpriority 140
+#define TARGET_NR_getpriority 141
+#define TARGET_NR_reboot 142
+#define TARGET_NR_setregid 143
+#define TARGET_NR_setgid 144
+#define TARGET_NR_setreuid 145
+#define TARGET_NR_setuid 146
+#define TARGET_NR_setresuid 147
+#define TARGET_NR_getresuid 148
+#define TARGET_NR_setresgid 149
+#define TARGET_NR_getresgid 150
+#define TARGET_NR_setfsuid 151
+#define TARGET_NR_setfsgid 152
+#define TARGET_NR_times 153
+#define TARGET_NR_setpgid 154
+#define TARGET_NR_getpgid 155
+#define TARGET_NR_getsid 156
+#define TARGET_NR_setsid 157
+#define TARGET_NR_getgroups 158
+#define TARGET_NR_setgroups 159
+#define TARGET_NR_uname 160
+#define TARGET_NR_sethostname 161
+#define TARGET_NR_setdomainname 162
+#define TARGET_NR_getrlimit 163
+#define TARGET_NR_setrlimit 164
+#define TARGET_NR_getrusage 165
+#define TARGET_NR_umask 166
+#define TARGET_NR_prctl 167
+#define TARGET_NR_getcpu 168
+#define TARGET_NR_gettimeofday 169
+#define TARGET_NR_settimeofday 170
+#define TARGET_NR_adjtimex 171
+#define TARGET_NR_getpid 172
+#define TARGET_NR_getppid 173
+#define TARGET_NR_getuid 174
+#define TARGET_NR_geteuid 175
+#define TARGET_NR_getgid 176
+#define TARGET_NR_getegid 177
+#define TARGET_NR_gettid 178
+#define TARGET_NR_sysinfo 179
+#define TARGET_NR_mq_open 180
+#define TARGET_NR_mq_unlink 181
+#define TARGET_NR_mq_timedsend 182
+#define TARGET_NR_mq_timedreceive 183
+#define TARGET_NR_mq_notify 184
+#define TARGET_NR_mq_getsetattr 185
+#define TARGET_NR_msgget 186
+#define TARGET_NR_msgctl 187
+#define TARGET_NR_msgrcv 188
+#define TARGET_NR_msgsnd 189
+#define TARGET_NR_semget 190
+#define TARGET_NR_semctl 191
+#define TARGET_NR_semtimedop 192
+#define TARGET_NR_semop 193
+#define TARGET_NR_shmget 194
+#define TARGET_NR_shmctl 195
+#define TARGET_NR_shmat 196
+#define TARGET_NR_shmdt 197
+#define TARGET_NR_socket 198
+#define TARGET_NR_socketpair 199
+#define TARGET_NR_bind 200
+#define TARGET_NR_listen 201
+#define TARGET_NR_accept 202
+#define TARGET_NR_connect 203
+#define TARGET_NR_getsockname 204
+#define TARGET_NR_getpeername 205
+#define TARGET_NR_sendto 206
+#define TARGET_NR_recvfrom 207
+#define TARGET_NR_setsockopt 208
+#define TARGET_NR_getsockopt 209
+#define TARGET_NR_shutdown 210
+#define TARGET_NR_sendmsg 211
+#define TARGET_NR_recvmsg 212
+#define TARGET_NR_readahead 213
+#define TARGET_NR_brk 214
+#define TARGET_NR_munmap 215
+#define TARGET_NR_mremap 216
+#define TARGET_NR_add_key 217
+#define TARGET_NR_request_key 218
+#define TARGET_NR_keyctl 219
+#define TARGET_NR_clone 220
+#define TARGET_NR_execve 221
+#define TARGET_NR_mmap 222
+#define TARGET_NR_fadvise64 223
+#define TARGET_NR_swapon 224
+#define TARGET_NR_swapoff 225
+#define TARGET_NR_mprotect 226
+#define TARGET_NR_msync 227
+#define TARGET_NR_mlock 228
+#define TARGET_NR_munlock 229
+#define TARGET_NR_mlockall 230
+#define TARGET_NR_munlockall 231
+#define TARGET_NR_mincore 232
+#define TARGET_NR_madvise 233
+#define TARGET_NR_remap_file_pages 234
+#define TARGET_NR_mbind 235
+#define TARGET_NR_get_mempolicy 236
+#define TARGET_NR_set_mempolicy 237
+#define TARGET_NR_migrate_pages 238
+#define TARGET_NR_move_pages 239
+#define TARGET_NR_rt_tgsigqueueinfo 240
+#define TARGET_NR_perf_event_open 241
+#define TARGET_NR_accept4 242
+#define TARGET_NR_recvmmsg 243
+#define TARGET_NR_arch_specific_syscall 244
+#define TARGET_NR_wait4 260
+#define TARGET_NR_prlimit64 261
+#define TARGET_NR_fanotify_init 262
+#define TARGET_NR_fanotify_mark 263
+#define TARGET_NR_name_to_handle_at         264
+#define TARGET_NR_open_by_handle_at         265
+#define TARGET_NR_clock_adjtime 266
+#define TARGET_NR_syncfs 267
+#define TARGET_NR_setns 268
+#define TARGET_NR_sendmmsg 269
+#define TARGET_NR_process_vm_readv 270
+#define TARGET_NR_process_vm_writev 271
+#define TARGET_NR_kcmp 272
+#define TARGET_NR_finit_module 273
+#define TARGET_NR_open 1024
+#define TARGET_NR_link 1025
+#define TARGET_NR_unlink 1026
+#define TARGET_NR_mknod 1027
+#define TARGET_NR_chmod 1028
+#define TARGET_NR_chown 1029
+#define TARGET_NR_mkdir 1030
+#define TARGET_NR_rmdir 1031
+#define TARGET_NR_lchown 1032
+#define TARGET_NR_access 1033
+#define TARGET_NR_rename 1034
+#define TARGET_NR_readlink 1035
+#define TARGET_NR_symlink 1036
+#define TARGET_NR_utimes 1037
+#define TARGET_NR_stat 1038
+#define TARGET_NR_lstat 1039
+#define TARGET_NR_pipe 1040
+#define TARGET_NR_dup2 1041
+#define TARGET_NR_epoll_create 1042
+#define TARGET_NR_inotify_init 1043
+#define TARGET_NR_eventfd 1044
+#define TARGET_NR_signalfd 1045
+#define TARGET_NR_sendfile64 1046
+#define TARGET_NR_ftruncate64 1047
+#define TARGET_NR_truncate64 1048
+#define TARGET_NR_stat64 1049
+#define TARGET_NR_lstat64 1050
+#define TARGET_NR_fstat64 1051
+#define TARGET_NR_fcntl64 1052
+/* #define TARGET_NR_fadvise64 1053 */
+#define TARGET_NR_newfstatat 1054
+#define TARGET_NR_fstatfs64 1055
+#define TARGET_NR_statfs64 1056
+#define TARGET_NR_lseek64 1057
+#define TARGET_NR_mmap64 1058
+#define TARGET_NR_alarm 1059
+#define TARGET_NR_getpgrp 1060
+#define TARGET_NR_pause 1061
+#define TARGET_NR_time 1062
+#define TARGET_NR_utime 1063
+#define TARGET_NR_creat 1064
+#define TARGET_NR_getdents 1065
+#define TARGET_NR_futimesat 1066
+#define TARGET_NR_select 1067
+#define TARGET_NR_poll 1068
+#define TARGET_NR_epoll_wait 1069
+#define TARGET_NR_ustat 1070
+#define TARGET_NR_vfork 1071
+#define TARGET_NR_oldwait4 1072
+#define TARGET_NR_recv 1073
+#define TARGET_NR_send 1074
+#define TARGET_NR_bdflush 1075
+#define TARGET_NR_umount 1076
+#define TARGET_NR_uselib 1077
+#define TARGET_NR__sysctl 1078
+#define TARGET_NR_fork 1079
+#define TARGET_NR_syscalls (__NR_fork+1)
+
+#define TARGET_NR_sigreturn 1999
commit 1861c4543ffa6224d0661036afaa7ec1cf30e8bb
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:13 2013 +0100

    linux-user: Add cpu loop for AArch64
    
    Add the main linux-user cpu loop for AArch64. Since AArch64
    has a different system call interface, doesn't need to worry
    about FPA emulation and may in the future keep the prefetch/data
    abort information in different system registers, it's simplest
    just to use a completely separate loop from the 32 bit ARM
    target, rather than peppering it with ifdefs.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-14-git-send-email-peter.maydell at linaro.org

diff --git a/linux-user/main.c b/linux-user/main.c
index 5c2f7b2..b6e434a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -449,6 +449,9 @@ void cpu_loop(CPUX86State *env)
         __r;                                            \
     })
 
+#ifdef TARGET_ABI32
+/* Commpage handling -- there is no commpage for AArch64 */
+
 /*
  * See the Linux kernel's Documentation/arm/kernel_user_helpers.txt
  * Input:
@@ -582,6 +585,7 @@ do_kernel_trap(CPUARMState *env)
 
     return 0;
 }
+#endif
 
 static int do_strex(CPUARMState *env)
 {
@@ -661,6 +665,7 @@ done:
     return segv;
 }
 
+#ifdef TARGET_ABI32
 void cpu_loop(CPUARMState *env)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -873,6 +878,83 @@ void cpu_loop(CPUARMState *env)
     }
 }
 
+#else
+
+/* AArch64 main loop */
+void cpu_loop(CPUARMState *env)
+{
+    CPUState *cs = CPU(arm_env_get_cpu(env));
+    int trapnr, sig;
+    target_siginfo_t info;
+    uint32_t addr;
+
+    for (;;) {
+        cpu_exec_start(cs);
+        trapnr = cpu_arm_exec(env);
+        cpu_exec_end(cs);
+
+        switch (trapnr) {
+        case EXCP_SWI:
+            env->xregs[0] = do_syscall(env,
+                                       env->xregs[8],
+                                       env->xregs[0],
+                                       env->xregs[1],
+                                       env->xregs[2],
+                                       env->xregs[3],
+                                       env->xregs[4],
+                                       env->xregs[5],
+                                       0, 0);
+            break;
+        case EXCP_INTERRUPT:
+            /* just indicate that signals should be handled asap */
+            break;
+        case EXCP_UDEF:
+            info.si_signo = SIGILL;
+            info.si_errno = 0;
+            info.si_code = TARGET_ILL_ILLOPN;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, &info);
+            break;
+        case EXCP_PREFETCH_ABORT:
+            addr = env->cp15.c6_insn;
+            goto do_segv;
+        case EXCP_DATA_ABORT:
+            addr = env->cp15.c6_data;
+        do_segv:
+            info.si_signo = SIGSEGV;
+            info.si_errno = 0;
+            /* XXX: check env->error_code */
+            info.si_code = TARGET_SEGV_MAPERR;
+            info._sifields._sigfault._addr = addr;
+            queue_signal(env, info.si_signo, &info);
+            break;
+        case EXCP_DEBUG:
+        case EXCP_BKPT:
+            sig = gdb_handlesig(cs, TARGET_SIGTRAP);
+            if (sig) {
+                info.si_signo = sig;
+                info.si_errno = 0;
+                info.si_code = TARGET_TRAP_BRKPT;
+                queue_signal(env, info.si_signo, &info);
+            }
+            break;
+        case EXCP_STREX:
+            if (do_strex(env)) {
+                addr = env->cp15.c6_data;
+                goto do_segv;
+            }
+            break;
+        default:
+            fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
+                    trapnr);
+            cpu_dump_state(cs, stderr, fprintf, 0);
+            abort();
+        }
+        process_pending_signals(env);
+    }
+}
+#endif /* ndef TARGET_ABI32 */
+
 #endif
 
 #ifdef TARGET_UNICORE32
commit 067d983127da5c05a365230b12f2f557ec721c97
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:12 2013 +0100

    linux-user: Don't treat AArch64 cpu names specially
    
    32-bit ARM has a lot of different names for different types of CPUs it supports.
    On AArch64, we don't have this, so we really don't want to execute the 32-bit
    logic. Stub it out for AArch64 linux-user guests.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-13-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-7-git-send-email-john.rigby at linaro.org
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/linux-user/cpu-uname.c b/linux-user/cpu-uname.c
index cc713e6..5db6e89 100644
--- a/linux-user/cpu-uname.c
+++ b/linux-user/cpu-uname.c
@@ -30,7 +30,8 @@
  * return here */
 const char *cpu_to_uname_machine(void *cpu_env)
 {
-#ifdef TARGET_ARM
+#if defined(TARGET_ARM) && !defined(TARGET_AARCH64)
+
     /* utsname machine name on linux arm is CPU arch name + endianness, e.g.
      * armv7l; to get a list of CPU arch names from the linux source, use:
      *     grep arch_name: -A1 linux/arch/arm/mm/proc-*.S
commit 96c04212ba80d4f9630a5e82681285eeb41af9cc
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:11 2013 +0100

    target-arm: Add AArch64 gdbstub support
    
    We want to be able to debug AArch64 guests. So let's add the respective gdb
    stub functions and xml descriptions that allow us to do so.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-12-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-6-git-send-email-john.rigby at linaro.org
    [PMM: dropped unused fp regs XML for now; moved 64 bit only functions
     to new gdbstub64.c; these are hooked up in AArch64CPU, not via
     ifdefs in ARMCPU]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/gdb-xml/aarch64-core.xml b/gdb-xml/aarch64-core.xml
new file mode 100644
index 0000000..e1e9dc3
--- /dev/null
+++ b/gdb-xml/aarch64-core.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2012 Free Software Foundation, Inc.
+     Contributed by ARM Ltd.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.core">
+  <reg name="x0" bitsize="64"/>
+  <reg name="x1" bitsize="64"/>
+  <reg name="x2" bitsize="64"/>
+  <reg name="x3" bitsize="64"/>
+  <reg name="x4" bitsize="64"/>
+  <reg name="x5" bitsize="64"/>
+  <reg name="x6" bitsize="64"/>
+  <reg name="x7" bitsize="64"/>
+  <reg name="x8" bitsize="64"/>
+  <reg name="x9" bitsize="64"/>
+  <reg name="x10" bitsize="64"/>
+  <reg name="x11" bitsize="64"/>
+  <reg name="x12" bitsize="64"/>
+  <reg name="x13" bitsize="64"/>
+  <reg name="x14" bitsize="64"/>
+  <reg name="x15" bitsize="64"/>
+  <reg name="x16" bitsize="64"/>
+  <reg name="x17" bitsize="64"/>
+  <reg name="x18" bitsize="64"/>
+  <reg name="x19" bitsize="64"/>
+  <reg name="x20" bitsize="64"/>
+  <reg name="x21" bitsize="64"/>
+  <reg name="x22" bitsize="64"/>
+  <reg name="x23" bitsize="64"/>
+  <reg name="x24" bitsize="64"/>
+  <reg name="x25" bitsize="64"/>
+  <reg name="x26" bitsize="64"/>
+  <reg name="x27" bitsize="64"/>
+  <reg name="x28" bitsize="64"/>
+  <reg name="x29" bitsize="64"/>
+  <reg name="x30" bitsize="64"/>
+  <reg name="sp" bitsize="64" type="data_ptr"/>
+
+  <reg name="pc" bitsize="64" type="code_ptr"/>
+  <reg name="cpsr" bitsize="32"/>
+</feature>
diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index a11d76e..6453f5c 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -5,4 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
-obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o
+obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o gdbstub64.o
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 6502a7b..b55306a 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -176,6 +176,8 @@ void arm_gt_vtimer_cb(void *opaque);
 #ifdef TARGET_AARCH64
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags);
+int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 #endif
 
 #endif
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 4428f6c..3e99c21 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -73,6 +73,10 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
     CPUClass *cc = CPU_CLASS(oc);
 
     cc->dump_state = aarch64_cpu_dump_state;
+    cc->gdb_read_register = aarch64_cpu_gdb_read_register;
+    cc->gdb_write_register = aarch64_cpu_gdb_write_register;
+    cc->gdb_num_core_regs = 34;
+    cc->gdb_core_xml_file = "aarch64-core.xml";
 }
 
 static void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/gdbstub64.c b/target-arm/gdbstub64.c
new file mode 100644
index 0000000..7cb6a7c
--- /dev/null
+++ b/target-arm/gdbstub64.c
@@ -0,0 +1,73 @@
+/*
+ * ARM gdb server stub: AArch64 specific functions.
+ *
+ * Copyright (c) 2013 SUSE LINUX Products GmbH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "config.h"
+#include "qemu-common.h"
+#include "exec/gdbstub.h"
+
+int aarch64_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    if (n >= 0 && n < 31) {
+        /* Core integer register; a negative n must not index xregs[].  */
+        return gdb_get_reg64(mem_buf, env->xregs[n]);
+    }
+    switch (n) {
+    case 31:
+        /* SP is kept in xregs[31].  */
+        return gdb_get_reg64(mem_buf, env->xregs[31]);
+    case 32:
+        return gdb_get_reg64(mem_buf, env->pc);
+    case 33:
+        /* CPSR is transferred as 32 bits.  */
+        return gdb_get_reg32(mem_buf, env->pstate);
+    }
+    /* Unknown register.  */
+    return 0;
+}
+
+int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    uint64_t tmp;
+
+    /* Always loads 8 bytes; for CPSR only the low 32 bits are stored.  */
+    tmp = ldq_p(mem_buf);
+    if (n >= 0 && n < 31) {
+        /* Core integer register; a negative n must not index xregs[].  */
+        env->xregs[n] = tmp;
+        return 8;
+    }
+    switch (n) {
+    case 31:
+        env->xregs[31] = tmp;
+        return 8;
+    case 32:
+        env->pc = tmp;
+        return 8;
+    case 33:
+        /* CPSR */
+        env->pstate = tmp;
+        return 4;
+    }
+    /* Unknown register.  */
+    return 0;
+}
commit 14ade10f840deec02d32530e5a64bd5ec275adbd
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:10 2013 +0100

    target-arm: Add AArch64 translation stub
    
    We should translate AArch64 mode separately from AArch32 mode. In AArch64 mode,
    registers look vastly different, instruction encoding is completely different,
    basically the system turns into a different machine.
    
    So let's do a simple if() in translate.c to decide whether we can handle the
    current code in the legacy AArch32 code or in the new AArch64 code.
    
    So far, the translation always complains about unallocated instructions. There
    is no emulator functionality in this patch!
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-11-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-5-git-send-email-john.rigby at linaro.org
    [PMM:
     * provide no-op versions of a64 functions ifndef TARGET_AARCH64;
       this lets us avoid #ifdefs in translate.c
     * insert the missing call to disas_a64_insn()
     * stash the insn in the DisasContext rather than reloading it in
       real_unallocated_encoding()
    ]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index baebc50..a11d76e 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -5,4 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
-obj-$(TARGET_AARCH64) += cpu64.o
+obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index fbe846e..6502a7b 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -173,4 +173,9 @@ int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void arm_gt_ptimer_cb(void *opaque);
 void arm_gt_vtimer_cb(void *opaque);
 
+#ifdef TARGET_AARCH64
+void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                            fprintf_function cpu_fprintf, int flags);
+#endif
+
 #endif
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index faee0f0..4428f6c 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -70,6 +70,9 @@ static void aarch64_cpu_finalizefn(Object *obj)
 
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
+    CPUClass *cc = CPU_CLASS(oc);
+
+    cc->dump_state = aarch64_cpu_dump_state;
 }
 
 static void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
new file mode 100644
index 0000000..f120088
--- /dev/null
+++ b/target-arm/translate-a64.c
@@ -0,0 +1,139 @@
+/*
+ *  AArch64 translation
+ *
+ *  Copyright (c) 2013 Alexander Graf <agraf at suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "cpu.h"
+#include "tcg-op.h"
+#include "qemu/log.h"
+#include "translate.h"
+#include "qemu/host-utils.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+static TCGv_i64 cpu_X[32];
+static TCGv_i64 cpu_pc;
+static TCGv_i32 pstate;
+
+static const char *regnames[] = {
+    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
+};
+
+/* initialize TCG globals.  */
+void a64_translate_init(void)
+{
+    int i;
+
+    cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
+                                    offsetof(CPUARMState, pc),
+                                    "pc");
+    for (i = 0; i < 32; i++) {
+        cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
+                                          offsetof(CPUARMState, xregs[i]),
+                                          regnames[i]);
+    }
+
+    pstate = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUARMState, pstate),
+                                    "pstate");
+}
+
+void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                            fprintf_function cpu_fprintf, int flags)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    int i;
+
+    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
+            env->pc, env->xregs[31]);
+    for (i = 0; i < 31; i++) {
+        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
+        if ((i % 4) == 3) {
+            cpu_fprintf(f, "\n");
+        } else {
+            cpu_fprintf(f, " ");
+        }
+    }
+    cpu_fprintf(f, "PSTATE=%c%c%c%c\n",
+        env->pstate & PSTATE_N ? 'n' : '.',
+        env->pstate & PSTATE_Z ? 'z' : '.',
+        env->pstate & PSTATE_C ? 'c' : '.',
+        env->pstate & PSTATE_V ? 'v' : '.');
+    cpu_fprintf(f, "\n");
+}
+
+void gen_a64_set_pc_im(uint64_t val)
+{
+    tcg_gen_movi_i64(cpu_pc, val);
+}
+
+static void gen_exception(int excp)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    tcg_gen_movi_i32(tmp, excp);
+    gen_helper_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_exception_insn(DisasContext *s, int offset, int excp)
+{
+    gen_a64_set_pc_im(s->pc - offset);
+    gen_exception(excp);
+    s->is_jmp = DISAS_JUMP;
+}
+
+static void real_unallocated_encoding(DisasContext *s)
+{
+    fprintf(stderr, "Unknown instruction: %#x\n", s->insn);
+    gen_exception_insn(s, 4, EXCP_UDEF);
+}
+
+#define unallocated_encoding(s) do { \
+    fprintf(stderr, "unallocated encoding at line: %d\n", __LINE__); \
+    real_unallocated_encoding(s); \
+    } while (0)
+
+void disas_a64_insn(CPUARMState *env, DisasContext *s)
+{
+    uint32_t insn;
+
+    insn = arm_ldl_code(env, s->pc, s->bswap_code);
+    s->insn = insn;
+    s->pc += 4;
+
+    switch ((insn >> 24) & 0x1f) {
+    default:
+        unallocated_encoding(s);
+        break;
+    }
+
+    if (unlikely(s->singlestep_enabled) && (s->is_jmp == DISAS_TB_JUMP)) {
+        /* go through the main loop for single step */
+        s->is_jmp = DISAS_JUMP;
+    }
+}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index db7a1d4..998bde2 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -114,6 +114,8 @@ void arm_translate_init(void)
         offsetof(CPUARMState, exclusive_info), "exclusive_info");
 #endif
 
+    a64_translate_init();
+
 #define GEN_HELPER 2
 #include "helper.h"
 }
@@ -907,7 +909,11 @@ DO_GEN_ST(st32)
 
 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 {
-    tcg_gen_movi_i32(cpu_R[15], val);
+    if (s->aarch64) {
+        gen_a64_set_pc_im(val);
+    } else {
+        tcg_gen_movi_i32(cpu_R[15], val);
+    }
 }
 
 /* Force a TB lookup after an instruction that changes the CPU state.  */
@@ -10099,7 +10105,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     do {
 #ifdef CONFIG_USER_ONLY
         /* Intercept jump to the magic kernel page.  */
-        if (dc->pc >= 0xffff0000) {
+        if (!dc->aarch64 && dc->pc >= 0xffff0000) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
             gen_exception(EXCP_KERNEL_TRAP);
@@ -10147,7 +10153,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
             tcg_gen_debug_insn_start(dc->pc);
         }
 
-        if (dc->thumb) {
+        if (dc->aarch64) {
+            disas_a64_insn(env, dc);
+        } else if (dc->thumb) {
             disas_thumb_insn(env, dc);
             if (dc->condexec_mask) {
                 dc->condexec_cond = (dc->condexec_cond & 0xe)
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 5be2eed..67c7760 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -4,6 +4,7 @@
 /* internal defines */
 typedef struct DisasContext {
     target_ulong pc;
+    uint32_t insn;
     int is_jmp;
     /* Nonzero if this instruction has been conditionally skipped.  */
     int condjmp;
@@ -27,4 +28,22 @@ typedef struct DisasContext {
 
 extern TCGv_ptr cpu_env;
 
+#ifdef TARGET_AARCH64
+void a64_translate_init(void);
+void disas_a64_insn(CPUARMState *env, DisasContext *s);
+void gen_a64_set_pc_im(uint64_t val);
+#else
+static inline void a64_translate_init(void)
+{
+}
+
+static inline void disas_a64_insn(CPUARMState *env, DisasContext *s)
+{
+}
+
+static inline void gen_a64_set_pc_im(uint64_t val)
+{
+}
+#endif
+
 #endif /* TARGET_ARM_TRANSLATE_H */
commit 3926cc8433542e8c9b7cdc438355fb7660838fd0
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:09 2013 +0100

    target-arm: Prepare translation for AArch64 code
    
    This patch adds all the prerequisites for AArch64 support that didn't
    fit into split up patches. It extends important bits in the core cpu
    headers to also take AArch64 mode into account.
    
    Add new ARM_TBFLAG_AARCH64_STATE translation buffer flag to
    indicate an ARMv8 cpu running in aarch64 mode vs aarch32 mode.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-10-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-4-git-send-email-john.rigby at linaro.org
    [PMM:
     * rearranged tbflags so AArch64? is bit 31 and if it is set then
      30..0 are freely available for whatever makes most sense for that mode
     * added version bump since we change VFP migration state
     * added a comment about how VFP/Neon register state works
     * physical address space is 48 bits, not 64
     * added ARM_FEATURE_AARCH64 flag to identify 64-bit capable CPUs
    ]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index e8faadb..d40f2a7 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -84,6 +84,11 @@ static void arm_cpu_reset(CPUState *s)
         env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
     }
 
+    if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+        /* 64 bit CPUs always start in 64 bit mode */
+        env->aarch64 = 1;
+    }
+
 #if defined(CONFIG_USER_ONLY)
     env->uncached_cpsr = ARM_CPU_MODE_USR;
     /* For user mode we must enable access to coprocessors */
@@ -834,6 +839,9 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
+#ifdef TARGET_AARCH64
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+#endif
     cpu->midr = 0xffffffff;
 }
 #endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 29170d0..2c56740 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -19,13 +19,19 @@
 #ifndef CPU_ARM_H
 #define CPU_ARM_H
 
-#define TARGET_LONG_BITS 32
+#include "config.h"
 
-#define ELF_MACHINE	EM_ARM
+#if defined(TARGET_AARCH64)
+  /* AArch64 definitions */
+#  define TARGET_LONG_BITS 64
+#  define ELF_MACHINE EM_AARCH64
+#else
+#  define TARGET_LONG_BITS 32
+#  define ELF_MACHINE EM_ARM
+#endif
 
 #define CPUArchState struct CPUARMState
 
-#include "config.h"
 #include "qemu-common.h"
 #include "exec/cpu-defs.h"
 
@@ -97,6 +103,20 @@ typedef struct ARMGenericTimer {
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
+
+    /* 32/64 switch only happens when taking and returning from
+     * exceptions so the overlap semantics are taken care of then
+     * instead of having a complicated union.
+     */
+    /* Regs for A64 mode.  */
+    uint64_t xregs[32];
+    uint64_t pc;
+    /* TODO: pstate doesn't correspond to an architectural register;
+     * it would be better modelled as the underlying fields.
+     */
+    uint32_t pstate;
+    uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
+
     /* Frequently accessed CPSR bits are stored separately for efficiency.
        This contains all the other bits.  Use cpsr_{read,write} to access
        the whole CPSR.  */
@@ -175,6 +195,11 @@ typedef struct CPUARMState {
         uint32_t c15_power_control; /* power control */
     } cp15;
 
+    /* System registers (AArch64) */
+    struct {
+        uint64_t tpidr_el0;
+    } sr;
+
     struct {
         uint32_t other_sp;
         uint32_t vecbase;
@@ -191,7 +216,22 @@ typedef struct CPUARMState {
 
     /* VFP coprocessor state.  */
     struct {
-        float64 regs[32];
+        /* VFP/Neon register state. Note that the mapping between S, D and Q
+         * views of the register bank differs between AArch64 and AArch32:
+         * In AArch32:
+         *  Qn = regs[2n+1]:regs[2n]
+         *  Dn = regs[n]
+         *  Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
+         * (and regs[32] to regs[63] are inaccessible)
+         * In AArch64:
+         *  Qn = regs[2n+1]:regs[2n]
+         *  Dn = regs[2n]
+         *  Sn = regs[2n] bits 31..0
+         * This corresponds to the architecturally defined mapping between
+         * the two execution states, and means we do not need to explicitly
+         * map these registers when changing states.
+         */
+        float64 regs[64];
 
         uint32_t xregs[16];
         /* We store these fpcsr fields separately for convenience.  */
@@ -261,6 +301,20 @@ int bank_number(int mode);
 void switch_mode(CPUARMState *, int);
 uint32_t do_arm_semihosting(CPUARMState *env);
 
+static inline bool is_a64(CPUARMState *env)
+{
+    return env->aarch64;
+}
+
+#define PSTATE_N_SHIFT 3
+#define PSTATE_N  (1 << PSTATE_N_SHIFT)
+#define PSTATE_Z_SHIFT 2
+#define PSTATE_Z  (1 << PSTATE_Z_SHIFT)
+#define PSTATE_C_SHIFT 1
+#define PSTATE_C  (1 << PSTATE_C_SHIFT)
+#define PSTATE_V_SHIFT 0
+#define PSTATE_V  (1 << PSTATE_V_SHIFT)
+
 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
    is returned if the signal was handled by the virtual CPU.  */
@@ -409,6 +463,7 @@ enum arm_features {
     ARM_FEATURE_PXN, /* has Privileged Execute Never bit */
     ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
     ARM_FEATURE_V8,
+    ARM_FEATURE_AARCH64, /* supports 64 bit mode */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -729,8 +784,13 @@ bool write_cpustate_to_list(ARMCPU *cpu);
 #define TARGET_PAGE_BITS 10
 #endif
 
-#define TARGET_PHYS_ADDR_SPACE_BITS 40
-#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#if defined(TARGET_AARCH64)
+#  define TARGET_PHYS_ADDR_SPACE_BITS 48
+#  define TARGET_VIRT_ADDR_SPACE_BITS 64
+#else
+#  define TARGET_PHYS_ADDR_SPACE_BITS 40
+#  define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
 
 static inline CPUARMState *cpu_init(const char *cpu_model)
 {
@@ -757,7 +817,13 @@ static inline int cpu_mmu_index (CPUARMState *env)
 
 #include "exec/cpu-all.h"
 
-/* Bit usage in the TB flags field: */
+/* Bit usage in the TB flags field: bit 31 indicates whether we are
+ * in 32 or 64 bit mode. The meaning of the other bits depends on that.
+ */
+#define ARM_TBFLAG_AARCH64_STATE_SHIFT 31
+#define ARM_TBFLAG_AARCH64_STATE_MASK  (1U << ARM_TBFLAG_AARCH64_STATE_SHIFT)
+
+/* Bit usage when in AArch32 state: */
 #define ARM_TBFLAG_THUMB_SHIFT      0
 #define ARM_TBFLAG_THUMB_MASK       (1 << ARM_TBFLAG_THUMB_SHIFT)
 #define ARM_TBFLAG_VECLEN_SHIFT     1
@@ -772,9 +838,12 @@ static inline int cpu_mmu_index (CPUARMState *env)
 #define ARM_TBFLAG_CONDEXEC_MASK    (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
-/* Bits 31..17 are currently unused. */
+
+/* Bit usage when in AArch64 state: currently no bits defined */
 
 /* some convenience accessor macros */
+#define ARM_TBFLAG_AARCH64_STATE(F) \
+    (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT)
 #define ARM_TBFLAG_THUMB(F) \
     (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
 #define ARM_TBFLAG_VECLEN(F) \
@@ -793,25 +862,31 @@ static inline int cpu_mmu_index (CPUARMState *env)
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
-    int privmode;
-    *pc = env->regs[15];
-    *cs_base = 0;
-    *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
-        | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
-        | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
-        | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
-        | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
-    if (arm_feature(env, ARM_FEATURE_M)) {
-        privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+    if (is_a64(env)) {
+        *pc = env->pc;
+        *flags = ARM_TBFLAG_AARCH64_STATE_MASK;
     } else {
-        privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
-    }
-    if (privmode) {
-        *flags |= ARM_TBFLAG_PRIV_MASK;
-    }
-    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
-        *flags |= ARM_TBFLAG_VFPEN_MASK;
+        int privmode;
+        *pc = env->regs[15];
+        *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
+            | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
+            | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
+            | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
+            | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
+        if (arm_feature(env, ARM_FEATURE_M)) {
+            privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+        } else {
+            privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
+        }
+        if (privmode) {
+            *flags |= ARM_TBFLAG_PRIV_MASK;
+        }
+        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+            *flags |= ARM_TBFLAG_VFPEN_MASK;
+        }
     }
+
+    *cs_base = 0;
 }
 
 static inline bool cpu_has_work(CPUState *cpu)
@@ -822,6 +897,15 @@ static inline bool cpu_has_work(CPUState *cpu)
 
 #include "exec/exec-all.h"
 
+static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb)
+{
+    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
+        env->pc = tb->pc;
+    } else {
+        env->regs[15] = tb->pc;
+    }
+}
+
 /* Load an instruction and return it in the standard little-endian order */
 static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
                                     bool do_swap)
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 5b6f375..74f010f 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -37,11 +37,11 @@ static const VMStateInfo vmstate_fpscr = {
 
 static const VMStateDescription vmstate_vfp = {
     .name = "cpu/vfp",
-    .version_id = 2,
-    .minimum_version_id = 2,
-    .minimum_version_id_old = 2,
+    .version_id = 3,
+    .minimum_version_id = 3,
+    .minimum_version_id_old = 3,
     .fields = (VMStateField[]) {
-        VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 32),
+        VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64),
         /* The xregs array is a little awkward because element 1 (FPSCR)
          * requires a specific accessor, so we have to split it up in
          * the vmstate:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 2d8e0a5..db7a1d4 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -10012,16 +10012,32 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->condjmp = 0;
-    dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
-    dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
-    dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
-    dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
+
+    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
+        dc->aarch64 = 1;
+        dc->thumb = 0;
+        dc->bswap_code = 0;
+        dc->condexec_mask = 0;
+        dc->condexec_cond = 0;
+#if !defined(CONFIG_USER_ONLY)
+        dc->user = 0;
+#endif
+        dc->vfp_enabled = 0;
+        dc->vec_len = 0;
+        dc->vec_stride = 0;
+    } else {
+        dc->aarch64 = 0;
+        dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
+        dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
+        dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
+        dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
 #if !defined(CONFIG_USER_ONLY)
-    dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
+        dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
 #endif
-    dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
-    dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
-    dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
+        dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
+        dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
+        dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
+    }
     cpu_F0s = tcg_temp_new_i32();
     cpu_F1s = tcg_temp_new_i32();
     cpu_F0d = tcg_temp_new_i64();
@@ -10324,6 +10340,10 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
 {
-    env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
+    if (is_a64(env)) {
+        env->pc = tcg_ctx.gen_opc_pc[pc_pos];
+    } else {
+        env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
+    }
     env->condexec_bits = gen_opc_condexec_bits[pc_pos];
 }
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 8ba1433..5be2eed 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -22,6 +22,7 @@ typedef struct DisasContext {
     int vfp_enabled;
     int vec_len;
     int vec_stride;
+    int aarch64;
 } DisasContext;
 
 extern TCGv_ptr cpu_env;
commit 15ee776bf2001883781cc83d456249a60532bb01
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:08 2013 +0100

    target-arm: Disable 32 bit CPUs in 64 bit linux-user builds
    
    If we're building aarch64-linux-user then the 32 bit CPUs are
    all unwanted, because they can't possibly execute the 64 bit
    binaries we will be running; disable them.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-9-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 09206b5..e8faadb 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -288,8 +288,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     acc->parent_realize(dev, errp);
 }
 
-/* CPU models */
-
 static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
 {
     ObjectClass *oc;
@@ -309,6 +307,9 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
     return oc;
 }
 
+/* CPU models. These are not needed for the AArch64 linux-user build. */
+#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
+
 static void arm926_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
@@ -837,6 +838,8 @@ static void arm_any_initfn(Object *obj)
 }
 #endif
 
+#endif /* !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) */
+
 typedef struct ARMCPUInfo {
     const char *name;
     void (*initfn)(Object *obj);
@@ -844,6 +847,7 @@ typedef struct ARMCPUInfo {
 } ARMCPUInfo;
 
 static const ARMCPUInfo arm_cpus[] = {
+#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
     { .name = "arm926",      .initfn = arm926_initfn },
     { .name = "arm946",      .initfn = arm946_initfn },
     { .name = "arm1026",     .initfn = arm1026_initfn },
@@ -879,6 +883,7 @@ static const ARMCPUInfo arm_cpus[] = {
 #ifdef CONFIG_USER_ONLY
     { .name = "any",         .initfn = arm_any_initfn },
 #endif
+#endif
 };
 
 static void arm_cpu_class_init(ObjectClass *oc, void *data)
commit d14d42f19bf3dcef5c81ec2324843121f552a6fc
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:07 2013 +0100

    target-arm: Add new AArch64CPUInfo base class and subclasses
    
    Create a new AArch64CPU class; all 64-bit capable ARM
    CPUs are subclasses of this. (Currently we only support
    one, the "any" CPU used by linux-user.)
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-8-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index 2d9f77f..baebc50 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -5,3 +5,4 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
+obj-$(TARGET_AARCH64) += cpu64.o
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 9f47bae..fbe846e 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -130,6 +130,18 @@ typedef struct ARMCPU {
     uint32_t reset_auxcr;
 } ARMCPU;
 
+#define TYPE_AARCH64_CPU "aarch64-cpu" /* abstract AArch64 CPU type */
+#define AARCH64_CPU_CLASS(klass) \
+    OBJECT_CLASS_CHECK(AArch64CPUClass, (klass), TYPE_AARCH64_CPU)
+#define AARCH64_CPU_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AARCH64_CPU)
+
+typedef struct AArch64CPUClass {
+    /*< private >*/
+    ARMCPUClass parent_class;
+    /*< public >*/
+} AArch64CPUClass;
+
 static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
 {
     return container_of(env, ARMCPU, env);
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
new file mode 100644
index 0000000..faee0f0
--- /dev/null
+++ b/target-arm/cpu64.c
@@ -0,0 +1,111 @@
+/*
+ * QEMU AArch64 CPU
+ *
+ * Copyright (c) 2013 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+
+#include "cpu.h"
+#include "qemu-common.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/loader.h"
+#endif
+#include "hw/arm/arm.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
+
+static inline void set_feature(CPUARMState *env, int feature)
+{
+    env->features |= 1ULL << feature;
+}
+
+#ifdef CONFIG_USER_ONLY
+static void aarch64_any_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_VFP4);
+    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
+    set_feature(&cpu->env, ARM_FEATURE_V7MP);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+}
+#endif
+
+typedef struct ARMCPUInfo {
+    const char *name;
+    void (*initfn)(Object *obj);
+    void (*class_init)(ObjectClass *oc, void *data);
+} ARMCPUInfo;
+
+static const ARMCPUInfo aarch64_cpus[] = {
+#ifdef CONFIG_USER_ONLY
+    { .name = "any",         .initfn = aarch64_any_initfn },
+#endif
+};
+
+static void aarch64_cpu_initfn(Object *obj)
+{
+}
+
+static void aarch64_cpu_finalizefn(Object *obj)
+{
+}
+
+static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void aarch64_cpu_register(const ARMCPUInfo *info)
+{
+    TypeInfo type_info = {
+        .parent = TYPE_AARCH64_CPU,
+        .instance_size = sizeof(ARMCPU),
+        .instance_init = info->initfn,
+        .class_size = sizeof(ARMCPUClass),
+        .class_init = info->class_init,
+    };
+
+    type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name);
+    type_register(&type_info);
+    g_free((void *)type_info.name);
+}
+
+static const TypeInfo aarch64_cpu_type_info = {
+    .name = TYPE_AARCH64_CPU,
+    .parent = TYPE_ARM_CPU,
+    .instance_size = sizeof(ARMCPU),
+    .instance_init = aarch64_cpu_initfn,
+    .instance_finalize = aarch64_cpu_finalizefn,
+    .abstract = true,
+    .class_size = sizeof(AArch64CPUClass),
+    .class_init = aarch64_cpu_class_init,
+};
+
+static void aarch64_cpu_register_types(void)
+{
+    int i;
+
+    type_register_static(&aarch64_cpu_type_info);
+    for (i = 0; i < ARRAY_SIZE(aarch64_cpus); i++) {
+        aarch64_cpu_register(&aarch64_cpus[i]);
+    }
+}
+
+type_init(aarch64_cpu_register_types)
commit eaed129deaea393640cf6bff006cd5cec3b38d8f
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:06 2013 +0100

    target-arm: Pass DisasContext* to gen_set_pc_im()
    
    We want gen_set_pc_im() to work for both AArch64 and AArch32, but
    to do this we'll need the DisasContext* so we can tell which mode
    we're in, so pass it in as a parameter.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-7-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/translate.c b/target-arm/translate.c
index ca411b3..2d8e0a5 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -905,7 +905,7 @@ DO_GEN_ST(st8)
 DO_GEN_ST(st16)
 DO_GEN_ST(st32)
 
-static inline void gen_set_pc_im(target_ulong val)
+static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 {
     tcg_gen_movi_i32(cpu_R[15], val);
 }
@@ -3420,10 +3420,10 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
     tb = s->tb;
     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
         tcg_gen_goto_tb(n);
-        gen_set_pc_im(dest);
+        gen_set_pc_im(s, dest);
         tcg_gen_exit_tb((uintptr_t)tb + n);
     } else {
-        gen_set_pc_im(dest);
+        gen_set_pc_im(s, dest);
         tcg_gen_exit_tb(0);
     }
 }
@@ -3552,7 +3552,7 @@ gen_set_condexec (DisasContext *s)
 static void gen_exception_insn(DisasContext *s, int offset, int excp)
 {
     gen_set_condexec(s);
-    gen_set_pc_im(s->pc - offset);
+    gen_set_pc_im(s, s->pc - offset);
     gen_exception(excp);
     s->is_jmp = DISAS_JUMP;
 }
@@ -3561,7 +3561,7 @@ static void gen_nop_hint(DisasContext *s, int val)
 {
     switch (val) {
     case 3: /* wfi */
-        gen_set_pc_im(s->pc);
+        gen_set_pc_im(s, s->pc);
         s->is_jmp = DISAS_WFI;
         break;
     case 2: /* wfe */
@@ -6338,7 +6338,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
             if (isread) {
                 return 1;
             }
-            gen_set_pc_im(s->pc);
+            gen_set_pc_im(s, s->pc);
             s->is_jmp = DISAS_WFI;
             return 0;
         default:
@@ -6358,7 +6358,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     tmp64 = tcg_const_i64(ri->resetvalue);
                 } else if (ri->readfn) {
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s->pc);
+                    gen_set_pc_im(s, s->pc);
                     tmp64 = tcg_temp_new_i64();
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
@@ -6381,7 +6381,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     tmp = tcg_const_i32(ri->resetvalue);
                 } else if (ri->readfn) {
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s->pc);
+                    gen_set_pc_im(s, s->pc);
                     tmp = tcg_temp_new_i32();
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
@@ -6416,7 +6416,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 tcg_temp_free_i32(tmphi);
                 if (ri->writefn) {
                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
-                    gen_set_pc_im(s->pc);
+                    gen_set_pc_im(s, s->pc);
                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
                     tcg_temp_free_ptr(tmpptr);
                 } else {
@@ -6427,7 +6427,7 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                 if (ri->writefn) {
                     TCGv_i32 tmp;
                     TCGv_ptr tmpptr;
-                    gen_set_pc_im(s->pc);
+                    gen_set_pc_im(s, s->pc);
                     tmp = load_reg(s, rt);
                     tmpptr = tcg_const_ptr(ri);
                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
@@ -8036,7 +8036,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             break;
         case 0xf:
             /* swi */
-            gen_set_pc_im(s->pc);
+            gen_set_pc_im(s, s->pc);
             s->is_jmp = DISAS_SWI;
             break;
         default:
@@ -9940,7 +9940,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
 
         if (cond == 0xf) {
             /* swi */
-            gen_set_pc_im(s->pc);
+            gen_set_pc_im(s, s->pc);
             s->is_jmp = DISAS_SWI;
             break;
         }
@@ -10190,7 +10190,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
             gen_set_label(dc->condlabel);
         }
         if (dc->condjmp || !dc->is_jmp) {
-            gen_set_pc_im(dc->pc);
+            gen_set_pc_im(dc, dc->pc);
             dc->condjmp = 0;
         }
         gen_set_condexec(dc);
commit 0a2461fa49e4d2aeb846390e1eb1bdb9e8196ca4
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:05 2013 +0100

    target-arm: Fix target_ulong/uint32_t confusions
    
    Correct a few places that were using uint32_t or a 32 bit
    only format string to handle something that should be a target_ulong.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-6-git-send-email-peter.maydell at linaro.org
    [PMM: split out to separate patch; added gen_goto_tb() and
    gen_set_pc_im() dest params to list of things to change.]
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index af7cf8a..29170d0 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -823,7 +823,7 @@ static inline bool cpu_has_work(CPUState *cpu)
 #include "exec/exec-all.h"
 
 /* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
+static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
                                     bool do_swap)
 {
     uint32_t insn = cpu_ldl_code(env, addr);
@@ -834,7 +834,7 @@ static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
 }
 
 /* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(CPUARMState *env, uint32_t addr,
+static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
                                      bool do_swap)
 {
     uint16_t insn = cpu_lduw_code(env, addr);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 2605833..ca411b3 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -905,7 +905,7 @@ DO_GEN_ST(st8)
 DO_GEN_ST(st16)
 DO_GEN_ST(st32)
 
-static inline void gen_set_pc_im(uint32_t val)
+static inline void gen_set_pc_im(target_ulong val)
 {
     tcg_gen_movi_i32(cpu_R[15], val);
 }
@@ -3413,7 +3413,7 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
     return 0;
 }
 
-static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
+static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
 {
     TranslationBlock *tb;
 
@@ -9997,7 +9997,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
-    uint32_t next_page_start;
+    target_ulong next_page_start;
     int num_insns;
     int max_insns;
 
@@ -10151,7 +10151,8 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         }
 
         if (tcg_check_temp_count()) {
-            fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
+            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
+                    dc->pc);
         }
 
         /* Translation stops when a conditional branch is encountered.
commit 3407ad0e7a6f04905fc6a8ea72be03553e777988
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:04 2013 +0100

    target-arm: Export cpu_env
    
    The cpu_env tcg variable will be used by both the AArch32 and AArch64
    handling code. Unstaticify it, so that both sides can make use of it.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-5-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-3-git-send-email-john.rigby at linaro.org
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 450a0b6..2605833 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -61,7 +61,7 @@ static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
 #define DISAS_WFI 4
 #define DISAS_SWI 5
 
-static TCGv_ptr cpu_env;
+TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
diff --git a/target-arm/translate.h b/target-arm/translate.h
index e727bc6..8ba1433 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -24,4 +24,6 @@ typedef struct DisasContext {
     int vec_stride;
 } DisasContext;
 
+extern TCGv_ptr cpu_env;
+
 #endif /* TARGET_ARM_TRANSLATE_H */
commit f570c61e694d78fc2f6717f4fbb7e820bf72d8dc
Author: Alexander Graf <agraf at suse.de>
Date:   Tue Sep 3 20:12:03 2013 +0100

    target-arm: Extract the disas struct to a header file
    
    We will need to share the disassembly status struct between AArch32 and
    AArch64 modes. So put it into a header file that both sides can use.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: John Rigby <john.rigby at linaro.org>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-4-git-send-email-peter.maydell at linaro.org
    Message-id: 1368505980-17151-2-git-send-email-john.rigby at linaro.org
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/translate.c b/target-arm/translate.c
index ad499b6..450a0b6 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -47,29 +47,7 @@
 
 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
 
-/* internal defines */
-typedef struct DisasContext {
-    target_ulong pc;
-    int is_jmp;
-    /* Nonzero if this instruction has been conditionally skipped.  */
-    int condjmp;
-    /* The label that will be jumped to when the instruction is skipped.  */
-    int condlabel;
-    /* Thumb-2 conditional execution bits.  */
-    int condexec_mask;
-    int condexec_cond;
-    struct TranslationBlock *tb;
-    int singlestep_enabled;
-    int thumb;
-    int bswap_code;
-#if !defined(CONFIG_USER_ONLY)
-    int user;
-#endif
-    int vfp_enabled;
-    int vec_len;
-    int vec_stride;
-} DisasContext;
-
+#include "translate.h"
 static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target-arm/translate.h b/target-arm/translate.h
new file mode 100644
index 0000000..e727bc6
--- /dev/null
+++ b/target-arm/translate.h
@@ -0,0 +1,27 @@
+#ifndef TARGET_ARM_TRANSLATE_H
+#define TARGET_ARM_TRANSLATE_H
+
+/* internal defines */
+typedef struct DisasContext {
+    target_ulong pc;
+    int is_jmp;
+    /* Nonzero if this instruction has been conditionally skipped.  */
+    int condjmp;
+    /* The label that will be jumped to when the instruction is skipped.  */
+    int condlabel;
+    /* Thumb-2 conditional execution bits.  */
+    int condexec_mask;
+    int condexec_cond;
+    struct TranslationBlock *tb;
+    int singlestep_enabled;
+    int thumb;
+    int bswap_code;
+#if !defined(CONFIG_USER_ONLY)
+    int user;
+#endif
+    int vfp_enabled;
+    int vec_len;
+    int vec_stride;
+} DisasContext;
+
+#endif /* TARGET_ARM_TRANSLATE_H */
commit 08307563ff6cf8cb8d2a7927804dfc5c7dbe86d6
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 3 20:12:02 2013 +0100

    target-arm: Abstract out load/store from a vaddr in AArch32
    
    AArch32 code (ie traditional 32 bit world) expects to be
    able to pass a vaddr in a TCGv_i32. However when QEMU is
    compiled with TARGET_LONG_BITS=64 the TCG load/store
    functions take a TCGv_i64. Abstract out load/store with
    a 32 bit vaddr so we have a place to put the zero extension
    of the vaddr and the extension/truncation of the data value.
    
    Apart from the function definitions most of this patch is
    a simple s/tcg_gen_qemu_/gen_aa32_/.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378235544-22290-3-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 8bcfaf3..ad499b6 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -843,6 +843,90 @@ static inline void store_reg_from_load(CPUARMState *env, DisasContext *s,
     }
 }
 
+/* Abstractions of "generate code to do a guest load/store for
+ * AArch32", where a vaddr is always 32 bits (and is zero
+ * extended if we're a 64 bit core) and data is also
+ * 32 bits unless specifically doing a 64 bit access.
+ * These functions work like tcg_gen_qemu_{ld,st}* except
+ * that their arguments are TCGv_i32 rather than TCGv.
+ */
+#if TARGET_LONG_BITS == 32
+
+#define DO_GEN_LD(OP)                                                    \
+static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+{                                                                        \
+    tcg_gen_qemu_##OP(val, addr, index);                                 \
+}
+
+#define DO_GEN_ST(OP)                                                    \
+static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+{                                                                        \
+    tcg_gen_qemu_##OP(val, addr, index);                                 \
+}
+
+static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_ld64(val, addr, index);
+}
+
+static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_st64(val, addr, index);
+}
+
+#else
+
+#define DO_GEN_LD(OP)                                                    \
+static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+{                                                                        \
+    TCGv addr64 = tcg_temp_new();                                        \
+    TCGv val64 = tcg_temp_new();                                         \
+    tcg_gen_extu_i32_i64(addr64, addr);                                  \
+    tcg_gen_qemu_##OP(val64, addr64, index);                             \
+    tcg_temp_free(addr64);                                               \
+    tcg_gen_trunc_i64_i32(val, val64);                                   \
+    tcg_temp_free(val64);                                                \
+}
+
+#define DO_GEN_ST(OP)                                                    \
+static inline void gen_aa32_##OP(TCGv_i32 val, TCGv_i32 addr, int index) \
+{                                                                        \
+    TCGv addr64 = tcg_temp_new();                                        \
+    TCGv val64 = tcg_temp_new();                                         \
+    tcg_gen_extu_i32_i64(addr64, addr);                                  \
+    tcg_gen_extu_i32_i64(val64, val);                                    \
+    tcg_gen_qemu_##OP(val64, addr64, index);                             \
+    tcg_temp_free(addr64);                                               \
+    tcg_temp_free(val64);                                                \
+}
+
+static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
+{
+    TCGv addr64 = tcg_temp_new();
+    tcg_gen_extu_i32_i64(addr64, addr);
+    tcg_gen_qemu_ld64(val, addr64, index);
+    tcg_temp_free(addr64);
+}
+
+static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
+{
+    TCGv addr64 = tcg_temp_new();
+    tcg_gen_extu_i32_i64(addr64, addr);
+    tcg_gen_qemu_st64(val, addr64, index);
+    tcg_temp_free(addr64);
+}
+
+#endif
+
+DO_GEN_LD(ld8s)
+DO_GEN_LD(ld8u)
+DO_GEN_LD(ld16s)
+DO_GEN_LD(ld16u)
+DO_GEN_LD(ld32u)
+DO_GEN_ST(st8)
+DO_GEN_ST(st16)
+DO_GEN_ST(st32)
+
 static inline void gen_set_pc_im(uint32_t val)
 {
     tcg_gen_movi_i32(cpu_R[15], val);
@@ -1072,18 +1156,20 @@ VFP_GEN_FIX(ulto)
 
 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
 {
-    if (dp)
-        tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
-    else
-        tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
+    if (dp) {
+        gen_aa32_ld64(cpu_F0d, addr, IS_USER(s));
+    } else {
+        gen_aa32_ld32u(cpu_F0s, addr, IS_USER(s));
+    }
 }
 
 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
 {
-    if (dp)
-        tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
-    else
-        tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
+    if (dp) {
+        gen_aa32_st64(cpu_F0d, addr, IS_USER(s));
+    } else {
+        gen_aa32_st32(cpu_F0s, addr, IS_USER(s));
+    }
 }
 
 static inline long
@@ -1420,24 +1506,24 @@ static int disas_iwmmxt_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
         if (insn & ARM_CP_RW_BIT) {
             if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
                 tmp = tcg_temp_new_i32();
-                tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                gen_aa32_ld32u(tmp, addr, IS_USER(s));
                 iwmmxt_store_creg(wrd, tmp);
             } else {
                 i = 1;
                 if (insn & (1 << 8)) {
                     if (insn & (1 << 22)) {		/* WLDRD */
-                        tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
+                        gen_aa32_ld64(cpu_M0, addr, IS_USER(s));
                         i = 0;
                     } else {				/* WLDRW wRd */
                         tmp = tcg_temp_new_i32();
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                     }
                 } else {
                     tmp = tcg_temp_new_i32();
                     if (insn & (1 << 22)) {		/* WLDRH */
-                        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld16u(tmp, addr, IS_USER(s));
                     } else {				/* WLDRB */
-                        tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld8u(tmp, addr, IS_USER(s));
                     }
                 }
                 if (i) {
@@ -1449,24 +1535,24 @@ static int disas_iwmmxt_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
         } else {
             if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
                 tmp = iwmmxt_load_creg(wrd);
-                tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                gen_aa32_st32(tmp, addr, IS_USER(s));
             } else {
                 gen_op_iwmmxt_movq_M0_wRn(wrd);
                 tmp = tcg_temp_new_i32();
                 if (insn & (1 << 8)) {
                     if (insn & (1 << 22)) {		/* WSTRD */
-                        tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
+                        gen_aa32_st64(cpu_M0, addr, IS_USER(s));
                     } else {				/* WSTRW wRd */
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                     }
                 } else {
                     if (insn & (1 << 22)) {		/* WSTRH */
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
-                        tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                        gen_aa32_st16(tmp, addr, IS_USER(s));
                     } else {				/* WSTRB */
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
-                        tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                        gen_aa32_st8(tmp, addr, IS_USER(s));
                     }
                 }
             }
@@ -2531,15 +2617,15 @@ static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
     TCGv_i32 tmp = tcg_temp_new_i32();
     switch (size) {
     case 0:
-        tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+        gen_aa32_ld8u(tmp, addr, IS_USER(s));
         gen_neon_dup_u8(tmp, 0);
         break;
     case 1:
-        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+        gen_aa32_ld16u(tmp, addr, IS_USER(s));
         gen_neon_dup_low16(tmp);
         break;
     case 2:
-        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+        gen_aa32_ld32u(tmp, addr, IS_USER(s));
         break;
     default: /* Avoid compiler warnings.  */
         abort();
@@ -3817,11 +3903,11 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
             if (size == 3) {
                 tmp64 = tcg_temp_new_i64();
                 if (load) {
-                    tcg_gen_qemu_ld64(tmp64, addr, IS_USER(s));
+                    gen_aa32_ld64(tmp64, addr, IS_USER(s));
                     neon_store_reg64(tmp64, rd);
                 } else {
                     neon_load_reg64(tmp64, rd);
-                    tcg_gen_qemu_st64(tmp64, addr, IS_USER(s));
+                    gen_aa32_st64(tmp64, addr, IS_USER(s));
                 }
                 tcg_temp_free_i64(tmp64);
                 tcg_gen_addi_i32(addr, addr, stride);
@@ -3830,21 +3916,21 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     if (size == 2) {
                         if (load) {
                             tmp = tcg_temp_new_i32();
-                            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld32u(tmp, addr, IS_USER(s));
                             neon_store_reg(rd, pass, tmp);
                         } else {
                             tmp = neon_load_reg(rd, pass);
-                            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                            gen_aa32_st32(tmp, addr, IS_USER(s));
                             tcg_temp_free_i32(tmp);
                         }
                         tcg_gen_addi_i32(addr, addr, stride);
                     } else if (size == 1) {
                         if (load) {
                             tmp = tcg_temp_new_i32();
-                            tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld16u(tmp, addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
                             tmp2 = tcg_temp_new_i32();
-                            tcg_gen_qemu_ld16u(tmp2, addr, IS_USER(s));
+                            gen_aa32_ld16u(tmp2, addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
                             tcg_gen_shli_i32(tmp2, tmp2, 16);
                             tcg_gen_or_i32(tmp, tmp, tmp2);
@@ -3854,10 +3940,10 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                             tmp = neon_load_reg(rd, pass);
                             tmp2 = tcg_temp_new_i32();
                             tcg_gen_shri_i32(tmp2, tmp, 16);
-                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            gen_aa32_st16(tmp, addr, IS_USER(s));
                             tcg_temp_free_i32(tmp);
                             tcg_gen_addi_i32(addr, addr, stride);
-                            tcg_gen_qemu_st16(tmp2, addr, IS_USER(s));
+                            gen_aa32_st16(tmp2, addr, IS_USER(s));
                             tcg_temp_free_i32(tmp2);
                             tcg_gen_addi_i32(addr, addr, stride);
                         }
@@ -3866,7 +3952,7 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                             TCGV_UNUSED_I32(tmp2);
                             for (n = 0; n < 4; n++) {
                                 tmp = tcg_temp_new_i32();
-                                tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                                gen_aa32_ld8u(tmp, addr, IS_USER(s));
                                 tcg_gen_addi_i32(addr, addr, stride);
                                 if (n == 0) {
                                     tmp2 = tmp;
@@ -3886,7 +3972,7 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                                 } else {
                                     tcg_gen_shri_i32(tmp, tmp2, n * 8);
                                 }
-                                tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                                gen_aa32_st8(tmp, addr, IS_USER(s));
                                 tcg_temp_free_i32(tmp);
                                 tcg_gen_addi_i32(addr, addr, stride);
                             }
@@ -4010,13 +4096,13 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                     tmp = tcg_temp_new_i32();
                     switch (size) {
                     case 0:
-                        tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld8u(tmp, addr, IS_USER(s));
                         break;
                     case 1:
-                        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld16u(tmp, addr, IS_USER(s));
                         break;
                     case 2:
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                         break;
                     default: /* Avoid compiler warnings.  */
                         abort();
@@ -4034,13 +4120,13 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                         tcg_gen_shri_i32(tmp, tmp, shift);
                     switch (size) {
                     case 0:
-                        tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                        gen_aa32_st8(tmp, addr, IS_USER(s));
                         break;
                     case 1:
-                        tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                        gen_aa32_st16(tmp, addr, IS_USER(s));
                         break;
                     case 2:
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                         break;
                     }
                     tcg_temp_free_i32(tmp);
@@ -6464,14 +6550,14 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
 
     switch (size) {
     case 0:
-        tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+        gen_aa32_ld8u(tmp, addr, IS_USER(s));
         break;
     case 1:
-        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+        gen_aa32_ld16u(tmp, addr, IS_USER(s));
         break;
     case 2:
     case 3:
-        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+        gen_aa32_ld32u(tmp, addr, IS_USER(s));
         break;
     default:
         abort();
@@ -6482,7 +6568,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
         TCGv_i32 tmp2 = tcg_temp_new_i32();
         tcg_gen_addi_i32(tmp2, addr, 4);
         tmp = tcg_temp_new_i32();
-        tcg_gen_qemu_ld32u(tmp, tmp2, IS_USER(s));
+        gen_aa32_ld32u(tmp, tmp2, IS_USER(s));
         tcg_temp_free_i32(tmp2);
         tcg_gen_mov_i32(cpu_exclusive_high, tmp);
         store_reg(s, rt2, tmp);
@@ -6524,14 +6610,14 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tmp = tcg_temp_new_i32();
     switch (size) {
     case 0:
-        tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+        gen_aa32_ld8u(tmp, addr, IS_USER(s));
         break;
     case 1:
-        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+        gen_aa32_ld16u(tmp, addr, IS_USER(s));
         break;
     case 2:
     case 3:
-        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+        gen_aa32_ld32u(tmp, addr, IS_USER(s));
         break;
     default:
         abort();
@@ -6542,7 +6628,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
         TCGv_i32 tmp2 = tcg_temp_new_i32();
         tcg_gen_addi_i32(tmp2, addr, 4);
         tmp = tcg_temp_new_i32();
-        tcg_gen_qemu_ld32u(tmp, tmp2, IS_USER(s));
+        gen_aa32_ld32u(tmp, tmp2, IS_USER(s));
         tcg_temp_free_i32(tmp2);
         tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
         tcg_temp_free_i32(tmp);
@@ -6550,14 +6636,14 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tmp = load_reg(s, rt);
     switch (size) {
     case 0:
-        tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+        gen_aa32_st8(tmp, addr, IS_USER(s));
         break;
     case 1:
-        tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+        gen_aa32_st16(tmp, addr, IS_USER(s));
         break;
     case 2:
     case 3:
-        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+        gen_aa32_st32(tmp, addr, IS_USER(s));
         break;
     default:
         abort();
@@ -6566,7 +6652,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     if (size == 3) {
         tcg_gen_addi_i32(addr, addr, 4);
         tmp = load_reg(s, rt2);
-        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+        gen_aa32_st32(tmp, addr, IS_USER(s));
         tcg_temp_free_i32(tmp);
     }
     tcg_gen_movi_i32(cpu_R[rd], 0);
@@ -6613,11 +6699,11 @@ static void gen_srs(DisasContext *s,
     }
     tcg_gen_addi_i32(addr, addr, offset);
     tmp = load_reg(s, 14);
-    tcg_gen_qemu_st32(tmp, addr, 0);
+    gen_aa32_st32(tmp, addr, 0);
     tcg_temp_free_i32(tmp);
     tmp = load_cpu_field(spsr);
     tcg_gen_addi_i32(addr, addr, 4);
-    tcg_gen_qemu_st32(tmp, addr, 0);
+    gen_aa32_st32(tmp, addr, 0);
     tcg_temp_free_i32(tmp);
     if (writeback) {
         switch (amode) {
@@ -6763,10 +6849,10 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 tcg_gen_addi_i32(addr, addr, offset);
             /* Load PC into tmp and CPSR into tmp2.  */
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld32u(tmp, addr, 0);
+            gen_aa32_ld32u(tmp, addr, 0);
             tcg_gen_addi_i32(addr, addr, 4);
             tmp2 = tcg_temp_new_i32();
-            tcg_gen_qemu_ld32u(tmp2, addr, 0);
+            gen_aa32_ld32u(tmp2, addr, 0);
             if (insn & (1 << 21)) {
                 /* Base writeback.  */
                 switch (i) {
@@ -7322,13 +7408,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                                 tmp = tcg_temp_new_i32();
                                 switch (op1) {
                                 case 0: /* lda */
-                                    tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                                    gen_aa32_ld32u(tmp, addr, IS_USER(s));
                                     break;
                                 case 2: /* ldab */
-                                    tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                                    gen_aa32_ld8u(tmp, addr, IS_USER(s));
                                     break;
                                 case 3: /* ldah */
-                                    tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                                    gen_aa32_ld16u(tmp, addr, IS_USER(s));
                                     break;
                                 default:
                                     abort();
@@ -7339,13 +7425,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                                 tmp = load_reg(s, rm);
                                 switch (op1) {
                                 case 0: /* stl */
-                                    tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                                    gen_aa32_st32(tmp, addr, IS_USER(s));
                                     break;
                                 case 2: /* stlb */
-                                    tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                                    gen_aa32_st8(tmp, addr, IS_USER(s));
                                     break;
                                 case 3: /* stlh */
-                                    tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                                    gen_aa32_st16(tmp, addr, IS_USER(s));
                                     break;
                                 default:
                                     abort();
@@ -7400,11 +7486,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         tmp = load_reg(s, rm);
                         tmp2 = tcg_temp_new_i32();
                         if (insn & (1 << 22)) {
-                            tcg_gen_qemu_ld8u(tmp2, addr, IS_USER(s));
-                            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                            gen_aa32_ld8u(tmp2, addr, IS_USER(s));
+                            gen_aa32_st8(tmp, addr, IS_USER(s));
                         } else {
-                            tcg_gen_qemu_ld32u(tmp2, addr, IS_USER(s));
-                            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                            gen_aa32_ld32u(tmp2, addr, IS_USER(s));
+                            gen_aa32_st32(tmp, addr, IS_USER(s));
                         }
                         tcg_temp_free_i32(tmp);
                         tcg_temp_free_i32(addr);
@@ -7426,14 +7512,14 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                     tmp = tcg_temp_new_i32();
                     switch(sh) {
                     case 1:
-                        tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld16u(tmp, addr, IS_USER(s));
                         break;
                     case 2:
-                        tcg_gen_qemu_ld8s(tmp, addr, IS_USER(s));
+                        gen_aa32_ld8s(tmp, addr, IS_USER(s));
                         break;
                     default:
                     case 3:
-                        tcg_gen_qemu_ld16s(tmp, addr, IS_USER(s));
+                        gen_aa32_ld16s(tmp, addr, IS_USER(s));
                         break;
                     }
                     load = 1;
@@ -7443,21 +7529,21 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                     if (sh & 1) {
                         /* store */
                         tmp = load_reg(s, rd);
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                         tcg_temp_free_i32(tmp);
                         tcg_gen_addi_i32(addr, addr, 4);
                         tmp = load_reg(s, rd + 1);
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                         tcg_temp_free_i32(tmp);
                         load = 0;
                     } else {
                         /* load */
                         tmp = tcg_temp_new_i32();
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                         store_reg(s, rd, tmp);
                         tcg_gen_addi_i32(addr, addr, 4);
                         tmp = tcg_temp_new_i32();
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                         rd++;
                         load = 1;
                     }
@@ -7465,7 +7551,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 } else {
                     /* store */
                     tmp = load_reg(s, rd);
-                    tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                    gen_aa32_st16(tmp, addr, IS_USER(s));
                     tcg_temp_free_i32(tmp);
                     load = 0;
                 }
@@ -7798,17 +7884,17 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 /* load */
                 tmp = tcg_temp_new_i32();
                 if (insn & (1 << 22)) {
-                    tcg_gen_qemu_ld8u(tmp, tmp2, i);
+                    gen_aa32_ld8u(tmp, tmp2, i);
                 } else {
-                    tcg_gen_qemu_ld32u(tmp, tmp2, i);
+                    gen_aa32_ld32u(tmp, tmp2, i);
                 }
             } else {
                 /* store */
                 tmp = load_reg(s, rd);
                 if (insn & (1 << 22)) {
-                    tcg_gen_qemu_st8(tmp, tmp2, i);
+                    gen_aa32_st8(tmp, tmp2, i);
                 } else {
-                    tcg_gen_qemu_st32(tmp, tmp2, i);
+                    gen_aa32_st32(tmp, tmp2, i);
                 }
                 tcg_temp_free_i32(tmp);
             }
@@ -7875,7 +7961,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         if (insn & (1 << 20)) {
                             /* load */
                             tmp = tcg_temp_new_i32();
-                            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld32u(tmp, addr, IS_USER(s));
                             if (user) {
                                 tmp2 = tcg_const_i32(i);
                                 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
@@ -7902,7 +7988,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                             } else {
                                 tmp = load_reg(s, i);
                             }
-                            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                            gen_aa32_st32(tmp, addr, IS_USER(s));
                             tcg_temp_free_i32(tmp);
                         }
                         j++;
@@ -8161,20 +8247,20 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                 if (insn & (1 << 20)) {
                     /* ldrd */
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld32u(tmp, addr, IS_USER(s));
                     store_reg(s, rs, tmp);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld32u(tmp, addr, IS_USER(s));
                     store_reg(s, rd, tmp);
                 } else {
                     /* strd */
                     tmp = load_reg(s, rs);
-                    tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                    gen_aa32_st32(tmp, addr, IS_USER(s));
                     tcg_temp_free_i32(tmp);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp = load_reg(s, rd);
-                    tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                    gen_aa32_st32(tmp, addr, IS_USER(s));
                     tcg_temp_free_i32(tmp);
                 }
                 if (insn & (1 << 21)) {
@@ -8212,11 +8298,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                     tcg_gen_add_i32(addr, addr, tmp);
                     tcg_temp_free_i32(tmp);
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld16u(tmp, addr, IS_USER(s));
                 } else { /* tbb */
                     tcg_temp_free_i32(tmp);
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld8u(tmp, addr, IS_USER(s));
                 }
                 tcg_temp_free_i32(addr);
                 tcg_gen_shli_i32(tmp, tmp, 1);
@@ -8253,13 +8339,13 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         tmp = tcg_temp_new_i32();
                         switch (op) {
                         case 0: /* ldab */
-                            tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld8u(tmp, addr, IS_USER(s));
                             break;
                         case 1: /* ldah */
-                            tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld16u(tmp, addr, IS_USER(s));
                             break;
                         case 2: /* lda */
-                            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                            gen_aa32_ld32u(tmp, addr, IS_USER(s));
                             break;
                         default:
                             abort();
@@ -8269,13 +8355,13 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         tmp = load_reg(s, rs);
                         switch (op) {
                         case 0: /* stlb */
-                            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                            gen_aa32_st8(tmp, addr, IS_USER(s));
                             break;
                         case 1: /* stlh */
-                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            gen_aa32_st16(tmp, addr, IS_USER(s));
                             break;
                         case 2: /* stl */
-                            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                            gen_aa32_st32(tmp, addr, IS_USER(s));
                             break;
                         default:
                             abort();
@@ -8303,10 +8389,10 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         tcg_gen_addi_i32(addr, addr, -8);
                     /* Load PC into tmp and CPSR into tmp2.  */
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp, addr, 0);
+                    gen_aa32_ld32u(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp2 = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp2, addr, 0);
+                    gen_aa32_ld32u(tmp2, addr, 0);
                     if (insn & (1 << 21)) {
                         /* Base writeback.  */
                         if (insn & (1 << 24)) {
@@ -8345,7 +8431,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                     if (insn & (1 << 20)) {
                         /* Load.  */
                         tmp = tcg_temp_new_i32();
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                         if (i == 15) {
                             gen_bx(s, tmp);
                         } else if (i == rn) {
@@ -8357,7 +8443,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                     } else {
                         /* Store.  */
                         tmp = load_reg(s, i);
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                         tcg_temp_free_i32(tmp);
                     }
                     tcg_gen_addi_i32(addr, addr, 4);
@@ -9135,19 +9221,19 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
             tmp = tcg_temp_new_i32();
             switch (op) {
             case 0:
-                tcg_gen_qemu_ld8u(tmp, addr, user);
+                gen_aa32_ld8u(tmp, addr, user);
                 break;
             case 4:
-                tcg_gen_qemu_ld8s(tmp, addr, user);
+                gen_aa32_ld8s(tmp, addr, user);
                 break;
             case 1:
-                tcg_gen_qemu_ld16u(tmp, addr, user);
+                gen_aa32_ld16u(tmp, addr, user);
                 break;
             case 5:
-                tcg_gen_qemu_ld16s(tmp, addr, user);
+                gen_aa32_ld16s(tmp, addr, user);
                 break;
             case 2:
-                tcg_gen_qemu_ld32u(tmp, addr, user);
+                gen_aa32_ld32u(tmp, addr, user);
                 break;
             default:
                 tcg_temp_free_i32(tmp);
@@ -9164,13 +9250,13 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
             tmp = load_reg(s, rs);
             switch (op) {
             case 0:
-                tcg_gen_qemu_st8(tmp, addr, user);
+                gen_aa32_st8(tmp, addr, user);
                 break;
             case 1:
-                tcg_gen_qemu_st16(tmp, addr, user);
+                gen_aa32_st16(tmp, addr, user);
                 break;
             case 2:
-                tcg_gen_qemu_st32(tmp, addr, user);
+                gen_aa32_st32(tmp, addr, user);
                 break;
             default:
                 tcg_temp_free_i32(tmp);
@@ -9307,7 +9393,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             addr = tcg_temp_new_i32();
             tcg_gen_movi_i32(addr, val);
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+            gen_aa32_ld32u(tmp, addr, IS_USER(s));
             tcg_temp_free_i32(addr);
             store_reg(s, rd, tmp);
             break;
@@ -9510,28 +9596,28 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
 
         switch (op) {
         case 0: /* str */
-            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+            gen_aa32_st32(tmp, addr, IS_USER(s));
             break;
         case 1: /* strh */
-            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+            gen_aa32_st16(tmp, addr, IS_USER(s));
             break;
         case 2: /* strb */
-            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+            gen_aa32_st8(tmp, addr, IS_USER(s));
             break;
         case 3: /* ldrsb */
-            tcg_gen_qemu_ld8s(tmp, addr, IS_USER(s));
+            gen_aa32_ld8s(tmp, addr, IS_USER(s));
             break;
         case 4: /* ldr */
-            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+            gen_aa32_ld32u(tmp, addr, IS_USER(s));
             break;
         case 5: /* ldrh */
-            tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+            gen_aa32_ld16u(tmp, addr, IS_USER(s));
             break;
         case 6: /* ldrb */
-            tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+            gen_aa32_ld8u(tmp, addr, IS_USER(s));
             break;
         case 7: /* ldrsh */
-            tcg_gen_qemu_ld16s(tmp, addr, IS_USER(s));
+            gen_aa32_ld16s(tmp, addr, IS_USER(s));
             break;
         }
         if (op >= 3) { /* load */
@@ -9553,12 +9639,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
         if (insn & (1 << 11)) {
             /* load */
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+            gen_aa32_ld32u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
             tmp = load_reg(s, rd);
-            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+            gen_aa32_st32(tmp, addr, IS_USER(s));
             tcg_temp_free_i32(tmp);
         }
         tcg_temp_free_i32(addr);
@@ -9575,12 +9661,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
         if (insn & (1 << 11)) {
             /* load */
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld8u(tmp, addr, IS_USER(s));
+            gen_aa32_ld8u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
             tmp = load_reg(s, rd);
-            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+            gen_aa32_st8(tmp, addr, IS_USER(s));
             tcg_temp_free_i32(tmp);
         }
         tcg_temp_free_i32(addr);
@@ -9597,12 +9683,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
         if (insn & (1 << 11)) {
             /* load */
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld16u(tmp, addr, IS_USER(s));
+            gen_aa32_ld16u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
             tmp = load_reg(s, rd);
-            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+            gen_aa32_st16(tmp, addr, IS_USER(s));
             tcg_temp_free_i32(tmp);
         }
         tcg_temp_free_i32(addr);
@@ -9618,12 +9704,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
         if (insn & (1 << 11)) {
             /* load */
             tmp = tcg_temp_new_i32();
-            tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+            gen_aa32_ld32u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
             tmp = load_reg(s, rd);
-            tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+            gen_aa32_st32(tmp, addr, IS_USER(s));
             tcg_temp_free_i32(tmp);
         }
         tcg_temp_free_i32(addr);
@@ -9691,12 +9777,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                     if (insn & (1 << 11)) {
                         /* pop */
                         tmp = tcg_temp_new_i32();
-                        tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                        gen_aa32_ld32u(tmp, addr, IS_USER(s));
                         store_reg(s, i, tmp);
                     } else {
                         /* push */
                         tmp = load_reg(s, i);
-                        tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                        gen_aa32_st32(tmp, addr, IS_USER(s));
                         tcg_temp_free_i32(tmp);
                     }
                     /* advance to the next address.  */
@@ -9708,13 +9794,13 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (insn & (1 << 11)) {
                     /* pop pc */
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld32u(tmp, addr, IS_USER(s));
                     /* don't set the pc until the rest of the instruction
                        has completed */
                 } else {
                     /* push lr */
                     tmp = load_reg(s, 14);
-                    tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                    gen_aa32_st32(tmp, addr, IS_USER(s));
                     tcg_temp_free_i32(tmp);
                 }
                 tcg_gen_addi_i32(addr, addr, 4);
@@ -9840,7 +9926,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 if (insn & (1 << 11)) {
                     /* load */
                     tmp = tcg_temp_new_i32();
-                    tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                    gen_aa32_ld32u(tmp, addr, IS_USER(s));
                     if (i == rn) {
                         loaded_var = tmp;
                     } else {
@@ -9849,7 +9935,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                 } else {
                     /* store */
                     tmp = load_reg(s, i);
-                    tcg_gen_qemu_st32(tmp, addr, IS_USER(s));
+                    gen_aa32_st32(tmp, addr, IS_USER(s));
                     tcg_temp_free_i32(tmp);
                 }
                 /* advance to the next address */
commit 4d017979aa1672b40ccc083daf455f8740eead82
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 10 19:09:33 2013 +0100

    abitypes.h: Remove incorrect ARM ABI_LLONG_ALIGNMENT
    
    The ARM EABI specifies that 64 bit integers should be
    8-byte aligned; remove our incorrect setting of 4-byte alignment.
    This has no actual effect since it only set the alignment
    for the 'abi_ullong' and 'abi_llong' types, which are used
    only inside code which is MIPS-specific, but it will
    avoid problems later if we use the types elsewhere.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/include/exec/user/abitypes.h b/include/exec/user/abitypes.h
index 008501b..80eedac 100644
--- a/include/exec/user/abitypes.h
+++ b/include/exec/user/abitypes.h
@@ -14,10 +14,6 @@
 #define ABI_LLONG_ALIGNMENT 2
 #endif
 
-#ifdef TARGET_ARM
-#define ABI_LLONG_ALIGNMENT 4
-#endif
-
 #ifndef ABI_SHORT_ALIGNMENT
 #define ABI_SHORT_ALIGNMENT 2
 #endif
commit 031c44e4deedbd7829703654e381ca0b18e78a12
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 10 19:09:33 2013 +0100

    pl110: Clarify comment about PL110 ID on VersatilePB
    
    Clarify a comment about the ID register value presented by
    the PL110 variant present on the VersatilePB board (based
    on testing what the actual hardware does), to indicate that
    this is not an error in our emulation, and to remove an #if-0.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/display/pl110.c b/hw/display/pl110.c
index e79ab4b..790e510 100644
--- a/hw/display/pl110.c
+++ b/hw/display/pl110.c
@@ -94,23 +94,21 @@ static const VMStateDescription vmstate_pl110 = {
 static const unsigned char pl110_id[] =
 { 0x10, 0x11, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
 
-/* The Arm documentation (DDI0224C) says the CLDC on the Versatile board
-   has a different ID.  However Linux only looks for the normal ID.  */
-#if 0
-static const unsigned char pl110_versatile_id[] =
-{ 0x93, 0x10, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 };
-#else
-#define pl110_versatile_id pl110_id
-#endif
-
 static const unsigned char pl111_id[] = {
     0x11, 0x11, 0x24, 0x00, 0x0d, 0xf0, 0x05, 0xb1
 };
 
+
 /* Indexed by pl110_version */
 static const unsigned char *idregs[] = {
     pl110_id,
-    pl110_versatile_id,
+    /* The ARM documentation (DDI0224C) says the CLCDC on the Versatile board
+     * has a different ID (0x93, 0x10, 0x04, 0x00, ...). However the hardware
+     * itself has the same ID values as a stock PL110, and guests (in
+     * particular Linux) rely on this. We emulate what the hardware does,
+     * rather than what the docs claim it ought to do.
+     */
+    pl110_id,
     pl111_id
 };
 
commit 78027bb6d9111c8ccd515930cfa05d7f532ecb2a
Author: Cole Robinson <crobinso at redhat.com>
Date:   Tue Sep 10 19:09:33 2013 +0100

    target-arm: Implement qmp query-cpu-definitions
    
    Libvirt uses this to introspect available CPU models.
    
    Signed-off-by: Cole Robinson <crobinso at redhat.com>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Message-id: c0bdcd6c7ea6a085a6902ccaa73180fd771c8267.1378303555.git.crobinso at redhat.com
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.c b/target-arm/helper.c
index c1a68c7..2a98be7 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2,6 +2,7 @@
 #include "exec/gdbstub.h"
 #include "helper.h"
 #include "qemu/host-utils.h"
+#include "sysemu/arch_init.h"
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
 
@@ -1829,6 +1830,37 @@ void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
     g_slist_free(list);
 }
 
+static void arm_cpu_add_definition(gpointer data, gpointer user_data)
+{
+    ObjectClass *oc = data;
+    CpuDefinitionInfoList **cpu_list = user_data;
+    CpuDefinitionInfoList *entry;
+    CpuDefinitionInfo *info;
+    const char *typename;
+
+    typename = object_class_get_name(oc);
+    info = g_malloc0(sizeof(*info));
+    info->name = g_strndup(typename,
+                           strlen(typename) - strlen("-" TYPE_ARM_CPU));
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->value = info;
+    entry->next = *cpu_list;
+    *cpu_list = entry;
+}
+
+CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
+{
+    CpuDefinitionInfoList *cpu_list = NULL;
+    GSList *list;
+
+    list = object_class_get_list(TYPE_ARM_CPU, false);
+    g_slist_foreach(list, arm_cpu_add_definition, &cpu_list);
+    g_slist_free(list);
+
+    return cpu_list;
+}
+
 void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
                                        const ARMCPRegInfo *r, void *opaque)
 {
commit f62cafd4c87fad7bb9b9544b4cf4991d34764b11
Author: Sebastian Ottlik <ottlik at fzi.de>
Date:   Tue Sep 10 19:09:32 2013 +0100

    target-arm: fix ARMv7M stack alignment on reset
    
    When the initial SP is loaded from the vector table on ARMv7M systems the two
    least significant bits are ignored as the stack is always aligned at a four byte
    boundary (see ARM DDI 0403C, B1.4.1 and B1.5.5). So far QEMU did not ignore
    these bits leading to a stack alignment inconsistent with real hardware for
    binaries that rely on this behaviour. This patch fixes this issue by masking the
    two least significant bits when loading the SP.
    
    Signed-off-by: Sebastian Ottlik <ottlik at fzi.de>
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Message-id: 1378286595-27072-1-git-send-email-ottlik at fzi.de
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 827e28e..09206b5 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -108,7 +108,7 @@ static void arm_cpu_reset(CPUState *s)
                modified flash and reset itself.  However images
                loaded via -kernel have not been copied yet, so load the
                values directly from there.  */
-            env->regs[13] = ldl_p(rom);
+            env->regs[13] = ldl_p(rom) & 0xFFFFFFFC;
             pc = ldl_p(rom + 4);
             env->thumb = pc & 1;
             env->regs[15] = pc & ~1;
commit 78dbbbe4dff95369c63bf77ee0df23371e1d6602
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 10 19:09:32 2013 +0100

    target-arm: Avoid "1 << 31" undefined behaviour
    
    Avoid the undefined behaviour of "1 << 31" by using 1U to make
    the shift be of an unsigned value rather than shifting into the
    sign bit of a signed integer. For consistency, we make all the
    CPSR_* constants unsigned, though the only one which triggers
    undefined behaviour is CPSR_N.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Message-id: 1378391908-22137-3-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f2abdf3..af7cf8a 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -270,22 +270,22 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
                               int mmu_idx);
 #define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
 
-#define CPSR_M (0x1f)
-#define CPSR_T (1 << 5)
-#define CPSR_F (1 << 6)
-#define CPSR_I (1 << 7)
-#define CPSR_A (1 << 8)
-#define CPSR_E (1 << 9)
-#define CPSR_IT_2_7 (0xfc00)
-#define CPSR_GE (0xf << 16)
-#define CPSR_RESERVED (0xf << 20)
-#define CPSR_J (1 << 24)
-#define CPSR_IT_0_1 (3 << 25)
-#define CPSR_Q (1 << 27)
-#define CPSR_V (1 << 28)
-#define CPSR_C (1 << 29)
-#define CPSR_Z (1 << 30)
-#define CPSR_N (1 << 31)
+#define CPSR_M (0x1fU)
+#define CPSR_T (1U << 5)
+#define CPSR_F (1U << 6)
+#define CPSR_I (1U << 7)
+#define CPSR_A (1U << 8)
+#define CPSR_E (1U << 9)
+#define CPSR_IT_2_7 (0xfc00U)
+#define CPSR_GE (0xfU << 16)
+#define CPSR_RESERVED (0xfU << 20)
+#define CPSR_J (1U << 24)
+#define CPSR_IT_0_1 (3U << 25)
+#define CPSR_Q (1U << 27)
+#define CPSR_V (1U << 28)
+#define CPSR_C (1U << 29)
+#define CPSR_Z (1U << 30)
+#define CPSR_N (1U << 31)
 #define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
 
 #define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e51ef20..c1a68c7 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -972,7 +972,7 @@ static int par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 static inline bool extended_addresses_enabled(CPUARMState *env)
 {
     return arm_feature(env, ARM_FEATURE_LPAE)
-        && (env->cp15.c2_control & (1 << 31));
+        && (env->cp15.c2_control & (1U << 31));
 }
 
 static int ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
@@ -1385,7 +1385,7 @@ static int mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri,
      * so these bits always RAZ.
      */
     if (arm_feature(env, ARM_FEATURE_V7MP)) {
-        mpidr |= (1 << 31);
+        mpidr |= (1U << 31);
         /* Cores which are uniprocessor (non-coherent)
          * but still implement the MP extensions set
          * bit 30. (For instance, A9UP.) However we do
commit 534df156090539854c2ac819dcdb096d01dab5c1
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 10 19:09:32 2013 +0100

    target-arm: Use sextract32() in branch decode
    
    In the decode of ARM B and BL insns, swap the order of the
    "append 2 implicit zeros to imm24" and the sign extend, and
    use the new sextract32() utility function to do the latter.
    This avoids a direct dependency on the undefined C behaviour
    of shifting into the sign bit of an integer.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Message-id: 1378391908-22137-2-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 4f4a0a9..8bcfaf3 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -28,6 +28,7 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
+#include "qemu/bitops.h"
 
 #include "helper.h"
 #define GEN_HELPER 1
@@ -7957,8 +7958,8 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                     tcg_gen_movi_i32(tmp, val);
                     store_reg(s, 14, tmp);
                 }
-                offset = (((int32_t)insn << 8) >> 8);
-                val += (offset << 2) + 4;
+                offset = sextract32(insn << 2, 0, 26);
+                val += offset + 4;
                 gen_jmp(s, val);
             }
             break;
commit f5f6d38b7458b8a1a46a750ac131ca8a2d45d946
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 10 19:09:32 2013 +0100

    target-arm: Make '-cpu any' available in linux-user mode only
    
    Make the 'any' CPU for target-arm available only in linux-user mode.
    The ARM target provides a CPU named "any", which turns on support for
    all user-level instruction set extensions we know about. This is
    intended for linux-user emulation mode, where it is the default CPU type.
    It makes no sense to try to use this for system emulation, since we don't
    initialize it with any system-level information like feature register
    values or implementation specific cp15 registers. (Unsurprisingly, some
    boards won't boot at all, though you might get lucky in some cases where
    the guest doesn't happen to prod things that aren't there.)
    
    Prevent users from making this command line error by removing the
    CPU definition from the softmmu build.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Message-id: 1378213995-12945-1-git-send-email-peter.maydell at linaro.org

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index b2556c6..827e28e 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -822,6 +822,7 @@ static void pxa270c5_initfn(Object *obj)
     cpu->reset_sctlr = 0x00000078;
 }
 
+#ifdef CONFIG_USER_ONLY
 static void arm_any_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
@@ -834,6 +835,7 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     cpu->midr = 0xffffffff;
 }
+#endif
 
 typedef struct ARMCPUInfo {
     const char *name;
@@ -874,7 +876,9 @@ static const ARMCPUInfo arm_cpus[] = {
     { .name = "pxa270-b1",   .initfn = pxa270b1_initfn },
     { .name = "pxa270-c0",   .initfn = pxa270c0_initfn },
     { .name = "pxa270-c5",   .initfn = pxa270c5_initfn },
+#ifdef CONFIG_USER_ONLY
     { .name = "any",         .initfn = arm_any_initfn },
+#endif
 };
 
 static void arm_cpu_class_init(ObjectClass *oc, void *data)
commit 02dc4bf5684d3fb46786fab2ecff98214b1df9fe
Author: Cole Robinson <crobinso at redhat.com>
Date:   Sat Aug 31 18:36:17 2013 -0400

    qapi-types.py: Fix enum struct sizes on i686
    
    Unlike other list types, enum wasn't adding any padding, which caused
    a mismatch between the generated struct size and GenericList struct
    size. More details in a678e26cbe89f7a27cbce794c2c2784571ee9d21
    
    This crashed qemu if calling qmp query-tpm-types for example, which
    upsets libvirt capabilities probing. Reproducer on i686:
    
    (sleep 5; printf '{"execute":"qmp_capabilities"}\n{"execute":"query-tpm-types"}\n') | ./i386-softmmu/qemu-system-i386 -S -nodefaults -nographic -M none -qmp stdio
    
    https://bugs.launchpad.net/qemu/+bug/1219207
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Cole Robinson <crobinso at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Tested-by: Richard W.M. Jones <rjones at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 86de980..5222463 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -51,7 +51,10 @@ def generate_fwd_enum_struct(name, members):
     return mcgen('''
 typedef struct %(name)sList
 {
-    %(name)s value;
+    union {
+        %(name)s value;
+        uint64_t padding;
+    };
     struct %(name)sList *next;
 } %(name)sList;
 ''',
commit adbecc89731cf3e0ae656d50ea9fa58c589c4bdc
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Mon Sep 9 10:18:17 2013 +0200

    ehci: save device pointer in EHCIState
    
    We'll need a pointer to the actual pci/sysbus device,
    stick a pointer to it into the EHCIState struct.
    
    https://bugzilla.redhat.com/show_bug.cgi?id=1005495
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 137e200..22bdbf4 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1241,13 +1241,11 @@ static int ehci_init_transfer(EHCIPacket *p)
 {
     uint32_t cpage, offset, bytes, plen;
     dma_addr_t page;
-    USBBus *bus = &p->queue->ehci->bus;
-    BusState *qbus = BUS(bus);
 
     cpage  = get_field(p->qtd.token, QTD_TOKEN_CPAGE);
     bytes  = get_field(p->qtd.token, QTD_TOKEN_TBYTES);
     offset = p->qtd.bufptr[0] & ~QTD_BUFPTR_MASK;
-    qemu_sglist_init(&p->sgl, qbus->parent, 5, p->queue->ehci->as);
+    qemu_sglist_init(&p->sgl, p->queue->ehci->device, 5, p->queue->ehci->as);
 
     while (bytes > 0) {
         if (cpage > 4) {
@@ -1486,7 +1484,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 return -1;
             }
 
-            qemu_sglist_init(&ehci->isgl, DEVICE(ehci), 2, ehci->as);
+            qemu_sglist_init(&ehci->isgl, ehci->device, 2, ehci->as);
             if (off + len > 4096) {
                 /* transfer crosses page border */
                 uint32_t len2 = off + len - 4096;
@@ -2529,6 +2527,7 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
 
     s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_frame_timer, s);
     s->async_bh = qemu_bh_new(ehci_frame_timer, s);
+    s->device = dev;
 
     qemu_register_reset(ehci_reset, s);
     qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h
index 15a28e8..065c9fa 100644
--- a/hw/usb/hcd-ehci.h
+++ b/hw/usb/hcd-ehci.h
@@ -255,6 +255,7 @@ typedef QTAILQ_HEAD(EHCIQueueHead, EHCIQueue) EHCIQueueHead;
 
 struct EHCIState {
     USBBus bus;
+    DeviceState *device;
     qemu_irq irq;
     MemoryRegion mem;
     AddressSpace *as;
commit 615fe4de4b3c26619611078960d3103550bde7d0
Author: Miroslav Rezanina <mrezanin at redhat.com>
Date:   Tue Sep 3 11:23:09 2013 +0200

    Remove dev-bluetooth.c dependency from vl.c
    
    Use usb_legacy_register handling to create the bt-dongle device and remove
    the code dependency from vl.c so that CONFIG_USB_BLUETOOTH can be disabled.
    
    Signed-off-by: Miroslav Rezanina <mrezanin at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index f9695e7..a3eac3e 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -18,9 +18,6 @@ common-obj-$(CONFIG_USB_STORAGE_UAS)  += dev-uas.o
 common-obj-$(CONFIG_USB_AUDIO)        += dev-audio.o
 common-obj-$(CONFIG_USB_SERIAL)       += dev-serial.o
 common-obj-$(CONFIG_USB_NETWORK)      += dev-network.o
-
-# FIXME: make configurable too
-CONFIG_USB_BLUETOOTH := y
 common-obj-$(CONFIG_USB_BLUETOOTH)    += dev-bluetooth.o
 
 ifeq ($(CONFIG_USB_SMARTCARD),y)
diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c
index f2fc2a8..7f292b1 100644
--- a/hw/usb/dev-bluetooth.c
+++ b/hw/usb/dev-bluetooth.c
@@ -511,10 +511,17 @@ static int usb_bt_initfn(USBDevice *dev)
     return 0;
 }
 
-USBDevice *usb_bt_init(USBBus *bus, HCIInfo *hci)
+static USBDevice *usb_bt_init(USBBus *bus, const char *cmdline)
 {
     USBDevice *dev;
     struct USBBtState *s;
+    HCIInfo *hci;
+
+    if (*cmdline) {
+        hci = hci_init(cmdline);
+    } else {
+        hci = bt_new_hci(qemu_find_bt_vlan(0));
+    }
 
     if (!hci)
         return NULL;
@@ -566,6 +573,7 @@ static const TypeInfo bt_info = {
 static void usb_bt_register_types(void)
 {
     type_register_static(&bt_info);
+    usb_legacy_register("usb-bt-dongle", "bt", usb_bt_init);
 }
 
 type_init(usb_bt_register_types)
diff --git a/include/hw/usb.h b/include/hw/usb.h
index 1b8acba..a7680d4 100644
--- a/include/hw/usb.h
+++ b/include/hw/usb.h
@@ -442,9 +442,6 @@ int set_usb_string(uint8_t *buf, const char *str);
 USBDevice *usb_host_device_open(USBBus *bus, const char *devname);
 void usb_host_info(Monitor *mon, const QDict *qdict);
 
-/* usb-bt.c */
-USBDevice *usb_bt_init(USBBus *bus, HCIInfo *hci);
-
 /* usb ports of the VM */
 
 #define VM_USB_HUB_SIZE 8
diff --git a/vl.c b/vl.c
index faefd9f..4e709d5 100644
--- a/vl.c
+++ b/vl.c
@@ -1457,8 +1457,10 @@ static void configure_msg(QemuOpts *opts)
 
 static int usb_device_add(const char *devname)
 {
-    const char *p;
     USBDevice *dev = NULL;
+#ifndef CONFIG_LINUX
+    const char *p;
+#endif
 
     if (!usb_enabled(false)) {
         return -1;
@@ -1474,15 +1476,8 @@ static int usb_device_add(const char *devname)
     /* only the linux version is qdev-ified, usb-bsd still needs this */
     if (strstart(devname, "host:", &p)) {
         dev = usb_host_device_open(usb_bus_find(-1), p);
-    } else
-#endif
-    if (!strcmp(devname, "bt") || strstart(devname, "bt:", &p)) {
-        dev = usb_bt_init(usb_bus_find(-1),
-                          devname[2] ? hci_init(p)
-                                     : bt_new_hci(qemu_find_bt_vlan(0)));
-    } else {
-        return -1;
     }
+#endif
     if (!dev)
         return -1;
 
commit 644e1a8a34d2f799bfeefae94b71593a2aa662ae
Author: Miroslav Rezanina <mrezanin at redhat.com>
Date:   Tue Sep 3 11:23:08 2013 +0200

    Preparation for usb-bt-dongle conditional build
    
    To allow disabling the usb-bt-dongle device using the CONFIG_BLUETOOTH
    option, some of the functions in vl.c have to be made accessible from
    dev-bluetooth.c. This is pure code movement.
    
    Signed-off-by: Miroslav Rezanina <mrezanin at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/bt/core.c b/hw/bt/core.c
index 49012e0..0ffc948 100644
--- a/hw/bt/core.c
+++ b/hw/bt/core.c
@@ -119,3 +119,26 @@ void bt_device_done(struct bt_device_s *dev)
 
     *p = dev->next;
 }
+
+static struct bt_vlan_s {
+    struct bt_scatternet_s net;
+    int id;
+    struct bt_vlan_s *next;
+} *first_bt_vlan;
+
+/* find or alloc a new bluetooth "VLAN" */
+struct bt_scatternet_s *qemu_find_bt_vlan(int id)
+{
+    struct bt_vlan_s **pvlan, *vlan;
+    for (vlan = first_bt_vlan; vlan != NULL; vlan = vlan->next) {
+        if (vlan->id == id)
+            return &vlan->net;
+    }
+    vlan = g_malloc0(sizeof(struct bt_vlan_s));
+    vlan->id = id;
+    pvlan = &first_bt_vlan;
+    while (*pvlan != NULL)
+        pvlan = &(*pvlan)->next;
+    *pvlan = vlan;
+    return &vlan->net;
+}
diff --git a/hw/bt/hci.c b/hw/bt/hci.c
index d1c0604..7ea3dc6 100644
--- a/hw/bt/hci.c
+++ b/hw/bt/hci.c
@@ -429,6 +429,24 @@ static const uint8_t bt_event_reserved_mask[8] = {
     0xff, 0x9f, 0xfb, 0xff, 0x07, 0x18, 0x00, 0x00,
 };
 
+
+static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len)
+{
+}
+
+static int null_hci_addr_set(struct HCIInfo *hci, const uint8_t *bd_addr)
+{
+    return -ENOTSUP;
+}
+
+struct HCIInfo null_hci = {
+    .cmd_send = null_hci_send,
+    .sco_send = null_hci_send,
+    .acl_send = null_hci_send,
+    .bdaddr_set = null_hci_addr_set,
+};
+
+
 static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci,
                 int evt, int len)
 {
@@ -2176,6 +2194,36 @@ struct HCIInfo *bt_new_hci(struct bt_scatternet_s *net)
     return &s->info;
 }
 
+struct HCIInfo *hci_init(const char *str)
+{
+    char *endp;
+    struct bt_scatternet_s *vlan = 0;
+
+    if (!strcmp(str, "null"))
+        /* null */
+        return &null_hci;
+    else if (!strncmp(str, "host", 4) && (str[4] == '\0' || str[4] == ':'))
+        /* host[:hciN] */
+        return bt_host_hci(str[4] ? str + 5 : "hci0");
+    else if (!strncmp(str, "hci", 3)) {
+        /* hci[,vlan=n] */
+        if (str[3]) {
+            if (!strncmp(str + 3, ",vlan=", 6)) {
+                vlan = qemu_find_bt_vlan(strtol(str + 9, &endp, 0));
+                if (*endp)
+                    vlan = 0;
+            }
+        } else
+            vlan = qemu_find_bt_vlan(0);
+        if (vlan)
+           return bt_new_hci(vlan);
+    }
+
+    fprintf(stderr, "qemu: Unknown bluetooth HCI `%s'.\n", str);
+
+    return 0;
+}
+
 static void bt_hci_done(struct HCIInfo *info)
 {
     struct bt_hci_s *hci = hci_from_info(info);
diff --git a/include/hw/bt.h b/include/hw/bt.h
index 3f365bc..cb2a7e6 100644
--- a/include/hw/bt.h
+++ b/include/hw/bt.h
@@ -108,12 +108,15 @@ struct bt_device_s {
     uint16_t clkoff;	/* Note: Always little-endian */
 };
 
+extern struct HCIInfo null_hci;
 /* bt.c */
 void bt_device_init(struct bt_device_s *dev, struct bt_scatternet_s *net);
 void bt_device_done(struct bt_device_s *dev);
+struct bt_scatternet_s *qemu_find_bt_vlan(int id);
 
 /* bt-hci.c */
 struct HCIInfo *bt_new_hci(struct bt_scatternet_s *net);
+struct HCIInfo *hci_init(const char *str);
 
 /* bt-vhci.c */
 void bt_vhci_init(struct HCIInfo *info);
diff --git a/vl.c b/vl.c
index b4b119a..faefd9f 100644
--- a/vl.c
+++ b/vl.c
@@ -843,45 +843,6 @@ static int nb_hcis;
 static int cur_hci;
 static struct HCIInfo *hci_table[MAX_NICS];
 
-static struct bt_vlan_s {
-    struct bt_scatternet_s net;
-    int id;
-    struct bt_vlan_s *next;
-} *first_bt_vlan;
-
-/* find or alloc a new bluetooth "VLAN" */
-static struct bt_scatternet_s *qemu_find_bt_vlan(int id)
-{
-    struct bt_vlan_s **pvlan, *vlan;
-    for (vlan = first_bt_vlan; vlan != NULL; vlan = vlan->next) {
-        if (vlan->id == id)
-            return &vlan->net;
-    }
-    vlan = g_malloc0(sizeof(struct bt_vlan_s));
-    vlan->id = id;
-    pvlan = &first_bt_vlan;
-    while (*pvlan != NULL)
-        pvlan = &(*pvlan)->next;
-    *pvlan = vlan;
-    return &vlan->net;
-}
-
-static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len)
-{
-}
-
-static int null_hci_addr_set(struct HCIInfo *hci, const uint8_t *bd_addr)
-{
-    return -ENOTSUP;
-}
-
-static struct HCIInfo null_hci = {
-    .cmd_send = null_hci_send,
-    .sco_send = null_hci_send,
-    .acl_send = null_hci_send,
-    .bdaddr_set = null_hci_addr_set,
-};
-
 struct HCIInfo *qemu_next_hci(void)
 {
     if (cur_hci == nb_hcis)
@@ -890,36 +851,6 @@ struct HCIInfo *qemu_next_hci(void)
     return hci_table[cur_hci++];
 }
 
-static struct HCIInfo *hci_init(const char *str)
-{
-    char *endp;
-    struct bt_scatternet_s *vlan = 0;
-
-    if (!strcmp(str, "null"))
-        /* null */
-        return &null_hci;
-    else if (!strncmp(str, "host", 4) && (str[4] == '\0' || str[4] == ':'))
-        /* host[:hciN] */
-        return bt_host_hci(str[4] ? str + 5 : "hci0");
-    else if (!strncmp(str, "hci", 3)) {
-        /* hci[,vlan=n] */
-        if (str[3]) {
-            if (!strncmp(str + 3, ",vlan=", 6)) {
-                vlan = qemu_find_bt_vlan(strtol(str + 9, &endp, 0));
-                if (*endp)
-                    vlan = 0;
-            }
-        } else
-            vlan = qemu_find_bt_vlan(0);
-        if (vlan)
-           return bt_new_hci(vlan);
-    }
-
-    fprintf(stderr, "qemu: Unknown bluetooth HCI `%s'.\n", str);
-
-    return 0;
-}
-
 static int bt_hci_parse(const char *str)
 {
     struct HCIInfo *hci;
commit c60174e847082ab9f70720f86509a3353f816fad
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Aug 28 17:09:30 2013 +0200

    usb: sanity check setup_index+setup_len in post_load
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 82ca6a1..72d5b92 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -47,6 +47,10 @@ static int usb_device_post_load(void *opaque, int version_id)
     } else {
         dev->attached = 1;
     }
+    if (dev->setup_index >= sizeof(dev->data_buf) ||
+        dev->setup_len >= sizeof(dev->data_buf)) {
+        return -EINVAL;
+    }
     return 0;
 }
 
commit c58c7b959b93b864a27fd6b3646ee1465ab8832b
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 5 21:57:19 2013 +0200

    qxl: fix local renderer
    
    The local spice renderer assumes the primary surface is located at the
    start of the "ram" bar.  This used to be a requirement in qxl hardware
    revision 1.  In revision 2+ this is relaxed.  Nevertheless guest drivers
    continued to use the traditional location, for historical and backward
    compatibility reasons.  The qxl kms driver doesn't though as it depends
    on qxl revision 4+ anyway.
    
    Result is that local rendering is hosed for recent linux guests, you'll
    get pixel garbage with non-spice ui (gtk, sdl, vnc) and when doing
    screendumps.  Fix that by doing a proper mapping of the guest-specified
    memory location.
    
    https://bugzilla.redhat.com/show_bug.cgi?id=948717
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
index 269b1a7..d34b0c4 100644
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -31,10 +31,6 @@ static void qxl_blit(PCIQXLDevice *qxl, QXLRect *rect)
     if (is_buffer_shared(surface)) {
         return;
     }
-    if (!qxl->guest_primary.data) {
-        trace_qxl_render_blit_guest_primary_initialized();
-        qxl->guest_primary.data = memory_region_get_ram_ptr(&qxl->vga.vram);
-    }
     trace_qxl_render_blit(qxl->guest_primary.qxl_stride,
             rect->left, rect->right, rect->top, rect->bottom);
     src = qxl->guest_primary.data;
@@ -104,7 +100,12 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
 
     if (qxl->guest_primary.resized) {
         qxl->guest_primary.resized = 0;
-        qxl->guest_primary.data = memory_region_get_ram_ptr(&qxl->vga.vram);
+        qxl->guest_primary.data = qxl_phys2virt(qxl,
+                                                qxl->guest_primary.surface.mem,
+                                                MEMSLOT_GROUP_GUEST);
+        if (!qxl->guest_primary.data) {
+            return;
+        }
         qxl_set_rect_to_surface(qxl, &qxl->dirty[0]);
         qxl->num_dirty_rects = 1;
         trace_qxl_render_guest_primary_resized(
@@ -128,6 +129,10 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
         }
         dpy_gfx_replace_surface(vga->con, surface);
     }
+
+    if (!qxl->guest_primary.data) {
+        return;
+    }
     for (i = 0; i < qxl->num_dirty_rects; i++) {
         if (qemu_spice_rect_is_empty(qxl->dirty+i)) {
             break;
commit 18b203850a859f1d4688baa8a0ddb8e7af55962f
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Sep 5 17:30:05 2013 +0200

    qxl: trace io port name
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7649f2b..c50e285 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1541,8 +1541,9 @@ async_common:
     default:
         break;
     }
-    trace_qxl_io_write(d->id, qxl_mode_to_string(d->mode), addr, val, size,
-                       async);
+    trace_qxl_io_write(d->id, qxl_mode_to_string(d->mode),
+                       addr, io_port_to_string(addr),
+                       val, size, async);
 
     switch (io_port) {
     case QXL_IO_UPDATE_AREA:
diff --git a/trace-events b/trace-events
index 8285c5a..d4dba24 100644
--- a/trace-events
+++ b/trace-events
@@ -1059,7 +1059,7 @@ qxl_io_destroy_primary_ignored(int qid, const char *mode) "%d %s"
 qxl_io_log(int qid, const uint8_t *log_buf) "%d %s"
 qxl_io_read_unexpected(int qid) "%d"
 qxl_io_unexpected_vga_mode(int qid, uint64_t addr, uint64_t val, const char *desc) "%d 0x%"PRIx64"=%"PRIu64" (%s)"
-qxl_io_write(int qid, const char *mode, uint64_t addr, uint64_t val, unsigned size, int async) "%d %s addr=%"PRIu64 " val=%"PRIu64" size=%u async=%d"
+qxl_io_write(int qid, const char *mode, uint64_t addr, const char *aname, uint64_t val, unsigned size, int async) "%d %s addr=%"PRIu64 " (%s) val=%"PRIu64" size=%u async=%d"
 qxl_memslot_add_guest(int qid, uint32_t slot_id, uint64_t guest_start, uint64_t guest_end) "%d %u: guest phys 0x%"PRIx64 " - 0x%" PRIx64
 qxl_post_load(int qid, const char *mode) "%d %s"
 qxl_pre_load(int qid) "%d"
commit 6735aa99a43c70c09b53af190b24600a61178b95
Author: Christophe Fergeau <cfergeau at redhat.com>
Date:   Mon Sep 2 15:41:32 2013 +0200

    spice-core: Use g_strdup_printf instead of snprintf
    
    Several places in spice-core.c were using either g_malloc+snprintf
    or snprintf+g_strdup to achieve the same result as g_strdup_printf.
    
    Signed-off-by: Christophe Fergeau <cfergeau at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/ui/spice-core.c b/ui/spice-core.c
index 3a2cd7e..33ef837 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -511,7 +511,9 @@ SpiceInfo *qmp_query_spice(Error **errp)
     int port, tls_port;
     const char *addr;
     SpiceInfo *info;
-    char version_string[20]; /* 12 = |255.255.255\0| is the max */
+    unsigned int major;
+    unsigned int minor;
+    unsigned int micro;
 
     info = g_malloc0(sizeof(*info));
 
@@ -534,11 +536,10 @@ SpiceInfo *qmp_query_spice(Error **errp)
     info->host = g_strdup(addr ? addr : "0.0.0.0");
 
     info->has_compiled_version = true;
-    snprintf(version_string, sizeof(version_string), "%d.%d.%d",
-             (SPICE_SERVER_VERSION & 0xff0000) >> 16,
-             (SPICE_SERVER_VERSION & 0xff00) >> 8,
-             SPICE_SERVER_VERSION & 0xff);
-    info->compiled_version = g_strdup(version_string);
+    major = (SPICE_SERVER_VERSION & 0xff0000) >> 16;
+    minor = (SPICE_SERVER_VERSION & 0xff00) >> 8;
+    micro = SPICE_SERVER_VERSION & 0xff;
+    info->compiled_version = g_strdup_printf("%d.%d.%d", major, minor, micro);
 
     if (port) {
         info->has_port = true;
@@ -640,7 +641,7 @@ void qemu_spice_init(void)
     char *x509_key_file = NULL,
         *x509_cert_file = NULL,
         *x509_cacert_file = NULL;
-    int port, tls_port, len, addr_flags;
+    int port, tls_port, addr_flags;
     spice_image_compression_t compression;
     spice_wan_compression_t wan_compr;
     bool seamless_migration;
@@ -671,30 +672,29 @@ void qemu_spice_init(void)
         if (NULL == x509_dir) {
             x509_dir = ".";
         }
-        len = strlen(x509_dir) + 32;
 
         str = qemu_opt_get(opts, "x509-key-file");
         if (str) {
             x509_key_file = g_strdup(str);
         } else {
-            x509_key_file = g_malloc(len);
-            snprintf(x509_key_file, len, "%s/%s", x509_dir, X509_SERVER_KEY_FILE);
+            x509_key_file = g_strdup_printf("%s/%s", x509_dir,
+                                            X509_SERVER_KEY_FILE);
         }
 
         str = qemu_opt_get(opts, "x509-cert-file");
         if (str) {
             x509_cert_file = g_strdup(str);
         } else {
-            x509_cert_file = g_malloc(len);
-            snprintf(x509_cert_file, len, "%s/%s", x509_dir, X509_SERVER_CERT_FILE);
+            x509_cert_file = g_strdup_printf("%s/%s", x509_dir,
+                                             X509_SERVER_CERT_FILE);
         }
 
         str = qemu_opt_get(opts, "x509-cacert-file");
         if (str) {
             x509_cacert_file = g_strdup(str);
         } else {
-            x509_cacert_file = g_malloc(len);
-            snprintf(x509_cacert_file, len, "%s/%s", x509_dir, X509_CA_CERT_FILE);
+            x509_cacert_file = g_strdup_printf("%s/%s", x509_dir,
+                                               X509_CA_CERT_FILE);
         }
 
         x509_key_password = qemu_opt_get(opts, "x509-key-password");
commit e2682db06a6c218f149ff990959c31f3b3d82003
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:39 2013 -0400

    QMP/qemu-ga-client: Make timeout longer for guest-fsfreeze-freeze command
    
    The guest-fsfreeze-freeze command can take longer than 3 seconds when heavy
    disk I/O is running. To avoid unexpected timeouts, this changes the timeout
    to 60 seconds (the timeout of the pre-commit phase of VSS).
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/QMP/qemu-ga-client b/QMP/qemu-ga-client
index 46676c3..b5f7e7c 100755
--- a/QMP/qemu-ga-client
+++ b/QMP/qemu-ga-client
@@ -267,7 +267,9 @@ def main(address, cmd, args):
             print('Hint: qemu is not running?')
         sys.exit(1)
 
-    if cmd != 'ping':
+    if cmd == 'fsfreeze' and args[0] == 'freeze':
+        client.sync(60)
+    elif cmd != 'ping':
         client.sync()
 
     globals()['_cmd_' + cmd](client, args)
commit f311f2c20a1e33c1e5fdb50ee21e69a5bf26c950
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:32 2013 -0400

    qemu-ga: Install Windows VSS provider on `qemu-ga -s install'
    
    Register the QGA VSS provider library with Windows when qemu-ga is installed
    as a Windows service ('-s install' option). It is deregistered when the
    service is uninstalled ('-s uninstall' option).
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/qga/main.c b/qga/main.c
index 0e04e73..6c746c8 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -34,6 +34,7 @@
 #include "qemu/bswap.h"
 #ifdef _WIN32
 #include "qga/service-win32.h"
+#include "qga/vss-win32.h"
 #include <windows.h>
 #endif
 #ifdef __linux__
@@ -1031,8 +1032,15 @@ int main(int argc, char **argv)
                 fixed_state_dir = (state_dir == dfl_pathnames.state_dir) ?
                                   NULL :
                                   state_dir;
-                return ga_install_service(path, log_filepath, fixed_state_dir);
+                if (ga_install_vss_provider()) {
+                    return EXIT_FAILURE;
+                }
+                if (ga_install_service(path, log_filepath, fixed_state_dir)) {
+                    return EXIT_FAILURE;
+                }
+                return 0;
             } else if (strcmp(service, "uninstall") == 0) {
+                ga_uninstall_vss_provider();
                 return ga_uninstall_service();
             } else {
                 printf("Unknown service command.\n");
diff --git a/qga/vss-win32.c b/qga/vss-win32.c
index 89c0f3b..24c4288 100644
--- a/qga/vss-win32.c
+++ b/qga/vss-win32.c
@@ -119,6 +119,31 @@ bool vss_initialized(void)
     return !!provider_lib;
 }
 
+int ga_install_vss_provider(void)
+{
+    HRESULT hr;
+
+    if (!vss_init(false)) {
+        fprintf(stderr, "Installation of VSS provider is skipped. "
+                "fsfreeze will be disabled.\n");
+        return 0;
+    }
+    hr = call_vss_provider_func("COMRegister");
+    vss_deinit(false);
+
+    return SUCCEEDED(hr) ? 0 : EXIT_FAILURE;
+}
+
+void ga_uninstall_vss_provider(void)
+{
+    if (!vss_init(false)) {
+        fprintf(stderr, "Removal of VSS provider is skipped.\n");
+        return;
+    }
+    call_vss_provider_func("COMUnregister");
+    vss_deinit(false);
+}
+
 /* Call VSS requester and freeze/thaw filesystems and applications */
 void qga_vss_fsfreeze(int *nr_volume, Error **err, bool freeze)
 {
diff --git a/qga/vss-win32.h b/qga/vss-win32.h
index eac669c..db8fbe5 100644
--- a/qga/vss-win32.h
+++ b/qga/vss-win32.h
@@ -19,6 +19,9 @@ bool vss_init(bool init_requester);
 void vss_deinit(bool deinit_requester);
 bool vss_initialized(void);
 
+int ga_install_vss_provider(void);
+void ga_uninstall_vss_provider(void);
+
 void qga_vss_fsfreeze(int *nr_volume, Error **err, bool freeze);
 
 #endif
commit 64c003174039d0c63ea2bef48d600363ce80a58b
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:25 2013 -0400

    qemu-ga: Call Windows VSS requester in fsfreeze command handler
    
    Support guest-fsfreeze-freeze and guest-fsfreeze-thaw commands for Windows
    guests. When fsfreeze command is issued, it calls the VSS requester to
    freeze filesystems and applications. On thaw command, it again tells the VSS
    requester to thaw them.
    
    This also adds calling of initialize functions for the VSS requester.
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/qga/Makefile.objs b/qga/Makefile.objs
index c4bd151..1c5986c 100644
--- a/qga/Makefile.objs
+++ b/qga/Makefile.objs
@@ -1,6 +1,7 @@
 qga-obj-y = commands.o guest-agent-command-state.o main.o
 qga-obj-$(CONFIG_POSIX) += commands-posix.o channel-posix.o
 qga-obj-$(CONFIG_WIN32) += commands-win32.o channel-win32.o service-win32.o
+qga-obj-$(CONFIG_WIN32) += vss-win32.o
 qga-obj-y += qapi-generated/qga-qapi-types.o qapi-generated/qga-qapi-visit.o
 qga-obj-y += qapi-generated/qga-qmp-marshal.o
 
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 24e4ad0..7a37f5c 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -15,6 +15,7 @@
 #include <wtypes.h>
 #include <powrprof.h>
 #include "qga/guest-agent-core.h"
+#include "qga/vss-win32.h"
 #include "qga-qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 
@@ -156,27 +157,89 @@ void qmp_guest_file_flush(int64_t handle, Error **err)
  */
 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **err)
 {
-    error_set(err, QERR_UNSUPPORTED);
-    return 0;
+    if (!vss_initialized()) {
+        error_set(err, QERR_UNSUPPORTED);
+        return 0;
+    }
+
+    if (ga_is_frozen(ga_state)) {
+        return GUEST_FSFREEZE_STATUS_FROZEN;
+    }
+
+    return GUEST_FSFREEZE_STATUS_THAWED;
 }
 
 /*
- * Walk list of mounted file systems in the guest, and freeze the ones which
- * are real local file systems.
+ * Freeze local file systems using Volume Shadow-copy Service.
+ * The frozen state is limited for up to 10 seconds by VSS.
  */
 int64_t qmp_guest_fsfreeze_freeze(Error **err)
 {
-    error_set(err, QERR_UNSUPPORTED);
+    int i;
+    Error *local_err = NULL;
+
+    if (!vss_initialized()) {
+        error_set(err, QERR_UNSUPPORTED);
+        return 0;
+    }
+
+    slog("guest-fsfreeze called");
+
+    /* cannot risk guest agent blocking itself on a write in this state */
+    ga_set_frozen(ga_state);
+
+    qga_vss_fsfreeze(&i, err, true);
+    if (error_is_set(err)) {
+        goto error;
+    }
+
+    return i;
+
+error:
+    qmp_guest_fsfreeze_thaw(&local_err);
+    if (error_is_set(&local_err)) {
+        g_debug("cleanup thaw: %s", error_get_pretty(local_err));
+        error_free(local_err);
+    }
     return 0;
 }
 
 /*
- * Walk list of frozen file systems in the guest, and thaw them.
+ * Thaw local file systems using Volume Shadow-copy Service.
  */
 int64_t qmp_guest_fsfreeze_thaw(Error **err)
 {
-    error_set(err, QERR_UNSUPPORTED);
-    return 0;
+    int i;
+
+    if (!vss_initialized()) {
+        error_set(err, QERR_UNSUPPORTED);
+        return 0;
+    }
+
+    qga_vss_fsfreeze(&i, err, false);
+
+    ga_unset_frozen(ga_state);
+    return i;
+}
+
+static void guest_fsfreeze_cleanup(void)
+{
+    Error *err = NULL;
+
+    if (!vss_initialized()) {
+        return;
+    }
+
+    if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
+        qmp_guest_fsfreeze_thaw(&err);
+        if (err) {
+            slog("failed to clean up frozen filesystems: %s",
+                 error_get_pretty(err));
+            error_free(err);
+        }
+    }
+
+    vss_deinit(true);
 }
 
 /*
@@ -354,4 +417,7 @@ int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
 /* register init/cleanup routines for stateful command groups */
 void ga_command_state_init(GAState *s, GACommandState *cs)
 {
+    if (vss_init(true)) {
+        ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
+    }
 }
diff --git a/qga/vss-win32.c b/qga/vss-win32.c
new file mode 100644
index 0000000..89c0f3b
--- /dev/null
+++ b/qga/vss-win32.c
@@ -0,0 +1,141 @@
+/*
+ * QEMU Guest Agent VSS utility functions
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include <windows.h>
+#include "qga/guest-agent-core.h"
+#include "qga/vss-win32.h"
+#include "qga/vss-win32/requester.h"
+
+#define QGA_VSS_DLL "qga-vss.dll"
+
+static HMODULE provider_lib;
+
+/* Call a function in qga-vss.dll with the specified name */
+static HRESULT call_vss_provider_func(const char *func_name)
+{
+    FARPROC WINAPI func;
+
+    g_assert(provider_lib);
+
+    func = GetProcAddress(provider_lib, func_name);
+    if (!func) {
+        char *msg;
+        FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                      FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(),
+                      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                      (char *)&msg, 0, NULL);
+        fprintf(stderr, "failed to load %s from %s: %s",
+                func_name, QGA_VSS_DLL, msg);
+        LocalFree(msg);
+        return E_FAIL;
+    }
+
+    return func();
+}
+
+/* Check whether this OS version supports VSS providers */
+static bool vss_check_os_version(void)
+{
+    OSVERSIONINFO OSver;
+
+    OSver.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+    GetVersionEx(&OSver);
+    if ((OSver.dwMajorVersion == 5 && OSver.dwMinorVersion >= 2) ||
+       OSver.dwMajorVersion > 5) {
+        BOOL wow64 = false;
+#ifndef _WIN64
+        /* Provider doesn't work under WOW64 (32bit agent on 64bit OS) */
+        if (!IsWow64Process(GetCurrentProcess(), &wow64)) {
+            fprintf(stderr, "failed to IsWow64Process (Error: %lx\n)\n",
+                    GetLastError());
+            return false;
+        }
+        if (wow64) {
+            fprintf(stderr, "Warning: Running under WOW64\n");
+        }
+#endif
+        return !wow64;
+    }
+    return false;
+}
+
+/* Load qga-vss.dll */
+bool vss_init(bool init_requester)
+{
+    if (!vss_check_os_version()) {
+        /* Do nothing if OS doesn't support providers. */
+        fprintf(stderr, "VSS provider is not supported in this OS version: "
+                "fsfreeze is disabled.\n");
+        return false;
+    }
+
+    provider_lib = LoadLibraryA(QGA_VSS_DLL);
+    if (!provider_lib) {
+        char *msg;
+        FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                      FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(),
+                      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                      (char *)&msg, 0, NULL);
+        fprintf(stderr, "failed to load %s: %sfsfreeze is disabled\n",
+                QGA_VSS_DLL, msg);
+        LocalFree(msg);
+        return false;
+    }
+
+    if (init_requester) {
+        HRESULT hr = call_vss_provider_func("requester_init");
+        if (FAILED(hr)) {
+            fprintf(stderr, "fsfreeze is disabled.\n");
+            vss_deinit(false);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Unload qga-vss.dll */
+void vss_deinit(bool deinit_requester)
+{
+    if (deinit_requester) {
+        call_vss_provider_func("requester_deinit");
+    }
+    FreeLibrary(provider_lib);
+    provider_lib = NULL;
+}
+
+bool vss_initialized(void)
+{
+    return !!provider_lib;
+}
+
+/* Call VSS requester and freeze/thaw filesystems and applications */
+void qga_vss_fsfreeze(int *nr_volume, Error **err, bool freeze)
+{
+    const char *func_name = freeze ? "requester_freeze" : "requester_thaw";
+    QGAVSSRequesterFunc func;
+    ErrorSet errset = {
+        .error_set = (ErrorSetFunc)error_set_win32,
+        .errp = (void **)err,
+        .err_class = ERROR_CLASS_GENERIC_ERROR
+    };
+
+    func = (QGAVSSRequesterFunc)GetProcAddress(provider_lib, func_name);
+    if (!func) {
+        error_setg_win32(err, GetLastError(), "failed to load %s from %s",
+                         func_name, QGA_VSS_DLL);
+        return;
+    }
+
+    func(nr_volume, &errset);
+}
diff --git a/qga/vss-win32.h b/qga/vss-win32.h
new file mode 100644
index 0000000..eac669c
--- /dev/null
+++ b/qga/vss-win32.h
@@ -0,0 +1,24 @@
+/*
+ * QEMU Guest Agent VSS utility declarations
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VSS_WIN32_H
+#define VSS_WIN32_H
+
+#include "qapi/error.h"
+
+bool vss_init(bool init_requester);
+void vss_deinit(bool deinit_requester);
+bool vss_initialized(void);
+
+void qga_vss_fsfreeze(int *nr_volume, Error **err, bool freeze);
+
+#endif
commit b39297aedfabe9b2c426cd540413be991500da25
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:18 2013 -0400

    qemu-ga: Add Windows VSS provider and requester as DLL
    
    Adds VSS provider and requester as a qga-vss.dll, which is loaded by
    Windows VSS service as well as by qemu-ga.
    
    "provider.cpp" implements a basic stub of a software VSS provider.
    Currently, this module only relays a frozen event from VSS service to the
    agent, and thaw event from the agent to VSS service, to block VSS process
    to keep the system frozen while snapshots are taken at the host.
    
    To register the provider to the guest system as COM+ application, the type
    library (.tlb) for qga-vss.dll is required. To build it from COM IDL (.idl),
    VisualC++, MIDL and stdole2.tlb in Windows SDK are required. This patch also
    adds pre-compiled .tlb file in the repository in order to enable
    cross-compile qemu-ga.exe for Windows with VSS support.
    
    "requester.cpp" provides the VSS requester to kick the VSS snapshot process.
    Qemu-ga.exe works without the DLL, although fsfreeze features are disabled.
    
    These functions are only supported in Windows 2003 or later. In older
    systems, fsfreeze features are disabled.
    
    In several versions of Windows which don't support attribute
    VSS_VOLSNAP_ATTR_NO_AUTORECOVERY, DoSnapshotSet fails with error
    VSS_E_OBJECT_NOT_FOUND. In this patch, we just ignore this error.
    To solve this fundamentally, we need a framework to handle mount writable
    snapshot on guests, which is required by VSS auto-recovery feature
    (cleanup phase after a snapshot is taken).
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/Makefile b/Makefile
index 2fb0e5a..362fe3e 100644
--- a/Makefile
+++ b/Makefile
@@ -235,7 +235,7 @@ clean:
 	rm -f qemu-options.def
 	find . -name '*.[oda]' -type f -exec rm -f {} +
 	find . -name '*.l[oa]' -type f -exec rm -f {} +
-	rm -f $(TOOLS) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
+	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -Rf .libs
 	rm -f qemu-img-cmds.h
 	@# May not be present in GENERATED_HEADERS
diff --git a/Makefile.objs b/Makefile.objs
index f46a4cd..2b6c1fe 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -109,6 +109,7 @@ version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 # FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
 # by libqemuutil.a.  These should be moved to a separate .json schema.
 qga-obj-y = qga/ qapi-types.o qapi-visit.o
+qga-vss-dll-obj-y = qga/
 
 vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
 
@@ -120,6 +121,7 @@ nested-vars += \
 	stub-obj-y \
 	util-obj-y \
 	qga-obj-y \
+	qga-vss-dll-obj-y \
 	block-obj-y \
 	common-obj-y
 dummy := $(call unnest-vars)
diff --git a/configure b/configure
index abc29ab..cf61f75 100755
--- a/configure
+++ b/configure
@@ -3568,8 +3568,11 @@ if test "$softmmu" = yes ; then
   fi
 fi
 if [ "$guest_agent" != "no" ]; then
-  if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
+  if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" -o "$mingw32" = "yes" ] ; then
       tools="qemu-ga\$(EXESUF) $tools"
+      if [ "$mingw32" = "yes" -a "$guest_agent_with_vss" = "yes" ]; then
+        tools="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb $tools"
+      fi
       guest_agent=yes
   elif [ "$guest_agent" != yes ]; then
       guest_agent=no
diff --git a/qga/Makefile.objs b/qga/Makefile.objs
index b8d7cd0..c4bd151 100644
--- a/qga/Makefile.objs
+++ b/qga/Makefile.objs
@@ -3,3 +3,5 @@ qga-obj-$(CONFIG_POSIX) += commands-posix.o channel-posix.o
 qga-obj-$(CONFIG_WIN32) += commands-win32.o channel-win32.o service-win32.o
 qga-obj-y += qapi-generated/qga-qapi-types.o qapi-generated/qga-qapi-visit.o
 qga-obj-y += qapi-generated/qga-qmp-marshal.o
+
+qga-vss-dll-obj-$(CONFIG_QGA_VSS) += vss-win32/
diff --git a/qga/vss-win32/Makefile.objs b/qga/vss-win32/Makefile.objs
new file mode 100644
index 0000000..6a69d50
--- /dev/null
+++ b/qga/vss-win32/Makefile.objs
@@ -0,0 +1,23 @@
+# rules to build qga-vss.dll
+
+qga-vss-dll-obj-y += requester.o provider.o install.o
+
+obj-qga-vss-dll-obj-y = $(addprefix $(obj)/, $(qga-vss-dll-obj-y))
+$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Wold-style-declaration -Wold-style-definition -Wredundant-decls -fstack-protector-all, $(QEMU_CFLAGS)) -Wno-unknown-pragmas -Wno-delete-non-virtual-dtor
+
+$(obj)/qga-vss.dll: LDFLAGS = -shared -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi -luuid -static
+$(obj)/qga-vss.dll: $(obj-qga-vss-dll-obj-y) $(SRC_PATH)/$(obj)/qga-vss.def
+	$(call quiet-command,$(CXX) -o $@ $(qga-vss-dll-obj-y) $(SRC_PATH)/qga/vss-win32/qga-vss.def $(CXXFLAGS) $(LDFLAGS),"  LINK  $(TARGET_DIR)$@")
+
+
+# rules to build qga-vss.tlb
+# Currently, only native build is supported because building .tlb
+# (TypeLibrary) from .idl requires WindowsSDK and MIDL (and cl.exe in VC++).
+MIDL=$(WIN_SDK)/Bin/midl
+
+$(obj)/qga-vss.tlb: $(SRC_PATH)/$(obj)/qga-vss.idl
+ifeq ($(WIN_SDK),"")
+	$(call quiet-command,cp $(dir $<)qga-vss.tlb $@, "  COPY  $(TARGET_DIR)$@")
+else
+	$(call quiet-command,$(MIDL) -tlb $@ -I $(WIN_SDK)/Include $<,"  MIDL  $(TARGET_DIR)$@")
+endif
diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp
new file mode 100644
index 0000000..37731a7
--- /dev/null
+++ b/qga/vss-win32/install.cpp
@@ -0,0 +1,458 @@
+/*
+ * QEMU Guest Agent win32 VSS Provider installer
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "vss-common.h"
+#include "inc/win2003/vscoordint.h"
+
+#include <comadmin.h>
+#include <wbemidl.h>
+#include <comdef.h>
+#include <comutil.h>
+
+extern HINSTANCE g_hinstDll;
+
+const GUID CLSID_COMAdminCatalog = { 0xF618C514, 0xDFB8, 0x11d1,
+    {0xA2, 0xCF, 0x00, 0x80, 0x5F, 0xC7, 0x92, 0x35} };
+const GUID IID_ICOMAdminCatalog = { 0xDD662187, 0xDFC2, 0x11d1,
+    {0xA2, 0xCF, 0x00, 0x80, 0x5F, 0xC7, 0x92, 0x35} };
+const GUID CLSID_WbemLocator = { 0x4590f811, 0x1d3a, 0x11d0,
+    {0x89, 0x1f, 0x00, 0xaa, 0x00, 0x4b, 0x2e, 0x24} };
+const GUID IID_IWbemLocator = { 0xdc12a687, 0x737f, 0x11cf,
+    {0x88, 0x4d, 0x00, 0xaa, 0x00, 0x4b, 0x2e, 0x24} };
+
+void errmsg(DWORD err, const char *text)
+{
+    /*
+     * `text' contains function call statement when errmsg is called via chk().
+     * To make error message more readable, we cut off the text after '('.
+     * If text doesn't contain '(', negative precision is given, which is
+     * treated as though it were missing.
+     */
+    char *msg = NULL, *nul = strchr(text, '(');
+    int len = nul ? nul - text : -1;
+
+    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                  FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+                  NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                  (char *)&msg, 0, NULL);
+    fprintf(stderr, "%.*s. (Error: %lx) %s\n", len, text, err, msg);
+    LocalFree(msg);
+}
+
+static void errmsg_dialog(DWORD err, const char *text, const char *opt = "")
+{
+    char *msg, buf[512];
+
+    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                  FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+                  NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                  (char *)&msg, 0, NULL);
+    snprintf(buf, sizeof(buf), "%s%s. (Error: %lx) %s", text, opt, err, msg);
+    MessageBox(NULL, buf, "Error from " QGA_PROVIDER_NAME, MB_OK|MB_ICONERROR);
+    LocalFree(msg);
+}
+
+#define _chk(hr, status, msg, err_label)        \
+    do {                                        \
+        hr = (status);                          \
+        if (FAILED(hr)) {                       \
+            errmsg(hr, msg);                    \
+            goto err_label;                     \
+        }                                       \
+    } while (0)
+
+#define chk(status) _chk(hr, status, "Failed to " #status, out)
+
+void __stdcall _com_issue_error(HRESULT hr)
+{
+    errmsg(hr, "Unexpected error in COM");
+}
+
+template<class T>
+HRESULT put_Value(ICatalogObject *pObj, LPCWSTR name, T val)
+{
+    return pObj->put_Value(_bstr_t(name), _variant_t(val));
+}
+
+/* Lookup Administrators group name from winmgmt */
+static HRESULT GetAdminName(_bstr_t *name)
+{
+    HRESULT hr;
+    COMPointer<IWbemLocator> pLoc;
+    COMPointer<IWbemServices> pSvc;
+    COMPointer<IEnumWbemClassObject> pEnum;
+    COMPointer<IWbemClassObject> pWobj;
+    ULONG returned;
+    _variant_t var;
+
+    chk(CoCreateInstance(CLSID_WbemLocator, NULL, CLSCTX_INPROC_SERVER,
+                         IID_IWbemLocator, (LPVOID *)pLoc.replace()));
+    chk(pLoc->ConnectServer(_bstr_t(L"ROOT\\CIMV2"), NULL, NULL, NULL,
+                            0, 0, 0, pSvc.replace()));
+    chk(CoSetProxyBlanket(pSvc, RPC_C_AUTHN_WINNT, RPC_C_AUTHZ_NONE,
+                          NULL, RPC_C_AUTHN_LEVEL_CALL,
+                          RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE));
+    chk(pSvc->ExecQuery(_bstr_t(L"WQL"),
+                        _bstr_t(L"select * from Win32_Account where "
+                                "SID='S-1-5-32-544' and localAccount=TRUE"),
+                        WBEM_FLAG_RETURN_IMMEDIATELY | WBEM_FLAG_FORWARD_ONLY,
+                        NULL, pEnum.replace()));
+    if (!pEnum) {
+        hr = E_FAIL;
+        errmsg(hr, "Failed to query for Administrators");
+        goto out;
+    }
+    chk(pEnum->Next(WBEM_INFINITE, 1, pWobj.replace(), &returned));
+    if (returned == 0) {
+        hr = E_FAIL;
+        errmsg(hr, "No Administrators found");
+        goto out;
+    }
+
+    chk(pWobj->Get(_bstr_t(L"Name"), 0, &var, 0, 0));
+    try {
+        *name = var;
+    } catch(...) {
+        hr = E_FAIL;
+        errmsg(hr, "Failed to get name of Administrators");
+        goto out;
+    }
+
+out:
+    return hr;
+}
+
+/* Find and iterate QGA VSS provider in COM+ Application Catalog */
+static HRESULT QGAProviderFind(
+    HRESULT (*found)(ICatalogCollection *, int, void *), void *arg)
+{
+    HRESULT hr;
+    COMInitializer initializer;
+    COMPointer<IUnknown> pUnknown;
+    COMPointer<ICOMAdminCatalog> pCatalog;
+    COMPointer<ICatalogCollection> pColl;
+    COMPointer<ICatalogObject> pObj;
+    _variant_t var;
+    long i, n;
+
+    chk(CoCreateInstance(CLSID_COMAdminCatalog, NULL, CLSCTX_INPROC_SERVER,
+                         IID_IUnknown, (void **)pUnknown.replace()));
+    chk(pUnknown->QueryInterface(IID_ICOMAdminCatalog,
+                                 (void **)pCatalog.replace()));
+    chk(pCatalog->GetCollection(_bstr_t(L"Applications"),
+                                (IDispatch **)pColl.replace()));
+    chk(pColl->Populate());
+
+    chk(pColl->get_Count(&n));
+    for (i = n - 1; i >= 0; i--) {
+        chk(pColl->get_Item(i, (IDispatch **)pObj.replace()));
+        chk(pObj->get_Value(_bstr_t(L"Name"), &var));
+        if (var == _variant_t(QGA_PROVIDER_LNAME)) {
+            if (FAILED(found(pColl, i, arg))) {
+                goto out;
+            }
+        }
+    }
+    chk(pColl->SaveChanges(&n));
+
+out:
+    return hr;
+}
+
+/* Count QGA VSS provider in COM+ Application Catalog */
+static HRESULT QGAProviderCount(ICatalogCollection *coll, int i, void *arg)
+{
+    (*(int *)arg)++;
+    return S_OK;
+}
+
+/* Remove QGA VSS provider from COM+ Application Catalog Collection */
+static HRESULT QGAProviderRemove(ICatalogCollection *coll, int i, void *arg)
+{
+    HRESULT hr;
+
+    fprintf(stderr, "Removing COM+ Application: %s\n", QGA_PROVIDER_NAME);
+    chk(coll->Remove(i));
+out:
+    return hr;
+}
+
+/* Unregister this module from COM+ Applications Catalog */
+STDAPI COMUnregister(void)
+{
+    HRESULT hr;
+
+    DllUnregisterServer();
+    chk(QGAProviderFind(QGAProviderRemove, NULL));
+out:
+    return hr;
+}
+
+/* Register this module to COM+ Applications Catalog */
+STDAPI COMRegister(void)
+{
+    HRESULT hr;
+    COMInitializer initializer;
+    COMPointer<IUnknown> pUnknown;
+    COMPointer<ICOMAdminCatalog> pCatalog;
+    COMPointer<ICatalogCollection> pApps, pRoles, pUsersInRole;
+    COMPointer<ICatalogObject> pObj;
+    long n;
+    _bstr_t name;
+    _variant_t key;
+    CHAR dllPath[MAX_PATH], tlbPath[MAX_PATH];
+    bool unregisterOnFailure = false;
+    int count = 0;
+
+    if (!g_hinstDll) {
+        errmsg(E_FAIL, "Failed to initialize DLL");
+        return E_FAIL;
+    }
+
+    chk(QGAProviderFind(QGAProviderCount, (void *)&count));
+    if (count) {
+        errmsg(E_ABORT, "QGA VSS Provider is already installed");
+        return E_ABORT;
+    }
+
+    chk(CoCreateInstance(CLSID_COMAdminCatalog, NULL, CLSCTX_INPROC_SERVER,
+                         IID_IUnknown, (void **)pUnknown.replace()));
+    chk(pUnknown->QueryInterface(IID_ICOMAdminCatalog,
+                                 (void **)pCatalog.replace()));
+
+    /* Install COM+ Component */
+
+    chk(pCatalog->GetCollection(_bstr_t(L"Applications"),
+                                (IDispatch **)pApps.replace()));
+    chk(pApps->Populate());
+    chk(pApps->Add((IDispatch **)&pObj));
+    chk(put_Value(pObj, L"Name",        QGA_PROVIDER_LNAME));
+    chk(put_Value(pObj, L"Description", QGA_PROVIDER_LNAME));
+    chk(put_Value(pObj, L"ApplicationAccessChecksEnabled", true));
+    chk(put_Value(pObj, L"Authentication",                 short(6)));
+    chk(put_Value(pObj, L"AuthenticationCapability",       short(2)));
+    chk(put_Value(pObj, L"ImpersonationLevel",             short(2)));
+    chk(pApps->SaveChanges(&n));
+
+    /* The app should be deleted if something fails after SaveChanges */
+    unregisterOnFailure = true;
+
+    chk(pObj->get_Key(&key));
+
+    if (!GetModuleFileName(g_hinstDll, dllPath, sizeof(dllPath))) {
+        hr = HRESULT_FROM_WIN32(GetLastError());
+        errmsg(hr, "GetModuleFileName failed");
+        goto out;
+    }
+    n = strlen(dllPath);
+    if (n < 3) {
+        hr = E_FAIL;
+        errmsg(hr, "Failed to lookup dll");
+        goto out;
+    }
+    strcpy(tlbPath, dllPath);
+    strcpy(tlbPath+n-3, "tlb");
+    fprintf(stderr, "Registering " QGA_PROVIDER_NAME ":\n");
+    fprintf(stderr, "  %s\n", dllPath);
+    fprintf(stderr, "  %s\n", tlbPath);
+    if (!PathFileExists(tlbPath)) {
+        hr = HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND);
+        errmsg(hr, "Failed to lookup tlb");
+        goto out;
+    }
+
+    chk(pCatalog->InstallComponent(_bstr_t(QGA_PROVIDER_LNAME),
+                                   _bstr_t(dllPath), _bstr_t(tlbPath),
+                                   _bstr_t("")));
+
+    /* Setup roles of the application */
+
+    chk(pApps->GetCollection(_bstr_t(L"Roles"), key,
+                             (IDispatch **)pRoles.replace()));
+    chk(pRoles->Populate());
+    chk(pRoles->Add((IDispatch **)pObj.replace()));
+    chk(put_Value(pObj, L"Name",        L"Administrators"));
+    chk(put_Value(pObj, L"Description", L"Administrators group"));
+    chk(pRoles->SaveChanges(&n));
+    chk(pObj->get_Key(&key));
+
+    /* Setup users in the role */
+
+    chk(pRoles->GetCollection(_bstr_t(L"UsersInRole"), key,
+                              (IDispatch **)pUsersInRole.replace()));
+    chk(pUsersInRole->Populate());
+
+    chk(pUsersInRole->Add((IDispatch **)pObj.replace()));
+    chk(GetAdminName(&name));
+    chk(put_Value(pObj, L"User", _bstr_t(".\\") + name));
+
+    chk(pUsersInRole->Add((IDispatch **)pObj.replace()));
+    chk(put_Value(pObj, L"User", L"SYSTEM"));
+    chk(pUsersInRole->SaveChanges(&n));
+
+out:
+    if (unregisterOnFailure && FAILED(hr)) {
+        COMUnregister();
+    }
+
+    return hr;
+}
+
+
+static BOOL CreateRegistryKey(LPCTSTR key, LPCTSTR value, LPCTSTR data)
+{
+    HKEY  hKey;
+    LONG  ret;
+    DWORD size;
+
+    ret = RegCreateKeyEx(HKEY_CLASSES_ROOT, key, 0, NULL,
+        REG_OPTION_NON_VOLATILE, KEY_WRITE, NULL, &hKey, NULL);
+    if (ret != ERROR_SUCCESS) {
+        goto out;
+    }
+
+    if (data != NULL) {
+        size = strlen(data) + 1;
+    } else {
+        size = 0;
+    }
+
+    ret = RegSetValueEx(hKey, value, 0, REG_SZ, (LPBYTE)data, size);
+    RegCloseKey(hKey);
+
+out:
+    if (ret != ERROR_SUCCESS) {
+        /* As we cannot printf within DllRegisterServer(), show a dialog. */
+        errmsg_dialog(ret, "Cannot add registry", key);
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/* Register this dll as a VSS provider */
+STDAPI DllRegisterServer(void)
+{
+    COMInitializer initializer;
+    COMPointer<IVssAdmin> pVssAdmin;
+    HRESULT hr = E_FAIL;
+    char dllPath[MAX_PATH];
+    char key[256];
+
+    if (!g_hinstDll) {
+        errmsg_dialog(hr, "Module instance is not available");
+        goto out;
+    }
+
+    /* Add this module to registry */
+
+    sprintf(key, "CLSID\\%s", g_szClsid);
+    if (!CreateRegistryKey(key, NULL, g_szClsid)) {
+        goto out;
+    }
+
+    if (!GetModuleFileName(g_hinstDll, dllPath, sizeof(dllPath))) {
+        errmsg_dialog(GetLastError(), "GetModuleFileName failed");
+        goto out;
+    }
+
+    sprintf(key, "CLSID\\%s\\InprocServer32", g_szClsid);
+    if (!CreateRegistryKey(key, NULL, dllPath)) {
+        goto out;
+    }
+
+    if (!CreateRegistryKey(key, "ThreadingModel", "Apartment")) {
+        goto out;
+    }
+
+    sprintf(key, "CLSID\\%s\\ProgID", g_szClsid);
+    if (!CreateRegistryKey(key, NULL, g_szProgid)) {
+        goto out;
+    }
+
+    if (!CreateRegistryKey(g_szProgid, NULL, QGA_PROVIDER_NAME)) {
+        goto out;
+    }
+
+    sprintf(key, "%s\\CLSID", g_szProgid);
+    if (!CreateRegistryKey(key, NULL, g_szClsid)) {
+        goto out;
+    }
+
+    hr = CoCreateInstance(CLSID_VSSCoordinator, NULL, CLSCTX_ALL,
+                          IID_IVssAdmin, (void **)pVssAdmin.replace());
+    if (FAILED(hr)) {
+        errmsg_dialog(hr, "CoCreateInstance(VSSCoordinator) failed");
+        goto out;
+    }
+
+    hr = pVssAdmin->RegisterProvider(g_gProviderId, CLSID_QGAVSSProvider,
+                                     const_cast<WCHAR*>(QGA_PROVIDER_LNAME),
+                                     VSS_PROV_SOFTWARE,
+                                     const_cast<WCHAR*>(QGA_PROVIDER_VERSION),
+                                     g_gProviderVersion);
+    if (FAILED(hr)) {
+        errmsg_dialog(hr, "RegisterProvider failed");
+    }
+
+out:
+    if (FAILED(hr)) {
+        DllUnregisterServer();
+    }
+
+    return hr;
+}
+
+/* Unregister this VSS software provider from the system */
+STDAPI DllUnregisterServer(void)
+{
+    TCHAR key[256];
+    COMInitializer initializer;
+    COMPointer<IVssAdmin> pVssAdmin;
+
+    HRESULT hr = CoCreateInstance(CLSID_VSSCoordinator,
+                                  NULL, CLSCTX_ALL, IID_IVssAdmin,
+                                  (void **)pVssAdmin.replace());
+    if (SUCCEEDED(hr)) {
+        hr = pVssAdmin->UnregisterProvider(g_gProviderId);
+    } else {
+        errmsg(hr, "CoCreateInstance(VSSCoordinator) failed");
+    }
+
+    sprintf(key, "CLSID\\%s", g_szClsid);
+    SHDeleteKey(HKEY_CLASSES_ROOT, key);
+    SHDeleteKey(HKEY_CLASSES_ROOT, g_szProgid);
+
+    return S_OK; /* Uninstall should never fail */
+}
+
+
+/* Support function to convert ASCII string into BSTR (used in _bstr_t) */
+namespace _com_util
+{
+    BSTR WINAPI ConvertStringToBSTR(const char *ascii) {
+        int len = strlen(ascii);
+        BSTR bstr = SysAllocStringLen(NULL, len);
+
+        if (!bstr) {
+            return NULL;
+        }
+
+        if (mbstowcs(bstr, ascii, len) == (size_t)-1) {
+            fprintf(stderr, "Failed to convert string '%s' into BSTR", ascii);
+            bstr[0] = 0;
+        }
+        return bstr;
+    }
+}
diff --git a/qga/vss-win32/provider.cpp b/qga/vss-win32/provider.cpp
new file mode 100644
index 0000000..bf42b5e
--- /dev/null
+++ b/qga/vss-win32/provider.cpp
@@ -0,0 +1,523 @@
+/*
+ * QEMU Guest Agent win32 VSS Provider implementations
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "vss-common.h"
+#include "inc/win2003/vscoordint.h"
+#include "inc/win2003/vsprov.h"
+
+#define VSS_TIMEOUT_MSEC (60*1000)
+
+static long g_nComObjsInUse;
+HINSTANCE g_hinstDll;
+
+/* VSS common GUID's */
+
+const CLSID CLSID_VSSCoordinator = { 0xE579AB5F, 0x1CC4, 0x44b4,
+    {0xBE, 0xD9, 0xDE, 0x09, 0x91, 0xFF, 0x06, 0x23} };
+const IID IID_IVssAdmin = { 0x77ED5996, 0x2F63, 0x11d3,
+    {0x8A, 0x39, 0x00, 0xC0, 0x4F, 0x72, 0xD8, 0xE3} };
+
+const IID IID_IVssHardwareSnapshotProvider = { 0x9593A157, 0x44E9, 0x4344,
+    {0xBB, 0xEB, 0x44, 0xFB, 0xF9, 0xB0, 0x6B, 0x10} };
+const IID IID_IVssSoftwareSnapshotProvider = { 0x609e123e, 0x2c5a, 0x44d3,
+    {0x8f, 0x01, 0x0b, 0x1d, 0x9a, 0x47, 0xd1, 0xff} };
+const IID IID_IVssProviderCreateSnapshotSet = { 0x5F894E5B, 0x1E39, 0x4778,
+    {0x8E, 0x23, 0x9A, 0xBA, 0xD9, 0xF0, 0xE0, 0x8C} };
+const IID IID_IVssProviderNotifications = { 0xE561901F, 0x03A5, 0x4afe,
+    {0x86, 0xD0, 0x72, 0xBA, 0xEE, 0xCE, 0x70, 0x04} };
+
+const IID IID_IVssEnumObject = { 0xAE1C7110, 0x2F60, 0x11d3,
+    {0x8A, 0x39, 0x00, 0xC0, 0x4F, 0x72, 0xD8, 0xE3} };
+
+
+/*
+ * Atomically adjust g_nComObjsInUse, the count consulted by
+ * DllCanUnloadNow(): a non-zero lock increments it, zero decrements it.
+ */
+void LockModule(BOOL lock)
+{
+    if (!lock) {
+        InterlockedDecrement(&g_nComObjsInUse);
+        return;
+    }
+    InterlockedIncrement(&g_nComObjsInUse);
+}
+
+/*
+ * Empty enumerator for VssObject
+ *
+ * IVssEnumObject implementation whose Next() never yields an element.
+ * The object is reference counted through AddRef()/Release() and holds a
+ * module lock (LockModule) for as long as it exists.
+ */
+
+class CQGAVSSEnumObject : public IVssEnumObject
+{
+public:
+    STDMETHODIMP QueryInterface(REFIID riid, void **ppObj);
+    STDMETHODIMP_(ULONG) AddRef();
+    STDMETHODIMP_(ULONG) Release();
+
+    /* IVssEnumObject Methods */
+    STDMETHODIMP Next(
+        ULONG celt, VSS_OBJECT_PROP *rgelt, ULONG *pceltFetched);
+    STDMETHODIMP Skip(ULONG celt);
+    STDMETHODIMP Reset(void);
+    STDMETHODIMP Clone(IVssEnumObject **ppenum);
+
+    /* CQGAVSSEnumObject Methods */
+    CQGAVSSEnumObject();
+    ~CQGAVSSEnumObject();
+
+private:
+    long m_nRefCount; /* COM reference count; the constructor sets it to 0 */
+};
+
+/* Start with no references and account for the new object in the module. */
+CQGAVSSEnumObject::CQGAVSSEnumObject()
+    : m_nRefCount(0)
+{
+    LockModule(TRUE);
+}
+
+/* Drop the module lock taken by the constructor. */
+CQGAVSSEnumObject::~CQGAVSSEnumObject()
+{
+    LockModule(FALSE);
+}
+
+/*
+ * Hand out the IVssEnumObject pointer for IID_IUnknown and
+ * IID_IVssEnumObject (adding a reference); any other interface yields
+ * E_NOINTERFACE with *ppObj set to NULL.
+ */
+STDMETHODIMP CQGAVSSEnumObject::QueryInterface(REFIID riid, void **ppObj)
+{
+    if (!(riid == IID_IUnknown || riid == IID_IVssEnumObject)) {
+        *ppObj = NULL;
+        return E_NOINTERFACE;
+    }
+
+    IVssEnumObject *self = static_cast<IVssEnumObject*>(this);
+    *ppObj = static_cast<void*>(self);
+    AddRef();
+    return S_OK;
+}
+
+/* Atomically take one more reference; returns the updated count. */
+STDMETHODIMP_(ULONG) CQGAVSSEnumObject::AddRef()
+{
+    long updated = InterlockedIncrement(&m_nRefCount);
+    return updated;
+}
+
+/*
+ * Drop one reference and destroy the object once the count reaches zero.
+ * Returns the reference count after the decrement.
+ */
+STDMETHODIMP_(ULONG) CQGAVSSEnumObject::Release()
+{
+    long nRefCount = InterlockedDecrement(&m_nRefCount);
+    /*
+     * Decide on the value returned by the atomic decrement. Re-reading
+     * m_nRefCount could observe another thread's AddRef()/Release() and
+     * lead to a missed or duplicated delete.
+     */
+    if (nRefCount == 0) {
+        delete this;
+    }
+    return nRefCount;
+}
+
+/* The enumeration is empty: report zero fetched elements and S_FALSE. */
+STDMETHODIMP CQGAVSSEnumObject::Next(
+    ULONG celt, VSS_OBJECT_PROP *rgelt, ULONG *pceltFetched)
+{
+    (void)celt;
+    (void)rgelt;
+    *pceltFetched = 0;
+    return S_FALSE;
+}
+
+/* Nothing can be skipped in an empty enumeration: always S_FALSE. */
+STDMETHODIMP CQGAVSSEnumObject::Skip(ULONG celt)
+{
+    (void)celt;
+    return S_FALSE;
+}
+
+/* There is no position to rewind; always succeeds. */
+STDMETHODIMP CQGAVSSEnumObject::Reset(void)
+{
+    const HRESULT result = S_OK;
+    return result;
+}
+
+/* Cloning is not implemented; *ppenum is left untouched. */
+STDMETHODIMP CQGAVSSEnumObject::Clone(IVssEnumObject **ppenum)
+{
+    (void)ppenum;
+    return E_NOTIMPL;
+}
+
+
+/*
+ * QGAVssProvider
+ *
+ * COM object implementing IVssSoftwareSnapshotProvider,
+ * IVssProviderCreateSnapshotSet and IVssProviderNotifications.
+ * Most methods only return a fixed status. CommitSnapshots() is the one
+ * that signals the "frozen" event and waits for the "thaw" event.
+ */
+
+class CQGAVssProvider :
+    public IVssSoftwareSnapshotProvider,
+    public IVssProviderCreateSnapshotSet,
+    public IVssProviderNotifications
+{
+public:
+    STDMETHODIMP QueryInterface(REFIID riid, void **ppObj);
+    STDMETHODIMP_(ULONG) AddRef();
+    STDMETHODIMP_(ULONG) Release();
+
+    /* IVssSoftwareSnapshotProvider Methods */
+    STDMETHODIMP SetContext(LONG lContext);
+    STDMETHODIMP GetSnapshotProperties(
+        VSS_ID SnapshotId, VSS_SNAPSHOT_PROP *pProp);
+    STDMETHODIMP Query(
+        VSS_ID QueriedObjectId, VSS_OBJECT_TYPE eQueriedObjectType,
+        VSS_OBJECT_TYPE eReturnedObjectsType, IVssEnumObject **ppEnum);
+    STDMETHODIMP DeleteSnapshots(
+        VSS_ID SourceObjectId, VSS_OBJECT_TYPE eSourceObjectType,
+        BOOL bForceDelete, LONG *plDeletedSnapshots,
+        VSS_ID *pNondeletedSnapshotID);
+    STDMETHODIMP BeginPrepareSnapshot(
+        VSS_ID SnapshotSetId, VSS_ID SnapshotId,
+        VSS_PWSZ pwszVolumeName, LONG lNewContext);
+    STDMETHODIMP IsVolumeSupported(
+        VSS_PWSZ pwszVolumeName, BOOL *pbSupportedByThisProvider);
+    STDMETHODIMP IsVolumeSnapshotted(
+        VSS_PWSZ pwszVolumeName, BOOL *pbSnapshotsPresent,
+        LONG *plSnapshotCompatibility);
+    STDMETHODIMP SetSnapshotProperty(
+        VSS_ID SnapshotId, VSS_SNAPSHOT_PROPERTY_ID eSnapshotPropertyId,
+        VARIANT vProperty);
+    STDMETHODIMP RevertToSnapshot(VSS_ID SnapshotId);
+    STDMETHODIMP QueryRevertStatus(VSS_PWSZ pwszVolume, IVssAsync **ppAsync);
+
+    /* IVssProviderCreateSnapshotSet Methods */
+    STDMETHODIMP EndPrepareSnapshots(VSS_ID SnapshotSetId);
+    STDMETHODIMP PreCommitSnapshots(VSS_ID SnapshotSetId);
+    STDMETHODIMP CommitSnapshots(VSS_ID SnapshotSetId);
+    STDMETHODIMP PostCommitSnapshots(
+        VSS_ID SnapshotSetId, LONG lSnapshotsCount);
+    STDMETHODIMP PreFinalCommitSnapshots(VSS_ID SnapshotSetId);
+    STDMETHODIMP PostFinalCommitSnapshots(VSS_ID SnapshotSetId);
+    STDMETHODIMP AbortSnapshots(VSS_ID SnapshotSetId);
+
+    /* IVssProviderNotifications Methods */
+    STDMETHODIMP OnLoad(IUnknown *pCallback);
+    STDMETHODIMP OnUnload(BOOL bForceUnload);
+
+    /* CQGAVssProvider Methods */
+    CQGAVssProvider();
+    ~CQGAVssProvider();
+
+private:
+    long m_nRefCount; /* COM reference count; the constructor sets it to 0 */
+};
+
+/* Start with no references and account for the new object in the module. */
+CQGAVssProvider::CQGAVssProvider()
+    : m_nRefCount(0)
+{
+    LockModule(TRUE);
+}
+
+/* Drop the module lock taken by the constructor. */
+CQGAVssProvider::~CQGAVssProvider()
+{
+    LockModule(FALSE);
+}
+
+/*
+ * Return the pointer matching riid and add a reference. Supported IIDs are
+ * IUnknown, IVssSoftwareSnapshotProvider, IVssProviderCreateSnapshotSet and
+ * IVssProviderNotifications; anything else yields E_NOINTERFACE with
+ * *ppObj set to NULL.
+ */
+STDMETHODIMP CQGAVssProvider::QueryInterface(REFIID riid, void **ppObj)
+{
+    void *itf = NULL;
+
+    if (riid == IID_IUnknown) {
+        itf = static_cast<void*>(this);
+    } else if (riid == IID_IVssSoftwareSnapshotProvider) {
+        itf = static_cast<void*>(
+            static_cast<IVssSoftwareSnapshotProvider*>(this));
+    } else if (riid == IID_IVssProviderCreateSnapshotSet) {
+        itf = static_cast<void*>(
+            static_cast<IVssProviderCreateSnapshotSet*>(this));
+    } else if (riid == IID_IVssProviderNotifications) {
+        itf = static_cast<void*>(
+            static_cast<IVssProviderNotifications*>(this));
+    }
+
+    *ppObj = itf;
+    if (itf == NULL) {
+        return E_NOINTERFACE;
+    }
+    AddRef();
+    return S_OK;
+}
+
+/* Atomically take one more reference; returns the updated count. */
+STDMETHODIMP_(ULONG) CQGAVssProvider::AddRef()
+{
+    long updated = InterlockedIncrement(&m_nRefCount);
+    return updated;
+}
+
+/*
+ * Drop one reference and destroy the object once the count reaches zero.
+ * Returns the reference count after the decrement.
+ */
+STDMETHODIMP_(ULONG) CQGAVssProvider::Release()
+{
+    long nRefCount = InterlockedDecrement(&m_nRefCount);
+    /*
+     * Decide on the value returned by the atomic decrement. Re-reading
+     * m_nRefCount could observe another thread's AddRef()/Release() and
+     * lead to a missed or duplicated delete.
+     */
+    if (nRefCount == 0) {
+        delete this;
+    }
+    return nRefCount;
+}
+
+
+/*
+ * IVssSoftwareSnapshotProvider methods
+ */
+
+/* The requested context is ignored; always reports success. */
+STDMETHODIMP CQGAVssProvider::SetContext(LONG lContext)
+{
+    (void)lContext;
+    return S_OK;
+}
+
+/* Always answers VSS_E_OBJECT_NOT_FOUND; *pProp is never written. */
+STDMETHODIMP CQGAVssProvider::GetSnapshotProperties(
+    VSS_ID SnapshotId, VSS_SNAPSHOT_PROP *pProp)
+{
+    (void)SnapshotId;
+    (void)pProp;
+    return VSS_E_OBJECT_NOT_FOUND;
+}
+
+/*
+ * Answer every query with a new, empty enumerator holding one reference.
+ * Returns E_OUTOFMEMORY if creating the enumerator throws.
+ */
+STDMETHODIMP CQGAVssProvider::Query(
+    VSS_ID QueriedObjectId, VSS_OBJECT_TYPE eQueriedObjectType,
+    VSS_OBJECT_TYPE eReturnedObjectsType, IVssEnumObject **ppEnum)
+{
+    CQGAVSSEnumObject *enumerator;
+
+    try {
+        enumerator = new CQGAVSSEnumObject;
+    } catch (...) {
+        return E_OUTOFMEMORY;
+    }
+    *ppEnum = enumerator;
+    enumerator->AddRef();
+    return S_OK;
+}
+
+/* Not implemented: returns E_NOTIMPL without touching the out parameters. */
+STDMETHODIMP CQGAVssProvider::DeleteSnapshots(
+    VSS_ID SourceObjectId, VSS_OBJECT_TYPE eSourceObjectType,
+    BOOL bForceDelete, LONG *plDeletedSnapshots, VSS_ID *pNondeletedSnapshotID)
+{
+    (void)SourceObjectId;
+    (void)eSourceObjectType;
+    (void)bForceDelete;
+    (void)plDeletedSnapshots;
+    (void)pNondeletedSnapshotID;
+    return E_NOTIMPL;
+}
+
+/* No preparation is performed; always reports success. */
+STDMETHODIMP CQGAVssProvider::BeginPrepareSnapshot(
+    VSS_ID SnapshotSetId, VSS_ID SnapshotId,
+    VSS_PWSZ pwszVolumeName, LONG lNewContext)
+{
+    (void)SnapshotSetId;
+    (void)SnapshotId;
+    (void)pwszVolumeName;
+    (void)lNewContext;
+    return S_OK;
+}
+
+/* Every volume is reported as supported, whatever its name. */
+STDMETHODIMP CQGAVssProvider::IsVolumeSupported(
+    VSS_PWSZ pwszVolumeName, BOOL *pbSupportedByThisProvider)
+{
+    (void)pwszVolumeName;
+    *pbSupportedByThisProvider = TRUE;
+    return S_OK;
+}
+
+/* Reports that no snapshot exists: FALSE and a compatibility value of 0. */
+STDMETHODIMP CQGAVssProvider::IsVolumeSnapshotted(VSS_PWSZ pwszVolumeName,
+    BOOL *pbSnapshotsPresent, LONG *plSnapshotCompatibility)
+{
+    (void)pwszVolumeName;
+    *pbSnapshotsPresent = FALSE;
+    *plSnapshotCompatibility = 0;
+    return S_OK;
+}
+
+/* Not implemented: always returns E_NOTIMPL. */
+STDMETHODIMP CQGAVssProvider::SetSnapshotProperty(VSS_ID SnapshotId,
+    VSS_SNAPSHOT_PROPERTY_ID eSnapshotPropertyId, VARIANT vProperty)
+{
+    (void)SnapshotId;
+    (void)eSnapshotPropertyId;
+    (void)vProperty;
+    return E_NOTIMPL;
+}
+
+/* Not implemented: always returns E_NOTIMPL. */
+STDMETHODIMP CQGAVssProvider::RevertToSnapshot(VSS_ID SnapshotId)
+{
+    (void)SnapshotId;
+    return E_NOTIMPL;
+}
+
+/* Not implemented: returns E_NOTIMPL and leaves *ppAsync untouched. */
+STDMETHODIMP CQGAVssProvider::QueryRevertStatus(
+    VSS_PWSZ pwszVolume, IVssAsync **ppAsync)
+{
+    (void)pwszVolume;
+    (void)ppAsync;
+    return E_NOTIMPL;
+}
+
+
+/*
+ * IVssProviderCreateSnapshotSet methods
+ */
+
+/* Nothing to finish here; always reports success. */
+STDMETHODIMP CQGAVssProvider::EndPrepareSnapshots(VSS_ID SnapshotSetId)
+{
+    (void)SnapshotSetId;
+    return S_OK;
+}
+
+/* No pre-commit work is done; always reports success. */
+STDMETHODIMP CQGAVssProvider::PreCommitSnapshots(VSS_ID SnapshotSetId)
+{
+    (void)SnapshotSetId;
+    return S_OK;
+}
+
+/*
+ * Signal that the system is frozen and wait for the thaw request.
+ *
+ * Opens the three named events (frozen, thaw, timeout), sets the frozen
+ * event and then waits up to VSS_TIMEOUT_MSEC for the thaw event.
+ * Returns S_OK when the thaw event arrives, E_ABORT when the wait ends
+ * any other way (the timeout event is set in that case), and E_FAIL when
+ * one of the events cannot be opened.
+ */
+STDMETHODIMP CQGAVssProvider::CommitSnapshots(VSS_ID SnapshotSetId)
+{
+    HRESULT hr = S_OK;
+    HANDLE hEventFrozen, hEventThaw, hEventTimeout;
+
+    /* OpenEvent() reports failure with NULL, not INVALID_HANDLE_VALUE */
+    hEventFrozen = OpenEvent(EVENT_ALL_ACCESS, FALSE, EVENT_NAME_FROZEN);
+    if (!hEventFrozen) {
+        return E_FAIL;
+    }
+
+    hEventThaw = OpenEvent(EVENT_ALL_ACCESS, FALSE, EVENT_NAME_THAW);
+    if (!hEventThaw) {
+        CloseHandle(hEventFrozen);
+        return E_FAIL;
+    }
+
+    hEventTimeout = OpenEvent(EVENT_ALL_ACCESS, FALSE, EVENT_NAME_TIMEOUT);
+    if (!hEventTimeout) {
+        CloseHandle(hEventFrozen);
+        CloseHandle(hEventThaw);
+        return E_FAIL;
+    }
+
+    /* Send event to qemu-ga to notify filesystem is frozen */
+    SetEvent(hEventFrozen);
+
+    /* Wait until the snapshot is taken by the host. */
+    if (WaitForSingleObject(hEventThaw, VSS_TIMEOUT_MSEC) != WAIT_OBJECT_0) {
+        /* Send event to qemu-ga to notify the provider is timed out */
+        SetEvent(hEventTimeout);
+        hr = E_ABORT;
+    }
+
+    CloseHandle(hEventThaw);
+    CloseHandle(hEventFrozen);
+    CloseHandle(hEventTimeout);
+    return hr;
+}
+
+/* No post-commit work is done; always reports success. */
+STDMETHODIMP CQGAVssProvider::PostCommitSnapshots(
+    VSS_ID SnapshotSetId, LONG lSnapshotsCount)
+{
+    (void)SnapshotSetId;
+    (void)lSnapshotsCount;
+    return S_OK;
+}
+
+/* No work before the final commit; always reports success. */
+STDMETHODIMP CQGAVssProvider::PreFinalCommitSnapshots(VSS_ID SnapshotSetId)
+{
+    (void)SnapshotSetId;
+    return S_OK;
+}
+
+/* No work after the final commit; always reports success. */
+STDMETHODIMP CQGAVssProvider::PostFinalCommitSnapshots(VSS_ID SnapshotSetId)
+{
+    (void)SnapshotSetId;
+    return S_OK;
+}
+
+/* There is no provider state to roll back; always reports success. */
+STDMETHODIMP CQGAVssProvider::AbortSnapshots(VSS_ID SnapshotSetId)
+{
+    (void)SnapshotSetId;
+    return S_OK;
+}
+
+/*
+ * IVssProviderNotifications methods
+ */
+
+/* Load notification: the callback is ignored and success is returned. */
+STDMETHODIMP CQGAVssProvider::OnLoad(IUnknown *pCallback)
+{
+    (void)pCallback;
+    return S_OK;
+}
+
+/* Unload notification: nothing to tear down, success is returned. */
+STDMETHODIMP CQGAVssProvider::OnUnload(BOOL bForceUnload)
+{
+    (void)bForceUnload;
+    return S_OK;
+}
+
+
+/*
+ * CQGAVssProviderFactory class
+ *
+ * IClassFactory implementation whose CreateInstance() builds
+ * CQGAVssProvider objects. LockServer() is not implemented and always
+ * returns E_NOTIMPL.
+ */
+
+class CQGAVssProviderFactory : public IClassFactory
+{
+public:
+    STDMETHODIMP QueryInterface(REFIID riid, void **ppv);
+    STDMETHODIMP_(ULONG) AddRef();
+    STDMETHODIMP_(ULONG) Release();
+    STDMETHODIMP CreateInstance(
+        IUnknown *pUnknownOuter, REFIID iid, void **ppv);
+    STDMETHODIMP LockServer(BOOL lock) { return E_NOTIMPL; }
+
+    CQGAVssProviderFactory();
+    ~CQGAVssProviderFactory();
+
+private:
+    long m_nRefCount; /* COM reference count; the constructor sets it to 0 */
+};
+
+/* Start with no references and account for the new object in the module. */
+CQGAVssProviderFactory::CQGAVssProviderFactory()
+    : m_nRefCount(0)
+{
+    LockModule(TRUE);
+}
+
+/* Drop the module lock taken by the constructor. */
+CQGAVssProviderFactory::~CQGAVssProviderFactory()
+{
+    LockModule(FALSE);
+}
+
+/*
+ * Hand out this object for IID_IUnknown and IID_IClassFactory (adding a
+ * reference); any other interface yields E_NOINTERFACE with *ppv = NULL.
+ */
+STDMETHODIMP CQGAVssProviderFactory::QueryInterface(REFIID riid, void **ppv)
+{
+    if (!(riid == IID_IUnknown || riid == IID_IClassFactory)) {
+        *ppv = NULL;
+        return E_NOINTERFACE;
+    }
+
+    *ppv = static_cast<void*>(this);
+    AddRef();
+    return S_OK;
+}
+
+/* Atomically take one more reference; returns the updated count. */
+STDMETHODIMP_(ULONG) CQGAVssProviderFactory::AddRef()
+{
+    long updated = InterlockedIncrement(&m_nRefCount);
+    return updated;
+}
+
+/*
+ * Drop one reference and destroy the factory once the count reaches zero.
+ * Returns the reference count after the decrement.
+ */
+STDMETHODIMP_(ULONG) CQGAVssProviderFactory::Release()
+{
+    long nRefCount = InterlockedDecrement(&m_nRefCount);
+    /*
+     * Decide on the value returned by the atomic decrement. Re-reading
+     * m_nRefCount could observe another thread's AddRef()/Release() and
+     * lead to a missed or duplicated delete.
+     */
+    if (nRefCount == 0) {
+        delete this;
+    }
+    return nRefCount;
+}
+
+/*
+ * Create a CQGAVssProvider and return the interface requested by iid.
+ * Aggregation is refused with CLASS_E_NOAGGREGATION, a throwing allocation
+ * maps to E_OUTOFMEMORY, and the new object is deleted again when it does
+ * not support iid.
+ */
+STDMETHODIMP CQGAVssProviderFactory::CreateInstance(
+    IUnknown *pUnknownOuter, REFIID iid, void **ppv)
+{
+    if (pUnknownOuter) {
+        return CLASS_E_NOAGGREGATION;
+    }
+
+    CQGAVssProvider *provider;
+    try {
+        provider = new CQGAVssProvider;
+    } catch (...) {
+        return E_OUTOFMEMORY;
+    }
+
+    HRESULT result = provider->QueryInterface(iid, ppv);
+    if (FAILED(result)) {
+        delete provider;
+    }
+    return result;
+}
+
+
+/*
+ * DLL functions
+ */
+
+/*
+ * Create a class factory and return the interface requested by riid.
+ * rclsid is not inspected. The temporary reference taken here is released
+ * before returning, so the factory survives only if QueryInterface()
+ * handed out a reference.
+ */
+STDAPI DllGetClassObject(REFCLSID rclsid, REFIID riid, LPVOID *ppv)
+{
+    CQGAVssProviderFactory *pFactory;
+
+    try {
+        pFactory = new CQGAVssProviderFactory;
+    } catch (...) {
+        return E_OUTOFMEMORY;
+    }
+
+    pFactory->AddRef();
+    const HRESULT result = pFactory->QueryInterface(riid, ppv);
+    pFactory->Release();
+    return result;
+}
+
+/* S_OK when no COM object of this module is alive, S_FALSE otherwise. */
+STDAPI DllCanUnloadNow()
+{
+    if (g_nComObjsInUse != 0) {
+        return S_FALSE;
+    }
+    return S_OK;
+}
+
+/*
+ * DLL entry point. On process attach the module handle is stored in
+ * g_hinstDll and per-thread notifications are disabled; every call
+ * returns TRUE.
+ */
+EXTERN_C
+BOOL WINAPI DllMain(HINSTANCE hinstDll, DWORD dwReason, LPVOID lpReserved)
+{
+    (void)lpReserved;
+
+    if (dwReason != DLL_PROCESS_ATTACH) {
+        return TRUE;
+    }
+
+    g_hinstDll = hinstDll;
+    DisableThreadLibraryCalls(hinstDll);
+    return TRUE;
+}
diff --git a/qga/vss-win32/qga-vss.def b/qga/vss-win32/qga-vss.def
new file mode 100644
index 0000000..927782c
--- /dev/null
+++ b/qga/vss-win32/qga-vss.def
@@ -0,0 +1,13 @@
+LIBRARY      "QGA-PROVIDER.DLL"
+
+EXPORTS
+	COMRegister		PRIVATE
+	COMUnregister		PRIVATE
+	DllCanUnloadNow		PRIVATE
+	DllGetClassObject	PRIVATE
+	DllRegisterServer	PRIVATE
+	DllUnregisterServer	PRIVATE
+	requester_init		PRIVATE
+	requester_deinit	PRIVATE
+	requester_freeze	PRIVATE
+	requester_thaw		PRIVATE
diff --git a/qga/vss-win32/qga-vss.idl b/qga/vss-win32/qga-vss.idl
new file mode 100644
index 0000000..17abca0
--- /dev/null
+++ b/qga/vss-win32/qga-vss.idl
@@ -0,0 +1,20 @@
+import "oaidl.idl";
+import "ocidl.idl";
+
+[
+    uuid(103B8142-6CE5-48A7-BDE1-794D3192FCF1),
+    version(1.0),
+    helpstring("QGAVSSProvider Type Library")
+]
+library QGAVSSHWProviderLib
+{
+    importlib("stdole2.tlb");
+    [
+        uuid(6E6A3492-8D4D-440C-9619-5E5D0CC31CA8),
+        helpstring("QGAVSSProvider Class")
+    ]
+    coclass QGAVSSHWProvider
+    {
+        [default] interface IUnknown;
+    };
+};
diff --git a/qga/vss-win32/qga-vss.tlb b/qga/vss-win32/qga-vss.tlb
new file mode 100644
index 0000000..226452a
Binary files /dev/null and b/qga/vss-win32/qga-vss.tlb differ
diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp
new file mode 100644
index 0000000..1e8dd3d
--- /dev/null
+++ b/qga/vss-win32/requester.cpp
@@ -0,0 +1,507 @@
+/*
+ * QEMU Guest Agent win32 VSS Requester implementations
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "vss-common.h"
+#include "requester.h"
+#include "assert.h"
+#include "inc/win2003/vswriter.h"
+#include "inc/win2003/vsbackup.h"
+
+/* Max wait time for frozen event (VSS can only hold writes for 10 seconds) */
+#define VSS_TIMEOUT_FREEZE_MSEC 10000
+
+/* Call QueryStatus every 10 ms while waiting for frozen event */
+#define VSS_TIMEOUT_EVENT_MSEC 10
+
+#define err_set(e, err, fmt, ...) \
+    ((e)->error_set((e)->errp, err, (e)->err_class, fmt, ## __VA_ARGS__))
+#define err_is_set(e) ((e)->errp && *(e)->errp)
+
+
+/* Handle to VSSAPI.DLL */
+static HMODULE hLib;
+
+/* Functions in VSSAPI.DLL */
+typedef HRESULT(STDAPICALLTYPE * t_CreateVssBackupComponents)(
+    OUT IVssBackupComponents**);
+typedef void(APIENTRY * t_VssFreeSnapshotProperties)(IN VSS_SNAPSHOT_PROP*);
+static t_CreateVssBackupComponents pCreateVssBackupComponents;
+static t_VssFreeSnapshotProperties pVssFreeSnapshotProperties;
+
+/* Variables used while applications and filesystems are frozen by VSS */
+static struct QGAVSSContext {
+    IVssBackupComponents *pVssbc;  /* VSS requester interface */
+    IVssAsync *pAsyncSnapshot;     /* async info of VSS snapshot operation */
+    HANDLE hEventFrozen;           /* notify fs/writer freeze from provider */
+    HANDLE hEventThaw;             /* request provider to thaw */
+    HANDLE hEventTimeout;          /* notify timeout in provider */
+    int cFrozenVols;               /* number of frozen volumes */
+} vss_ctx;
+
+/*
+ * Prepare the requester for use.
+ *
+ * Marks the three event handles in vss_ctx as unused, initializes COM
+ * security, loads VSSAPI.DLL and resolves CreateVssBackupComponents and
+ * VssFreeSnapshotProperties from it. Returns S_OK on success, otherwise
+ * the failing HRESULT (Win32 errors are wrapped with HRESULT_FROM_WIN32).
+ */
+STDAPI requester_init(void)
+{
+    DWORD win32_err;
+
+    vss_ctx.hEventFrozen =  INVALID_HANDLE_VALUE;
+    vss_ctx.hEventThaw = INVALID_HANDLE_VALUE;
+    vss_ctx.hEventTimeout = INVALID_HANDLE_VALUE;
+
+    COMInitializer initializer; /* to call CoInitializeSecurity */
+    HRESULT hr = CoInitializeSecurity(
+        NULL, -1, NULL, NULL, RPC_C_AUTHN_LEVEL_PKT_PRIVACY,
+        RPC_C_IMP_LEVEL_IDENTIFY, NULL, EOAC_NONE, NULL);
+    if (FAILED(hr)) {
+        fprintf(stderr, "failed to CoInitializeSecurity (error %lx)\n", hr);
+        return hr;
+    }
+
+    /*
+     * In the failure paths below the Win32 error is captured once, before
+     * fprintf() gets a chance to disturb it. A disturbed value of 0 would
+     * make HRESULT_FROM_WIN32() report S_OK for a failed initialization.
+     */
+    hLib = LoadLibraryA("VSSAPI.DLL");
+    if (!hLib) {
+        win32_err = GetLastError();
+        fprintf(stderr, "failed to load VSSAPI.DLL\n");
+        return HRESULT_FROM_WIN32(win32_err);
+    }
+
+    pCreateVssBackupComponents = (t_CreateVssBackupComponents)
+        GetProcAddress(hLib,
+#ifdef _WIN64 /* 64bit environment */
+        "?CreateVssBackupComponents@@YAJPEAPEAVIVssBackupComponents@@@Z"
+#else /* 32bit environment */
+        "?CreateVssBackupComponents@@YGJPAPAVIVssBackupComponents@@@Z"
+#endif
+        );
+    if (!pCreateVssBackupComponents) {
+        win32_err = GetLastError();
+        fprintf(stderr, "failed to get proc address from VSSAPI.DLL\n");
+        return HRESULT_FROM_WIN32(win32_err);
+    }
+
+    pVssFreeSnapshotProperties = (t_VssFreeSnapshotProperties)
+        GetProcAddress(hLib, "VssFreeSnapshotProperties");
+    if (!pVssFreeSnapshotProperties) {
+        win32_err = GetLastError();
+        fprintf(stderr, "failed to get proc address from VSSAPI.DLL\n");
+        return HRESULT_FROM_WIN32(win32_err);
+    }
+
+    return S_OK;
+}
+
+/* Close *ph unless it already holds the "unused" marker, then mark it. */
+static void close_event_handle(HANDLE *ph)
+{
+    if (*ph == INVALID_HANDLE_VALUE) {
+        return;
+    }
+    CloseHandle(*ph);
+    *ph = INVALID_HANDLE_VALUE;
+}
+
+/*
+ * Release everything recorded in vss_ctx: the three event handles, the
+ * pending snapshot async object and the backup components interface.
+ * The frozen-volume counter is reset to 0.
+ */
+static void requester_cleanup(void)
+{
+    close_event_handle(&vss_ctx.hEventFrozen);
+    close_event_handle(&vss_ctx.hEventThaw);
+    close_event_handle(&vss_ctx.hEventTimeout);
+
+    if (vss_ctx.pAsyncSnapshot != NULL) {
+        vss_ctx.pAsyncSnapshot->Release();
+        vss_ctx.pAsyncSnapshot = NULL;
+    }
+    if (vss_ctx.pVssbc != NULL) {
+        vss_ctx.pVssbc->Release();
+        vss_ctx.pVssbc = NULL;
+    }
+    vss_ctx.cFrozenVols = 0;
+}
+
+/*
+ * Undo requester_init(): release the vss_ctx resources, unload VSSAPI.DLL
+ * if it is loaded and forget the function pointers resolved from it.
+ * Always returns S_OK.
+ */
+STDAPI requester_deinit(void)
+{
+    requester_cleanup();
+
+    if (hLib != NULL) {
+        FreeLibrary(hLib);
+        hLib = NULL;
+    }
+    pVssFreeSnapshotProperties = NULL;
+    pCreateVssBackupComponents = NULL;
+
+    return S_OK;
+}
+
+/*
+ * Block until the asynchronous VSS operation leaves the pending state.
+ * Returns the operation status reported by QueryStatus(), or the failing
+ * HRESULT of Wait()/QueryStatus() themselves.
+ */
+static HRESULT WaitForAsync(IVssAsync *pAsync)
+{
+    for (;;) {
+        HRESULT status;
+        HRESULT hr = pAsync->Wait();
+        if (FAILED(hr)) {
+            return hr;
+        }
+
+        hr = pAsync->QueryStatus(&status, NULL);
+        if (FAILED(hr)) {
+            return hr;
+        }
+
+        if (status != VSS_S_ASYNC_PENDING) {
+            return status;
+        }
+    }
+}
+
+/*
+ * Add every selectable component of every VSS writer to the backup.
+ *
+ * Walks the writer metadata held by vss_ctx.pVssbc and calls
+ * AddComponent() for each component whose info has bSelectable set.
+ * The first failure is reported through errset and stops the walk.
+ */
+static void AddComponents(ErrorSet *errset)
+{
+    unsigned int cWriters, i;
+    VSS_ID id, idInstance, idWriter;
+    BSTR bstrWriterName = NULL;
+    VSS_USAGE_TYPE usage;
+    VSS_SOURCE_TYPE source;
+    unsigned int cComponents, c1, c2, j;
+    COMPointer<IVssExamineWriterMetadata> pMetadata;
+    COMPointer<IVssWMComponent> pComponent;
+    /* Must start as NULL: the cleanup at "out" tests it on every path */
+    PVSSCOMPONENTINFO info = NULL;
+    HRESULT hr;
+
+    hr = vss_ctx.pVssbc->GetWriterMetadataCount(&cWriters);
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to get writer metadata count");
+        goto out;
+    }
+
+    for (i = 0; i < cWriters; i++) {
+        hr = vss_ctx.pVssbc->GetWriterMetadata(i, &id, pMetadata.replace());
+        if (FAILED(hr)) {
+            err_set(errset, hr, "failed to get writer metadata of %d/%d",
+                             i, cWriters);
+            goto out;
+        }
+
+        hr = pMetadata->GetIdentity(&idInstance, &idWriter,
+                                    &bstrWriterName, &usage, &source);
+        if (FAILED(hr)) {
+            err_set(errset, hr, "failed to get identity of writer %d/%d",
+                             i, cWriters);
+            goto out;
+        }
+
+        hr = pMetadata->GetFileCounts(&c1, &c2, &cComponents);
+        if (FAILED(hr)) {
+            err_set(errset, hr, "failed to get file counts of %S",
+                             bstrWriterName);
+            goto out;
+        }
+
+        for (j = 0; j < cComponents; j++) {
+            hr = pMetadata->GetComponent(j, pComponent.replace());
+            if (FAILED(hr)) {
+                err_set(errset, hr,
+                                 "failed to get component %d/%d of %S",
+                                 j, cComponents, bstrWriterName);
+                goto out;
+            }
+
+            hr = pComponent->GetComponentInfo(&info);
+            if (FAILED(hr)) {
+                err_set(errset, hr,
+                                 "failed to get component info %d/%d of %S",
+                                 j, cComponents, bstrWriterName);
+                goto out;
+            }
+
+            if (info->bSelectable) {
+                hr = vss_ctx.pVssbc->AddComponent(idInstance, idWriter,
+                                                  info->type,
+                                                  info->bstrLogicalPath,
+                                                  info->bstrComponentName);
+                if (FAILED(hr)) {
+                    err_set(errset, hr, "failed to add component %S(%S)",
+                                     info->bstrComponentName, bstrWriterName);
+                    goto out;
+                }
+            }
+            pComponent->FreeComponentInfo(info);
+            info = NULL;
+        }
+
+        /*
+         * The writer name belongs to the writer, not to a single component:
+         * free it once per writer so it stays valid for the messages of
+         * all its components and is not leaked when a writer has none.
+         */
+        SysFreeString(bstrWriterName);
+        bstrWriterName = NULL;
+    }
+out:
+    if (bstrWriterName) {
+        SysFreeString(bstrWriterName);
+    }
+    if (pComponent && info) {
+        pComponent->FreeComponentInfo(info);
+    }
+}
+
+/*
+ * Freeze applications and filesystems through VSS.
+ *
+ * Creates the VSS backup components, adds every fixed drive to a new
+ * snapshot set, starts DoSnapshotSet() and waits until the provider
+ * signals the "frozen" event.
+ *
+ * On success *num_vols receives the number of fixed drives added to the
+ * snapshot set and the VSS state stays in vss_ctx for requester_thaw().
+ * On failure the error is reported through errset, the backup is aborted
+ * and everything is cleaned up. If a freeze is already active, *num_vols
+ * is set to 0 and nothing else happens. If no fixed drive exists, the
+ * function cleans up and returns without an error and without writing
+ * *num_vols.
+ */
+void requester_freeze(int *num_vols, ErrorSet *errset)
+{
+    COMPointer<IVssAsync> pAsync;
+    HANDLE volume;
+    HRESULT hr;
+    LONG ctx;
+    GUID guidSnapshotSet = GUID_NULL;
+    SECURITY_DESCRIPTOR sd;
+    SECURITY_ATTRIBUTES sa;
+    WCHAR short_volume_name[64], *display_name = short_volume_name;
+    /* The volume APIs take buffer lengths in WCHARs, not in bytes */
+    const DWORD short_volume_name_len =
+        sizeof(short_volume_name) / sizeof(short_volume_name[0]);
+    DWORD wait_status = WAIT_TIMEOUT;
+    int num_fixed_drives = 0, i;
+
+    if (vss_ctx.pVssbc) { /* already frozen */
+        *num_vols = 0;
+        return;
+    }
+
+    CoInitialize(NULL);
+
+    assert(pCreateVssBackupComponents != NULL);
+    hr = pCreateVssBackupComponents(&vss_ctx.pVssbc);
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to create VSS backup components");
+        goto out;
+    }
+
+    hr = vss_ctx.pVssbc->InitializeForBackup();
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to initialize for backup");
+        goto out;
+    }
+
+    hr = vss_ctx.pVssbc->SetBackupState(true, true, VSS_BT_FULL, false);
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to set backup state");
+        goto out;
+    }
+
+    /*
+     * Currently writable snapshots are not supported.
+     * To prevent the final commit (which requires to write to snapshots),
+     * ATTR_NO_AUTORECOVERY and ATTR_TRANSPORTABLE are specified here.
+     */
+    ctx = VSS_CTX_APP_ROLLBACK | VSS_VOLSNAP_ATTR_TRANSPORTABLE |
+        VSS_VOLSNAP_ATTR_NO_AUTORECOVERY | VSS_VOLSNAP_ATTR_TXF_RECOVERY;
+    hr = vss_ctx.pVssbc->SetContext(ctx);
+    if (hr == (HRESULT)VSS_E_UNSUPPORTED_CONTEXT) {
+        /* Non-server version of Windows doesn't support ATTR_TRANSPORTABLE */
+        ctx &= ~VSS_VOLSNAP_ATTR_TRANSPORTABLE;
+        hr = vss_ctx.pVssbc->SetContext(ctx);
+    }
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to set backup context");
+        goto out;
+    }
+
+    hr = vss_ctx.pVssbc->GatherWriterMetadata(pAsync.replace());
+    if (SUCCEEDED(hr)) {
+        hr = WaitForAsync(pAsync);
+    }
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to gather writer metadata");
+        goto out;
+    }
+
+    AddComponents(errset);
+    if (err_is_set(errset)) {
+        goto out;
+    }
+
+    hr = vss_ctx.pVssbc->StartSnapshotSet(&guidSnapshotSet);
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to start snapshot set");
+        goto out;
+    }
+
+    volume = FindFirstVolumeW(short_volume_name, short_volume_name_len);
+    if (volume == INVALID_HANDLE_VALUE) {
+        /* hr still holds the previous success code; report the real error */
+        err_set(errset, GetLastError(), "failed to find first volume");
+        goto out;
+    }
+    for (;;) {
+        if (GetDriveTypeW(short_volume_name) == DRIVE_FIXED) {
+            VSS_ID pid;
+            hr = vss_ctx.pVssbc->AddToSnapshotSet(short_volume_name,
+                                                  g_gProviderId, &pid);
+            if (FAILED(hr)) {
+                WCHAR volume_path_name[PATH_MAX];
+                if (GetVolumePathNamesForVolumeNameW(
+                        short_volume_name, volume_path_name,
+                        sizeof(volume_path_name) / sizeof(volume_path_name[0]),
+                        NULL) && *volume_path_name) {
+                    display_name = volume_path_name;
+                }
+                err_set(errset, hr, "failed to add %S to snapshot set",
+                                 display_name);
+                FindVolumeClose(volume);
+                goto out;
+            }
+            num_fixed_drives++;
+        }
+        if (!FindNextVolumeW(volume, short_volume_name,
+                             short_volume_name_len)) {
+            FindVolumeClose(volume);
+            break;
+        }
+    }
+
+    if (num_fixed_drives == 0) {
+        goto out; /* If there is no fixed drive, just exit. */
+    }
+
+    hr = vss_ctx.pVssbc->PrepareForBackup(pAsync.replace());
+    if (SUCCEEDED(hr)) {
+        hr = WaitForAsync(pAsync);
+    }
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to prepare for backup");
+        goto out;
+    }
+
+    hr = vss_ctx.pVssbc->GatherWriterStatus(pAsync.replace());
+    if (SUCCEEDED(hr)) {
+        hr = WaitForAsync(pAsync);
+    }
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to gather writer status");
+        goto out;
+    }
+
+    /* Allow unrestricted access to events */
+    InitializeSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION);
+    SetSecurityDescriptorDacl(&sd, TRUE, NULL, FALSE);
+    sa.nLength = sizeof(sa);
+    sa.lpSecurityDescriptor = &sd;
+    sa.bInheritHandle = FALSE;
+
+    /*
+     * CreateEvent() reports failure with NULL. The rest of this file uses
+     * INVALID_HANDLE_VALUE as the "no handle" marker, so a failed slot is
+     * put back to that marker before bailing out.
+     */
+    vss_ctx.hEventFrozen = CreateEvent(&sa, TRUE, FALSE, EVENT_NAME_FROZEN);
+    if (!vss_ctx.hEventFrozen) {
+        err_set(errset, GetLastError(), "failed to create event %s",
+                EVENT_NAME_FROZEN);
+        vss_ctx.hEventFrozen = INVALID_HANDLE_VALUE;
+        goto out;
+    }
+    vss_ctx.hEventThaw = CreateEvent(&sa, TRUE, FALSE, EVENT_NAME_THAW);
+    if (!vss_ctx.hEventThaw) {
+        err_set(errset, GetLastError(), "failed to create event %s",
+                EVENT_NAME_THAW);
+        vss_ctx.hEventThaw = INVALID_HANDLE_VALUE;
+        goto out;
+    }
+    vss_ctx.hEventTimeout = CreateEvent(&sa, TRUE, FALSE, EVENT_NAME_TIMEOUT);
+    if (!vss_ctx.hEventTimeout) {
+        err_set(errset, GetLastError(), "failed to create event %s",
+                EVENT_NAME_TIMEOUT);
+        vss_ctx.hEventTimeout = INVALID_HANDLE_VALUE;
+        goto out;
+    }
+
+    /*
+     * Start VSS quiescing operations.
+     * CQGAVssProvider::CommitSnapshots will kick vss_ctx.hEventFrozen
+     * after the applications and filesystems are frozen.
+     */
+    hr = vss_ctx.pVssbc->DoSnapshotSet(&vss_ctx.pAsyncSnapshot);
+    if (FAILED(hr)) {
+        err_set(errset, hr, "failed to do snapshot set");
+        goto out;
+    }
+
+    /* Need to call QueryStatus several times to make VSS provider progress */
+    for (i = 0; i < VSS_TIMEOUT_FREEZE_MSEC/VSS_TIMEOUT_EVENT_MSEC; i++) {
+        HRESULT hr2 = vss_ctx.pAsyncSnapshot->QueryStatus(&hr, NULL);
+        if (FAILED(hr2)) {
+            /* hr2 is the call that failed; hr may not have been written */
+            err_set(errset, hr2, "failed to do snapshot set");
+            goto out;
+        }
+        if (hr != VSS_S_ASYNC_PENDING) {
+            err_set(errset, E_FAIL,
+                    "DoSnapshotSet exited without Frozen event");
+            goto out;
+        }
+        wait_status = WaitForSingleObject(vss_ctx.hEventFrozen,
+                                          VSS_TIMEOUT_EVENT_MSEC);
+        if (wait_status != WAIT_TIMEOUT) {
+            break;
+        }
+    }
+    if (wait_status != WAIT_OBJECT_0) {
+        err_set(errset, E_FAIL,
+                "couldn't receive Frozen event from VSS provider");
+        goto out;
+    }
+
+    *num_vols = vss_ctx.cFrozenVols = num_fixed_drives;
+    return;
+
+out:
+    if (vss_ctx.pVssbc) {
+        vss_ctx.pVssbc->AbortBackup();
+    }
+    requester_cleanup();
+    CoUninitialize();
+}
+
+
+/*
+ * Thaw the system frozen by requester_freeze().
+ *
+ * Sets the thaw event for the provider, waits for the pending
+ * DoSnapshotSet() operation and completes or aborts the backup according
+ * to its result. *num_vols receives the number of volumes recorded by
+ * requester_freeze(), or 0 when no thaw event exists (nothing is frozen).
+ * Errors are reported through errset. All vss_ctx resources are released
+ * before returning.
+ */
+void requester_thaw(int *num_vols, ErrorSet *errset)
+{
+    COMPointer<IVssAsync> pAsync;
+
+    if (vss_ctx.hEventThaw == INVALID_HANDLE_VALUE) {
+        /*
+         * In this case, DoSnapshotSet is aborted or not started,
+         * and no volumes must be frozen. We return without an error.
+         */
+        *num_vols = 0;
+        return;
+    }
+
+    /* Tell the provider that the snapshot is finished. */
+    SetEvent(vss_ctx.hEventThaw);
+
+    assert(vss_ctx.pVssbc);
+    assert(vss_ctx.pAsyncSnapshot);
+
+    HRESULT hr = WaitForAsync(vss_ctx.pAsyncSnapshot);
+    switch (hr) {
+    case VSS_S_ASYNC_FINISHED:
+        hr = vss_ctx.pVssbc->BackupComplete(pAsync.replace());
+        if (SUCCEEDED(hr)) {
+            hr = WaitForAsync(pAsync);
+        }
+        if (FAILED(hr)) {
+            err_set(errset, hr, "failed to complete backup");
+        }
+        break;
+
+    case (HRESULT)VSS_E_OBJECT_NOT_FOUND:
+        /*
+         * On Windows earlier than 2008 SP2 which does not support
+         * VSS_VOLSNAP_ATTR_NO_AUTORECOVERY context, the final commit is not
+         * skipped and VSS is aborted by VSS_E_OBJECT_NOT_FOUND. However, as
+         * the system had been frozen until fsfreeze-thaw command was issued,
+         * we ignore this error.
+         */
+        vss_ctx.pVssbc->AbortBackup();
+        break;
+
+    /* Cast like the other VSS_E_* labels so the label has HRESULT type */
+    case (HRESULT)VSS_E_UNEXPECTED_PROVIDER_ERROR:
+        if (WaitForSingleObject(vss_ctx.hEventTimeout, 0) != WAIT_OBJECT_0) {
+            err_set(errset, hr, "unexpected error in VSS provider");
+            break;
+        }
+        /* fall through if hEventTimeout is signaled */
+
+    case (HRESULT)VSS_E_HOLD_WRITES_TIMEOUT:
+        err_set(errset, hr, "couldn't hold writes: "
+                "fsfreeze is limited up to 10 seconds");
+        break;
+
+    default:
+        err_set(errset, hr, "failed to do snapshot set");
+    }
+
+    if (err_is_set(errset)) {
+        vss_ctx.pVssbc->AbortBackup();
+    }
+    *num_vols = vss_ctx.cFrozenVols;
+    requester_cleanup();
+
+    CoUninitialize();
+}
diff --git a/qga/vss-win32/requester.h b/qga/vss-win32/requester.h
new file mode 100644
index 0000000..cffec01
--- /dev/null
+++ b/qga/vss-win32/requester.h
@@ -0,0 +1,42 @@
+/*
+ * QEMU Guest Agent VSS requester declarations
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VSS_WIN32_REQUESTER_H
+#define VSS_WIN32_REQUESTER_H
+
+#include "qemu/compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Callback to set Error; used to avoid linking glib to the DLL */
+typedef void (*ErrorSetFunc)(void **errp, int win32_err, int err_class,
+                             const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+typedef struct ErrorSet {
+    ErrorSetFunc error_set;
+    void **errp;
+    int err_class;
+} ErrorSet;
+
+STDAPI requester_init(void);
+STDAPI requester_deinit(void);
+
+typedef void (*QGAVSSRequesterFunc)(int *, ErrorSet *);
+void requester_freeze(int *num_vols, ErrorSet *errset);
+void requester_thaw(int *num_vols, ErrorSet *errset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/qga/vss-win32/vss-common.h b/qga/vss-win32/vss-common.h
new file mode 100644
index 0000000..ce14e14
--- /dev/null
+++ b/qga/vss-win32/vss-common.h
@@ -0,0 +1,129 @@
+/*
+ * QEMU Guest Agent win32 VSS common declarations
+ *
+ * Copyright Hitachi Data Systems Corp. 2013
+ *
+ * Authors:
+ *  Tomoki Sekiyama   <tomoki.sekiyama at hds.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VSS_WIN32_H
+#define VSS_WIN32_H
+
+#define __MIDL_user_allocate_free_DEFINED__
+#include "config-host.h"
+#include <windows.h>
+#include <shlwapi.h>
+
+/* Reduce warnings to include vss.h */
+
+/* Ignore annotations for MS IDE */
+#define __in  IN
+#define __out OUT
+#define __RPC_unique_pointer
+#define __RPC_string
+#define __RPC__deref_inout_opt
+#define __RPC__out
+#ifndef __RPC__out_ecount_part
+#define __RPC__out_ecount_part(x, y)
+#endif
+#define _declspec(x)
+#undef uuid
+#define uuid(x)
+
+/* Undef some duplicated error codes redefined in vss.h */
+#undef VSS_E_BAD_STATE
+#undef VSS_E_PROVIDER_NOT_REGISTERED
+#undef VSS_E_PROVIDER_VETO
+#undef VSS_E_OBJECT_NOT_FOUND
+#undef VSS_E_VOLUME_NOT_SUPPORTED
+#undef VSS_E_VOLUME_NOT_SUPPORTED_BY_PROVIDER
+#undef VSS_E_OBJECT_ALREADY_EXISTS
+#undef VSS_E_UNEXPECTED_PROVIDER_ERROR
+#undef VSS_E_INVALID_XML_DOCUMENT
+#undef VSS_E_MAXIMUM_NUMBER_OF_VOLUMES_REACHED
+#undef VSS_E_MAXIMUM_NUMBER_OF_SNAPSHOTS_REACHED
+
+/*
+ * VSS headers must be installed from Microsoft VSS SDK 7.2 available at:
+ * http://www.microsoft.com/en-us/download/details.aspx?id=23490
+ */
+#include "inc/win2003/vss.h"
+
+/* Macros to convert char definitions to wchar */
+#define _L(a) L##a
+#define L(a) _L(a)
+
+/* Constants for QGA VSS Provider */
+
+#define QGA_PROVIDER_NAME "QEMU Guest Agent VSS Provider"
+#define QGA_PROVIDER_LNAME L(QGA_PROVIDER_NAME)
+#define QGA_PROVIDER_VERSION L(QEMU_VERSION)
+
+#define EVENT_NAME_FROZEN  "Global\\QGAVSSEvent-frozen"
+#define EVENT_NAME_THAW    "Global\\QGAVSSEvent-thaw"
+#define EVENT_NAME_TIMEOUT "Global\\QGAVSSEvent-timeout"
+
+const GUID g_gProviderId = { 0x3629d4ed, 0xee09, 0x4e0e,
+    {0x9a, 0x5c, 0x6d, 0x8b, 0xa2, 0x87, 0x2a, 0xef} };
+const GUID g_gProviderVersion = { 0x11ef8b15, 0xcac6, 0x40d6,
+    {0x8d, 0x5c, 0x8f, 0xfc, 0x16, 0x3f, 0x24, 0xca} };
+
+const CLSID CLSID_QGAVSSProvider = { 0x6e6a3492, 0x8d4d, 0x440c,
+    {0x96, 0x19, 0x5e, 0x5d, 0x0c, 0xc3, 0x1c, 0xa8} };
+
+const TCHAR g_szClsid[] = TEXT("{6E6A3492-8D4D-440C-9619-5E5D0CC31CA8}");
+const TCHAR g_szProgid[] = TEXT("QGAVSSProvider");
+
+/* Enums undefined in VSS SDK 7.2 but defined in newer Windows SDK */
+enum __VSS_VOLUME_SNAPSHOT_ATTRIBUTES {
+    VSS_VOLSNAP_ATTR_NO_AUTORECOVERY       = 0x00000002,
+    VSS_VOLSNAP_ATTR_TXF_RECOVERY          = 0x02000000
+};
+
+
+/* COM pointer utility; call ->Release() when it goes out of scope */
+template <class T>
+class COMPointer {
+    COMPointer(const COMPointer<T> &p) { } /* no copy */
+    T *p;
+public:
+    COMPointer &operator=(T *new_p)
+    {
+        /* Assignment of a new T* (or NULL) causes release of previous p */
+        if (p && p != new_p) {
+            p->Release();
+        }
+        p = new_p;
+        return *this;
+    }
+    /* Replace by assignment to the pointer of p  */
+    T **replace(void)
+    {
+        *this = NULL;
+        return &p;
+    }
+    /* Make COMPointer be used like T* */
+    operator T*() { return p; }
+    T *operator->(void) { return p; }
+    T &operator*(void) { return *p; }
+    operator bool() { return !!p; }
+
+    COMPointer(T *p = NULL) : p(p) { }
+    ~COMPointer() { *this = NULL; }  /* Automatic release */
+};
+
+/*
+ * COM initializer; this should be declared before COMPointer to uninitialize
+ * COM after releasing COM objects.
+ */
+class COMInitializer {
+public:
+    COMInitializer() { CoInitialize(NULL); }
+    ~COMInitializer() { CoUninitialize(); }
+};
+
+#endif
commit 20840d4cfe5198cde313ac953279e76f16c5b76d
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:11 2013 -0400

    error: Add error_set_win32 and error_setg_win32
    
    These functions help maintain homogeneous formatting of error messages
    with Windows error code and description (generated by
    g_win32_error_message()).
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/include/qapi/error.h b/include/qapi/error.h
index ffd1cea..7d4c696 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -36,6 +36,15 @@ void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_
  */
 void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
 
+#ifdef _WIN32
+/**
+ * Set an indirect pointer to an error given an ErrorClass value and a
+ * printf-style human message, followed by a g_win32_error_message() string if
+ * @win32_err is not zero.
+ */
+void error_set_win32(Error **err, int win32_err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+#endif
+
 /**
  * Same as error_set(), but sets a generic error
  */
@@ -43,6 +52,10 @@ void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char
     error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
 #define error_setg_errno(err, os_error, fmt, ...) \
     error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#ifdef _WIN32
+#define error_setg_win32(err, win32_err, fmt, ...) \
+    error_set_win32(err, win32_err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#endif
 
 /**
  * Helper for open() errors
diff --git a/util/error.c b/util/error.c
index 53b0435..ec0faa6 100644
--- a/util/error.c
+++ b/util/error.c
@@ -76,6 +76,41 @@ void error_setg_file_open(Error **errp, int os_errno, const char *filename)
     error_setg_errno(errp, os_errno, "Could not open '%s'", filename);
 }
 
+#ifdef _WIN32
+
+void error_set_win32(Error **errp, int win32_err, ErrorClass err_class,
+                     const char *fmt, ...)
+{
+    Error *err;
+    char *msg1;
+    va_list ap;
+
+    if (errp == NULL) {
+        return;
+    }
+    assert(*errp == NULL);
+
+    err = g_malloc0(sizeof(*err));
+
+    va_start(ap, fmt);
+    msg1 = g_strdup_vprintf(fmt, ap);
+    if (win32_err != 0) {
+        char *msg2 = g_win32_error_message(win32_err);
+        err->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
+                                   (unsigned)win32_err);
+        g_free(msg2);
+        g_free(msg1);
+    } else {
+        err->msg = msg1;
+    }
+    va_end(ap);
+    err->err_class = err_class;
+
+    *errp = err;
+}
+
+#endif
+
 Error *error_copy(const Error *err)
 {
     Error *err_new;
commit d9840e2592493c816ad50f4211a9a4ec35371def
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:40:03 2013 -0400

    qemu-ga: Add configure options to specify path to Windows/VSS SDK
    
    To enable VSS support in qemu-ga for Windows, header files included in
    VSS SDK are required.
    The VSS support is enabled by the configure option like below:
      ./configure --with-vss-sdk="/path/to/VSS SDK"
    
    If the path is omitted, it tries to search the headers from default paths
    and VSS support is enabled only if the SDK is found.
    VSS support is disabled if --without-vss-sdk or --with-vss-sdk=no is
    specified.
    
    VSS SDK is available from:
      http://www.microsoft.com/en-us/download/details.aspx?id=23490
    
    To cross-compile using mingw, you need to setup the SDK on Windows
    environments to extract headers. You can also extract the SDK headers on
    POSIX environments using scripts/extract-vsssdk-headers and msitools.
    
    In addition, --with-win-sdk="/path/to/Windows SDK" option is also added to
    specify path to Windows SDK, which may be used for native-compile of .tlb
    file of qemu-ga VSS provider. However, this is usually unnecessary because
    pre-compiled .tlb file is included.
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/.gitignore b/.gitignore
index d2c5c2f..8e1b73f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,6 +82,7 @@ fsdev/virtfs-proxy-helper.pod
 *.la
 *.pc
 .libs
+.sdk
 *.swp
 *.orig
 .pc
diff --git a/Makefile b/Makefile
index 806946e..2fb0e5a 100644
--- a/Makefile
+++ b/Makefile
@@ -272,6 +272,7 @@ distclean: clean
 	for d in $(TARGET_DIRS); do \
 	rm -rf $$d || exit 1 ; \
         done
+	rm -Rf .sdk
 	if test -f pixman/config.log; then make -C pixman distclean; fi
 	if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi
 
diff --git a/configure b/configure
index ca6c376..abc29ab 100755
--- a/configure
+++ b/configure
@@ -232,6 +232,9 @@ usb_redir=""
 glx=""
 zlib="yes"
 guest_agent=""
+guest_agent_with_vss="no"
+vss_win32_sdk=""
+win_sdk="no"
 want_tools="yes"
 libiscsi=""
 coroutine=""
@@ -927,6 +930,18 @@ for opt do
   ;;
   --disable-guest-agent) guest_agent="no"
   ;;
+  --with-vss-sdk) vss_win32_sdk=""
+  ;;
+  --with-vss-sdk=*) vss_win32_sdk="$optarg"
+  ;;
+  --without-vss-sdk) vss_win32_sdk="no"
+  ;;
+  --with-win-sdk) win_sdk=""
+  ;;
+  --with-win-sdk=*) win_sdk="$optarg"
+  ;;
+  --without-win-sdk) win_sdk="no"
+  ;;
   --enable-tools) want_tools="yes"
   ;;
   --disable-tools) want_tools="no"
@@ -1168,6 +1183,8 @@ echo "  --disable-usb-redir      disable usb network redirection support"
 echo "  --enable-usb-redir       enable usb network redirection support"
 echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
 echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
+echo "  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent"
+echo "  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)"
 echo "  --disable-seccomp        disable seccomp support"
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
@@ -3132,6 +3149,61 @@ if test "$usb_redir" != "no" ; then
 fi
 
 ##########################################
+# check if we have VSS SDK headers for win
+
+if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$vss_win32_sdk" != "no" ; then
+  case "$vss_win32_sdk" in
+    "")   vss_win32_include="-I$source_path" ;;
+    *\ *) # The SDK is installed in "Program Files" by default, but we cannot
+          # handle path with spaces. So we symlink the headers into ".sdk/vss".
+          vss_win32_include="-I$source_path/.sdk/vss"
+	  symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
+	  ;;
+    *)    vss_win32_include="-I$vss_win32_sdk"
+  esac
+  cat > $TMPC << EOF
+#define __MIDL_user_allocate_free_DEFINED__
+#include <inc/win2003/vss.h>
+int main(void) { return VSS_CTX_BACKUP; }
+EOF
+  if compile_prog "$vss_win32_include" "" ; then
+    guest_agent_with_vss="yes"
+    QEMU_CFLAGS="$QEMU_CFLAGS $vss_win32_include"
+    libs_qga="-lole32 -loleaut32 -lshlwapi -luuid -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga"
+  else
+    if test "$vss_win32_sdk" != "" ; then
+      echo "ERROR: Please download and install Microsoft VSS SDK:"
+      echo "ERROR:   http://www.microsoft.com/en-us/download/details.aspx?id=23490"
+      echo "ERROR: On POSIX-systems, you can extract the SDK headers by:"
+      echo "ERROR:   scripts/extract-vsssdk-headers setup.exe"
+      echo "ERROR: The headers are extracted in the directory \`inc'."
+      feature_not_found "VSS support"
+    fi
+    guest_agent_with_vss="no"
+  fi
+fi
+
+##########################################
+# lookup Windows platform SDK (if not specified)
+# The SDK is needed only to build .tlb (type library) file of guest agent
+# VSS provider from the source. It is usually unnecessary because the
+# pre-compiled .tlb file is included.
+
+if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$guest_agent_with_vss" = "yes" ; then
+  if test -z "$win_sdk"; then
+    programfiles="$PROGRAMFILES"
+    test -n "$PROGRAMW6432" && programfiles="$PROGRAMW6432"
+    if test -n "$programfiles"; then
+      win_sdk=$(ls -d "$programfiles/Microsoft SDKs/Windows/v"* 2>/dev/null | tail -1)
+    else
+      feature_not_found "Windows SDK"
+    fi
+  elif test "$win_sdk" = "no"; then
+    win_sdk=""
+  fi
+fi
+
+##########################################
 
 ##########################################
 # check if we have fdatasync
@@ -3568,6 +3640,7 @@ echo "Manual directory  `eval echo $mandir`"
 echo "ELF interp prefix $interp_prefix"
 else
 echo "local state directory   queried at runtime"
+echo "Windows SDK       $win_sdk"
 fi
 echo "Source path       $source_path"
 echo "C compiler        $cc"
@@ -3654,6 +3727,7 @@ echo "usb net redir     $usb_redir"
 echo "GLX support       $glx"
 echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
+echo "QGA VSS support   $guest_agent_with_vss"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
 echo "GlusterFS support $glusterfs"
@@ -3728,6 +3802,10 @@ if test "$mingw32" = "yes" ; then
   version_micro=0
   echo "CONFIG_FILEVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak
   echo "CONFIG_PRODUCTVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak
+  if test "$guest_agent_with_vss" = "yes" ; then
+    echo "CONFIG_QGA_VSS=y" >> $config_host_mak
+    echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
+  fi
 else
   echo "CONFIG_POSIX=y" >> $config_host_mak
 fi
commit 24482749c7d6d7bc0106a43ebac124526fb5b376
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:39:57 2013 -0400

    Add a script to extract VSS SDK headers on POSIX system
    
    VSS SDK(*) setup.exe is only runnable on Windows. This adds a script
    to extract VSS SDK headers on POSIX-systems using msitools.
    
      * http://www.microsoft.com/en-us/download/details.aspx?id=23490
    
    From: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/scripts/extract-vsssdk-headers b/scripts/extract-vsssdk-headers
new file mode 100755
index 0000000..9e38510
--- /dev/null
+++ b/scripts/extract-vsssdk-headers
@@ -0,0 +1,35 @@
+#! /bin/bash
+
+# extract-vsssdk-headers
+# Author: Paolo Bonzini <pbonzini at redhat.com>
+
+set -e
+if test $# != 1 || ! test -f "$1"; then
+  echo 'Usage: extract-vsssdk-headers /path/to/setup.exe' >&2
+  exit 1
+fi
+
+if ! command -v msiextract > /dev/null; then
+  echo 'msiextract not found. Please install msitools.' >&2
+  exit 1
+fi
+
+if test -e inc; then
+  echo '"inc" already exists.' >&2
+  exit 1
+fi
+
+# Extract .MSI file in the .exe, looking for the OLE compound
+# document signature.  Extra data at the end does not matter.
+export LC_ALL=C
+MAGIC=$'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
+offset=$(grep -abom1 "$MAGIC" "$1" | sed -n 's/:/\n/; P')
+tmpdir=$(mktemp -d)
+trap 'rm -fr -- "$tmpdir" vsssdk.msi' EXIT HUP INT QUIT ALRM TERM
+tail -c +$(($offset+1)) -- "$1" > vsssdk.msi
+
+# Now extract the files.
+msiextract -C "$tmpdir" vsssdk.msi
+mv "$tmpdir/Program Files/Microsoft/VSSSDK72/inc" inc
+echo 'Extracted SDK headers into "inc" directory.'
+exit 0
commit 69d5d21f90516a8b988a88915865b38e543fc994
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:39:50 2013 -0400

    checkpatch.pl: Check .cpp files
    
    Enable checkpatch.pl to apply the same checks as C source files for
    C++ files with .cpp extensions. It also adds some exceptions for C++
    sources to suppress errors for:
      - <> used in C++ template arguments (e.g. template <class T>)
      - :: used to represent namespaces   (e.g. SomeClass::method())
      - : used in class declaration       (e.g. class T : public Super)
      - ~ used in destructor method name  (e.g. T::~T())
      - spacing around 'catch'            (e.g. catch (...))
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index ec0aa4c..9d46e5a 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1363,7 +1363,7 @@ sub process {
 # Check for incorrect file permissions
 		if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
 			my $permhere = $here . "FILE: $realfile\n";
-			if ($realfile =~ /(Makefile|Kconfig|\.c|\.h|\.S|\.tmpl)$/) {
+			if ($realfile =~ /(Makefile|Kconfig|\.c|\.cpp|\.h|\.S|\.tmpl)$/) {
 				ERROR("do not set execute permissions for source files\n" . $permhere);
 			}
 		}
@@ -1460,7 +1460,7 @@ sub process {
 		}
 
 # check we are in a valid source file if not then ignore this hunk
-		next if ($realfile !~ /\.(h|c|s|S|pl|sh)$/);
+		next if ($realfile !~ /\.(h|c|cpp|s|S|pl|sh)$/);
 
 #80 column limit
 		if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
@@ -1495,7 +1495,7 @@ sub process {
 		}
 
 # check we are in a valid source file C or perl if not then ignore this hunk
-		next if ($realfile !~ /\.(h|c|pl)$/);
+		next if ($realfile !~ /\.(h|c|cpp|pl)$/);
 
 # in QEMU, no tabs are allowed
 		if ($rawline =~ /^\+.*\t/) {
@@ -1505,7 +1505,7 @@ sub process {
 		}
 
 # check we are in a valid C source file if not then ignore this hunk
-		next if ($realfile !~ /\.(h|c)$/);
+		next if ($realfile !~ /\.(h|c|cpp)$/);
 
 # check for RCS/CVS revision markers
 		if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) {
@@ -1969,6 +1969,9 @@ sub process {
 				asm|__asm__)$/x)
 			{
 
+			# Ignore 'catch (...)' in C++
+			} elsif ($name =~ /^catch$/ && $realfile =~ /(\.cpp|\.h)$/) {
+
 			# cpp #define statements have non-optional spaces, ie
 			# if there is a space between the name and the open
 			# parenthesis it is simply not a parameter group.
@@ -1992,7 +1995,7 @@ sub process {
 				\+=|-=|\*=|\/=|%=|\^=|\|=|&=|
 				=>|->|<<|>>|<|>|=|!|~|
 				&&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%|
-				\?|:
+				\?|::|:
 			}x;
 			my @elements = split(/($ops|;)/, $opline);
 			my $off = 0;
@@ -2062,6 +2065,10 @@ sub process {
 				# // is a comment
 				} elsif ($op eq '//') {
 
+				# Ignore : used in class declaration in C++
+				} elsif ($opv eq ':B' && $ctx =~ /Wx[WE]/ &&
+						 $line =~ /class/ && $realfile =~ /(\.cpp|\.h)$/) {
+
 				# No spaces for:
 				#   ->
 				#   :   when part of a bitfield
@@ -2088,7 +2095,10 @@ sub process {
 				} elsif ($op eq '!' || $op eq '~' ||
 					 $opv eq '*U' || $opv eq '-U' ||
 					 $opv eq '&U' || $opv eq '&&U') {
-					if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
+					if ($op eq '~' && $ca =~ /::$/ && $realfile =~ /(\.cpp|\.h)$/) {
+						# '~' used as a name of Destructor
+
+					} elsif ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
 						ERROR("space required before that '$op' $at\n" . $hereptr);
 					}
 					if ($op eq '*' && $cc =~/\s*$Modifier\b/) {
@@ -2135,6 +2145,18 @@ sub process {
 				} elsif ($ctx !~ /[EWC]x[CWE]/) {
 					my $ok = 0;
 
+					if ($realfile =~ /(\.cpp|\.h)$/) {
+						# Ignore template arguments <...> in C++
+						if (($op eq '<' || $op eq '>') && $line =~ /<.*>/) {
+							$ok = 1;
+						}
+
+						# Ignore :: in C++
+						if ($op eq '::') {
+							$ok = 1;
+						}
+					}
+
 					# Ignore email addresses <foo@bar>
 					if (($op eq '<' &&
 					     $cc =~ /^\S+\@\S+>/) ||
commit 6f88009ee505e1e9fbf6b74b2e2fb3e24cd3411b
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:39:43 2013 -0400

    Add c++ keywords to QAPI helper script
    
    Add c++ keywords to avoid errors in compiling with c++ compiler.
    This also renames class member of PciDeviceInfo to q_class.
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/hmp.c b/hmp.c
index fcca6ae..baadbc0 100644
--- a/hmp.c
+++ b/hmp.c
@@ -528,7 +528,7 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev)
     if (dev->class_info.has_desc) {
         monitor_printf(mon, "%s", dev->class_info.desc);
     } else {
-        monitor_printf(mon, "Class %04" PRId64, dev->class_info.class);
+        monitor_printf(mon, "Class %04" PRId64, dev->class_info.q_class);
     }
 
     monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n",
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d00682e..ad1c1ca 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1461,7 +1461,7 @@ static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus,
     info->function = PCI_FUNC(dev->devfn);
 
     class = pci_get_word(dev->config + PCI_CLASS_DEVICE);
-    info->class_info.class = class;
+    info->class_info.q_class = class;
     desc = get_class_desc(class);
     if (desc->desc) {
         info->class_info.has_desc = true;
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 1069310..750e9fb 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -236,9 +236,19 @@ def c_var(name, protect=True):
     # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
     # excluding _.*
     gcc_words = set(['asm', 'typeof'])
+    # C++ ISO/IEC 14882:2003 2.11
+    cpp_words = set(['bool', 'catch', 'class', 'const_cast', 'delete',
+                     'dynamic_cast', 'explicit', 'false', 'friend', 'mutable',
+                     'namespace', 'new', 'operator', 'private', 'protected',
+                     'public', 'reinterpret_cast', 'static_cast', 'template',
+                     'this', 'throw', 'true', 'try', 'typeid', 'typename',
+                     'using', 'virtual', 'wchar_t',
+                     # alternative representations
+                     'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
+                     'not_eq', 'or', 'or_eq', 'xor', 'xor_eq'])
     # namespace pollution:
     polluted_words = set(['unix'])
-    if protect and (name in c89_words | c99_words | c11_words | gcc_words | polluted_words):
+    if protect and (name in c89_words | c99_words | c11_words | gcc_words | cpp_words | polluted_words):
         return "q_" + name
     return name.replace('-', '_').lstrip("*")
 
commit 83f73fce4cf18cf36e99f0e78e8e87dfb6b12a71
Author: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
Date:   Wed Aug 7 11:39:36 2013 -0400

    configure: Support configuring C++ compiler
    
    Add configuration for C++ compiler in configure and Makefiles.
    The C++ compiler is chosen as follows:
     - ${CXX}, if it is specified.
     - ${cross_prefix}g++, if ${cross_prefix} is specified.
     - Otherwise, c++ is used.
    
    Currently, usage of C++ language is only for access to Windows VSS
    using COM+ services in qemu-guest-agent for Windows.
    
    Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama at hds.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Micael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/configure b/configure
index e989609..ca6c376 100755
--- a/configure
+++ b/configure
@@ -252,6 +252,8 @@ for opt do
   ;;
   --cc=*) CC="$optarg"
   ;;
+  --cxx=*) CXX="$optarg"
+  ;;
   --source-path=*) source_path="$optarg"
   ;;
   --cpu=*) cpu="$optarg"
@@ -282,6 +284,12 @@ else
   cc="${CC-${cross_prefix}gcc}"
 fi
 
+if test -z "${CXX}${cross_prefix}"; then
+  cxx="c++"
+else
+  cxx="${CXX-${cross_prefix}g++}"
+fi
+
 ar="${AR-${cross_prefix}ar}"
 as="${AS-${cross_prefix}as}"
 cpp="${CPP-$cc -E}"
@@ -626,6 +634,8 @@ for opt do
   ;;
   --host-cc=*) host_cc="$optarg"
   ;;
+  --cxx=*)
+  ;;
   --objcc=*) objcc="$optarg"
   ;;
   --make=*) make="$optarg"
@@ -1032,6 +1042,7 @@ echo "  --cross-prefix=PREFIX    use PREFIX for compile tools [$cross_prefix]"
 echo "  --cc=CC                  use C compiler CC [$cc]"
 echo "  --host-cc=CC             use C compiler CC [$host_cc] for code run at"
 echo "                           build time"
+echo "  --cxx=CXX                use C++ compiler CXX [$cxx]"
 echo "  --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]"
 echo "  --extra-cflags=CFLAGS    append extra C compiler flags QEMU_CFLAGS"
 echo "  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS"
@@ -3561,6 +3572,7 @@ fi
 echo "Source path       $source_path"
 echo "C compiler        $cc"
 echo "Host C compiler   $host_cc"
+echo "C++ compiler      $cxx"
 echo "Objective-C compiler $objcc"
 echo "CFLAGS            $CFLAGS"
 echo "QEMU_CFLAGS       $QEMU_CFLAGS"
@@ -4148,6 +4160,7 @@ echo "PYTHON=$python" >> $config_host_mak
 echo "CC=$cc" >> $config_host_mak
 echo "CC_I386=$cc_i386" >> $config_host_mak
 echo "HOST_CC=$host_cc" >> $config_host_mak
+echo "CXX=$cxx" >> $config_host_mak
 echo "OBJCC=$objcc" >> $config_host_mak
 echo "AR=$ar" >> $config_host_mak
 echo "AS=$as" >> $config_host_mak
diff --git a/rules.mak b/rules.mak
index 4499745..abc2e84 100644
--- a/rules.mak
+++ b/rules.mak
@@ -8,9 +8,13 @@ MAKEFLAGS += -rR
 %.d:
 %.h:
 %.c:
+%.cpp:
 %.m:
 %.mak:
 
+# Flags for C++ compilation
+QEMU_CXXFLAGS = -D__STDC_LIMIT_MACROS $(filter-out -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Wold-style-declaration -Wold-style-definition -Wredundant-decls, $(QEMU_CFLAGS))
+
 # Flags for dependency generation
 QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(*D)/$(*F).d
 
@@ -50,6 +54,9 @@ endif
 %.o: %.asm
 	$(call quiet-command,$(AS) $(ASFLAGS) -o $@ $<,"  AS    $(TARGET_DIR)$@")
 
+%.o: %.cpp
+	$(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<,"  CXX   $(TARGET_DIR)$@")
+
 %.o: %.m
 	$(call quiet-command,$(OBJCC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<,"  OBJC  $(TARGET_DIR)$@")
 
@@ -70,7 +77,7 @@ quiet-command = $(if $(V),$1,$(if $(2),@echo $2 && $1, @$1))
 cc-option = $(if $(shell $(CC) $1 $2 -S -o /dev/null -xc /dev/null \
               >/dev/null 2>&1 && echo OK), $2, $3)
 
-VPATH_SUFFIXES = %.c %.h %.S %.m %.mak %.texi %.sh %.rc
+VPATH_SUFFIXES = %.c %.h %.S %.cpp %.m %.mak %.texi %.sh %.rc
 set-vpath = $(if $1,$(foreach PATTERN,$(VPATH_SUFFIXES),$(eval vpath $(PATTERN) $1)))
 
 # find-in-path
commit 254c12825f93f405658ca3366cd34f8a8ad23511
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Mon Sep 9 16:15:53 2013 +0000

    pc_q35: Initialize Xen.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index d7b7c3b..464a892 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -76,6 +76,11 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     DeviceState *icc_bridge;
     PcGuestInfo *guest_info;
 
+    if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
+        fprintf(stderr, "xen hardware virtual machine initialisation failed\n");
+        exit(1);
+    }
+
     icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
     object_property_add_child(qdev_get_machine(), "icc-bridge",
                               OBJECT(icc_bridge), NULL);
commit 04d7bad8a4fb23e6d9af9d06ce3ddc28a251d94d
Author: Anthony PERARD <anthony.perard at citrix.com>
Date:   Mon Sep 9 16:15:52 2013 +0000

    pc: Initializing ram_memory under Xen.
    
    Signed-off-by: Anthony PERARD <anthony.perard at citrix.com>
    Signed-off-by: Stefano Stabellini <stefano.stabellini at eu.citrix.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    CC: qemu-stable at nongnu.org

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 275e395..5bb4937 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -89,7 +89,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
     FWCfgState *fw_cfg = NULL;
     PcGuestInfo *guest_info;
 
-    if (xen_enabled() && xen_hvm_init() != 0) {
+    if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
         fprintf(stderr, "xen hardware virtual machine initialisation failed\n");
         exit(1);
     }
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 6d42dd1..e1f88bf 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -37,17 +37,15 @@ void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
 qemu_irq *xen_interrupt_controller_init(void);
 
 int xen_init(void);
-int xen_hvm_init(void);
+int xen_hvm_init(MemoryRegion **ram_memory);
 void xenstore_store_pv_console_info(int i, struct CharDriverState *chr);
 
 #if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
-struct MemoryRegion;
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
                    struct MemoryRegion *mr);
 void xen_modified_memory(ram_addr_t start, ram_addr_t length);
 #endif
 
-struct MemoryRegion;
 void xen_register_framebuffer(struct MemoryRegion *mr);
 
 #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
diff --git a/xen-all.c b/xen-all.c
index eb13111..839f14f 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -154,7 +154,7 @@ qemu_irq *xen_interrupt_controller_init(void)
 
 /* Memory Ops */
 
-static void xen_ram_init(ram_addr_t ram_size)
+static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p)
 {
     MemoryRegion *sysmem = get_system_memory();
     ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
@@ -168,6 +168,7 @@ static void xen_ram_init(ram_addr_t ram_size)
         block_len += HVM_BELOW_4G_MMIO_LENGTH;
     }
     memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len);
+    *ram_memory_p = &ram_memory;
     vmstate_register_ram_global(&ram_memory);
 
     if (ram_size >= HVM_BELOW_4G_RAM_END) {
@@ -1059,7 +1060,7 @@ static void xen_read_physmap(XenIOState *state)
     free(entries);
 }
 
-int xen_hvm_init(void)
+int xen_hvm_init(MemoryRegion **ram_memory)
 {
     int i, rc;
     unsigned long ioreq_pfn;
@@ -1134,7 +1135,7 @@ int xen_hvm_init(void)
 
     /* Init RAM management */
     xen_map_cache_init(xen_phys_offset_to_gaddr, state);
-    xen_ram_init(ram_size);
+    xen_ram_init(ram_size, ram_memory);
 
     qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
 
diff --git a/xen-stub.c b/xen-stub.c
index 47c8e73..ad189a6 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -64,7 +64,7 @@ void xen_modified_memory(ram_addr_t start, ram_addr_t length)
 {
 }
 
-int xen_hvm_init(void)
+int xen_hvm_init(MemoryRegion **ram_memory)
 {
     return 0;
 }
commit 45d883dcf208160e2db308d1b368beb74f37dc7e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Sep 2 13:10:34 2013 +0200

    ne2000: mark I/O as LITTLE_ENDIAN
    
    Now that the memory subsystem is propagating the endianness correctly,
    the ne2000 device should have its I/O ports marked as LITTLE_ENDIAN, as
    PCI devices are little endian.
    
    This makes the ne2000 NIC work again on PowerPC.
    
    Cc: qemu-stable at nongnu.org
    Cc: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 31afd28..c961258 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -693,7 +693,7 @@ static void ne2000_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps ne2000_ops = {
     .read = ne2000_read,
     .write = ne2000_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 /***********************************************************/
commit 3dbb9786e9f5fa8429824818b6f799d8f65d1199
Author: Brad Smith <brad at comstyle.com>
Date:   Fri Aug 23 12:28:25 2013 -0400

    vmxnet3: Eliminate __packed redefined warning
    
    This eliminates a warning about __packed being redefined as exposed by the
    vmxnet3 code. __packed is not used anywhere in the vmxnet3 code.
    
      CC    hw/net/vmxnet3.o
    In file included from hw/net/vmxnet3.c:29:
    hw/net/vmxnet3.h:37:1: warning: "__packed" redefined
    In file included from /usr/include/stdlib.h:38,
                     from /buildbot-qemu/default_openbsd_current/build/include/qemu-common.h:26,
                     from /buildbot-qemu/default_openbsd_current/build/include/hw/hw.h:5,
                     from hw/net/vmxnet3.c:18:
    /usr/include/sys/cdefs.h:209:1: warning: this is the location of the previous definition
    
    Signed-off-by: Brad Smith <brad at comstyle.com>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index 4eae7c7..f987d71 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -34,7 +34,6 @@
 #define __le16  uint16_t
 #define __le32  uint32_t
 #define __le64  uint64_t
-#define __packed QEMU_PACKED
 
 #if defined(HOST_WORDS_BIGENDIAN)
 #define __BIG_ENDIAN_BITFIELD
@@ -749,7 +748,6 @@ struct Vmxnet3_DriverShared {
 #undef __le16
 #undef __le32
 #undef __le64
-#undef __packed
 #if defined(HOST_WORDS_BIGENDIAN)
 #undef __BIG_ENDIAN_BITFIELD
 #endif
commit e9845f0985f088dd01790f4821026df0afba5795
Author: Vincenzo Maffione <v.maffione at gmail.com>
Date:   Fri Aug 2 18:30:52 2013 +0200

    e1000: add interrupt mitigation support
    
    This patch partially implements the e1000 interrupt mitigation mechanisms.
    Using a single QEMUTimer, it emulates the ITR register (which is the newer
    mitigation register, recommended by Intel) and approximately emulates
    RADV and TADV registers. TIDV and RDTR register functionalities are not
    emulated (RDTR is only used to validate RADV, according to the e1000 specs).
    
    RADV, TADV, TIDV and RDTR registers make up the older e1000 mitigation
    mechanism and would need a timer each to be completely emulated. However,
    a single timer has been used in order to reach a good compromise between
    emulation accuracy and simplicity/efficiency.
    
    The implemented mechanism can be enabled/disabled specifying the command
    line e1000-specific boolean parameter "mitigation", e.g.
    
        qemu-system-x86_64 -device e1000,mitigation=on,... ...
    
    For more information, see the Software developer's manual at
    http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf.
    
    Interrupt mitigation boosts performance when the guest suffers from
    a high interrupt rate (e.g. receiving short UDP packets at a high packet
    rate). For some numerical results see the following link:
    http://info.iet.unipi.it/~luigi/papers/20130520-rizzo-vm.pdf
    
    Signed-off-by: Vincenzo Maffione <v.maffione at gmail.com>
    Reviewed-by: Andreas Färber <afaerber at suse.de> (for pc-* machines)
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 275e395..147d08c 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -339,14 +339,25 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
     .desc = "Standard PC (i440FX + PIIX, 1996)", \
     .hot_add_cpu = pc_hot_add_cpu
 
+#define PC_I440FX_1_7_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
+static QEMUMachine pc_i440fx_machine_v1_7 = {
+    PC_I440FX_1_7_MACHINE_OPTIONS,
+    .name = "pc-i440fx-1.7",
+    .alias = "pc",
+    .init = pc_init_pci,
+    .is_default = 1,
+};
+
 #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
 
 static QEMUMachine pc_i440fx_machine_v1_6 = {
     PC_I440FX_1_6_MACHINE_OPTIONS,
     .name = "pc-i440fx-1.6",
-    .alias = "pc",
     .init = pc_init_pci_1_6,
-    .is_default = 1,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_6,
+        { /* end of list */ }
+    },
 };
 
 static QEMUMachine pc_i440fx_machine_v1_5 = {
@@ -735,6 +746,7 @@ static QEMUMachine xenfv_machine = {
 
 static void pc_machine_init(void)
 {
+    qemu_register_machine(&pc_i440fx_machine_v1_7);
     qemu_register_machine(&pc_i440fx_machine_v1_6);
     qemu_register_machine(&pc_i440fx_machine_v1_5);
     qemu_register_machine(&pc_i440fx_machine_v1_4);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index d7b7c3b..b4b5155 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -258,13 +258,25 @@ static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
     .desc = "Standard PC (Q35 + ICH9, 2009)", \
     .hot_add_cpu = pc_hot_add_cpu
 
+#define PC_Q35_1_7_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
+
+static QEMUMachine pc_q35_machine_v1_7 = {
+    PC_Q35_1_7_MACHINE_OPTIONS,
+    .name = "pc-q35-1.7",
+    .alias = "q35",
+    .init = pc_q35_init,
+};
+
 #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
 
 static QEMUMachine pc_q35_machine_v1_6 = {
     PC_Q35_1_6_MACHINE_OPTIONS,
     .name = "pc-q35-1.6",
-    .alias = "q35",
     .init = pc_q35_init_1_6,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_6,
+        { /* end of list */ }
+    },
 };
 
 static QEMUMachine pc_q35_machine_v1_5 = {
@@ -293,6 +305,7 @@ static QEMUMachine pc_q35_machine_v1_4 = {
 
 static void pc_q35_machine_init(void)
 {
+    qemu_register_machine(&pc_q35_machine_v1_7);
     qemu_register_machine(&pc_q35_machine_v1_6);
     qemu_register_machine(&pc_q35_machine_v1_5);
     qemu_register_machine(&pc_q35_machine_v1_4);
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index f5ebed4..d3f274c 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -135,9 +135,16 @@ typedef struct E1000State_st {
 
     QEMUTimer *autoneg_timer;
 
+    QEMUTimer *mit_timer;      /* Mitigation timer. */
+    bool mit_timer_on;         /* Mitigation timer is running. */
+    bool mit_irq_level;        /* Tracks interrupt pin level. */
+    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
+
 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
 #define E1000_FLAG_AUTONEG_BIT 0
+#define E1000_FLAG_MIT_BIT 1
 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
+#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
     uint32_t compat_flags;
 } E1000State;
 
@@ -158,7 +165,8 @@ enum {
     defreg(TORH),	defreg(TORL),	defreg(TOTH),	defreg(TOTL),
     defreg(TPR),	defreg(TPT),	defreg(TXDCTL),	defreg(WUFC),
     defreg(RA),		defreg(MTA),	defreg(CRCERRS),defreg(VFTA),
-    defreg(VET),
+    defreg(VET),        defreg(RDTR),   defreg(RADV),   defreg(TADV),
+    defreg(ITR),
 };
 
 static void
@@ -245,10 +253,21 @@ static const uint32_t mac_reg_init[] = {
                 E1000_MANC_RMCP_EN,
 };
 
+/* Helper function, *curr == 0 means the value is not set */
+static inline void
+mit_update_delay(uint32_t *curr, uint32_t value)
+{
+    if (value && (*curr == 0 || value < *curr)) {
+        *curr = value;
+    }
+}
+
 static void
 set_interrupt_cause(E1000State *s, int index, uint32_t val)
 {
     PCIDevice *d = PCI_DEVICE(s);
+    uint32_t pending_ints;
+    uint32_t mit_delay;
 
     if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
         /* Only for 8257x */
@@ -266,7 +285,57 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val)
      */
     s->mac_reg[ICS] = val;
 
-    qemu_set_irq(d->irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
+    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
+    if (!s->mit_irq_level && pending_ints) {
+        /*
+         * Here we detect a potential raising edge. We postpone raising the
+         * interrupt line if we are inside the mitigation delay window
+         * (s->mit_timer_on == 1).
+         * We provide a partial implementation of interrupt mitigation,
+         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
+         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
+         * RADV; relative timers based on TIDV and RDTR are not implemented.
+         */
+        if (s->mit_timer_on) {
+            return;
+        }
+        if (s->compat_flags & E1000_FLAG_MIT) {
+            /* Compute the next mitigation delay according to pending
+             * interrupts and the current values of RADV (provided
+             * RDTR!=0), TADV and ITR.
+             * Then rearm the timer.
+             */
+            mit_delay = 0;
+            if (s->mit_ide &&
+                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
+                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
+            }
+            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
+                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
+            }
+            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
+
+            if (mit_delay) {
+                s->mit_timer_on = 1;
+                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                          mit_delay * 256);
+            }
+            s->mit_ide = 0;
+        }
+    }
+
+    s->mit_irq_level = (pending_ints != 0);
+    qemu_set_irq(d->irq[0], s->mit_irq_level);
+}
+
+static void
+e1000_mit_timer(void *opaque)
+{
+    E1000State *s = opaque;
+
+    s->mit_timer_on = 0;
+    /* Call set_interrupt_cause to update the irq level (if necessary). */
+    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
 }
 
 static void
@@ -307,6 +376,10 @@ static void e1000_reset(void *opaque)
     int i;
 
     timer_del(d->autoneg_timer);
+    timer_del(d->mit_timer);
+    d->mit_timer_on = 0;
+    d->mit_irq_level = 0;
+    d->mit_ide = 0;
     memset(d->phy_reg, 0, sizeof d->phy_reg);
     memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
     memset(d->mac_reg, 0, sizeof d->mac_reg);
@@ -572,6 +645,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
     struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
     struct e1000_tx *tp = &s->tx;
 
+    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
     if (dtype == E1000_TXD_CMD_DEXT) {	// context descriptor
         op = le32_to_cpu(xp->cmd_and_length);
         tp->ipcss = xp->lower_setup.ip_fields.ipcss;
@@ -1047,7 +1121,8 @@ static uint32_t (*macreg_readops[])(E1000State *, int) = {
     getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
     getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
     getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
-    getreg(TDLEN),	getreg(RDLEN),
+    getreg(TDLEN),      getreg(RDLEN),  getreg(RDTR),   getreg(RADV),
+    getreg(TADV),       getreg(ITR),
 
     [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
     [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
@@ -1069,6 +1144,8 @@ static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
     [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
     [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
     [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
+    [RDTR] = set_16bit, [RADV] = set_16bit,     [TADV] = set_16bit,
+    [ITR] = set_16bit,
     [RA ... RA+31] = &mac_writereg,
     [MTA ... MTA+127] = &mac_writereg,
     [VFTA ... VFTA+127] = &mac_writereg,
@@ -1150,6 +1227,11 @@ static void e1000_pre_save(void *opaque)
     E1000State *s = opaque;
     NetClientState *nc = qemu_get_queue(s->nic);
 
+    /* If the mitigation timer is active, emulate a timeout now. */
+    if (s->mit_timer_on) {
+        e1000_mit_timer(s);
+    }
+
     if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
         return;
     }
@@ -1171,6 +1253,14 @@ static int e1000_post_load(void *opaque, int version_id)
     E1000State *s = opaque;
     NetClientState *nc = qemu_get_queue(s->nic);
 
+    if (!(s->compat_flags & E1000_FLAG_MIT)) {
+        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
+            s->mac_reg[TADV] = 0;
+        s->mit_irq_level = false;
+    }
+    s->mit_ide = 0;
+    s->mit_timer_on = false;
+
     /* nc.link_down can't be migrated, so infer link_down according
      * to link status bit in mac_reg[STATUS].
      * Alternatively, restart link negotiation if it was in progress. */
@@ -1190,6 +1280,28 @@ static int e1000_post_load(void *opaque, int version_id)
     return 0;
 }
 
+static bool e1000_mit_state_needed(void *opaque)
+{
+    E1000State *s = opaque;
+
+    return s->compat_flags & E1000_FLAG_MIT;
+}
+
+static const VMStateDescription vmstate_e1000_mit_state = {
+    .name = "e1000/mit_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields    = (VMStateField[]) {
+        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
+        VMSTATE_UINT32(mac_reg[RADV], E1000State),
+        VMSTATE_UINT32(mac_reg[TADV], E1000State),
+        VMSTATE_UINT32(mac_reg[ITR], E1000State),
+        VMSTATE_BOOL(mit_irq_level, E1000State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_e1000 = {
     .name = "e1000",
     .version_id = 2,
@@ -1267,6 +1379,14 @@ static const VMStateDescription vmstate_e1000 = {
         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
         VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection[]) {
+        {
+            .vmsd = &vmstate_e1000_mit_state,
+            .needed = e1000_mit_state_needed,
+        }, {
+            /* empty */
+        }
     }
 };
 
@@ -1316,6 +1436,8 @@ pci_e1000_uninit(PCIDevice *dev)
 
     timer_del(d->autoneg_timer);
     timer_free(d->autoneg_timer);
+    timer_del(d->mit_timer);
+    timer_free(d->mit_timer);
     memory_region_destroy(&d->mmio);
     memory_region_destroy(&d->io);
     qemu_del_nic(d->nic);
@@ -1371,6 +1493,7 @@ static int pci_e1000_init(PCIDevice *pci_dev)
     add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy at 0");
 
     d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
+    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
 
     return 0;
 }
@@ -1385,6 +1508,8 @@ static Property e1000_properties[] = {
     DEFINE_NIC_PROPERTIES(E1000State, conf),
     DEFINE_PROP_BIT("autonegotiation", E1000State,
                     compat_flags, E1000_FLAG_AUTONEG_BIT, true),
+    DEFINE_PROP_BIT("mitigation", E1000State,
+                    compat_flags, E1000_FLAG_MIT_BIT, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 7fb04d8..9b2ddc4 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -225,7 +225,15 @@ void pvpanic_init(ISABus *bus);
 
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
 
+#define PC_COMPAT_1_6 \
+        {\
+            .driver   = "e1000",\
+            .property = "mitigation",\
+            .value    = "off",\
+        }
+
 #define PC_COMPAT_1_5 \
+        PC_COMPAT_1_6, \
         {\
             .driver   = "Conroe-" TYPE_X86_CPU,\
             .property = "model",\
commit 067404be626d03656788adb7deff8072ca84299f
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Fri Aug 2 21:47:08 2013 +0200

    net: Rename send_queue to incoming_queue
    
    Each networking client has a queue for packets that could not yet be
    delivered to that client. Calling this queue "send_queue" is highly
    confusing as it has nothing to do with packets sent from this client but
    to it. Avoid this confusion by renaming it to "incoming_queue".
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/include/net/net.h b/include/net/net.h
index 30e4b04..11e1468 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -69,7 +69,7 @@ struct NetClientState {
     int link_down;
     QTAILQ_ENTRY(NetClientState) next;
     NetClientState *peer;
-    NetQueue *send_queue;
+    NetQueue *incoming_queue;
     char *model;
     char *name;
     char info_str[256];
diff --git a/net/hub.c b/net/hub.c
index df32074..33a99c9 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -347,7 +347,7 @@ bool net_hub_flush(NetClientState *nc)
 
     QLIST_FOREACH(port, &source_port->hub->ports, next) {
         if (port != source_port) {
-            ret += qemu_net_queue_flush(port->nc.send_queue);
+            ret += qemu_net_queue_flush(port->nc.incoming_queue);
         }
     }
     return ret ? true : false;
diff --git a/net/net.c b/net/net.c
index 1148592..c330c9a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -207,7 +207,7 @@ static void qemu_net_client_setup(NetClientState *nc,
     }
     QTAILQ_INSERT_TAIL(&net_clients, nc, next);
 
-    nc->send_queue = qemu_new_net_queue(nc);
+    nc->incoming_queue = qemu_new_net_queue(nc);
     nc->destructor = destructor;
 }
 
@@ -289,8 +289,8 @@ static void qemu_cleanup_net_client(NetClientState *nc)
 
 static void qemu_free_net_client(NetClientState *nc)
 {
-    if (nc->send_queue) {
-        qemu_del_net_queue(nc->send_queue);
+    if (nc->incoming_queue) {
+        qemu_del_net_queue(nc->incoming_queue);
     }
     if (nc->peer) {
         nc->peer->peer = NULL;
@@ -431,7 +431,7 @@ void qemu_purge_queued_packets(NetClientState *nc)
         return;
     }
 
-    qemu_net_queue_purge(nc->peer->send_queue, nc);
+    qemu_net_queue_purge(nc->peer->incoming_queue, nc);
 }
 
 void qemu_flush_queued_packets(NetClientState *nc)
@@ -444,7 +444,7 @@ void qemu_flush_queued_packets(NetClientState *nc)
         }
         return;
     }
-    if (qemu_net_queue_flush(nc->send_queue)) {
+    if (qemu_net_queue_flush(nc->incoming_queue)) {
         /* We emptied the queue successfully, signal to the IO thread to repoll
          * the file descriptor (for tap, for example).
          */
@@ -468,7 +468,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
         return size;
     }
 
-    queue = sender->peer->send_queue;
+    queue = sender->peer->incoming_queue;
 
     return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
 }
@@ -543,7 +543,7 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender,
         return iov_size(iov, iovcnt);
     }
 
-    queue = sender->peer->send_queue;
+    queue = sender->peer->incoming_queue;
 
     return qemu_net_queue_send_iov(queue, sender,
                                    QEMU_NET_PACKET_FLAG_NONE,
commit aa4f082f7526d39dac8e2ca64d192d858014ee10
Author: Brad Smith <brad at comstyle.com>
Date:   Sat Aug 3 22:20:41 2013 -0400

    tap: Use numbered tap/tun devices on all *BSD OS's
    
    The following patch simplifies the *BSD tap/tun code and makes use of numbered
    tap/tun interfaces on all *BSD OS's. NetBSD has a patch in their pkgsrc tree
    to make use of this feature and DragonFly supports this as well.
    
    Signed-off-by: Brad Smith <brad at comstyle.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index f61d580..90f8a02 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -44,8 +44,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     struct stat s;
 #endif
 
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
-    defined(__OpenBSD__) || defined(__APPLE__)
     /* if no ifname is given, always start the search from tap0/tun0. */
     int i;
     char dname[100];
@@ -76,15 +74,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
                    dname, strerror(errno));
         return -1;
     }
-#else
-    TFR(fd = open("/dev/tap", O_RDWR));
-    if (fd < 0) {
-        fprintf(stderr,
-            "warning: could not open /dev/tap: no virtual network emulation: %s\n",
-            strerror(errno));
-        return -1;
-    }
-#endif
 
 #ifdef TAPGIFNAME
     if (ioctl(fd, TAPGIFNAME, (void *)&ifr) < 0) {
commit 8f94b077877151de93a63c73f796897309568ddb
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Wed Sep 4 13:16:05 2013 +0200

    qemu-iotests: Fixed test case 026
    
    The reference output for test case 026 hasn't been updated in a long
    time and it's one of the "known failing" cases. This patch updates the
    reference output so that unintentional changes can be reliably detected
    again.
    
    The problem with this test case is that it produces different output
    depending on whether -nocache is used or not. The solution of this patch
    is to actually have two different reference outputs. If nnn.out.nocache
    exists, it is used as the reference output for -nocache; otherwise,
    nnn.out stays valid for both cases.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out
index fb4f20e..0764389 100644
--- a/tests/qemu-iotests/026.out
+++ b/tests/qemu-iotests/026.out
@@ -126,62 +126,64 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 Event: l2_update; errno: 5; imm: off; once: on; write 
 write failed: Input/output error
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 5; imm: off; once: on; write -b
 write failed: Input/output error
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 5; imm: off; once: off; write 
 write failed: Input/output error
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 5; imm: off; once: off; write -b
 write failed: Input/output error
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 28; imm: off; once: on; write 
 write failed: No space left on device
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 28; imm: off; once: on; write -b
 write failed: No space left on device
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 28; imm: off; once: off; write 
 write failed: No space left on device
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_update; errno: 28; imm: off; once: off; write -b
 write failed: No space left on device
 
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
 This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_alloc.write; errno: 5; imm: off; once: on; write 
 write failed: Input/output error
-No errors were found on the image.
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b
@@ -205,7 +207,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
 
 Event: l2_alloc.write; errno: 28; imm: off; once: on; write 
 write failed: No space left on device
-No errors were found on the image.
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b
@@ -575,7 +579,6 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l1_grow.write_table; errno: 5; imm: off; once: off
-qcow2_free_clusters failed: Input/output error
 write failed: Input/output error
 No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
@@ -586,7 +589,6 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l1_grow.write_table; errno: 28; imm: off; once: off
-qcow2_free_clusters failed: No space left on device
 write failed: No space left on device
 No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
@@ -597,7 +599,6 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l1_grow.activate_table; errno: 5; imm: off; once: off
-qcow2_free_clusters failed: Input/output error
 write failed: Input/output error
 
 96 leaked clusters were found on the image.
@@ -610,7 +611,6 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
 
 Event: l1_grow.activate_table; errno: 28; imm: off; once: off
-qcow2_free_clusters failed: No space left on device
 write failed: No space left on device
 
 96 leaked clusters were found on the image.
diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache
new file mode 100644
index 0000000..33bad0d
--- /dev/null
+++ b/tests/qemu-iotests/026.out.nocache
@@ -0,0 +1,626 @@
+QA output created by 026
+Errors while writing 128 kB
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_update; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 5; imm: off; once: on; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 5; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 5; imm: off; once: off; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 5; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 28; imm: off; once: on; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 28; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 28; imm: off; once: off; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_load; errno: 28; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 5; imm: off; once: on; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 5; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 5; imm: off; once: off; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 5; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 28; imm: off; once: on; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 28; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 28; imm: off; once: off; write 
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_update; errno: 28; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l2_alloc.write; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: write_aio; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_load; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_update_part; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 5; imm: off; once: on; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 5; imm: off; once: off; write 
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: cluster_alloc; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+
+=== Refcout table growth tests ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+
+55 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+251 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write 
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write 
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+
+=== L1 growth tests ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.alloc_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.alloc_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.alloc_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.alloc_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.write_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.write_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.write_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.write_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.activate_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.activate_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+
+96 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.activate_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 
+
+Event: l1_grow.activate_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+
+96 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index e51bae8..4ecf497 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -239,12 +239,18 @@ do
                 echo -n " [failed, exit status $sts]"
                 err=true
             fi
-            if [ ! -f $seq.out ]
+
+            reference=$seq.out
+            if (echo $QEMU_IO_OPTIONS | grep -s -- '--nocache' > /dev/null); then
+                [ -f $seq.out.nocache ] && reference=$seq.out.nocache
+            fi
+
+            if [ ! -f $reference ]
             then
                 echo " - no qualified output"
                 err=true
             else
-                if diff -w $seq.out $tmp.out >/dev/null 2>&1
+                if diff -w $reference $tmp.out >/dev/null 2>&1
                 then
                     echo ""
                     if $err
@@ -256,7 +262,7 @@ do
                 else
                     echo " - output mismatch (see $seq.out.bad)"
                     mv $tmp.out $seq.out.bad
-                    $diff -w $seq.out $seq.out.bad
+                    $diff -w $reference $seq.out.bad
                     err=true
                 fi
             fi
commit 79e40ab10e1f4450c11ab8430cb2547146ded639
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Wed Sep 4 13:16:04 2013 +0200

    qemu-iotests: Whitespace cleanup
    
    These scripts used to have a four-character indentation, with every eight
    consecutive spaces converted into a tab. Convert everything into spaces.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 74628ae..e51bae8 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -78,50 +78,50 @@ _wrapup()
 
     if $showme
     then
-	:
+        :
     elif $needwrap
     then
-	if [ -f check.time -a -f $tmp.time ]
-	then
-	    cat check.time $tmp.time \
-	    | $AWK_PROG '
-	{ t[$1] = $2 }
-END	{ if (NR > 0) {
-	    for (i in t) print i " " t[i]
-	  }
-	}' \
-	    | sort -n >$tmp.out
-	    mv $tmp.out check.time
-	fi
-
-	if [ -f $tmp.expunged ]
-	then
-	    notrun=`wc -l <$tmp.expunged | sed -e 's/  *//g'`
-	    try=`expr $try - $notrun`
-	    list=`echo "$list" | sed -f $tmp.expunged`
-	fi
-
-	echo "" >>check.log
-	date >>check.log
-	echo $list | fmt | sed -e 's/^/    /' >>check.log
-	$interrupt && echo "Interrupted!" >>check.log
-        
-	if [ ! -z "$notrun" ]
-	then
-	    echo "Not run:$notrun"
-	    echo "Not run:$notrun" >>check.log
-	fi
+        if [ -f check.time -a -f $tmp.time ]
+        then
+            cat check.time $tmp.time \
+            | $AWK_PROG '
+        { t[$1] = $2 }
+END        { if (NR > 0) {
+            for (i in t) print i " " t[i]
+          }
+        }' \
+            | sort -n >$tmp.out
+            mv $tmp.out check.time
+        fi
+
+        if [ -f $tmp.expunged ]
+        then
+            notrun=`wc -l <$tmp.expunged | sed -e 's/  *//g'`
+            try=`expr $try - $notrun`
+            list=`echo "$list" | sed -f $tmp.expunged`
+        fi
+
+        echo "" >>check.log
+        date >>check.log
+        echo $list | fmt | sed -e 's/^/    /' >>check.log
+        $interrupt && echo "Interrupted!" >>check.log
+
+        if [ ! -z "$notrun" ]
+        then
+            echo "Not run:$notrun"
+            echo "Not run:$notrun" >>check.log
+        fi
         if [ ! -z "$n_bad" -a $n_bad != 0 ]
-	then
-	    echo "Failures:$bad"
-	    echo "Failed $n_bad of $try tests"
-	    echo "Failures:$bad" | fmt >>check.log
-	    echo "Failed $n_bad of $try tests" >>check.log
-	else
-	    echo "Passed all $try tests"
-	    echo "Passed all $try tests" >>check.log
-	fi
-	needwrap=false
+        then
+            echo "Failures:$bad"
+            echo "Failed $n_bad of $try tests"
+            echo "Failures:$bad" | fmt >>check.log
+            echo "Failed $n_bad of $try tests" >>check.log
+        else
+            echo "Passed all $try tests"
+            echo "Passed all $try tests" >>check.log
+        fi
+        needwrap=false
     fi
 
     rm -f /tmp/*.out /tmp/*.err /tmp/*.time
@@ -185,82 +185,82 @@ do
 
     if $showme
     then
-	echo
-	continue
-    elif [ -f expunged ] && $expunge && egrep "^$seq([ 	]|\$)" expunged >/dev/null
+        echo
+        continue
+    elif [ -f expunged ] && $expunge && egrep "^$seq([         ]|\$)" expunged >/dev/null
     then
-	echo " - expunged"
-	rm -f $seq.out.bad
-	echo "/^$seq\$/d" >>$tmp.expunged
+        echo " - expunged"
+        rm -f $seq.out.bad
+        echo "/^$seq\$/d" >>$tmp.expunged
     elif [ ! -f $seq ]
     then
-	echo " - no such test?"
-	echo "/^$seq\$/d" >>$tmp.expunged
+        echo " - no such test?"
+        echo "/^$seq\$/d" >>$tmp.expunged
     else
-	# really going to try and run this one
-	#
-	rm -f $seq.out.bad
-	lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
-	if [ "X$lasttime" != X ]; then
-		echo -n " ${lasttime}s ..."
-	else
-		echo -n "	"	# prettier output with timestamps.
-	fi
-	rm -f core $seq.notrun
-
-	# for hangcheck ...
-	echo "$seq" >/tmp/check.sts
-
-	start=`_wallclock`
-	$timestamp && echo -n "	["`date "+%T"`"]"
-	[ ! -x $seq ] && chmod u+x $seq # ensure we can run it
-	MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
-		./$seq >$tmp.out 2>&1
-	sts=$?
-	$timestamp && _timestamp
-	stop=`_wallclock`
-
-	if [ -f core ]
-	then
-	    echo -n " [dumped core]"
-	    mv core $seq.core
-	    err=true
-	fi
-
-	if [ -f $seq.notrun ]
-	then
-	    $timestamp || echo -n " [not run] "
-	    $timestamp && echo " [not run]" && echo -n "	$seq -- "
-	    cat $seq.notrun
-	    notrun="$notrun $seq"
-	else
-	    if [ $sts -ne 0 ]
-	    then
-		echo -n " [failed, exit status $sts]"
-		err=true
-	    fi
-	    if [ ! -f $seq.out ]
-	    then
-		echo " - no qualified output"
-		err=true
-	    else
-		if diff -w $seq.out $tmp.out >/dev/null 2>&1
-		then
-		    echo ""
-		    if $err
-		    then
-			:
-		    else
-			echo "$seq `expr $stop - $start`" >>$tmp.time
-		    fi
-		else
-		    echo " - output mismatch (see $seq.out.bad)"
-		    mv $tmp.out $seq.out.bad
-		    $diff -w $seq.out $seq.out.bad
-		    err=true
-		fi
-	    fi
-	fi
+        # really going to try and run this one
+        #
+        rm -f $seq.out.bad
+        lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
+        if [ "X$lasttime" != X ]; then
+                echo -n " ${lasttime}s ..."
+        else
+                echo -n "        "        # prettier output with timestamps.
+        fi
+        rm -f core $seq.notrun
+
+        # for hangcheck ...
+        echo "$seq" >/tmp/check.sts
+
+        start=`_wallclock`
+        $timestamp && echo -n "        ["`date "+%T"`"]"
+        [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
+        MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
+                ./$seq >$tmp.out 2>&1
+        sts=$?
+        $timestamp && _timestamp
+        stop=`_wallclock`
+
+        if [ -f core ]
+        then
+            echo -n " [dumped core]"
+            mv core $seq.core
+            err=true
+        fi
+
+        if [ -f $seq.notrun ]
+        then
+            $timestamp || echo -n " [not run] "
+            $timestamp && echo " [not run]" && echo -n "        $seq -- "
+            cat $seq.notrun
+            notrun="$notrun $seq"
+        else
+            if [ $sts -ne 0 ]
+            then
+                echo -n " [failed, exit status $sts]"
+                err=true
+            fi
+            if [ ! -f $seq.out ]
+            then
+                echo " - no qualified output"
+                err=true
+            else
+                if diff -w $seq.out $tmp.out >/dev/null 2>&1
+                then
+                    echo ""
+                    if $err
+                    then
+                        :
+                    else
+                        echo "$seq `expr $stop - $start`" >>$tmp.time
+                    fi
+                else
+                    echo " - output mismatch (see $seq.out.bad)"
+                    mv $tmp.out $seq.out.bad
+                    $diff -w $seq.out $seq.out.bad
+                    err=true
+                fi
+            fi
+        fi
 
     fi
 
@@ -268,12 +268,12 @@ do
     #
     if $err
     then
-	bad="$bad $seq"
-	n_bad=`expr $n_bad + 1`
-	quick=false
+        bad="$bad $seq"
+        n_bad=`expr $n_bad + 1`
+        quick=false
     fi
     [ -f $seq.notrun ] || try=`expr $try + 1`
-    
+
     seq="after_$seq"
 done
 
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 6826ea7..fecaf85 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -54,58 +54,58 @@ do
 
     if $group
     then
-	# arg after -g
-	group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+        # arg after -g
+        group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
 s/ .*//p
 }'`
-	if [ -z "$group_list" ]
-	then
-	    echo "Group \"$r\" is empty or not defined?"
-	    exit 1
-	fi
-	[ ! -s $tmp.list ] && touch $tmp.list
-	for t in $group_list
-	do
-	    if grep -s "^$t\$" $tmp.list >/dev/null
-	    then
-		:
-	    else
-		echo "$t" >>$tmp.list
-	    fi
-	done
-	group=false
-	continue
+        if [ -z "$group_list" ]
+        then
+            echo "Group \"$r\" is empty or not defined?"
+            exit 1
+        fi
+        [ ! -s $tmp.list ] && touch $tmp.list
+        for t in $group_list
+        do
+            if grep -s "^$t\$" $tmp.list >/dev/null
+            then
+                :
+            else
+                echo "$t" >>$tmp.list
+            fi
+        done
+        group=false
+        continue
 
     elif $xgroup
     then
-	# arg after -x
-	[ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
-	group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+        # arg after -x
+        [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
+        group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
 s/ .*//p
 }'`
-	if [ -z "$group_list" ]
-	then
-	    echo "Group \"$r\" is empty or not defined?"
-	    exit 1
-	fi
-	numsed=0
-	rm -f $tmp.sed
-	for t in $group_list
-	do
-	    if [ $numsed -gt 100 ]
-	    then
-		sed -f $tmp.sed <$tmp.list >$tmp.tmp
-		mv $tmp.tmp $tmp.list
-		numsed=0
-		rm -f $tmp.sed
-	    fi
-	    echo "/^$t\$/d" >>$tmp.sed
-	    numsed=`expr $numsed + 1`
-	done
-	sed -f $tmp.sed <$tmp.list >$tmp.tmp
-	mv $tmp.tmp $tmp.list
-	xgroup=false
-	continue
+        if [ -z "$group_list" ]
+        then
+            echo "Group \"$r\" is empty or not defined?"
+            exit 1
+        fi
+        numsed=0
+        rm -f $tmp.sed
+        for t in $group_list
+        do
+            if [ $numsed -gt 100 ]
+            then
+                sed -f $tmp.sed <$tmp.list >$tmp.tmp
+                mv $tmp.tmp $tmp.list
+                numsed=0
+                rm -f $tmp.sed
+            fi
+            echo "/^$t\$/d" >>$tmp.sed
+            numsed=`expr $numsed + 1`
+        done
+        sed -f $tmp.sed <$tmp.list >$tmp.tmp
+        mv $tmp.tmp $tmp.list
+        xgroup=false
+        continue
 
     elif $imgopts
     then
@@ -119,11 +119,11 @@ s/ .*//p
     case "$r"
     in
 
-	-\? | -h | --help)	# usage
-	    echo "Usage: $0 [options] [testlist]"'
+        -\? | -h | --help)        # usage
+            echo "Usage: $0 [options] [testlist]"'
 
 common options
-    -v			verbose
+    -v                        verbose
 
 check options
     -raw                test raw (default)
@@ -138,162 +138,162 @@ check options
     -sheepdog           test sheepdog
     -nbd                test nbd
     -ssh                test ssh
-    -xdiff		graphical mode diff
-    -nocache		use O_DIRECT on backing file
-    -misalign		misalign memory allocations
-    -n			show me, do not run tests
+    -xdiff                graphical mode diff
+    -nocache                use O_DIRECT on backing file
+    -misalign                misalign memory allocations
+    -n                        show me, do not run tests
     -o options          -o options to pass to qemu-img create/convert
-    -T			output timestamps
-    -r 			randomize test order
-    
+    -T                        output timestamps
+    -r                         randomize test order
+
 testlist options
-    -g group[,group...]	include tests from these groups
-    -x group[,group...]	exclude tests from these groups
-    NNN			include test NNN
-    NNN-NNN		include test range (eg. 012-021)
+    -g group[,group...]        include tests from these groups
+    -x group[,group...]        exclude tests from these groups
+    NNN                        include test NNN
+    NNN-NNN                include test range (eg. 012-021)
 '
-	    exit 0
-	    ;;
-
-	-raw)
-	    IMGFMT=raw
-	    xpand=false
-	    ;;
-
-	-cow)
-	    IMGFMT=cow
-	    xpand=false
-	    ;;
-
-	-qcow)
-	    IMGFMT=qcow
-	    xpand=false
-	    ;;
-
-	-qcow2)
-	    IMGFMT=qcow2
-	    xpand=false
-	    ;;
-
-	-qed)
-	    IMGFMT=qed
-	    xpand=false
-	    ;;
-
-	-vdi)
-	    IMGFMT=vdi
-	    xpand=false
-	    ;;
-
-	-vmdk)
-	    IMGFMT=vmdk
-	    xpand=false
-	    ;;
-
-	-vpc)
-	    IMGFMT=vpc
-	    xpand=false
-	    ;;
-
-	-rbd)
-	    IMGPROTO=rbd
-	    xpand=false
-	    ;;
-	-sheepdog)
-	    IMGPROTO=sheepdog
-	    xpand=false
-	    ;;
-	-nbd)
-	    IMGPROTO=nbd
-	    xpand=false
-	    ;;
+            exit 0
+            ;;
+
+        -raw)
+            IMGFMT=raw
+            xpand=false
+            ;;
+
+        -cow)
+            IMGFMT=cow
+            xpand=false
+            ;;
+
+        -qcow)
+            IMGFMT=qcow
+            xpand=false
+            ;;
+
+        -qcow2)
+            IMGFMT=qcow2
+            xpand=false
+            ;;
+
+        -qed)
+            IMGFMT=qed
+            xpand=false
+            ;;
+
+        -vdi)
+            IMGFMT=vdi
+            xpand=false
+            ;;
+
+        -vmdk)
+            IMGFMT=vmdk
+            xpand=false
+            ;;
+
+        -vpc)
+            IMGFMT=vpc
+            xpand=false
+            ;;
+
+        -rbd)
+            IMGPROTO=rbd
+            xpand=false
+            ;;
+        -sheepdog)
+            IMGPROTO=sheepdog
+            xpand=false
+            ;;
+        -nbd)
+            IMGPROTO=nbd
+            xpand=false
+            ;;
         -ssh)
             IMGPROTO=ssh
             xpand=false
             ;;
-	-nocache)
-	    QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
-	    xpand=false
-	    ;;
+        -nocache)
+            QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
+            xpand=false
+            ;;
 
-	-misalign)
-	    QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign"
-	    xpand=false
-	    ;;
+        -misalign)
+            QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign"
+            xpand=false
+            ;;
 
     -valgrind)
         valgrind=true
-	    xpand=false
+            xpand=false
         ;;
 
-	-g)	# -g group ... pick from group file
-	    group=true
-	    xpand=false
-	    ;;
-
-	-xdiff)	# graphical diff mode
-	    xpand=false
-
-	    if [ ! -z "$DISPLAY" ]
-	    then
-		which xdiff >/dev/null 2>&1 && diff=xdiff
-		which gdiff >/dev/null 2>&1 && diff=gdiff
-		which tkdiff >/dev/null 2>&1 && diff=tkdiff
-		which xxdiff >/dev/null 2>&1 && diff=xxdiff
-	    fi
-	    ;;
-
-	-n)	# show me, don't do it
-	    showme=true
-	    xpand=false
-	    ;;
+        -g)        # -g group ... pick from group file
+            group=true
+            xpand=false
+            ;;
+
+        -xdiff)        # graphical diff mode
+            xpand=false
+
+            if [ ! -z "$DISPLAY" ]
+            then
+                which xdiff >/dev/null 2>&1 && diff=xdiff
+                which gdiff >/dev/null 2>&1 && diff=gdiff
+                which tkdiff >/dev/null 2>&1 && diff=tkdiff
+                which xxdiff >/dev/null 2>&1 && diff=xxdiff
+            fi
+            ;;
+
+        -n)        # show me, don't do it
+            showme=true
+            xpand=false
+            ;;
         -o)
             imgopts=true
             xpand=false
             ;;
-        -r)	# randomize test order
-	    randomize=true
-	    xpand=false
-	    ;;
-
-	-T)	# turn on timestamp output
-	    timestamp=true
-	    xpand=false
-	    ;;
-
-	-v)
-	    verbose=true
-	    xpand=false
-	    ;;
-	-x)	# -x group ... exclude from group file
-	    xgroup=true
-	    xpand=false
-	    ;;
-	'[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]')
-	    echo "No tests?"
-	    status=1
-	    exit $status
-	    ;;
-
-	[0-9]*-[0-9]*)
-	    eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'`
-	    ;;
-
-	[0-9]*-)
-	    eval `echo $r | sed -e 's/^/start=/' -e 's/-//'`
-	    end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/  *$//' -e 's/.* //'`
-	    if [ -z "$end" ]
-	    then
-		echo "No tests in range \"$r\"?"
-		status=1
-		exit $status
-	    fi
-	    ;;
-
-	*)
-	    start=$r
-	    end=$r
-	    ;;
+        -r)        # randomize test order
+            randomize=true
+            xpand=false
+            ;;
+
+        -T)        # turn on timestamp output
+            timestamp=true
+            xpand=false
+            ;;
+
+        -v)
+            verbose=true
+            xpand=false
+            ;;
+        -x)        # -x group ... exclude from group file
+            xgroup=true
+            xpand=false
+            ;;
+        '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]')
+            echo "No tests?"
+            status=1
+            exit $status
+            ;;
+
+        [0-9]*-[0-9]*)
+            eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'`
+            ;;
+
+        [0-9]*-)
+            eval `echo $r | sed -e 's/^/start=/' -e 's/-//'`
+            end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/  *$//' -e 's/.* //'`
+            if [ -z "$end" ]
+            then
+                echo "No tests in range \"$r\"?"
+                status=1
+                exit $status
+            fi
+            ;;
+
+        *)
+            start=$r
+            end=$r
+            ;;
 
     esac
 
@@ -303,26 +303,26 @@ testlist options
 
     if $xpand
     then
-	have_test_arg=true
-	$AWK_PROG </dev/null '
-BEGIN	{ for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
-	| while read id
-	do
-	    if grep -s "^$id " group >/dev/null
-	    then
-		# in group file ... OK
-		echo $id >>$tmp.list
-	    else
-		if [ -f expunged ] && $expunge && egrep "^$id([ 	]|\$)" expunged >/dev/null
-		then
-		    # expunged ... will be reported, but not run, later
-		    echo $id >>$tmp.list
-		else
-		    # oops
-		    echo "$id - unknown test, ignored"
-		fi
-	    fi
-	done
+        have_test_arg=true
+        $AWK_PROG </dev/null '
+BEGIN        { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
+        | while read id
+        do
+            if grep -s "^$id " group >/dev/null
+            then
+                # in group file ... OK
+                echo $id >>$tmp.list
+            else
+                if [ -f expunged ] && $expunge && egrep "^$id([         ]|\$)" expunged >/dev/null
+                then
+                    # expunged ... will be reported, but not run, later
+                    echo $id >>$tmp.list
+                else
+                    # oops
+                    echo "$id - unknown test, ignored"
+                fi
+            fi
+        done
     fi
 
 done
@@ -337,11 +337,11 @@ then
 else
     if $have_test_arg
     then
-	# had test numbers, but none in group file ... do nothing
-	touch $tmp.list
+        # had test numbers, but none in group file ... do nothing
+        touch $tmp.list
     else
-	# no test numbers, do everything from group file
-	sed -n -e '/^[0-9][0-9][0-9]*/s/[ 	].*//p' <group >$tmp.list
+        # no test numbers, do everything from group file
+        sed -n -e '/^[0-9][0-9][0-9]*/s/[         ].*//p' <group >$tmp.list
     fi
 fi
 
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 08a3f10..d794e62 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -19,7 +19,7 @@
 # setup and check for config parameters, and in particular
 #
 # EMAIL -           email of the script runner.
-# TEST_DIR -        scratch test directory 
+# TEST_DIR -        scratch test directory
 #
 # - These can be added to $HOST_CONFIG_DIR (witch default to ./config)
 #   below or a separate local configuration file can be used (using
@@ -111,11 +111,11 @@ export QEMU_NBD=$QEMU_NBD_PROG
 [ -f /etc/qemu-iotest.config ]       && . /etc/qemu-iotest.config
 
 if [ -z "$TEST_DIR" ]; then
-	TEST_DIR=`pwd`/scratch
+        TEST_DIR=`pwd`/scratch
 fi
 
 if [ ! -e "$TEST_DIR" ]; then
-	mkdir "$TEST_DIR"
+        mkdir "$TEST_DIR"
 fi
 
 if [ ! -d "$TEST_DIR" ]; then
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 97a31ff..5dfda63 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -25,19 +25,19 @@
 # Outputs suitable message to stdout if it's not in range.
 #
 # A verbose option, -v, may be used as the LAST argument
-# 
-# e.g. 
+#
+# e.g.
 # foo: 0.0298 = 0.03 +/- 5%
-# _within_tolerance "foo" 0.0298 0.03 5%  
-# 
+# _within_tolerance "foo" 0.0298 0.03 5%
+#
 # foo: 0.0298 = 0.03 +/- 0.01
 # _within_tolerance "foo" 0.0298 0.03 0.01
 #
 # foo: 0.0298 = 0.03 -0.01 +0.002
 # _within_tolerance "foo" 0.0298 0.03 0.01 0.002
 #
-# foo: verbose output of 0.0298 = 0.03 +/- 5% 
-# _within_tolerance "foo" 0.0298 0.03 5% -v 
+# foo: verbose output of 0.0298 = 0.03 +/- 5%
+# _within_tolerance "foo" 0.0298 0.03 5% -v
 _within_tolerance()
 {
   _name=$1
@@ -51,10 +51,10 @@ _within_tolerance()
   # maxtol arg is optional
   # verbose arg is optional
   if [ $# -ge 5 ]
-  then 
+  then
      if [ "$5" = "-v" ]
      then
-	_verbose=1
+        _verbose=1
      else
         _maxtol=$5
      fi
@@ -65,18 +65,18 @@ _within_tolerance()
   fi
 
   # find min with or without %
-  _mintolerance=`echo $_mintol | sed -e 's/%//'` 
+  _mintolerance=`echo $_mintol | sed -e 's/%//'`
   if [ $_mintol = $_mintolerance ]
-  then 
+  then
       _min=`echo "scale=5; $_correct_val-$_mintolerance" | bc`
   else
       _min=`echo "scale=5; $_correct_val-$_mintolerance*0.01*$_correct_val" | bc`
   fi
 
   # find max with or without %
-  _maxtolerance=`echo $_maxtol | sed -e 's/%//'` 
+  _maxtolerance=`echo $_maxtol | sed -e 's/%//'`
   if [ $_maxtol = $_maxtolerance ]
-  then 
+  then
       _max=`echo "scale=5; $_correct_val+$_maxtolerance" | bc`
   else
       _max=`echo "scale=5; $_correct_val+$_maxtolerance*0.01*$_correct_val" | bc`
@@ -88,7 +88,7 @@ _within_tolerance()
   cat <<EOF >$tmp.bc.1
 scale=5;
 if ($_min <= $_given_val) 1;
-if ($_min > $_given_val) 0; 
+if ($_min > $_given_val) 0;
 EOF
 
   cat <<EOF >$tmp.bc.2
@@ -102,21 +102,21 @@ EOF
 
   rm -f $tmp.bc.[12]
 
-  _in_range=`expr $_above_min \& $_below_max` 
+  _in_range=`expr $_above_min \& $_below_max`
 
   # fix up min, max precision for output
   # can vary for 5.3, 6.2
   _min=`echo $_min | sed -e 's/0*$//'` # get rid of trailling zeroes
   _max=`echo $_max | sed -e 's/0*$//'` # get rid of trailling zeroes
 
-  if [ $_in_range -eq 1 ] 
+  if [ $_in_range -eq 1 ]
   then
-	[ $_verbose -eq 1 ] && echo $_name is in range
-	return 0
+        [ $_verbose -eq 1 ] && echo $_name is in range
+        return 0
   else
-	[ $_verbose -eq 1 ] && echo $_name has value of $_given_val
-	[ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max	
-	return 1
+        [ $_verbose -eq 1 ] && echo $_name has value of $_given_val
+        [ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max
+        return 1
   fi
 }
 
@@ -125,7 +125,7 @@ EOF
 _filter_date()
 {
     sed \
-	-e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z]  *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/'
+        -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z]  *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/'
 }
 
 # replace occurrences of the actual TEST_DIR value with TEST_DIR
diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index 85a40ee..00e0f60 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern
@@ -106,8 +106,8 @@ function io_test2() {
     local num=$3
 
     # Pattern (repeat after 9 clusters):
-    #	used - used - free - used - compressed - compressed -
-    #	free - free - compressed
+    #        used - used - free - used - compressed - compressed -
+    #        free - free - compressed
 
     # Write the clusters to be compressed
     echo === Clusters to be compressed [1]
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 5e077c3..88fecf7 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -20,17 +20,17 @@
 dd()
 {
    if [ "$HOSTOS" == "Linux" ]
-   then	
-	command dd --help | grep noxfer > /dev/null 2>&1
-	
-	if [ "$?" -eq 0 ]
-	    then
-		command dd status=noxfer $@
-	    else
-		command dd $@
-    	fi
+   then
+        command dd --help | grep noxfer > /dev/null 2>&1
+
+        if [ "$?" -eq 0 ]
+            then
+                command dd status=noxfer $@
+            else
+                command dd $@
+            fi
    else
-	command dd $@
+        command dd $@
    fi
 }
 
@@ -193,8 +193,8 @@ _get_pids_by_name()
 {
     if [ $# -ne 1 ]
     then
-	echo "Usage: _get_pids_by_name process-name" 1>&2
-	exit 1
+        echo "Usage: _get_pids_by_name process-name" 1>&2
+        exit 1
     fi
 
     # Algorithm ... all ps(1) variants have a time of the form MM:SS or
@@ -206,12 +206,12 @@ _get_pids_by_name()
 
     ps $PS_ALL_FLAGS \
     | sed -n \
-	-e 's/$/ /' \
-	-e 's/[ 	][ 	]*/ /g' \
-	-e 's/^ //' \
-	-e 's/^[^ ]* //' \
-	-e "/[0-9]:[0-9][0-9]  *[^ ]*\/$1 /s/ .*//p" \
-	-e "/[0-9]:[0-9][0-9]  *$1 /s/ .*//p"
+        -e 's/$/ /' \
+        -e 's/[         ][         ]*/ /g' \
+        -e 's/^ //' \
+        -e 's/^[^ ]* //' \
+        -e "/[0-9]:[0-9][0-9]  *[^ ]*\/$1 /s/ .*//p" \
+        -e "/[0-9]:[0-9][0-9]  *$1 /s/ .*//p"
 }
 
 # fqdn for localhost
@@ -229,8 +229,8 @@ _need_to_be_root()
     id=`id | $SED_PROG -e 's/(.*//' -e 's/.*=//'`
     if [ "$id" -ne 0 ]
     then
-	echo "Arrgh ... you need to be root (not uid=$id) to run this test"
-	exit 1
+        echo "Arrgh ... you need to be root (not uid=$id) to run this test"
+        exit 1
     fi
 }
 
@@ -248,33 +248,33 @@ _need_to_be_root()
 _do()
 {
     if [ $# -eq 1 ]; then
-	_cmd=$1
+        _cmd=$1
     elif [ $# -eq 2 ]; then
-	_note=$1
-	_cmd=$2
-	echo -n "$_note... "
+        _note=$1
+        _cmd=$2
+        echo -n "$_note... "
     else
-	echo "Usage: _do [note] cmd" 1>&2
-	status=1; exit
+        echo "Usage: _do [note] cmd" 1>&2
+        status=1; exit
     fi
 
     (eval "echo '---' \"$_cmd\"") >>$here/$seq.full
     (eval "$_cmd") >$tmp._out 2>&1; ret=$?
     cat $tmp._out >>$here/$seq.full
     if [ $# -eq 2 ]; then
-	if [ $ret -eq 0 ]; then
-	    echo "done"
-	else
-	    echo "fail"
-	fi
+        if [ $ret -eq 0 ]; then
+            echo "done"
+        else
+            echo "fail"
+        fi
     fi
     if [ $ret -ne 0  ] \
-	&& [ "$_do_die_on_error" = "always" \
-	    -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ]
+        && [ "$_do_die_on_error" = "always" \
+            -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ]
     then
-	[ $# -ne 2 ] && echo
-	eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full"
-	status=1; exit
+        [ $# -ne 2 ] && echo
+        eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full"
+        status=1; exit
     fi
 
     return $ret
@@ -305,9 +305,9 @@ _fail()
 _supported_fmt()
 {
     for f; do
-	if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then
-	    return
-	fi
+        if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then
+            return
+        fi
     done
 
     _notrun "not suitable for this image format: $IMGFMT"
@@ -318,9 +318,9 @@ _supported_fmt()
 _supported_proto()
 {
     for f; do
-	if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then
-	    return
-	fi
+        if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then
+            return
+        fi
     done
 
     _notrun "not suitable for this image protocol: $IMGPROTO"
@@ -332,10 +332,10 @@ _supported_os()
 {
     for h
     do
-	if [ "$h" = "$HOSTOS" ]
-	then
-	    return
-	fi
+        if [ "$h" = "$HOSTOS" ]
+        then
+            return
+        fi
     done
 
     _notrun "not suitable for this OS: $HOSTOS"
commit 8caf907f07688972e5e7cd11526079b1665d6dba
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Wed Sep 4 14:16:15 2013 +0200

    dataplane: Fix startup race.
    
    Avoid trying to setup dataplane again if dataplane setup is already in
    progress. This may happen if an eventfd is triggered during setup.
    
    I saw this occasionally with an experimental s390 irqfd implementation:
    
    virtio_blk_handle_output
    -> virtio_blk_data_plane_start
    -> virtio_ccw_set_host_notifier
    ...
    -> virtio_queue_set_host_notifier_fd_handler
    -> virtio_queue_host_notifier_read
    -> virtio_queue_notify_vq
    -> virtio_blk_handle_output
    -> virtio_blk_data_plane_start
    -> vring_setup
    -> hostmem_init
    -> memory_listener_register
    -> BOOM
    
    As virtio-ccw tries to follow what virtio-pci does, it might be triggerable
    for other platforms as well.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 5a96ccd..f2d7350 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -42,6 +42,7 @@ typedef struct {
 
 struct VirtIOBlockDataPlane {
     bool started;
+    bool starting;
     bool stopping;
     QEMUBH *start_bh;
     QemuThread thread;
@@ -451,8 +452,15 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
         return;
     }
 
+    if (s->starting) {
+        return;
+    }
+
+    s->starting = true;
+
     vq = virtio_get_queue(s->vdev, 0);
     if (!vring_setup(&s->vring, s->vdev, 0)) {
+        s->starting = false;
         return;
     }
 
@@ -482,6 +490,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
     s->io_notifier = *ioq_get_notifier(&s->ioqueue);
     aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io);
 
+    s->starting = false;
     s->started = true;
     trace_virtio_blk_data_plane_start(s);
 
commit 5daa74a6ebce7543aaad178c4061dc087bb4c705
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:38 2013 +0200

    block: look for zero blocks in bs->file
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 3c39769..a325efc 100644
--- a/block.c
+++ b/block.c
@@ -3075,7 +3075,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
 {
     int64_t length;
     int64_t n;
-    int64_t ret;
+    int64_t ret, ret2;
 
     length = bdrv_getlength(bs);
     if (length < 0) {
@@ -3117,6 +3117,20 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
             }
         }
     }
+
+    if (bs->file &&
+        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
+        (ret & BDRV_BLOCK_OFFSET_VALID)) {
+        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+                                        *pnum, pnum);
+        if (ret2 >= 0) {
+            /* Ignore errors.  This is just providing extra information, it
+             * is useful but not necessary.
+             */
+            ret |= (ret2 & BDRV_BLOCK_ZERO);
+        }
+    }
+
     return ret;
 }
 
commit 918e92d71b38306a6bf4fedfd1109367f1425587
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:37 2013 +0200

    block: add default get_block_status implementation for protocols
    
    Protocols return raw data, so you can assume the offsets to pass
    through unchanged.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 82bbd6c..3c39769 100644
--- a/block.c
+++ b/block.c
@@ -3094,7 +3094,11 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
 
     if (!bs->drv->bdrv_co_get_block_status) {
         *pnum = nb_sectors;
-        return BDRV_BLOCK_DATA;
+        ret = BDRV_BLOCK_DATA;
+        if (bs->drv->protocol_name) {
+            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+        }
+        return ret;
     }
 
     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
commit f5f7abcfd50bca38ae278a9c7a3ed60fba3d76dc
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:36 2013 +0200

    raw-posix: report unwritten extents as zero
    
    These are created for example with XFS_IOC_ZERO_RANGE.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-posix.c b/block/raw-posix.c
index d011cfd..1b41ea3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1128,6 +1128,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
     } else {
         data = f.fe.fe_logical;
         hole = f.fe.fe_logical + f.fe.fe_length;
+        if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
+            ret |= BDRV_BLOCK_ZERO;
+        }
     }
 
 #elif defined SEEK_HOLE && defined SEEK_DATA
commit 63390a8d14c9006f42bdaab22291c9c97676322d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:35 2013 +0200

    raw-posix: return get_block_status data and flags
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-posix.c b/block/raw-posix.c
index dbc65b0..d011cfd 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1089,7 +1089,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                             int nb_sectors, int *pnum)
 {
     off_t start, data, hole;
-    int ret;
+    int64_t ret;
 
     ret = fd_open(bs);
     if (ret < 0) {
@@ -1097,6 +1097,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
     }
 
     start = sector_num * BDRV_SECTOR_SIZE;
+    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 
 #ifdef CONFIG_FIEMAP
 
@@ -1114,7 +1115,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
     if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
         /* Assume everything is allocated.  */
         *pnum = nb_sectors;
-        return 1;
+        return ret;
     }
 
     if (f.fm.fm_mapped_extents == 0) {
@@ -1141,7 +1142,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
 
         /* Most likely EINVAL.  Assume everything is allocated.  */
         *pnum = nb_sectors;
-        return 1;
+        return ret;
     }
 
     if (hole > start) {
@@ -1154,19 +1155,21 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
         }
     }
 #else
-    *pnum = nb_sectors;
-    return 1;
+    data = 0;
+    hole = start + nb_sectors * BDRV_SECTOR_SIZE;
 #endif
 
     if (data <= start) {
         /* On a data extent, compute sectors to the end of the extent.  */
         *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
-        return 1;
     } else {
         /* On a hole, compute sectors to the beginning of the next extent.  */
         *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
-        return 0;
+        ret &= ~BDRV_BLOCK_DATA;
+        ret |= BDRV_BLOCK_ZERO;
     }
+
+    return ret;
 }
 
 static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
commit facd6e2b5c0217f9d9eeb2ee497dda28009518bd
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:34 2013 +0200

    docs, qapi: document qemu-img map
    
    Eric Blake also requested including the output in qapi-schema.json,
    so that it is published through the introspection mechanism.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index 5d5164f..2b2c8bc 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -831,6 +831,35 @@
 { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
 
 ##
+# @BlockDeviceMapEntry:
+#
+# Entry in the metadata map of the device (returned by "qemu-img map")
+#
+# @start: Offset in the image of the first byte described by this entry
+#         (in bytes)
+#
+# @length: Length of the range described by this entry (in bytes)
+#
+# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
+#         before reaching one for which the range is allocated.  The value is
+#         in the range 0 to the depth of the image chain - 1.
+#
+# @zero: the sectors in this range read as zeros
+#
+# @data: reading the image will actually read data from a file (in particular,
+#        if @offset is present this means that the sectors are not simply
+#        preallocated, but contain actual data in raw format)
+#
+# @offset: if present, the image file stores the data for this range in
+#          raw format at the given offset.
+#
+# Since 1.7
+##
+{ 'type': 'BlockDeviceMapEntry',
+  'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
+            'data': 'bool', '*offset': 'int' } }
+
+##
 # @BlockDirtyInfo:
 #
 # Block dirty bitmap information.
diff --git a/qemu-img.texi b/qemu-img.texi
index ad45a6d..43ee4eb 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -226,6 +226,61 @@ To enumerate information about each disk image in the above chain, starting from
 qemu-img info --backing-chain snap2.qcow2
 @end example
 
+@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+
+Dump the metadata of image @var{filename} and its backing file chain.
+In particular, this command dumps the allocation state of every sector
+of @var{filename}, together with the topmost file that allocates it in
+the backing file chain.
+
+Two option formats are possible.  The default format (@code{human})
+only dumps known-nonzero areas of the file.  Known-zero parts of the
+file are omitted altogether, and likewise for parts that are not allocated
+throughout the chain.  @command{qemu-img} output will identify a file
+from where the data can be read, and the offset in the file.  Each line
+will include four fields, the first three of which are hexadecimal
+numbers.  For example the first line of:
+@example
+Offset          Length          Mapped to       File
+0               0x20000         0x50000         /tmp/overlay.qcow2
+0x100000        0x10000         0x95380000      /tmp/backing.qcow2
+@end example
+@noindent
+means that 0x20000 (131072) bytes starting at offset 0 in the image are
+available in /tmp/overlay.qcow2 (opened in @code{raw} format) starting
+at offset 0x50000 (327680).  Data that is compressed, encrypted, or
+otherwise not available in raw format will cause an error if @code{human}
+format is in use.  Note that file names can include newlines, thus it is
+not safe to parse this output format in scripts.
+
+The alternative format @code{json} will return an array of dictionaries
+in JSON format.  It will include similar information in
+the @code{start}, @code{length}, @code{offset} fields;
+it will also include other more specific information:
+@itemize @minus
+@item
+whether the sectors contain actual data or not (boolean field @code{data};
+if false, the sectors are either unallocated or stored as optimized
+all-zero clusters);
+
+@item
+whether the data is known to read as zero (boolean field @code{zero});
+
+@item
+in order to make the output shorter, the target file is expressed as
+a @code{depth}; for example, a depth of 2 refers to the backing file
+of the backing file of @var{filename}.
+@end itemize
+
+In JSON format, the @code{offset} field is optional; it is absent in
+cases where @code{human} format would omit the entry or exit with an error.
+If @code{data} is false and the @code{offset} field is present, the
+corresponding sectors in the file are not yet in use, but they are
+preallocated.
+
+For more information, consult @file{include/block/block.h} in QEMU's
+source code.
+
 @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename}
 
 List, apply, create or delete snapshots in image @var{filename}.
commit 4c93a13b5daf9bd5fca1a547661b0fb9a2f0ca52
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:33 2013 +0200

    qemu-img: add a "map" subcommand
    
    This command dumps the metadata of an entire chain, in either tabular or JSON
    format.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 2f6d579..0c36e59 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -45,6 +45,12 @@ STEXI
 @item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
 ETEXI
 
+DEF("map", img_map,
+    "map [-f fmt] [--output=ofmt] filename")
+STEXI
+@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+ETEXI
+
 DEF("snapshot", img_snapshot,
     "snapshot [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
 STEXI
diff --git a/qemu-img.c b/qemu-img.c
index b074fa7..3e5e388 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1801,6 +1801,197 @@ static int img_info(int argc, char **argv)
     return 0;
 }
 
+
+typedef struct MapEntry {       /* one extent printed by "qemu-img map" */
+    int flags;                  /* BDRV_BLOCK_* bits, offset bits masked out */
+    int depth;                  /* backing_hd hops from the image to @bs */
+    int64_t start;              /* start of the extent, in bytes */
+    int64_t length;             /* length of the extent, in bytes */
+    int64_t offset;             /* status & BDRV_BLOCK_OFFSET_MASK */
+    BlockDriverState *bs;       /* layer queried last; NULL past the chain */
+} MapEntry;
+
+/* Print extent @e in @output_format; @next is the following extent, or NULL
+ * if @e is the last.  Human format may rewrite next->flags (see below).
+ */
+static void dump_map_entry(OutputFormat output_format, MapEntry *e,
+                           MapEntry *next)
+{
+    switch (output_format) {
+    case OFORMAT_HUMAN:
+        if ((e->flags & BDRV_BLOCK_DATA) &&
+            !(e->flags & BDRV_BLOCK_OFFSET_VALID)) {
+            error_report("File contains external, encrypted or compressed clusters.");
+            exit(1);
+        }
+        if ((e->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) == BDRV_BLOCK_DATA) {
+            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
+                   e->start, e->length, e->offset, e->bs->filename);
+        }
+        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
+         * Modify the flags here to allow more coalescing.
+         */
+        if (next &&
+            (next->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) != BDRV_BLOCK_DATA) {
+            next->flags &= ~BDRV_BLOCK_DATA;
+            next->flags |= BDRV_BLOCK_ZERO;
+        }
+        break;
+    case OFORMAT_JSON:
+        printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64", \"depth\": %d,"
+               " \"zero\": %s, \"data\": %s",
+               (e->start == 0 ? "[" : ",\n"),
+               e->start, e->length, e->depth,
+               (e->flags & BDRV_BLOCK_ZERO) ? "true" : "false",
+               (e->flags & BDRV_BLOCK_DATA) ? "true" : "false");
+        if (e->flags & BDRV_BLOCK_OFFSET_VALID) {
+            /* JSON object keys must be double-quoted strings. */
+            printf(", \"offset\": %"PRId64, e->offset);
+        }
+        printf("}%s", next ? "" : "]\n");   /* last entry closes the array */
+        break;
+    }
+}
+
+static int get_block_status(BlockDriverState *bs, int64_t sector_num,
+                            int nb_sectors, MapEntry *e)
+{
+    int64_t ret;
+    int depth;
+
+    /* Describe in @e the extent at @sector_num: descend the backing chain
+     * from @bs until a layer reports DATA or ZERO (each query narrows
+     * nb_sectors).  As an optimization, we could cache the unallocated range
+     * of each file in the chain and avoid querying the same range repeatedly.
+     */
+    depth = 0;
+    for (;;) {
+        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors);
+        if (ret < 0) {
+            return ret;             /* negative error code */
+        }
+        assert(nb_sectors);
+        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
+            break;
+        }
+        bs = bs->backing_hd;        /* undecided here: ask the next layer */
+        if (bs == NULL) {
+            ret = 0;                /* end of chain: no flags, no offset */
+            break;
+        }
+
+        depth++;
+    }
+
+    e->start = sector_num * BDRV_SECTOR_SIZE;
+    e->length = nb_sectors * BDRV_SECTOR_SIZE;
+    e->flags = ret & ~BDRV_BLOCK_OFFSET_MASK;
+    e->offset = ret & BDRV_BLOCK_OFFSET_MASK;
+    e->depth = depth;
+    e->bs = bs;
+    return 0;
+}
+
+/* "qemu-img map": print the allocation map of an image and its backing
+ * chain in human (default) or JSON format.  Returns 0 on success, 1 on error.
+ */
+static int img_map(int argc, char **argv)
+{
+    OutputFormat output_format = OFORMAT_HUMAN;
+    BlockDriverState *bs;
+    const char *filename, *fmt = NULL, *output = NULL;
+    int64_t length;
+    MapEntry curr = { .length = 0 }, next;
+    int ret = 0;
+
+    for (;;) {
+        int c, option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, "f:h", long_options, &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case '?':
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
+        }
+    }
+    if (optind >= argc) {
+        help();
+    }
+    filename = argv[optind++];
+
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && strcmp(output, "human")) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS, true, false);
+    if (!bs) {
+        return 1;
+    }
+
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_report("Could not get image size: %s", strerror(-length));
+        ret = -1;
+        goto out;
+    }
+
+    if (output_format == OFORMAT_HUMAN) {
+        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
+    }
+
+    while (curr.start + curr.length < length) {
+        int64_t sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
+        int64_t nsectors_left;
+        int n;
+
+        /* Probe up to 1 GiB at a time.  */
+        nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
+        n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
+        ret = get_block_status(bs, sector_num, n, &next);
+        if (ret < 0) {
+            error_report("Could not read file metadata: %s", strerror(-ret));
+            goto out;
+        }
+
+        if (curr.length != 0 && curr.flags == next.flags &&
+            curr.depth == next.depth &&
+            ((curr.flags & BDRV_BLOCK_OFFSET_VALID) == 0 ||
+             curr.offset + curr.length == next.offset)) {
+            curr.length += next.length;     /* next continues curr: merge */
+            continue;
+        }
+
+        if (curr.length > 0) {
+            dump_map_entry(output_format, &curr, &next);
+        }
+        curr = next;
+    }
+
+    dump_map_entry(output_format, &curr, NULL);     /* last extent */
+
+out:
+    bdrv_unref(bs);
+    return ret < 0;
+}
+
 #define SNAPSHOT_LIST   1
 #define SNAPSHOT_CREATE 2
 #define SNAPSHOT_APPLY  3
commit f0ad5712d5d15ff272b9e107910be4aae468fb3d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:32 2013 +0200

    block: return BDRV_BLOCK_ZERO past end of backing file
    
    If the sectors are unallocated and we are past the end of the
    backing file, they will read as zero.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index aa9ec83..82bbd6c 100644
--- a/block.c
+++ b/block.c
@@ -3102,8 +3102,16 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         return ret;
     }
 
-    if (!(ret & BDRV_BLOCK_DATA) && bdrv_has_zero_init(bs)) {
-        ret |= BDRV_BLOCK_ZERO;
+    if (!(ret & BDRV_BLOCK_DATA)) {
+        if (bdrv_has_zero_init(bs)) {
+            ret |= BDRV_BLOCK_ZERO;
+        } else if (bs->backing_hd) {
+            /* Unallocated sectors past the end of the backing file read as 0 */
+            int64_t length2 = bdrv_getlength(bs->backing_hd);
+            if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
+                ret |= BDRV_BLOCK_ZERO;
+            }
+        }
+    }
     return ret;
 }
commit 415b5b013ce74126e71459b922a92377918ae2ef
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:31 2013 +0200

    block: use bdrv_has_zero_init to return BDRV_BLOCK_ZERO
    
    Alternatively, this could use a "discard zeroes data" flag returned
    by bdrv_get_info.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 97406ec..aa9ec83 100644
--- a/block.c
+++ b/block.c
@@ -3075,6 +3075,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
 {
     int64_t length;
     int64_t n;
+    int64_t ret;
 
     length = bdrv_getlength(bs);
     if (length < 0) {
@@ -3096,7 +3097,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         return BDRV_BLOCK_DATA;
     }
 
-    return bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (!(ret & BDRV_BLOCK_DATA) && bdrv_has_zero_init(bs)) {
+        ret |= BDRV_BLOCK_ZERO;
+    }
+    return ret;
 }
 
 /* Coroutine wrapper for bdrv_get_block_status() */
commit 4bc74be997a72922170f4f272fd5e8074a3ecc27
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:30 2013 +0200

    block: return get_block_status data and flags for formats
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/cow.c b/block/cow.c
index d4b8c6b..764b93f 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -191,7 +191,13 @@ static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
 static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *num_same)
 {
-    return cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+    BDRVCowState *s = bs->opaque;
+    int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+    int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
+    if (ret < 0) {
+        return ret;
+    }
+    return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
 }
 
 static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/qcow.c b/block/qcow.c
index 0d76656..93a993b 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -410,7 +410,14 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
     if (n > nb_sectors)
         n = nb_sectors;
     *pnum = n;
-    return (cluster_offset != 0);
+    if (!cluster_offset) {
+        return 0;
+    }
+    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
+        return BDRV_BLOCK_DATA;
+    }
+    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
 }
 
 static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
diff --git a/block/qcow2.c b/block/qcow2.c
index c220654..578792f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -693,7 +693,8 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t cluster_offset;
-    int ret;
+    int index_in_cluster, ret;
+    int64_t status = 0;
 
     *pnum = nb_sectors;
     qemu_co_mutex_lock(&s->lock);
@@ -703,7 +704,18 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
         return ret;
     }
 
-    return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
+    if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
+        !s->crypt_method) {
+        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+        status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
+    }
+    if (ret == QCOW2_CLUSTER_ZERO) {
+        status |= BDRV_BLOCK_ZERO;
+    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
+        status |= BDRV_BLOCK_DATA;
+    }
+    return status;
 }
 
 /* handle reading after the end of the backing file */
diff --git a/block/qed.c b/block/qed.c
index 86bf179..49b3a37 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -652,16 +652,36 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
 }
 
 typedef struct {
+    BlockDriverState *bs;
     Coroutine *co;
-    int is_allocated;
+    uint64_t pos;
+    int64_t status;
     int *pnum;
 } QEDIsAllocatedCB;
 
 static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDIsAllocatedCB *cb = opaque;
+    BDRVQEDState *s = cb->bs->opaque;
     *cb->pnum = len / BDRV_SECTOR_SIZE;
-    cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+    switch (ret) {
+    case QED_CLUSTER_FOUND:
+        offset |= qed_offset_into_cluster(s, cb->pos);
+        cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+        break;
+    case QED_CLUSTER_ZERO:
+        cb->status = BDRV_BLOCK_ZERO;
+        break;
+    case QED_CLUSTER_L2:
+    case QED_CLUSTER_L1:
+        cb->status = 0;
+        break;
+    default:
+        assert(ret < 0);
+        cb->status = ret;
+        break;
+    }
+
     if (cb->co) {
         qemu_coroutine_enter(cb->co, NULL);
     }
@@ -672,25 +692,26 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
                                                  int nb_sectors, int *pnum)
 {
     BDRVQEDState *s = bs->opaque;
-    uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
     size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
     QEDIsAllocatedCB cb = {
-        .is_allocated = -1,
+        .bs = bs,
+        .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
+        .status = BDRV_BLOCK_OFFSET_MASK,
         .pnum = pnum,
     };
     QEDRequest request = { .l2_table = NULL };
 
-    qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+    qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
 
     /* Now sleep if the callback wasn't invoked immediately */
-    while (cb.is_allocated == -1) {
+    while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
         cb.co = qemu_coroutine_self();
         qemu_coroutine_yield();
     }
 
     qed_unref_l2_cache_entry(request.l2_table);
 
-    return cb.is_allocated;
+    return cb.status;
 }
 
 static int bdrv_qed_make_empty(BlockDriverState *bs)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 7d3fc55..f9988d3 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2280,7 +2280,7 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                   end = DIV_ROUND_UP((sector_num + nb_sectors) *
                                      BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
     unsigned long idx;
-    int ret = 1;
+    int64_t ret = BDRV_BLOCK_DATA;
 
     for (idx = start; idx < end; idx++) {
         if (inode->data_vdi_id[idx] == 0) {
diff --git a/block/vdi.c b/block/vdi.c
index 7ab2567..1bf7dc5 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -479,12 +479,23 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
     size_t sector_in_block = sector_num % s->block_sectors;
     int n_sectors = s->block_sectors - sector_in_block;
     uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
+    uint64_t offset;
+    int result;
+
     logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
     if (n_sectors > nb_sectors) {
         n_sectors = nb_sectors;
     }
     *pnum = n_sectors;
-    return VDI_IS_ALLOCATED(bmap_entry);
+    result = VDI_IS_ALLOCATED(bmap_entry);
+    if (!result) {
+        return 0;
+    }
+
+    offset = s->header.offset_data +
+                              (uint64_t)bmap_entry * s->block_size +
+                              sector_in_block * SECTOR_SIZE;
+    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
 }
 
 static int vdi_co_read(BlockDriverState *bs,
diff --git a/block/vmdk.c b/block/vmdk.c
index 23b4954..fb5b529 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1059,7 +1059,24 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
                             sector_num * 512, 0, &offset);
     qemu_co_mutex_unlock(&s->lock);
 
-    ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
+    switch (ret) {
+    case VMDK_ERROR:
+        ret = -EIO;
+        break;
+    case VMDK_UNALLOC:
+        ret = 0;
+        break;
+    case VMDK_ZEROED:
+        ret = BDRV_BLOCK_ZERO;
+        break;
+    case VMDK_OK:
+        ret = BDRV_BLOCK_DATA;
+        if (extent->file == bs->file) {
+            ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+        }
+
+        break;
+    }
 
     index_in_cluster = sector_num % extent->cluster_sectors;
     n = extent->cluster_sectors - index_in_cluster;
diff --git a/block/vvfat.c b/block/vvfat.c
index 2a5bca3..0129195 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2879,11 +2879,12 @@ static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
 {
     BDRVVVFATState* s = bs->opaque;
     *n = s->sector_count - sector_num;
-    if (*n > nb_sectors)
-	*n = nb_sectors;
-    else if (*n < 0)
-	return 0;
-    return 1;
+    if (*n > nb_sectors) {
+        *n = nb_sectors;
+    } else if (*n < 0) {
+        return 0;
+    }
+    return BDRV_BLOCK_DATA;
 }
 
 static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
commit 4333bb71405f58a8dc8d3255feb3ca5960b0daf8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:29 2013 +0200

    block: define get_block_status return value
    
    Define the return value of get_block_status.  Bits 0, 1, 2 and 9-62
    are valid; bit 63 (the sign bit) is reserved for errors.  Bits 3-8
    are left for future extensions.
    
    The return code is compatible with the old is_allocated API: if a driver
    only returns 0 or 1 (aka BDRV_BLOCK_DATA) like is_allocated used to,
    clients of is_allocated will not have any change in behavior.  Still,
    we will return more precise information in the next patches and the
    new definition of bdrv_is_allocated is already prepared for this.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index a5bfe74..97406ec 100644
--- a/block.c
+++ b/block.c
@@ -3093,7 +3093,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
 
     if (!bs->drv->bdrv_co_get_block_status) {
         *pnum = nb_sectors;
-        return 1;
+        return BDRV_BLOCK_DATA;
     }
 
     return bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
@@ -3143,7 +3143,13 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                    int nb_sectors, int *pnum)
 {
-    return bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        return ret;
+    }
+    return
+        (ret & BDRV_BLOCK_DATA) ||
+        ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
 }
 
 /*
diff --git a/include/block/block.h b/include/block/block.h
index 0b83321..728ec1a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,6 +81,32 @@ typedef struct BlockDevOps {
 #define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
 #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)
 
+/* BDRV_BLOCK_DATA: data is read from bs->file or another file
+ * BDRV_BLOCK_ZERO: sectors read as zero
+ * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in
+ * bs->file where sector data can be read from as raw data.
+ *
+ * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present.
+ *
+ * DATA ZERO OFFSET_VALID
+ *  t    t        t       sectors read as zero, bs->file is zero at offset
+ *  t    f        t       sectors read as valid from bs->file at offset
+ *  f    t        t       sectors preallocated, read as zero, bs->file not
+ *                        necessarily zero at offset
+ *  f    f        t       sectors preallocated but read from backing_hd,
+ *                        bs->file contains garbage at offset
+ *  t    t        f       sectors preallocated, read as zero, unknown offset
+ *  t    f        f       sectors read from unknown file or offset
+ *  f    t        f       not allocated or unknown offset, read as zero
+ *  f    f        f       not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA         1
+#define BDRV_BLOCK_ZERO         2
+#define BDRV_BLOCK_OFFSET_VALID 4
+#define BDRV_BLOCK_OFFSET_MASK  BDRV_SECTOR_MASK
+
 typedef enum {
     BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockErrorAction;
commit b6b8a33354a448ee421f57676c1a93a536a63269
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:28 2013 +0200

    block: introduce bdrv_get_block_status API
    
    For now, bdrv_get_block_status is just another name for bdrv_is_allocated.
    The next patches will add more flags.
    
    This also touches all block drivers with a mostly mechanical rename.  The
    sole exception is cow; because it calls cow_co_is_allocated from the read
    code, we keep that function and make cow_co_get_block_status a wrapper.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 2a90893..a5bfe74 100644
--- a/block.c
+++ b/block.c
@@ -3044,15 +3044,15 @@ int bdrv_has_zero_init(BlockDriverState *bs)
     return 0;
 }
 
-typedef struct BdrvCoIsAllocatedData {
+typedef struct BdrvCoGetBlockStatusData {
     BlockDriverState *bs;
     BlockDriverState *base;
     int64_t sector_num;
     int nb_sectors;
     int *pnum;
-    int ret;
+    int64_t ret;
     bool done;
-} BdrvCoIsAllocatedData;
+} BdrvCoGetBlockStatusData;
 
 /*
  * Returns true iff the specified sector is present in the disk image. Drivers
@@ -3069,9 +3069,9 @@ typedef struct BdrvCoIsAllocatedData {
  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
  * beyond the end of the disk image it will be clamped.
  */
-static int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             int nb_sectors, int *pnum)
+static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+                                                     int64_t sector_num,
+                                                     int nb_sectors, int *pnum)
 {
     int64_t length;
     int64_t n;
@@ -3091,35 +3091,35 @@ static int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs,
         nb_sectors = n;
     }
 
-    if (!bs->drv->bdrv_co_is_allocated) {
+    if (!bs->drv->bdrv_co_get_block_status) {
         *pnum = nb_sectors;
         return 1;
     }
 
-    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
+    return bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
 }
 
-/* Coroutine wrapper for bdrv_is_allocated() */
-static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
+/* Coroutine wrapper for bdrv_get_block_status() */
+static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
 {
-    BdrvCoIsAllocatedData *data = opaque;
+    BdrvCoGetBlockStatusData *data = opaque;
     BlockDriverState *bs = data->bs;
 
-    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
-                                     data->pnum);
+    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
+                                         data->pnum);
     data->done = true;
 }
 
 /*
- * Synchronous wrapper around bdrv_co_is_allocated().
+ * Synchronous wrapper around bdrv_co_get_block_status().
  *
- * See bdrv_co_is_allocated() for details.
+ * See bdrv_co_get_block_status() for details.
  */
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                      int *pnum)
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors, int *pnum)
 {
     Coroutine *co;
-    BdrvCoIsAllocatedData data = {
+    BdrvCoGetBlockStatusData data = {
         .bs = bs,
         .sector_num = sector_num,
         .nb_sectors = nb_sectors,
@@ -3129,9 +3129,9 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 
     if (qemu_in_coroutine()) {
         /* Fast-path if already in coroutine context */
-        bdrv_is_allocated_co_entry(&data);
+        bdrv_get_block_status_co_entry(&data);
     } else {
-        co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
+        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
         qemu_coroutine_enter(co, &data);
         while (!data.done) {
             qemu_aio_wait();
@@ -3140,6 +3140,12 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     return data.ret;
 }
 
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                                   int nb_sectors, int *pnum)
+{
+    return bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+}
+
 /*
  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
  *
diff --git a/block/cow.c b/block/cow.c
index 7450801..d4b8c6b 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -188,6 +188,12 @@ static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
     return changed;
 }
 
+static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *num_same)
+{
+    return cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+}
+
 static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
         int nb_sectors)
 {
@@ -371,7 +377,7 @@ static BlockDriver bdrv_cow = {
 
     .bdrv_read              = cow_co_read,
     .bdrv_write             = cow_co_write,
-    .bdrv_co_is_allocated   = cow_co_is_allocated,
+    .bdrv_co_get_block_status   = cow_co_get_block_status,
 
     .create_options = cow_create_options,
 };
diff --git a/block/qcow.c b/block/qcow.c
index 6b891ac..0d76656 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -395,7 +395,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
     return cluster_offset;
 }
 
-static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
     BDRVQcowState *s = bs->opaque;
@@ -896,7 +896,7 @@ static BlockDriver bdrv_qcow = {
 
     .bdrv_co_readv          = qcow_co_readv,
     .bdrv_co_writev         = qcow_co_writev,
-    .bdrv_co_is_allocated   = qcow_co_is_allocated,
+    .bdrv_co_get_block_status   = qcow_co_get_block_status,
 
     .bdrv_set_key           = qcow_set_key,
     .bdrv_make_empty        = qcow_make_empty,
diff --git a/block/qcow2.c b/block/qcow2.c
index cf03a14..c220654 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -688,7 +688,7 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
-static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1866,7 +1866,7 @@ static BlockDriver bdrv_qcow2 = {
     .bdrv_reopen_prepare  = qcow2_reopen_prepare,
     .bdrv_create        = qcow2_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_is_allocated = qcow2_co_is_allocated,
+    .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_set_key       = qcow2_set_key,
     .bdrv_make_empty    = qcow2_make_empty,
 
diff --git a/block/qed.c b/block/qed.c
index 9712a42..86bf179 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -667,7 +667,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
     }
 }
 
-static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
                                                  int64_t sector_num,
                                                  int nb_sectors, int *pnum)
 {
@@ -1575,7 +1575,7 @@ static BlockDriver bdrv_qed = {
     .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
     .bdrv_create              = bdrv_qed_create,
     .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_is_allocated     = bdrv_qed_co_is_allocated,
+    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
     .bdrv_make_empty          = bdrv_qed_make_empty,
     .bdrv_aio_readv           = bdrv_qed_aio_readv,
     .bdrv_aio_writev          = bdrv_qed_aio_writev,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ba721d3..dbc65b0 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1084,7 +1084,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
  * beyond the end of the disk image it will be clamped.
  */
-static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                             int64_t sector_num,
                                             int nb_sectors, int *pnum)
 {
@@ -1200,7 +1200,7 @@ static BlockDriver bdrv_file = {
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_is_allocated = raw_co_is_allocated,
+    .bdrv_co_get_block_status = raw_co_get_block_status,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 926712e..a9060ca 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -58,11 +58,11 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
     return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
 }
 
-static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
-                                            int64_t sector_num, int nb_sectors,
-                                            int *pnum)
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            int nb_sectors, int *pnum)
 {
-    return bdrv_is_allocated(bs->file, sector_num, nb_sectors, pnum);
+    return bdrv_get_block_status(bs->file, sector_num, nb_sectors, pnum);
 }
 
 static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
@@ -164,7 +164,7 @@ static BlockDriver bdrv_raw = {
     .bdrv_co_writev       = &raw_co_writev,
     .bdrv_co_write_zeroes = &raw_co_write_zeroes,
     .bdrv_co_discard      = &raw_co_discard,
-    .bdrv_co_is_allocated = &raw_co_is_allocated,
+    .bdrv_co_get_block_status = &raw_co_get_block_status,
     .bdrv_truncate        = &raw_truncate,
     .bdrv_getlength       = &raw_getlength,
     .bdrv_get_info        = &raw_get_info,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 36c3cc8..7d3fc55 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2270,9 +2270,9 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
     return acb->ret;
 }
 
-static coroutine_fn int
-sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                   int *pnum)
+static coroutine_fn int64_t
+sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                       int *pnum)
 {
     BDRVSheepdogState *s = bs->opaque;
     SheepdogInode *inode = &s->inode;
@@ -2338,7 +2338,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
     .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_is_allocated = sd_co_is_allocated,
+    .bdrv_co_get_block_status = sd_co_get_block_status,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2366,7 +2366,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
     .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_is_allocated = sd_co_is_allocated,
+    .bdrv_co_get_block_status = sd_co_get_block_status,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2394,7 +2394,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
     .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_is_allocated = sd_co_is_allocated,
+    .bdrv_co_get_block_status = sd_co_get_block_status,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
diff --git a/block/vdi.c b/block/vdi.c
index 8a91525..7ab2567 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -470,7 +470,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
-static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
     /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
@@ -780,7 +780,7 @@ static BlockDriver bdrv_vdi = {
     .bdrv_reopen_prepare = vdi_reopen_prepare,
     .bdrv_create = vdi_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_is_allocated = vdi_co_is_allocated,
+    .bdrv_co_get_block_status = vdi_co_get_block_status,
     .bdrv_make_empty = vdi_make_empty,
 
     .bdrv_read = vdi_co_read,
diff --git a/block/vmdk.c b/block/vmdk.c
index 9f82c45..23b4954 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1042,7 +1042,7 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
     return NULL;
 }
 
-static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
     BDRVVmdkState *s = bs->opaque;
@@ -1837,7 +1837,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_close                   = vmdk_close,
     .bdrv_create                  = vmdk_create,
     .bdrv_co_flush_to_disk        = vmdk_co_flush,
-    .bdrv_co_is_allocated         = vmdk_co_is_allocated,
+    .bdrv_co_get_block_status     = vmdk_co_get_block_status,
     .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
     .bdrv_has_zero_init           = vmdk_has_zero_init,
 
diff --git a/block/vvfat.c b/block/vvfat.c
index 2178a13..2a5bca3 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2874,7 +2874,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
-static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
 	int64_t sector_num, int nb_sectors, int* n)
 {
     BDRVVVFATState* s = bs->opaque;
@@ -2984,7 +2984,7 @@ static BlockDriver bdrv_vvfat = {
 
     .bdrv_read              = vvfat_co_read,
     .bdrv_write             = vvfat_co_write,
-    .bdrv_co_is_allocated   = vvfat_co_is_allocated,
+    .bdrv_co_get_block_status = vvfat_co_get_block_status,
 };
 
 static void bdrv_vvfat_init(void)
diff --git a/include/block/block.h b/include/block/block.h
index 5469c94..0b83321 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -269,6 +269,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors, int *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                       int *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b9212b8..7c35198 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -118,7 +118,7 @@ struct BlockDriver {
         int64_t sector_num, int nb_sectors);
     int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors);
-    int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+    int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum);
 
     /*
commit 11212d8fa08a4e5bd56bb4f6877f9a4c0439a02b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:27 2013 +0200

    block: make bdrv_has_zero_init return false for copy-on-write-images
    
    This helps implement is_allocated on top of get_block_status.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 5df17de..2a90893 100644
--- a/block.c
+++ b/block.c
@@ -3031,6 +3031,11 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 {
     assert(bs->drv);
 
+    /* If BS is a copy on write image, it is initialized to
+       the contents of the base image, which may not be zeroes.  */
+    if (bs->backing_hd) {
+        return 0;
+    }
     if (bs->drv->bdrv_has_zero_init) {
         return bs->drv->bdrv_has_zero_init(bs);
     }
diff --git a/qemu-img.c b/qemu-img.c
index 5cc579d..b074fa7 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1537,14 +1537,7 @@ static int img_convert(int argc, char **argv)
                should add a specific call to have the info to go faster */
             buf1 = buf;
             while (n > 0) {
-                /* If the output image is being created as a copy on write image,
-                   copy all sectors even the ones containing only NUL bytes,
-                   because they may differ from the sectors in the base image.
-
-                   If the output is to a host device, we also write out
-                   sectors that are entirely 0, since whatever data was
-                   already there is garbage, not 0s. */
-                if (!has_zero_init || out_baseimg ||
+                if (!has_zero_init ||
                     is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
                     ret = bdrv_write(out_bs, sector_num, buf1, n1);
                     if (ret < 0) {
commit e4a86f88cc6b214c37b4abe9160e41f0338ce4cd
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:26 2013 +0200

    qemu-img: always probe the input image for allocated sectors
    
    qemu-img convert can assume "that sectors which are unallocated in the
    input image are present in both the output's and input's base images".
    
    However it is only doing this if the output image returns true for
    bdrv_has_zero_init().  Testing bdrv_has_zero_init() does not make much
    sense if the output image is copy-on-write, because a copy-on-write
    image is never initialized to zero (it is initialized to the content
    of the backing file).
    
    There is nothing here that makes has_zero_init images special.  The
    input and output must be equal for the operation to make sense, and
    that's it.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index 27cc006..5cc579d 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1502,28 +1502,26 @@ static int img_convert(int argc, char **argv)
                 n = bs_offset + bs_sectors - sector_num;
             }
 
-            if (has_zero_init) {
-                /* If the output image is being created as a copy on write image,
-                   assume that sectors which are unallocated in the input image
-                   are present in both the output's and input's base images (no
-                   need to copy them). */
-                if (out_baseimg) {
-                    ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
-                                            n, &n1);
-                    if (ret < 0) {
-                        error_report("error while reading metadata for sector "
-                                     "%" PRId64 ": %s",
-                                     sector_num - bs_offset, strerror(-ret));
-                        goto out;
-                    }
-                    if (!ret) {
-                        sector_num += n1;
-                        continue;
-                    }
-                    /* The next 'n1' sectors are allocated in the input image. Copy
-                       only those as they may be followed by unallocated sectors. */
-                    n = n1;
+            /* If the output image is being created as a copy on write image,
+               assume that sectors which are unallocated in the input image
+               are present in both the output's and input's base images (no
+               need to copy them). */
+            if (out_baseimg) {
+                ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
+                                        n, &n1);
+                if (ret < 0) {
+                    error_report("error while reading metadata for sector "
+                                 "%" PRId64 ": %s",
+                                 sector_num - bs_offset, strerror(-ret));
+                    goto out;
+                }
+                if (!ret) {
+                    sector_num += n1;
+                    continue;
                 }
+                /* The next 'n1' sectors are allocated in the input image. Copy
+                   only those as they may be followed by unallocated sectors. */
+                n = n1;
             } else {
                 n1 = n;
             }
commit d663640c04f2aab810915c556390211d75457704
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:25 2013 +0200

    block: expect errors from bdrv_co_is_allocated
    
    Some bdrv_is_allocated callers do not expect errors, but the fallback
    in qcow2.c might make other callers trip on assertion failures or
    infinite loops.
    
    Fix the callers to always look for errors.
    
    Cc: qemu-stable at nongnu.org
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index c6404eb..5df17de 100644
--- a/block.c
+++ b/block.c
@@ -1857,8 +1857,11 @@ int bdrv_commit(BlockDriverState *bs)
     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 
     for (sector = 0; sector < total_sectors; sector += n) {
-        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
-
+        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
+        if (ret) {
             if (bdrv_read(bs, sector, buf, n) != 0) {
                 ret = -EIO;
                 goto ro_cleanup;
diff --git a/block/cow.c b/block/cow.c
index f4eca10..7450801 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -212,7 +212,11 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
     int ret, n;
 
     while (nb_sectors > 0) {
-        if (cow_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
+        ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret) {
             ret = bdrv_pread(bs->file,
                         s->cow_sectors_offset + sector_num * 512,
                         buf, n * 512);
diff --git a/block/qcow2.c b/block/qcow2.c
index 4d7bd78..cf03a14 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -696,13 +696,11 @@ static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
     int ret;
 
     *pnum = nb_sectors;
-    /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
-     * can't pass them on today */
     qemu_co_mutex_lock(&s->lock);
     ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
-        *pnum = 0;
+        return ret;
     }
 
     return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
diff --git a/block/stream.c b/block/stream.c
index e640bc5..078ce4a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -124,7 +124,7 @@ wait:
         if (ret == 1) {
             /* Allocated in the top, no need to copy.  */
             copy = false;
-        } else {
+        } else if (ret >= 0) {
             /* Copy if allocated in the intermediate images.  Limit to the
              * known-unallocated area [sector_num, sector_num+n).  */
             ret = bdrv_is_allocated_above(bs->backing_hd, base,
diff --git a/qemu-img.c b/qemu-img.c
index 744c0d9..27cc006 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1508,8 +1508,15 @@ static int img_convert(int argc, char **argv)
                    are present in both the output's and input's base images (no
                    need to copy them). */
                 if (out_baseimg) {
-                    if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
-                                           n, &n1)) {
+                    ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
+                                            n, &n1);
+                    if (ret < 0) {
+                        error_report("error while reading metadata for sector "
+                                     "%" PRId64 ": %s",
+                                     sector_num - bs_offset, strerror(-ret));
+                        goto out;
+                    }
+                    if (!ret) {
                         sector_num += n1;
                         continue;
                     }
@@ -2099,6 +2106,11 @@ static int img_rebase(int argc, char **argv)
 
             /* If the cluster is allocated, we don't need to take action */
             ret = bdrv_is_allocated(bs, sector, n, &n);
+            if (ret < 0) {
+                error_report("error while reading image metadata: %s",
+                             strerror(-ret));
+                goto out;
+            }
             if (ret) {
                 continue;
             }
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index f91b6c4..8565d49 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1830,6 +1830,10 @@ static int alloc_f(BlockDriverState *bs, int argc, char **argv)
     sector_num = offset >> 9;
     while (remaining) {
         ret = bdrv_is_allocated(bs, sector_num, remaining, &num);
+        if (ret < 0) {
+            printf("is_allocated failed: %s\n", strerror(-ret));
+            return 0;
+        }
         sector_num += num;
         remaining -= num;
         if (ret) {
commit 4f5786376e41980e78af45a123c56ebdc5295099
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:24 2013 +0200

    block: remove bdrv_is_allocated_above/bdrv_co_is_allocated_above distinction
    
    Now that bdrv_is_allocated detects coroutine context, the two can
    use the same code.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 4a10fb1..c6404eb 100644
--- a/block.c
+++ b/block.c
@@ -3144,10 +3144,10 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
  *  allocated/unallocated state.
  *
  */
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
-                                            BlockDriverState *base,
-                                            int64_t sector_num,
-                                            int nb_sectors, int *pnum)
+int bdrv_is_allocated_above(BlockDriverState *top,
+                            BlockDriverState *base,
+                            int64_t sector_num,
+                            int nb_sectors, int *pnum)
 {
     BlockDriverState *intermediate;
     int ret, n = nb_sectors;
@@ -3183,44 +3183,6 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
     return 0;
 }
 
-/* Coroutine wrapper for bdrv_is_allocated_above() */
-static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
-{
-    BdrvCoIsAllocatedData *data = opaque;
-    BlockDriverState *top = data->bs;
-    BlockDriverState *base = data->base;
-
-    data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
-                                           data->nb_sectors, data->pnum);
-    data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated_above().
- *
- * See bdrv_co_is_allocated_above() for details.
- */
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
-                            int64_t sector_num, int nb_sectors, int *pnum)
-{
-    Coroutine *co;
-    BdrvCoIsAllocatedData data = {
-        .bs = top,
-        .base = base,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .pnum = pnum,
-        .done = false,
-    };
-
-    co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
-    qemu_coroutine_enter(co, &data);
-    while (!data.done) {
-        qemu_aio_wait();
-    }
-    return data.ret;
-}
-
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
     if (bs->backing_hd && bs->backing_hd->encrypted)
diff --git a/block/commit.c b/block/commit.c
index 51a1ab3..ac4b7cc 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -108,9 +108,9 @@ wait:
             break;
         }
         /* Copy if allocated above the base */
-        ret = bdrv_co_is_allocated_above(top, base, sector_num,
-                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
-                                         &n);
+        ret = bdrv_is_allocated_above(top, base, sector_num,
+                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                      &n);
         copy = (ret == 1);
         trace_commit_one_iteration(s, sector_num, n, ret);
         if (copy) {
diff --git a/block/mirror.c b/block/mirror.c
index 8dbc5f7..f61a779 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -338,8 +338,8 @@ static void coroutine_fn mirror_run(void *opaque)
         base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
         for (sector_num = 0; sector_num < end; ) {
             int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
-            ret = bdrv_co_is_allocated_above(bs, base,
-                                             sector_num, next - sector_num, &n);
+            ret = bdrv_is_allocated_above(bs, base,
+                                          sector_num, next - sector_num, &n);
 
             if (ret < 0) {
                 goto immediate_exit;
diff --git a/block/stream.c b/block/stream.c
index fe242ba..e640bc5 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -127,8 +127,8 @@ wait:
         } else {
             /* Copy if allocated in the intermediate images.  Limit to the
              * known-unallocated area [sector_num, sector_num+n).  */
-            ret = bdrv_co_is_allocated_above(bs->backing_hd, base,
-                                             sector_num, n, &n);
+            ret = bdrv_is_allocated_above(bs->backing_hd, base,
+                                          sector_num, n, &n);
 
             /* Finish early if end of backing file has been reached */
             if (ret == 0 && n == 0) {
diff --git a/include/block/block.h b/include/block/block.h
index de75d2d..5469c94 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -179,10 +179,6 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
  */
 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors);
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
-                                            BlockDriverState *base,
-                                            int64_t sector_num,
-                                            int nb_sectors, int *pnum);
 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
     const char *backing_file);
 int bdrv_get_backing_file_depth(BlockDriverState *bs);
commit 617ccb466e1937a5c99332ce77a47ebd29861ae4
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:23 2013 +0200

    block: do not use ->total_sectors in bdrv_co_is_allocated
    
    This is more robust when the device has removable media.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index f27bab6..4a10fb1 100644
--- a/block.c
+++ b/block.c
@@ -3065,9 +3065,15 @@ static int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs,
                                              int64_t sector_num,
                                              int nb_sectors, int *pnum)
 {
+    int64_t length;
     int64_t n;
 
-    if (sector_num >= bs->total_sectors) {
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        return length;
+    }
+
+    if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
         *pnum = 0;
         return 0;
     }
commit bdad13b9deec47d5d9eaf7f43867d19a79471244
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:22 2013 +0200

    block: make bdrv_co_is_allocated static
    
    bdrv_is_allocated can detect coroutine context and go through a fast
    path, similar to other block layer functions.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index d9a3a7f..f27bab6 100644
--- a/block.c
+++ b/block.c
@@ -2587,7 +2587,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
     if (flags & BDRV_REQ_COPY_ON_READ) {
         int pnum;
 
-        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
+        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
         if (ret < 0) {
             goto out;
         }
@@ -3061,8 +3061,9 @@ typedef struct BdrvCoIsAllocatedData {
  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
  * beyond the end of the disk image it will be clamped.
  */
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, int *pnum)
+static int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             int nb_sectors, int *pnum)
 {
     int64_t n;
 
@@ -3112,10 +3113,15 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
         .done = false,
     };
 
-    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
-    qemu_coroutine_enter(co, &data);
-    while (!data.done) {
-        qemu_aio_wait();
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_is_allocated_co_entry(&data);
+    } else {
+        co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
+        qemu_coroutine_enter(co, &data);
+        while (!data.done) {
+            qemu_aio_wait();
+        }
     }
     return data.ret;
 }
@@ -3143,8 +3149,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
     intermediate = top;
     while (intermediate && intermediate != base) {
         int pnum_inter;
-        ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
-                                   &pnum_inter);
+        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
+                                &pnum_inter);
         if (ret < 0) {
             return ret;
         } else if (ret) {
diff --git a/block/backup.c b/block/backup.c
index 47fb23f..04c4b5c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -289,14 +289,14 @@ static void coroutine_fn backup_run(void *opaque)
                  * backing file. */
 
                 for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
-                    /* bdrv_co_is_allocated() only returns true/false based
+                    /* bdrv_is_allocated() only returns true/false based
                      * on the first set of sectors it comes across that
                      * are are all in the same state.
                      * For that reason we must verify each sector in the
                      * backup cluster length.  We end up copying more than
                      * needed but at some point that is always the case. */
                     alloced =
-                        bdrv_co_is_allocated(bs,
+                        bdrv_is_allocated(bs,
                                 start * BACKUP_SECTORS_PER_CLUSTER + i,
                                 BACKUP_SECTORS_PER_CLUSTER - i, &n);
                     i += n;
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index ab2b0fd..926712e 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -62,7 +62,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
                                             int64_t sector_num, int nb_sectors,
                                             int *pnum)
 {
-    return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
+    return bdrv_is_allocated(bs->file, sector_num, nb_sectors, pnum);
 }
 
 static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
diff --git a/block/stream.c b/block/stream.c
index 7aa2500..fe242ba 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -119,8 +119,8 @@ wait:
             break;
         }
 
-        ret = bdrv_co_is_allocated(bs, sector_num,
-                                   STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
+        ret = bdrv_is_allocated(bs, sector_num,
+                                STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
         if (ret == 1) {
             /* Allocated in the top, no need to copy.  */
             copy = false;
diff --git a/include/block/block.h b/include/block/block.h
index 107f5a0..de75d2d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -179,8 +179,6 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
  */
 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors);
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, int *pnum);
 int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
                                             BlockDriverState *base,
                                             int64_t sector_num,
commit df2a6f29a5019707d69f6eeb30cf792841cae5aa
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:21 2013 +0200

    block: keep bs->total_sectors up to date even for growable block devices
    
    If a BlockDriverState is growable, after every write we need to
    check if bs->total_sectors might have changed.  With this change,
    bdrv_getlength no longer needs a system call.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 905bf34..d9a3a7f 100644
--- a/block.c
+++ b/block.c
@@ -2739,6 +2739,9 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
         bs->wr_highest_sector = sector_num + nb_sectors - 1;
     }
+    if (bs->growable && ret >= 0) {
+        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+    }
 
     tracked_request_end(&req);
 
@@ -2813,7 +2816,7 @@ int64_t bdrv_getlength(BlockDriverState *bs)
     if (!drv)
         return -ENOMEDIUM;
 
-    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
+    if (bdrv_dev_has_removable_media(bs)) {
         if (drv->bdrv_getlength) {
             return drv->bdrv_getlength(bs);
         }
commit e641c1e81e54fc14908ff6785ef7d51e42c2e1bb
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:20 2013 +0200

    cow: do not call bdrv_co_is_allocated
    
    As we change bdrv_is_allocated to gather more information from bs and
    bs->file, it will become a bit slower.  It is still appropriate for online
    jobs, but not for reads/writes.  Call the internal function instead.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/cow.c b/block/cow.c
index cd889e6..f4eca10 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -212,7 +212,7 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
     int ret, n;
 
     while (nb_sectors > 0) {
-        if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
+        if (cow_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
             ret = bdrv_pread(bs->file,
                         s->cow_sectors_offset + sector_num * 512,
                         buf, n * 512);
commit 26ae980492920e417bc91761cc85950f18e41f85
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:19 2013 +0200

    cow: make writes go at a less indecent speed
    
    Only sync once per write, rather than once per sector.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/cow.c b/block/cow.c
index 4512806..cd889e6 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -106,7 +106,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags)
  * XXX(hch): right now these functions are extremely inefficient.
  * We should just read the whole bitmap we'll need in one go instead.
  */
-static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
+static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first)
 {
     uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
     uint8_t bitmap;
@@ -117,9 +117,21 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
        return ret;
     }
 
+    if (bitmap & (1 << (bitnum % 8))) {
+        return 0;
+    }
+
+    if (*first) {
+        ret = bdrv_flush(bs->file);
+        if (ret < 0) {
+            return ret;
+        }
+        *first = false;
+    }
+
     bitmap |= (1 << (bitnum % 8));
 
-    ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
+    ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
     if (ret < 0) {
        return ret;
     }
@@ -181,9 +193,10 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
 {
     int error = 0;
     int i;
+    bool first = true;
 
     for (i = 0; i < nb_sectors; i++) {
-        error = cow_set_bit(bs, sector_num + i);
+        error = cow_set_bit(bs, sector_num + i, &first);
         if (error) {
             break;
         }
commit 276cbc7f2fc1bd3810887995dbc9cbb739c975bf
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 4 19:00:18 2013 +0200

    cow: make reads go at a decent speed
    
    Do not do two reads for each sector; load each sector of the bitmap
    and use bitmap operations to process it.
    
    Writes are still dog slow!
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/cow.c b/block/cow.c
index 767639c..4512806 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -126,18 +126,31 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
     return 0;
 }
 
-static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
+#define BITS_PER_BITMAP_SECTOR (512 * 8)
+
+/* Cannot use bitmap.c on big-endian machines.  */
+static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
 {
-    uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
-    uint8_t bitmap;
-    int ret;
+    return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
+}
 
-    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
-    if (ret < 0) {
-       return ret;
+static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
+{
+    int streak_value = value ? 0xFF : 0;
+    int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
+    int bitnum = start;
+    while (bitnum < last) {
+        if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
+            bitnum += 8;
+            continue;
+        }
+        if (cow_test_bit(bitnum, bitmap) == value) {
+            bitnum++;
+            continue;
+        }
+        break;
     }
-
-    return !!(bitmap & (1 << (bitnum % 8)));
+    return MIN(bitnum, last) - start;
 }
 
 /* Return true if first block has been changed (ie. current version is
@@ -146,23 +159,20 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
 static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *num_same)
 {
+    int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
+    uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
+    uint8_t bitmap[BDRV_SECTOR_SIZE];
+    int ret;
     int changed;
 
-    if (nb_sectors == 0) {
-	*num_same = nb_sectors;
-	return 0;
-    }
-
-    changed = is_bit_set(bs, sector_num);
-    if (changed < 0) {
-        return 0; /* XXX: how to return I/O errors? */
-    }
-
-    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
-	if (is_bit_set(bs, sector_num + *num_same) != changed)
-	    break;
+    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+    if (ret < 0) {
+        return ret;
     }
 
+    bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+    changed = cow_test_bit(bitnum, bitmap);
+    *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors);
     return changed;
 }
 
commit 0ca0b0d5f8a87dbc6daa5095771d036d0e6dc5b4
Author: Max Reitz <mreitz at redhat.com>
Date:   Tue Sep 3 15:49:39 2013 +0200

    qmp: Documentation for BLOCK_IMAGE_CORRUPTED
    
    Add an appropriate entry describing this event and its parameters into
    qmp-events.txt.
    
    Signed-off-by: Max Reitz <mreitz at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 39b6016..4b24ec9 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -18,6 +18,28 @@ Example:
     "data": { "actual": 944766976 },
     "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
 
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg":    Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+            offset into the image (json-int)
+- "size":   If the corruption resulted from an image access, this is the access
+            size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+    "data": { "device": "ide0-hd0",
+        "msg": "Prevented active L1 table overwrite", "offset": 196608,
+        "size": 65536 },
+    "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
 BLOCK_IO_ERROR
 --------------
 
commit fa510ebffa1fd509323e2e0888e369e23adbb508
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:51 2013 +0800

    block: use BDS ref for block jobs
    
    Block jobs used drive_get_ref(drive_get_by_blockdev(bs)) to avoid BDS
    being deleted. Now we have BDS reference count, and block jobs don't
    care about dinfo, so replace them to get cleaner code. It is also the
    safe way when BDS has no drive info.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 9616cc4..07dac05 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -234,32 +234,32 @@ void drive_get_ref(DriveInfo *dinfo)
 
 typedef struct {
     QEMUBH *bh;
-    DriveInfo *dinfo;
-} DrivePutRefBH;
+    BlockDriverState *bs;
+} BDRVPutRefBH;
 
-static void drive_put_ref_bh(void *opaque)
+static void bdrv_put_ref_bh(void *opaque)
 {
-    DrivePutRefBH *s = opaque;
+    BDRVPutRefBH *s = opaque;
 
-    drive_put_ref(s->dinfo);
+    bdrv_unref(s->bs);
     qemu_bh_delete(s->bh);
     g_free(s);
 }
 
 /*
- * Release a drive reference in a BH
+ * Release a BDS reference in a BH
  *
- * It is not possible to use drive_put_ref() from a callback function when the
- * callers still need the drive.  In such cases we schedule a BH to release the
- * reference.
+ * It is not safe to use bdrv_unref() from a callback function when the callers
+ * still need the BlockDriverState.  In such cases we schedule a BH to release
+ * the reference.
  */
-static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
+static void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
 {
-    DrivePutRefBH *s;
+    BDRVPutRefBH *s;
 
-    s = g_new(DrivePutRefBH, 1);
-    s->bh = qemu_bh_new(drive_put_ref_bh, s);
-    s->dinfo = dinfo;
+    s = g_new(BDRVPutRefBH, 1);
+    s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
+    s->bs = bs;
     qemu_bh_schedule(s->bh);
 }
 
@@ -1436,7 +1436,7 @@ static void block_job_cb(void *opaque, int ret)
     }
     qobject_decref(obj);
 
-    drive_put_ref_bh_schedule(drive_get_by_blockdev(bs));
+    bdrv_put_ref_bh_schedule(bs);
 }
 
 void qmp_block_stream(const char *device, bool has_base,
@@ -1473,11 +1473,6 @@ void qmp_block_stream(const char *device, bool has_base,
         return;
     }
 
-    /* Grab a reference so hotplug does not delete the BlockDriverState from
-     * underneath us.
-     */
-    drive_get_ref(drive_get_by_blockdev(bs));
-
     trace_qmp_block_stream(bs, bs->job);
 }
 
@@ -1534,10 +1529,6 @@ void qmp_block_commit(const char *device,
         error_propagate(errp, local_err);
         return;
     }
-    /* Grab a reference so hotplug does not delete the BlockDriverState from
-     * underneath us.
-     */
-    drive_get_ref(drive_get_by_blockdev(bs));
 }
 
 void qmp_drive_backup(const char *device, const char *target,
@@ -1650,11 +1641,6 @@ void qmp_drive_backup(const char *device, const char *target,
         error_propagate(errp, local_err);
         return;
     }
-
-    /* Grab a reference so hotplug does not delete the BlockDriverState from
-     * underneath us.
-     */
-    drive_get_ref(drive_get_by_blockdev(bs));
 }
 
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
@@ -1791,11 +1777,6 @@ void qmp_drive_mirror(const char *device, const char *target,
         error_propagate(errp, local_err);
         return;
     }
-
-    /* Grab a reference so hotplug does not delete the BlockDriverState from
-     * underneath us.
-     */
-    drive_get_ref(drive_get_by_blockdev(bs));
 }
 
 static BlockJob *find_block_job(const char *device)
diff --git a/blockjob.c b/blockjob.c
index 7edc945..e7d49b7 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -45,6 +45,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
         error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
         return NULL;
     }
+    bdrv_ref(bs);
     bdrv_set_in_use(bs, 1);
 
     job = g_malloc0(job_type->instance_size);
commit 38b54b6dc18ae7e2a32e5ae38b312fb13f0584c8
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:50 2013 +0800

    nbd: use BlockDriverState refcnt
    
    Previously, nbd called drive_get_ref() on the drive of bs. A BDS doesn't
    always have an associated dinfo, and nbd doesn't care about it either. We
    already have a BDS ref count, so use it to make it safe for a BDS w/o
    blockdev.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 95f10c8..922cf56 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -69,12 +69,6 @@ static void nbd_close_notifier(Notifier *n, void *data)
     g_free(cn);
 }
 
-static void nbd_server_put_ref(NBDExport *exp)
-{
-    BlockDriverState *bs = nbd_export_get_blockdev(exp);
-    drive_put_ref(drive_get_by_blockdev(bs));
-}
-
 void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
                         Error **errp)
 {
@@ -105,11 +99,9 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
         writable = false;
     }
 
-    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
-                         nbd_server_put_ref);
+    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
 
     nbd_export_set_name(exp, device);
-    drive_get_ref(drive_get_by_blockdev(bs));
 
     n = g_malloc0(sizeof(NBDCloseNotifier));
     n->n.notify = nbd_close_notifier;
diff --git a/nbd.c b/nbd.c
index 0fd0583..f847940 100644
--- a/nbd.c
+++ b/nbd.c
@@ -882,6 +882,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
     exp->nbdflags = nbdflags;
     exp->size = size == -1 ? bdrv_getlength(bs) : size;
     exp->close = close;
+    bdrv_ref(bs);
     return exp;
 }
 
@@ -928,6 +929,10 @@ void nbd_export_close(NBDExport *exp)
     }
     nbd_export_set_name(exp, NULL);
     nbd_export_put(exp);
+    if (exp->bs) {
+        bdrv_unref(exp->bs);
+        exp->bs = NULL;
+    }
 }
 
 void nbd_export_get(NBDExport *exp)
commit c0777fe18b08c3a6de0c53325d6b9b75ebb9de9a
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:49 2013 +0800

    xen_disk: simplify blk_disconnect with refcnt
    
    We call bdrv_attach_dev when initializing whether or not bs is created
    locally, so call bdrv_detach_dev and let the refcnt handle the
    lifecycle.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 8bfa04e..668cc06 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -824,6 +824,9 @@ static int blk_connect(struct XenDevice *xendev)
         /* setup via qemu cmdline -> already setup for us */
         xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
         blkdev->bs = blkdev->dinfo->bdrv;
+        /* blkdev->bs is not create by us, we get a reference
+         * so we can bdrv_unref() unconditionally */
+        bdrv_ref(blkdev->bs);
     }
     bdrv_attach_dev_nofail(blkdev->bs, blkdev);
     blkdev->file_size = bdrv_getlength(blkdev->bs);
@@ -922,12 +925,8 @@ static void blk_disconnect(struct XenDevice *xendev)
     struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
 
     if (blkdev->bs) {
-        if (!blkdev->dinfo) {
-            /* close/delete only if we created it ourself */
-            bdrv_close(blkdev->bs);
-            bdrv_detach_dev(blkdev->bs, blkdev);
-            bdrv_unref(blkdev->bs);
-        }
+        bdrv_detach_dev(blkdev->bs, blkdev);
+        bdrv_unref(blkdev->bs);
         blkdev->bs = NULL;
     }
     xen_be_unbind_evtchn(&blkdev->xendev);
commit 8442cfd0346de849264494c5adedbac12553867e
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:48 2013 +0800

    migration: omit drive ref as we have bdrv_ref now
    
    block-migration.c does not actually use DriveInfo anywhere.  Hence it's
    safe to drop the drive ref code; we really only care about referencing
    the BDS.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block-migration.c b/block-migration.c
index f803f20..daf9ec1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -336,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
         bmds->completed_sectors = 0;
         bmds->shared_base = block_mig_state.shared_base;
         alloc_aio_bitmap(bmds);
-        drive_get_ref(drive_get_by_blockdev(bs));
         bdrv_set_in_use(bs, 1);
+        bdrv_ref(bs);
 
         block_mig_state.total_sector_sum += sectors;
 
@@ -575,7 +575,7 @@ static void blk_mig_cleanup(void)
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
         bdrv_set_in_use(bmds->bs, 0);
-        drive_put_ref(drive_get_by_blockdev(bmds->bs));
+        bdrv_unref(bmds->bs);
         g_free(bmds->aio_bitmap);
         g_free(bmds);
     }
commit 4f6fd3491cf0f768b135ed2e242bd1d1d2a2efec
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:47 2013 +0800

    block: make bdrv_delete() static
    
    Manage BlockDriverState lifecycle with refcnt, so bdrv_delete() is no
    longer public and should be called by bdrv_unref() if refcnt is
    decreased to 0.
    
    This change does not alter behavior because, effectively, no BDS has
    multiple references yet: there is no caller of bdrv_ref(), and only
    bdrv_new() sets bs->refcnt to 1, so every bdrv_unref() call still deletes
    the BDS.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index a2b6003..905bf34 100644
--- a/block.c
+++ b/block.c
@@ -903,7 +903,7 @@ fail:
     if (!bs->drv) {
         QDECREF(bs->options);
     }
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     return ret;
 }
 
@@ -954,7 +954,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
                     *backing_filename ? backing_filename : NULL, options,
                     back_flags, back_drv);
     if (ret < 0) {
-        bdrv_delete(bs->backing_hd);
+        bdrv_unref(bs->backing_hd);
         bs->backing_hd = NULL;
         bs->open_flags |= BDRV_O_NO_BACKING;
         return ret;
@@ -1029,12 +1029,12 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
         bs1 = bdrv_new("");
         ret = bdrv_open(bs1, filename, NULL, 0, drv);
         if (ret < 0) {
-            bdrv_delete(bs1);
+            bdrv_unref(bs1);
             goto fail;
         }
         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
 
-        bdrv_delete(bs1);
+        bdrv_unref(bs1);
 
         ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
         if (ret < 0) {
@@ -1108,7 +1108,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
     }
 
     if (bs->file != file) {
-        bdrv_delete(file);
+        bdrv_unref(file);
         file = NULL;
     }
 
@@ -1143,7 +1143,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
 
 unlink_and_fail:
     if (file != NULL) {
-        bdrv_delete(file);
+        bdrv_unref(file);
     }
     if (bs->is_temporary) {
         unlink(filename);
@@ -1404,7 +1404,7 @@ void bdrv_close(BlockDriverState *bs)
 
     if (bs->drv) {
         if (bs->backing_hd) {
-            bdrv_delete(bs->backing_hd);
+            bdrv_unref(bs->backing_hd);
             bs->backing_hd = NULL;
         }
         bs->drv->bdrv_close(bs);
@@ -1429,7 +1429,7 @@ void bdrv_close(BlockDriverState *bs)
         bs->options = NULL;
 
         if (bs->file != NULL) {
-            bdrv_delete(bs->file);
+            bdrv_unref(bs->file);
             bs->file = NULL;
         }
     }
@@ -1653,11 +1653,12 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
             bs_new->drv ? bs_new->drv->format_name : "");
 }
 
-void bdrv_delete(BlockDriverState *bs)
+static void bdrv_delete(BlockDriverState *bs)
 {
     assert(!bs->dev);
     assert(!bs->job);
     assert(!bs->in_use);
+    assert(!bs->refcnt);
 
     bdrv_close(bs);
 
@@ -2173,7 +2174,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
         /* so that bdrv_close() does not recursively close the chain */
         intermediate_state->bs->backing_hd = NULL;
-        bdrv_delete(intermediate_state->bs);
+        bdrv_unref(intermediate_state->bs);
     }
     ret = 0;
 
@@ -4531,7 +4532,7 @@ out:
     free_option_parameters(param);
 
     if (bs) {
-        bdrv_delete(bs);
+        bdrv_unref(bs);
     }
 }
 
diff --git a/block/backup.c b/block/backup.c
index 23c7264..47fb23f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -338,7 +338,7 @@ static void coroutine_fn backup_run(void *opaque)
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
-    bdrv_delete(target);
+    bdrv_unref(target);
 
     block_job_completed(&job->common, ret);
 }
diff --git a/block/blkverify.c b/block/blkverify.c
index 1d58cc3..c4e961e 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -155,7 +155,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags)
     s->test_file = bdrv_new("");
     ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
     if (ret < 0) {
-        bdrv_delete(s->test_file);
+        bdrv_unref(s->test_file);
         s->test_file = NULL;
         goto fail;
     }
@@ -169,7 +169,7 @@ static void blkverify_close(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bdrv_delete(s->test_file);
+    bdrv_unref(s->test_file);
     s->test_file = NULL;
 }
 
diff --git a/block/cow.c b/block/cow.c
index 1cc2e89..767639c 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -314,7 +314,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
     }
 
 exit:
-    bdrv_delete(cow_bs);
+    bdrv_unref(cow_bs);
     return ret;
 }
 
diff --git a/block/iscsi.c b/block/iscsi.c
index b2be147..813abd8 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1286,7 +1286,7 @@ out:
     }
     g_free(bs->opaque);
     bs->opaque = NULL;
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     return ret;
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 86de458..8dbc5f7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -480,7 +480,7 @@ immediate_exit:
         bdrv_swap(s->target, s->common.bs);
     }
     bdrv_close(s->target);
-    bdrv_delete(s->target);
+    bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
 
diff --git a/block/qcow.c b/block/qcow.c
index 5239bd6..6b891ac 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -751,7 +751,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
     g_free(tmp);
     ret = 0;
 exit:
-    bdrv_delete(qcow_bs);
+    bdrv_unref(qcow_bs);
     return ret;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 4bc679a..4d7bd78 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1452,7 +1452,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
 
     ret = 0;
 out:
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     return ret;
 }
 
diff --git a/block/qed.c b/block/qed.c
index cc904c4..9712a42 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -599,7 +599,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
     ret = 0; /* success */
 out:
     g_free(l1_table);
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     return ret;
 }
 
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 1ad4d07..36c3cc8 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1430,7 +1430,7 @@ static int sd_prealloc(const char *filename)
     }
 out:
     if (bs) {
-        bdrv_delete(bs);
+        bdrv_unref(bs);
     }
     g_free(buf);
 
@@ -1509,13 +1509,13 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
 
         if (!is_snapshot(&s->inode)) {
             error_report("cannot clone from a non snapshot vdi");
-            bdrv_delete(bs);
+            bdrv_unref(bs);
             ret = -EINVAL;
             goto out;
         }
 
         base_vid = s->inode.vdi_id;
-        bdrv_delete(bs);
+        bdrv_unref(bs);
     }
 
     ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
diff --git a/block/snapshot.c b/block/snapshot.c
index 6c6d9de..8f61cc0 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -99,7 +99,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
         open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
         if (open_ret < 0) {
-            bdrv_delete(bs->file);
+            bdrv_unref(bs->file);
             bs->drv = NULL;
             return open_ret;
         }
diff --git a/block/stream.c b/block/stream.c
index 9982125..7aa2500 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -73,7 +73,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
         unused = intermediate;
         intermediate = intermediate->backing_hd;
         unused->backing_hd = NULL;
-        bdrv_delete(unused);
+        bdrv_unref(unused);
     }
 }
 
diff --git a/block/vmdk.c b/block/vmdk.c
index 63b489d..9f82c45 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -216,7 +216,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
         g_free(e->l2_cache);
         g_free(e->l1_backup_table);
         if (e->file != bs->file) {
-            bdrv_delete(e->file);
+            bdrv_unref(e->file);
         }
     }
     g_free(s->extents);
@@ -746,7 +746,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
             ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
             if (ret) {
-                bdrv_delete(extent_file);
+                bdrv_unref(extent_file);
                 return ret;
             }
         } else {
@@ -1636,15 +1636,15 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         BlockDriverState *bs = bdrv_new("");
         ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
         if (ret != 0) {
-            bdrv_delete(bs);
+            bdrv_unref(bs);
             return ret;
         }
         if (strcmp(bs->drv->format_name, "vmdk")) {
-            bdrv_delete(bs);
+            bdrv_unref(bs);
             return -EINVAL;
         }
         parent_cid = vmdk_read_cid(bs, 0);
-        bdrv_delete(bs);
+        bdrv_unref(bs);
         snprintf(parent_desc_line, sizeof(parent_desc_line),
                 "parentFileNameHint=\"%s\"", backing_file);
     }
diff --git a/block/vvfat.c b/block/vvfat.c
index a827d91..2178a13 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2894,7 +2894,7 @@ static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
 
 static void write_target_close(BlockDriverState *bs) {
     BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    bdrv_delete(s->qcow);
+    bdrv_unref(s->qcow);
     g_free(s->qcow_filename);
 }
 
@@ -2935,7 +2935,7 @@ static int enable_write_target(BDRVVVFATState *s)
     ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
             BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
     if (ret < 0) {
-        bdrv_delete(s->qcow);
+        bdrv_unref(s->qcow);
         goto err;
     }
 
diff --git a/blockdev.c b/blockdev.c
index fe2f318..9616cc4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -212,7 +212,7 @@ static void bdrv_format_print(void *opaque, const char *name)
 static void drive_uninit(DriveInfo *dinfo)
 {
     qemu_opts_del(dinfo->opts);
-    bdrv_delete(dinfo->bdrv);
+    bdrv_unref(dinfo->bdrv);
     g_free(dinfo->id);
     QTAILQ_REMOVE(&drives, dinfo, next);
     g_free(dinfo->serial);
@@ -735,7 +735,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
 err:
     qemu_opts_del(opts);
     QDECREF(bs_opts);
-    bdrv_delete(dinfo->bdrv);
+    bdrv_unref(dinfo->bdrv);
     g_free(dinfo->id);
     QTAILQ_REMOVE(&drives, dinfo, next);
     g_free(dinfo);
@@ -996,7 +996,7 @@ static void external_snapshot_abort(BlkTransactionState *common)
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->new_bs) {
-        bdrv_delete(state->new_bs);
+        bdrv_unref(state->new_bs);
     }
 }
 
@@ -1638,7 +1638,7 @@ void qmp_drive_backup(const char *device, const char *target,
     target_bs = bdrv_new("");
     ret = bdrv_open(target_bs, target, NULL, flags, drv);
     if (ret < 0) {
-        bdrv_delete(target_bs);
+        bdrv_unref(target_bs);
         error_setg_file_open(errp, -ret, target);
         return;
     }
@@ -1646,7 +1646,7 @@ void qmp_drive_backup(const char *device, const char *target,
     backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
-        bdrv_delete(target_bs);
+        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
         return;
     }
@@ -1778,7 +1778,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     target_bs = bdrv_new("");
     ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv);
     if (ret < 0) {
-        bdrv_delete(target_bs);
+        bdrv_unref(target_bs);
         error_setg_file_open(errp, -ret, target);
         return;
     }
@@ -1787,7 +1787,7 @@ void qmp_drive_mirror(const char *device, const char *target,
                  on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
-        bdrv_delete(target_bs);
+        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
         return;
     }
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 727f433..8bfa04e 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -813,7 +813,7 @@ static int blk_connect(struct XenDevice *xendev)
                                                            readonly);
             if (bdrv_open(blkdev->bs,
                           blkdev->filename, NULL, qflags, drv) != 0) {
-                bdrv_delete(blkdev->bs);
+                bdrv_unref(blkdev->bs);
                 blkdev->bs = NULL;
             }
         }
@@ -926,7 +926,7 @@ static void blk_disconnect(struct XenDevice *xendev)
             /* close/delete only if we created it ourself */
             bdrv_close(blkdev->bs);
             bdrv_detach_dev(blkdev->bs, blkdev);
-            bdrv_delete(blkdev->bs);
+            bdrv_unref(blkdev->bs);
         }
         blkdev->bs = NULL;
     }
diff --git a/include/block/block.h b/include/block/block.h
index f24e1ff..107f5a0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -122,7 +122,6 @@ BlockDriverState *bdrv_new(const char *device_name);
 void bdrv_make_anon(BlockDriverState *bs);
 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
-void bdrv_delete(BlockDriverState *bs);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_parse_discard_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename,
diff --git a/qemu-img.c b/qemu-img.c
index e790d6a..744c0d9 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -300,7 +300,7 @@ static BlockDriverState *bdrv_new_open(const char *filename,
     return bs;
 fail:
     if (bs) {
-        bdrv_delete(bs);
+        bdrv_unref(bs);
     }
     return NULL;
 }
@@ -654,7 +654,7 @@ static int img_check(int argc, char **argv)
 
 fail:
     qapi_free_ImageCheck(check);
-    bdrv_delete(bs);
+    bdrv_unref(bs);
 
     return ret;
 }
@@ -724,7 +724,7 @@ static int img_commit(int argc, char **argv)
         break;
     }
 
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     if (ret) {
         return 1;
     }
@@ -1106,11 +1106,11 @@ static int img_compare(int argc, char **argv)
     ret = 0;
 
 out:
-    bdrv_delete(bs2);
+    bdrv_unref(bs2);
     qemu_vfree(buf1);
     qemu_vfree(buf2);
 out2:
-    bdrv_delete(bs1);
+    bdrv_unref(bs1);
 out3:
     qemu_progress_end();
     return ret;
@@ -1561,12 +1561,12 @@ out:
     free_option_parameters(param);
     qemu_vfree(buf);
     if (out_bs) {
-        bdrv_delete(out_bs);
+        bdrv_unref(out_bs);
     }
     if (bs) {
         for (bs_i = 0; bs_i < bs_n; bs_i++) {
             if (bs[bs_i]) {
-                bdrv_delete(bs[bs_i]);
+                bdrv_unref(bs[bs_i]);
             }
         }
         g_free(bs);
@@ -1704,7 +1704,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
         *last = elem;
         last = &elem->next;
 
-        bdrv_delete(bs);
+        bdrv_unref(bs);
 
         filename = fmt = NULL;
         if (chain) {
@@ -1918,7 +1918,7 @@ static int img_snapshot(int argc, char **argv)
     }
 
     /* Cleanup */
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     if (ret) {
         return 1;
     }
@@ -2193,14 +2193,14 @@ out:
     /* Cleanup */
     if (!unsafe) {
         if (bs_old_backing != NULL) {
-            bdrv_delete(bs_old_backing);
+            bdrv_unref(bs_old_backing);
         }
         if (bs_new_backing != NULL) {
-            bdrv_delete(bs_new_backing);
+            bdrv_unref(bs_new_backing);
         }
     }
 
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     if (ret) {
         return 1;
     }
@@ -2323,7 +2323,7 @@ static int img_resize(int argc, char **argv)
     }
 out:
     if (bs) {
-        bdrv_delete(bs);
+        bdrv_unref(bs);
     }
     if (ret) {
         return 1;
diff --git a/qemu-io.c b/qemu-io.c
index d54dc86..71f4ff1 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -32,7 +32,7 @@ static char **cmdline;
 
 static int close_f(BlockDriverState *bs, int argc, char **argv)
 {
-    bdrv_delete(bs);
+    bdrv_unref(bs);
     qemuio_bs = NULL;
     return 0;
 }
@@ -61,7 +61,7 @@ static int openfile(char *name, int flags, int growable)
 
         if (bdrv_open(qemuio_bs, name, NULL, flags, NULL) < 0) {
             fprintf(stderr, "%s: can't open device %s\n", progname, name);
-            bdrv_delete(qemuio_bs);
+            bdrv_unref(qemuio_bs);
             qemuio_bs = NULL;
             return 1;
         }
@@ -422,7 +422,7 @@ int main(int argc, char **argv)
     bdrv_drain_all();
 
     if (qemuio_bs) {
-        bdrv_delete(qemuio_bs);
+        bdrv_unref(qemuio_bs);
     }
     return 0;
 }
commit 9fcb025146676ab376e6159b58f5a5ddb67bf03c
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:46 2013 +0800

    block: implement reference count for BlockDriverState
    
    Introduce bdrv_ref/bdrv_unref to manage the lifecycle of
    BlockDriverState. They are unused for now but will be used to replace
    bdrv_delete() later.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 0292d1d..a2b6003 100644
--- a/block.c
+++ b/block.c
@@ -332,6 +332,7 @@ BlockDriverState *bdrv_new(const char *device_name)
     notifier_with_return_list_init(&bs->before_write_notifiers);
     qemu_co_queue_init(&bs->throttled_reqs[0]);
     qemu_co_queue_init(&bs->throttled_reqs[1]);
+    bs->refcnt = 1;
 
     return bs;
 }
@@ -1566,6 +1567,9 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     /* dirty bitmap */
     bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
 
+    /* reference count */
+    bs_dest->refcnt             = bs_src->refcnt;
+
     /* job */
     bs_dest->in_use             = bs_src->in_use;
     bs_dest->job                = bs_src->job;
@@ -4297,6 +4301,23 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs)
     }
 }
 
+/* Get a reference to bs */
+void bdrv_ref(BlockDriverState *bs)
+{
+    bs->refcnt++;
+}
+
+/* Release a previously grabbed reference to bs.
+ * If after releasing, reference count is zero, the BlockDriverState is
+ * deleted. */
+void bdrv_unref(BlockDriverState *bs)
+{
+    assert(bs->refcnt > 0);
+    if (--bs->refcnt == 0) {
+        bdrv_delete(bs);
+    }
+}
+
 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
 {
     assert(bs->in_use != in_use);
diff --git a/include/block/block.h b/include/block/block.h
index 6207ff2..f24e1ff 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -355,6 +355,8 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs);
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);
 
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
 void bdrv_set_in_use(BlockDriverState *bs, int in_use);
 int bdrv_in_use(BlockDriverState *bs);
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c3c9c61..b9212b8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -277,6 +277,7 @@ struct BlockDriverState {
     BlockDeviceIoStatus iostatus;
     char device_name[32];
     HBitmap *dirty_bitmap;
+    int refcnt;
     int in_use; /* users other than guest access, eg. block migration */
     QTAILQ_ENTRY(BlockDriverState) list;
 
commit 13c91cb7e28b47f5c4227f7e88a1378570117704
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:45 2013 +0800

    iscsi: use bdrv_new() instead of stack structure
    
    The BlockDriverState structure needs bdrv_new() to initialize refcnt; don't
    allocate a local structure variable and memset it to 0, because with the
    coming refcnt implementation, bdrv_unref will crash if bs->refcnt is not
    initialized to 1.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/iscsi.c b/block/iscsi.c
index 2bbee1f..b2be147 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1241,11 +1241,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
 {
     int ret = 0;
     int64_t total_size = 0;
-    BlockDriverState bs;
+    BlockDriverState *bs;
     IscsiLun *iscsilun = NULL;
     QDict *bs_options;
 
-    memset(&bs, 0, sizeof(BlockDriverState));
+    bs = bdrv_new("");
 
     /* Read out options */
     while (options && options->name) {
@@ -1255,12 +1255,12 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
         options++;
     }
 
-    bs.opaque = g_malloc0(sizeof(struct IscsiLun));
-    iscsilun = bs.opaque;
+    bs->opaque = g_malloc0(sizeof(struct IscsiLun));
+    iscsilun = bs->opaque;
 
     bs_options = qdict_new();
     qdict_put(bs_options, "filename", qstring_from_str(filename));
-    ret = iscsi_open(&bs, bs_options, 0);
+    ret = iscsi_open(bs, bs_options, 0);
     QDECREF(bs_options);
 
     if (ret != 0) {
@@ -1274,7 +1274,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
         ret = -ENODEV;
         goto out;
     }
-    if (bs.total_sectors < total_size) {
+    if (bs->total_sectors < total_size) {
         ret = -ENOSPC;
         goto out;
     }
@@ -1284,7 +1284,9 @@ out:
     if (iscsilun->iscsi != NULL) {
         iscsi_destroy_context(iscsilun->iscsi);
     }
-    g_free(bs.opaque);
+    g_free(bs->opaque);
+    bs->opaque = NULL;
+    bdrv_delete(bs);
     return ret;
 }
 
commit 3d34c6cd99f434126365150c2535bbf93b94f891
Author: Fam Zheng <famz at redhat.com>
Date:   Fri Aug 23 09:14:44 2013 +0800

    vvfat: use bdrv_new() to allocate BlockDriverState
    
    we need bdrv_new() to properly initialize BDS, don't allocate memory
    manually.
    
    Signed-off-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/vvfat.c b/block/vvfat.c
index cd3b8ed..a827d91 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2943,7 +2943,7 @@ static int enable_write_target(BDRVVVFATState *s)
     unlink(s->qcow_filename);
 #endif
 
-    s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
+    s->bs->backing_hd = bdrv_new("");
     s->bs->backing_hd->drv = &vvfat_write_target;
     s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
     *(void**)s->bs->backing_hd->opaque = s;
commit a94a3fac19c37c093203e9e01c3a54e92e1dc701
Author: Alex Bligh <alex at alex.org.uk>
Date:   Thu Aug 29 17:48:16 2013 +0100

    aio / timers: fix build of tests/test-aio.c on non-linux platforms
    
    tests/test-aio.c used pipe2 which is Linux only. Use qemu_pipe
    and qemu_set_nonblock for portability. Addition of O_CLOEXEC
    is a harmless bonus.
    
    Signed-off-by: Alex Bligh <alex at alex.org.uk>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/test-aio.c b/tests/test-aio.c
index 532a1de..c4fe0fc 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -13,6 +13,7 @@
 #include <glib.h>
 #include "block/aio.h"
 #include "qemu/timer.h"
+#include "qemu/sockets.h"
 
 AioContext *ctx;
 
@@ -375,7 +376,10 @@ static void test_timer_schedule(void)
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!pipe2(pipefd, O_NONBLOCK));
+    g_assert(!qemu_pipe(pipefd));
+    qemu_set_nonblock(pipefd[0]);
+    qemu_set_nonblock(pipefd[1]);
+
     aio_set_fd_handler(ctx, pipefd[0],
                        dummy_io_handler_read, NULL, NULL);
     aio_poll(ctx, false);
@@ -716,7 +720,10 @@ static void test_source_timer_schedule(void)
     /* aio_poll will not block to wait for timers to complete unless it has
      * an fd to wait on. Fixing this breaks other tests. So create a dummy one.
      */
-    g_assert(!pipe2(pipefd, O_NONBLOCK));
+    g_assert(!qemu_pipe(pipefd));
+    qemu_set_nonblock(pipefd[0]);
+    qemu_set_nonblock(pipefd[1]);
+
     aio_set_fd_handler(ctx, pipefd[0],
                        dummy_io_handler_read, NULL, NULL);
     do {} while (g_main_context_iteration(NULL, false));
commit 68dc036488dfea170627a55e6ee3dfd7f2c2063e
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sun Sep 1 22:59:25 2013 +0200

    w32: Fix access to host devices (regression)
    
    QEMU has failed to open host devices like \\.\PhysicalDrive0 (first hard
    disk) for some time (since commit 8a79380b8ef1b02d2abd705dd026a18863b09020?).
    
    Those devices use hdev_open which did not use the latest API for options.
    This resulted in a fatal runtime error:
    
      Block protocol 'host_device' doesn't support the option 'filename'
    
    Duplicate code from raw_open to fix this.
    
    Cc: qemu-stable at nongnu.org
    Reported-by: David Brenner <david.brenner3 at gmail.com>
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-win32.c b/block/raw-win32.c
index 9b5b2af..d2d2d9f 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -535,13 +535,29 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
 {
     BDRVRawState *s = bs->opaque;
     int access_flags, create_flags;
+    int ret = 0;
     DWORD overlapped;
     char device_name[64];
-    const char *filename = qdict_get_str(options, "filename");
+
+    Error *local_err = NULL;
+    const char *filename;
+
+    QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        ret = -EINVAL;
+        goto done;
+    }
+
+    filename = qemu_opt_get(opts, "filename");
 
     if (strstart(filename, "/dev/cdrom", NULL)) {
-        if (find_cdrom(device_name, sizeof(device_name)) < 0)
-            return -ENOENT;
+        if (find_cdrom(device_name, sizeof(device_name)) < 0) {
+            ret = -ENOENT;
+            goto done;
+        }
         filename = device_name;
     } else {
         /* transform drive letters into device name */
@@ -564,11 +580,17 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
     if (s->hfile == INVALID_HANDLE_VALUE) {
         int err = GetLastError();
 
-        if (err == ERROR_ACCESS_DENIED)
-            return -EACCES;
-        return -1;
+        if (err == ERROR_ACCESS_DENIED) {
+            ret = -EACCES;
+        } else {
+            ret = -1;
+        }
+        goto done;
     }
-    return 0;
+
+done:
+    qemu_opts_del(opts);
+    return ret;
 }
 
 static BlockDriver bdrv_host_device = {
commit b2e10493c71160d88bb823cae9a92e806a79b9d6
Author: Alexandre Derumier <aderumier at odiso.com>
Date:   Mon Sep 2 19:07:24 2013 +0100

    add qemu-img convert -n option (skip target volume creation)
    
    Add a -n option to skip volume creation on qemu-img convert.
    This is useful for targets such as rbd / ceph, where the
    target volume may already exist; we cannot always rely on
    qemu-img convert to create the image because, depending on the
    output format, there may be parameters that cannot be
    specified through the qemu-img convert command line.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
    Signed-off-by: Alex Bligh <alex at alex.org.uk>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 4ca7e95..2f6d579 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -34,9 +34,9 @@ STEXI
 ETEXI
 
 DEF("convert", img_convert,
-    "convert [-c] [-p] [-q] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename")
+    "convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename")
 STEXI
- at item convert [-c] [-p] [-q] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+ at item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
 ETEXI
 
 DEF("info", img_info,
diff --git a/qemu-img.c b/qemu-img.c
index b9a848d..e790d6a 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -103,6 +103,8 @@ static void help(void)
            "  '-S' indicates the consecutive number of bytes that must contain only zeros\n"
            "       for qemu-img to create a sparse image during conversion\n"
            "  '--output' takes the format in which the output must be done (human or json)\n"
+           "  '-n' skips the target volume creation (useful if the volume is created\n"
+           "       prior to running qemu-img)\n"
            "\n"
            "Parameters to check subcommand:\n"
            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
@@ -1116,7 +1118,8 @@ out3:
 
 static int img_convert(int argc, char **argv)
 {
-    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
+    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
+        cluster_sectors, skip_create;
     int progress = 0, flags;
     const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
@@ -1139,8 +1142,9 @@ static int img_convert(int argc, char **argv)
     cache = "unsafe";
     out_baseimg = NULL;
     compress = 0;
+    skip_create = 0;
     for(;;) {
-        c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:q");
+        c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:qn");
         if (c == -1) {
             break;
         }
@@ -1197,6 +1201,9 @@ static int img_convert(int argc, char **argv)
         case 'q':
             quiet = true;
             break;
+        case 'n':
+            skip_create = 1;
+            break;
         }
     }
 
@@ -1329,20 +1336,22 @@ static int img_convert(int argc, char **argv)
         }
     }
 
-    /* Create the new image */
-    ret = bdrv_create(drv, out_filename, param);
-    if (ret < 0) {
-        if (ret == -ENOTSUP) {
-            error_report("Formatting not supported for file format '%s'",
-                         out_fmt);
-        } else if (ret == -EFBIG) {
-            error_report("The image size is too large for file format '%s'",
-                         out_fmt);
-        } else {
-            error_report("%s: error while converting %s: %s",
-                         out_filename, out_fmt, strerror(-ret));
+    if (!skip_create) {
+        /* Create the new image */
+        ret = bdrv_create(drv, out_filename, param);
+        if (ret < 0) {
+            if (ret == -ENOTSUP) {
+                error_report("Formatting not supported for file format '%s'",
+                             out_fmt);
+            } else if (ret == -EFBIG) {
+                error_report("The image size is too large for file format '%s'",
+                             out_fmt);
+            } else {
+                error_report("%s: error while converting %s: %s",
+                             out_filename, out_fmt, strerror(-ret));
+            }
+            goto out;
         }
-        goto out;
     }
 
     flags = BDRV_O_RDWR;
@@ -1363,6 +1372,20 @@ static int img_convert(int argc, char **argv)
     bdrv_get_geometry(bs[0], &bs_sectors);
     buf = qemu_blockalign(out_bs, IO_BUF_SIZE);
 
+    if (skip_create) {
+        int64_t output_length = bdrv_getlength(out_bs);
+        if (output_length < 0) {
+            error_report("unable to get output image length: %s\n",
+                         strerror(-output_length));
+            ret = -1;
+            goto out;
+        } else if (output_length < total_sectors << BDRV_SECTOR_BITS) {
+            error_report("output file is smaller than input file");
+            ret = -1;
+            goto out;
+        }
+    }
+
     if (compress) {
         ret = bdrv_get_info(out_bs, &bdi);
         if (ret < 0) {
diff --git a/qemu-img.texi b/qemu-img.texi
index 69f1bda..ad45a6d 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -96,6 +96,14 @@ Second image format
 Strict mode - fail on on different image size or sector allocation
 @end table
 
+Parameters to convert subcommand:
+
+ at table @option
+
+ at item -n
+Skip the creation of the target volume
+ at end table
+
 Command description:
 
 @table @option
@@ -171,7 +179,7 @@ Error on reading data
 
 @end table
 
- at item convert [-c] [-p] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+ at item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
 
 Convert the disk image @var{filename} or a snapshot @var{snapshot_name} to disk image @var{output_filename}
 using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -190,6 +198,11 @@ created as a copy on write image of the specified base image; the
 @var{backing_file} should have the same content as the input's base image,
 however the path, image format, etc may differ.
 
+If the @code{-n} option is specified, the target volume creation will be
+skipped. This is useful for formats such as @code{rbd} if the target
+volume has already been created with site specific options that cannot
+be supplied through qemu-img.
+
 @item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
 
 Give information about the disk image @var{filename}. Use it in
diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063
new file mode 100755
index 0000000..de0cbbd
--- /dev/null
+++ b/tests/qemu-iotests/063
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# test of qemu-img convert -n - convert without creation
+#
+# Copyright (C) 2009 Red Hat, Inc.
+# Copyright (C) 2013 Alex Bligh (alex at alex.org.uk)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=alex at alex.org.uk
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+	rm -f $TEST_IMG.orig $TEST_IMG.raw $TEST_IMG.raw2
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+_supported_fmt qcow qcow2 vmdk qed raw
+_supported_proto generic
+_supported_os Linux
+
+_make_test_img 4M
+
+echo "== Testing conversion with -n fails with no target file =="
+# check .orig file does not exist
+rm -f $TEST_IMG.orig
+if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig >/dev/null 2>&1; then
+    exit 1
+fi
+
+echo "== Testing conversion with -n succeeds with a target file =="
+rm -f $TEST_IMG.orig
+cp $TEST_IMG $TEST_IMG.orig
+if ! $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig ; then
+    exit 1
+fi
+
+echo "== Testing conversion to raw is the same after conversion with -n =="
+# compare the raw files
+if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG $TEST_IMG.raw1 ; then
+    exit 1
+fi
+
+if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG.orig $TEST_IMG.raw2 ; then
+    exit 1
+fi
+
+if ! cmp $TEST_IMG.raw1 $TEST_IMG.raw2 ; then
+    exit 1
+fi
+
+echo "== Testing conversion back to original format =="
+if ! $QEMU_IMG convert -f raw -O $IMGFMT -n $TEST_IMG.raw2 $TEST_IMG ; then
+    exit 1
+fi
+_check_test_img
+
+echo "== Testing conversion to a smaller file fails =="
+rm -f $TEST_IMG.orig
+mv $TEST_IMG $TEST_IMG.orig
+_make_test_img 2M
+if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG.orig $TEST_IMG >/dev/null 2>&1; then
+    exit 1
+fi
+
+rm -f $TEST_IMG.orig $TEST_IMG.raw $TEST_IMG.raw2
+
+echo "*** done"
+rm -f $seq.full
+status=0
+exit 0
diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out
new file mode 100644
index 0000000..de1c99a
--- /dev/null
+++ b/tests/qemu-iotests/063.out
@@ -0,0 +1,10 @@
+QA output created by 063
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== Testing conversion with -n fails with no target file ==
+== Testing conversion with -n succeeds with a target file ==
+== Testing conversion to raw is the same after conversion with -n ==
+== Testing conversion back to original format ==
+No errors were found on the image.
+== Testing conversion to a smaller file fails ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index b696242..316b1dd 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -66,3 +66,4 @@
 059 rw auto
 060 rw auto
 062 rw auto
+063 rw auto
commit b3f3a30f387f34308b3e4d910a2824e69c34182f
Author: Max Reitz <mreitz at redhat.com>
Date:   Mon Sep 2 16:36:15 2013 +0200

    qemu-iotests: Adjust test result 039
    
    The moved OFLAG_COPIED check in qcow2_check_refcounts results in a
    different output from test 039 (mismatches are now found after the
    general refcount check (as far as any remain)). This patch adjusts the
    expected test result accordingly.
    
    Signed-off-by: Max Reitz <mreitz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
index cb510d6..077fa64 100644
--- a/tests/qemu-iotests/039.out
+++ b/tests/qemu-iotests/039.out
@@ -12,8 +12,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 incompatible_features     0x1
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
 ERROR cluster 5 refcount=0 reference=1
+ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0
 
 2 errors were found on the image.
 Data may be corrupted, or further writes to the image may corrupt it.
@@ -24,7 +24,6 @@ read 512/512 bytes at offset 0
 incompatible_features     0x1
 
 == Repairing the image file must succeed ==
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
 Repairing cluster 5 refcount=0 reference=1
 The following inconsistencies were found and repaired:
 
@@ -44,7 +43,6 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 incompatible_features     0x1
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
 Repairing cluster 5 refcount=0 reference=1
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
commit 2024c1df43eae0d2e35663da0c6e8c51290a386e
Author: Benoît Canet <benoit at irqsave.net>
Date:   Mon Sep 2 14:14:41 2013 +0200

    block: Add iops_size to do the iops accounting for a given io size.
    
    This feature can be used in cases where users circumvent the iops limit by
    issuing jumbo I/Os that hammer the storage backend.
    
    Signed-off-by: Benoit Canet <benoit at irqsave.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/qapi.c b/block/qapi.c
index b1edc66..782051c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -258,6 +258,9 @@ void bdrv_query_info(BlockDriverState *bs,
                 cfg.buckets[THROTTLE_OPS_WRITE].max;
             info->inserted->iops_wr_max     =
                 cfg.buckets[THROTTLE_OPS_WRITE].max;
+
+            info->inserted->has_iops_size = cfg.op_size;
+            info->inserted->iops_size = cfg.op_size;
         }
 
         bs0 = bs;
diff --git a/blockdev.c b/blockdev.c
index 76e9308..fe2f318 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -507,7 +507,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     cfg.buckets[THROTTLE_OPS_WRITE].max =
         qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
 
-    cfg.op_size = 0;
+    cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
 
     if (!check_throttle_config(&cfg, &error)) {
         error_report("%s", error_get_pretty(error));
@@ -774,6 +774,9 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     qemu_opt_rename(all_opts, "bps_rd_max", "throttling.bps-read-max");
     qemu_opt_rename(all_opts, "bps_wr_max", "throttling.bps-write-max");
 
+    qemu_opt_rename(all_opts,
+                    "iops_size", "throttling.iops-size");
+
     qemu_opt_rename(all_opts, "readonly", "read-only");
 
     value = qemu_opt_get(all_opts, "cache");
@@ -1273,7 +1276,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
                                bool has_iops_rd_max,
                                int64_t iops_rd_max,
                                bool has_iops_wr_max,
-                               int64_t iops_wr_max, Error **errp)
+                               int64_t iops_wr_max,
+                               bool has_iops_size,
+                               int64_t iops_size, Error **errp)
 {
     ThrottleConfig cfg;
     BlockDriverState *bs;
@@ -1312,7 +1317,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
         cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
     }
 
-    cfg.op_size = 0;
+    if (has_iops_size) {
+        cfg.op_size = iops_size;
+    }
 
     if (!check_throttle_config(&cfg, errp)) {
         return;
@@ -2038,6 +2045,10 @@ QemuOptsList qemu_common_drive_opts = {
             .type = QEMU_OPT_NUMBER,
             .help = "total bytes write burst",
         },{
+            .name = "throttling.iops-size",
+            .type = QEMU_OPT_NUMBER,
+            .help = "when limiting by iops max size of an I/O in bytes",
+        },{
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
diff --git a/hmp.c b/hmp.c
index 85a6c16..2bd31d1 100644
--- a/hmp.c
+++ b/hmp.c
@@ -351,7 +351,8 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                             " iops_wr=%" PRId64
                             " iops_max=%" PRId64
                             " iops_rd_max=%" PRId64
-                            " iops_wr_max=%" PRId64 "\n",
+                            " iops_wr_max=%" PRId64
+                            " iops_size=%" PRId64 "\n",
                             info->value->inserted->bps,
                             info->value->inserted->bps_rd,
                             info->value->inserted->bps_wr,
@@ -363,7 +364,8 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                             info->value->inserted->iops_wr,
                             info->value->inserted->iops_max,
                             info->value->inserted->iops_rd_max,
-                            info->value->inserted->iops_wr_max);
+                            info->value->inserted->iops_wr_max,
+                            info->value->inserted->iops_size);
         } else {
             monitor_printf(mon, " [not inserted]");
         }
@@ -1124,6 +1126,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
                               false,
                               0,
                               false,
+                              0,
+                              false, /* No default I/O size */
                               0, &err);
     hmp_handle_error(mon, &err);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 6a9b8ca..5d5164f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -797,6 +797,8 @@
 #
 # @iops_wr_max: #optional write I/O operations max (Since 1.7)
 #
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
 # Since: 0.14.0
 #
 # Notes: This interface is only found in @BlockInfo.
@@ -810,7 +812,8 @@
             'image': 'ImageInfo',
             '*bps_max': 'int', '*bps_rd_max': 'int',
             '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int' } }
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
 
 ##
 # @BlockDeviceIoStatus:
@@ -2201,6 +2204,8 @@
 #
 # @iops_wr_max: #optional write I/O operations max (Since 1.7)
 #
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
 # Returns: Nothing on success
 #          If @device is not a valid block device, DeviceNotFound
 #
@@ -2211,7 +2216,8 @@
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             '*bps_max': 'int', '*bps_rd_max': 'int',
             '*bps_wr_max': 'int', '*iops_max': 'int',
-            '*iops_rd_max': 'int', '*iops_wr_max': 'int' } }
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+            '*iops_size': 'int' } }
 
 ##
 # @block-stream:
diff --git a/qemu-options.hx b/qemu-options.hx
index d3760df..5dc8b75 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -413,6 +413,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
     "       [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n"
+    "       [[,iops_size=is]]\n"
     "                use 'file' as a drive image\n", QEMU_ARCH_ALL)
 STEXI
 @item -drive @var{option}[, at var{option}[, at var{option}[,...]]]
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 7c9667b..008cad9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1389,7 +1389,7 @@ EQMP
 
     {
         .name       = "block_set_io_throttle",
-        .args_type  = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?",
+        .args_type  = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?",
         .mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle,
     },
 
@@ -1414,6 +1414,7 @@ Arguments:
 - "iops_max":  total I/O operations max (json-int)
 - "iops_rd_max":  read I/O operations max (json-int)
 - "iops_wr_max":  write I/O operations max (json-int)
+- "iops_size":  I/O size in bytes when limiting (json-int)
 
 Example:
 
@@ -1429,7 +1430,8 @@ Example:
                                                "bps_wr_max": 0,
                                                "iops_max": 0,
                                                "iops_rd_max": 0,
-                                               "iops_wr_max": 0 } }
+                                               "iops_wr_max": 0,
+                                               "iops_size": 0 } }
 <- { "return": {} }
 
 EQMP
@@ -1776,6 +1778,7 @@ Each json-object contain the following:
          - "iops_max":  total I/O operations max (json-int)
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
+         - "iops_size": I/O size when limiting by iops (json-int)
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -1851,6 +1854,7 @@ Example:
                "iops_max": 0,
                "iops_rd_max": 0,
                "iops_wr_max": 0,
+               "iops_size": 0,
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
commit 3e9fab690d59ac15956c3733fe0794ce1ae4c4af
Author: Benoît Canet <benoit at irqsave.net>
Date:   Mon Sep 2 14:14:40 2013 +0200

    block: Add support for throttling burst max in QMP and the command line.
    
    The max parameter of the leaky bucket throttling algorithm can be used to
    allow the guest to do bursts.
    The max value is a pool of I/O that the guest can use without being throttled
    at all. Throttling is triggered once this pool is empty.
    
    Signed-off-by: Benoit Canet <benoit at irqsave.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/qapi.c b/block/qapi.c
index cac3919..b1edc66 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -232,6 +232,32 @@ void bdrv_query_info(BlockDriverState *bs,
             info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
             info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
             info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
+
+            info->inserted->has_bps_max     =
+                cfg.buckets[THROTTLE_BPS_TOTAL].max;
+            info->inserted->bps_max         =
+                cfg.buckets[THROTTLE_BPS_TOTAL].max;
+            info->inserted->has_bps_rd_max  =
+                cfg.buckets[THROTTLE_BPS_READ].max;
+            info->inserted->bps_rd_max      =
+                cfg.buckets[THROTTLE_BPS_READ].max;
+            info->inserted->has_bps_wr_max  =
+                cfg.buckets[THROTTLE_BPS_WRITE].max;
+            info->inserted->bps_wr_max      =
+                cfg.buckets[THROTTLE_BPS_WRITE].max;
+
+            info->inserted->has_iops_max    =
+                cfg.buckets[THROTTLE_OPS_TOTAL].max;
+            info->inserted->iops_max        =
+                cfg.buckets[THROTTLE_OPS_TOTAL].max;
+            info->inserted->has_iops_rd_max =
+                cfg.buckets[THROTTLE_OPS_READ].max;
+            info->inserted->iops_rd_max     =
+                cfg.buckets[THROTTLE_OPS_READ].max;
+            info->inserted->has_iops_wr_max =
+                cfg.buckets[THROTTLE_OPS_WRITE].max;
+            info->inserted->iops_wr_max     =
+                cfg.buckets[THROTTLE_OPS_WRITE].max;
         }
 
         bs0 = bs;
diff --git a/blockdev.c b/blockdev.c
index 5f5ba96..76e9308 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -494,13 +494,18 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     cfg.buckets[THROTTLE_OPS_WRITE].avg =
         qemu_opt_get_number(opts, "throttling.iops-write", 0);
 
-    cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
-    cfg.buckets[THROTTLE_BPS_READ].max  = 0;
-    cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
-
-    cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
-    cfg.buckets[THROTTLE_OPS_READ].max  = 0;
-    cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
+    cfg.buckets[THROTTLE_BPS_TOTAL].max =
+        qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+    cfg.buckets[THROTTLE_BPS_READ].max  =
+        qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+    cfg.buckets[THROTTLE_BPS_WRITE].max =
+        qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+    cfg.buckets[THROTTLE_OPS_TOTAL].max =
+        qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+    cfg.buckets[THROTTLE_OPS_READ].max =
+        qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+    cfg.buckets[THROTTLE_OPS_WRITE].max =
+        qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
 
     cfg.op_size = 0;
 
@@ -761,6 +766,14 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     qemu_opt_rename(all_opts, "bps_rd", "throttling.bps-read");
     qemu_opt_rename(all_opts, "bps_wr", "throttling.bps-write");
 
+    qemu_opt_rename(all_opts, "iops_max", "throttling.iops-total-max");
+    qemu_opt_rename(all_opts, "iops_rd_max", "throttling.iops-read-max");
+    qemu_opt_rename(all_opts, "iops_wr_max", "throttling.iops-write-max");
+
+    qemu_opt_rename(all_opts, "bps_max", "throttling.bps-total-max");
+    qemu_opt_rename(all_opts, "bps_rd_max", "throttling.bps-read-max");
+    qemu_opt_rename(all_opts, "bps_wr_max", "throttling.bps-write-max");
+
     qemu_opt_rename(all_opts, "readonly", "read-only");
 
     value = qemu_opt_get(all_opts, "cache");
@@ -1245,8 +1258,22 @@ void qmp_change_blockdev(const char *device, const char *filename,
 
 /* throttling disk I/O limits */
 void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
-                               int64_t bps_wr, int64_t iops, int64_t iops_rd,
-                               int64_t iops_wr, Error **errp)
+                               int64_t bps_wr,
+                               int64_t iops,
+                               int64_t iops_rd,
+                               int64_t iops_wr,
+                               bool has_bps_max,
+                               int64_t bps_max,
+                               bool has_bps_rd_max,
+                               int64_t bps_rd_max,
+                               bool has_bps_wr_max,
+                               int64_t bps_wr_max,
+                               bool has_iops_max,
+                               int64_t iops_max,
+                               bool has_iops_rd_max,
+                               int64_t iops_rd_max,
+                               bool has_iops_wr_max,
+                               int64_t iops_wr_max, Error **errp)
 {
     ThrottleConfig cfg;
     BlockDriverState *bs;
@@ -1266,13 +1293,24 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     cfg.buckets[THROTTLE_OPS_READ].avg  = iops_rd;
     cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr;
 
-    cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
-    cfg.buckets[THROTTLE_BPS_READ].max  = 0;
-    cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
-
-    cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
-    cfg.buckets[THROTTLE_OPS_READ].max  = 0;
-    cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
+    if (has_bps_max) {
+        cfg.buckets[THROTTLE_BPS_TOTAL].max = bps_max;
+    }
+    if (has_bps_rd_max) {
+        cfg.buckets[THROTTLE_BPS_READ].max = bps_rd_max;
+    }
+    if (has_bps_wr_max) {
+        cfg.buckets[THROTTLE_BPS_WRITE].max = bps_wr_max;
+    }
+    if (has_iops_max) {
+        cfg.buckets[THROTTLE_OPS_TOTAL].max = iops_max;
+    }
+    if (has_iops_rd_max) {
+        cfg.buckets[THROTTLE_OPS_READ].max = iops_rd_max;
+    }
+    if (has_iops_wr_max) {
+        cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
+    }
 
     cfg.op_size = 0;
 
@@ -1976,6 +2014,30 @@ QemuOptsList qemu_common_drive_opts = {
             .type = QEMU_OPT_NUMBER,
             .help = "limit write bytes per second",
         },{
+            .name = "throttling.iops-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations burst",
+        },{
+            .name = "throttling.iops-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations read burst",
+        },{
+            .name = "throttling.iops-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations write burst",
+        },{
+            .name = "throttling.bps-total-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes burst",
+        },{
+            .name = "throttling.bps-read-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes read burst",
+        },{
+            .name = "throttling.bps-write-max",
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes write burst",
+        },{
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
diff --git a/hmp.c b/hmp.c
index fcca6ae..85a6c16 100644
--- a/hmp.c
+++ b/hmp.c
@@ -344,14 +344,28 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
         {
             monitor_printf(mon, "    I/O throttling:   bps=%" PRId64
                             " bps_rd=%" PRId64  " bps_wr=%" PRId64
+                            " bps_max=%" PRId64
+                            " bps_rd_max=%" PRId64
+                            " bps_wr_max=%" PRId64
                             " iops=%" PRId64 " iops_rd=%" PRId64
-                            " iops_wr=%" PRId64 "\n",
+                            " iops_wr=%" PRId64
+                            " iops_max=%" PRId64
+                            " iops_rd_max=%" PRId64
+                            " iops_wr_max=%" PRId64 "\n",
                             info->value->inserted->bps,
                             info->value->inserted->bps_rd,
                             info->value->inserted->bps_wr,
+                            info->value->inserted->bps_max,
+                            info->value->inserted->bps_rd_max,
+                            info->value->inserted->bps_wr_max,
                             info->value->inserted->iops,
                             info->value->inserted->iops_rd,
-                            info->value->inserted->iops_wr);
+                            info->value->inserted->iops_wr,
+                            info->value->inserted->iops_max,
+                            info->value->inserted->iops_rd_max,
+                            info->value->inserted->iops_wr_max);
+        } else {
+            monitor_printf(mon, " [not inserted]");
         }
 
         if (verbose) {
@@ -1098,7 +1112,19 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
                               qdict_get_int(qdict, "bps_wr"),
                               qdict_get_int(qdict, "iops"),
                               qdict_get_int(qdict, "iops_rd"),
-                              qdict_get_int(qdict, "iops_wr"), &err);
+                              qdict_get_int(qdict, "iops_wr"),
+                              false, /* no burst max via HMP */
+                              0,
+                              false,
+                              0,
+                              false,
+                              0,
+                              false,
+                              0,
+                              false,
+                              0,
+                              false,
+                              0, &err);
     hmp_handle_error(mon, &err);
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index a51f7d2..6a9b8ca 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -785,6 +785,18 @@
 #
 # @image: the info of image used (since: 1.6)
 #
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
 # Since: 0.14.0
 #
 # Notes: This interface is only found in @BlockInfo.
@@ -795,7 +807,10 @@
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
-            'image': 'ImageInfo' } }
+            'image': 'ImageInfo',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int' } }
 
 ##
 # @BlockDeviceIoStatus:
@@ -2174,6 +2189,18 @@
 #
 # @iops_wr: write I/O operations per second
 #
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
 # Returns: Nothing on success
 #          If @device is not a valid block device, DeviceNotFound
 #
@@ -2181,7 +2208,10 @@
 ##
 { 'command': 'block_set_io_throttle',
   'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
-            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int' } }
+            'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+            '*bps_max': 'int', '*bps_rd_max': 'int',
+            '*bps_wr_max': 'int', '*iops_max': 'int',
+            '*iops_rd_max': 'int', '*iops_wr_max': 'int' } }
 
 ##
 # @block-stream:
diff --git a/qemu-options.hx b/qemu-options.hx
index d15338e..d3760df 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -409,7 +409,10 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
     "       [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
-    "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]]\n"
+    "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
+    "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
+    "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
+    "       [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n"
     "                use 'file' as a drive image\n", QEMU_ARCH_ALL)
 STEXI
 @item -drive @var{option}[,@var{option}[,@var{option}[,...]]]
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8a8f342..7c9667b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1389,7 +1389,7 @@ EQMP
 
     {
         .name       = "block_set_io_throttle",
-        .args_type  = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l",
+        .args_type  = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?",
         .mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle,
     },
 
@@ -1408,6 +1408,12 @@ Arguments:
 - "iops": total I/O operations per second (json-int)
 - "iops_rd": read I/O operations per second (json-int)
 - "iops_wr": write I/O operations per second (json-int)
+- "bps_max":  total max in bytes (json-int)
+- "bps_rd_max":  read max in bytes (json-int)
+- "bps_wr_max":  write max in bytes (json-int)
+- "iops_max":  total I/O operations max (json-int)
+- "iops_rd_max":  read I/O operations max (json-int)
+- "iops_wr_max":  write I/O operations max (json-int)
 
 Example:
 
@@ -1417,7 +1423,13 @@ Example:
                                                "bps_wr": 0,
                                                "iops": 0,
                                                "iops_rd": 0,
-                                               "iops_wr": 0 } }
+                                               "iops_wr": 0,
+                                               "bps_max": 8000000,
+                                               "bps_rd_max": 0,
+                                               "bps_wr_max": 0,
+                                               "iops_max": 0,
+                                               "iops_rd_max": 0,
+                                               "iops_wr_max": 0 } }
 <- { "return": {} }
 
 EQMP
@@ -1758,6 +1770,12 @@ Each json-object contain the following:
          - "iops": limit total I/O operations per second (json-int)
          - "iops_rd": limit read operations per second (json-int)
          - "iops_wr": limit write operations per second (json-int)
+         - "bps_max":  total max in bytes (json-int)
+         - "bps_rd_max":  read max in bytes (json-int)
+         - "bps_wr_max":  write max in bytes (json-int)
+         - "iops_max":  total I/O operations max (json-int)
+         - "iops_rd_max":  read I/O operations max (json-int)
+         - "iops_wr_max":  write I/O operations max (json-int)
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -1827,6 +1845,12 @@ Example:
                "iops":1000000,
                "iops_rd":0,
                "iops_wr":0,
+               "bps_max": 8000000,
+               "bps_rd_max": 0,
+               "bps_wr_max": 0,
+               "iops_max": 0,
+               "iops_rd_max": 0,
+               "iops_wr_max": 0,
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
commit cc0681c45430a1f1a4c2d06e9499b7775afc9a18
Author: Benoît Canet <benoit at irqsave.net>
Date:   Mon Sep 2 14:14:39 2013 +0200

    block: Enable the new throttling code in the block layer.
    
    Signed-off-by: Benoit Canet <benoit at irqsave.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index 26639e8..0292d1d 100644
--- a/block.c
+++ b/block.c
@@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors);
 
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
-        bool is_write, double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
-        double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
-        bool is_write, int64_t *wait);
-
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
@@ -123,69 +116,101 @@ int is_windows_drive(const char *filename)
 #endif
 
 /* throttling disk I/O limits */
-void bdrv_io_limits_disable(BlockDriverState *bs)
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        ThrottleConfig *cfg)
 {
-    bs->io_limits_enabled = false;
+    int i;
 
-    do {} while (qemu_co_enter_next(&bs->throttled_reqs));
+    throttle_config(&bs->throttle_state, cfg);
 
-    if (bs->block_timer) {
-        timer_del(bs->block_timer);
-        timer_free(bs->block_timer);
-        bs->block_timer = NULL;
+    for (i = 0; i < 2; i++) {
+        qemu_co_enter_next(&bs->throttled_reqs[i]);
     }
+}
+
+/* this function drains all the throttled IOs */
+static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
+{
+    bool drained = false;
+    bool enabled = bs->io_limits_enabled;
+    int i;
+
+    bs->io_limits_enabled = false;
+
+    for (i = 0; i < 2; i++) {
+        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+            drained = true;
+        }
+    }
+
+    bs->io_limits_enabled = enabled;
 
-    bs->slice_start = 0;
-    bs->slice_end   = 0;
+    return drained;
 }
 
-static void bdrv_block_timer(void *opaque)
+void bdrv_io_limits_disable(BlockDriverState *bs)
 {
-    BlockDriverState *bs = opaque;
+    bs->io_limits_enabled = false;
 
-    qemu_co_enter_next(&bs->throttled_reqs);
+    bdrv_start_throttled_reqs(bs);
+
+    throttle_destroy(&bs->throttle_state);
 }
 
-void bdrv_io_limits_enable(BlockDriverState *bs)
+static void bdrv_throttle_read_timer_cb(void *opaque)
 {
-    bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs);
-    bs->io_limits_enabled = true;
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[0]);
 }
 
-bool bdrv_io_limits_enabled(BlockDriverState *bs)
+static void bdrv_throttle_write_timer_cb(void *opaque)
 {
-    BlockIOLimit *io_limits = &bs->io_limits;
-    return io_limits->bps[BLOCK_IO_LIMIT_READ]
-         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
-         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
-         || io_limits->iops[BLOCK_IO_LIMIT_READ]
-         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
-         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[1]);
 }
 
+/* should be called before bdrv_set_io_limits if a limit is set */
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+    assert(!bs->io_limits_enabled);
+    throttle_init(&bs->throttle_state,
+                  QEMU_CLOCK_VIRTUAL,
+                  bdrv_throttle_read_timer_cb,
+                  bdrv_throttle_write_timer_cb,
+                  bs);
+    bs->io_limits_enabled = true;
+}
+
+/* This function makes an IO wait if needed
+ *
+ * @nb_sectors: the number of sectors of the IO
+ * @is_write:   is the IO a write
+ */
 static void bdrv_io_limits_intercept(BlockDriverState *bs,
-                                     bool is_write, int nb_sectors)
+                                     int nb_sectors,
+                                     bool is_write)
 {
-    int64_t wait_time = -1;
+    /* must this IO wait? */
+    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
 
-    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
-        qemu_co_queue_wait(&bs->throttled_reqs);
+    /* queue the IO if it must wait or same-type requests are throttled */
+    if (must_wait ||
+        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
+        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
     }
 
-    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
-     * throttled requests will not be dequeued until the current request is
-     * allowed to be serviced. So if the current request still exceeds the
-     * limits, it will be inserted to the head. All requests followed it will
-     * be still in throttled_reqs queue.
-     */
+    /* the IO will be executed, do the accounting */
+    throttle_account(&bs->throttle_state,
+                     is_write,
+                     nb_sectors * BDRV_SECTOR_SIZE);
 
-    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
-        timer_mod(bs->block_timer,
-                       wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
-        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
+    /* if the next request must wait -> do nothing */
+    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
+        return;
     }
 
-    qemu_co_queue_next(&bs->throttled_reqs);
+    /* else queue next request for execution */
+    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
 }
 
 /* check if the path starts with "<protocol>:" */
@@ -305,7 +330,8 @@ BlockDriverState *bdrv_new(const char *device_name)
     bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
     notifier_with_return_list_init(&bs->before_write_notifiers);
-    qemu_co_queue_init(&bs->throttled_reqs);
+    qemu_co_queue_init(&bs->throttled_reqs[0]);
+    qemu_co_queue_init(&bs->throttled_reqs[1]);
 
     return bs;
 }
@@ -1112,11 +1138,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
         bdrv_dev_change_media_cb(bs, true);
     }
 
-    /* throttling disk I/O limits */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_enable(bs);
-    }
-
     return 0;
 
 unlink_and_fail:
@@ -1435,7 +1456,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
     }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
         return true;
     }
     if (bs->file && bdrv_requests_pending(bs->file)) {
@@ -1481,7 +1505,7 @@ void bdrv_drain_all(void)
          * a busy wait.
          */
         QTAILQ_FOREACH(bs, &bdrv_states, list) {
-            while (qemu_co_enter_next(&bs->throttled_reqs)) {
+            if (bdrv_start_throttled_reqs(bs)) {
                 busy = true;
             }
         }
@@ -1523,13 +1547,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-    /* i/o timing parameters */
-    bs_dest->slice_start        = bs_src->slice_start;
-    bs_dest->slice_end          = bs_src->slice_end;
-    bs_dest->slice_submitted    = bs_src->slice_submitted;
-    bs_dest->io_limits          = bs_src->io_limits;
-    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
-    bs_dest->block_timer        = bs_src->block_timer;
+    /* i/o throttled req */
+    memcpy(&bs_dest->throttle_state,
+           &bs_src->throttle_state,
+           sizeof(ThrottleState));
+    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
+    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
 
     /* r/w error */
@@ -1576,7 +1599,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
     assert(bs_new->dev == NULL);
     assert(bs_new->in_use == 0);
     assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->block_timer == NULL);
+    assert(!throttle_have_timer(&bs_new->throttle_state));
 
     tmp = *bs_new;
     *bs_new = *bs_old;
@@ -1595,7 +1618,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
     assert(bs_new->job == NULL);
     assert(bs_new->in_use == 0);
     assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->block_timer == NULL);
+    assert(!throttle_have_timer(&bs_new->throttle_state));
 
     bdrv_rebind(bs_new);
     bdrv_rebind(bs_old);
@@ -2538,11 +2561,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         return -EIO;
     }
 
-    /* throttling disk read I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, false, nb_sectors);
-    }
-
     if (bs->copy_on_read) {
         flags |= BDRV_REQ_COPY_ON_READ;
     }
@@ -2554,6 +2572,11 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }
 
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, nb_sectors, false);
+    }
+
     tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
 
     if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -2679,15 +2702,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
         return -EIO;
     }
 
-    /* throttling disk write I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, true, nb_sectors);
-    }
-
     if (bs->copy_on_read_in_flight) {
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }
 
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, nb_sectors, true);
+    }
+
     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
@@ -2805,14 +2828,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
     *nb_sectors_ptr = length;
 }
 
-/* throttling disk io limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
-                        BlockIOLimit *io_limits)
-{
-    bs->io_limits = *io_limits;
-    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
-}
-
 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                        BlockdevOnError on_write_error)
 {
@@ -3622,169 +3637,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
     acb->aiocb_info->cancel(acb);
 }
 
-/* block I/O throttling */
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
-                 bool is_write, double elapsed_time, uint64_t *wait)
-{
-    uint64_t bps_limit = 0;
-    uint64_t extension;
-    double   bytes_limit, bytes_base, bytes_res;
-    double   slice_time, wait_time;
-
-    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
-    } else if (bs->io_limits.bps[is_write]) {
-        bps_limit = bs->io_limits.bps[is_write];
-    } else {
-        if (wait) {
-            *wait = 0;
-        }
-
-        return false;
-    }
-
-    slice_time = bs->slice_end - bs->slice_start;
-    slice_time /= (NANOSECONDS_PER_SECOND);
-    bytes_limit = bps_limit * slice_time;
-    bytes_base  = bs->slice_submitted.bytes[is_write];
-    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-        bytes_base += bs->slice_submitted.bytes[!is_write];
-    }
-
-    /* bytes_base: the bytes of data which have been read/written; and
-     *             it is obtained from the history statistic info.
-     * bytes_res: the remaining bytes of data which need to be read/written.
-     * (bytes_base + bytes_res) / bps_limit: used to calcuate
-     *             the total time for completing reading/writting all data.
-     */
-    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
-
-    if (bytes_base + bytes_res <= bytes_limit) {
-        if (wait) {
-            *wait = 0;
-        }
-
-        return false;
-    }
-
-    /* Calc approx time to dispatch */
-    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
-
-    /* When the I/O rate at runtime exceeds the limits,
-     * bs->slice_end need to be extended in order that the current statistic
-     * info can be kept until the timer fire, so it is increased and tuned
-     * based on the result of experiment.
-     */
-    extension = wait_time * NANOSECONDS_PER_SECOND;
-    extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
-                BLOCK_IO_SLICE_TIME;
-    bs->slice_end += extension;
-    if (wait) {
-        *wait = wait_time * NANOSECONDS_PER_SECOND;
-    }
-
-    return true;
-}
-
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
-                             double elapsed_time, uint64_t *wait)
-{
-    uint64_t iops_limit = 0;
-    double   ios_limit, ios_base;
-    double   slice_time, wait_time;
-
-    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
-    } else if (bs->io_limits.iops[is_write]) {
-        iops_limit = bs->io_limits.iops[is_write];
-    } else {
-        if (wait) {
-            *wait = 0;
-        }
-
-        return false;
-    }
-
-    slice_time = bs->slice_end - bs->slice_start;
-    slice_time /= (NANOSECONDS_PER_SECOND);
-    ios_limit  = iops_limit * slice_time;
-    ios_base   = bs->slice_submitted.ios[is_write];
-    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-        ios_base += bs->slice_submitted.ios[!is_write];
-    }
-
-    if (ios_base + 1 <= ios_limit) {
-        if (wait) {
-            *wait = 0;
-        }
-
-        return false;
-    }
-
-    /* Calc approx time to dispatch, in seconds */
-    wait_time = (ios_base + 1) / iops_limit;
-    if (wait_time > elapsed_time) {
-        wait_time = wait_time - elapsed_time;
-    } else {
-        wait_time = 0;
-    }
-
-    /* Exceeded current slice, extend it by another slice time */
-    bs->slice_end += BLOCK_IO_SLICE_TIME;
-    if (wait) {
-        *wait = wait_time * NANOSECONDS_PER_SECOND;
-    }
-
-    return true;
-}
-
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
-                           bool is_write, int64_t *wait)
-{
-    int64_t  now, max_wait;
-    uint64_t bps_wait = 0, iops_wait = 0;
-    double   elapsed_time;
-    int      bps_ret, iops_ret;
-
-    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    if (now > bs->slice_end) {
-        bs->slice_start = now;
-        bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
-        memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
-    }
-
-    elapsed_time  = now - bs->slice_start;
-    elapsed_time  /= (NANOSECONDS_PER_SECOND);
-
-    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
-                                      is_write, elapsed_time, &bps_wait);
-    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
-                                      elapsed_time, &iops_wait);
-    if (bps_ret || iops_ret) {
-        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
-        if (wait) {
-            *wait = max_wait;
-        }
-
-        now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-        if (bs->slice_end < now + max_wait) {
-            bs->slice_end = now + max_wait;
-        }
-
-        return true;
-    }
-
-    if (wait) {
-        *wait = 0;
-    }
-
-    bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
-                                           BDRV_SECTOR_SIZE;
-    bs->slice_submitted.ios[is_write]++;
-
-    return false;
-}
-
 /**************************************************************/
 /* async block device emulation */
 
diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..cac3919 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -223,18 +223,15 @@ void bdrv_query_info(BlockDriverState *bs,
         info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
         if (bs->io_limits_enabled) {
-            info->inserted->bps =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
-            info->inserted->bps_rd =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
-            info->inserted->bps_wr =
-                           bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
-            info->inserted->iops =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
-            info->inserted->iops_rd =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
-            info->inserted->iops_wr =
-                           bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
+            ThrottleConfig cfg;
+            throttle_get_config(&bs->throttle_state, &cfg);
+            info->inserted->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
+            info->inserted->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
+            info->inserted->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
+
+            info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
+            info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
+            info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
         }
 
         bs0 = bs;
diff --git a/blockdev.c b/blockdev.c
index e70e16e..5f5ba96 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -280,32 +280,16 @@ static int parse_block_error_action(const char *buf, bool is_read)
     }
 }
 
-static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp)
+static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
-    bool bps_flag;
-    bool iops_flag;
-
-    assert(io_limits);
-
-    bps_flag  = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
-                 && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
-                 || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
-    iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
-                 && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
-                 || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
-    if (bps_flag || iops_flag) {
-        error_setg(errp, "bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
-                         "cannot be used at the same time");
+    if (throttle_conflicting(cfg)) {
+        error_setg(errp, "bps/iops/max total values and read/write values"
+                         " cannot be used at the same time");
         return false;
     }
 
-    if (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] < 0 ||
-        io_limits->bps[BLOCK_IO_LIMIT_WRITE] < 0 ||
-        io_limits->bps[BLOCK_IO_LIMIT_READ] < 0 ||
-        io_limits->iops[BLOCK_IO_LIMIT_TOTAL] < 0 ||
-        io_limits->iops[BLOCK_IO_LIMIT_WRITE] < 0 ||
-        io_limits->iops[BLOCK_IO_LIMIT_READ] < 0) {
-        error_setg(errp, "bps and iops values must be 0 or greater");
+    if (!throttle_is_valid(cfg)) {
+        error_setg(errp, "bps/iops/maxs values must be 0 or greater");
         return false;
     }
 
@@ -330,7 +314,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     int on_read_error, on_write_error;
     const char *devaddr;
     DriveInfo *dinfo;
-    BlockIOLimit io_limits;
+    ThrottleConfig cfg;
     int snapshot = 0;
     bool copy_on_read;
     int ret;
@@ -496,20 +480,31 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     }
 
     /* disk I/O throttling */
-    io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  =
+    memset(&cfg, 0, sizeof(cfg));
+    cfg.buckets[THROTTLE_BPS_TOTAL].avg =
         qemu_opt_get_number(opts, "throttling.bps-total", 0);
-    io_limits.bps[BLOCK_IO_LIMIT_READ]   =
+    cfg.buckets[THROTTLE_BPS_READ].avg  =
         qemu_opt_get_number(opts, "throttling.bps-read", 0);
-    io_limits.bps[BLOCK_IO_LIMIT_WRITE]  =
+    cfg.buckets[THROTTLE_BPS_WRITE].avg =
         qemu_opt_get_number(opts, "throttling.bps-write", 0);
-    io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
+    cfg.buckets[THROTTLE_OPS_TOTAL].avg =
         qemu_opt_get_number(opts, "throttling.iops-total", 0);
-    io_limits.iops[BLOCK_IO_LIMIT_READ]  =
+    cfg.buckets[THROTTLE_OPS_READ].avg =
         qemu_opt_get_number(opts, "throttling.iops-read", 0);
-    io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
+    cfg.buckets[THROTTLE_OPS_WRITE].avg =
         qemu_opt_get_number(opts, "throttling.iops-write", 0);
 
-    if (!do_check_io_limits(&io_limits, &error)) {
+    cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
+    cfg.buckets[THROTTLE_BPS_READ].max  = 0;
+    cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
+
+    cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
+    cfg.buckets[THROTTLE_OPS_READ].max  = 0;
+    cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
+
+    cfg.op_size = 0;
+
+    if (!check_throttle_config(&cfg, &error)) {
         error_report("%s", error_get_pretty(error));
         error_free(error);
         return NULL;
@@ -636,7 +631,10 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
     bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
 
     /* disk I/O throttling */
-    bdrv_set_io_limits(dinfo->bdrv, &io_limits);
+    if (throttle_enabled(&cfg)) {
+        bdrv_io_limits_enable(dinfo->bdrv);
+        bdrv_set_io_limits(dinfo->bdrv, &cfg);
+    }
 
     switch(type) {
     case IF_IDE:
@@ -1250,7 +1248,7 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
                                int64_t bps_wr, int64_t iops, int64_t iops_rd,
                                int64_t iops_wr, Error **errp)
 {
-    BlockIOLimit io_limits;
+    ThrottleConfig cfg;
     BlockDriverState *bs;
 
     bs = bdrv_find(device);
@@ -1259,27 +1257,37 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
         return;
     }
 
-    io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps;
-    io_limits.bps[BLOCK_IO_LIMIT_READ]  = bps_rd;
-    io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr;
-    io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops;
-    io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd;
-    io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr;
+    memset(&cfg, 0, sizeof(cfg));
+    cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
+    cfg.buckets[THROTTLE_BPS_READ].avg  = bps_rd;
+    cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
+
+    cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops;
+    cfg.buckets[THROTTLE_OPS_READ].avg  = iops_rd;
+    cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr;
+
+    cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
+    cfg.buckets[THROTTLE_BPS_READ].max  = 0;
+    cfg.buckets[THROTTLE_BPS_WRITE].max = 0;
+
+    cfg.buckets[THROTTLE_OPS_TOTAL].max = 0;
+    cfg.buckets[THROTTLE_OPS_READ].max  = 0;
+    cfg.buckets[THROTTLE_OPS_WRITE].max = 0;
 
-    if (!do_check_io_limits(&io_limits, errp)) {
+    cfg.op_size = 0;
+
+    if (!check_throttle_config(&cfg, errp)) {
         return;
     }
 
-    bs->io_limits = io_limits;
-
-    if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
+    if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
         bdrv_io_limits_enable(bs);
-    } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
+    } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
         bdrv_io_limits_disable(bs);
-    } else {
-        if (bs->block_timer) {
-            timer_mod(bs->block_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
-        }
+    }
+
+    if (bs->io_limits_enabled) {
+        bdrv_set_io_limits(bs, &cfg);
     }
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index e6b391c..6207ff2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -107,7 +107,6 @@ void bdrv_info_stats(Monitor *mon, QObject **ret_data);
 /* disk I/O throttling */
 void bdrv_io_limits_enable(BlockDriverState *bs);
 void bdrv_io_limits_disable(BlockDriverState *bs);
-bool bdrv_io_limits_enabled(BlockDriverState *bs);
 
 void bdrv_init(void);
 void bdrv_init_with_whitelist(void);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8012e25..c3c9c61 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -35,18 +35,12 @@
 #include "qemu/hbitmap.h"
 #include "block/snapshot.h"
 #include "qemu/main-loop.h"
+#include "qemu/throttle.h"
 
 #define BLOCK_FLAG_ENCRYPT          1
 #define BLOCK_FLAG_COMPAT6          4
 #define BLOCK_FLAG_LAZY_REFCOUNTS   8
 
-#define BLOCK_IO_LIMIT_READ     0
-#define BLOCK_IO_LIMIT_WRITE    1
-#define BLOCK_IO_LIMIT_TOTAL    2
-
-#define BLOCK_IO_SLICE_TIME     100000000
-#define NANOSECONDS_PER_SECOND  1000000000.0
-
 #define BLOCK_OPT_SIZE              "size"
 #define BLOCK_OPT_ENCRYPT           "encryption"
 #define BLOCK_OPT_COMPAT6           "compat6"
@@ -70,17 +64,6 @@ typedef struct BdrvTrackedRequest {
     CoQueue wait_queue; /* coroutines blocked on this request */
 } BdrvTrackedRequest;
 
-
-typedef struct BlockIOLimit {
-    int64_t bps[3];
-    int64_t iops[3];
-} BlockIOLimit;
-
-typedef struct BlockIOBaseValue {
-    uint64_t bytes[2];
-    uint64_t ios[2];
-} BlockIOBaseValue;
-
 struct BlockDriver {
     const char *format_name;
     int instance_size;
@@ -264,13 +247,9 @@ struct BlockDriverState {
     /* number of in-flight copy-on-read requests */
     unsigned int copy_on_read_in_flight;
 
-    /* the time for latest disk I/O */
-    int64_t slice_start;
-    int64_t slice_end;
-    BlockIOLimit io_limits;
-    BlockIOBaseValue slice_submitted;
-    CoQueue      throttled_reqs;
-    QEMUTimer    *block_timer;
+    /* I/O throttling */
+    ThrottleState throttle_state;
+    CoQueue      throttled_reqs[2];
     bool         io_limits_enabled;
 
     /* I/O stats (display with "info blockstats"). */
@@ -312,7 +291,8 @@ struct BlockDriverState {
 int get_tmp_filename(char *filename, int size);
 
 void bdrv_set_io_limits(BlockDriverState *bs,
-                        BlockIOLimit *io_limits);
+                        ThrottleConfig *cfg);
+
 
 /**
  * bdrv_add_before_write_notifier:
commit f17cfe813c40792cc1622ba447581c906beb091e
Author: Benoît Canet <benoit at irqsave.net>
Date:   Mon Sep 2 14:14:38 2013 +0200

    throttle: Add units tests
    
    Signed-off-by: Benoit Canet <benoit at irqsave.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/Makefile b/tests/Makefile
index baba9e9..c13fefc 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -31,6 +31,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF)
 check-unit-y += tests/test-iov$(EXESUF)
 gcov-files-test-iov-y = util/iov.c
 check-unit-y += tests/test-aio$(EXESUF)
+check-unit-y += tests/test-throttle$(EXESUF)
 gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
@@ -120,6 +121,7 @@ tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a
 tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
+tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
 tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
new file mode 100644
index 0000000..7608126
--- /dev/null
+++ b/tests/test-throttle.c
@@ -0,0 +1,481 @@
+/*
+ * Throttle infrastructure tests
+ *
+ * Copyright Nodalink, SARL. 2013
+ *
+ * Authors:
+ *  Benoît Canet     <benoit.canet at irqsave.net>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <math.h>
+#include "qemu/throttle.h"
+
+LeakyBucket    bkt;
+ThrottleConfig cfg;
+ThrottleState  ts;
+
+/* compare two doubles for equality within an absolute tolerance of 1e-6 */
+static bool double_cmp(double x, double y)
+{
+    return fabs(x - y) < 1e-6;
+}
+
+/* tests for single bucket operations */
+static void test_leak_bucket(void)
+{
+    /* set initial values: 150 units/s average, 1.5 units in the bucket */
+    bkt.avg = 150;
+    bkt.max = 15;
+    bkt.level = 1.5;
+
+    /* leak for the time one unit takes at 150 units/s: level drops by 1 */
+    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    g_assert(bkt.avg == 150);
+    g_assert(bkt.max == 15);
+    g_assert(double_cmp(bkt.level, 0.5));
+
+    /* leak again, emptying the bucket */
+    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    g_assert(bkt.avg == 150);
+    g_assert(bkt.max == 15);
+    g_assert(double_cmp(bkt.level, 0));
+
+    /* check that the bucket level won't go below zero */
+    throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+    g_assert(bkt.avg == 150);
+    g_assert(bkt.max == 15);
+    g_assert(double_cmp(bkt.level, 0));
+}
+
+static void test_compute_wait(void)
+{
+    int64_t wait;
+    int64_t result;
+
+    /* no limit set (avg is 0): no wait expected */
+    bkt.avg = 0;
+    bkt.max = 15;
+    bkt.level = 1.5;
+    wait = throttle_compute_wait(&bkt);
+    g_assert(!wait);
+
+    /* level exactly at max (zero delta): no wait expected */
+    bkt.avg = 150;
+    bkt.max = 15;
+    bkt.level = 15;
+    wait = throttle_compute_wait(&bkt);
+    g_assert(!wait);
+
+    /* level below max (below zero delta): no wait expected */
+    bkt.avg = 150;
+    bkt.max = 15;
+    bkt.level = 9;
+    wait = throttle_compute_wait(&bkt);
+    g_assert(!wait);
+
+    /* half an operation above max */
+    bkt.avg = 150;
+    bkt.max = 15;
+    bkt.level = 15.5;
+    wait = throttle_compute_wait(&bkt);
+    /* time required to do half an operation */
+    result = (int64_t)  NANOSECONDS_PER_SECOND / 150 / 2;
+    g_assert(wait == result);
+}
+
+/* functions to test ThrottleState initialization/destroy methods */
+static void read_timer_cb(void *opaque)
+{
+}
+
+static void write_timer_cb(void *opaque)
+{
+}
+
+static void test_init(void)
+{
+    int i;
+
+    /* fill the structure with non-zero garbage to detect uncleared fields */
+    memset(&ts, 1, sizeof(ts));
+
+    /* init the structure */
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+
+    /* check initialized fields */
+    g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
+    g_assert(ts.timers[0]);
+    g_assert(ts.timers[1]);
+
+    /* check that the other fields were cleared */
+    g_assert(!ts.previous_leak);
+    g_assert(!ts.cfg.op_size);
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        g_assert(!ts.cfg.buckets[i].avg);
+        g_assert(!ts.cfg.buckets[i].max);
+        g_assert(!ts.cfg.buckets[i].level);
+    }
+
+    throttle_destroy(&ts);
+}
+
+static void test_destroy(void)
+{
+    int i;
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_destroy(&ts);
+    for (i = 0; i < 2; i++) {
+        g_assert(!ts.timers[i]);
+    }
+}
+
+/* function to test throttle_config and throttle_get_config */
+static void test_config_functions(void)
+{
+    int i;
+    ThrottleConfig orig_cfg, final_cfg;
+
+    orig_cfg.buckets[THROTTLE_BPS_TOTAL].avg = 153;
+    orig_cfg.buckets[THROTTLE_BPS_READ].avg  = 56;
+    orig_cfg.buckets[THROTTLE_BPS_WRITE].avg = 1;
+
+    orig_cfg.buckets[THROTTLE_OPS_TOTAL].avg = 150;
+    orig_cfg.buckets[THROTTLE_OPS_READ].avg  = 69;
+    orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23;
+
+    orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */
+    orig_cfg.buckets[THROTTLE_BPS_READ].max  = 1; /* should not be corrected */
+    orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120;
+
+    orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150;
+    orig_cfg.buckets[THROTTLE_OPS_READ].max  = 400;
+    orig_cfg.buckets[THROTTLE_OPS_WRITE].max = 500;
+
+    orig_cfg.buckets[THROTTLE_BPS_TOTAL].level = 45;
+    orig_cfg.buckets[THROTTLE_BPS_READ].level  = 65;
+    orig_cfg.buckets[THROTTLE_BPS_WRITE].level = 23;
+
+    orig_cfg.buckets[THROTTLE_OPS_TOTAL].level = 1;
+    orig_cfg.buckets[THROTTLE_OPS_READ].level  = 90;
+    orig_cfg.buckets[THROTTLE_OPS_WRITE].level = 75;
+
+    orig_cfg.op_size = 1;
+
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    /* throttle_init resets the structure, so previous_leak should be zero */
+    g_assert(!ts.previous_leak);
+    throttle_config(&ts, &orig_cfg);
+
+    /* has previous_leak been initialized by throttle_config? */
+    g_assert(ts.previous_leak);
+
+    /* get back the fixed configuration */
+    throttle_get_config(&ts, &final_cfg);
+
+    throttle_destroy(&ts);
+
+    g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].avg == 153);
+    g_assert(final_cfg.buckets[THROTTLE_BPS_READ].avg  == 56);
+    g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].avg == 1);
+
+    g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].avg == 150);
+    g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg  == 69);
+    g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23);
+
+    g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */
+    g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max  == 1);   /* not fixed */
+    g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120);
+
+    g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150);
+    g_assert(final_cfg.buckets[THROTTLE_OPS_READ].max  == 400);
+    g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].max == 500);
+
+    g_assert(final_cfg.op_size == 1);
+
+    /* check that the bucket levels have been cleared */
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        g_assert(!final_cfg.buckets[i].level);
+    }
+}
+
+/* functions to test whether throttling is enabled by a config */
+static void set_cfg_value(bool is_max, int index, int value)
+{
+    if (is_max) {
+        cfg.buckets[index].max = value;
+    } else {
+        cfg.buckets[index].avg = value;
+    }
+}
+
+static void test_enabled(void)
+{
+    int i;
+
+    memset(&cfg, 0, sizeof(cfg));
+    g_assert(!throttle_enabled(&cfg));
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        memset(&cfg, 0, sizeof(cfg));
+        set_cfg_value(false, i, 150);
+        g_assert(throttle_enabled(&cfg));
+    }
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        memset(&cfg, 0, sizeof(cfg));
+        set_cfg_value(false, i, -150);
+        g_assert(!throttle_enabled(&cfg));
+    }
+}
+
+/* test functions for throttle_conflicting */
+
+static void test_conflicts_for_one_set(bool is_max,
+                                       int total,
+                                       int read,
+                                       int write)
+{
+    memset(&cfg, 0, sizeof(cfg));
+    g_assert(!throttle_conflicting(&cfg));
+
+    set_cfg_value(is_max, total, 1);
+    set_cfg_value(is_max, read,  1);
+    g_assert(throttle_conflicting(&cfg));
+
+    memset(&cfg, 0, sizeof(cfg));
+    set_cfg_value(is_max, total, 1);
+    set_cfg_value(is_max, write, 1);
+    g_assert(throttle_conflicting(&cfg));
+
+    memset(&cfg, 0, sizeof(cfg));
+    set_cfg_value(is_max, total, 1);
+    set_cfg_value(is_max, read,  1);
+    set_cfg_value(is_max, write, 1);
+    g_assert(throttle_conflicting(&cfg));
+
+    memset(&cfg, 0, sizeof(cfg));
+    set_cfg_value(is_max, total, 1);
+    g_assert(!throttle_conflicting(&cfg));
+
+    memset(&cfg, 0, sizeof(cfg));
+    set_cfg_value(is_max, read,  1);
+    set_cfg_value(is_max, write, 1);
+    g_assert(!throttle_conflicting(&cfg));
+}
+
+static void test_conflicting_config(void)
+{
+    /* bps average conflicts */
+    test_conflicts_for_one_set(false,
+                               THROTTLE_BPS_TOTAL,
+                               THROTTLE_BPS_READ,
+                               THROTTLE_BPS_WRITE);
+
+    /* ops average conflicts */
+    test_conflicts_for_one_set(false,
+                               THROTTLE_OPS_TOTAL,
+                               THROTTLE_OPS_READ,
+                               THROTTLE_OPS_WRITE);
+
+    /* bps max conflicts */
+    test_conflicts_for_one_set(true,
+                               THROTTLE_BPS_TOTAL,
+                               THROTTLE_BPS_READ,
+                               THROTTLE_BPS_WRITE);
+    /* ops max conflicts */
+    test_conflicts_for_one_set(true,
+                               THROTTLE_OPS_TOTAL,
+                               THROTTLE_OPS_READ,
+                               THROTTLE_OPS_WRITE);
+}
+/* functions to test the throttle_is_valid function */
+static void test_is_valid_for_value(int value, bool should_be_valid)
+{
+    int is_max, index;
+    for (is_max = 0; is_max < 2; is_max++) {
+        for (index = 0; index < BUCKETS_COUNT; index++) {
+            memset(&cfg, 0, sizeof(cfg));
+            set_cfg_value(is_max, index, value);
+            g_assert(throttle_is_valid(&cfg) == should_be_valid);
+        }
+    }
+}
+
+static void test_is_valid(void)
+{
+    /* negative numbers are invalid */
+    test_is_valid_for_value(-1, false);
+    /* zero is valid */
+    test_is_valid_for_value(0, true);
+    /* positive numbers are valid */
+    test_is_valid_for_value(1, true);
+}
+
+static void test_have_timer(void)
+{
+    /* zero the structure */
+    memset(&ts, 0, sizeof(ts));
+
+    /* no timer set should return false */
+    g_assert(!throttle_have_timer(&ts));
+
+    /* init the structure */
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+
+    /* timer set by init should return true */
+    g_assert(throttle_have_timer(&ts));
+
+    throttle_destroy(&ts);
+}
+
+/* Account one read and one write of @size on a freshly configured
+ * ThrottleState and compare the resulting bucket levels.
+ *
+ * Only the three buckets selected by @is_ops (bps or ops) get @avg as their
+ * limit and have their level checked.
+ * Returns true when the three levels match the expected results.
+ */
+static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
+                int size,                   /* size of the operation to do */
+                double avg,                 /* io limit */
+                uint64_t op_size,           /* ideal size of an io */
+                double total_result,        /* expected total bucket level */
+                double read_result,         /* expected read bucket level */
+                double write_result)        /* expected write bucket level */
+{
+    BucketType to_test[2][3] = { { THROTTLE_BPS_TOTAL,
+                                   THROTTLE_BPS_READ,
+                                   THROTTLE_BPS_WRITE, },
+                                 { THROTTLE_OPS_TOTAL,
+                                   THROTTLE_OPS_READ,
+                                   THROTTLE_OPS_WRITE, } };
+    const double expected[3] = { total_result, read_result, write_result };
+    ThrottleConfig cfg;
+    bool ok = true;
+    int i;
+
+    /* start from an all-zero config: only the tested avg limits and op_size
+     * are set below, no other field may be left indeterminate */
+    memset(&cfg, 0, sizeof(cfg));
+    for (i = 0; i < 3; i++) {
+        cfg.buckets[to_test[is_ops][i]].avg = avg;
+    }
+    cfg.op_size = op_size;
+
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_config(&ts, &cfg);
+
+    /* account a read */
+    throttle_account(&ts, false, size);
+    /* account a write */
+    throttle_account(&ts, true, size);
+
+    /* check the total, read and write bucket levels in that order */
+    for (i = 0; i < 3; i++) {
+        if (!double_cmp(ts.cfg.buckets[to_test[is_ops][i]].level,
+                        expected[i])) {
+            ok = false;
+        }
+    }
+
+    /* always destroy the ThrottleState, even when a level did not match */
+    throttle_destroy(&ts);
+
+    return ok;
+}
+
+static void test_accounting(void)
+{
+    /* tests for bps */
+
+    /* op of size 1 */
+    g_assert(do_test_accounting(false,
+                                1 * 512,
+                                150,
+                                0,
+                                1024,
+                                512,
+                                512));
+
+    /* op of size 2 */
+    g_assert(do_test_accounting(false,
+                                2 * 512,
+                                150,
+                                0,
+                                2048,
+                                1024,
+                                1024));
+
+    /* op of size 2 and orthogonal parameter change */
+    g_assert(do_test_accounting(false,
+                                2 * 512,
+                                150,
+                                17,
+                                2048,
+                                1024,
+                                1024));
+
+
+    /* tests for ops */
+
+    /* op of size 1 */
+    g_assert(do_test_accounting(true,
+                                1 * 512,
+                                150,
+                                0,
+                                2,
+                                1,
+                                1));
+
+    /* op of size 2 */
+    g_assert(do_test_accounting(true,
+                                2 *  512,
+                                150,
+                                0,
+                                2,
+                                1,
+                                1));
+
+    /* jumbo op accounting fragmentation : size 64 with op size of 13 units */
+    g_assert(do_test_accounting(true,
+                                64 * 512,
+                                150,
+                                13 * 512,
+                                (64.0 * 2) / 13,
+                                (64.0 / 13),
+                                (64.0 / 13)));
+
+    /* same with orthogonal parameters changes */
+    g_assert(do_test_accounting(true,
+                                64 * 512,
+                                300,
+                                13 * 512,
+                                (64.0 * 2) / 13,
+                                (64.0 / 13),
+                                (64.0 / 13)));
+}
+
+int main(int argc, char **argv)
+{
+    init_clocks();
+    do {} while (g_main_context_iteration(NULL, false));
+
+    /* tests in the same order as the header function declarations */
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/throttle/leak_bucket",        test_leak_bucket);
+    g_test_add_func("/throttle/compute_wait",       test_compute_wait);
+    g_test_add_func("/throttle/init",               test_init);
+    g_test_add_func("/throttle/destroy",            test_destroy);
+    g_test_add_func("/throttle/have_timer",         test_have_timer);
+    g_test_add_func("/throttle/config/enabled",     test_enabled);
+    g_test_add_func("/throttle/config/conflicting", test_conflicting_config);
+    g_test_add_func("/throttle/config/is_valid",    test_is_valid);
+    g_test_add_func("/throttle/config_functions",   test_config_functions);
+    g_test_add_func("/throttle/accounting",         test_accounting);
+    return g_test_run();
+}
+
commit 5ddfffbdc5e024014b77816dab88d372ad95a5b0
Author: Benoît Canet <benoit at irqsave.net>
Date:   Mon Sep 2 14:14:37 2013 +0200

    throttle: Add a new throttling API implementing continuous leaky bucket.
    
    Implement the continuous leaky bucket algorithm devised on IRC as a separate
    module.
    
    Signed-off-by: Benoit Canet <benoit at irqsave.net>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
new file mode 100644
index 0000000..ab29b0b
--- /dev/null
+++ b/include/qemu/throttle.h
@@ -0,0 +1,110 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet at irqsave.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef THROTTLE_H
+#define THROTTLE_H
+
+#include <stdint.h>
+#include "qemu-common.h"
+#include "qemu/timer.h"
+
+#define NANOSECONDS_PER_SECOND  1000000000.0
+
+typedef enum {
+    THROTTLE_BPS_TOTAL,
+    THROTTLE_BPS_READ,
+    THROTTLE_BPS_WRITE,
+    THROTTLE_OPS_TOTAL,
+    THROTTLE_OPS_READ,
+    THROTTLE_OPS_WRITE,
+    BUCKETS_COUNT,
+} BucketType;
+
+/*
+ * The max parameter of the leaky bucket throttling algorithm can be used to
+ * allow the guest to do bursts.
+ * The max value is a pool of I/O that the guest can use without being throttled
+ * at all. Throttling is triggered once this pool is empty.
+ */
+
+typedef struct LeakyBucket {
+    double  avg;              /* average goal in units per second */
+    double  max;              /* leaky bucket max burst in units */
+    double  level;            /* bucket level in units */
+} LeakyBucket;
+
+/* The following structure is used to configure a ThrottleState.
+ * It contains a bit of state: the level field of the LeakyBucket structure.
+ * However this keeps the code clean, and the level field is reset to
+ * zero at the right time.
+ */
+typedef struct ThrottleConfig {
+    LeakyBucket buckets[BUCKETS_COUNT]; /* leaky buckets */
+    uint64_t op_size;         /* size of an operation in bytes */
+} ThrottleConfig;
+
+typedef struct ThrottleState {
+    ThrottleConfig cfg;       /* configuration */
+    int64_t previous_leak;    /* timestamp of the last leak done */
+    QEMUTimer * timers[2];    /* timers used to do the throttling */
+    QEMUClockType clock_type; /* the clock used */
+} ThrottleState;
+
+/* operations on single leaky buckets */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns);
+
+int64_t throttle_compute_wait(LeakyBucket *bkt);
+
+/* expose timer computation function for unit tests */
+bool throttle_compute_timer(ThrottleState *ts,
+                            bool is_write,
+                            int64_t now,
+                            int64_t *next_timestamp);
+
+/* init/destroy cycle */
+void throttle_init(ThrottleState *ts,
+                   QEMUClockType clock_type,
+                   void (read_timer)(void *),
+                   void (write_timer)(void *),
+                   void *timer_opaque);
+
+void throttle_destroy(ThrottleState *ts);
+
+bool throttle_have_timer(ThrottleState *ts);
+
+/* configuration */
+bool throttle_enabled(ThrottleConfig *cfg);
+
+bool throttle_conflicting(ThrottleConfig *cfg);
+
+bool throttle_is_valid(ThrottleConfig *cfg);
+
+void throttle_config(ThrottleState *ts, ThrottleConfig *cfg);
+
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg);
+
+/* usage */
+bool throttle_schedule_timer(ThrottleState *ts, bool is_write);
+
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size);
+
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index dc72ab0..2bb13a2 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -11,3 +11,4 @@ util-obj-y += iov.o aes.o qemu-config.o qemu-sockets.o uri.o notify.o
 util-obj-y += qemu-option.o qemu-progress.o
 util-obj-y += hexdump.o
 util-obj-y += crc32c.o
+util-obj-y += throttle.o
diff --git a/util/throttle.c b/util/throttle.c
new file mode 100644
index 0000000..02e6f15
--- /dev/null
+++ b/util/throttle.c
@@ -0,0 +1,396 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet at irqsave.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/throttle.h"
+#include "qemu/timer.h"
+
+/* Make a bucket leak proportionally to the elapsed time
+ *
+ * @bkt:      the bucket to leak from
+ * @delta_ns: the elapsed time in nanoseconds
+ */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
+{
+    double leak;
+
+    /* amount to leak: avg units per second times the elapsed seconds */
+    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+
+    /* make the bucket leak, never letting its level drop below zero */
+    bkt->level = MAX(bkt->level - leak, 0);
+}
+
+/* Compute the time elapsed since the last leak and leak every bucket for it
+ *
+ * @now:      the current timestamp in ns
+ */
+static void throttle_do_leak(ThrottleState *ts, int64_t now)
+{
+    /* compute the time elapsed since the last leak */
+    int64_t delta_ns = now - ts->previous_leak;
+    int i;
+
+    ts->previous_leak = now; /* recorded even when nothing leaks below */
+
+    if (delta_ns <= 0) {
+        return;
+    }
+
+    /* make each bucket leak */
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
+    }
+}
+
+/* Do the real job of computing the time to wait
+ *
+ * @limit: the throttling limit in units per second
+ * @extra: the number of units in excess that must be delayed
+ * @ret:   the time to wait in ns (the double result is converted to int64_t)
+ */
+static int64_t throttle_do_compute_wait(double limit, double extra)
+{
+    double wait = extra * NANOSECONDS_PER_SECOND;
+    wait /= limit;
+    return wait;
+}
+
+/* Compute the wait time in ns that a leaky bucket imposes
+ *
+ * @bkt: the leaky bucket we operate on
+ * @ret: the resulting wait time in ns or 0 if the operation can go through
+ */
+int64_t throttle_compute_wait(LeakyBucket *bkt)
+{
+    double extra; /* the number of extra units blocking the io */
+
+    if (!bkt->avg) {
+        return 0; /* no average set for this bucket: no wait */
+    }
+
+    extra = bkt->level - bkt->max;
+
+    if (extra <= 0) {
+        return 0; /* the level does not exceed max: no wait */
+    }
+
+    return throttle_do_compute_wait(bkt->avg, extra);
+}
+
+/* Compute the time this IO must wait: the longest wait among its buckets
+ *
+ * @is_write:   true if the current IO is a write, false if it's a read
+ * @ret:        time to wait in ns
+ */
+static int64_t throttle_compute_wait_for(ThrottleState *ts,
+                                         bool is_write)
+{
+    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_READ,
+                                   THROTTLE_OPS_READ},
+                                  {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_WRITE,
+                                   THROTTLE_OPS_WRITE}, };
+    int64_t wait, max_wait = 0;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        BucketType index = to_check[is_write][i];
+        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
+        if (wait > max_wait) {
+            max_wait = wait;
+        }
+    }
+
+    return max_wait;
+}
+
+/* Leak the buckets then compute the timer for this type of operation
+ *
+ * @is_write:   the type of operation
+ * @now:        the current clock timestamp
+ * @next_timestamp: the resulting timer, set to @now when no wait is needed
+ * @ret:        true if a timer must be set
+ */
+bool throttle_compute_timer(ThrottleState *ts,
+                            bool is_write,
+                            int64_t now,
+                            int64_t *next_timestamp)
+{
+    int64_t wait;
+
+    /* leak proportionally to the time elapsed */
+    throttle_do_leak(ts, now);
+
+    /* compute the wait time if any */
+    wait = throttle_compute_wait_for(ts, is_write);
+
+    /* if the code must wait compute when the next timer should fire */
+    if (wait) {
+        *next_timestamp = now + wait;
+        return true;
+    }
+
+    /* else no need to wait at all */
+    *next_timestamp = now;
+    return false;
+}
+
+/* To be called first on the ThrottleState: zero it and set up both timers */
+void throttle_init(ThrottleState *ts,
+                   QEMUClockType clock_type,
+                   QEMUTimerCB *read_timer_cb,
+                   QEMUTimerCB *write_timer_cb,
+                   void *timer_opaque)
+{
+    memset(ts, 0, sizeof(ThrottleState));
+
+    ts->clock_type = clock_type;
+    ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque);
+    ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque);
+}
+
+/* delete and free a timer, then set the caller's pointer to NULL */
+static void throttle_timer_destroy(QEMUTimer **timer)
+{
+    assert(*timer != NULL);
+
+    timer_del(*timer);
+    timer_free(*timer);
+    *timer = NULL;
+}
+
+/* To be called last on the ThrottleState: destroy both of its timers */
+void throttle_destroy(ThrottleState *ts)
+{
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        throttle_timer_destroy(&ts->timers[i]);
+    }
+}
+
+/* is a throttling timer configured (only the read timer is checked) */
+bool throttle_have_timer(ThrottleState *ts)
+{
+    if (ts->timers[0]) {
+        return true;
+    }
+
+    return false;
+}
+
+/* Tell if any throttling must be done, that is if any bucket has avg > 0
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if throttling must be done else false
+ */
+bool throttle_enabled(ThrottleConfig *cfg)
+{
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].avg > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Return true if any two throttling parameters conflict, that is if a total
+ * limit (avg or max) is set along with a read or write limit of the same kind
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if any conflict detected else false
+ */
+bool throttle_conflicting(ThrottleConfig *cfg)
+{
+    bool bps_flag, ops_flag;
+    bool bps_max_flag, ops_max_flag;
+
+    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_BPS_READ].avg ||
+                cfg->buckets[THROTTLE_BPS_WRITE].avg);
+
+    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_OPS_READ].avg ||
+                cfg->buckets[THROTTLE_OPS_WRITE].avg);
+
+    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
+                  (cfg->buckets[THROTTLE_BPS_READ].max  ||
+                   cfg->buckets[THROTTLE_BPS_WRITE].max);
+
+    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
+                   (cfg->buckets[THROTTLE_OPS_READ].max ||
+                   cfg->buckets[THROTTLE_OPS_WRITE].max);
+
+    return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
+}
+
+/* Check that a throttling configuration has no negative avg or max value
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if valid else false
+ */
+bool throttle_is_valid(ThrottleConfig *cfg)
+{
+    bool invalid = false;
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].avg < 0) {
+            invalid = true;
+        }
+    }
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].max < 0) {
+            invalid = true;
+        }
+    }
+
+    return !invalid;
+}
+
+/* fix bucket parameters: reset the level and default max when it is unset */
+static void throttle_fix_bucket(LeakyBucket *bkt)
+{
+    double min;
+
+    /* zero bucket level */
+    bkt->level = 0;
+
+    /* The following is done to cope with the Linux CFQ block scheduler
+     * which regroups reads and writes by blocks of 100ms in the guest.
+     * When there are two processes, one reading and one writing, cfq
+     * makes a pattern looking like the following:
+     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
+     * Having a max burst value of 100ms of the average (avg / 10) helps to
+     * smooth the throttling; it is only applied when avg is set and max is not
+     */
+    min = bkt->avg / 10;
+    if (bkt->avg && !bkt->max) {
+        bkt->max = min;
+    }
+}
+
+/* cancel a timer with timer_del(); the timer must not be NULL */
+static void throttle_cancel_timer(QEMUTimer *timer)
+{
+    assert(timer != NULL);
+
+    timer_del(timer);
+}
+
+/* Apply a new configuration to the throttle: the bucket levels are zeroed,
+ * the last leak timestamp is set to the current time, both timers are canceled
+ * @ts: the throttle state we are working on
+ * @cfg: the config to set
+ */
+void throttle_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+    int i;
+
+    ts->cfg = *cfg;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_fix_bucket(&ts->cfg.buckets[i]);
+    }
+
+    ts->previous_leak = qemu_clock_get_ns(ts->clock_type);
+
+    for (i = 0; i < 2; i++) {
+        throttle_cancel_timer(ts->timers[i]);
+    }
+}
+
+/* Copy the current config, including the bucket levels, to the caller
+ *
+ * @ts:  the throttle state we are working on
+ * @cfg: the config to write
+ */
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+    *cfg = ts->cfg;
+}
+
+
+/* Schedule the read or write timer if needed
+ *
+ * NOTE: this function is not unit tested due to its use of timer_mod
+ *
+ * @is_write: the type of operation (read/write)
+ * @ret:      true if the request must wait (timer armed or pending) else false
+ */
+bool throttle_schedule_timer(ThrottleState *ts, bool is_write)
+{
+    int64_t now = qemu_clock_get_ns(ts->clock_type);
+    int64_t next_timestamp;
+    bool must_wait;
+
+    must_wait = throttle_compute_timer(ts,
+                                       is_write,
+                                       now,
+                                       &next_timestamp);
+
+    /* request not throttled */
+    if (!must_wait) {
+        return false;
+    }
+
+    /* request throttled and timer pending -> do nothing */
+    if (timer_pending(ts->timers[is_write])) {
+        return true;
+    }
+
+    /* request throttled and timer not pending -> arm timer */
+    timer_mod(ts->timers[is_write], next_timestamp);
+    return true;
+}
+
+/* Do the accounting for this operation in the total and per-direction buckets
+ *
+ * @is_write: the type of operation (read/write)
+ * @size:     the size of the operation in bytes
+ */
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+{
+    double units = 1.0;
+
+    /* if cfg.op_size is set and size exceeds it, count size / op_size units */
+    if (ts->cfg.op_size && size > ts->cfg.op_size) {
+        units = (double) size / ts->cfg.op_size;
+    }
+
+    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
+    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+
+    if (is_write) {
+        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
+        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
+    } else {
+        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
+        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
+    }
+}
+


More information about the Spice-commits mailing list